mirror of
https://github.com/colanode/colanode.git
synced 2025-12-16 11:47:47 +01:00
Add user-based access control to document and node retrieval services
This commit is contained in:
@@ -110,7 +110,8 @@ export const assistantResponseHandler = async (
|
|||||||
);
|
);
|
||||||
const documentDocs = await documentRetrievalService.retrieve(
|
const documentDocs = await documentRetrievalService.retrieve(
|
||||||
rewrittenQuery,
|
rewrittenQuery,
|
||||||
workspaceId
|
workspaceId,
|
||||||
|
user.id
|
||||||
);
|
);
|
||||||
const allContext = [...nodeDocs, ...documentDocs];
|
const allContext = [...nodeDocs, ...documentDocs];
|
||||||
const reranked = await rerankDocuments(
|
const reranked = await rerankDocuments(
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import { OpenAIEmbeddings } from '@langchain/openai';
|
|||||||
import { ChunkingService } from '@/services/chunking-service';
|
import { ChunkingService } from '@/services/chunking-service';
|
||||||
import { database } from '@/data/database';
|
import { database } from '@/data/database';
|
||||||
import { configuration } from '@/lib/configuration';
|
import { configuration } from '@/lib/configuration';
|
||||||
import { CreateDocumentEmbedding } from '@/data/schema';
|
import { CreateDocumentEmbedding, SelectNode } from '@/data/schema';
|
||||||
import { sql } from 'kysely';
|
import { sql } from 'kysely';
|
||||||
import { fetchNode } from '@/lib/nodes';
|
import { fetchNode } from '@/lib/nodes';
|
||||||
import { DocumentContent, extractBlockTexts } from '@colanode/core';
|
import { DocumentContent, extractBlockTexts } from '@colanode/core';
|
||||||
@@ -22,47 +22,26 @@ declare module '@/types/jobs' {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const extractDocumentText = async (
|
const extractDocumentText = async (
|
||||||
documentId: string,
|
node: SelectNode,
|
||||||
content: DocumentContent
|
content: DocumentContent
|
||||||
): Promise<string> => {
|
): Promise<string> => {
|
||||||
const sections: string[] = [];
|
|
||||||
|
|
||||||
const node = await fetchNode(documentId);
|
|
||||||
if (!node) {
|
|
||||||
return '';
|
|
||||||
}
|
|
||||||
|
|
||||||
const nodeModel = getNodeModel(node.attributes.type);
|
const nodeModel = getNodeModel(node.attributes.type);
|
||||||
if (!nodeModel) {
|
if (!nodeModel) {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
const nodeName = nodeModel.getName(documentId, node.attributes);
|
const documentText = nodeModel.getDocumentText(node.id, content);
|
||||||
if (nodeName) {
|
|
||||||
sections.push(`${node.attributes.type} "${nodeName}"`);
|
|
||||||
}
|
|
||||||
|
|
||||||
const attributesText = nodeModel.getAttributesText(
|
|
||||||
documentId,
|
|
||||||
node.attributes
|
|
||||||
);
|
|
||||||
if (attributesText) {
|
|
||||||
sections.push(attributesText);
|
|
||||||
}
|
|
||||||
|
|
||||||
const documentText = nodeModel.getDocumentText(documentId, content);
|
|
||||||
if (documentText) {
|
if (documentText) {
|
||||||
sections.push(documentText);
|
return documentText;
|
||||||
} else {
|
} else {
|
||||||
// Fallback to block text extraction if the model doesn't handle it
|
// Fallback to block text extraction if the model doesn't handle it
|
||||||
const blocksText = extractBlockTexts(documentId, content.blocks);
|
const blocksText = extractBlockTexts(node.id, content.blocks);
|
||||||
if (blocksText) {
|
if (blocksText) {
|
||||||
sections.push('Content:');
|
return blocksText;
|
||||||
sections.push(blocksText);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return sections.filter(Boolean).join('\n\n');
|
return '';
|
||||||
};
|
};
|
||||||
|
|
||||||
export const embedDocumentHandler = async (input: {
|
export const embedDocumentHandler = async (input: {
|
||||||
@@ -93,7 +72,7 @@ export const embedDocumentHandler = async (input: {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const text = await extractDocumentText(documentId, document.content);
|
const text = await extractDocumentText(node, document.content);
|
||||||
if (!text || text.trim() === '') {
|
if (!text || text.trim() === '') {
|
||||||
await database
|
await database
|
||||||
.deleteFrom('document_embeddings')
|
.deleteFrom('document_embeddings')
|
||||||
@@ -105,7 +84,7 @@ export const embedDocumentHandler = async (input: {
|
|||||||
const chunkingService = new ChunkingService();
|
const chunkingService = new ChunkingService();
|
||||||
const chunks = await chunkingService.chunkText(text, {
|
const chunks = await chunkingService.chunkText(text, {
|
||||||
type: 'document',
|
type: 'document',
|
||||||
id: documentId,
|
node: node,
|
||||||
});
|
});
|
||||||
const embeddings = new OpenAIEmbeddings({
|
const embeddings = new OpenAIEmbeddings({
|
||||||
apiKey: configuration.ai.embedding.apiKey,
|
apiKey: configuration.ai.embedding.apiKey,
|
||||||
|
|||||||
@@ -26,33 +26,15 @@ const extractNodeText = async (
|
|||||||
): Promise<string> => {
|
): Promise<string> => {
|
||||||
if (!node) return '';
|
if (!node) return '';
|
||||||
|
|
||||||
// Get the node model to use its text extraction methods
|
|
||||||
const nodeModel = getNodeModel(node.attributes.type);
|
const nodeModel = getNodeModel(node.attributes.type);
|
||||||
if (!nodeModel) return '';
|
if (!nodeModel) return '';
|
||||||
|
|
||||||
const sections: string[] = [];
|
|
||||||
|
|
||||||
// Get the node's name if available
|
|
||||||
const nodeName = nodeModel.getName(nodeId, node.attributes);
|
|
||||||
if (nodeName) {
|
|
||||||
sections.push(`${node.attributes.type} "${nodeName}"`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get text from attributes (this handles message content, record fields, etc.)
|
|
||||||
const attributesText = nodeModel.getAttributesText(nodeId, node.attributes);
|
const attributesText = nodeModel.getAttributesText(nodeId, node.attributes);
|
||||||
if (attributesText) {
|
if (attributesText) {
|
||||||
sections.push(attributesText);
|
return attributesText;
|
||||||
}
|
}
|
||||||
|
|
||||||
// For records, add database context
|
return '';
|
||||||
if (node.attributes.type === 'record') {
|
|
||||||
const databaseNode = await fetchNode(node.attributes.databaseId);
|
|
||||||
if (databaseNode?.attributes.type === 'database') {
|
|
||||||
sections.push(`In database "${databaseNode.attributes.name}"`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return sections.filter(Boolean).join('\n');
|
|
||||||
};
|
};
|
||||||
|
|
||||||
export const embedNodeHandler = async (input: {
|
export const embedNodeHandler = async (input: {
|
||||||
@@ -87,7 +69,7 @@ export const embedNodeHandler = async (input: {
|
|||||||
const chunkingService = new ChunkingService();
|
const chunkingService = new ChunkingService();
|
||||||
const chunks = await chunkingService.chunkText(text, {
|
const chunks = await chunkingService.chunkText(text, {
|
||||||
type: 'node',
|
type: 'node',
|
||||||
id: nodeId,
|
node,
|
||||||
});
|
});
|
||||||
const embeddings = new OpenAIEmbeddings({
|
const embeddings = new OpenAIEmbeddings({
|
||||||
apiKey: configuration.ai.embedding.apiKey,
|
apiKey: configuration.ai.embedding.apiKey,
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ import type { SelectNode, SelectDocument, SelectUser } from '@/data/schema';
|
|||||||
|
|
||||||
type BaseMetadata = {
|
type BaseMetadata = {
|
||||||
id: string;
|
id: string;
|
||||||
type: string;
|
|
||||||
name?: string;
|
name?: string;
|
||||||
createdAt: Date;
|
createdAt: Date;
|
||||||
createdBy: string;
|
createdBy: string;
|
||||||
@@ -21,22 +20,23 @@ type BaseMetadata = {
|
|||||||
id: string;
|
id: string;
|
||||||
type: string;
|
type: string;
|
||||||
name?: string;
|
name?: string;
|
||||||
|
path?: string;
|
||||||
};
|
};
|
||||||
collaborators?: Array<{ id: string; name: string }>;
|
collaborators?: Array<{ id: string; name: string; role: string }>;
|
||||||
|
lastUpdated?: Date;
|
||||||
|
updatedBy?: { id: string; name: string };
|
||||||
|
workspace?: { id: string; name: string };
|
||||||
};
|
};
|
||||||
|
|
||||||
export type NodeMetadata = {
|
export type NodeMetadata = BaseMetadata & {
|
||||||
type: 'node';
|
type: 'node';
|
||||||
metadata: BaseMetadata & {
|
nodeType: string;
|
||||||
fields?: Record<string, unknown> | null;
|
fields?: Record<string, unknown> | null;
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
export type DocumentMetadata = {
|
export type DocumentMetadata = BaseMetadata & {
|
||||||
type: 'document';
|
type: 'document';
|
||||||
metadata: BaseMetadata & {
|
content: DocumentContent;
|
||||||
content: DocumentContent;
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
export type ChunkingMetadata = NodeMetadata | DocumentMetadata;
|
export type ChunkingMetadata = NodeMetadata | DocumentMetadata;
|
||||||
@@ -44,7 +44,7 @@ export type ChunkingMetadata = NodeMetadata | DocumentMetadata;
|
|||||||
export class ChunkingService {
|
export class ChunkingService {
|
||||||
public async chunkText(
|
public async chunkText(
|
||||||
text: string,
|
text: string,
|
||||||
metadata?: { type: 'node' | 'document'; id: string }
|
metadata?: { type: 'node' | 'document'; node: SelectNode }
|
||||||
): Promise<string[]> {
|
): Promise<string[]> {
|
||||||
const chunkSize = configuration.ai.chunking.defaultChunkSize;
|
const chunkSize = configuration.ai.chunking.defaultChunkSize;
|
||||||
const chunkOverlap = configuration.ai.chunking.defaultOverlap;
|
const chunkOverlap = configuration.ai.chunking.defaultOverlap;
|
||||||
@@ -70,40 +70,25 @@ export class ChunkingService {
|
|||||||
|
|
||||||
private async fetchMetadata(metadata?: {
|
private async fetchMetadata(metadata?: {
|
||||||
type: 'node' | 'document';
|
type: 'node' | 'document';
|
||||||
id: string;
|
node: SelectNode;
|
||||||
}): Promise<ChunkingMetadata | undefined> {
|
}): Promise<ChunkingMetadata | undefined> {
|
||||||
if (!metadata) {
|
if (!metadata) {
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (metadata.type === 'node') {
|
if (metadata.type === 'node') {
|
||||||
const node = (await database
|
return this.buildNodeMetadata(metadata.node);
|
||||||
.selectFrom('nodes')
|
|
||||||
.selectAll()
|
|
||||||
.where('id', '=', metadata.id)
|
|
||||||
.executeTakeFirst()) as SelectNode | undefined;
|
|
||||||
if (!node) {
|
|
||||||
return undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
return this.buildNodeMetadata(node);
|
|
||||||
} else {
|
} else {
|
||||||
const document = (await database
|
const document = (await database
|
||||||
.selectFrom('documents')
|
.selectFrom('documents')
|
||||||
.selectAll()
|
.selectAll()
|
||||||
.where('id', '=', metadata.id)
|
.where('id', '=', metadata.node.id)
|
||||||
.executeTakeFirst()) as SelectDocument | undefined;
|
.executeTakeFirst()) as SelectDocument | undefined;
|
||||||
if (!document) {
|
if (!document) {
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
const node = (await database
|
return this.buildDocumentMetadata(document, metadata.node);
|
||||||
.selectFrom('nodes')
|
|
||||||
.selectAll()
|
|
||||||
.where('id', '=', document.id)
|
|
||||||
.executeTakeFirst()) as SelectNode | undefined;
|
|
||||||
|
|
||||||
return this.buildDocumentMetadata(document, node);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -138,10 +123,9 @@ export class ChunkingService {
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
type: 'node',
|
type: 'node',
|
||||||
metadata: {
|
nodeType: node.attributes.type,
|
||||||
...baseMetadata,
|
fields: 'fields' in node.attributes ? node.attributes.fields : null,
|
||||||
fields: 'fields' in node.attributes ? node.attributes.fields : null,
|
...baseMetadata,
|
||||||
},
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -151,7 +135,6 @@ export class ChunkingService {
|
|||||||
): Promise<DocumentMetadata> {
|
): Promise<DocumentMetadata> {
|
||||||
let baseMetadata: BaseMetadata = {
|
let baseMetadata: BaseMetadata = {
|
||||||
id: document.id,
|
id: document.id,
|
||||||
type: 'document',
|
|
||||||
createdAt: document.created_at,
|
createdAt: document.created_at,
|
||||||
createdBy: document.created_by,
|
createdBy: document.created_by,
|
||||||
};
|
};
|
||||||
@@ -172,14 +155,18 @@ export class ChunkingService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
type: 'document',
|
||||||
|
content: document.content,
|
||||||
|
...baseMetadata,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
type: 'document',
|
type: 'document',
|
||||||
metadata: {
|
content: document.content,
|
||||||
...baseMetadata,
|
...baseMetadata,
|
||||||
content: document.content,
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -187,30 +174,46 @@ export class ChunkingService {
|
|||||||
const nodeModel = getNodeModel(node.attributes.type);
|
const nodeModel = getNodeModel(node.attributes.type);
|
||||||
const nodeName = nodeModel?.getName(node.id, node.attributes);
|
const nodeName = nodeModel?.getName(node.id, node.attributes);
|
||||||
|
|
||||||
const author = (await database
|
const author = await database
|
||||||
.selectFrom('users')
|
.selectFrom('users')
|
||||||
.select(['id', 'name'])
|
.select(['id', 'name'])
|
||||||
.where('id', '=', node.created_by)
|
.where('id', '=', node.created_by)
|
||||||
.executeTakeFirst()) as SelectUser | undefined;
|
.executeTakeFirst();
|
||||||
|
|
||||||
|
const updatedBy = node.updated_by
|
||||||
|
? await database
|
||||||
|
.selectFrom('users')
|
||||||
|
.select(['id', 'name'])
|
||||||
|
.where('id', '=', node.updated_by)
|
||||||
|
.executeTakeFirst()
|
||||||
|
: undefined;
|
||||||
|
|
||||||
|
const workspace = await database
|
||||||
|
.selectFrom('workspaces')
|
||||||
|
.select(['id', 'name'])
|
||||||
|
.where('id', '=', node.workspace_id)
|
||||||
|
.executeTakeFirst();
|
||||||
|
|
||||||
return {
|
return {
|
||||||
id: node.id,
|
id: node.id,
|
||||||
type: node.attributes.type,
|
name: nodeName ?? '',
|
||||||
name: nodeName ?? undefined,
|
|
||||||
createdAt: node.created_at,
|
createdAt: node.created_at,
|
||||||
createdBy: node.created_by,
|
createdBy: node.created_by,
|
||||||
author: author ?? undefined,
|
author: author ?? undefined,
|
||||||
|
lastUpdated: node.updated_at ?? undefined,
|
||||||
|
updatedBy: updatedBy ?? undefined,
|
||||||
|
workspace: workspace ?? undefined,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private async buildParentContext(
|
private async buildParentContext(
|
||||||
node: SelectNode
|
node: SelectNode
|
||||||
): Promise<BaseMetadata['parentContext'] | undefined> {
|
): Promise<BaseMetadata['parentContext'] | undefined> {
|
||||||
const parentNode = (await database
|
const parentNode = await database
|
||||||
.selectFrom('nodes')
|
.selectFrom('nodes')
|
||||||
.selectAll()
|
.selectAll()
|
||||||
.where('id', '=', node.parent_id)
|
.where('id', '=', node.parent_id)
|
||||||
.executeTakeFirst()) as SelectNode | undefined;
|
.executeTakeFirst();
|
||||||
|
|
||||||
if (!parentNode) {
|
if (!parentNode) {
|
||||||
return undefined;
|
return undefined;
|
||||||
@@ -226,24 +229,58 @@ export class ChunkingService {
|
|||||||
parentNode.attributes
|
parentNode.attributes
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Get the full path by traversing up the tree
|
||||||
|
const pathNodes = await database
|
||||||
|
.selectFrom('node_paths')
|
||||||
|
.innerJoin('nodes', 'nodes.id', 'node_paths.ancestor_id')
|
||||||
|
.select(['nodes.id', 'nodes.attributes'])
|
||||||
|
.where('node_paths.descendant_id', '=', node.id)
|
||||||
|
.orderBy('node_paths.level', 'asc')
|
||||||
|
.execute();
|
||||||
|
|
||||||
|
const path = pathNodes
|
||||||
|
.map((n) => {
|
||||||
|
const model = getNodeModel(n.attributes.type);
|
||||||
|
return model?.getName(n.id, n.attributes) ?? 'Untitled';
|
||||||
|
})
|
||||||
|
.join(' / ');
|
||||||
|
|
||||||
return {
|
return {
|
||||||
id: parentNode.id,
|
id: parentNode.id,
|
||||||
type: parentNode.attributes.type,
|
type: parentNode.attributes.type,
|
||||||
name: parentName ?? undefined,
|
name: parentName ?? undefined,
|
||||||
|
path,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private async fetchCollaborators(
|
private async fetchCollaborators(
|
||||||
collaboratorIds: string[]
|
collaboratorIds: string[]
|
||||||
): Promise<Array<{ id: string; name: string }>> {
|
): Promise<Array<{ id: string; name: string; role: string }>> {
|
||||||
if (!collaboratorIds.length) {
|
if (!collaboratorIds.length) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
return database
|
const collaborators = await database
|
||||||
.selectFrom('users')
|
.selectFrom('users')
|
||||||
.select(['id', 'name'])
|
.select(['id', 'name'])
|
||||||
.where('id', 'in', collaboratorIds)
|
.where('id', 'in', collaboratorIds)
|
||||||
.execute() as Promise<Array<{ id: string; name: string }>>;
|
.execute();
|
||||||
|
|
||||||
|
// Get roles for each collaborator
|
||||||
|
return Promise.all(
|
||||||
|
collaborators.map(async (c) => {
|
||||||
|
const collaboration = await database
|
||||||
|
.selectFrom('collaborations')
|
||||||
|
.select(['role'])
|
||||||
|
.where('collaborator_id', '=', c.id)
|
||||||
|
.executeTakeFirst();
|
||||||
|
|
||||||
|
return {
|
||||||
|
id: c.id,
|
||||||
|
name: c.name,
|
||||||
|
role: collaboration?.role ?? 'unknown',
|
||||||
|
};
|
||||||
|
})
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ export class DocumentRetrievalService {
|
|||||||
public async retrieve(
|
public async retrieve(
|
||||||
query: string,
|
query: string,
|
||||||
workspaceId: string,
|
workspaceId: string,
|
||||||
|
userId: string,
|
||||||
limit = configuration.ai.retrieval.hybridSearch.maxResults
|
limit = configuration.ai.retrieval.hybridSearch.maxResults
|
||||||
): Promise<Document[]> {
|
): Promise<Document[]> {
|
||||||
const embedding = await this.embeddings.embedQuery(query);
|
const embedding = await this.embeddings.embedQuery(query);
|
||||||
@@ -30,20 +31,34 @@ export class DocumentRetrievalService {
|
|||||||
const semanticResults = await this.semanticSearch(
|
const semanticResults = await this.semanticSearch(
|
||||||
embedding,
|
embedding,
|
||||||
workspaceId,
|
workspaceId,
|
||||||
|
userId,
|
||||||
|
limit
|
||||||
|
);
|
||||||
|
const keywordResults = await this.keywordSearch(
|
||||||
|
query,
|
||||||
|
workspaceId,
|
||||||
|
userId,
|
||||||
limit
|
limit
|
||||||
);
|
);
|
||||||
const keywordResults = await this.keywordSearch(query, workspaceId, limit);
|
|
||||||
return this.combineSearchResults(semanticResults, keywordResults);
|
return this.combineSearchResults(semanticResults, keywordResults);
|
||||||
}
|
}
|
||||||
|
|
||||||
private async semanticSearch(
|
private async semanticSearch(
|
||||||
embedding: number[],
|
embedding: number[],
|
||||||
workspaceId: string,
|
workspaceId: string,
|
||||||
|
userId: string,
|
||||||
limit: number
|
limit: number
|
||||||
): Promise<SearchResult[]> {
|
): Promise<SearchResult[]> {
|
||||||
const results = await database
|
const results = await database
|
||||||
.selectFrom('document_embeddings')
|
.selectFrom('document_embeddings')
|
||||||
.innerJoin('documents', 'documents.id', 'document_embeddings.document_id')
|
.innerJoin('documents', 'documents.id', 'document_embeddings.document_id')
|
||||||
|
.innerJoin('nodes', 'nodes.id', 'documents.id')
|
||||||
|
.innerJoin('collaborations', (join) =>
|
||||||
|
join
|
||||||
|
.onRef('collaborations.node_id', '=', 'nodes.root_id')
|
||||||
|
.on('collaborations.collaborator_id', '=', sql.lit(userId))
|
||||||
|
.on('collaborations.deleted_at', 'is', null)
|
||||||
|
)
|
||||||
.select((eb) => [
|
.select((eb) => [
|
||||||
'document_embeddings.document_id as id',
|
'document_embeddings.document_id as id',
|
||||||
'document_embeddings.text',
|
'document_embeddings.text',
|
||||||
@@ -77,11 +92,19 @@ export class DocumentRetrievalService {
|
|||||||
private async keywordSearch(
|
private async keywordSearch(
|
||||||
query: string,
|
query: string,
|
||||||
workspaceId: string,
|
workspaceId: string,
|
||||||
|
userId: string,
|
||||||
limit: number
|
limit: number
|
||||||
): Promise<SearchResult[]> {
|
): Promise<SearchResult[]> {
|
||||||
const results = await database
|
const results = await database
|
||||||
.selectFrom('document_embeddings')
|
.selectFrom('document_embeddings')
|
||||||
.innerJoin('documents', 'documents.id', 'document_embeddings.document_id')
|
.innerJoin('documents', 'documents.id', 'document_embeddings.document_id')
|
||||||
|
.innerJoin('nodes', 'nodes.id', 'documents.id')
|
||||||
|
.innerJoin('collaborations', (join) =>
|
||||||
|
join
|
||||||
|
.onRef('collaborations.node_id', '=', 'nodes.root_id')
|
||||||
|
.on('collaborations.collaborator_id', '=', sql.lit(userId))
|
||||||
|
.on('collaborations.deleted_at', 'is', null)
|
||||||
|
)
|
||||||
.select((eb) => [
|
.select((eb) => [
|
||||||
'document_embeddings.document_id as id',
|
'document_embeddings.document_id as id',
|
||||||
'document_embeddings.text',
|
'document_embeddings.text',
|
||||||
|
|||||||
@@ -7,15 +7,11 @@ import { HumanMessage } from '@langchain/core/messages';
|
|||||||
import { configuration } from '@/lib/configuration';
|
import { configuration } from '@/lib/configuration';
|
||||||
import { Document } from '@langchain/core/documents';
|
import { Document } from '@langchain/core/documents';
|
||||||
import { z } from 'zod';
|
import { z } from 'zod';
|
||||||
import { NodeAttributes, NodeType } from '@colanode/core';
|
|
||||||
import type {
|
import type {
|
||||||
ChunkingMetadata,
|
ChunkingMetadata,
|
||||||
NodeMetadata,
|
NodeMetadata,
|
||||||
DocumentMetadata,
|
|
||||||
} from '@/services/chunking-service';
|
} from '@/services/chunking-service';
|
||||||
|
|
||||||
// Use proper Zod schemas and updated prompt templates
|
|
||||||
|
|
||||||
const rerankedDocumentsSchema = z.object({
|
const rerankedDocumentsSchema = z.object({
|
||||||
rankings: z.array(
|
rankings: z.array(
|
||||||
z.object({
|
z.object({
|
||||||
@@ -242,24 +238,13 @@ export async function assessUserIntent(
|
|||||||
: 'retrieve';
|
: 'retrieve';
|
||||||
}
|
}
|
||||||
|
|
||||||
interface NodeContextData {
|
|
||||||
metadata: {
|
|
||||||
type: NodeType;
|
|
||||||
name?: string;
|
|
||||||
author?: { id: string; name: string };
|
|
||||||
parentContext?: {
|
|
||||||
type: string;
|
|
||||||
name?: string;
|
|
||||||
};
|
|
||||||
collaborators?: Array<{ id: string; name: string }>;
|
|
||||||
fields?: Record<string, unknown>;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const getNodeContextPrompt = (metadata: NodeMetadata): string => {
|
const getNodeContextPrompt = (metadata: NodeMetadata): string => {
|
||||||
const basePrompt = `Given the following context about a {nodeType}:
|
const basePrompt = `Given the following context about a {nodeType}:
|
||||||
Name: {name}
|
Name: {name}
|
||||||
Created by: {authorName}
|
Created by: {authorName} on {createdAt}
|
||||||
|
Last updated: {lastUpdated} by {updatedByName}
|
||||||
|
Location: {path}
|
||||||
|
Workspace: {workspaceName}
|
||||||
{additionalContext}
|
{additionalContext}
|
||||||
|
|
||||||
Full content:
|
Full content:
|
||||||
@@ -274,42 +259,56 @@ Generate a brief (50-100 tokens) contextual prefix that:
|
|||||||
3. Makes the chunk more understandable in isolation
|
3. Makes the chunk more understandable in isolation
|
||||||
Do not repeat the chunk content. Return only the contextual prefix.`;
|
Do not repeat the chunk content. Return only the contextual prefix.`;
|
||||||
|
|
||||||
const getCollaboratorNames = (
|
const getCollaboratorInfo = (
|
||||||
collaborators?: Array<{ id: string; name: string }>
|
collaborators?: Array<{ id: string; name: string; role: string }>
|
||||||
) => collaborators?.map((c) => c.name).join(', ') ?? 'unknown';
|
) => {
|
||||||
|
if (!collaborators?.length) return 'No collaborators';
|
||||||
|
return collaborators.map((c) => `${c.name} (${c.role})`).join(', ');
|
||||||
|
};
|
||||||
|
|
||||||
switch (metadata.metadata.type) {
|
const formatDate = (date?: Date) => {
|
||||||
|
if (!date) return 'unknown';
|
||||||
|
return new Date(date).toLocaleString();
|
||||||
|
};
|
||||||
|
|
||||||
|
switch (metadata.nodeType) {
|
||||||
case 'message':
|
case 'message':
|
||||||
return basePrompt.replace(
|
return basePrompt.replace(
|
||||||
'{additionalContext}',
|
'{additionalContext}',
|
||||||
`In: ${metadata.metadata.parentContext?.type ?? 'unknown'} "${metadata.metadata.parentContext?.name ?? 'unknown'}"
|
`In: ${metadata.parentContext?.type ?? 'unknown'} "${metadata.parentContext?.name ?? 'unknown'}"
|
||||||
Participants: ${getCollaboratorNames(metadata.metadata.collaborators)}`
|
Path: ${metadata.parentContext?.path ?? 'unknown'}
|
||||||
|
Participants: ${getCollaboratorInfo(metadata.collaborators)}`
|
||||||
);
|
);
|
||||||
|
|
||||||
case 'record':
|
case 'record':
|
||||||
return basePrompt.replace(
|
return basePrompt.replace(
|
||||||
'{additionalContext}',
|
'{additionalContext}',
|
||||||
`Database: ${metadata.metadata.parentContext?.name ?? 'unknown'}
|
`Database: ${metadata.parentContext?.name ?? 'unknown'}
|
||||||
Fields: ${Object.keys(metadata.metadata.fields ?? {}).join(', ')}`
|
Path: ${metadata.parentContext?.path ?? 'unknown'}
|
||||||
|
Fields: ${Object.keys(metadata.fields ?? {}).join(', ')}`
|
||||||
);
|
);
|
||||||
|
|
||||||
case 'page':
|
case 'page':
|
||||||
return basePrompt.replace(
|
return basePrompt.replace(
|
||||||
'{additionalContext}',
|
'{additionalContext}',
|
||||||
`Location: ${metadata.metadata.parentContext?.name ? `in ${metadata.metadata.parentContext.name}` : 'root level'}`
|
`Location: ${metadata.parentContext?.path ?? 'root level'}
|
||||||
|
Collaborators: ${getCollaboratorInfo(metadata.collaborators)}`
|
||||||
);
|
);
|
||||||
|
|
||||||
case 'database':
|
case 'database':
|
||||||
return basePrompt.replace(
|
return basePrompt.replace(
|
||||||
'{additionalContext}',
|
'{additionalContext}',
|
||||||
`Fields: ${Object.keys(metadata.metadata.fields ?? {}).join(', ')}`
|
`Path: ${metadata.parentContext?.path ?? 'root level'}
|
||||||
|
Fields: ${Object.keys(metadata.fields ?? {}).join(', ')}
|
||||||
|
Collaborators: ${getCollaboratorInfo(metadata.collaborators)}`
|
||||||
);
|
);
|
||||||
|
|
||||||
case 'channel':
|
case 'channel':
|
||||||
return basePrompt.replace(
|
return basePrompt.replace(
|
||||||
'{additionalContext}',
|
'{additionalContext}',
|
||||||
`Type: Channel
|
`Type: Channel
|
||||||
Members: ${getCollaboratorNames(metadata.metadata.collaborators)}`
|
Path: ${metadata.parentContext?.path ?? 'root level'}
|
||||||
|
Members: ${getCollaboratorInfo(metadata.collaborators)}`
|
||||||
);
|
);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
@@ -321,6 +320,11 @@ interface PromptVariables {
|
|||||||
nodeType: string;
|
nodeType: string;
|
||||||
name: string;
|
name: string;
|
||||||
authorName: string;
|
authorName: string;
|
||||||
|
createdAt: string;
|
||||||
|
lastUpdated: string;
|
||||||
|
updatedByName: string;
|
||||||
|
path: string;
|
||||||
|
workspaceName: string;
|
||||||
fullText: string;
|
fullText: string;
|
||||||
chunk: string;
|
chunk: string;
|
||||||
[key: string]: string;
|
[key: string]: string;
|
||||||
@@ -330,8 +334,10 @@ const documentContextPrompt = PromptTemplate.fromTemplate(
|
|||||||
`Given the following context about a document:
|
`Given the following context about a document:
|
||||||
Type: {nodeType}
|
Type: {nodeType}
|
||||||
Name: {name}
|
Name: {name}
|
||||||
Parent: {parentName}
|
Location: {path}
|
||||||
Created by: {authorName}
|
Created by: {authorName} on {createdAt}
|
||||||
|
Last updated: {lastUpdated} by {updatedByName}
|
||||||
|
Workspace: {workspaceName}
|
||||||
|
|
||||||
Full content:
|
Full content:
|
||||||
{fullText}
|
{fullText}
|
||||||
@@ -362,31 +368,30 @@ export async function addContextToChunk(
|
|||||||
let prompt: string;
|
let prompt: string;
|
||||||
let promptVars: PromptVariables;
|
let promptVars: PromptVariables;
|
||||||
|
|
||||||
|
const formatDate = (date?: Date) => {
|
||||||
|
if (!date) return 'unknown';
|
||||||
|
return new Date(date).toLocaleString();
|
||||||
|
};
|
||||||
|
|
||||||
|
const baseVars = {
|
||||||
|
nodeType: metadata.type === 'node' ? metadata.nodeType : metadata.type,
|
||||||
|
name: metadata.name ?? 'Untitled',
|
||||||
|
authorName: metadata.author?.name ?? 'Unknown',
|
||||||
|
createdAt: formatDate(metadata.createdAt),
|
||||||
|
lastUpdated: formatDate(metadata.lastUpdated),
|
||||||
|
updatedByName: metadata.updatedBy?.name ?? 'Unknown',
|
||||||
|
path: metadata.parentContext?.path ?? 'root level',
|
||||||
|
workspaceName: metadata.workspace?.name ?? 'Unknown Workspace',
|
||||||
|
fullText,
|
||||||
|
chunk,
|
||||||
|
};
|
||||||
|
|
||||||
if (metadata.type === 'node') {
|
if (metadata.type === 'node') {
|
||||||
prompt = getNodeContextPrompt(metadata);
|
prompt = getNodeContextPrompt(metadata);
|
||||||
promptVars = {
|
promptVars = baseVars;
|
||||||
nodeType: metadata.metadata.type,
|
|
||||||
name: metadata.metadata.name ?? 'Untitled',
|
|
||||||
authorName: metadata.metadata.author?.name ?? 'Unknown',
|
|
||||||
fullText,
|
|
||||||
chunk,
|
|
||||||
};
|
|
||||||
} else {
|
} else {
|
||||||
prompt = await documentContextPrompt.format({
|
prompt = await documentContextPrompt.format(baseVars);
|
||||||
nodeType: metadata.metadata.type,
|
promptVars = baseVars;
|
||||||
name: metadata.metadata.name ?? 'Untitled',
|
|
||||||
parentName: metadata.metadata.parentContext?.name ?? 'Unknown',
|
|
||||||
authorName: metadata.metadata.author?.name ?? 'Unknown',
|
|
||||||
fullText,
|
|
||||||
chunk,
|
|
||||||
});
|
|
||||||
promptVars = {
|
|
||||||
nodeType: metadata.metadata.type,
|
|
||||||
name: metadata.metadata.name ?? 'Untitled',
|
|
||||||
authorName: metadata.metadata.author?.name ?? 'Unknown',
|
|
||||||
fullText,
|
|
||||||
chunk,
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const formattedPrompt = Object.entries(promptVars).reduce(
|
const formattedPrompt = Object.entries(promptVars).reduce(
|
||||||
|
|||||||
@@ -52,12 +52,17 @@ export class NodeRetrievalService {
|
|||||||
const results = await database
|
const results = await database
|
||||||
.selectFrom('node_embeddings')
|
.selectFrom('node_embeddings')
|
||||||
.innerJoin('nodes', 'nodes.id', 'node_embeddings.node_id')
|
.innerJoin('nodes', 'nodes.id', 'node_embeddings.node_id')
|
||||||
|
.innerJoin('collaborations', (join) =>
|
||||||
|
join
|
||||||
|
.onRef('collaborations.node_id', '=', 'node_embeddings.root_id')
|
||||||
|
.on('collaborations.collaborator_id', '=', sql.lit(userId))
|
||||||
|
.on('collaborations.deleted_at', 'is', null)
|
||||||
|
)
|
||||||
.select((eb) => [
|
.select((eb) => [
|
||||||
'node_embeddings.node_id as id',
|
'node_embeddings.node_id as id',
|
||||||
'node_embeddings.text',
|
'node_embeddings.text',
|
||||||
'nodes.created_at',
|
'nodes.created_at',
|
||||||
'node_embeddings.chunk as chunk_index',
|
'node_embeddings.chunk as chunk_index',
|
||||||
// Wrap raw expression to satisfy type:
|
|
||||||
sql<number>`('[${embedding.join(',')}]'::vector) <=> node_embeddings.embedding_vector`.as(
|
sql<number>`('[${embedding.join(',')}]'::vector) <=> node_embeddings.embedding_vector`.as(
|
||||||
'similarity'
|
'similarity'
|
||||||
),
|
),
|
||||||
@@ -92,6 +97,12 @@ export class NodeRetrievalService {
|
|||||||
const results = await database
|
const results = await database
|
||||||
.selectFrom('node_embeddings')
|
.selectFrom('node_embeddings')
|
||||||
.innerJoin('nodes', 'nodes.id', 'node_embeddings.node_id')
|
.innerJoin('nodes', 'nodes.id', 'node_embeddings.node_id')
|
||||||
|
.innerJoin('collaborations', (join) =>
|
||||||
|
join
|
||||||
|
.onRef('collaborations.node_id', '=', 'node_embeddings.root_id')
|
||||||
|
.on('collaborations.collaborator_id', '=', sql.lit(userId))
|
||||||
|
.on('collaborations.deleted_at', 'is', null)
|
||||||
|
)
|
||||||
.select((eb) => [
|
.select((eb) => [
|
||||||
'node_embeddings.node_id as id',
|
'node_embeddings.node_id as id',
|
||||||
'node_embeddings.text',
|
'node_embeddings.text',
|
||||||
|
|||||||
Reference in New Issue
Block a user