core: add support for fields in search (#8261)

* core: add support for field based searching

* core: add more fields

* core: add `tagged`, `colored`, `in_notebook` fields
This commit is contained in:
Abdullah Atta
2025-06-27 12:24:10 +05:00
parent 7ce44f4d84
commit e36fd28f47
2 changed files with 494 additions and 152 deletions

View File

@@ -30,12 +30,18 @@ import {
TrashItem TrashItem
} from "../types.js"; } from "../types.js";
import { DatabaseSchema, RawDatabaseSchema } from "../database/index.js"; import { DatabaseSchema, RawDatabaseSchema } from "../database/index.js";
import { AnyColumnWithTable, Kysely, sql } from "@streetwriters/kysely"; import {
AnyColumnWithTable,
ExpressionBuilder,
Kysely,
SelectQueryBuilder,
sql
} from "@streetwriters/kysely";
import { FilteredSelector } from "../database/sql-collection.js"; import { FilteredSelector } from "../database/sql-collection.js";
import { VirtualizedGrouping } from "../utils/virtualized-grouping.js"; import { VirtualizedGrouping } from "../utils/virtualized-grouping.js";
import { logger } from "../logger.js"; import { logger } from "../logger.js";
import { rebuildSearchIndex } from "../database/fts.js"; import { rebuildSearchIndex } from "../database/fts.js";
import { transformQuery } from "../utils/query-transformer.js"; import { QueryTokens, transformQuery } from "../utils/query-transformer.js";
import { getSortSelectors, groupArray } from "../utils/grouping.js"; import { getSortSelectors, groupArray } from "../utils/grouping.js";
import { fuzzy } from "../utils/fuzzy.js"; import { fuzzy } from "../utils/fuzzy.js";
import { extractMatchingBlocks } from "../utils/html-parser.js"; import { extractMatchingBlocks } from "../utils/html-parser.js";
@@ -69,38 +75,36 @@ export default class Lookup {
return this.toSearchResults(async (limit, sortOptions) => { return this.toSearchResults(async (limit, sortOptions) => {
const excludedIds = this.db.trash.cache.notes; const excludedIds = this.db.trash.cache.notes;
const { query: transformedQuery, tokens } = transformQuery(query); const { content, title } = transformQuery(query);
const resultsA: string[] = const ftsResults: string[] =
transformedQuery.length === 0 (await this.ftsQueryBuilder(
? [] { content: content?.query, title: title?.query },
: await this.ftsQueryBuilder(transformedQuery, excludedIds, notes) excludedIds,
.select(["results.id"]) notes
.groupBy("results.id")
.orderBy(
sql`SUM(results.rank)`,
sortOptions?.sortDirection || "desc"
) )
?.select(["results.id"])
.groupBy("results.id")
.orderBy(sql`SUM(results.rank)`, sortOptions?.sortDirection || "desc")
.execute() .execute()
.catch((e) => { .catch((e) => {
logger.error(e, `Error while searching`, { query }); logger.error(e, `Error while searching`, { query });
return []; return [];
}) })
.then((r) => r.map((r) => r.id)); .then((r) => r.map((r) => r.id))) || [];
const smallTokens = [...tokens.andTokens, ...tokens.orTokens].filter( const regexMatches = await this.regexQueryBuilder(
(token) => token.length < 3 {
); content: filterSmallTokens(content?.tokens),
if (smallTokens.length === 0) return resultsA; title: filterSmallTokens(title?.tokens)
},
const results = await this.regexQueryBuilder( (!!content || !!title) && ftsResults.length > 0 ? ftsResults : notes
smallTokens,
!!transformedQuery && resultsA.length > 0 ? resultsA : notes
) )
.select("results.id") ?.select("results.id")
.execute(); .execute();
if (!regexMatches) return ftsResults;
return results.map((r) => r.id); return regexMatches.map((r) => r.id);
}, notes || this.db.notes.all); }, notes || this.db.notes.all);
} }
@@ -112,34 +116,128 @@ export default class Lookup {
const db = this.db.sql() as unknown as Kysely<RawDatabaseSchema>; const db = this.db.sql() as unknown as Kysely<RawDatabaseSchema>;
const excludedIds = this.db.trash.cache.notes; const excludedIds = this.db.trash.cache.notes;
const { query: transformedQuery, tokens } = transformQuery(query); const {
content,
title,
tag,
color,
archived,
favorite,
locked,
pinned,
readonly,
created_before,
created_after,
edited_after,
edited_before,
colored,
tagged,
in_notebook,
filters
} = transformQuery(query);
if (filters > 0) {
const tagIds = tagged
? await this.db.tags.all.ids()
: tag?.length
? await this.db.tags.all
.where((eb) => eb("tags.title", "in", tag))
.ids()
: [];
const colorIds = colored
? await this.db.colors.all.ids()
: color?.length
? await this.db.colors.all
.where((eb) => eb("colors.title", "in", color))
.ids()
: [];
const notebookIds =
typeof in_notebook === "boolean"
? await this.db.notebooks.all.ids()
: [];
const defaultVault = await this.db.vaults.default();
notes = notes.where((eb) => {
const exprs = [];
const tagsFilter = this.db.relations
.from({ ids: tagIds, type: "tag" }, "note")
.selector.filter.select("id");
const colorsFilter = this.db.relations
.from({ ids: colorIds, type: "color" }, "note")
.selector.filter.select("id");
if (typeof tagged === "boolean")
exprs.push(eb("notes.id", tagged ? "in" : "not in", tagsFilter));
else if (tagIds.length > 0)
exprs.push(eb("notes.id", "in", tagsFilter));
if (typeof colored === "boolean")
exprs.push(eb("notes.id", colored ? "in" : "not in", colorsFilter));
else if (colorIds.length > 0)
exprs.push(eb("notes.id", "in", colorsFilter));
if (typeof in_notebook === "boolean")
exprs.push(
eb(
"notes.id",
in_notebook ? "in" : "not in",
this.db.relations
.from({ ids: notebookIds, type: "notebook" }, "note")
.selector.filter.select("id")
)
);
if (typeof locked === "boolean" && defaultVault) {
const filter = this.db.relations
.from(defaultVault, "note")
.selector.filter.select("id");
exprs.push(eb("notes.id", locked ? "in" : "not in", filter));
}
if (typeof archived === "boolean")
exprs.push(eb("notes.archived", "==", archived));
if (typeof favorite === "boolean")
exprs.push(eb("notes.favorite", "==", favorite));
if (typeof pinned === "boolean")
exprs.push(eb("notes.pinned", "==", pinned));
if (typeof readonly === "boolean")
exprs.push(eb("notes.readonly", "==", readonly));
if (typeof created_after === "number")
exprs.push(eb("notes.dateCreated", ">", created_after));
if (typeof created_before === "number")
exprs.push(eb("notes.dateCreated", "<", created_before));
if (typeof edited_after === "number")
exprs.push(eb("notes.dateEdited", ">", edited_after));
if (typeof edited_before === "number")
exprs.push(eb("notes.dateEdited", "<", edited_before));
return eb.and(exprs);
});
}
console.time("gather matches"); console.time("gather matches");
const ftsResults = const ftsResults =
transformedQuery.length <= 0 (await this.ftsQueryBuilder(
? [] { content: content?.query, title: title?.query },
: await this.ftsQueryBuilder(transformedQuery, excludedIds, notes) excludedIds,
.select(["id", "type", "rank"]) notes
)
?.select(["id", "type", "rank"])
.execute() .execute()
.catch((e) => { .catch((e) => {
logger.error(e, `Error while searching`, { query }); logger.error(e, `Error while searching`, { query });
return []; return [];
}); })) || [];
const smallTokens = [...tokens.andTokens, ...tokens.orTokens].filter(
(token) => token.length < 3
);
const ftsIds = ftsResults.map((r) => r.id); const ftsIds = ftsResults.map((r) => r.id);
const regexMatches = const regexMatches =
smallTokens.length > 0 (await this.regexQueryBuilder(
? await this.regexQueryBuilder( {
smallTokens, content: filterSmallTokens(content?.tokens),
!!transformedQuery && ftsIds.length > 0 ? ftsIds : notes title: filterSmallTokens(title?.tokens)
},
(!!content || !!title) && ftsIds.length > 0 ? ftsIds : notes
) )
.select(["results.id", "results.type", sql<number>`1`.as("rank")]) ?.select(["results.id", "results.type", sql<number>`1`.as("rank")])
.execute() .execute()) || [];
: [];
console.timeEnd("gather matches"); console.timeEnd("gather matches");
console.time("sorting matches"); console.time("sorting matches");
@@ -190,16 +288,21 @@ export default class Lookup {
} }
console.timeEnd("sorting matches"); console.timeEnd("sorting matches");
const andTokens = tokens.andTokens.map((t) => const isQueryless = !matches.ids.length && filters > 0;
t.replace(/"(.+)"/g, "$1").toLowerCase() if (isQueryless) {
); const ids = await notes.items(undefined, sortOptions);
const orTokens = tokens.orTokens.map((t) => for (const { id } of ids) {
t.replace(/"(.+)"/g, "$1").toLowerCase() matches.values.push({
); id,
const notTokens = tokens.notTokens.map((t) => rank: 1,
t.replace(/"(.+)"/g, "$1").toLowerCase() types: ["title"]
); });
const allTokens = [...andTokens, ...orTokens]; matches.ids.push(id);
}
}
const titleTokens = transformTokens(title?.tokens);
const contentTokens = transformTokens(content?.tokens);
return new VirtualizedGrouping<HighlightedResult>( return new VirtualizedGrouping<HighlightedResult>(
matches.ids.length, matches.ids.length,
@@ -224,7 +327,7 @@ export default class Lookup {
})); }));
const titles = const titles =
titleMatches.length > 0 titleMatches.length > 0 && !isQueryless
? await db ? await db
.selectFrom("notes") .selectFrom("notes")
.where("id", "in", titleMatches) .where("id", "in", titleMatches)
@@ -235,14 +338,9 @@ export default class Lookup {
for (const title of titles) { for (const title of titles) {
const { text: highlighted } = highlightQueries( const { text: highlighted } = highlightQueries(
title.title || "", title.title || "",
allTokens titleTokens.allTokens
);
const hasMatches = textContainsTokens(
highlighted,
andTokens,
orTokens,
notTokens
); );
const hasMatches = textContainsTokens(highlighted, titleTokens);
const result = results.find((c) => c.id === title.id); const result = results.find((c) => c.id === title.id);
if (!result) continue; if (!result) continue;
result.title = hasMatches result.title = hasMatches
@@ -251,7 +349,7 @@ export default class Lookup {
} }
const htmls = const htmls =
contentMatches.length > 0 contentMatches.length > 0 && !isQueryless
? await db ? await db
.selectFrom("content") .selectFrom("content")
.where("noteId", "in", contentMatches) .where("noteId", "in", contentMatches)
@@ -264,9 +362,11 @@ export default class Lookup {
const result = results.find((r) => r.id === html.id); const result = results.find((r) => r.id === html.id);
if (!result) continue; if (!result) continue;
const highlighted = highlightHtmlContent(html.data, allTokens); const highlighted = highlightHtmlContent(
if (!textContainsTokens(highlighted, andTokens, orTokens, notTokens)) html.data,
continue; contentTokens.allTokens
);
if (!textContainsTokens(highlighted, contentTokens)) continue;
result.content = extractMatchingBlocks( result.content = extractMatchingBlocks(
highlighted, highlighted,
MATCH_TAG_NAME MATCH_TAG_NAME
@@ -278,7 +378,11 @@ export default class Lookup {
} }
const resultsWithMissingTitle = results const resultsWithMissingTitle = results
.filter((r) => !r.title.length && r.content.length > 0) .filter(
isQueryless
? (r) => !r.title.length
: (r) => !r.title.length && r.content.length > 0
)
.map((r) => r.id); .map((r) => r.id);
if (resultsWithMissingTitle.length > 0) { if (resultsWithMissingTitle.length > 0) {
@@ -296,7 +400,9 @@ export default class Lookup {
for (const result of results) { for (const result of results) {
result.content.sort( result.content.sort(
(a, b) => getMatchScore(b, allTokens) - getMatchScore(a, allTokens) (a, b) =>
getMatchScore(b, contentTokens.allTokens) -
getMatchScore(a, contentTokens.allTokens)
); );
} }
@@ -323,14 +429,23 @@ export default class Lookup {
} }
private ftsQueryBuilder( private ftsQueryBuilder(
query: string, queries: {
title?: string;
content?: string;
},
excludedIds: string[] = [], excludedIds: string[] = [],
filter?: FilteredSelector<Note> filter?: FilteredSelector<Note>
) { ) {
if (!queries.content && !queries.title) return;
const db = this.db.sql() as unknown as Kysely<RawDatabaseSchema>; const db = this.db.sql() as unknown as Kysely<RawDatabaseSchema>;
return db.selectFrom((eb) => function buildTitleQuery(
eb eb:
| Kysely<RawDatabaseSchema>
| ExpressionBuilder<RawDatabaseSchema, never>
) {
return eb
.selectFrom("notes_fts") .selectFrom("notes_fts")
.$if(!!filter, (eb) => .$if(!!filter, (eb) =>
eb.where("id", "in", filter!.filter.select("id")) eb.where("id", "in", filter!.filter.select("id"))
@@ -338,11 +453,17 @@ export default class Lookup {
.$if(excludedIds.length > 0, (eb) => .$if(excludedIds.length > 0, (eb) =>
eb.where("id", "not in", excludedIds) eb.where("id", "not in", excludedIds)
) )
.where("title", "match", query) .where("title", "match", queries.title)
.where("rank", "=", sql<number>`'bm25(1.0, 10.0)'`) .where("rank", "=", sql<number>`'bm25(1.0, 10.0)'`)
.select(["id", "rank", sql<string>`'title'`.as("type")]) .select(["id", "rank", sql<string>`'title'`.as("type")]);
.unionAll((eb) => }
eb
function buildContentQuery(
eb:
| Kysely<RawDatabaseSchema>
| ExpressionBuilder<RawDatabaseSchema, never>
) {
return eb
.selectFrom("content_fts") .selectFrom("content_fts")
.$if(!!filter, (eb) => .$if(!!filter, (eb) =>
eb.where("noteId", "in", filter!.filter.select("id")) eb.where("noteId", "in", filter!.filter.select("id"))
@@ -350,28 +471,49 @@ export default class Lookup {
.$if(excludedIds.length > 0, (eb) => .$if(excludedIds.length > 0, (eb) =>
eb.where("noteId", "not in", excludedIds) eb.where("noteId", "not in", excludedIds)
) )
.where("data", "match", query) .where("data", "match", queries.content)
.where("rank", "=", sql<number>`'bm25(1.0, 1.0, 10.0)'`) .where("rank", "=", sql<number>`'bm25(1.0, 1.0, 10.0)'`)
.select(["noteId as id", "rank", sql<string>`'content'`.as("type")]) .select(["noteId as id", "rank", sql<string>`'content'`.as("type")])
.$castTo<{ .$castTo<{
id: string; id: string;
rank: number; rank: number;
type: "content" | "title"; type: "content" | "title";
}>() }>();
) }
if (queries.content && queries.title)
return db.selectFrom((eb) =>
buildTitleQuery(eb)
.unionAll((eb) => buildContentQuery(eb))
.as("results") .as("results")
); );
else if (queries.content)
return db.selectFrom((eb) => buildContentQuery(eb).as("results"));
else if (queries.title)
return db.selectFrom((eb) => buildTitleQuery(eb).as("results"));
} }
private regexQueryBuilder( private regexQueryBuilder(
queries: string[], queries: {
title?: string[];
content?: string[];
},
ids?: string[] | FilteredSelector<Note> ids?: string[] | FilteredSelector<Note>
) { ) {
const regex = queries if (!queries.content?.length && !queries.title?.length) return;
const buildRegex = (queries: string[]) =>
queries
.filter((q) => q && q.length > 0) .filter((q) => q && q.length > 0)
.map((q) => q.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")) .map((q) => q.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
.join("|"); .join("|");
return this.db.sql().selectFrom((eb) =>
eb function buildTitleQuery(
eb: Kysely<DatabaseSchema> | ExpressionBuilder<DatabaseSchema, never>,
queries: string[]
) {
const regex = buildRegex(queries);
return eb
.selectFrom("notes") .selectFrom("notes")
.$if(!!ids, (eb) => .$if(!!ids, (eb) =>
eb.where( eb.where(
@@ -381,9 +523,15 @@ export default class Lookup {
) )
) )
.where("title", "regexp", sql<string>`${regex}`) .where("title", "regexp", sql<string>`${regex}`)
.select(["id", sql<string>`'title'`.as("type")]) .select(["id", sql<string>`'title'`.as("type")]);
.unionAll((eb) => }
eb
function buildContentQuery(
eb: Kysely<DatabaseSchema> | ExpressionBuilder<DatabaseSchema, never>,
queries: string[]
) {
const regex = buildRegex(queries);
return eb
.selectFrom("content") .selectFrom("content")
.where("content.locked", "!=", true) .where("content.locked", "!=", true)
.$if(!!ids, (eb) => .$if(!!ids, (eb) =>
@@ -398,10 +546,25 @@ export default class Lookup {
.$castTo<{ .$castTo<{
id: string; id: string;
type: "content" | "title"; type: "content" | "title";
}>() }>();
) }
if (queries.content && queries.title)
return this.db.sql().selectFrom((eb) =>
buildTitleQuery(eb, queries.title!)
.unionAll((eb) => buildContentQuery(eb, queries.content!))
.as("results") .as("results")
); );
else if (queries.content)
return this.db
.sql()
.selectFrom((eb) =>
buildContentQuery(eb, queries.content!).as("results")
);
else if (queries.title)
return this.db
.sql()
.selectFrom((eb) => buildTitleQuery(eb, queries.title!).as("results"));
} }
notebooks(query: string) { notebooks(query: string) {
@@ -985,25 +1148,53 @@ function getMatchScore(
return score; return score;
} }
function textContainsTokens( function textContainsTokens(text: string, tokens: QueryTokens) {
text: string,
andTokens: string[],
orTokens: string[],
notTokens: string[]
) {
const lowerCasedText = text.toLowerCase(); const lowerCasedText = text.toLowerCase();
if ( if (
!notTokens.every( !tokens.notTokens.every(
(t) => !lowerCasedText.includes(`${MATCH_TAG_OPEN}${t}${MATCH_TAG_CLOSE}`) (t) => !lowerCasedText.includes(`${MATCH_TAG_OPEN}${t}${MATCH_TAG_CLOSE}`)
) )
) )
return false; return false;
return ( return (
andTokens.every((t) => tokens.andTokens.every((t) =>
lowerCasedText.includes(`${MATCH_TAG_OPEN}${t}${MATCH_TAG_CLOSE}`) lowerCasedText.includes(`${MATCH_TAG_OPEN}${t}${MATCH_TAG_CLOSE}`)
) || ) ||
orTokens.some((t) => tokens.orTokens.some((t) =>
lowerCasedText.includes(`${MATCH_TAG_OPEN}${t}${MATCH_TAG_CLOSE}`) lowerCasedText.includes(`${MATCH_TAG_OPEN}${t}${MATCH_TAG_CLOSE}`)
) )
); );
} }
/**
 * Picks out the positive search tokens that are shorter than three
 * characters.
 *
 * Only the AND and OR tokens are inspected; NOT tokens are ignored.
 *
 * @param tokens Parsed tokens of a query, or `undefined` if there are none.
 * @returns The short tokens (AND tokens first, then OR tokens), or
 * `undefined` when no tokens were given.
 */
function filterSmallTokens(tokens: QueryTokens | undefined) {
  if (!tokens) return undefined;

  const isShort = (token: string) => token.length < 3;
  return tokens.andTokens.concat(tokens.orTokens).filter(isShort);
}
/**
 * Normalises query tokens for highlighting and matching: double-quoted text
 * inside a token is unwrapped (`"foo bar"` becomes `foo bar`) and every token
 * is lowercased.
 *
 * @param tokens Parsed tokens of a query, or `undefined` if there are none.
 * @returns The normalised AND/OR/NOT tokens plus `allTokens`, which is the
 * AND tokens followed by the OR tokens. All lists are empty when `tokens` is
 * `undefined`.
 */
function transformTokens(tokens: QueryTokens | undefined) {
  const normalize = (list: string[]) =>
    list.map((token) => token.replace(/"(.+)"/g, "$1").toLowerCase());

  const andTokens = tokens ? normalize(tokens.andTokens) : [];
  const orTokens = tokens ? normalize(tokens.orTokens) : [];
  const notTokens = tokens ? normalize(tokens.notTokens) : [];
  const allTokens = andTokens.concat(orTokens);

  return { andTokens, orTokens, notTokens, allTokens };
}

View File

@@ -17,7 +17,10 @@ You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
type ASTNode = QueryNode | PhraseNode | OperatorNode; import dayjs from "dayjs";
import { servicesVersion } from "typescript";
type ASTNode = QueryNode | PhraseNode | OperatorNode | FieldPhraseNode;
type QueryNode = { type QueryNode = {
type: "query"; type: "query";
@@ -29,10 +32,90 @@ type PhraseNode = {
value: string[]; value: string[];
}; };
/**
 * AST node for a run of query tokens that were written under a field prefix
 * (the text before a `:` in the search query).
 */
type FieldPhraseNode = {
type: "field_phrase";
// Name of the field the tokens were grouped under.
field: string;
// Query node built from the tokens that belong to the field.
value: QueryNode;
};
type OperatorNode = { type OperatorNode = {
type: "AND" | "OR" | "NOT"; type: "AND" | "OR" | "NOT";
}; };
/**
 * Search fields recognised in a query, keyed by the lowercase name that is
 * written before the colon (e.g. `tag:work`). Each entry receives the parsed
 * query nodes and extracts the value for its field.
 */
const SUPPORTED_FIELDS = {
  // text: an explicit `title:` / `content:` phrase wins; otherwise the first
  // plain (un-fielded) query node is used for both.
  title: (ast) => {
    const node =
      ast.find((a) => a.type === "field_phrase" && a.field === "title") ||
      ast.find((a) => a.type === "query");
    return node && serializeQuery(node);
  },
  content: (ast) => {
    const node =
      ast.find((a) => a.type === "field_phrase" && a.field === "content") ||
      ast.find((a) => a.type === "query");
    return node && serializeQuery(node);
  },
  // array
  tag: (ast) => parseArrayField("tag", ast),
  color: (ast) => parseArrayField("color", ast),
  // date
  edited_before: (ast) => parseDateField("edited_before", ast),
  edited_after: (ast) => parseDateField("edited_after", ast),
  created_before: (ast) => parseDateField("created_before", ast),
  created_after: (ast) => parseDateField("created_after", ast),
  // boolean
  pinned: (ast) => parseBooleanField("pinned", ast),
  locked: (ast) => parseBooleanField("locked", ast),
  readonly: (ast) => parseBooleanField("readonly", ast),
  favorite: (ast) => parseBooleanField("favorite", ast),
  archived: (ast) => parseBooleanField("archived", ast),
  tagged: (ast) => parseBooleanField("tagged", ast),
  colored: (ast) => parseBooleanField("colored", ast),
  in_notebook: (ast) => parseBooleanField("in_notebook", ast)
} satisfies Record<string, (ast: (QueryNode | FieldPhraseNode)[]) => unknown>;

/**
 * Tells whether `field` is one of the names declared in `SUPPORTED_FIELDS`.
 *
 * Only own keys count. The `in` operator would also accept names inherited
 * from `Object.prototype` (such as `constructor` or `__proto__`), which would
 * make the tokenizer treat `constructor:foo` as a field prefix even though no
 * handler exists for it.
 *
 * @param field Candidate field name (the text preceding a `:`).
 * @returns `true` if a handler is declared for the field.
 */
function isFieldSupported(field: string): boolean {
  return Object.prototype.hasOwnProperty.call(SUPPORTED_FIELDS, field);
}
/**
 * Reads a `true` / `false` value for `field` from the parsed query.
 *
 * Only the first phrase written for the field is considered.
 *
 * @param field Name of the field to look up.
 * @param ast Parsed query nodes.
 * @returns `true` or `false` when the field's generated text is exactly
 * `"true"` or `"false"`; `null` when the field is absent or holds anything
 * else.
 */
function parseBooleanField(
  field: string,
  ast: (QueryNode | FieldPhraseNode)[]
): boolean | null {
  for (const candidate of ast) {
    if (candidate.type !== "field_phrase" || candidate.field !== field)
      continue;

    switch (generateSQL(candidate.value)) {
      case "true":
        return true;
      case "false":
        return false;
      default:
        return null;
    }
  }
  return null;
}
/**
 * Collects the values of every phrase written for `field` in the parsed
 * query, in the order they appear.
 *
 * @param field Name of the field to look up.
 * @param ast Parsed query nodes.
 * @returns One generated string per occurrence of the field, or `null` when
 * the field does not occur at all.
 */
function parseArrayField(
  field: string,
  ast: (QueryNode | FieldPhraseNode)[]
): string[] | null {
  const values: string[] = [];
  for (const node of ast) {
    if (node.type === "field_phrase" && node.field === field) {
      values.push(generateSQL(node.value));
    }
  }
  return values.length > 0 ? values : null;
}
/**
 * Reads a date value for `field` from the parsed query.
 *
 * Only the first phrase written for the field is considered; its generated
 * text is handed to dayjs for parsing.
 *
 * @param field Name of the field to look up.
 * @param ast Parsed query nodes.
 * @returns The date as a millisecond timestamp, or `null` when the field is
 * absent or dayjs reports the value as invalid.
 */
function parseDateField(
  field: string,
  ast: (QueryNode | FieldPhraseNode)[]
): number | null {
  const node = ast.find(
    (a): a is FieldPhraseNode => a.type === "field_phrase" && a.field === field
  );
  if (!node) return null;

  const parsed = dayjs(generateSQL(node.value));
  if (!parsed.isValid()) return null;
  return parsed.toDate().getTime();
}
const INVALID_QUERY_REGEX = /[!"#$%&'()*+,\-./:;<>=?@[\\\]^_`{|}~§]/; const INVALID_QUERY_REGEX = /[!"#$%&'()*+,\-./:;<>=?@[\\\]^_`{|}~§]/;
function escapeSQLString(str: string): string { function escapeSQLString(str: string): string {
if (str.startsWith('"') && str.endsWith('"')) { if (str.startsWith('"') && str.endsWith('"')) {
@@ -63,10 +146,13 @@ function escapeSQLString(str: string): string {
return str.replace(/"/g, '""'); return str.replace(/"/g, '""');
} }
function tokenize(query: string): string[] { function tokenizeWithFields(
const tokens: string[] = []; query: string
): Array<{ field?: string; token: string }> {
const tokens: Array<{ field?: string; token: string }> = [];
let buffer = ""; let buffer = "";
let isQuoted = false; let isQuoted = false;
let currentField: string | undefined = undefined;
for (let i = 0; i < query.length; ++i) { for (let i = 0; i < query.length; ++i) {
const char = query[i]; const char = query[i];
@@ -75,19 +161,51 @@ function tokenize(query: string): string[] {
} }
if (char === " " && !isQuoted) { if (char === " " && !isQuoted) {
if (buffer.length > 0) { if (buffer.length > 0) {
tokens.push(buffer); tokens.push({ field: currentField, token: buffer });
buffer = ""; buffer = "";
} }
} else if (char === ":" && !isQuoted) {
// Check for field
const maybeField = buffer.trim().toLowerCase();
if (isFieldSupported(maybeField)) {
currentField = maybeField;
buffer = "";
} else {
buffer += char;
}
} else { } else {
buffer += char; buffer += char;
} }
} }
if (buffer.length > 0) tokens.push(buffer); if (buffer.length > 0) tokens.push({ field: currentField, token: buffer });
return tokens; return tokens;
} }
function parseTokens(tokens: string[]): QueryNode { // Helper: group tokens by field
/**
 * Merges consecutive tokens that share the same field into one group,
 * preserving their order. A new group starts whenever the field changes, so
 * the same field can yield several groups if its tokens are not adjacent.
 *
 * @param tokens Tokens paired with the field they were written under
 * (`undefined` for tokens without a field).
 * @returns The groups in input order; empty when `tokens` is empty.
 */
function groupTokensByField(tokens: Array<{ field?: string; token: string }>) {
  const groups: Array<{ field?: string; tokens: string[] }> = [];

  for (const { field, token } of tokens) {
    const last = groups.length > 0 ? groups[groups.length - 1] : undefined;
    if (last !== undefined && last.field === field) {
      last.tokens.push(token);
    } else {
      groups.push({ field, tokens: [token] });
    }
  }

  return groups;
}
// Parse a group of tokens into a QueryNode (handles boolean ops, etc)
function parseTokensToQueryNode(tokens: string[]): QueryNode {
const ast: QueryNode = { type: "query", children: [] }; const ast: QueryNode = { type: "query", children: [] };
let currentPhrase: string[] = []; let currentPhrase: string[] = [];
@@ -102,15 +220,13 @@ function parseTokens(tokens: string[]): QueryNode {
currentPhrase.push(token); currentPhrase.push(token);
} }
} }
if (currentPhrase.length > 0) { if (currentPhrase.length > 0) {
ast.children.push({ type: "phrase", value: currentPhrase }); ast.children.push({ type: "phrase", value: currentPhrase });
} }
return ast; return ast;
} }
function transformAST(ast: QueryNode): QueryNode { function transformQueryNode(ast: QueryNode): QueryNode {
const transformedAST: QueryNode = { ...ast, children: [] }; const transformedAST: QueryNode = { ...ast, children: [] };
let lastWasPhrase = false; let lastWasPhrase = false;
@@ -150,7 +266,7 @@ function generateSQL(ast: QueryNode): string {
return ast.children return ast.children
.map((child) => { .map((child) => {
if (child.type === "phrase") { if (child.type === "phrase") {
return child.value.join(" AND "); return child.value.filter((v) => v.length >= 3).join(" AND ");
} }
if (child.type === "AND" || child.type === "OR" || child.type === "NOT") { if (child.type === "AND" || child.type === "OR" || child.type === "NOT") {
return child.type; return child.type;
@@ -160,18 +276,53 @@ function generateSQL(ast: QueryNode): string {
.join(" "); .join(" ");
} }
export function transformQuery(query: string) { // Main transformer: returns (QueryNode | FieldPhraseNode)[]
const tokens = tokenize(query); export function transformQuery(query: string): {
const largeTokens = tokens.filter( [K in keyof typeof SUPPORTED_FIELDS]?: ReturnType<
(token) => token.length >= 3 || token === "OR" (typeof SUPPORTED_FIELDS)[K]
); >;
} & { filters: number } {
const tokens = tokenizeWithFields(query);
const groups = groupTokensByField(tokens);
const ast = groups.map((group) => {
const node = parseTokensToQueryNode(group.tokens);
const transformedNode = transformQueryNode(node);
if (group.field) {
return { return {
query: generateSQL(transformAST(parseTokens(largeTokens))), type: "field_phrase",
tokens: tokenizeAst(transformAST(parseTokens(tokens))) field: group.field,
value: transformedNode
} as FieldPhraseNode;
} else {
return transformedNode;
}
});
let filters = 0;
const fields = Object.fromEntries(
Object.entries(SUPPORTED_FIELDS).map(([key, field]) => {
const value = field(ast);
if (
value !== null &&
value !== undefined &&
!["content", "title"].includes(key)
)
filters++;
return [key, value];
})
);
return { ...fields, filters };
}
function serializeQuery(node: QueryNode | FieldPhraseNode) {
return {
query: generateSQL(node.type === "query" ? node : node.value),
tokens: tokenizeAst(node.type === "query" ? node : node.value)
}; };
} }
interface QueryTokens { export interface QueryTokens {
andTokens: string[]; andTokens: string[];
orTokens: string[]; orTokens: string[];
notTokens: string[]; notTokens: string[];