mirror of
https://github.com/streetwriters/notesnook.git
synced 2025-12-21 14:09:34 +01:00
core: add support for fields in search (#8261)
* core: add support for field based searching
* core: add more fields
* core: add `tagged`, `colored`, `in_notebook` fields
This commit is contained in:
@@ -30,12 +30,18 @@ import {
|
||||
TrashItem
|
||||
} from "../types.js";
|
||||
import { DatabaseSchema, RawDatabaseSchema } from "../database/index.js";
|
||||
import { AnyColumnWithTable, Kysely, sql } from "@streetwriters/kysely";
|
||||
import {
|
||||
AnyColumnWithTable,
|
||||
ExpressionBuilder,
|
||||
Kysely,
|
||||
SelectQueryBuilder,
|
||||
sql
|
||||
} from "@streetwriters/kysely";
|
||||
import { FilteredSelector } from "../database/sql-collection.js";
|
||||
import { VirtualizedGrouping } from "../utils/virtualized-grouping.js";
|
||||
import { logger } from "../logger.js";
|
||||
import { rebuildSearchIndex } from "../database/fts.js";
|
||||
import { transformQuery } from "../utils/query-transformer.js";
|
||||
import { QueryTokens, transformQuery } from "../utils/query-transformer.js";
|
||||
import { getSortSelectors, groupArray } from "../utils/grouping.js";
|
||||
import { fuzzy } from "../utils/fuzzy.js";
|
||||
import { extractMatchingBlocks } from "../utils/html-parser.js";
|
||||
@@ -69,38 +75,36 @@ export default class Lookup {
|
||||
return this.toSearchResults(async (limit, sortOptions) => {
|
||||
const excludedIds = this.db.trash.cache.notes;
|
||||
|
||||
const { query: transformedQuery, tokens } = transformQuery(query);
|
||||
const { content, title } = transformQuery(query);
|
||||
|
||||
const resultsA: string[] =
|
||||
transformedQuery.length === 0
|
||||
? []
|
||||
: await this.ftsQueryBuilder(transformedQuery, excludedIds, notes)
|
||||
.select(["results.id"])
|
||||
.groupBy("results.id")
|
||||
.orderBy(
|
||||
sql`SUM(results.rank)`,
|
||||
sortOptions?.sortDirection || "desc"
|
||||
)
|
||||
.execute()
|
||||
.catch((e) => {
|
||||
logger.error(e, `Error while searching`, { query });
|
||||
return [];
|
||||
})
|
||||
.then((r) => r.map((r) => r.id));
|
||||
const ftsResults: string[] =
|
||||
(await this.ftsQueryBuilder(
|
||||
{ content: content?.query, title: title?.query },
|
||||
excludedIds,
|
||||
notes
|
||||
)
|
||||
?.select(["results.id"])
|
||||
.groupBy("results.id")
|
||||
.orderBy(sql`SUM(results.rank)`, sortOptions?.sortDirection || "desc")
|
||||
.execute()
|
||||
.catch((e) => {
|
||||
logger.error(e, `Error while searching`, { query });
|
||||
return [];
|
||||
})
|
||||
.then((r) => r.map((r) => r.id))) || [];
|
||||
|
||||
const smallTokens = [...tokens.andTokens, ...tokens.orTokens].filter(
|
||||
(token) => token.length < 3
|
||||
);
|
||||
if (smallTokens.length === 0) return resultsA;
|
||||
|
||||
const results = await this.regexQueryBuilder(
|
||||
smallTokens,
|
||||
!!transformedQuery && resultsA.length > 0 ? resultsA : notes
|
||||
const regexMatches = await this.regexQueryBuilder(
|
||||
{
|
||||
content: filterSmallTokens(content?.tokens),
|
||||
title: filterSmallTokens(title?.tokens)
|
||||
},
|
||||
(!!content || !!title) && ftsResults.length > 0 ? ftsResults : notes
|
||||
)
|
||||
.select("results.id")
|
||||
?.select("results.id")
|
||||
.execute();
|
||||
if (!regexMatches) return ftsResults;
|
||||
|
||||
return results.map((r) => r.id);
|
||||
return regexMatches.map((r) => r.id);
|
||||
}, notes || this.db.notes.all);
|
||||
}
|
||||
|
||||
@@ -112,34 +116,128 @@ export default class Lookup {
|
||||
const db = this.db.sql() as unknown as Kysely<RawDatabaseSchema>;
|
||||
const excludedIds = this.db.trash.cache.notes;
|
||||
|
||||
const { query: transformedQuery, tokens } = transformQuery(query);
|
||||
const {
|
||||
content,
|
||||
title,
|
||||
tag,
|
||||
color,
|
||||
archived,
|
||||
favorite,
|
||||
locked,
|
||||
pinned,
|
||||
readonly,
|
||||
created_before,
|
||||
created_after,
|
||||
edited_after,
|
||||
edited_before,
|
||||
colored,
|
||||
tagged,
|
||||
in_notebook,
|
||||
filters
|
||||
} = transformQuery(query);
|
||||
|
||||
if (filters > 0) {
|
||||
const tagIds = tagged
|
||||
? await this.db.tags.all.ids()
|
||||
: tag?.length
|
||||
? await this.db.tags.all
|
||||
.where((eb) => eb("tags.title", "in", tag))
|
||||
.ids()
|
||||
: [];
|
||||
const colorIds = colored
|
||||
? await this.db.colors.all.ids()
|
||||
: color?.length
|
||||
? await this.db.colors.all
|
||||
.where((eb) => eb("colors.title", "in", color))
|
||||
.ids()
|
||||
: [];
|
||||
const notebookIds =
|
||||
typeof in_notebook === "boolean"
|
||||
? await this.db.notebooks.all.ids()
|
||||
: [];
|
||||
|
||||
const defaultVault = await this.db.vaults.default();
|
||||
notes = notes.where((eb) => {
|
||||
const exprs = [];
|
||||
const tagsFilter = this.db.relations
|
||||
.from({ ids: tagIds, type: "tag" }, "note")
|
||||
.selector.filter.select("id");
|
||||
const colorsFilter = this.db.relations
|
||||
.from({ ids: colorIds, type: "color" }, "note")
|
||||
.selector.filter.select("id");
|
||||
|
||||
if (typeof tagged === "boolean")
|
||||
exprs.push(eb("notes.id", tagged ? "in" : "not in", tagsFilter));
|
||||
else if (tagIds.length > 0)
|
||||
exprs.push(eb("notes.id", "in", tagsFilter));
|
||||
|
||||
if (typeof colored === "boolean")
|
||||
exprs.push(eb("notes.id", colored ? "in" : "not in", colorsFilter));
|
||||
else if (colorIds.length > 0)
|
||||
exprs.push(eb("notes.id", "in", colorsFilter));
|
||||
|
||||
if (typeof in_notebook === "boolean")
|
||||
exprs.push(
|
||||
eb(
|
||||
"notes.id",
|
||||
in_notebook ? "in" : "not in",
|
||||
this.db.relations
|
||||
.from({ ids: notebookIds, type: "notebook" }, "note")
|
||||
.selector.filter.select("id")
|
||||
)
|
||||
);
|
||||
|
||||
if (typeof locked === "boolean" && defaultVault) {
|
||||
const filter = this.db.relations
|
||||
.from(defaultVault, "note")
|
||||
.selector.filter.select("id");
|
||||
exprs.push(eb("notes.id", locked ? "in" : "not in", filter));
|
||||
}
|
||||
if (typeof archived === "boolean")
|
||||
exprs.push(eb("notes.archived", "==", archived));
|
||||
if (typeof favorite === "boolean")
|
||||
exprs.push(eb("notes.favorite", "==", favorite));
|
||||
if (typeof pinned === "boolean")
|
||||
exprs.push(eb("notes.pinned", "==", pinned));
|
||||
if (typeof readonly === "boolean")
|
||||
exprs.push(eb("notes.readonly", "==", readonly));
|
||||
if (typeof created_after === "number")
|
||||
exprs.push(eb("notes.dateCreated", ">", created_after));
|
||||
if (typeof created_before === "number")
|
||||
exprs.push(eb("notes.dateCreated", "<", created_before));
|
||||
if (typeof edited_after === "number")
|
||||
exprs.push(eb("notes.dateEdited", ">", edited_after));
|
||||
if (typeof edited_before === "number")
|
||||
exprs.push(eb("notes.dateEdited", "<", edited_before));
|
||||
return eb.and(exprs);
|
||||
});
|
||||
}
|
||||
|
||||
console.time("gather matches");
|
||||
const ftsResults =
|
||||
transformedQuery.length <= 0
|
||||
? []
|
||||
: await this.ftsQueryBuilder(transformedQuery, excludedIds, notes)
|
||||
.select(["id", "type", "rank"])
|
||||
.execute()
|
||||
.catch((e) => {
|
||||
logger.error(e, `Error while searching`, { query });
|
||||
return [];
|
||||
});
|
||||
|
||||
const smallTokens = [...tokens.andTokens, ...tokens.orTokens].filter(
|
||||
(token) => token.length < 3
|
||||
);
|
||||
(await this.ftsQueryBuilder(
|
||||
{ content: content?.query, title: title?.query },
|
||||
excludedIds,
|
||||
notes
|
||||
)
|
||||
?.select(["id", "type", "rank"])
|
||||
.execute()
|
||||
.catch((e) => {
|
||||
logger.error(e, `Error while searching`, { query });
|
||||
return [];
|
||||
})) || [];
|
||||
|
||||
const ftsIds = ftsResults.map((r) => r.id);
|
||||
const regexMatches =
|
||||
smallTokens.length > 0
|
||||
? await this.regexQueryBuilder(
|
||||
smallTokens,
|
||||
!!transformedQuery && ftsIds.length > 0 ? ftsIds : notes
|
||||
)
|
||||
.select(["results.id", "results.type", sql<number>`1`.as("rank")])
|
||||
.execute()
|
||||
: [];
|
||||
(await this.regexQueryBuilder(
|
||||
{
|
||||
content: filterSmallTokens(content?.tokens),
|
||||
title: filterSmallTokens(title?.tokens)
|
||||
},
|
||||
(!!content || !!title) && ftsIds.length > 0 ? ftsIds : notes
|
||||
)
|
||||
?.select(["results.id", "results.type", sql<number>`1`.as("rank")])
|
||||
.execute()) || [];
|
||||
console.timeEnd("gather matches");
|
||||
|
||||
console.time("sorting matches");
|
||||
@@ -190,16 +288,21 @@ export default class Lookup {
|
||||
}
|
||||
console.timeEnd("sorting matches");
|
||||
|
||||
const andTokens = tokens.andTokens.map((t) =>
|
||||
t.replace(/"(.+)"/g, "$1").toLowerCase()
|
||||
);
|
||||
const orTokens = tokens.orTokens.map((t) =>
|
||||
t.replace(/"(.+)"/g, "$1").toLowerCase()
|
||||
);
|
||||
const notTokens = tokens.notTokens.map((t) =>
|
||||
t.replace(/"(.+)"/g, "$1").toLowerCase()
|
||||
);
|
||||
const allTokens = [...andTokens, ...orTokens];
|
||||
const isQueryless = !matches.ids.length && filters > 0;
|
||||
if (isQueryless) {
|
||||
const ids = await notes.items(undefined, sortOptions);
|
||||
for (const { id } of ids) {
|
||||
matches.values.push({
|
||||
id,
|
||||
rank: 1,
|
||||
types: ["title"]
|
||||
});
|
||||
matches.ids.push(id);
|
||||
}
|
||||
}
|
||||
|
||||
const titleTokens = transformTokens(title?.tokens);
|
||||
const contentTokens = transformTokens(content?.tokens);
|
||||
|
||||
return new VirtualizedGrouping<HighlightedResult>(
|
||||
matches.ids.length,
|
||||
@@ -224,7 +327,7 @@ export default class Lookup {
|
||||
}));
|
||||
|
||||
const titles =
|
||||
titleMatches.length > 0
|
||||
titleMatches.length > 0 && !isQueryless
|
||||
? await db
|
||||
.selectFrom("notes")
|
||||
.where("id", "in", titleMatches)
|
||||
@@ -235,14 +338,9 @@ export default class Lookup {
|
||||
for (const title of titles) {
|
||||
const { text: highlighted } = highlightQueries(
|
||||
title.title || "",
|
||||
allTokens
|
||||
);
|
||||
const hasMatches = textContainsTokens(
|
||||
highlighted,
|
||||
andTokens,
|
||||
orTokens,
|
||||
notTokens
|
||||
titleTokens.allTokens
|
||||
);
|
||||
const hasMatches = textContainsTokens(highlighted, titleTokens);
|
||||
const result = results.find((c) => c.id === title.id);
|
||||
if (!result) continue;
|
||||
result.title = hasMatches
|
||||
@@ -251,7 +349,7 @@ export default class Lookup {
|
||||
}
|
||||
|
||||
const htmls =
|
||||
contentMatches.length > 0
|
||||
contentMatches.length > 0 && !isQueryless
|
||||
? await db
|
||||
.selectFrom("content")
|
||||
.where("noteId", "in", contentMatches)
|
||||
@@ -264,9 +362,11 @@ export default class Lookup {
|
||||
const result = results.find((r) => r.id === html.id);
|
||||
if (!result) continue;
|
||||
|
||||
const highlighted = highlightHtmlContent(html.data, allTokens);
|
||||
if (!textContainsTokens(highlighted, andTokens, orTokens, notTokens))
|
||||
continue;
|
||||
const highlighted = highlightHtmlContent(
|
||||
html.data,
|
||||
contentTokens.allTokens
|
||||
);
|
||||
if (!textContainsTokens(highlighted, contentTokens)) continue;
|
||||
result.content = extractMatchingBlocks(
|
||||
highlighted,
|
||||
MATCH_TAG_NAME
|
||||
@@ -278,7 +378,11 @@ export default class Lookup {
|
||||
}
|
||||
|
||||
const resultsWithMissingTitle = results
|
||||
.filter((r) => !r.title.length && r.content.length > 0)
|
||||
.filter(
|
||||
isQueryless
|
||||
? (r) => !r.title.length
|
||||
: (r) => !r.title.length && r.content.length > 0
|
||||
)
|
||||
.map((r) => r.id);
|
||||
|
||||
if (resultsWithMissingTitle.length > 0) {
|
||||
@@ -296,7 +400,9 @@ export default class Lookup {
|
||||
|
||||
for (const result of results) {
|
||||
result.content.sort(
|
||||
(a, b) => getMatchScore(b, allTokens) - getMatchScore(a, allTokens)
|
||||
(a, b) =>
|
||||
getMatchScore(b, contentTokens.allTokens) -
|
||||
getMatchScore(a, contentTokens.allTokens)
|
||||
);
|
||||
}
|
||||
|
||||
@@ -323,14 +429,23 @@ export default class Lookup {
|
||||
}
|
||||
|
||||
private ftsQueryBuilder(
|
||||
query: string,
|
||||
queries: {
|
||||
title?: string;
|
||||
content?: string;
|
||||
},
|
||||
excludedIds: string[] = [],
|
||||
filter?: FilteredSelector<Note>
|
||||
) {
|
||||
if (!queries.content && !queries.title) return;
|
||||
|
||||
const db = this.db.sql() as unknown as Kysely<RawDatabaseSchema>;
|
||||
|
||||
return db.selectFrom((eb) =>
|
||||
eb
|
||||
function buildTitleQuery(
|
||||
eb:
|
||||
| Kysely<RawDatabaseSchema>
|
||||
| ExpressionBuilder<RawDatabaseSchema, never>
|
||||
) {
|
||||
return eb
|
||||
.selectFrom("notes_fts")
|
||||
.$if(!!filter, (eb) =>
|
||||
eb.where("id", "in", filter!.filter.select("id"))
|
||||
@@ -338,40 +453,67 @@ export default class Lookup {
|
||||
.$if(excludedIds.length > 0, (eb) =>
|
||||
eb.where("id", "not in", excludedIds)
|
||||
)
|
||||
.where("title", "match", query)
|
||||
.where("title", "match", queries.title)
|
||||
.where("rank", "=", sql<number>`'bm25(1.0, 10.0)'`)
|
||||
.select(["id", "rank", sql<string>`'title'`.as("type")])
|
||||
.unionAll((eb) =>
|
||||
eb
|
||||
.selectFrom("content_fts")
|
||||
.$if(!!filter, (eb) =>
|
||||
eb.where("noteId", "in", filter!.filter.select("id"))
|
||||
)
|
||||
.$if(excludedIds.length > 0, (eb) =>
|
||||
eb.where("noteId", "not in", excludedIds)
|
||||
)
|
||||
.where("data", "match", query)
|
||||
.where("rank", "=", sql<number>`'bm25(1.0, 1.0, 10.0)'`)
|
||||
.select(["noteId as id", "rank", sql<string>`'content'`.as("type")])
|
||||
.$castTo<{
|
||||
id: string;
|
||||
rank: number;
|
||||
type: "content" | "title";
|
||||
}>()
|
||||
.select(["id", "rank", sql<string>`'title'`.as("type")]);
|
||||
}
|
||||
|
||||
function buildContentQuery(
|
||||
eb:
|
||||
| Kysely<RawDatabaseSchema>
|
||||
| ExpressionBuilder<RawDatabaseSchema, never>
|
||||
) {
|
||||
return eb
|
||||
.selectFrom("content_fts")
|
||||
.$if(!!filter, (eb) =>
|
||||
eb.where("noteId", "in", filter!.filter.select("id"))
|
||||
)
|
||||
.as("results")
|
||||
);
|
||||
.$if(excludedIds.length > 0, (eb) =>
|
||||
eb.where("noteId", "not in", excludedIds)
|
||||
)
|
||||
.where("data", "match", queries.content)
|
||||
.where("rank", "=", sql<number>`'bm25(1.0, 1.0, 10.0)'`)
|
||||
.select(["noteId as id", "rank", sql<string>`'content'`.as("type")])
|
||||
.$castTo<{
|
||||
id: string;
|
||||
rank: number;
|
||||
type: "content" | "title";
|
||||
}>();
|
||||
}
|
||||
|
||||
if (queries.content && queries.title)
|
||||
return db.selectFrom((eb) =>
|
||||
buildTitleQuery(eb)
|
||||
.unionAll((eb) => buildContentQuery(eb))
|
||||
.as("results")
|
||||
);
|
||||
else if (queries.content)
|
||||
return db.selectFrom((eb) => buildContentQuery(eb).as("results"));
|
||||
else if (queries.title)
|
||||
return db.selectFrom((eb) => buildTitleQuery(eb).as("results"));
|
||||
}
|
||||
|
||||
private regexQueryBuilder(
|
||||
queries: string[],
|
||||
queries: {
|
||||
title?: string[];
|
||||
content?: string[];
|
||||
},
|
||||
ids?: string[] | FilteredSelector<Note>
|
||||
) {
|
||||
const regex = queries
|
||||
.filter((q) => q && q.length > 0)
|
||||
.map((q) => q.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
|
||||
.join("|");
|
||||
return this.db.sql().selectFrom((eb) =>
|
||||
eb
|
||||
if (!queries.content?.length && !queries.title?.length) return;
|
||||
|
||||
const buildRegex = (queries: string[]) =>
|
||||
queries
|
||||
.filter((q) => q && q.length > 0)
|
||||
.map((q) => q.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
|
||||
.join("|");
|
||||
|
||||
function buildTitleQuery(
|
||||
eb: Kysely<DatabaseSchema> | ExpressionBuilder<DatabaseSchema, never>,
|
||||
queries: string[]
|
||||
) {
|
||||
const regex = buildRegex(queries);
|
||||
return eb
|
||||
.selectFrom("notes")
|
||||
.$if(!!ids, (eb) =>
|
||||
eb.where(
|
||||
@@ -381,27 +523,48 @@ export default class Lookup {
|
||||
)
|
||||
)
|
||||
.where("title", "regexp", sql<string>`${regex}`)
|
||||
.select(["id", sql<string>`'title'`.as("type")])
|
||||
.unionAll((eb) =>
|
||||
eb
|
||||
.selectFrom("content")
|
||||
.where("content.locked", "!=", true)
|
||||
.$if(!!ids, (eb) =>
|
||||
eb.where(
|
||||
"noteId",
|
||||
"in",
|
||||
Array.isArray(ids) ? ids! : ids!.filter.select("id")
|
||||
)
|
||||
)
|
||||
.where("data", "regexp", sql<string>`${regex}`)
|
||||
.select(["noteId as id", sql<string>`'content'`.as("type")])
|
||||
.$castTo<{
|
||||
id: string;
|
||||
type: "content" | "title";
|
||||
}>()
|
||||
.select(["id", sql<string>`'title'`.as("type")]);
|
||||
}
|
||||
|
||||
function buildContentQuery(
|
||||
eb: Kysely<DatabaseSchema> | ExpressionBuilder<DatabaseSchema, never>,
|
||||
queries: string[]
|
||||
) {
|
||||
const regex = buildRegex(queries);
|
||||
return eb
|
||||
.selectFrom("content")
|
||||
.where("content.locked", "!=", true)
|
||||
.$if(!!ids, (eb) =>
|
||||
eb.where(
|
||||
"noteId",
|
||||
"in",
|
||||
Array.isArray(ids) ? ids! : ids!.filter.select("id")
|
||||
)
|
||||
)
|
||||
.as("results")
|
||||
);
|
||||
.where("data", "regexp", sql<string>`${regex}`)
|
||||
.select(["noteId as id", sql<string>`'content'`.as("type")])
|
||||
.$castTo<{
|
||||
id: string;
|
||||
type: "content" | "title";
|
||||
}>();
|
||||
}
|
||||
|
||||
if (queries.content && queries.title)
|
||||
return this.db.sql().selectFrom((eb) =>
|
||||
buildTitleQuery(eb, queries.title!)
|
||||
.unionAll((eb) => buildContentQuery(eb, queries.content!))
|
||||
.as("results")
|
||||
);
|
||||
else if (queries.content)
|
||||
return this.db
|
||||
.sql()
|
||||
.selectFrom((eb) =>
|
||||
buildContentQuery(eb, queries.content!).as("results")
|
||||
);
|
||||
else if (queries.title)
|
||||
return this.db
|
||||
.sql()
|
||||
.selectFrom((eb) => buildTitleQuery(eb, queries.title!).as("results"));
|
||||
}
|
||||
|
||||
notebooks(query: string) {
|
||||
@@ -985,25 +1148,53 @@ function getMatchScore(
|
||||
return score;
|
||||
}
|
||||
|
||||
function textContainsTokens(
|
||||
text: string,
|
||||
andTokens: string[],
|
||||
orTokens: string[],
|
||||
notTokens: string[]
|
||||
) {
|
||||
function textContainsTokens(text: string, tokens: QueryTokens) {
|
||||
const lowerCasedText = text.toLowerCase();
|
||||
if (
|
||||
!notTokens.every(
|
||||
!tokens.notTokens.every(
|
||||
(t) => !lowerCasedText.includes(`${MATCH_TAG_OPEN}${t}${MATCH_TAG_CLOSE}`)
|
||||
)
|
||||
)
|
||||
return false;
|
||||
return (
|
||||
andTokens.every((t) =>
|
||||
tokens.andTokens.every((t) =>
|
||||
lowerCasedText.includes(`${MATCH_TAG_OPEN}${t}${MATCH_TAG_CLOSE}`)
|
||||
) ||
|
||||
orTokens.some((t) =>
|
||||
tokens.orTokens.some((t) =>
|
||||
lowerCasedText.includes(`${MATCH_TAG_OPEN}${t}${MATCH_TAG_CLOSE}`)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Collects the AND and OR tokens that are shorter than three characters.
 *
 * NOT tokens are never included. The AND tokens come first, followed by the
 * OR tokens, each in their original order.
 *
 * @param tokens Parsed query tokens, or `undefined` when the field has no query.
 * @returns The short tokens, or `undefined` when `tokens` is missing.
 */
function filterSmallTokens(tokens: QueryTokens | undefined) {
  if (!tokens) return undefined;

  const isSmall = (token: string) => token.length < 3;
  return tokens.andTokens
    .filter(isSmall)
    .concat(tokens.orTokens.filter(isSmall));
}
|
||||
|
||||
/**
 * Normalizes every token list of a parsed query.
 *
 * Each token has its double-quoted segments unwrapped (the greedy `"(.+)"`
 * match removes the first and last quote around at least one character) and
 * is then lower-cased.
 *
 * @param tokens Parsed query tokens, or `undefined` when the field has no query.
 * @returns The normalized `andTokens`, `orTokens` and `notTokens`, plus
 * `allTokens` (AND tokens followed by OR tokens). All four lists are empty
 * when `tokens` is missing.
 */
function transformTokens(tokens: QueryTokens | undefined) {
  if (!tokens)
    return { andTokens: [], orTokens: [], notTokens: [], allTokens: [] };

  const normalize = (list: string[]) =>
    list.map((token) => token.replace(/"(.+)"/g, "$1").toLowerCase());

  const andTokens = normalize(tokens.andTokens);
  const orTokens = normalize(tokens.orTokens);
  const notTokens = normalize(tokens.notTokens);

  return {
    andTokens,
    orTokens,
    notTokens,
    allTokens: andTokens.concat(orTokens)
  };
}
|
||||
|
||||
@@ -17,7 +17,10 @@ You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
type ASTNode = QueryNode | PhraseNode | OperatorNode;
|
||||
import dayjs from "dayjs";
|
||||
import { servicesVersion } from "typescript";
|
||||
|
||||
type ASTNode = QueryNode | PhraseNode | OperatorNode | FieldPhraseNode;
|
||||
|
||||
type QueryNode = {
|
||||
type: "query";
|
||||
@@ -29,10 +32,90 @@ type PhraseNode = {
|
||||
value: string[];
|
||||
};
|
||||
|
||||
/** AST node for a group of query tokens that was scoped to a field (`field:value`). */
type FieldPhraseNode = {
  /** Discriminant that separates this node from the other `ASTNode` variants. */
  type: "field_phrase";
  /** Name of the field the tokens were written under. */
  field: string;
  /** The tokens of the group, parsed into a query node. */
  value: QueryNode;
};
|
||||
|
||||
/** AST node for a boolean operator that appears between phrases. */
type OperatorNode = {
  /** The operator keyword itself. */
  type: "AND" | "OR" | "NOT";
};
|
||||
|
||||
/**
 * Every field that may be used as `field:value` in a search query, mapped to
 * the extractor that reads that field's value out of the parsed query AST.
 *
 * - `title` / `content` serialize the explicit field phrase when there is one,
 *   and otherwise fall back to the first un-fielded query node.
 * - `tag` / `color` collect the values of all matching field phrases.
 * - the `*_before` / `*_after` fields are parsed as dates.
 * - the remaining fields are parsed as booleans.
 */
const SUPPORTED_FIELDS = {
  title: (ast) => {
    const node =
      ast.find((a) => a.type === "field_phrase" && a.field === "title") ||
      ast.find((a) => a.type === "query");
    return node && serializeQuery(node);
  },
  content: (ast) => {
    const node =
      ast.find((a) => a.type === "field_phrase" && a.field === "content") ||
      ast.find((a) => a.type === "query");
    return node && serializeQuery(node);
  },

  // array
  tag: (ast) => parseArrayField("tag", ast),
  color: (ast) => parseArrayField("color", ast),

  // date
  edited_before: (ast) => parseDateField("edited_before", ast),
  edited_after: (ast) => parseDateField("edited_after", ast),
  created_before: (ast) => parseDateField("created_before", ast),
  created_after: (ast) => parseDateField("created_after", ast),

  // boolean
  pinned: (ast) => parseBooleanField("pinned", ast),
  locked: (ast) => parseBooleanField("locked", ast),
  readonly: (ast) => parseBooleanField("readonly", ast),
  favorite: (ast) => parseBooleanField("favorite", ast),
  archived: (ast) => parseBooleanField("archived", ast),
  tagged: (ast) => parseBooleanField("tagged", ast),
  colored: (ast) => parseBooleanField("colored", ast),
  in_notebook: (ast) => parseBooleanField("in_notebook", ast)
} satisfies Record<string, (ast: (QueryNode | FieldPhraseNode)[]) => unknown>;

/**
 * Tells whether `field` is one of the keys declared in `SUPPORTED_FIELDS`.
 *
 * Only own keys count. The `in` operator would also accept names inherited
 * from `Object.prototype` (e.g. `constructor`, `__proto__`), which would turn
 * a query such as `constructor: foo` into a field no extractor ever reads.
 *
 * @param field Candidate field name (the text found before a `:`).
 * @returns `true` only for a declared field name.
 */
function isFieldSupported(field: string): boolean {
  return Object.prototype.hasOwnProperty.call(SUPPORTED_FIELDS, field);
}
|
||||
|
||||
/**
 * Reads a boolean-valued field from the parsed query.
 *
 * Only the first field phrase named `field` is considered.
 *
 * @param field Name of the field to look up.
 * @param ast Top-level nodes of the parsed query.
 * @returns `true` or `false` when the serialized value is exactly `"true"` or
 * `"false"` (case-sensitive); `null` when the field is absent or holds
 * anything else.
 */
function parseBooleanField(
  field: string,
  ast: (QueryNode | FieldPhraseNode)[]
): boolean | null {
  const node = ast.find(
    (a): a is FieldPhraseNode => a.type === "field_phrase" && a.field === field
  );
  if (!node) return null;

  switch (generateSQL(node.value)) {
    case "true":
      return true;
    case "false":
      return false;
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Collects the values of every field phrase named `field` (e.g. all `tag:`
 * groups) as serialized strings, in query order.
 *
 * Values that serialize to an empty string are dropped. `generateSQL` omits
 * phrase words shorter than three characters, so a field such as `tag:ai`
 * serializes to `""`. Returning `[""]` for it would register an active filter
 * that cannot match anything by name.
 *
 * @param field Name of the field to look up.
 * @param ast Top-level nodes of the parsed query.
 * @returns The non-empty values, or `null` when there are none.
 */
function parseArrayField(
  field: string,
  ast: (QueryNode | FieldPhraseNode)[]
): string[] | null {
  const values = ast
    .filter(
      (a): a is FieldPhraseNode =>
        a.type === "field_phrase" && a.field === field
    )
    .map((a) => generateSQL(a.value))
    .filter((value) => value.length > 0);
  return values.length > 0 ? values : null;
}
|
||||
|
||||
/**
 * Reads a date-valued field from the parsed query.
 *
 * Only the first field phrase named `field` is considered. Its serialized
 * value is handed to `dayjs` for parsing.
 *
 * @param field Name of the field to look up.
 * @param ast Top-level nodes of the parsed query.
 * @returns The parsed date as a millisecond timestamp, or `null` when the
 * field is absent or `dayjs` reports the value as invalid.
 */
function parseDateField(
  field: string,
  ast: (QueryNode | FieldPhraseNode)[]
): number | null {
  const node = ast.find(
    (a): a is FieldPhraseNode => a.type === "field_phrase" && a.field === field
  );
  if (!node) return null;

  const parsed = dayjs(generateSQL(node.value));
  return parsed.isValid() ? parsed.toDate().getTime() : null;
}
|
||||
|
||||
const INVALID_QUERY_REGEX = /[!"#$%&'()*+,\-./:;<>=?@[\\\]^_`{|}~§]/;
|
||||
function escapeSQLString(str: string): string {
|
||||
if (str.startsWith('"') && str.endsWith('"')) {
|
||||
@@ -63,10 +146,13 @@ function escapeSQLString(str: string): string {
|
||||
return str.replace(/"/g, '""');
|
||||
}
|
||||
|
||||
function tokenize(query: string): string[] {
|
||||
const tokens: string[] = [];
|
||||
function tokenizeWithFields(
|
||||
query: string
|
||||
): Array<{ field?: string; token: string }> {
|
||||
const tokens: Array<{ field?: string; token: string }> = [];
|
||||
let buffer = "";
|
||||
let isQuoted = false;
|
||||
let currentField: string | undefined = undefined;
|
||||
|
||||
for (let i = 0; i < query.length; ++i) {
|
||||
const char = query[i];
|
||||
@@ -75,19 +161,51 @@ function tokenize(query: string): string[] {
|
||||
}
|
||||
if (char === " " && !isQuoted) {
|
||||
if (buffer.length > 0) {
|
||||
tokens.push(buffer);
|
||||
tokens.push({ field: currentField, token: buffer });
|
||||
buffer = "";
|
||||
}
|
||||
} else if (char === ":" && !isQuoted) {
|
||||
// Check for field
|
||||
const maybeField = buffer.trim().toLowerCase();
|
||||
if (isFieldSupported(maybeField)) {
|
||||
currentField = maybeField;
|
||||
buffer = "";
|
||||
} else {
|
||||
buffer += char;
|
||||
}
|
||||
} else {
|
||||
buffer += char;
|
||||
}
|
||||
}
|
||||
if (buffer.length > 0) tokens.push(buffer);
|
||||
if (buffer.length > 0) tokens.push({ field: currentField, token: buffer });
|
||||
|
||||
return tokens;
|
||||
}
|
||||
|
||||
function parseTokens(tokens: string[]): QueryNode {
|
||||
/**
 * Merges consecutive tokens that share the same field into one group.
 *
 * Groups keep the order of the input. A field that reappears after a
 * different field starts a new group instead of being merged into the
 * earlier one. Tokens without a field are grouped under `field: undefined`.
 *
 * @param tokens Tokens tagged with the field they were written under.
 * @returns One `{ field, tokens }` entry per run of same-field tokens.
 */
function groupTokensByField(tokens: Array<{ field?: string; token: string }>) {
  const groups: Array<{ field?: string; tokens: string[] }> = [];

  for (const entry of tokens) {
    const last = groups[groups.length - 1];
    if (last !== undefined && last.field === entry.field) {
      last.tokens.push(entry.token);
    } else {
      groups.push({ field: entry.field, tokens: [entry.token] });
    }
  }

  return groups;
}
|
||||
|
||||
// Parse a group of tokens into a QueryNode (handles boolean ops, etc)
|
||||
function parseTokensToQueryNode(tokens: string[]): QueryNode {
|
||||
const ast: QueryNode = { type: "query", children: [] };
|
||||
let currentPhrase: string[] = [];
|
||||
|
||||
@@ -102,15 +220,13 @@ function parseTokens(tokens: string[]): QueryNode {
|
||||
currentPhrase.push(token);
|
||||
}
|
||||
}
|
||||
|
||||
if (currentPhrase.length > 0) {
|
||||
ast.children.push({ type: "phrase", value: currentPhrase });
|
||||
}
|
||||
|
||||
return ast;
|
||||
}
|
||||
|
||||
function transformAST(ast: QueryNode): QueryNode {
|
||||
function transformQueryNode(ast: QueryNode): QueryNode {
|
||||
const transformedAST: QueryNode = { ...ast, children: [] };
|
||||
let lastWasPhrase = false;
|
||||
|
||||
@@ -150,7 +266,7 @@ function generateSQL(ast: QueryNode): string {
|
||||
return ast.children
|
||||
.map((child) => {
|
||||
if (child.type === "phrase") {
|
||||
return child.value.join(" AND ");
|
||||
return child.value.filter((v) => v.length >= 3).join(" AND ");
|
||||
}
|
||||
if (child.type === "AND" || child.type === "OR" || child.type === "NOT") {
|
||||
return child.type;
|
||||
@@ -160,18 +276,53 @@ function generateSQL(ast: QueryNode): string {
|
||||
.join(" ");
|
||||
}
|
||||
|
||||
export function transformQuery(query: string) {
|
||||
const tokens = tokenize(query);
|
||||
const largeTokens = tokens.filter(
|
||||
(token) => token.length >= 3 || token === "OR"
|
||||
/**
 * Parses a raw search query into per-field values.
 *
 * The query is tokenized, consecutive tokens of the same field are grouped,
 * and each group is parsed and transformed into a query node. Groups that
 * carry a field are wrapped in a `field_phrase` node. Every extractor in
 * `SUPPORTED_FIELDS` is then run over the resulting node list.
 *
 * @param query The raw query string.
 * @returns One entry per supported field holding that extractor's result,
 * plus `filters`: the number of fields other than `content` and `title`
 * whose value is neither `null` nor `undefined`.
 */
export function transformQuery(query: string): {
  [K in keyof typeof SUPPORTED_FIELDS]?: ReturnType<
    (typeof SUPPORTED_FIELDS)[K]
  >;
} & { filters: number } {
  const ast: (QueryNode | FieldPhraseNode)[] = [];
  for (const { field, tokens } of groupTokensByField(
    tokenizeWithFields(query)
  )) {
    const value = transformQueryNode(parseTokensToQueryNode(tokens));
    ast.push(field ? { type: "field_phrase", field, value } : value);
  }

  let filters = 0;
  const fields = Object.fromEntries(
    Object.entries(SUPPORTED_FIELDS).map(([key, extract]) => {
      const value = extract(ast);
      // content/title carry the text query itself; they are not filters.
      const isTextField = key === "content" || key === "title";
      if (!isTextField && value !== null && value !== undefined) filters++;
      return [key, value];
    })
  );
  return { ...fields, filters };
}
|
||||
|
||||
function serializeQuery(node: QueryNode | FieldPhraseNode) {
|
||||
return {
|
||||
query: generateSQL(transformAST(parseTokens(largeTokens))),
|
||||
tokens: tokenizeAst(transformAST(parseTokens(tokens)))
|
||||
query: generateSQL(node.type === "query" ? node : node.value),
|
||||
tokens: tokenizeAst(node.type === "query" ? node : node.value)
|
||||
};
|
||||
}
|
||||
|
||||
interface QueryTokens {
|
||||
export interface QueryTokens {
|
||||
andTokens: string[];
|
||||
orTokens: string[];
|
||||
notTokens: string[];
|
||||
|
||||
Reference in New Issue
Block a user