core: take into account boolean parameters in query

This commit is contained in:
Abdullah Atta
2025-06-05 14:39:42 +05:00
parent fa43f97bb4
commit 6fccac9aea
2 changed files with 123 additions and 33 deletions

View File

@@ -89,10 +89,8 @@ export default class Lookup {
})
.then((r) => r.map((r) => r.id));
const smallTokens = Array.from(
new Set(
tokens.filter((token) => token.length < 3 && token !== "OR")
).values()
const smallTokens = [...tokens.andTokens, ...tokens.orTokens].filter(
(token) => token.length < 3
);
if (smallTokens.length === 0) return resultsA;
@@ -129,10 +127,8 @@ export default class Lookup {
return [];
});
const smallTokens = Array.from(
new Set(
tokens.filter((token) => token.length < 3 && token !== "OR")
).values()
const smallTokens = [...tokens.andTokens, ...tokens.orTokens].filter(
(token) => token.length < 3
);
const ftsIds = ftsResults.map((r) => r.id);
@@ -195,7 +191,16 @@ export default class Lookup {
}
console.timeEnd("sorting matches");
const highlightTokens = tokens.map((t) => t.replace(/"(.+)"/g, "$1"));
const andTokens = tokens.andTokens.map((t) =>
t.replace(/"(.+)"/g, "$1").toLowerCase()
);
const orTokens = tokens.orTokens.map((t) =>
t.replace(/"(.+)"/g, "$1").toLowerCase()
);
const notTokens = tokens.notTokens.map((t) =>
t.replace(/"(.+)"/g, "$1").toLowerCase()
);
const allTokens = [...andTokens, ...orTokens];
return new VirtualizedGrouping<HighlightedResult>(
matches.ids.length,
@@ -218,12 +223,15 @@ export default class Lookup {
.execute();
for (const title of titles) {
const highlighted = highlightQueries(
const { text: highlighted } = highlightQueries(
title.title || "",
highlightTokens
allTokens
);
const hasMatches = !highlightTokens.every((t) =>
highlighted.includes(`${MATCH_TAG_OPEN}${t}${MATCH_TAG_CLOSE}`)
const hasMatches = textContainsTokens(
highlighted,
andTokens,
orTokens,
notTokens
);
results.push({
id: title.id,
@@ -255,12 +263,8 @@ export default class Lookup {
dateCreated: 0,
dateModified: 0
});
const highlighted = highlightHtmlContent(html.data, highlightTokens);
if (
!highlightTokens.every((t) =>
highlighted.includes(`${MATCH_TAG_OPEN}${t}${MATCH_TAG_CLOSE}`)
)
)
const highlighted = highlightHtmlContent(html.data, allTokens);
if (!textContainsTokens(highlighted, andTokens, orTokens, notTokens))
continue;
result.content = extractMatchingBlocks(
highlighted,
@@ -291,9 +295,7 @@ export default class Lookup {
for (const result of results) {
result.content.sort(
(a, b) =>
getMatchScore(b, highlightTokens) -
getMatchScore(a, highlightTokens)
(a, b) => getMatchScore(b, allTokens) - getMatchScore(a, allTokens)
);
}
@@ -596,25 +598,31 @@ export default class Lookup {
}
}
function highlightQueries(text: string, queries: string[]): string {
if (!text || !queries.length) return text;
function highlightQueries(
text: string,
queries: string[]
): { text: string; hasMatches: boolean } {
if (!text || !queries.length) return { text, hasMatches: false };
const patterns = queries
.filter((q) => q.length > 0)
.map((q) => q.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
if (patterns.length === 0) return text;
if (patterns.length === 0) return { text, hasMatches: false };
try {
const regex = new RegExp(patterns.join("|"), "gi");
text.replace(
regex,
(match) => `${MATCH_TAG_OPEN}${match}${MATCH_TAG_CLOSE}`
);
return text;
let hasMatches = false;
const result = text.replace(regex, (match) => {
hasMatches = true;
return `${MATCH_TAG_OPEN}${match}${MATCH_TAG_CLOSE}`;
});
return { text: result, hasMatches };
} catch (error) {
return text;
return { text, hasMatches: false };
}
}
@@ -975,3 +983,26 @@ function getMatchScore(
return score;
}
/**
 * Decides whether already-highlighted text satisfies a boolean query.
 *
 * A token counts as present when the lowercased text contains the token
 * wrapped in `MATCH_TAG_OPEN` / `MATCH_TAG_CLOSE`. Tokens are compared as
 * given against the lowercased text, so a token containing uppercase
 * characters can never be found; callers are expected to pass lowercase
 * tokens.
 * NOTE(review): the tag constants are interpolated without lowercasing —
 * this presumably relies on them being lowercase already; confirm.
 *
 * @param text Text in which matches have been wrapped in the match tags.
 * @param andTokens Tokens that satisfy the query when all are present.
 * @param orTokens Tokens that satisfy the query when at least one is present.
 * @param notTokens Tokens that reject the text when any one is present.
 * @returns `false` if any NOT token is present; otherwise `true` when every
 * AND token is present (trivially so for an empty list) or some OR token is
 * present.
 */
function textContainsTokens(
  text: string,
  andTokens: string[],
  orTokens: string[],
  notTokens: string[]
) {
  const haystack = text.toLowerCase();
  const isHighlighted = (token: string) =>
    haystack.includes(`${MATCH_TAG_OPEN}${token}${MATCH_TAG_CLOSE}`);

  // Exclusions win over everything else.
  if (notTokens.some(isHighlighted)) return false;

  // Either the full AND set is present, or any single OR alternative is.
  if (andTokens.every(isHighlighted)) return true;
  return orTokens.some(isHighlighted);
}

View File

@@ -165,8 +165,67 @@ export function transformQuery(query: string) {
const largeTokens = tokens.filter(
(token) => token.length >= 3 || token === "OR"
);
const ast = transformAST(parseTokens(largeTokens));
return {
query: generateSQL(transformAST(parseTokens(largeTokens))),
tokens
query: generateSQL(ast),
tokens: tokenizeAst(ast)
};
}
/**
 * Words extracted from a parsed query, grouped by the operator node that
 * precedes the phrase they came from (populated by `tokenizeAst`).
 */
interface QueryTokens {
/** Words from phrases that are not flagged by a preceding `OR` or `NOT` node. */
andTokens: string[];
/** Words from phrases that follow an `OR` node. */
orTokens: string[];
/** Words from phrases that follow a `NOT` node. */
notTokens: string[];
}
/**
 * Flattens the direct children of a parsed query into AND / OR / NOT word
 * lists.
 *
 * The children are scanned in order. A `NOT` or `OR` node flags the next
 * phrase node; every word of that phrase is then filed under `notTokens` or
 * `orTokens` respectively (`OR` takes precedence if both flags are set).
 * Phrases with no pending flag go to `andTokens`. Nodes of any other type are
 * ignored and leave pending flags untouched.
 *
 * Each word is recorded at most once across all three lists: the first
 * occurrence decides its list and later occurrences are skipped.
 *
 * @param ast Root query node whose `children` are scanned (not recursed into).
 * @returns The collected word lists, each in first-seen order.
 */
function tokenizeAst(ast: QueryNode): QueryTokens {
  const result: QueryTokens = {
    andTokens: [],
    orTokens: [],
    notTokens: []
  };
  // Tracks every word already filed so duplicates are skipped in O(1).
  const seen = new Set<string>();
  let isNextNot = false;
  let isNextOr = false;

  for (const node of ast.children) {
    if (node.type === "NOT") {
      isNextNot = true;
      continue;
    }
    if (node.type === "OR") {
      isNextOr = true;
      continue;
    }
    if (node.type !== "phrase") continue;

    // Pick the destination once per phrase so that a duplicate word in the
    // middle of the phrase cannot change where the remaining words go.
    const bucket = isNextOr
      ? result.orTokens
      : isNextNot
        ? result.notTokens
        : result.andTokens;

    for (const word of node.value) {
      if (seen.has(word)) continue;
      seen.add(word);
      bucket.push(word);
    }

    // The operator applies only to the phrase that immediately follows it.
    isNextOr = false;
    isNextNot = false;
  }

  return result;
}