core: add internal links extraction

This commit is contained in:
Abdullah Atta
2024-01-24 16:21:16 +05:00
parent ff87bebe8f
commit c8f9925973
5 changed files with 182 additions and 32 deletions

View File

@@ -395,16 +395,21 @@ export class Notes implements ICollection {
});
}
async getBlocks(id: string) {
const note = await this.collection.get(id);
if (note?.locked || !note?.contentId) return [];
const rawContent = await this.db.content.get(note.contentId);
if (!rawContent || rawContent.locked) return [];
async contentBlocks(id: string) {
const content = await this.db.content.findByNoteId(id);
if (!content || content.locked) return [];
return getContentFromData(
rawContent.type,
rawContent?.data
).extractBlocks();
return getContentFromData(content.type, content.data).extract("blocks")
.blocks;
}
async internalLinks(id: string) {
const content = await this.db.content.findByNoteId(id);
if (!content || content.locked) return [];
return getContentFromData(content.type, content.data).extract(
"internalLinks"
).internalLinks;
}
}

View File

@@ -18,8 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import showdown from "@streetwriters/showdown";
import render from "dom-serializer";
import { find, isTag } from "domutils";
import { findAll, isTag } from "domutils";
import {
DomNode,
FormatOptions,
@@ -37,11 +36,19 @@ import {
import { HTMLRewriter } from "../utils/html-rewriter";
import { ContentBlock } from "../types";
import { InternalLink, parseInternalLink } from "../utils/internal-link";
import { Element } from "domhandler";
export type ResolveHashes = (
hashes: string[]
) => Promise<Record<string, string>>;
/** Names of the kinds of data that can be requested from an extraction. */
const ExtractableTypes = ["blocks", "internalLinks"] as const;
/** Union of the literal names listed in `ExtractableTypes`. */
type ExtractableType = (typeof ExtractableTypes)[number];
/** Extraction output, holding one array per extractable kind. */
type ExtractionResult = {
// Content blocks found in the document.
blocks: ContentBlock[];
// Internal links found in the document.
internalLinks: InternalLink[];
};
const ATTRIBUTES = {
hash: "data-hash",
mime: "data-mime",
@@ -108,28 +115,41 @@ export class Tiptap {
}).transform(this.data);
}
async extractBlocks() {
const nodes: ContentBlock[] = [];
const document = parseDocument(this.data);
extract(...types: ExtractableType[]): ExtractionResult {
const result: ExtractionResult = { blocks: [], internalLinks: [] };
const document = parseDocument(this.data, {
withEndIndices: true,
withStartIndices: true
});
const elements = find(
(element) => {
return isTag(element) && !!element.attribs[ATTRIBUTES.blockId];
},
document.childNodes,
false,
Infinity
);
for (const node of elements) {
if (!isTag(node)) continue;
nodes.push({
id: node.attribs[ATTRIBUTES.blockId],
type: node.tagName.toLowerCase(),
content: convertHtmlToTxt(render(node))
});
if (types.includes("blocks")) {
result.blocks.push(
...document.childNodes
.filter((element): element is Element => {
return isTag(element) && !!element.attribs[ATTRIBUTES.blockId];
})
.map((node) => ({
id: node.attribs[ATTRIBUTES.blockId],
type: node.tagName.toLowerCase(),
content: convertHtmlToTxt(
this.data.slice(node.startIndex || 0, node.endIndex || 0)
)
}))
);
}
return nodes;
if (types.includes("internalLinks")) {
result.internalLinks.push(
...findAll(
(e) => e.tagName === "a" && e.attribs.href.startsWith("nn://"),
document.childNodes
)
.map((e) => parseInternalLink(e.attribs.href))
.filter((v): v is InternalLink => !!v)
);
}
return result;
}
/**
@@ -253,9 +273,15 @@ function convertHtmlToTxt(html: string) {
{ selector: "table", format: "dataTable" },
{ selector: "ul.checklist", format: "taskList" },
{ selector: "ul.simple-checklist", format: "checkList" },
{ selector: "p", format: "paragraph" }
{ selector: "p", format: "paragraph" },
{ selector: `a[href^="nn://"]`, format: "internalLink" }
],
formatters: {
internalLink: (elem, walk, builder) => {
builder.addInline(`[[${elem.attribs.href}|`);
walk(elem.children, builder);
builder.addInline("]]");
},
taskList: (elem, walk, builder, formatOptions) => {
return formatList(elem, walk, builder, formatOptions, (elem) => {
return elem.attribs.class && elem.attribs.class.includes("checked")

View File

@@ -26,3 +26,8 @@ export {
parseInternalLink,
type InternalLink
} from "./utils/internal-link";
export {
extractInternalLinks,
highlightInternalLinks,
type TextSlice
} from "./utils/content-block";

View File

@@ -0,0 +1,106 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2023 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { ContentBlock } from "../types";
import { InternalLinkWithOffset, parseInternalLink } from "./internal-link";
/**
 * Matches `[[nn://note/...]]` markers inside plain-text block content. The
 * single capture group holds everything between the double brackets, i.e.
 * `<url>|<text>`.
 */
const INTERNAL_LINK_REGEX = /(?:\[\[(nn:\/\/note\/.+?)\]\])/gm;

/**
 * Finds every internal-link marker of the form `[[<url>|<text>]]` in
 * `block.content`.
 *
 * @param block Block whose `content` is scanned. It is not modified.
 * @returns One entry per marker whose URL `parseInternalLink` accepts, in
 * order of appearance. `start`/`end` are the offsets of the whole marker
 * (brackets included) within `block.content`; `text` is the part after the
 * first `|`.
 */
export function extractInternalLinks(
  block: ContentBlock
): InternalLinkWithOffset[] {
  const links: InternalLinkWithOffset[] = [];
  for (const match of block.content.matchAll(INTERNAL_LINK_REGEX)) {
    // `index` is 0 for a marker at the very start of the content, so it must
    // be compared against `undefined` rather than tested for truthiness.
    if (match.index === undefined) continue;

    const body = match[1];
    const separator = body.indexOf("|");
    // Without a `|` the whole body is the URL. Slicing with -1 here would
    // silently drop the URL's last character.
    const url = separator === -1 ? body : body.slice(0, separator);
    const text = body.slice(separator + 1);

    const link = parseInternalLink(url);
    if (!link) continue;

    links.push({
      ...link,
      start: match.index,
      end: match.index + match[0].length,
      text
    });
  }
  return links;
}
/**
 * Replaces each link's `[start, end)` span in `block.content` with the link's
 * plain `text`, and rewrites the link's `start`/`end` so they delimit that
 * text in the shortened content.
 *
 * Both `block` and the entries of `links` are mutated in place.
 *
 * Assumes `links` is ordered by ascending `start` and that the offsets refer
 * to the content as it is when the function is called; earlier replacements
 * are compensated for by shifting later offsets.
 *
 * @param block Block whose `content` is rewritten.
 * @param links Links with offsets into `block.content`.
 * @returns The same `block` instance that was passed in.
 */
function normalize(block: ContentBlock, links: InternalLinkWithOffset[]) {
  // Number of characters removed from the content by replacements so far.
  let removed = 0;
  for (const link of links) {
    // Shift the original offsets left to account for earlier replacements.
    link.start -= removed;
    link.end -= removed;

    block.content =
      block.content.slice(0, link.start) +
      link.text +
      block.content.slice(link.end);

    removed += link.end - link.start - link.text.length;
    // The link now covers only its visible text.
    link.end = link.start + link.text.length;
  }
  return block;
}
/** A run of text plus a flag saying whether it should be rendered highlighted. */
export type TextSlice = { text: string; highlighted: boolean };

/**
 * Builds a `[before, link, after]` triple for every internal link in `block`
 * that points at `noteId`.
 *
 * Note that `block.content` is rewritten in place: link markers are replaced
 * by their visible text before the surrounding context is sliced out.
 *
 * @param block Block to scan; its `content` is modified.
 * @param noteId Only links whose `id` equals this value are returned.
 * @returns One triple per matching link. The outer slices are the text before
 * and after the link, each truncated to 50 characters; the middle slice is the
 * link text and is the only one marked as highlighted.
 */
export function highlightInternalLinks(
  block: ContentBlock,
  noteId: string
): [TextSlice, TextSlice, TextSlice][] {
  const links = extractInternalLinks(block);
  normalize(block, links);

  return links
    .filter((link) => link.id === noteId)
    .map<[TextSlice, TextSlice, TextSlice]>((link) => {
      const before = block.content.slice(0, link.start);
      const after = block.content.slice(link.end);
      return [
        { text: ellipsize(before, 50, "start"), highlighted: false },
        { highlighted: true, text: link.text },
        { highlighted: false, text: ellipsize(after, 50, "end") }
      ];
    });
}
/**
 * Shortens `text` to at most `maxLength` characters plus a `...` marker.
 *
 * @param text Text to shorten.
 * @param maxLength Longest text returned without truncation.
 * @param from Which side is cut: `"start"` keeps the tail of the text and
 * prefixes `...`; `"end"` keeps the head and appends `...`.
 * @returns `text` unchanged when it already fits, otherwise the truncated form.
 */
function ellipsize(text: string, maxLength: number, from: "start" | "end") {
  if (text.length <= maxLength) return text;
  if (from === "start") return "..." + text.slice(-maxLength);
  return text.slice(0, maxLength) + "...";
}

View File

@@ -24,6 +24,14 @@ export type InternalLink<T extends InternalLinkType = InternalLinkType> = {
id: string;
params?: Partial<InternalLinkParams[T]>;
};
/**
 * An `InternalLink` together with its location inside a piece of text and
 * the text attached to it.
 */
export type InternalLinkWithOffset<
T extends InternalLinkType = InternalLinkType
> = InternalLink<T> & {
// Offset in the containing text at which the link begins.
start: number;
// Offset in the containing text just past the end of the link.
end: number;
// Text associated with the link.
text: string;
};
type InternalLinkParams = {
note: { blockId: string };
};