mirror of
https://github.com/streetwriters/notesnook.git
synced 2025-12-23 06:59:31 +01:00
core: add internal links extraction
This commit is contained in:
@@ -395,16 +395,21 @@ export class Notes implements ICollection {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async getBlocks(id: string) {
|
async contentBlocks(id: string) {
|
||||||
const note = await this.collection.get(id);
|
const content = await this.db.content.findByNoteId(id);
|
||||||
if (note?.locked || !note?.contentId) return [];
|
if (!content || content.locked) return [];
|
||||||
const rawContent = await this.db.content.get(note.contentId);
|
|
||||||
if (!rawContent || rawContent.locked) return [];
|
|
||||||
|
|
||||||
return getContentFromData(
|
return getContentFromData(content.type, content.data).extract("blocks")
|
||||||
rawContent.type,
|
.blocks;
|
||||||
rawContent?.data
|
}
|
||||||
).extractBlocks();
|
|
||||||
|
async internalLinks(id: string) {
|
||||||
|
const content = await this.db.content.findByNoteId(id);
|
||||||
|
if (!content || content.locked) return [];
|
||||||
|
|
||||||
|
return getContentFromData(content.type, content.data).extract(
|
||||||
|
"internalLinks"
|
||||||
|
).internalLinks;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -18,8 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import showdown from "@streetwriters/showdown";
|
import showdown from "@streetwriters/showdown";
|
||||||
import render from "dom-serializer";
|
import { findAll, isTag } from "domutils";
|
||||||
import { find, isTag } from "domutils";
|
|
||||||
import {
|
import {
|
||||||
DomNode,
|
DomNode,
|
||||||
FormatOptions,
|
FormatOptions,
|
||||||
@@ -37,11 +36,19 @@ import {
|
|||||||
import { HTMLRewriter } from "../utils/html-rewriter";
|
import { HTMLRewriter } from "../utils/html-rewriter";
|
||||||
import { ContentBlock } from "../types";
|
import { ContentBlock } from "../types";
|
||||||
import { InternalLink, parseInternalLink } from "../utils/internal-link";
|
import { InternalLink, parseInternalLink } from "../utils/internal-link";
|
||||||
|
import { Element } from "domhandler";
|
||||||
|
|
||||||
export type ResolveHashes = (
|
export type ResolveHashes = (
|
||||||
hashes: string[]
|
hashes: string[]
|
||||||
) => Promise<Record<string, string>>;
|
) => Promise<Record<string, string>>;
|
||||||
|
|
||||||
|
const ExtractableTypes = ["blocks", "internalLinks"] as const;
|
||||||
|
type ExtractableType = (typeof ExtractableTypes)[number];
|
||||||
|
type ExtractionResult = {
|
||||||
|
blocks: ContentBlock[];
|
||||||
|
internalLinks: InternalLink[];
|
||||||
|
};
|
||||||
|
|
||||||
const ATTRIBUTES = {
|
const ATTRIBUTES = {
|
||||||
hash: "data-hash",
|
hash: "data-hash",
|
||||||
mime: "data-mime",
|
mime: "data-mime",
|
||||||
@@ -108,28 +115,41 @@ export class Tiptap {
|
|||||||
}).transform(this.data);
|
}).transform(this.data);
|
||||||
}
|
}
|
||||||
|
|
||||||
async extractBlocks() {
|
extract(...types: ExtractableType[]): ExtractionResult {
|
||||||
const nodes: ContentBlock[] = [];
|
const result: ExtractionResult = { blocks: [], internalLinks: [] };
|
||||||
const document = parseDocument(this.data);
|
const document = parseDocument(this.data, {
|
||||||
|
withEndIndices: true,
|
||||||
|
withStartIndices: true
|
||||||
|
});
|
||||||
|
|
||||||
const elements = find(
|
if (types.includes("blocks")) {
|
||||||
(element) => {
|
result.blocks.push(
|
||||||
return isTag(element) && !!element.attribs[ATTRIBUTES.blockId];
|
...document.childNodes
|
||||||
},
|
.filter((element): element is Element => {
|
||||||
document.childNodes,
|
return isTag(element) && !!element.attribs[ATTRIBUTES.blockId];
|
||||||
false,
|
})
|
||||||
Infinity
|
.map((node) => ({
|
||||||
);
|
id: node.attribs[ATTRIBUTES.blockId],
|
||||||
|
type: node.tagName.toLowerCase(),
|
||||||
for (const node of elements) {
|
content: convertHtmlToTxt(
|
||||||
if (!isTag(node)) continue;
|
this.data.slice(node.startIndex || 0, node.endIndex || 0)
|
||||||
nodes.push({
|
)
|
||||||
id: node.attribs[ATTRIBUTES.blockId],
|
}))
|
||||||
type: node.tagName.toLowerCase(),
|
);
|
||||||
content: convertHtmlToTxt(render(node))
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
return nodes;
|
|
||||||
|
if (types.includes("internalLinks")) {
|
||||||
|
result.internalLinks.push(
|
||||||
|
...findAll(
|
||||||
|
(e) => e.tagName === "a" && e.attribs.href.startsWith("nn://"),
|
||||||
|
document.childNodes
|
||||||
|
)
|
||||||
|
.map((e) => parseInternalLink(e.attribs.href))
|
||||||
|
.filter((v): v is InternalLink => !!v)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -253,9 +273,15 @@ function convertHtmlToTxt(html: string) {
|
|||||||
{ selector: "table", format: "dataTable" },
|
{ selector: "table", format: "dataTable" },
|
||||||
{ selector: "ul.checklist", format: "taskList" },
|
{ selector: "ul.checklist", format: "taskList" },
|
||||||
{ selector: "ul.simple-checklist", format: "checkList" },
|
{ selector: "ul.simple-checklist", format: "checkList" },
|
||||||
{ selector: "p", format: "paragraph" }
|
{ selector: "p", format: "paragraph" },
|
||||||
|
{ selector: `a[href^="nn://"]`, format: "internalLink" }
|
||||||
],
|
],
|
||||||
formatters: {
|
formatters: {
|
||||||
|
internalLink: (elem, walk, builder) => {
|
||||||
|
builder.addInline(`[[${elem.attribs.href}|`);
|
||||||
|
walk(elem.children, builder);
|
||||||
|
builder.addInline("]]");
|
||||||
|
},
|
||||||
taskList: (elem, walk, builder, formatOptions) => {
|
taskList: (elem, walk, builder, formatOptions) => {
|
||||||
return formatList(elem, walk, builder, formatOptions, (elem) => {
|
return formatList(elem, walk, builder, formatOptions, (elem) => {
|
||||||
return elem.attribs.class && elem.attribs.class.includes("checked")
|
return elem.attribs.class && elem.attribs.class.includes("checked")
|
||||||
|
|||||||
@@ -26,3 +26,8 @@ export {
|
|||||||
parseInternalLink,
|
parseInternalLink,
|
||||||
type InternalLink
|
type InternalLink
|
||||||
} from "./utils/internal-link";
|
} from "./utils/internal-link";
|
||||||
|
export {
|
||||||
|
extractInternalLinks,
|
||||||
|
highlightInternalLinks,
|
||||||
|
type TextSlice
|
||||||
|
} from "./utils/content-block";
|
||||||
|
|||||||
106
packages/core/src/utils/content-block.ts
Normal file
106
packages/core/src/utils/content-block.ts
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
/*
|
||||||
|
This file is part of the Notesnook project (https://notesnook.com/)
|
||||||
|
|
||||||
|
Copyright (C) 2023 Streetwriters (Private) Limited
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { ContentBlock } from "../types";
|
||||||
|
import { InternalLinkWithOffset, parseInternalLink } from "./internal-link";
|
||||||
|
|
||||||
|
const INTERNAL_LINK_REGEX = /(?:\[\[(nn:\/\/note\/.+?)\]\])/gm;
|
||||||
|
/**
 * Finds every `[[nn://note/...|text]]` token in a block's text content and
 * returns the parsed links together with their position in that content.
 *
 * @param block The block whose `content` string is scanned. It is not modified.
 * @returns One entry per token that `parseInternalLink` accepts, in the order
 * the tokens appear. `start`/`end` are the offsets of the whole token
 * (brackets included) inside `block.content`; `text` is the part after the
 * first `|`.
 */
export function extractInternalLinks(block: ContentBlock) {
  const links: InternalLinkWithOffset[] = [];

  for (const match of block.content.matchAll(INTERNAL_LINK_REGEX)) {
    // 0 is a valid offset (a link at the very start of the block), so only
    // skip when the index is genuinely absent.
    if (match.index === undefined) continue;

    const body = match[1];
    const separatorIndex = body.indexOf("|");
    // Without a separator the whole body is the URL; slicing with -1 would
    // chop off its last character. The text falls back to the body itself.
    const url = separatorIndex === -1 ? body : body.slice(0, separatorIndex);
    const text = body.slice(separatorIndex + 1);

    const link = parseInternalLink(url);
    if (!link) continue;

    links.push({
      ...link,
      start: match.index,
      end: match.index + match[0].length,
      text
    });
  }

  return links;
}
|
||||||
|
|
||||||
|
/**
 * Replaces each link token in `block.content` with the link's display text
 * and rewrites every link's `start`/`end` so they point at that text in the
 * shortened content.
 *
 * Both arguments are mutated in place. The running offset correction only
 * works when `links` is ordered by ascending `start` and the ranges do not
 * overlap, which is the order `extractInternalLinks` produces.
 *
 * @param block The block whose `content` is rewritten.
 * @param links Links with offsets into the original `block.content`.
 * @returns The same `block` object that was passed in.
 */
function normalize(block: ContentBlock, links: InternalLinkWithOffset[]) {
  // Total number of characters removed by the replacements made so far.
  let removed = 0;

  for (const link of links) {
    // Shift the offsets to account for earlier, already shortened tokens.
    link.start -= removed;
    link.end -= removed;

    block.content =
      block.content.slice(0, link.start) +
      link.text +
      block.content.slice(link.end);

    removed += link.end - link.start - link.text.length;

    // The link now spans only its display text.
    link.end = link.start + link.text.length;
  }

  return block;
}
|
||||||
|
|
||||||
|
export type TextSlice = { text: string; highlighted: boolean };
|
||||||
|
/**
 * Builds highlight triples for every internal link in `block` that points at
 * `noteId`.
 *
 * Each triple is `[before, link, after]`: the text preceding the link (at most
 * the last 50 characters), the link's display text marked as highlighted, and
 * the text following the link (at most the first 50 characters). Link tokens
 * in the surrounding text are replaced by their display text first.
 *
 * @param block The block to search. It is left unmodified.
 * @param noteId Only links whose `id` equals this value are returned.
 * @returns One triple per matching link, in order of appearance.
 */
export function highlightInternalLinks(
  block: ContentBlock,
  noteId: string
): [TextSlice, TextSlice, TextSlice][] {
  const links = extractInternalLinks(block);
  // normalize() rewrites the content of the block it is given, so hand it a
  // shallow copy. Otherwise a second call with the same block would find no
  // link tokens left to highlight.
  const normalized = normalize({ ...block }, links);

  const highlighted: [TextSlice, TextSlice, TextSlice][] = [];
  for (const link of links) {
    if (link.id !== noteId) continue;

    const before = normalized.content.slice(0, link.start);
    const after = normalized.content.slice(link.end);

    highlighted.push([
      {
        text: ellipsize(before, 50, "start"),
        highlighted: false
      },
      {
        // Only links to `noteId` reach this point.
        highlighted: true,
        text: link.text
      },
      {
        highlighted: false,
        text: ellipsize(after, 50, "end")
      }
    ]);
  }

  return highlighted;
}
|
||||||
|
|
||||||
|
/**
 * Shortens `text` to at most `maxLength` characters and marks the cut with
 * `"..."`.
 *
 * @param text The text to shorten.
 * @param maxLength Maximum number of characters of `text` to keep. Values
 * below zero are treated as zero.
 * @param from Which side is cut: `"start"` keeps the tail of the text and
 * prefixes the ellipsis, `"end"` keeps the head and appends it.
 * @returns `text` unchanged when it already fits, otherwise the kept part
 * plus the ellipsis.
 */
function ellipsize(text: string, maxLength: number, from: "start" | "end") {
  const limit = Math.max(0, maxLength);
  if (text.length <= limit) return text;

  // Slice from an explicit positive offset: `slice(-0)` is `slice(0)` and
  // would return the whole string when the limit is zero.
  return from === "start"
    ? "..." + text.slice(text.length - limit)
    : text.slice(0, limit) + "...";
}
|
||||||
@@ -24,6 +24,14 @@ export type InternalLink<T extends InternalLinkType = InternalLinkType> = {
|
|||||||
id: string;
|
id: string;
|
||||||
params?: Partial<InternalLinkParams[T]>;
|
params?: Partial<InternalLinkParams[T]>;
|
||||||
};
|
};
|
||||||
|
export type InternalLinkWithOffset<
|
||||||
|
T extends InternalLinkType = InternalLinkType
|
||||||
|
> = InternalLink<T> & {
|
||||||
|
start: number;
|
||||||
|
end: number;
|
||||||
|
text: string;
|
||||||
|
};
|
||||||
|
|
||||||
type InternalLinkParams = {
|
type InternalLinkParams = {
|
||||||
note: { blockId: string };
|
note: { blockId: string };
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user