diff --git a/packages/core/__tests__/notes.test.js b/packages/core/__tests__/notes.test.js
index 8dbd1ce7c..10146b93d 100644
--- a/packages/core/__tests__/notes.test.js
+++ b/packages/core/__tests__/notes.test.js
@@ -106,7 +106,7 @@ test("note should not get headline if there is no p tag", () =>
}
}).then(async ({ db, id }) => {
let note = db.notes.note(id);
- expect(note.headline).toBeUndefined();
+ expect(note.headline).toBe("");
}));
test("note title should allow trailing space", () =>
@@ -132,7 +132,7 @@ test("update note", () =>
title: "I am a new title",
content: {
type: TEST_NOTE.content.type,
- data: "
"
+ data: "
"
},
pinned: true,
favorite: true
@@ -348,7 +348,7 @@ test("note content should not contain image base64 data after save", () =>
const note = db.notes.note(id);
const content = await note.content();
expect(content).not.toContain(`src="data:image/png;`);
- expect(content).not.toContain(`src=`);
+ expect(content).toContain(`src=""`);
}));
test("adding a note with an invalid tag should clean the tag array", () =>
diff --git a/packages/core/__tests__/utils/index.js b/packages/core/__tests__/utils/index.js
index a2988b591..a9c362c18 100644
--- a/packages/core/__tests__/utils/index.js
+++ b/packages/core/__tests__/utils/index.js
@@ -49,7 +49,7 @@ const notebookTest = (notebook = TEST_NOTEBOOK) =>
var TEST_NOTE = {
content: {
type: "tiptap",
- data: `Hello
This is colorful
`
+ data: `Hello
This is colorful
`
}
};
diff --git a/packages/core/content-types/__tests__/tiptap.test.js b/packages/core/content-types/__tests__/tiptap.test.js
index 226bef1a7..985ab22d9 100644
--- a/packages/core/content-types/__tests__/tiptap.test.js
+++ b/packages/core/content-types/__tests__/tiptap.test.js
@@ -27,7 +27,7 @@ test("img src is empty after extract attachments", async () => {
});
expect(result.attachments).toHaveLength(1);
expect(result.data).not.toContain(`src="data:image/png;`);
- expect(result.data).not.toContain(`src=`);
+ expect(result.data).toContain(`src=""`);
expect(result.data).toContain(`data-hash="helloworld"`);
});
@@ -41,6 +41,12 @@ test("img src is present after insert attachments", async () => {
expect(result2).toContain(`src="i am a data"`);
});
+test("remove attachments with particular hash", async () => {
+  // The hash is named once so the removal request and the assertion cannot
+  // drift apart. Presumably IMG_CONTENT contains an element carrying this
+  // data-hash — verify against the fixture.
+  const hash = "d3eab72e94e3cd35";
+  const html = new Tiptap(IMG_CONTENT).removeAttachments([hash]);
+  expect(html).not.toContain(hash);
+});
+
const HTMLS = {
tables: `| Goal | To introduce various features of the app to the user and to convert a user on trial or basic plan to upgrade. |
| Frequency | 1/week or 2/week |
| Types | Feature intro, upgrade promo, one time emails |
| |
Emails
Feature intro
Features:
- Web clipper on mobile
- Pin any note to notification
- Take notes from notifications
- App lock
- Importer
- Encrypted attachments
- Session history & automatic backups
- Note publishing
- Note exports
- Collapsible headers
Promos
- Trial about to end
- Trial ending (with option to request an extension)
- Try free for 14 days
One time
- End-of-month progress report
- What's coming/roadmap
- What we are working on
- Join the community
`,
tables2: `Note 8/6/22, 10:48 AM
diff --git a/packages/core/content-types/tiptap.js b/packages/core/content-types/tiptap.js
index 1f955e7b7..2d991712a 100644
--- a/packages/core/content-types/tiptap.js
+++ b/packages/core/content-types/tiptap.js
@@ -19,7 +19,19 @@ along with this program. If not, see .
import showdown from "@streetwriters/showdown";
import dataurl from "../utils/dataurl";
-import { getDummyDocument, parseHTML } from "../utils/html-parser";
+import {
+ extractFirstParagraph,
+ getDummyDocument,
+ parseHTML
+} from "../utils/html-parser";
+import { Attributes, HTMLParser, HTMLRewriter } from "../utils/html-rewriter";
+
+const ATTRIBUTES = {
+ hash: "data-hash",
+ mime: "data-mime",
+ filename: "data-filename",
+ src: "src"
+};
showdown.helper.document = getDummyDocument();
var converter = new showdown.Converter();
@@ -38,6 +50,7 @@ export class Tiptap {
}
toTXT() {
+ if (!this.document) this.document = parseHTML(this.data);
return this.document.body.innerText;
}
@@ -46,15 +59,12 @@ export class Tiptap {
}
toHeadline() {
- const paragraph = this.document.querySelector("p");
- if (!paragraph) return;
-
- return paragraph.innerText;
+ return extractFirstParagraph(this.data);
}
- isEmpty() {
- return this.toTXT().trim().length <= 0;
- }
+ // isEmpty() {
+ // return this.toTXT().trim().length <= 0;
+ // }
/**
* @returns {Boolean}
@@ -66,109 +76,140 @@ export class Tiptap {
}
async insertMedia(getData) {
- const attachmentElements = this.document.querySelectorAll("img");
- for (var i = 0; i < attachmentElements.length; ++i) {
- const attachment = attachmentElements[i];
- switch (attachment.tagName) {
- case "IMG": {
- const hash = getDatasetAttribute(attachment, "hash");
- if (!hash) continue;
+ let hashes = [];
+ new HTMLParser({
+ ontag: (name, attr) => {
+ const hash = Attributes.get(attr, ATTRIBUTES.hash);
+ if (name === "img" && hash) hashes.push(hash);
+ }
+ }).parse(this.data);
- const src = await getData(hash, {
- total: attachmentElements.length,
- current: i
- });
- if (!src) continue;
- attachment.setAttribute("src", src);
- break;
+ const images = {};
+ for (let i = 0; i < hashes.length; ++i) {
+ const hash = hashes[i];
+ const src = await getData(hash, {
+ total: hashes.length,
+ current: i
+ });
+ if (!src) continue;
+ images[hash] = src;
+ }
+
+ return new HTMLRewriter({
+ ontag: (name, attr) => {
+ const hash = Attributes.get(attr, ATTRIBUTES.hash);
+ if (name === "img" && hash) {
+ const src = images[Attributes.get(attr, ATTRIBUTES.hash)];
+ if (!src) return;
+
+ return { name, attr: Attributes.set(attr, ATTRIBUTES.src, src) };
}
}
- }
- return this.document.body.innerHTML;
+ }).transform(this.data);
}
+ /**
+ * @param {string[]} hashes
+ * @returns {string} the note HTML with every element whose data-hash is listed in `hashes` left out
+ */
removeAttachments(hashes) {
- const query = hashes.map((h) => `[data-hash="${h}"]`).join(",");
- const attachmentElements = this.document.querySelectorAll(query);
-
- for (var i = 0; i < attachmentElements.length; ++i) {
- const attachment = attachmentElements[i];
- attachment.remove();
- }
-
- return this.document.body.innerHTML;
+ return new HTMLRewriter({
+ ontag: (_name, attr) => {
+ if (hashes.includes(Attributes.get(attr, ATTRIBUTES.hash)))
+ return false;
+ }
+ }).transform(this.data);
}
async extractAttachments(store) {
- const attachments = [];
- const attachmentElements = this.document.querySelectorAll("img,span");
+ let sources = [];
+ new HTMLParser({
+ ontag: (name, attr, pos) => {
+ const hash = Attributes.get(attr, ATTRIBUTES.hash);
+ const src = Attributes.get(attr, ATTRIBUTES.src);
+ if (name === "img" && !hash && src) {
+ sources.push({
+ src,
+ id: `${pos.start}${pos.end}`
+ });
+ }
+ }
+ }).parse(this.data);
- for (var i = 0; i < attachmentElements.length; ++i) {
- const attachment = attachmentElements[i];
+ const images = {};
+ for (const image of sources) {
try {
- switch (attachment.tagName) {
- case "IMG": {
- if (!getDatasetAttribute(attachment, "hash")) {
- const src = attachment.getAttribute("src");
- if (!src) continue;
+ const { data, mime } = dataurl.toObject(image.src);
+ if (!data) continue;
+ const storeResult = await store(data, "base64");
+ if (!storeResult) continue;
- const { data, mime } = dataurl.toObject(src);
- if (!data) continue;
+ images[image.id] = { ...storeResult, mime };
+ } catch (e) {
+ console.error(e);
+ images[image.id] = false;
+ }
+ }
+
+ let attachments = [];
+ const html = new HTMLRewriter({
+ ontag: (name, attr, pos) => {
+ switch (name) {
+ case "img": {
+ const hash = Attributes.get(attr, ATTRIBUTES.hash);
+
+ if (hash) {
+ attachments.push({
+ hash
+ });
+ return {
+ name,
+ attr: Attributes.set(attr, ATTRIBUTES.src, "")
+ };
+ } else {
+ const imageData = images[`${pos.start}${pos.end}`];
+ if (!imageData) return imageData;
+
+ const { key, metadata, mime } = imageData;
+ if (!metadata.hash) return;
const type =
- getDatasetAttribute(attachment, "mime") || mime || "image/jpeg";
-
- const storeResult = await store(data, "base64");
- if (!storeResult) continue;
-
- const { key, metadata } = storeResult;
- if (!metadata.hash) continue;
-
- setDatasetAttribute(attachment, "hash", metadata.hash);
+ Attributes.get(attr, ATTRIBUTES.mime) || mime || "image/jpeg";
+ const filename =
+ Attributes.get(attr, ATTRIBUTES.filename) || metadata.hash;
attachments.push({
type,
- filename:
- getDatasetAttribute(attachment, "filename") || metadata.hash,
+ filename,
...metadata,
key
});
- } else {
- attachments.push({
- hash: getDatasetAttribute(attachment, "hash")
- });
+
+ return {
+ name,
+ attr: Attributes.set(
+ Attributes.set(attr, ATTRIBUTES.hash, metadata.hash),
+ ATTRIBUTES.src,
+ ""
+ )
+ };
}
- attachment.removeAttribute("src");
- break;
}
- default: {
- if (!getDatasetAttribute(attachment, "hash")) continue;
+ case "span": {
+ const hash = Attributes.get(attr, ATTRIBUTES.hash);
+ if (!hash) return;
attachments.push({
- hash: getDatasetAttribute(attachment, "hash")
+ hash
});
break;
}
}
- } catch (e) {
- if (e.message === "bad base-64") {
- attachment.remove();
- console.error(e);
- continue;
- }
- throw e;
}
- }
+ }).transform(this.data);
+
return {
- data: this.document.body.innerHTML,
+ data: html,
attachments
};
}
}
-
-function getDatasetAttribute(element, attribute) {
- return element.getAttribute(`data-${attribute}`);
-}
-
-function setDatasetAttribute(element, attribute, value) {
- return element.setAttribute(`data-${attribute}`, value);
-}
diff --git a/packages/core/package-lock.json b/packages/core/package-lock.json
index 5d94e6eb9..845911f81 100644
--- a/packages/core/package-lock.json
+++ b/packages/core/package-lock.json
@@ -17,7 +17,7 @@
"dayjs": "^1.11.3",
"entities": "^4.3.1",
"fflate": "^0.7.3",
- "htmlparser2": "^8.0.1",
+ "htmlparser2": "github:thecodrr/htmlparser2",
"linkedom": "^0.14.17",
"liqe": "^1.13.0",
"qclone": "^1.2.0",
@@ -5316,7 +5316,8 @@
"license": "MIT"
},
"node_modules/htmlparser2": {
- "version": "8.0.1",
+ "version": "8.1.1",
+ "resolved": "git+ssh://git@github.com/thecodrr/htmlparser2.git#fed70e52a067bd16b72f5ae71bc7020a34a2d8c9",
"funding": [
"https://github.com/fb55/htmlparser2?sponsor=1",
{
@@ -5327,9 +5328,9 @@
"license": "MIT",
"dependencies": {
"domelementtype": "^2.3.0",
- "domhandler": "^5.0.2",
+ "domhandler": "^5.0.3",
"domutils": "^3.0.1",
- "entities": "^4.3.0"
+ "entities": "^4.4.0"
}
},
"node_modules/http-proxy-agent": {
@@ -12530,12 +12531,13 @@
"dev": true
},
"htmlparser2": {
- "version": "8.0.1",
+ "version": "git+ssh://git@github.com/thecodrr/htmlparser2.git#fed70e52a067bd16b72f5ae71bc7020a34a2d8c9",
+ "from": "htmlparser2@github:thecodrr/htmlparser2",
"requires": {
"domelementtype": "^2.3.0",
- "domhandler": "^5.0.2",
+ "domhandler": "^5.0.3",
"domutils": "^3.0.1",
- "entities": "^4.3.0"
+ "entities": "^4.4.0"
}
},
"http-proxy-agent": {
@@ -13202,8 +13204,7 @@
},
"jest-pnp-resolver": {
"version": "1.2.2",
- "dev": true,
- "requires": {}
+ "dev": true
},
"jest-regex-util": {
"version": "28.0.2",
@@ -13745,8 +13746,7 @@
"dependencies": {
"ws": {
"version": "8.9.0",
- "dev": true,
- "requires": {}
+ "dev": true
}
}
},
@@ -14657,8 +14657,7 @@
}
},
"ws": {
- "version": "7.5.9",
- "requires": {}
+ "version": "7.5.9"
},
"xml-name-validator": {
"version": "4.0.0",
diff --git a/packages/core/package.json b/packages/core/package.json
index 7396ef108..750a759c6 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -42,7 +42,7 @@
"dayjs": "^1.11.3",
"entities": "^4.3.1",
"fflate": "^0.7.3",
- "htmlparser2": "^8.0.1",
+ "htmlparser2": "github:thecodrr/htmlparser2",
"linkedom": "^0.14.17",
"liqe": "^1.13.0",
"qclone": "^1.2.0",
diff --git a/packages/core/utils/html-diff.js b/packages/core/utils/html-diff.js
index 4cbc8ce44..b401450fc 100644
--- a/packages/core/utils/html-diff.js
+++ b/packages/core/utils/html-diff.js
@@ -19,7 +19,7 @@ along with this program. If not, see .
import { Parser } from "htmlparser2";
-const ALLOWED_ATTRIBUTES = ["href", "src"];
+const ALLOWED_ATTRIBUTES = ["href", "src", "data-hash"];
export function isHTMLEqual(one, two) {
if (typeof one !== "string" || typeof two !== "string") return false;
@@ -29,13 +29,22 @@ export function isHTMLEqual(one, two) {
function toDiffable(html) {
let text = "";
- const parser = new Parser({
- ontext: (data) => (text += data.trim()),
- onattribute: (name, value) => {
- if (ALLOWED_ATTRIBUTES.includes(name)) text += value.trim();
+ const parser = new Parser(
+ {
+ ontext: (data) => (text += data.trim()),
+ onopentag: (_name, attr) => {
+ for (const key of ALLOWED_ATTRIBUTES) {
+ const value = attr[key];
+ if (!value) continue;
+ text += value.trim();
+ }
+ }
+ },
+ {
+ lowerCaseTags: false,
+ parseAttributes: true
}
- });
- parser.write(html);
- parser.end();
+ );
+ parser.end(html);
return text;
}
diff --git a/packages/core/utils/html-parser.js b/packages/core/utils/html-parser.js
index 4a1d1e77f..3b5b4551a 100644
--- a/packages/core/utils/html-parser.js
+++ b/packages/core/utils/html-parser.js
@@ -18,6 +18,7 @@ along with this program. If not, see .
*/
import { decodeHTML5 } from "entities";
+import { Parser } from "htmlparser2";
export const parseHTML = (input) =>
new globalThis.DOMParser().parseFromString(
@@ -40,3 +41,31 @@ function wrapIntoHTMLDocument(input) {
return `Document Fragment${input}`;
}
+
+/**
+ * Collects the text found inside the first `p` element of an HTML string.
+ *
+ * Text is accumulated from the moment a `p` tag opens until a `p` tag closes
+ * (text of elements nested in the paragraph is included). Entities are
+ * decoded by the parser. When no paragraph is encountered the result is "".
+ *
+ * @param {string} html markup to scan
+ * @returns {string} text of the first paragraph, or an empty string
+ */
+export function extractFirstParagraph(html) {
+  let paragraphText = "";
+  let insideParagraph = false;
+
+  const handlers = {
+    onopentag(tagName) {
+      if (tagName !== "p") return;
+      insideParagraph = true;
+    },
+    onclosetag(tagName) {
+      if (tagName !== "p") return;
+      insideParagraph = false;
+      // Only the first paragraph is wanted; pausing and resetting the parser
+      // is presumably what stops it from walking the rest of the document.
+      parser.pause();
+      parser.reset();
+    },
+    ontext(chunk) {
+      if (!insideParagraph) return;
+      paragraphText += chunk;
+    }
+  };
+
+  const parser = new Parser(handlers, {
+    lowerCaseTags: false,
+    decodeEntities: true
+  });
+  parser.end(html);
+  return paragraphText;
+}
diff --git a/packages/core/utils/html-rewriter.js b/packages/core/utils/html-rewriter.js
new file mode 100644
index 000000000..438d2d17a
--- /dev/null
+++ b/packages/core/utils/html-rewriter.js
@@ -0,0 +1,186 @@
+/*
+This file is part of the Notesnook project (https://notesnook.com/)
+
+Copyright (C) 2022 Streetwriters (Private) Limited
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+import { Parser, Attributes } from "htmlparser2";
+
+/**
+ * Re-serializes an HTML string while it is being parsed, giving the caller a
+ * chance to rewrite or drop each element through the `ontag` callback.
+ *
+ * The parser is configured with `parseAttributes: false`, and the attribute
+ * value handed to `ontag` is written back verbatim after the tag name, so it
+ * is treated here as one raw string.
+ *
+ * An instance accumulates output in a single buffer; `end()` resets the
+ * underlying parser (presumably triggering `onreset`, which clears the
+ * buffer — verify) before the instance is reused.
+ */
+export class HTMLRewriter {
+  /**
+   * @param {{
+   *  ontag?: (name: string, attr: string, pos: {start: number, end: number}) => false | {name: string, attr: string} | undefined
+   * }} options
+   *  `ontag` is invoked for every opening tag that is not being skipped.
+   *  Returning `false` drops the element together with everything nested in
+   *  it; returning `{name, attr}` replaces the tag name and attribute string;
+   *  returning nothing keeps the tag unchanged.
+   */
+  constructor(options = {}) {
+    const { ontag } = options;
+
+    /**
+     * Output produced so far.
+     * @private
+     */
+    this.transformed = "";
+
+    /**
+     * Name of the tag whose `<name attr` part has been written but which has
+     * not yet been terminated with `>` or `/>`; `null` when no tag is pending.
+     * @private
+     */
+    this.currentTag = null;
+
+    /**
+     * `null` while output is being produced. Otherwise the nesting depth
+     * inside the element that `ontag` asked to drop (0 = the element itself).
+     * @private
+     */
+    this.ignoreIndex = null;
+
+    /**
+     * @private
+     */
+    this.parser = new Parser(
+      {
+        onreset: () => {
+          this.transformed = "";
+        },
+        // NOTE(review): this writes an empty string, so comments do not reach
+        // the output. It may be the remnant of comment delimiters that were
+        // lost from this patch — confirm against the upstream file.
+        oncomment: () => this.write(""),
+        onopentag: (name, attr) => {
+          // Inside a dropped element: only track depth so the matching close
+          // tag can be recognised.
+          if (this.ignoreIndex !== null) {
+            this.ignoreIndex++;
+            return;
+          }
+
+          // Terminate the previous open tag before starting a new one.
+          this.closeTag();
+
+          if (ontag) {
+            const result = ontag(name, attr, {
+              start: this.parser.startIndex,
+              end: this.parser.endIndex
+            });
+
+            if (result === false) {
+              this.ignoreIndex = 0;
+              return;
+            } else if (result) {
+              name = result.name;
+              attr = result.attr;
+            }
+          }
+
+          // The trailing `>` is deliberately deferred (see closeTag) so an
+          // immediately following implied close can emit `/>` instead.
+          this.write(`<${name}`);
+          if (attr) this.write(` ${attr}`);
+          this.currentTag = name;
+        },
+        onclosetag: (name, isImplied) => {
+          // Close of the dropped element itself: resume normal output.
+          if (this.ignoreIndex === 0) {
+            this.ignoreIndex = null;
+            return;
+          }
+
+          // Close of an element nested inside the dropped one.
+          if (this.ignoreIndex !== null) {
+            this.ignoreIndex--;
+            return;
+          }
+
+          // Implied close while the open tag is still pending (presumably the
+          // void-element case such as img/br): finish it as self-closing.
+          if (isImplied && this.currentTag) {
+            this.write("/>");
+            this.currentTag = null;
+            return;
+          }
+
+          // Explicit close, or an implied close after content was already
+          // written: emit a real closing tag. Writing "/>" in the latter case
+          // would inject stray characters into the text.
+          this.closeTag();
+          this.write(`</${name}>`);
+        },
+        ontext: (data) => {
+          if (this.ignoreIndex !== null) {
+            return;
+          }
+
+          this.closeTag();
+
+          this.write(data);
+        }
+      },
+      {
+        recognizeSelfClosing: true,
+        xmlMode: false,
+        decodeEntities: false,
+        lowerCaseAttributeNames: false,
+        lowerCaseTags: false,
+        recognizeCDATA: false,
+        parseAttributes: false
+      }
+    );
+  }
+
+  /**
+   * Writes the deferred `>` of the pending open tag, if there is one.
+   * @private
+   */
+  closeTag() {
+    if (this.currentTag) {
+      this.write(">");
+      this.currentTag = null;
+    }
+  }
+
+  /**
+   * Feeds `html` to the parser and returns everything written so far.
+   *
+   * @param {string} html markup to rewrite
+   * @returns {string} the rewritten markup
+   */
+  transform(html) {
+    this.parser.end(html);
+    return this.transformed;
+  }
+
+  /**
+   * Resets the underlying parser.
+   */
+  end() {
+    this.parser.reset();
+  }
+
+  /**
+   * Appends a fragment to the output buffer.
+   * @private
+   * @param {string} html
+   */
+  write(html) {
+    this.transformed += html;
+  }
+}
+
+/**
+ * Read-only companion of HTMLRewriter: walks an HTML string and reports every
+ * opening tag to the `ontag` callback without producing any output.
+ */
+export class HTMLParser {
+  /**
+   * @param {{
+   *  ontag?: (name: string, attr: string, pos: {start: number, end: number}) => void
+   * }} options
+   *  `ontag` receives the tag name, its attributes and the parser's
+   *  start/end indices for the tag. It is optional; without it, parsing is a
+   *  no-op.
+   *
+   *  NOTE(review): the parser options are the same as HTMLRewriter's
+   *  (`parseAttributes: false`), where the attribute value is written back as
+   *  a raw string, so `attr` is typed as string here — confirm against the
+   *  htmlparser2 fork in use.
+   */
+  constructor(options = {}) {
+    const { ontag } = options;
+
+    /**
+     * @private
+     */
+    this.parser = new Parser(
+      {
+        onopentag: (name, attr) => {
+          // `ontag` is optional (options defaults to {}); calling it
+          // unconditionally would throw a TypeError when it is omitted.
+          if (!ontag) return;
+
+          ontag(name, attr, {
+            start: this.parser.startIndex,
+            end: this.parser.endIndex
+          });
+        }
+      },
+      {
+        recognizeSelfClosing: true,
+        xmlMode: false,
+        decodeEntities: false,
+        lowerCaseAttributeNames: false,
+        lowerCaseTags: false,
+        recognizeCDATA: false,
+        parseAttributes: false
+      }
+    );
+  }
+
+  /**
+   * Parses `html` to completion, then resets the parser so the instance can
+   * be handed another document.
+   *
+   * @param {string} html markup to scan
+   */
+  parse(html) {
+    this.parser.end(html);
+    this.parser.reset();
+  }
+}
+
+export { Attributes };