diff --git a/packages/core/__tests__/notes.test.js b/packages/core/__tests__/notes.test.js index 8dbd1ce7c..10146b93d 100644 --- a/packages/core/__tests__/notes.test.js +++ b/packages/core/__tests__/notes.test.js @@ -106,7 +106,7 @@ test("note should not get headline if there is no p tag", () => } }).then(async ({ db, id }) => { let note = db.notes.note(id); - expect(note.headline).toBeUndefined(); + expect(note.headline).toBe(""); })); test("note title should allow trailing space", () => @@ -132,7 +132,7 @@ test("update note", () => title: "I am a new title", content: { type: TEST_NOTE.content.type, - data: "


" + data: "


" }, pinned: true, favorite: true @@ -348,7 +348,7 @@ test("note content should not contain image base64 data after save", () => const note = db.notes.note(id); const content = await note.content(); expect(content).not.toContain(`src="data:image/png;`); - expect(content).not.toContain(`src=`); + expect(content).toContain(`src=""`); })); test("adding a note with an invalid tag should clean the tag array", () => diff --git a/packages/core/__tests__/utils/index.js b/packages/core/__tests__/utils/index.js index a2988b591..a9c362c18 100644 --- a/packages/core/__tests__/utils/index.js +++ b/packages/core/__tests__/utils/index.js @@ -49,7 +49,7 @@ const notebookTest = (notebook = TEST_NOTEBOOK) => var TEST_NOTE = { content: { type: "tiptap", - data: `

Hello
This is colorful

` + data: `

Hello
This is colorful

` } }; diff --git a/packages/core/content-types/__tests__/tiptap.test.js b/packages/core/content-types/__tests__/tiptap.test.js index 226bef1a7..985ab22d9 100644 --- a/packages/core/content-types/__tests__/tiptap.test.js +++ b/packages/core/content-types/__tests__/tiptap.test.js @@ -27,7 +27,7 @@ test("img src is empty after extract attachments", async () => { }); expect(result.attachments).toHaveLength(1); expect(result.data).not.toContain(`src="data:image/png;`); - expect(result.data).not.toContain(`src=`); + expect(result.data).toContain(`src=""`); expect(result.data).toContain(`data-hash="helloworld"`); }); @@ -41,6 +41,12 @@ test("img src is present after insert attachments", async () => { expect(result2).toContain(`src="i am a data"`); }); +test("remove attachments with particular hash", async () => { + const tiptap = new Tiptap(IMG_CONTENT); + const result = tiptap.removeAttachments(["d3eab72e94e3cd35"]); + expect(result).not.toContain(`d3eab72e94e3cd35`); +}); + const HTMLS = { tables: `
GoalTo introduce various features of the app to the user and to convert a user on trial or basic plan to upgrade.
Frequency1/week or 2/week
TypesFeature intro, upgrade promo, one time emails

Emails

Feature intro

Features:

  1. Web clipper on mobile
  2. Pin any note to notification
  3. Take notes from notifications
  4. App lock
  5. Importer
  6. Encrypted attachments
  7. Session history & automatic backups
  8. Note publishing
  9. Note exports
  10. Collapsible headers

Promos

  1. Trial about to end
  2. Trial ending (with option to request an extension)
  3. Try free for 14 days

One time

  1. End-of-month progress report
  2. What's coming/roadmap
  3. What we are working on
  4. Join the community
`, tables2: `

Note 8/6/22, 10:48 AM

diff --git a/packages/core/content-types/tiptap.js b/packages/core/content-types/tiptap.js index 1f955e7b7..2d991712a 100644 --- a/packages/core/content-types/tiptap.js +++ b/packages/core/content-types/tiptap.js @@ -19,7 +19,19 @@ along with this program. If not, see . import showdown from "@streetwriters/showdown"; import dataurl from "../utils/dataurl"; -import { getDummyDocument, parseHTML } from "../utils/html-parser"; +import { + extractFirstParagraph, + getDummyDocument, + parseHTML +} from "../utils/html-parser"; +import { Attributes, HTMLParser, HTMLRewriter } from "../utils/html-rewriter"; + +const ATTRIBUTES = { + hash: "data-hash", + mime: "data-mime", + filename: "data-filename", + src: "src" +}; showdown.helper.document = getDummyDocument(); var converter = new showdown.Converter(); @@ -38,6 +50,7 @@ export class Tiptap { } toTXT() { + if (!this.document) this.document = parseHTML(this.data); return this.document.body.innerText; } @@ -46,15 +59,12 @@ export class Tiptap { } toHeadline() { - const paragraph = this.document.querySelector("p"); - if (!paragraph) return; - - return paragraph.innerText; + return extractFirstParagraph(this.data); } - isEmpty() { - return this.toTXT().trim().length <= 0; - } + // isEmpty() { + // return this.toTXT().trim().length <= 0; + // } /** * @returns {Boolean} @@ -66,109 +76,140 @@ export class Tiptap { } async insertMedia(getData) { - const attachmentElements = this.document.querySelectorAll("img"); - for (var i = 0; i < attachmentElements.length; ++i) { - const attachment = attachmentElements[i]; - switch (attachment.tagName) { - case "IMG": { - const hash = getDatasetAttribute(attachment, "hash"); - if (!hash) continue; + let hashes = []; + new HTMLParser({ + ontag: (name, attr) => { + const hash = Attributes.get(attr, ATTRIBUTES.hash); + if (name === "img" && hash) hashes.push(hash); + } + }).parse(this.data); - const src = await getData(hash, { - total: attachmentElements.length, - current: i - }); - if (!src) continue; - attachment.setAttribute("src", src); - break; + const images = {}; + for (let i = 0; i < hashes.length; ++i) { + const hash = hashes[i]; + const src = await getData(hash, { + total: hashes.length, + current: i + }); + if (!src) continue; + images[hash] = src; + } + + return new HTMLRewriter({ + ontag: (name, attr) => { + const hash = Attributes.get(attr, ATTRIBUTES.hash); + if (name === "img" && hash) { + const src = images[Attributes.get(attr, ATTRIBUTES.hash)]; + if (!src) return; + + return { name, attr: Attributes.set(attr, ATTRIBUTES.src, src) }; } } - } - return this.document.body.innerHTML; + }).transform(this.data); } + /** + * @param {string[]} hashes + * @returns + */ removeAttachments(hashes) { - const query = hashes.map((h) => `[data-hash="${h}"]`).join(","); - const attachmentElements = this.document.querySelectorAll(query); - - for (var i = 0; i < attachmentElements.length; ++i) { - const attachment = attachmentElements[i]; - attachment.remove(); - } - - return this.document.body.innerHTML; + return new HTMLRewriter({ + ontag: (_name, attr) => { + if (hashes.includes(Attributes.get(attr, ATTRIBUTES.hash))) + return false; + } + }).transform(this.data); } async extractAttachments(store) { - const attachments = []; - const attachmentElements = this.document.querySelectorAll("img,span"); + let sources = []; + new HTMLParser({ + ontag: (name, attr, pos) => { + const hash = Attributes.get(attr, ATTRIBUTES.hash); + const src = Attributes.get(attr, ATTRIBUTES.src); + if (name === "img" && !hash && src) { + sources.push({ + src, + id: `${pos.start}${pos.end}` + }); + } + } + }).parse(this.data); - for (var i = 0; i < attachmentElements.length; ++i) { - const attachment = attachmentElements[i]; + const images = {}; + for (const image of sources) { try { - switch (attachment.tagName) { - case "IMG": { - if (!getDatasetAttribute(attachment, "hash")) { - const src = attachment.getAttribute("src"); - if (!src) continue; + const { data, mime } = dataurl.toObject(image.src); + if (!data) continue; + const storeResult = await store(data, "base64"); + if (!storeResult) continue; - const { data, mime } = dataurl.toObject(src); - if (!data) continue; + images[image.id] = { ...storeResult, mime }; + } catch (e) { + console.error(e); + images[image.id] = false; + } + } + + let attachments = []; + const html = new HTMLRewriter({ + ontag: (name, attr, pos) => { + switch (name) { + case "img": { + const hash = Attributes.get(attr, ATTRIBUTES.hash); + + if (hash) { + attachments.push({ + hash + }); + return { + name, + attr: Attributes.set(attr, ATTRIBUTES.src, "") + }; + } else { + const imageData = images[`${pos.start}${pos.end}`]; + if (!imageData) return imageData; + + const { key, metadata, mime } = imageData; + if (!metadata.hash) return; const type = - getDatasetAttribute(attachment, "mime") || mime || "image/jpeg"; - - const storeResult = await store(data, "base64"); - if (!storeResult) continue; - - const { key, metadata } = storeResult; - if (!metadata.hash) continue; - - setDatasetAttribute(attachment, "hash", metadata.hash); + Attributes.get(attr, ATTRIBUTES.mime) || mime || "image/jpeg"; + const filename = + Attributes.get(attr, ATTRIBUTES.filename) || metadata.hash; attachments.push({ type, - filename: - getDatasetAttribute(attachment, "filename") || metadata.hash, + filename, ...metadata, key }); - } else { - attachments.push({ - hash: getDatasetAttribute(attachment, "hash") - }); + + return { + name, + attr: Attributes.set( + Attributes.set(attr, ATTRIBUTES.hash, metadata.hash), + ATTRIBUTES.src, + "" + ) + }; } - attachment.removeAttribute("src"); - break; } - default: { - if (!getDatasetAttribute(attachment, "hash")) continue; + case "span": { + const hash = Attributes.get(attr, ATTRIBUTES.hash); + if (!hash) return; attachments.push({ - hash: getDatasetAttribute(attachment, "hash") + hash }); break; } } - } catch (e) { - if (e.message === "bad base-64") { - attachment.remove(); - console.error(e); - continue; - } - throw e; } - } + }).transform(this.data); + return { - data: this.document.body.innerHTML, + data: html, attachments }; } } - -function getDatasetAttribute(element, attribute) { - return element.getAttribute(`data-${attribute}`); -} - -function setDatasetAttribute(element, attribute, value) { - return element.setAttribute(`data-${attribute}`, value); -} diff --git a/packages/core/package-lock.json b/packages/core/package-lock.json index 5d94e6eb9..845911f81 100644 --- a/packages/core/package-lock.json +++ b/packages/core/package-lock.json @@ -17,7 +17,7 @@ "dayjs": "^1.11.3", "entities": "^4.3.1", "fflate": "^0.7.3", - "htmlparser2": "^8.0.1", + "htmlparser2": "github:thecodrr/htmlparser2", "linkedom": "^0.14.17", "liqe": "^1.13.0", "qclone": "^1.2.0", @@ -5316,7 +5316,8 @@ "license": "MIT" }, "node_modules/htmlparser2": { - "version": "8.0.1", + "version": "8.1.1", + "resolved": "git+ssh://git@github.com/thecodrr/htmlparser2.git#fed70e52a067bd16b72f5ae71bc7020a34a2d8c9", "funding": [ "https://github.com/fb55/htmlparser2?sponsor=1", { @@ -5327,9 +5328,9 @@ "license": "MIT", "dependencies": { "domelementtype": "^2.3.0", - "domhandler": "^5.0.2", + "domhandler": "^5.0.3", "domutils": "^3.0.1", - "entities": "^4.3.0" + "entities": "^4.4.0" } }, "node_modules/http-proxy-agent": { @@ -12530,12 +12531,13 @@ "dev": true }, "htmlparser2": { - "version": "8.0.1", + "version": "git+ssh://git@github.com/thecodrr/htmlparser2.git#fed70e52a067bd16b72f5ae71bc7020a34a2d8c9", + "from": "htmlparser2@github:thecodrr/htmlparser2", "requires": { "domelementtype": "^2.3.0", - "domhandler": "^5.0.2", + "domhandler": "^5.0.3", "domutils": "^3.0.1", - "entities": "^4.3.0" + "entities": "^4.4.0" } }, "http-proxy-agent": { @@ -13202,8 +13204,7 @@ }, "jest-pnp-resolver": { "version": "1.2.2", - "dev": true, - "requires": {} + "dev": true }, "jest-regex-util": { "version": "28.0.2", @@ -13745,8 +13746,7 @@ "dependencies": { "ws": { "version": "8.9.0", - "dev": true, - "requires": {} + "dev": true } } }, @@ -14657,8 +14657,7 @@ } }, "ws": { - "version": "7.5.9", - "requires": {} + "version": "7.5.9" }, "xml-name-validator": { "version": "4.0.0", diff --git a/packages/core/package.json b/packages/core/package.json index 7396ef108..750a759c6 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -42,7 +42,7 @@ "dayjs": "^1.11.3", "entities": "^4.3.1", "fflate": "^0.7.3", - "htmlparser2": "^8.0.1", + "htmlparser2": "github:thecodrr/htmlparser2", "linkedom": "^0.14.17", "liqe": "^1.13.0", "qclone": "^1.2.0", diff --git a/packages/core/utils/html-diff.js b/packages/core/utils/html-diff.js index 4cbc8ce44..b401450fc 100644 --- a/packages/core/utils/html-diff.js +++ b/packages/core/utils/html-diff.js @@ -19,7 +19,7 @@ along with this program. If not, see . import { Parser } from "htmlparser2"; -const ALLOWED_ATTRIBUTES = ["href", "src"]; +const ALLOWED_ATTRIBUTES = ["href", "src", "data-hash"]; export function isHTMLEqual(one, two) { if (typeof one !== "string" || typeof two !== "string") return false; @@ -29,13 +29,22 @@ export function isHTMLEqual(one, two) { function toDiffable(html) { let text = ""; - const parser = new Parser({ - ontext: (data) => (text += data.trim()), - onattribute: (name, value) => { - if (ALLOWED_ATTRIBUTES.includes(name)) text += value.trim(); + const parser = new Parser( + { + ontext: (data) => (text += data.trim()), + onopentag: (_name, attr) => { + for (const key of ALLOWED_ATTRIBUTES) { + const value = attr[key]; + if (!value) continue; + text += value.trim(); + } + } + }, + { + lowerCaseTags: false, + parseAttributes: true } - }); - parser.write(html); - parser.end(); + ); + parser.end(html); return text; } diff --git a/packages/core/utils/html-parser.js b/packages/core/utils/html-parser.js index 4a1d1e77f..3b5b4551a 100644 --- a/packages/core/utils/html-parser.js +++ b/packages/core/utils/html-parser.js @@ -18,6 +18,7 @@ along with this program. If not, see . */ import { decodeHTML5 } from "entities"; +import { Parser } from "htmlparser2"; export const parseHTML = (input) => new globalThis.DOMParser().parseFromString( @@ -40,3 +41,31 @@ function wrapIntoHTMLDocument(input) { return `Document Fragment${input}`; } + +export function extractFirstParagraph(html) { + let text = ""; + let start = false; + const parser = new Parser( + { + onopentag: (name) => { + if (name === "p") start = true; + }, + onclosetag: (name) => { + if (name === "p") { + start = false; + parser.pause(); + parser.reset(); + } + }, + ontext: (data) => { + if (start) text += data; + } + }, + { + lowerCaseTags: false, + decodeEntities: true + } + ); + parser.end(html); + return text; +} diff --git a/packages/core/utils/html-rewriter.js b/packages/core/utils/html-rewriter.js new file mode 100644 index 000000000..438d2d17a --- /dev/null +++ b/packages/core/utils/html-rewriter.js @@ -0,0 +1,186 @@ +/* +This file is part of the Notesnook project (https://notesnook.com/) + +Copyright (C) 2022 Streetwriters (Private) Limited + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +import { Parser, Attributes } from "htmlparser2"; + +export class HTMLRewriter { + /** + * + * @param {{ + * ontag?: (name: string, attr: string, pos: {start: number, end: number}) => false | {name: string, attr: string} | undefined + * }} options + */ + constructor(options = {}) { + const { ontag } = options; + /** + * @private + */ + this.transformed = ""; + + /** @private */ + this.currentTag = null; + + /** @private */ + this.ignoreIndex = null; + + /** + * @private + */ + this.parser = new Parser( + { + onreset: () => { + this.transformed = ""; + }, + oncomment: () => this.write(""), + onopentag: (name, attr) => { + if (this.ignoreIndex !== null) { + this.ignoreIndex++; + return; + } + + this.closeTag(); + + if (ontag) { + const result = ontag(name, attr, { + start: this.parser.startIndex, + end: this.parser.endIndex + }); + + if (result === false) { + this.ignoreIndex = 0; + return; + } else if (result) { + name = result.name; + attr = result.attr; + } + } + + this.write(`<${name}`); + if (attr) this.write(` ${attr}`); + this.currentTag = name; + }, + onclosetag: (name, isImplied) => { + if (this.ignoreIndex === 0) { + this.ignoreIndex = null; + return; + } + + if (this.ignoreIndex !== null) { + this.ignoreIndex--; + return; + } + + if (!isImplied) this.closeTag(); + + this.write(isImplied ? "/>" : ``); + + if (this.currentTag) { + this.currentTag = null; + } + }, + ontext: (data) => { + if (this.ignoreIndex !== null) { + return; + } + + this.closeTag(); + + this.write(data); + } + }, + { + recognizeSelfClosing: true, + xmlMode: false, + decodeEntities: false, + lowerCaseAttributeNames: false, + lowerCaseTags: false, + recognizeCDATA: false, + parseAttributes: false + } + ); + } + + /** + * @private + */ + closeTag() { + if (this.currentTag) { + this.write(">"); + this.currentTag = null; + } + } + + transform(html) { + this.parser.end(html); + return this.transformed; + } + + end() { + this.parser.reset(); + } + + /** + * @private + */ + write(html) { + this.transformed += html; + } +} + +export class HTMLParser { + /** + * + * @param {{ + * ontag?: (name: string, attr: Record, pos: {start: number, end: number}) => void + * }} options + */ + constructor(options = {}) { + const { ontag } = options; + + /** + * @private + */ + this.parser = new Parser( + { + onopentag: (name, attr) => + ontag(name, attr, { + start: this.parser.startIndex, + end: this.parser.endIndex + }) + }, + { + recognizeSelfClosing: true, + xmlMode: false, + decodeEntities: false, + lowerCaseAttributeNames: false, + lowerCaseTags: false, + recognizeCDATA: false, + parseAttributes: false + } + ); + } + + parse(html) { + this.parser.end(html); + this.parser.reset(); + } +} + +export { Attributes };