core: use upstream htmlparser2 instead of in-house

This is mostly because I am not 100% certain about how stable our own
optimized fork is. While it does perform a lot better, there is a
significant risk that things might break in production. To avoid
that, I am replacing it with the upstream version, which is much more
heavily tested.
This commit is contained in:
Abdullah Atta
2022-11-23 14:58:34 +05:00
parent b1f5ffc2ec
commit 5e6caa7273
6 changed files with 3862 additions and 1497 deletions

View File

@@ -348,7 +348,7 @@ test("note content should not contain image base64 data after save", () =>
const note = db.notes.note(id);
const content = await note.content();
expect(content).not.toContain(`src="data:image/png;`);
expect(content).toContain(`src=""`);
expect(content).not.toContain(`src=`);
}));
test("adding a note with an invalid tag should clean the tag array", () =>

View File

@@ -27,7 +27,7 @@ test("img src is empty after extract attachments", async () => {
});
expect(result.attachments).toHaveLength(1);
expect(result.data).not.toContain(`src="data:image/png;`);
expect(result.data).toContain(`src=""`);
expect(result.data).not.toContain(`src=""`);
expect(result.data).toContain(`data-hash="helloworld"`);
});

View File

@@ -20,7 +20,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
import showdown from "@streetwriters/showdown";
import dataurl from "../utils/dataurl";
import { extractFirstParagraph, getDummyDocument } from "../utils/html-parser";
import { Attributes, HTMLParser, HTMLRewriter } from "../utils/html-rewriter";
import { HTMLParser, HTMLRewriter } from "../utils/html-rewriter";
import { convert } from "html-to-text";
import { list } from "html-to-text/lib/formatter";
@@ -101,7 +101,7 @@ export class Tiptap {
let hashes = [];
new HTMLParser({
ontag: (name, attr) => {
const hash = Attributes.get(attr, ATTRIBUTES.hash);
const hash = attr[ATTRIBUTES.hash];
if (name === "img" && hash) hashes.push(hash);
}
}).parse(this.data);
@@ -119,12 +119,11 @@ export class Tiptap {
return new HTMLRewriter({
ontag: (name, attr) => {
const hash = Attributes.get(attr, ATTRIBUTES.hash);
const hash = attr[ATTRIBUTES.hash];
if (name === "img" && hash) {
const src = images[Attributes.get(attr, ATTRIBUTES.hash)];
const src = images[hash];
if (!src) return;
return { name, attr: Attributes.set(attr, ATTRIBUTES.src, src) };
attr[ATTRIBUTES.src] = src;
}
}
}).transform(this.data);
@@ -137,8 +136,7 @@ export class Tiptap {
removeAttachments(hashes) {
return new HTMLRewriter({
ontag: (_name, attr) => {
if (hashes.includes(Attributes.get(attr, ATTRIBUTES.hash)))
return false;
if (hashes.includes(attr[ATTRIBUTES.hash])) return false;
}
}).transform(this.data);
}
@@ -147,8 +145,8 @@ export class Tiptap {
let sources = [];
new HTMLParser({
ontag: (name, attr, pos) => {
const hash = Attributes.get(attr, ATTRIBUTES.hash);
const src = Attributes.get(attr, ATTRIBUTES.src);
const hash = attr[ATTRIBUTES.hash];
const src = attr[ATTRIBUTES.src];
if (name === "img" && !hash && src) {
sources.push({
src,
@@ -178,16 +176,13 @@ export class Tiptap {
ontag: (name, attr, pos) => {
switch (name) {
case "img": {
const hash = Attributes.get(attr, ATTRIBUTES.hash);
const hash = attr[ATTRIBUTES.hash];
if (hash) {
attachments.push({
hash
});
return {
name,
attr: Attributes.set(attr, ATTRIBUTES.src, "")
};
delete attr[ATTRIBUTES.src];
} else {
const imageData = images[`${pos.start}${pos.end}`];
if (!imageData) return imageData;
@@ -195,10 +190,8 @@ export class Tiptap {
const { key, metadata, mime } = imageData;
if (!metadata.hash) return;
const type =
Attributes.get(attr, ATTRIBUTES.mime) || mime || "image/jpeg";
const filename =
Attributes.get(attr, ATTRIBUTES.filename) || metadata.hash;
const type = attr[ATTRIBUTES.mime] || mime || "image/jpeg";
const filename = attr[ATTRIBUTES.filename] || metadata.hash;
attachments.push({
type,
@@ -207,18 +200,13 @@ export class Tiptap {
key
});
return {
name,
attr: Attributes.set(
Attributes.set(attr, ATTRIBUTES.hash, metadata.hash),
ATTRIBUTES.src,
""
)
};
attr[ATTRIBUTES.hash] = metadata.hash;
delete attr[ATTRIBUTES.src];
}
break;
}
case "span": {
const hash = Attributes.get(attr, ATTRIBUTES.hash);
const hash = attr[ATTRIBUTES.hash];
if (!hash) return;
attachments.push({
hash

File diff suppressed because it is too large Load Diff

View File

@@ -40,13 +40,16 @@
"async-mutex": "^0.3.2",
"dayjs": "^1.11.3",
"entities": "^4.3.1",
"htmlparser2": "github:thecodrr/htmlparser2",
"html-to-text": "github:thecodrr/node-html-to-text",
"htmlparser2": "^8.0.1",
"linkedom": "^0.14.17",
"liqe": "^1.13.0",
"qclone": "^1.2.0",
"spark-md5": "^3.0.2"
},
"overrides": {
"htmlparser2": "^8.0.1"
},
"np": {
"releaseDraft": false,
"message": "chore: bump version to %s"

View File

@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { Parser, Attributes } from "htmlparser2";
import { Parser } from "htmlparser2";
export class HTMLRewriter {
/**
@@ -73,7 +73,12 @@ export class HTMLRewriter {
}
this.write(`<${name}`);
if (attr) this.write(` ${attr}`);
if (attr) {
for (const key in attr) {
if (!key) continue;
this.write(` ${key}="${attr[key]}"`);
}
}
this.currentTag = name;
},
onclosetag: (name, isImplied) => {
@@ -111,8 +116,7 @@ export class HTMLRewriter {
decodeEntities: false,
lowerCaseAttributeNames: false,
lowerCaseTags: false,
recognizeCDATA: false,
parseAttributes: false
recognizeCDATA: false
}
);
}
@@ -171,8 +175,7 @@ export class HTMLParser {
decodeEntities: false,
lowerCaseAttributeNames: false,
lowerCaseTags: false,
recognizeCDATA: false,
parseAttributes: false
recognizeCDATA: false
}
);
}
@@ -182,5 +185,3 @@ export class HTMLParser {
this.parser.reset();
}
}
export { Attributes };