mirror of
https://github.com/streetwriters/notesnook.git
synced 2025-12-23 23:19:40 +01:00
core: use htmlparser2 for html rewriting
This replaces DOMParser with htmlparser2, which is much, much faster. How much faster? 80%. The new implementation can parse at 50 MB/s, which is insane! The old one could only do 5-10 MB/s. We still haven't gotten rid of DOMParser, though, since HTML-to-MD conversion still needs it. This will be done soon by using `dr-sax`. This uses a custom implementation of htmlparser2, which is 50% faster than the default one.
This commit is contained in:
@@ -106,7 +106,7 @@ test("note should not get headline if there is no p tag", () =>
|
||||
}
|
||||
}).then(async ({ db, id }) => {
|
||||
let note = db.notes.note(id);
|
||||
expect(note.headline).toBeUndefined();
|
||||
expect(note.headline).toBe("");
|
||||
}));
|
||||
|
||||
test("note title should allow trailing space", () =>
|
||||
@@ -132,7 +132,7 @@ test("update note", () =>
|
||||
title: "I am a new title",
|
||||
content: {
|
||||
type: TEST_NOTE.content.type,
|
||||
data: "<p><br></p>"
|
||||
data: "<p><br/></p>"
|
||||
},
|
||||
pinned: true,
|
||||
favorite: true
|
||||
@@ -348,7 +348,7 @@ test("note content should not contain image base64 data after save", () =>
|
||||
const note = db.notes.note(id);
|
||||
const content = await note.content();
|
||||
expect(content).not.toContain(`src="data:image/png;`);
|
||||
expect(content).not.toContain(`src=`);
|
||||
expect(content).toContain(`src=""`);
|
||||
}));
|
||||
|
||||
test("adding a note with an invalid tag should clean the tag array", () =>
|
||||
|
||||
@@ -49,7 +49,7 @@ const notebookTest = (notebook = TEST_NOTEBOOK) =>
|
||||
var TEST_NOTE = {
|
||||
content: {
|
||||
type: "tiptap",
|
||||
data: `<p>Hello<br><span style="color:#f00">This is colorful</span></p>`
|
||||
data: `<p>Hello<br/><span style="color:#f00">This is colorful</span></p>`
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ test("img src is empty after extract attachments", async () => {
|
||||
});
|
||||
expect(result.attachments).toHaveLength(1);
|
||||
expect(result.data).not.toContain(`src="data:image/png;`);
|
||||
expect(result.data).not.toContain(`src=`);
|
||||
expect(result.data).toContain(`src=""`);
|
||||
expect(result.data).toContain(`data-hash="helloworld"`);
|
||||
});
|
||||
|
||||
@@ -41,6 +41,12 @@ test("img src is present after insert attachments", async () => {
|
||||
expect(result2).toContain(`src="i am a data"`);
|
||||
});
|
||||
|
||||
test("remove attachments with particular hash", async () => {
|
||||
const tiptap = new Tiptap(IMG_CONTENT);
|
||||
const result = tiptap.removeAttachments(["d3eab72e94e3cd35"]);
|
||||
expect(result).not.toContain(`d3eab72e94e3cd35`);
|
||||
});
|
||||
|
||||
const HTMLS = {
|
||||
tables: `<table style="border-collapse: collapse; width: 811px;" border="1"><tbody><tr><td style="width: 81.375px;">Goal</td><td style="width: 708.625px;">To introduce various features of the app to the user and to convert a user on trial or basic plan to upgrade.</td></tr><tr><td style="width: 81.375px;">Frequency</td><td style="width: 708.625px;">1/week or 2/week</td></tr><tr><td style="width: 81.375px;">Types</td><td style="width: 708.625px;">Feature intro, upgrade promo, one time emails</td></tr><tr><td style="width: 81.375px;"></td><td style="width: 708.625px;"></td></tr></tbody></table><h2>Emails</h2><h3>Feature intro</h3><p>Features:</p><ol style="list-style-type: decimal;"><li>Web clipper on mobile</li><li>Pin any note to notification</li><li>Take notes from notifications</li><li>App lock</li><li>Importer</li><li>Encrypted attachments</li><li>Session history & automatic backups</li><li>Note publishing</li><li>Note exports</li><li>Collapsible headers</li></ol><h3>Promos</h3><ol style="list-style-type: decimal;"><li>Trial about to end</li><li>Trial ending (with option to request an extension)</li><li>Try free for 14 days</li></ol><h3>One time</h3><ol style="list-style-type: decimal;"><li>End-of-month progress report</li><li>What's coming/roadmap</li><li>What we are working on</li><li>Join the community</li></ol>`,
|
||||
tables2: `<h1>Note 8/6/22, 10:48 AM</h1>
|
||||
|
||||
@@ -19,7 +19,19 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import showdown from "@streetwriters/showdown";
|
||||
import dataurl from "../utils/dataurl";
|
||||
import { getDummyDocument, parseHTML } from "../utils/html-parser";
|
||||
import {
|
||||
extractFirstParagraph,
|
||||
getDummyDocument,
|
||||
parseHTML
|
||||
} from "../utils/html-parser";
|
||||
import { Attributes, HTMLParser, HTMLRewriter } from "../utils/html-rewriter";
|
||||
|
||||
const ATTRIBUTES = {
|
||||
hash: "data-hash",
|
||||
mime: "data-mime",
|
||||
filename: "data-filename",
|
||||
src: "src"
|
||||
};
|
||||
|
||||
showdown.helper.document = getDummyDocument();
|
||||
var converter = new showdown.Converter();
|
||||
@@ -38,6 +50,7 @@ export class Tiptap {
|
||||
}
|
||||
|
||||
toTXT() {
|
||||
if (!this.document) this.document = parseHTML(this.data);
|
||||
return this.document.body.innerText;
|
||||
}
|
||||
|
||||
@@ -46,15 +59,12 @@ export class Tiptap {
|
||||
}
|
||||
|
||||
toHeadline() {
|
||||
const paragraph = this.document.querySelector("p");
|
||||
if (!paragraph) return;
|
||||
|
||||
return paragraph.innerText;
|
||||
return extractFirstParagraph(this.data);
|
||||
}
|
||||
|
||||
isEmpty() {
|
||||
return this.toTXT().trim().length <= 0;
|
||||
}
|
||||
// isEmpty() {
|
||||
// return this.toTXT().trim().length <= 0;
|
||||
// }
|
||||
|
||||
/**
|
||||
* @returns {Boolean}
|
||||
@@ -66,109 +76,140 @@ export class Tiptap {
|
||||
}
|
||||
|
||||
async insertMedia(getData) {
|
||||
const attachmentElements = this.document.querySelectorAll("img");
|
||||
for (var i = 0; i < attachmentElements.length; ++i) {
|
||||
const attachment = attachmentElements[i];
|
||||
switch (attachment.tagName) {
|
||||
case "IMG": {
|
||||
const hash = getDatasetAttribute(attachment, "hash");
|
||||
if (!hash) continue;
|
||||
let hashes = [];
|
||||
new HTMLParser({
|
||||
ontag: (name, attr) => {
|
||||
const hash = Attributes.get(attr, ATTRIBUTES.hash);
|
||||
if (name === "img" && hash) hashes.push(hash);
|
||||
}
|
||||
}).parse(this.data);
|
||||
|
||||
const src = await getData(hash, {
|
||||
total: attachmentElements.length,
|
||||
current: i
|
||||
});
|
||||
if (!src) continue;
|
||||
attachment.setAttribute("src", src);
|
||||
break;
|
||||
const images = {};
|
||||
for (let i = 0; i < hashes.length; ++i) {
|
||||
const hash = hashes[i];
|
||||
const src = await getData(hash, {
|
||||
total: hashes.length,
|
||||
current: i
|
||||
});
|
||||
if (!src) continue;
|
||||
images[hash] = src;
|
||||
}
|
||||
|
||||
return new HTMLRewriter({
|
||||
ontag: (name, attr) => {
|
||||
const hash = Attributes.get(attr, ATTRIBUTES.hash);
|
||||
if (name === "img" && hash) {
|
||||
const src = images[Attributes.get(attr, ATTRIBUTES.hash)];
|
||||
if (!src) return;
|
||||
|
||||
return { name, attr: Attributes.set(attr, ATTRIBUTES.src, src) };
|
||||
}
|
||||
}
|
||||
}
|
||||
return this.document.body.innerHTML;
|
||||
}).transform(this.data);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string[]} hashes
|
||||
* @returns
|
||||
*/
|
||||
removeAttachments(hashes) {
|
||||
const query = hashes.map((h) => `[data-hash="${h}"]`).join(",");
|
||||
const attachmentElements = this.document.querySelectorAll(query);
|
||||
|
||||
for (var i = 0; i < attachmentElements.length; ++i) {
|
||||
const attachment = attachmentElements[i];
|
||||
attachment.remove();
|
||||
}
|
||||
|
||||
return this.document.body.innerHTML;
|
||||
return new HTMLRewriter({
|
||||
ontag: (_name, attr) => {
|
||||
if (hashes.includes(Attributes.get(attr, ATTRIBUTES.hash)))
|
||||
return false;
|
||||
}
|
||||
}).transform(this.data);
|
||||
}
|
||||
|
||||
async extractAttachments(store) {
|
||||
const attachments = [];
|
||||
const attachmentElements = this.document.querySelectorAll("img,span");
|
||||
let sources = [];
|
||||
new HTMLParser({
|
||||
ontag: (name, attr, pos) => {
|
||||
const hash = Attributes.get(attr, ATTRIBUTES.hash);
|
||||
const src = Attributes.get(attr, ATTRIBUTES.src);
|
||||
if (name === "img" && !hash && src) {
|
||||
sources.push({
|
||||
src,
|
||||
id: `${pos.start}${pos.end}`
|
||||
});
|
||||
}
|
||||
}
|
||||
}).parse(this.data);
|
||||
|
||||
for (var i = 0; i < attachmentElements.length; ++i) {
|
||||
const attachment = attachmentElements[i];
|
||||
const images = {};
|
||||
for (const image of sources) {
|
||||
try {
|
||||
switch (attachment.tagName) {
|
||||
case "IMG": {
|
||||
if (!getDatasetAttribute(attachment, "hash")) {
|
||||
const src = attachment.getAttribute("src");
|
||||
if (!src) continue;
|
||||
const { data, mime } = dataurl.toObject(image.src);
|
||||
if (!data) continue;
|
||||
const storeResult = await store(data, "base64");
|
||||
if (!storeResult) continue;
|
||||
|
||||
const { data, mime } = dataurl.toObject(src);
|
||||
if (!data) continue;
|
||||
images[image.id] = { ...storeResult, mime };
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
images[image.id] = false;
|
||||
}
|
||||
}
|
||||
|
||||
let attachments = [];
|
||||
const html = new HTMLRewriter({
|
||||
ontag: (name, attr, pos) => {
|
||||
switch (name) {
|
||||
case "img": {
|
||||
const hash = Attributes.get(attr, ATTRIBUTES.hash);
|
||||
|
||||
if (hash) {
|
||||
attachments.push({
|
||||
hash
|
||||
});
|
||||
return {
|
||||
name,
|
||||
attr: Attributes.set(attr, ATTRIBUTES.src, "")
|
||||
};
|
||||
} else {
|
||||
const imageData = images[`${pos.start}${pos.end}`];
|
||||
if (!imageData) return imageData;
|
||||
|
||||
const { key, metadata, mime } = imageData;
|
||||
if (!metadata.hash) return;
|
||||
|
||||
const type =
|
||||
getDatasetAttribute(attachment, "mime") || mime || "image/jpeg";
|
||||
|
||||
const storeResult = await store(data, "base64");
|
||||
if (!storeResult) continue;
|
||||
|
||||
const { key, metadata } = storeResult;
|
||||
if (!metadata.hash) continue;
|
||||
|
||||
setDatasetAttribute(attachment, "hash", metadata.hash);
|
||||
Attributes.get(attr, ATTRIBUTES.mime) || mime || "image/jpeg";
|
||||
const filename =
|
||||
Attributes.get(attr, ATTRIBUTES.filename) || metadata.hash;
|
||||
|
||||
attachments.push({
|
||||
type,
|
||||
filename:
|
||||
getDatasetAttribute(attachment, "filename") || metadata.hash,
|
||||
filename,
|
||||
...metadata,
|
||||
key
|
||||
});
|
||||
} else {
|
||||
attachments.push({
|
||||
hash: getDatasetAttribute(attachment, "hash")
|
||||
});
|
||||
|
||||
return {
|
||||
name,
|
||||
attr: Attributes.set(
|
||||
Attributes.set(attr, ATTRIBUTES.hash, metadata.hash),
|
||||
ATTRIBUTES.src,
|
||||
""
|
||||
)
|
||||
};
|
||||
}
|
||||
attachment.removeAttribute("src");
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
if (!getDatasetAttribute(attachment, "hash")) continue;
|
||||
case "span": {
|
||||
const hash = Attributes.get(attr, ATTRIBUTES.hash);
|
||||
if (!hash) return;
|
||||
attachments.push({
|
||||
hash: getDatasetAttribute(attachment, "hash")
|
||||
hash
|
||||
});
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
if (e.message === "bad base-64") {
|
||||
attachment.remove();
|
||||
console.error(e);
|
||||
continue;
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}).transform(this.data);
|
||||
|
||||
return {
|
||||
data: this.document.body.innerHTML,
|
||||
data: html,
|
||||
attachments
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
function getDatasetAttribute(element, attribute) {
|
||||
return element.getAttribute(`data-${attribute}`);
|
||||
}
|
||||
|
||||
function setDatasetAttribute(element, attribute, value) {
|
||||
return element.setAttribute(`data-${attribute}`, value);
|
||||
}
|
||||
|
||||
25
packages/core/package-lock.json
generated
25
packages/core/package-lock.json
generated
@@ -17,7 +17,7 @@
|
||||
"dayjs": "^1.11.3",
|
||||
"entities": "^4.3.1",
|
||||
"fflate": "^0.7.3",
|
||||
"htmlparser2": "^8.0.1",
|
||||
"htmlparser2": "github:thecodrr/htmlparser2",
|
||||
"linkedom": "^0.14.17",
|
||||
"liqe": "^1.13.0",
|
||||
"qclone": "^1.2.0",
|
||||
@@ -5316,7 +5316,8 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/htmlparser2": {
|
||||
"version": "8.0.1",
|
||||
"version": "8.1.1",
|
||||
"resolved": "git+ssh://git@github.com/thecodrr/htmlparser2.git#fed70e52a067bd16b72f5ae71bc7020a34a2d8c9",
|
||||
"funding": [
|
||||
"https://github.com/fb55/htmlparser2?sponsor=1",
|
||||
{
|
||||
@@ -5327,9 +5328,9 @@
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"domelementtype": "^2.3.0",
|
||||
"domhandler": "^5.0.2",
|
||||
"domhandler": "^5.0.3",
|
||||
"domutils": "^3.0.1",
|
||||
"entities": "^4.3.0"
|
||||
"entities": "^4.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/http-proxy-agent": {
|
||||
@@ -12530,12 +12531,13 @@
|
||||
"dev": true
|
||||
},
|
||||
"htmlparser2": {
|
||||
"version": "8.0.1",
|
||||
"version": "git+ssh://git@github.com/thecodrr/htmlparser2.git#fed70e52a067bd16b72f5ae71bc7020a34a2d8c9",
|
||||
"from": "htmlparser2@github:thecodrr/htmlparser2",
|
||||
"requires": {
|
||||
"domelementtype": "^2.3.0",
|
||||
"domhandler": "^5.0.2",
|
||||
"domhandler": "^5.0.3",
|
||||
"domutils": "^3.0.1",
|
||||
"entities": "^4.3.0"
|
||||
"entities": "^4.4.0"
|
||||
}
|
||||
},
|
||||
"http-proxy-agent": {
|
||||
@@ -13202,8 +13204,7 @@
|
||||
},
|
||||
"jest-pnp-resolver": {
|
||||
"version": "1.2.2",
|
||||
"dev": true,
|
||||
"requires": {}
|
||||
"dev": true
|
||||
},
|
||||
"jest-regex-util": {
|
||||
"version": "28.0.2",
|
||||
@@ -13745,8 +13746,7 @@
|
||||
"dependencies": {
|
||||
"ws": {
|
||||
"version": "8.9.0",
|
||||
"dev": true,
|
||||
"requires": {}
|
||||
"dev": true
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -14657,8 +14657,7 @@
|
||||
}
|
||||
},
|
||||
"ws": {
|
||||
"version": "7.5.9",
|
||||
"requires": {}
|
||||
"version": "7.5.9"
|
||||
},
|
||||
"xml-name-validator": {
|
||||
"version": "4.0.0",
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
"dayjs": "^1.11.3",
|
||||
"entities": "^4.3.1",
|
||||
"fflate": "^0.7.3",
|
||||
"htmlparser2": "^8.0.1",
|
||||
"htmlparser2": "github:thecodrr/htmlparser2",
|
||||
"linkedom": "^0.14.17",
|
||||
"liqe": "^1.13.0",
|
||||
"qclone": "^1.2.0",
|
||||
|
||||
@@ -19,7 +19,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import { Parser } from "htmlparser2";
|
||||
|
||||
const ALLOWED_ATTRIBUTES = ["href", "src"];
|
||||
const ALLOWED_ATTRIBUTES = ["href", "src", "data-hash"];
|
||||
|
||||
export function isHTMLEqual(one, two) {
|
||||
if (typeof one !== "string" || typeof two !== "string") return false;
|
||||
@@ -29,13 +29,22 @@ export function isHTMLEqual(one, two) {
|
||||
|
||||
function toDiffable(html) {
|
||||
let text = "";
|
||||
const parser = new Parser({
|
||||
ontext: (data) => (text += data.trim()),
|
||||
onattribute: (name, value) => {
|
||||
if (ALLOWED_ATTRIBUTES.includes(name)) text += value.trim();
|
||||
const parser = new Parser(
|
||||
{
|
||||
ontext: (data) => (text += data.trim()),
|
||||
onopentag: (_name, attr) => {
|
||||
for (const key of ALLOWED_ATTRIBUTES) {
|
||||
const value = attr[key];
|
||||
if (!value) continue;
|
||||
text += value.trim();
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
lowerCaseTags: false,
|
||||
parseAttributes: true
|
||||
}
|
||||
});
|
||||
parser.write(html);
|
||||
parser.end();
|
||||
);
|
||||
parser.end(html);
|
||||
return text;
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import { decodeHTML5 } from "entities";
|
||||
import { Parser } from "htmlparser2";
|
||||
|
||||
export const parseHTML = (input) =>
|
||||
new globalThis.DOMParser().parseFromString(
|
||||
@@ -40,3 +41,31 @@ function wrapIntoHTMLDocument(input) {
|
||||
|
||||
return `<!doctype html><html lang="en"><head><title>Document Fragment</title></head><body>${input}</body></html>`;
|
||||
}
|
||||
|
||||
/**
 * Collects the text found inside the first paragraph (`<p>`) of `html`.
 *
 * Text from elements nested inside that paragraph is included as well, since
 * every `ontext` event fired between the paragraph's open and close tags is
 * appended. Parsing is stopped as soon as the paragraph closes.
 *
 * @param {string} html markup to scan
 * @returns {string} the paragraph's text, or `""` when no paragraph is found
 */
export function extractFirstParagraph(html) {
  let text = "";
  let start = false;

  // Tag names are reported with their original casing (`lowerCaseTags: false`),
  // so compare case-insensitively; otherwise `<P>` would never match.
  const isParagraph = (name) => name.toLowerCase() === "p";

  const parser = new Parser(
    {
      onopentag: (name) => {
        if (isParagraph(name)) start = true;
      },
      onclosetag: (name) => {
        if (isParagraph(name)) {
          start = false;
          // Only the first paragraph is wanted: halt the parser and drop the
          // rest of the input instead of walking the whole document.
          parser.pause();
          parser.reset();
        }
      },
      ontext: (data) => {
        if (start) text += data;
      }
    },
    {
      lowerCaseTags: false,
      decodeEntities: true
    }
  );
  parser.end(html);
  return text;
}
|
||||
|
||||
186
packages/core/utils/html-rewriter.js
Normal file
186
packages/core/utils/html-rewriter.js
Normal file
@@ -0,0 +1,186 @@
|
||||
/*
|
||||
This file is part of the Notesnook project (https://notesnook.com/)
|
||||
|
||||
Copyright (C) 2022 Streetwriters (Private) Limited
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import { Parser, Attributes } from "htmlparser2";
|
||||
|
||||
/**
 * Re-serializes HTML while it is being parsed, giving the caller a chance to
 * rewrite or drop individual elements through the `ontag` callback.
 *
 * An opening tag is written without its terminating `>`; the terminator is
 * emitted lazily (see `closeTag`) so that an element which closes immediately
 * can be written in the self-closing `<name attr/>` form.
 */
export class HTMLRewriter {
  /**
   * @param {{
   *  ontag?: (name: string, attr: string, pos: {start: number, end: number}) => false | {name: string, attr: string} | undefined
   * }} options
   * `ontag` is invoked for every opening tag that is not inside a removed
   * element. Returning `false` removes the element together with everything
   * nested in it; returning `{ name, attr }` replaces the tag name and
   * attribute text that get written; returning nothing keeps the tag as is.
   */
  constructor(options = {}) {
    const { ontag } = options;

    /**
     * Output accumulated so far.
     * @private
     */
    this.transformed = "";

    /**
     * Name of the tag whose opening `<name attr` has been written but not yet
     * terminated, or `null` when there is no such pending tag.
     * @private
     */
    this.currentTag = null;

    /**
     * `null` while output is enabled. Otherwise the nesting depth inside the
     * element currently being removed (0 = directly inside that element).
     * @private
     */
    this.ignoreIndex = null;

    /**
     * @private
     */
    this.parser = new Parser(
      {
        onreset: () => {
          this.transformed = "";
          // Drop any half-written state along with the output.
          this.currentTag = null;
          this.ignoreIndex = null;
        },
        // NOTE(review): only the comment delimiters are written here; confirm
        // how the forked parser reports the comment body.
        oncomment: () => {
          // Comments inside a removed element must disappear with it.
          if (this.ignoreIndex !== null) return;

          // Terminate a pending open tag first, otherwise the output would
          // read `<p<!--`.
          this.closeTag();
          this.write("<!--");
        },
        oncommentend: () => {
          if (this.ignoreIndex !== null) return;
          this.write("-->");
        },
        onopentag: (name, attr) => {
          if (this.ignoreIndex !== null) {
            // Nested element inside a removed one: only track the depth.
            this.ignoreIndex++;
            return;
          }

          this.closeTag();

          if (ontag) {
            const result = ontag(name, attr, {
              start: this.parser.startIndex,
              end: this.parser.endIndex
            });

            if (result === false) {
              // Start skipping everything up to the matching close tag.
              this.ignoreIndex = 0;
              return;
            } else if (result) {
              name = result.name;
              attr = result.attr;
            }
          }

          this.write(`<${name}`);
          if (attr) this.write(` ${attr}`);
          this.currentTag = name;
        },
        onclosetag: (name, isImplied) => {
          if (this.ignoreIndex === 0) {
            // Close tag of the removed element itself: resume output.
            this.ignoreIndex = null;
            return;
          }

          if (this.ignoreIndex !== null) {
            this.ignoreIndex--;
            return;
          }

          if (isImplied && this.currentTag) {
            // Nothing was written since the tag was opened, so it can be
            // emitted in self-closing form (e.g. `<br/>`).
            this.write("/>");
            this.currentTag = null;
            return;
          }

          // Either an explicit close tag, or an implied close of an element
          // whose open tag has already been terminated (e.g. an unclosed
          // `<p>text`). Writing "/>" in the latter case would corrupt the
          // output, so a regular end tag is emitted instead.
          this.closeTag();
          this.write(`</${name}>`);
        },
        ontext: (data) => {
          if (this.ignoreIndex !== null) {
            return;
          }

          this.closeTag();

          this.write(data);
        }
      },
      {
        recognizeSelfClosing: true,
        xmlMode: false,
        decodeEntities: false,
        lowerCaseAttributeNames: false,
        lowerCaseTags: false,
        recognizeCDATA: false,
        parseAttributes: false
      }
    );
  }

  /**
   * Writes the `>` that terminates a pending open tag, if there is one.
   * @private
   */
  closeTag() {
    if (this.currentTag) {
      this.write(">");
      this.currentTag = null;
    }
  }

  /**
   * Feeds `html` through the parser and returns the rewritten markup.
   *
   * @param {string} html
   * @returns {string} everything written to the output so far
   */
  transform(html) {
    this.parser.end(html);
    return this.transformed;
  }

  /**
   * Resets the underlying parser, which in turn clears the accumulated output.
   */
  end() {
    this.parser.reset();
  }

  /**
   * Appends `html` to the output.
   * @private
   */
  write(html) {
    this.transformed += html;
  }
}
|
||||
|
||||
/**
 * Read-only counterpart of `HTMLRewriter`: walks the markup and reports every
 * opening tag to the `ontag` callback without producing any output.
 */
export class HTMLParser {
  /**
   * @param {{
   *  ontag?: (name: string, attr: Record<string, string>, pos: {start: number, end: number}) => void
   * }} options
   * `ontag` receives the tag name, its attributes and the parser's
   * `startIndex` / `endIndex` at the time the tag was reported.
   *
   * NOTE(review): the parser is created with `parseAttributes: false`, so
   * `attr` is presumably the raw attribute text rather than a parsed record —
   * confirm against the forked parser and adjust the type above if so.
   */
  constructor(options = {}) {
    const { ontag } = options;

    /**
     * @private
     */
    this.parser = new Parser(
      {
        onopentag: (name, attr) => {
          // `ontag` is optional; without this guard a parser constructed
          // without it would throw on the first opening tag.
          if (typeof ontag !== "function") return;

          ontag(name, attr, {
            start: this.parser.startIndex,
            end: this.parser.endIndex
          });
        }
      },
      {
        recognizeSelfClosing: true,
        xmlMode: false,
        decodeEntities: false,
        lowerCaseAttributeNames: false,
        lowerCaseTags: false,
        recognizeCDATA: false,
        parseAttributes: false
      }
    );
  }

  /**
   * Parses `html`, firing `ontag` for each opening tag, then resets the
   * underlying parser.
   *
   * @param {string} html
   */
  parse(html) {
    this.parser.end(html);
    this.parser.reset();
  }
}
|
||||
|
||||
export { Attributes };
|
||||
Reference in New Issue
Block a user