mirror of
https://github.com/streetwriters/notesnook.git
synced 2025-12-23 15:09:33 +01:00
core: use htmlparser2 for html rewriting
This replaces DOMParser with htmlparser2, which is much, much faster. How much faster? 80%. The new implementation can parse at 50 MB/s, which is insane! The old one could only do 5-10 MB/s. We still haven't gotten rid of DOMParser, though, since HTML-to-MD conversion still needs it; that will be done soon by using `dr-sax`. This uses a custom implementation of htmlparser2 instead of the default one; the custom implementation is 50% faster.
This commit is contained in:
@@ -19,7 +19,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import { Parser } from "htmlparser2";
|
||||
|
||||
const ALLOWED_ATTRIBUTES = ["href", "src"];
|
||||
const ALLOWED_ATTRIBUTES = ["href", "src", "data-hash"];
|
||||
|
||||
export function isHTMLEqual(one, two) {
|
||||
if (typeof one !== "string" || typeof two !== "string") return false;
|
||||
@@ -29,13 +29,22 @@ export function isHTMLEqual(one, two) {
|
||||
|
||||
function toDiffable(html) {
|
||||
let text = "";
|
||||
const parser = new Parser({
|
||||
ontext: (data) => (text += data.trim()),
|
||||
onattribute: (name, value) => {
|
||||
if (ALLOWED_ATTRIBUTES.includes(name)) text += value.trim();
|
||||
const parser = new Parser(
|
||||
{
|
||||
ontext: (data) => (text += data.trim()),
|
||||
onopentag: (_name, attr) => {
|
||||
for (const key of ALLOWED_ATTRIBUTES) {
|
||||
const value = attr[key];
|
||||
if (!value) continue;
|
||||
text += value.trim();
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
lowerCaseTags: false,
|
||||
parseAttributes: true
|
||||
}
|
||||
});
|
||||
parser.write(html);
|
||||
parser.end();
|
||||
);
|
||||
parser.end(html);
|
||||
return text;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user