clipper: implement web clipper core library

This commit is contained in:
Abdullah Atta
2022-11-17 09:50:36 +05:00
parent b65761f9da
commit 96a3d8edac
27 changed files with 5913 additions and 0 deletions

View File

@@ -20,6 +20,7 @@ const SCOPES = [
"logger", "logger",
"theme", "theme",
"core", "core",
"clipper",
"config", "config",
"ci", "ci",
"setup", "setup",

6
packages/clipper/.gitignore vendored Normal file
View File

@@ -0,0 +1,6 @@
node_modules/
/test-results/
/playwright-report/
/playwright/.cache/
/__tests__/temp
test.ts

View File

@@ -0,0 +1,197 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2022 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { expect, test } from "@playwright/test";
import { buildSync } from "esbuild";
import path, { join } from "path";
import type { Clipper } from "../src";
import Websites from "./pages.json";
import slugify from "slugify";
import { mkdirSync, rmSync, writeFileSync } from "fs";
// Teach the type checker about `window.clipper`, the global that the injected
// bundle defines (see `globalName: "clipper"` in the esbuild call below).
declare global {
interface Window {
clipper: Clipper;
}
}
// A timeout of 0 is passed so that slow third-party pages are not cut short.
test.setTimeout(0);
// Bypass the pages' Content-Security-Policy so the bundle can be injected as an inline script.
test.use({ bypassCSP: true });
// Directory that receives the clipped HTML files while a test is running.
const tempDirPath = join(__dirname, "temp");
// Bundle the library in memory (nothing is written to disk) and expose it
// under the global name `clipper`; the resulting text is injected into every page.
const output = buildSync({
bundle: true,
entryPoints: [path.join(__dirname, "../src/index.ts")],
minify: false,
write: false,
globalName: "clipper"
}).outputFiles[0].text;
test.beforeAll(() => {
mkdirSync(tempDirPath, { recursive: true });
});
// Cleanup of the temp directory is currently disabled.
// test.afterAll(() => {
// rmSync(tempDirPath, { recursive: true, force: true });
// });
// Registers one "clip as HTML" test per entry in pages.json: the page is
// clipped in the browser, the result is written to a temp file, re-opened,
// and a full-page screenshot of it is compared against the stored snapshot.
for (const { url, title } of Websites) {
  const { hostname } = new URL(url);

  test(`clip ${hostname} (${title})`, async ({ page }, testInfo) => {
    testInfo.setTimeout(0);

    await page.goto(url);
    await page.addScriptTag({ content: output, type: "text/javascript" });
    await page.waitForLoadState("networkidle");

    // const originalScreenshot = await page.screenshot({
    //   fullPage: true,
    //   type: "jpeg"
    // });
    // expect(originalScreenshot).toMatchSnapshot({
    //   name: `${slugify(website.title)}.jpg`,
    //   maxDiffPixelRatio: 0.1
    // });

    // Runs inside the page; a BOM is prepended to the clipped markup.
    const clippedHtml = await page.evaluate(async () => {
      const html = await window.clipper.clipPage(window.document, true, false);
      return html ? `\ufeff${html}` : null;
    });
    if (!clippedHtml) throw new Error("Failed to clip page.");

    const fileName = `${slugify(title.toLowerCase())}.html`;
    const tempFilePath = join(tempDirPath, fileName);
    writeFileSync(tempFilePath, clippedHtml);

    await page.goto(`file://${tempFilePath}`);
    const clippedScreenshot = await page.screenshot({
      fullPage: true,
      type: "jpeg"
    });
    expect(clippedScreenshot).toMatchSnapshot({
      name: `${slugify(title)}.jpg`,
      maxDiffPixelRatio: 0.1
    });

    // Only reached when the snapshot assertion above did not throw.
    rmSync(tempFilePath, { force: true });
  });
}
// Registers one "clip as image" test per entry in pages.json: the page is
// captured with clipScreenshot inside the browser, the bytes are returned to
// the test process as base64 and compared against the stored PNG snapshot.
for (const website of Websites) {
const domain = new URL(website.url).hostname;
test(`clip as image ${domain} (${website.title})`, async ({ page }, info) => {
info.setTimeout(0);
await page.goto(website.url);
await page.addScriptTag({ content: output, type: "text/javascript" });
await page.waitForLoadState("networkidle");
// const originalScreenshot = await page.screenshot({
// fullPage: true,
// type: "jpeg"
// });
// expect(originalScreenshot).toMatchSnapshot({
// name: `${slugify(website.title)}.jpg`,
// maxDiffPixelRatio: 0.1
// });
// Everything inside this callback executes in the page, so the base64
// helper has to be declared inside it as well.
const result = await page.evaluate(async () => {
const data = await window.clipper.clipScreenshot(undefined, "raw");
if (data) {
return base64ArrayBuffer(await data.arrayBuffer());
}
// Hand-rolled base64 encoder for an ArrayBuffer (function declaration, so it
// is hoisted and usable by the call above).
function base64ArrayBuffer(arrayBuffer) {
let base64 = "";
const encodings =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
const bytes = new Uint8Array(arrayBuffer);
const byteLength = bytes.byteLength;
const byteRemainder = byteLength % 3;
const mainLength = byteLength - byteRemainder;
let a, b, c, d;
let chunk;
// Main loop deals with bytes in chunks of 3
for (let i = 0; i < mainLength; i = i + 3) {
// Combine the three bytes into a single integer
chunk = (bytes[i] << 16) | (bytes[i + 1] << 8) | bytes[i + 2];
// Use bitmasks to extract 6-bit segments from the triplet
a = (chunk & 16515072) >> 18; // 16515072 = (2^6 - 1) << 18
b = (chunk & 258048) >> 12; // 258048 = (2^6 - 1) << 12
c = (chunk & 4032) >> 6; // 4032 = (2^6 - 1) << 6
d = chunk & 63; // 63 = 2^6 - 1
// Convert the raw binary segments to the appropriate ASCII encoding
base64 += encodings[a] + encodings[b] + encodings[c] + encodings[d];
}
// Deal with the remaining bytes and padding
if (byteRemainder == 1) {
chunk = bytes[mainLength];
a = (chunk & 252) >> 2; // 252 = (2^6 - 1) << 2
// Set the 4 least significant bits to zero
b = (chunk & 3) << 4; // 3 = 2^2 - 1
base64 += encodings[a] + encodings[b] + "==";
} else if (byteRemainder == 2) {
chunk = (bytes[mainLength] << 8) | bytes[mainLength + 1];
a = (chunk & 64512) >> 10; // 64512 = (2^6 - 1) << 10
b = (chunk & 1008) >> 4; // 1008 = (2^6 - 1) << 4
// Set the 2 least significant bits to zero
c = (chunk & 15) << 2; // 15 = 2^4 - 1
base64 += encodings[a] + encodings[b] + encodings[c] + "=";
}
return base64;
}
// Reached only when clipScreenshot produced nothing.
return null;
});
if (!result) throw new Error("Failed to clip page.");
// Decode back to bytes so the snapshot comparison works on the image data.
expect(Buffer.from(result, "base64")).toMatchSnapshot({
name: `${slugify(website.title)}-image.png`,
maxDiffPixelRatio: 0.1
});
});
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 535 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 322 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 891 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 MiB

View File

@@ -0,0 +1,24 @@
[
{
"title": "My Ad Center helps you control the ads you see",
"url": "https://blog.google/technology/safety-security/my-ad-center/"
},
{
"title": "How to set Linux environment variables with Ansible - Stack Overflow",
"url": "https://stackoverflow.com/questions/27733511/how-to-set-linux-environment-variables-with-ansible"
},
{
"title": "IMDb: Ratings, Reviews, and Where to Watch the Best Movies & TV Shows",
"url": "https://www.imdb.com/"
},
{
"title": "House of the Dragon (TV Series 2022 ) - IMDb",
"url": "https://www.imdb.com/title/tt11198330/?ref_=hm_fanfav_tt_i_2_pd_fp1"
},
{ "title": "Apple", "url": "https://www.apple.com/" },
{ "title": "Docgen — Github", "url": "https://github.com/thecodrr/docgen" },
{
"title": "Playwright Docs",
"url": "https://playwright.dev/docs/api/class-testoptions"
}
]

3284
packages/clipper/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,45 @@
{
"name": "@notesnook/clipper",
"version": "1.0.0",
"description": "Web clipper core used by the Notesnook Web Clipper",
"keywords": [
"web-clipper"
],
"author": "Abdullah Atta <abdullahatta@streetwriters.co>",
"homepage": "https://notesnook.com/",
"license": "GPL-3.0-or-later",
"main": "dist/index.js",
"devDependencies": {
"@playwright/test": "^1.27.1",
"@types/css": "^0.0.33",
"@types/css-tree": "^1.0.7",
"@types/dompurify": "^2.3.4",
"@types/stylis": "^4.0.2",
"esbuild": "^0.15.9",
"ts-node": "^10.9.1",
"typescript": "^4.8.2",
"slugify": "^1.6.5"
},
"publishConfig": {
"access": "public"
},
"repository": {
"type": "git",
"url": "git+https://github.com/streetwriters/notesnook.git"
},
"scripts": {
"build": "tsc",
"test": "playwright test"
},
"bugs": {
"url": "https://github.com/streetwriters/notesnook/issues"
},
"dependencies": {
"@mozilla/readability": "^0.4.2",
"css-what": "^6.1.0",
"dompurify": "^2.4.0",
"hyperapp": "^2.0.22",
"jest-environment-jsdom": "^29.0.3",
"specificity": "^0.4.1"
}
}

View File

@@ -0,0 +1,131 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2022 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import type { PlaywrightTestConfig } from "@playwright/test";
import { devices } from "@playwright/test";
/**
* Read environment variables from file.
* https://github.com/motdotla/dotenv
*/
// require('dotenv').config();
/**
* See https://playwright.dev/docs/test-configuration.
*/
const config: PlaywrightTestConfig = {
testDir: "./__tests__",
/* Maximum time one test can run for. */
timeout: 30 * 1000,
expect: {
/**
* Maximum time expect() should wait for the condition to be met.
* For example in `await expect(locator).toHaveText();`
*/
timeout: 5000
},
/* Run tests in files in parallel */
fullyParallel: true,
/* Fail the build on CI if you accidentally left test.only in the source code. */
forbidOnly: !!process.env.CI,
/* Retry on CI only */
retries: process.env.CI ? 2 : 0,
/* Use 2 workers on CI and a single worker everywhere else. */
workers: process.env.CI ? 2 : 1,
/* Reporter to use. See https://playwright.dev/docs/test-reporters */
reporter: "list",
/* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */
use: {
/* Maximum time each action such as `click()` can take. Defaults to 0 (no limit). */
actionTimeout: 0,
/* Base URL to use in actions like `await page.goto('/')`. */
// baseURL: 'http://localhost:3000',
/* Record a trace for every test and keep it only for failed tests. See https://playwright.dev/docs/trace-viewer */
trace: "retain-on-failure"
},
/* Configure projects for major browsers (only Chromium, at 1920x1080, is enabled). */
projects: [
{
name: "chromium",
use: {
...devices["Desktop Chrome"],
viewport: {
height: 1080,
width: 1920
}
}
}
// {
// name: "firefox",
// use: {
// ...devices["Desktop Firefox"]
// }
// },
// {
// name: "webkit",
// use: {
// ...devices["Desktop Safari"]
// }
// }
/* Test against mobile viewports. */
// {
// name: 'Mobile Chrome',
// use: {
// ...devices['Pixel 5'],
// },
// },
// {
// name: 'Mobile Safari',
// use: {
// ...devices['iPhone 12'],
// },
// },
/* Test against branded browsers. */
// {
// name: 'Microsoft Edge',
// use: {
// channel: 'msedge',
// },
// },
// {
// name: 'Google Chrome',
// use: {
// channel: 'chrome',
// },
// },
]
/* Folder for test artifacts such as screenshots, videos, traces, etc. */
// outputDir: 'test-results/',
/* Run your local dev server before starting the tests */
// webServer: {
// command: 'npm run start',
// port: 3000,
// },
};
export default config;

View File

@@ -0,0 +1,315 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2022 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { createImage, FetchOptions } from "./fetch";
import { Filter } from "./types";
import { uid } from "./utils";
// Tag names treated as SVG elements. The list is lower-cased once here so
// that lookups can compare against `tagName.toLowerCase()`.
const SVGElements = [
"altGlyph",
"altGlyphDef",
"altGlyphItem",
"animate",
"animateColor",
"animateMotion",
"animateTransform",
"circle",
"clipPath",
"color-profile",
"cursor",
"defs",
"desc",
"ellipse",
"feBlend",
"feColorMatrix",
"feComponentTransfer",
"feComposite",
"feConvolveMatrix",
"feDiffuseLighting",
"feDisplacementMap",
"feDistantLight",
"feFlood",
"feFuncA",
"feFuncB",
"feFuncG",
"feFuncR",
"feGaussianBlur",
"feImage",
"feMerge",
"feMergeNode",
"feMorphology",
"feOffset",
"fePointLight",
"feSpecularLighting",
"feSpotLight",
"feTile",
"feTurbulence",
"filter",
"font-face",
"font-face-format",
"font-face-name",
"font-face-src",
"font-face-uri",
"foreignObject",
"g",
"glyph",
"glyphRef",
"hkern",
"image",
"line",
"linearGradient",
"marker",
"mask",
"metadata",
"missing-glyph",
"mpath",
"path",
"pattern",
"polygon",
"polyline",
"radialGradient",
"rect",
"set",
"stop",
"svg",
"switch",
"symbol",
"text",
"textPath",
"title",
"tref",
"tspan",
"use",
"view",
"vkern"
].map((a) => a.toLowerCase());
// Tag names (lower-cased) that are never copied into a clone.
const INVALID_ELEMENTS = ["script"].map((a) => a.toLowerCase());
// Options that steer a cloneNode() call.
type CloneProps = {
// Predicate consulted for every non-root node; a falsy result skips the node.
filter?: Filter;
// True for the node the clone started from; child nodes are cloned with `root: false`.
root: boolean;
// NOTE(review): not read anywhere in this file — confirm whether it is still needed.
vector: boolean;
// Supplies the styles that are inlined onto a cloned element; when it is
// missing or returns nothing, no styles are copied.
getElementStyles?: (element: HTMLElement) => CSSStyleDeclaration | undefined;
// Supplies the styles of a pseudo element (":before" / ":after"); when it
// returns nothing, getComputedStyle is used instead.
getPseudoElementStyles?: (
element: HTMLElement,
pseudoElement: string
) => CSSStyleDeclaration | undefined;
// Passed to createImage when a <canvas> is converted to an image.
fetchOptions?: FetchOptions;
};
/**
 * Deep-clones `node`: makes a shallow copy, clones the children into it and
 * post-processes the result (styles, pseudo elements, user input, SVG fixes).
 *
 * @returns the processed clone, or `null` when the node is rejected by the
 * filter (never applied to the root) or cannot be copied.
 */
export async function cloneNode(node: HTMLElement, options: CloneProps) {
  const rejectedByFilter =
    !options.root && options.filter && !options.filter(node);
  if (rejectedByFilter) return null;

  const shallowCopy = await makeNodeCopy(node, options.fetchOptions || {});
  if (!shallowCopy) return null;

  const populated = await cloneChildren(node, shallowCopy, options);
  return processClone(node, populated, options);
}
/**
 * Creates a childless copy of `original`.
 *
 * - `<canvas>` elements are converted to an image of their current content.
 * - Comments, invalid elements (e.g. `<script>`), elements with
 *   `display: none` and elements whose computed width and height are both
 *   `0px` yield `null`.
 * - Custom elements (hyphenated, non-SVG tag names) are replaced by a
 *   `<span>` (inline display) or `<div>` carrying the same attributes.
 * - Everything else is shallow-cloned.
 *
 * Any failure — including a rejected image load for a canvas — is logged and
 * reported as `null` so that one bad node cannot abort the whole clone.
 */
async function makeNodeCopy(
  original: HTMLElement,
  options: FetchOptions
): Promise<HTMLElement | null> {
  try {
    if (original instanceof HTMLCanvasElement) {
      // Awaited inside the try block: otherwise a rejection would bypass the
      // catch below and propagate to the caller.
      return await createImage(original.toDataURL(), options);
    }
    if (original.nodeType === Node.COMMENT_NODE) return null;
    if (isInvalidElement(original)) return null;

    if (original.nodeType !== Node.TEXT_NODE && !isSVGElement(original)) {
      const { display, width, height } = window.getComputedStyle(original);
      if (display === "none" || (width === "0px" && height === "0px"))
        return null;

      if (isCustomElement(original)) {
        const isInline = display.includes("inline");
        const element = document.createElement(isInline ? "span" : "div");
        for (const attribute of original.attributes) {
          element.setAttribute(attribute.name, attribute.value);
        }
        return element;
      }
    }
    return original.cloneNode(false) as HTMLElement;
  } catch (e) {
    console.error("Failed to clone element", e);
    return null;
  }
}
/**
 * Tells whether `element` is a custom element: its tag name contains a
 * hyphen and is not one of the (hyphenated) SVG tag names.
 */
function isCustomElement(element: HTMLElement) {
  const tagName = element ? element.tagName : undefined;
  if (!tagName) return false;

  const isKnownSvgTag = SVGElements.includes(tagName.toLowerCase());
  return !isKnownSvgTag && tagName.includes("-");
}
/** Tells whether the tag name of `element` is one of the known SVG tag names. */
export function isSVGElement(element: HTMLElement) {
  const tagName = element ? element.tagName : undefined;
  return tagName ? SVGElements.includes(tagName.toLowerCase()) : false;
}
/** Tells whether `element` has a tag name that must never be cloned. */
function isInvalidElement(element: HTMLElement) {
  const tagName = element ? element.tagName : undefined;
  return tagName ? INVALID_ELEMENTS.includes(tagName.toLowerCase()) : false;
}
/**
 * Clones every child node of `original` into `clone` and returns `clone`.
 * Nodes without children are returned untouched.
 */
async function cloneChildren(
  original: HTMLElement,
  clone: HTMLElement,
  options: CloneProps
) {
  const { childNodes } = original;
  if (childNodes.length !== 0) {
    await cloneChildrenInOrder(clone, childNodes, options);
  }
  return clone;
}
/**
 * Clones `childs` one after another (each clone is awaited before the next
 * starts) and appends every successful clone to `parent`, preserving order.
 * Children are always cloned with `root: false`.
 */
async function cloneChildrenInOrder(
  parent: HTMLElement,
  childs: NodeListOf<ChildNode>,
  options: CloneProps
) {
  const childOptions = { ...options, root: false };
  for (let index = 0; index < childs.length; index++) {
    // A fresh options object per child, exactly like a per-call spread.
    const copy = await cloneNode(childs[index] as HTMLElement, {
      ...childOptions
    });
    if (!copy) continue;
    parent.appendChild(copy);
  }
}
/**
 * Post-processes a freshly cloned node. Element clones get their styles,
 * pseudo elements and user input copied over from `original` and receive the
 * SVG fixes; anything that is not an Element is returned unchanged.
 */
function processClone(
  original: HTMLElement,
  clone: HTMLElement,
  options: CloneProps
) {
  if (clone instanceof Element) {
    // if (clone instanceof HTMLElement) removeAttributes(clone);
    copyStyle(original, clone, options);
    clonePseudoElements(original, clone, options);
    copyUserInput(original, clone);
    fixSvg(clone);
  }
  return clone;
}
/**
 * Copies the `font` shorthand followed by each individual font property from
 * `source` to `target`. The shorthand goes first so that the longhands
 * written afterwards take precedence.
 */
function copyFont(source: CSSStyleDeclaration, target: CSSStyleDeclaration) {
  const fontProperties = [
    "font",
    "fontFamily",
    "fontFeatureSettings",
    "fontKerning",
    "fontSize",
    "fontStretch",
    "fontStyle",
    "fontVariant",
    "fontVariantCaps",
    "fontVariantEastAsian",
    "fontVariantLigatures",
    "fontVariantNumeric",
    "fontVariationSettings",
    "fontWeight"
  ] as const;

  for (const property of fontProperties) {
    target[property] = source[property];
  }
}
/**
 * Inlines the styles supplied by `options.getElementStyles` onto
 * `targetElement`. For `<body>` the computed font properties are copied as
 * well. The resulting `style` attribute is minified. Nothing happens when no
 * styles are available for the source element.
 */
function copyStyle(
  sourceElement: HTMLElement,
  targetElement: HTMLElement,
  options: CloneProps
) {
  const lookupStyles = options.getElementStyles;
  const sourceStyles = lookupStyles ? lookupStyles(sourceElement) : undefined;
  if (!sourceStyles) return;

  targetElement.style.cssText = sourceStyles.cssText;

  const isBody = sourceElement.tagName.toLowerCase() === "body";
  if (isBody) copyFont(getComputedStyle(sourceElement), targetElement.style);

  const inlineStyles = targetElement.getAttribute("style");
  if (inlineStyles) {
    targetElement.setAttribute("style", minifyStyles(inlineStyles));
  }
}
/**
 * Recreates the `:before` / `:after` pseudo elements of `original` on `clone`.
 *
 * The pseudo element styles come from `options.getPseudoElementStyles` (or
 * getComputedStyle when that yields nothing). For every pseudo element with a
 * non-empty `cssText` a rule is written into a `<style>` element; when at
 * least one rule exists the clone's class is replaced by a generated
 * `pseudo--<uid>` class and the `<style>` element is appended to the clone.
 *
 * @returns whether any pseudo element rule was created.
 */
function clonePseudoElements(
  original: HTMLElement,
  clone: HTMLElement,
  options: CloneProps
) {
  const lookupPseudoStyles = options.getPseudoElementStyles;
  const styleElement = document.createElement("style");
  const className = `pseudo--${uid()}`;
  let ruleCount = 0;

  for (const pseudo of [":before", ":after"]) {
    const provided = lookupPseudoStyles
      ? lookupPseudoStyles(original, pseudo)
      : undefined;
    const style = provided || getComputedStyle(original, pseudo);
    if (!style.cssText) continue;

    const rule = `.${className}:${pseudo} {\n${style.cssText}\n}`;
    styleElement.appendChild(document.createTextNode(rule));
    ruleCount++;
  }

  const hasPseudoElements = ruleCount > 0;
  if (hasPseudoElements) {
    clone.className = className;
    clone.appendChild(styleElement);
  }
  return hasPseudoElements;
}
/**
 * Carries the current user input of form controls over to their clones.
 *
 * - `<textarea>`: the current value becomes the clone's text content. It is
 *   written as plain text (`textContent`) rather than markup so that typed
 *   characters such as `&amp;` or `<` are preserved verbatim instead of being
 *   interpreted by the HTML parser.
 * - `<input>`: the current value is written to the clone's `value` attribute.
 */
function copyUserInput(original: HTMLElement, clone: HTMLElement) {
  if (original instanceof HTMLTextAreaElement) {
    clone.textContent = original.value;
  }
  if (original instanceof HTMLInputElement) {
    clone.setAttribute("value", original.value);
  }
}
/**
 * Prepares a cloned SVG element for serialization: sets the SVG namespace
 * attribute and mirrors the `width` / `height` attributes into inline styles
 * unless the style already defines them. Non-SVG clones are ignored.
 */
function fixSvg(clone: Element) {
  if (!(clone instanceof SVGElement)) return;

  clone.setAttribute("xmlns", "http://www.w3.org/2000/svg");
  // if (!(clone instanceof SVGRectElement)) return;
  for (const dimension of ["width", "height"]) {
    const attributeValue = clone.getAttribute(dimension);
    if (!attributeValue) continue;
    if (clone.style.getPropertyValue(dimension)) continue;
    clone.style.setProperty(dimension, attributeValue);
  }
}
/**
 * Shrinks an inline style string by dropping the spaces that directly follow
 * a `:` or `;` separator, e.g. `"color: red; margin: 0"` becomes
 * `"color:red;margin:0"`. Only the spaces are removed; every other character,
 * including a `:` that follows a separator, is kept.
 */
function minifyStyles(text: string) {
  return text.replace(/([:;]) +/g, "$1");
}

View File

@@ -0,0 +1,176 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2022 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Shorthand used by the tokenizer to turn a character code back into a string.
const from = String.fromCharCode;
/** Returns `value` without leading and trailing whitespace. */
function trim(value: string): string {
  const trimmed = value.trim();
  return trimmed;
}
/**
 * Returns the UTF-16 code unit of `value` at `index`, or `0` when `index`
 * lies outside the string.
 */
function charat(value: string, index: number): number {
  const code = value.charCodeAt(index);
  return Number.isNaN(code) ? 0 : code;
}
/** Returns the number of UTF-16 code units in `value`. */
function strlen(value: string): number {
  const { length: size } = value;
  return size;
}
/** Returns the part of `value` from `begin` up to (not including) `end`. */
function substr(value: string, begin: number, end: number): string {
  const part = value.slice(begin, end);
  return part;
}
/** Pushes `value` onto the end of `array` and hands `value` back. */
function append<T>(value: T, array: T[]): T {
  array[array.length] = value;
  return value;
}
// Shared scanner state. It is (re)initialised by alloc() and advanced by next().
// Current line and column of the scanner; next() resets the column and
// increments the line after reading a line feed.
let line = 1;
let column = 1;
// Length of the input being scanned.
let length = 0;
// Index of the next character to read.
let position = 0;
// Code of the character most recently returned by next() (0 at end of input).
let character = 0;
// The input being scanned.
let characters = "";
/**
 * Reads the next character code into `character` and advances the scanner;
 * yields `0` once the input is exhausted. Also keeps `line` / `column` up to
 * date: a line feed starts a new line at column 1.
 */
function next(): number {
  if (position < length) {
    character = charat(characters, position);
    position++;
  } else {
    character = 0;
  }

  column++;
  if (character === 10) {
    column = 1;
    line++;
  }
  return character;
}
/** Returns the code of the upcoming character without consuming it. */
function peek(): number {
  const upcoming = charat(characters, position);
  return upcoming;
}
/** Returns the part of the scanned input between `begin` and `end`. */
function slice(begin: number, end: number): string {
  const part = substr(characters, begin, end);
  return part;
}
/**
 * Classifies a character code:
 *
 * - `5` whitespace: `\0 \t \n \r` and space
 * - `4` isolate / breakpoint: `! * + , / > @ ~ ; { }`
 * - `3` accompanied: `:`
 * - `2` opening delimiter: `" ' ( [`
 * - `1` closing delimiter: `) ]`
 * - `0` anything else (part of an identifier)
 */
function token(type: number): number {
  // \0 \t \n \r space
  if (type === 0 || type === 9 || type === 10 || type === 13 || type === 32)
    return 5;

  // ! * + , / > @ ~
  if (
    type === 33 ||
    type === 42 ||
    type === 43 ||
    type === 44 ||
    type === 47 ||
    type === 62 ||
    type === 64 ||
    type === 126
  )
    return 4;
  // ; { }
  if (type === 59 || type === 123 || type === 125) return 4;

  // :
  if (type === 58) return 3;

  // " ' ( [
  if (type === 34 || type === 39 || type === 40 || type === 91) return 2;

  // ) ]
  if (type === 41 || type === 93) return 1;

  return 0;
}
/**
 * Resets the scanner so that it reads `value` from the start and returns a
 * fresh, empty array for the caller to collect results in.
 */
function alloc(value: string): [] {
  characters = value;
  length = strlen(characters);
  line = 1;
  column = 1;
  position = 0;
  return [];
}
/** Drops the scanned input from the scanner state and passes `value` through. */
function dealloc<T>(value: T): T {
  const result = value;
  characters = "";
  return result;
}
/**
 * Reads a delimited run that starts at the character just consumed and
 * returns it trimmed. `[` is closed by `]` and `(` by `)`; any other opening
 * character (quotes) is closed by the same character.
 */
function delimit(type: number): string {
  let closing = type;
  if (type === 91) closing = type + 2; // [ -> ]
  else if (type === 40) closing = type + 1; // ( -> )

  // The start index has to be captured before delimiter() moves the scanner.
  const start = position - 1;
  const end = delimiter(closing);
  return trim(slice(start, end));
}
/**
 * Splits `value` into tokens: identifiers, delimited runs (quoted strings,
 * `(...)`, `[...]`) and single characters for everything else.
 */
export function tokenize(value: string): string[] {
  const collected = alloc(value);
  const tokens = tokenizer(collected);
  return dealloc(tokens);
}
/**
 * Consumes the scanner input until it is exhausted and appends one entry per
 * token to `children`: identifiers for unclassified characters, a delimited
 * run for opening delimiters and the character itself for everything else.
 */
function tokenizer(children: string[]): string[] {
  while (next()) {
    const kind = token(character);
    if (kind === 0) {
      append(identifier(position - 1), children);
    } else if (kind === 2) {
      append(delimit(character), children);
    } else {
      append(from(character), children);
    }
  }
  return children;
}
/**
 * Advances the scanner to the character code `type` that closes the current
 * delimited run and returns the position right after it (or the end position
 * when the input runs out first).
 *
 * Quotes met inside a non-quote run and `(` met inside a `)`-terminated run
 * are skipped recursively; a backslash skips the character that follows it.
 */
function delimiter(type: number): number {
  while (next()) {
    // ] ) " '
    if (character === type) return position;

    if (character === 34 || character === 39) {
      // " '
      if (type !== 34 && type !== 39) delimiter(character);
    } else if (character === 40) {
      // (
      if (type === 41) delimiter(type);
    } else if (character === 92) {
      // \
      next();
    }
  }
  return position;
}
/**
 * Consumes characters for as long as the upcoming one is unclassified
 * (token class 0) and returns the input from `index` up to the scanner
 * position.
 */
function identifier(index: number): string {
  while (token(peek()) === 0) {
    next();
  }
  return slice(index, position);
}

View File

@@ -0,0 +1,248 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2022 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { cloneNode, isSVGElement } from "./clone";
import { createImage, FetchOptions } from "./fetch";
import { resolveAll } from "./fontfaces";
import { inlineAllImages } from "./images";
import { Options } from "./types";
import { canvasToBlob, delay, escapeXhtml, height, width } from "./utils";
import { cacheStylesheets, inlineStylesheets } from "./styles";
import purify from "dompurify";
// Default impl options
// Baseline options; draw() spreads the caller's options on top of these.
const defaultOptions: Options = {
fetchOptions: {},
inlineOptions: {}
};
// Produces a self-contained clone of `node`. The steps run strictly in this
// order, each wrapped in a console timer:
//   1. inline the document's stylesheets (only if `inlineOptions.stylesheets`)
//   2. build the style cache from the document element's computed style
//   3. clone the node tree, reading element/pseudo styles from that cache
//   4. append the embedded web fonts (only if `inlineOptions.fonts`)
//   5. inline all images (only if `inlineOptions.images`)
//   6. strip unwanted attributes and fix links (finalize)
// Returns undefined when cloning yields nothing or a bare Text node.
async function getInlinedNode(node: HTMLElement, options: Options) {
const { fonts, images, stylesheets } = options.inlineOptions || {};
console.time("inline styles");
if (stylesheets) await inlineStylesheets(options.fetchOptions || {});
console.timeEnd("inline styles");
const documentStyles = getComputedStyle(document.documentElement);
console.time("cache styles");
const styleCache = cacheStylesheets(documentStyles);
console.timeEnd("cache styles");
console.time("clone");
let clone = await cloneNode(node, {
filter: options.filter,
root: true,
vector: !options.raster,
fetchOptions: options.fetchOptions,
getElementStyles: styleCache.get,
getPseudoElementStyles: styleCache.getPseudo
});
console.timeEnd("clone");
if (!clone || clone instanceof Text) return;
console.time("embed fonts");
if (fonts) clone = await embedFonts(clone, options.fetchOptions || {});
console.timeEnd("embed fonts");
console.time("inline images");
if (images) await inlineAllImages(clone, options.fetchOptions || {});
console.timeEnd("inline images");
finalize(clone);
return clone;
}
/**
 * Renders `node` as an SVG data URI: the node is cloned with fonts, images
 * and stylesheets inlined (unless switched off via `options.inlineOptions`),
 * the caller's options are applied, the clone is sanitized and finally
 * wrapped in a `<foreignObject>`.
 *
 * Note that `options.inlineOptions` is replaced on the passed object.
 *
 * @returns the data URI, or `undefined` when the node could not be cloned.
 */
async function toSvg(node: HTMLElement, options: Options) {
  const inlineEverything = { fonts: true, images: true, stylesheets: true };
  options.inlineOptions = { ...inlineEverything, ...options.inlineOptions };

  const inlined = await getInlinedNode(node, options);
  if (!inlined) return;

  const configured = applyOptions(inlined, options);
  const sanitized = purify.sanitize(configured, {
    RETURN_DOM: true,
    KEEP_CONTENT: false,
    ADD_ATTR: ["style"]
  });

  const svgWidth = options.width || width(node);
  const svgHeight = options.height || height(node);
  return makeSvgDataUri(sanitized, svgWidth, svgHeight);
}
/**
 * Applies the caller's presentation options to the root of the clone.
 *
 * - `backgroundColor`, `width` and `height` (in pixels) are written to the
 *   clone's inline style.
 * - Every declaration of `options.style` is copied onto the clone, including
 *   its priority. The declarations are enumerated by index (`length` /
 *   `item()`), which yields the real CSS property names that `setProperty`
 *   expects; enumerating the object's keys does not.
 * - `options.onCloned` is invoked last with the clone.
 *
 * @returns the same `clone` instance.
 */
function applyOptions(clone: HTMLElement, options: Options) {
  if (options.backgroundColor)
    clone.style.backgroundColor = options.backgroundColor;
  if (options.width) clone.style.width = options.width + "px";
  if (options.height) clone.style.height = options.height + "px";

  const style = options.style;
  if (style) {
    for (let index = 0; index < style.length; index++) {
      const property = style.item(index);
      clone.style.setProperty(
        property,
        style.getPropertyValue(property),
        style.getPropertyPriority(property)
      );
    }
  }

  options.onCloned?.(clone);
  return clone;
}
/**
 * Rasterizes `node` and resolves with the raw RGBA pixel data of the whole
 * rendered canvas.
 *
 * The read-back area is taken from the canvas itself, so it always matches
 * what was drawn — including `options.scale` and explicit
 * `options.width` / `options.height`.
 *
 * Sets `options.raster` on the passed object.
 *
 * @returns a promise for the pixel data, or for `undefined` when no canvas or
 * 2D context could be obtained.
 */
function toPixelData(node: HTMLElement, options: Options) {
  options = options || {};
  options.raster = true;
  return draw(node, options).then((canvas) => {
    if (!canvas) return undefined;
    return canvas
      .getContext("2d")
      ?.getImageData(0, 0, canvas.width, canvas.height).data;
  });
}
/**
 * Rasterizes `node` and resolves with a PNG data URL (`undefined` when no
 * canvas could be produced). Sets `options.raster` on the passed object.
 */
function toPng(node: HTMLElement, options: Options) {
  options.raster = true;
  const rendering = draw(node, options);
  return rendering.then((canvas) => canvas?.toDataURL());
}
/**
 * Rasterizes `node` and resolves with a JPEG data URL (`undefined` when no
 * canvas could be produced). `options.quality` selects the JPEG quality and
 * falls back to 1.0. Sets `options.raster` on the passed object.
 */
function toJpeg(node: HTMLElement, options: Options) {
  options.raster = true;
  const rendering = draw(node, options);
  return rendering.then((canvas) =>
    canvas?.toDataURL("image/jpeg", options.quality || 1.0)
  );
}
/**
 * Rasterizes `node` and resolves with the canvas converted by canvasToBlob
 * (`null` when no canvas could be produced). Sets `options.raster` on the
 * passed object.
 */
function toBlob(node: HTMLElement, options: Options) {
  options.raster = true;
  const rendering = draw(node, options);
  return rendering.then(function (canvas) {
    return canvas && canvasToBlob(canvas);
  });
}
/**
 * Rasterizes `node` and resolves with the canvas it was drawn on. Sets
 * `options.raster` on the passed object.
 */
function toCanvas(node: HTMLElement, options: Options) {
  options.raster = true;
  const rendering = draw(node, options);
  return rendering;
}
/**
 * Draws `domNode` onto a new canvas: the node is rendered to an SVG data URI,
 * loaded as an image and painted at `options.scale` (1 unless a number is
 * given) with image smoothing turned off.
 *
 * @returns a promise for the canvas, or for `null` when no canvas or 2D
 * context is available. When the SVG could not be produced the canvas is
 * returned without anything drawn on it.
 */
function draw(domNode: HTMLElement, options: Options) {
  options = { ...defaultOptions, ...options };

  const loadImage = (uri: string | undefined) =>
    uri ? createImage(uri, options.fetchOptions || {}) : null;

  return toSvg(domNode, options)
    .then(loadImage)
    .then(delay(0))
    .then((image) => {
      const scale = typeof options.scale === "number" ? options.scale : 1;
      const canvas = newCanvas(domNode, scale, options);
      const context = canvas?.getContext("2d");
      if (!context) return null;

      // ctx.mozImageSmoothingEnabled = false;
      // ctx.msImageSmoothingEnabled = false;
      context.imageSmoothingEnabled = false;
      if (image) {
        context.scale(scale, scale);
        context.drawImage(image, 0, 0);
      }
      return canvas;
    });
}
/**
 * Creates a canvas sized to `options.width` / `options.height` (falling back
 * to the node's own size) multiplied by `scale`, and fills it with
 * `options.backgroundColor` when one is given.
 *
 * @returns the canvas, or `null` when a background is requested but no 2D
 * context is available.
 */
function newCanvas(node: HTMLElement, scale: number, options: Options) {
  const canvas = document.createElement("canvas");
  canvas.width = (options.width || width(node)) * scale;
  canvas.height = (options.height || height(node)) * scale;

  if (!options.backgroundColor) return canvas;

  const context = canvas.getContext("2d");
  if (!context) return null;
  context.fillStyle = options.backgroundColor;
  context.fillRect(0, 0, canvas.width, canvas.height);
  return canvas;
}
/**
 * Appends a `<style>` element holding the resolved web font CSS to `node`
 * and resolves with `node`.
 */
async function embedFonts(node: HTMLElement, options: FetchOptions) {
  const cssText = await resolveAll(options);
  const styleNode = document.createElement("style");
  node.appendChild(styleNode);
  styleNode.appendChild(document.createTextNode(cssText));
  return node;
}
/**
 * Serializes `node` as XHTML, wraps it in an SVG `<foreignObject>` that
 * fills an SVG of the given size and returns the result as a data URI.
 * The XHTML namespace attribute is set on `node` as a side effect.
 */
function makeSvgDataUri(node: HTMLElement, width: number, height: number) {
  node.setAttribute("xmlns", "http://www.w3.org/1999/xhtml");

  const serialized = new XMLSerializer().serializeToString(node);
  const xhtml = escapeXhtml(serialized);
  const foreignObject = `<foreignObject x="0" y="0" width="100%" height="100%">${xhtml}</foreignObject>`;
  const svg = `<svg xmlns="http://www.w3.org/2000/svg" width="${width}" height="${height}">${foreignObject}</svg>`;

  return `data:image/svg+xml;charset=utf-8,${svg}`;
}
export { toJpeg, toBlob, toCanvas, toPixelData, toPng, toSvg, getInlinedNode };
// Attributes that finalize() leaves on cloned HTML elements. Every other
// attribute is removed; `class` survives only when it holds a generated
// `pseudo--` class.
const VALID_ATTRIBUTES = [
"src",
"href",
"title",
"style",
"srcset",
"sizes",
"width",
"height",
"target",
"rel"
];
/**
 * Cleans up every HTML (non-SVG) descendant of `root`:
 *
 * - removes all attributes that are not listed in VALID_ATTRIBUTES; a
 *   `class` attribute is kept only when it holds a generated `pseudo--` class
 * - rewrites the `href` of links to its absolute form so that relative links
 *   keep working outside the original page
 *
 * The `href` property of an anchor already is the fully resolved URL for
 * every scheme, so it is written back as-is: `mailto:`, `tel:` and similar
 * links stay intact, and anchors without an `href` attribute do not gain one.
 */
function finalize(root: HTMLElement) {
  for (const element of root.querySelectorAll("*")) {
    if (!(element instanceof HTMLElement) || isSVGElement(element)) continue;

    // Iterate over a copy: attributes are removed while looping.
    for (const attribute of Array.from(element.attributes)) {
      if (attribute.name === "class" && element.className.includes("pseudo--"))
        continue;
      if (!VALID_ATTRIBUTES.includes(attribute.name)) {
        element.removeAttribute(attribute.name);
      }
    }

    if (element instanceof HTMLAnchorElement && element.hasAttribute("href")) {
      element.setAttribute("href", element.href);
    }
  }
}

View File

@@ -0,0 +1,87 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2022 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Options that control how resource URLs are built and how images are loaded.
export type FetchOptions = {
// When set together with `corsHost`, http(s) URLs are prefixed with `corsHost`.
bypassCors?: boolean;
// Prefix used for `bypassCors`; URLs that already start with it are left alone.
corsHost?: string;
// Appends the current timestamp as a query parameter to http(s) URLs.
noCache?: boolean;
// Assigned to `crossOrigin` of images that get (re)loaded; `null` when unset.
crossOrigin?: "anonymous" | "use-credentials" | null;
};
/**
 * Downloads `url` (rewritten by constructUrl) and returns its content as a
 * data URL.
 *
 * @returns `null` for an empty `url`; an empty string when the response is
 * not OK or its body cannot be read as a data URL; the data URL otherwise.
 * Network errors raised by `fetch` propagate to the caller.
 */
export async function fetchResource(url: string, options: FetchOptions) {
  if (!url) return null;

  const response = await fetch(constructUrl(url, options));
  if (!response.ok) return "";

  const blob = await response.blob();
  return new Promise<string>((resolve) => {
    const reader = new FileReader();
    // "loadend" fires after success and after failure alike. Always settle
    // the promise: a failed read has no string result and must not leave the
    // caller waiting forever.
    reader.addEventListener("loadend", () => {
      resolve(typeof reader.result === "string" ? reader.result : "");
    });
    reader.readAsDataURL(blob);
  });
}
/**
 * Loads `url` (rewritten by constructUrl) into a new image element.
 *
 * @returns a promise that resolves with the loaded image, rejects with the
 * error event when loading fails, or resolves with `null` for the empty
 * data URL `"data:,"`.
 */
export function createImage(url: string, options: FetchOptions) {
  const isEmptyDataUrl = url === "data:,";
  if (isEmptyDataUrl) return Promise.resolve(null);

  return new Promise<HTMLImageElement>((resolve, reject) => {
    const image = new Image();
    image.crossOrigin = options.crossOrigin || null;
    image.onload = () => {
      resolve(image);
    };
    image.onerror = reject;
    image.src = constructUrl(url, options);
  });
}
/**
 * Re-requests the current source of `image` with cache busting enabled, so
 * that a cached response cannot cause CORS problems.
 *
 * Cache busting is applied to a private copy of `options`; the caller's
 * object is left untouched so the setting cannot leak into later requests
 * that share it.
 *
 * @returns a promise that resolves with `image` once it has loaded again and
 * rejects with the error event when loading fails; resolves with `null`
 * (without reloading) when the image is already served from
 * `options.corsHost`.
 */
export function reloadImage(image: HTMLImageElement, options: FetchOptions) {
  if (options.corsHost && image.currentSrc.startsWith(options.corsHost))
    return Promise.resolve(null);

  const reloadOptions: FetchOptions = { ...options, noCache: true };
  return new Promise<HTMLImageElement>((resolve, reject) => {
    image.crossOrigin = reloadOptions.crossOrigin || null;
    image.onload = () => {
      resolve(image);
    };
    image.onerror = (e) => {
      console.error("Failed to load image", image.currentSrc);
      reject(e);
    };
    image.src = constructUrl(image.currentSrc, reloadOptions);
  });
}
/**
 * Builds the URL that is actually requested for `url`.
 *
 * - URLs that do not start with `http` (e.g. data URLs) are returned as-is.
 * - With `noCache` the current timestamp is appended as a query parameter.
 * - With `bypassCors` and a `corsHost` the URL is prefixed with
 *   `<corsHost>/`, unless it already starts with `corsHost`.
 */
export function constructUrl(url: string, options: FetchOptions) {
  if (!url.startsWith("http")) return url;

  let target = url;
  if (options.noCache) {
    // Cache bypass so we dont have CORS issues with cached images
    // Source: https://developer.mozilla.org/en/docs/Web/API/XMLHttpRequest/Using_XMLHttpRequest#Bypassing_the_cache
    const separator = target.includes("?") ? "&" : "?";
    target = `${target}${separator}${Date.now()}`;
  }

  const { bypassCors, corsHost } = options;
  if (bypassCors && corsHost && !target.startsWith(corsHost)) {
    target = `${corsHost}/${target}`;
  }
  return target;
}

View File

@@ -0,0 +1,79 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2022 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { FetchOptions } from "./fetch";
import { inlineAll, shouldProcess } from "./inliner";
/**
 * Resolves every web font returned by `readAll` (one at a time, in order) and
 * joins the resulting CSS strings with newlines.
 */
async function resolveAll(options: FetchOptions) {
  const resolved: string[] = [];
  for (const webFont of readAll()) {
    const css = await webFont.resolve(options);
    resolved.push(css);
  }
  return resolved.join("\n");
}
/**
 * Collects the font-face rules of the current document and wraps each one
 * with `newWebFont`.
 */
function readAll() {
  const candidates = getWebFonts(document.styleSheets);
  return selectWebFontRules(candidates).map((rule) => newWebFont(rule));
}
/**
 * Picks font-face rules from the given style sheets.
 *
 * Sheets whose rules cannot be read are skipped; the failure is logged when
 * the thrown value is an `Error`.
 */
function getWebFonts(styleSheets: StyleSheetList) {
  const collected: CSSFontFaceRule[] = [];
  for (const sheet of styleSheets) {
    try {
      const fontRules = selectWebFontRules(Array.from(sheet.cssRules));
      // NOTE(review): only the first rule is kept, and only for sheets with
      // more than three font-face rules. Looks like a deliberate limit on how
      // many fonts get inlined — confirm.
      if (fontRules.length <= 3) continue;
      collected.push(fontRules[0]);
    } catch (error) {
      if (!(error instanceof Error)) continue;
      console.log(
        "Error while reading CSS rules from " + sheet.href,
        error.toString()
      );
    }
  }
  return collected;
}
/**
 * Wraps a font-face rule in a small object exposing:
 * - `resolve(options)`: the rule's CSS text with its URLs inlined, resolved
 *   against the owning style sheet's `href` when there is one;
 * - `src()`: the raw value of the rule's `src` descriptor.
 */
function newWebFont(webFontRule: CSSFontFaceRule) {
  const resolve = (options: FetchOptions) => {
    const baseUrl = webFontRule.parentStyleSheet?.href || undefined;
    return inlineAll(webFontRule.cssText, options, baseUrl);
  };
  const src = () => webFontRule.style.getPropertyValue("src");
  return { resolve, src };
}
/**
 * Keeps only the `@font-face` rules whose `src` descriptor contains a URL
 * that `shouldProcess` accepts.
 */
function selectWebFontRules(cssRules: CSSRule[]): CSSFontFaceRule[] {
  const selected: CSSFontFaceRule[] = [];
  for (const rule of cssRules) {
    if (rule.type !== CSSRule.FONT_FACE_RULE) continue;
    const fontFace = rule as CSSFontFaceRule;
    if (shouldProcess(fontFace.style.getPropertyValue("src")))
      selected.push(fontFace);
  }
  return selected;
}
export { resolveAll };

View File

@@ -0,0 +1,81 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2022 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { FetchOptions, fetchResource } from "./fetch";
import { inlineAll } from "./inliner";
import { isDataUrl } from "./utils";
/**
 * Inlines every `<img>` and every inline-styled background image below `root`
 * as data URLs.
 *
 * Inlining is best-effort: individual failures are logged and never reject the
 * returned promise, which resolves only after *all* tasks have finished.
 */
async function inlineAllImages(root: HTMLElement, options: FetchOptions) {
  const imageNodes = root.querySelectorAll("img");
  console.log("total images", imageNodes.length);

  const tasks: Promise<unknown>[] = [];
  for (const image of imageNodes) {
    tasks.push(inlineImage(image, options));
  }

  const backgroundImageNodes = root.querySelectorAll<HTMLElement>(
    `[style*="background-image:"],[style*="background:"]`
  );
  for (const node of backgroundImageNodes) {
    tasks.push(inlineBackground(node, options));
  }

  // `Promise.all` would settle on the first rejection and let this function
  // return while the remaining images are still being processed.
  const outcomes = await Promise.allSettled(tasks);
  for (const outcome of outcomes) {
    if (outcome.status === "rejected") console.error(outcome.reason);
  }
}
export { inlineAllImages };
/**
 * Replaces the source of an `<img>` with a data URL of the same resource.
 *
 * @returns The element once the inlined source has loaded; `null` when the
 * image is already a data URL, could not be downloaded, or failed to load
 * after inlining.
 */
async function inlineImage(element: HTMLImageElement, options: FetchOptions) {
  // Check the same URL that is going to be fetched.
  const source = element.currentSrc || element.src;
  if (isDataUrl(source)) return null;

  const dataURL = await fetchResource(source, options);
  if (!dataURL) return null;
  if (dataURL === "data:,") {
    element.removeAttribute("src");
    return element;
  }

  return new Promise<HTMLImageElement | null>((resolve) => {
    // A <picture> wrapper would keep selecting among its <source> children,
    // so it is replaced by the bare <img>.
    if (element.parentElement?.tagName === "PICTURE") {
      element.parentElement.replaceWith(element);
    }
    element.onload = () => resolve(element);
    // for any image with invalid src (such as <img src />), just ignore it
    element.onerror = () => resolve(null);
    element.src = dataURL;
    element.removeAttribute("srcset");
  });
}
/**
 * Inlines the URLs found in the element's inline `background-image` value.
 * Elements without such a value are returned unchanged.
 */
async function inlineBackground(
  backgroundNode: HTMLElement,
  options: FetchOptions
) {
  const { style } = backgroundNode;
  const current = style.getPropertyValue("background-image");
  if (current) {
    const inlined = await inlineAll(current, options);
    style.setProperty("background-image", inlined);
  }
  return backgroundNode;
}

View File

@@ -0,0 +1,500 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2022 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import purify from "dompurify";
import { Readability } from "@mozilla/readability";
import { injectCss } from "./utils";
import { app, h, text } from "hyperapp";
import {
getInlinedNode,
toBlob,
toJpeg,
toPixelData,
toPng
} from "./domtoimage";
import { InlineOptions } from "./types";
import { FetchOptions } from "./fetch";
// Readability instance with the `PRESENTATIONAL_ATTRIBUTES` list exposed so
// that `clipArticle` can overwrite it.
type ReadabilityEnhanced = Readability<string> & {
PRESENTATIONAL_ATTRIBUTES: string[];
};
// CSS class names the clipper adds to the page during manual node selection.
const CLASSES = {
nodeHover: "nn-node-selection--hover",
nodeSelected: "nn-node-selection--selected",
nodeSelectionContainer: "nn-node-selection-container"
};
// `canSelect` rejects any target that has, or sits inside an element that has,
// one of these classes.
const BLACKLIST = [CLASSES.nodeSelected, CLASSES.nodeSelectionContainer];
// Fetch settings used by every clip operation in this module.
const fetchOptions: FetchOptions = {
bypassCors: true,
corsHost: "https://cors.eu.org",
crossOrigin: "anonymous",
noCache: true
};
// Inlining settings passed to `getInlinedNode`; `clipScreenshot` supplies its
// own set with fonts enabled.
const inlineOptions: InlineOptions = {
fonts: false,
images: true,
stylesheets: true
};
/**
 * Produces an inlined copy of the document body plus a minimal `<head>` that
 * carries the document title.
 *
 * @param document Document to clip.
 * @param styles Currently unused; kept for call-site compatibility.
 * @param onlyVisible When true, nodes outside the viewport are filtered out.
 * @returns `{ body, head }`, or an empty object when inlining produced no body.
 */
async function getPage(
  document: Document,
  styles: boolean,
  onlyVisible = false
) {
  const body = await getInlinedNode(document.body, {
    raster: true,
    fetchOptions,
    inlineOptions,
    filter: (node) => !onlyVisible || isElementInViewport(node)
  });
  if (!body) return {};

  const head = document.createElement("head");
  // `head.title` is the global `title` *attribute*; the document title has to
  // be a <title> child element to survive serialization as a title.
  const title = document.createElement("title");
  title.textContent = document.title;
  head.appendChild(title);

  return {
    body,
    head
  };
}
/**
 * Builds a detached HTML document by writing the serialized `head` followed by
 * the serialized `body` into a freshly created document.
 */
function toDocument(head: HTMLElement, body: HTMLElement) {
  const doc = document.implementation.createHTMLDocument();
  doc.open();
  const markup = [head.outerHTML, body.outerHTML];
  doc.write(...markup);
  doc.close();
  return doc;
}
/**
 * Clips the page (optionally only its visible part) into a standalone HTML
 * string prefixed with a doctype.
 *
 * @returns The HTML string, or `null` when the page could not be inlined.
 */
async function clipPage(
  document: Document,
  withStyles: boolean,
  onlyVisible: boolean
): Promise<string | null> {
  const { body, head } = await getPage(document, withStyles, onlyVisible);
  if (!body || !head) return null;

  const html = toDocument(head, body).documentElement.outerHTML;
  return "<!doctype html>\n" + html;
}
/**
 * Undoes everything node-selection mode added to the page: selection classes,
 * the control container and the hover/click listeners. The work is deferred
 * to a zero-delay timeout.
 */
const cleanup = () => {
  setTimeout(() => {
    const selected = document.querySelectorAll(`.${CLASSES.nodeSelected}`);
    for (const node of selected) {
      if (node instanceof HTMLElement)
        node.classList.remove(CLASSES.nodeSelected);
    }

    const containers = document.querySelectorAll(
      `.${CLASSES.nodeSelectionContainer}`
    );
    for (const container of containers) container.remove();

    removeHoverListeners(document);
    removeClickHandlers(document);
  }, 0);
};
/**
 * Serializes a single element after sanitizing it with `purifyBody`.
 *
 * @returns The sanitized markup, or `null` when no element is given.
 */
function clipNode(
  element: HTMLElement | null | undefined,
  keepStyles = true
): string | null {
  return element ? purifyBody(element.outerHTML, keepStyles).outerHTML : null;
}
/**
 * Extracts the main article of `doc` with Readability and wraps it in a
 * minimal HTML document that reuses the clipped head.
 *
 * @returns The article markup, or `null` when the page could not be inlined.
 */
async function clipArticle(
  doc: Document,
  withStyles: boolean
): Promise<string | null> {
  const page = await getPage(doc, withStyles);
  const { body, head } = page;
  if (!body || !head) return null;

  const reader = new Readability(toDocument(head, body));
  // NOTE(review): this overrides Readability's own list; presumably so that
  // attributes missing from it (e.g. `style`) are not stripped — confirm
  // against the Readability source.
  const presentationalAttributes = [
    "align",
    "background",
    "bgcolor",
    "border",
    "cellpadding",
    "cellspacing",
    "frame",
    "hspace",
    "rules",
    "valign",
    "vspace"
  ];
  (reader as ReadabilityEnhanced).PRESENTATIONAL_ATTRIBUTES =
    presentationalAttributes;

  const article = reader.parse();
  const headHtml = head?.outerHTML || "";
  const articleHtml = article?.content || "";
  return `<html>${headHtml}<body>${articleHtml}</body></html>`;
}
// async function clipSimplifiedArticle(doc: Document): Promise<string | null> {
// const article = await clipArticle(doc);
// if (!article) return null;
// return purifyBody(article, false).outerHTML;
// }
/**
 * Sanitizes an HTML string with DOMPurify and returns the resulting DOM node.
 *
 * @param keepStyles When true, `style`/`class`/`id` attributes and `<use>` are
 * additionally allowed; when false, those attributes and `<style>` tags are
 * forbidden.
 */
function purifyBody(htmlString: string, keepStyles = true) {
  const styleAttributes = ["style", "class", "id"];
  const sanitized = purify.sanitize(htmlString, {
    RETURN_DOM: true,
    KEEP_CONTENT: false,
    ADD_TAGS: keepStyles ? ["use"] : [],
    ADD_ATTR: keepStyles ? styleAttributes : [],
    FORBID_ATTR: keepStyles ? [] : styleAttributes,
    FORBID_TAGS: keepStyles ? [] : ["style"],
    ADD_DATA_URI_TAGS: ["style"],
    CUSTOM_ELEMENT_HANDLING: {
      // custom-element attributes are checked against this pattern
      attributeNameCheck: /notesnook/
    }
  });
  return sanitized as HTMLElement;
}
/**
 * Renders `target` (default: the document body) to an image.
 *
 * For "jpeg" and "png" the result is an `<img>` tag whose `src` is the
 * rendered image and whose size is the body's scroll size; for "raw" the
 * value produced by `toBlob` is returned as is.
 */
async function clipScreenshot<
  TOutputFormat extends "jpeg" | "png" | "raw",
  TOutput extends TOutputFormat extends "jpeg"
    ? string
    : TOutputFormat extends "png"
    ? string
    : Blob | undefined
>(
  target?: HTMLElement,
  output: TOutputFormat = "jpeg" as TOutputFormat
): Promise<TOutput> {
  const node = target || document.body;
  const producesMarkup = output === "jpeg" || output === "png";
  const render = !producesMarkup
    ? toBlob
    : output === "jpeg"
    ? toJpeg
    : toPng;

  const screenshot = await render(node, {
    quality: 1,
    backgroundColor: "white",
    width: document.body.scrollWidth,
    height: document.body.scrollHeight,
    fetchOptions,
    inlineOptions: {
      fonts: true,
      images: true,
      stylesheets: true
    }
  });

  if (!producesMarkup) return screenshot as TOutput;

  // Read the size again after rendering, as the dimensions may have changed.
  const { scrollWidth, scrollHeight } = document.body;
  return `<img width="${scrollWidth}px" height="${scrollHeight}px" src="${screenshot}" />` as TOutput;
}
/**
 * Tells whether `target` may be (de)selected: it must neither carry a
 * blacklisted class nor sit inside an element that does.
 */
function canSelect(target: HTMLElement) {
  const isBlocked = BLACKLIST.some(
    (className) =>
      target.classList.contains(className) ||
      target.closest(`.${className}`) !== null
  );
  return !isBlocked;
}
/** Marks the hovered element with the hover class when it is selectable. */
const onMouseOver = (event: MouseEvent) => {
  const hovered = event.target as HTMLElement;
  const alreadyMarked = hovered.classList.contains(CLASSES.nodeHover);
  if (!alreadyMarked && canSelect(hovered)) {
    hovered.classList.add(CLASSES.nodeHover);
  }
};
/** Removes the hover class from the element the pointer just left. */
const onMouseLeave = (event: MouseEvent) => {
  const { classList } = event.target as HTMLElement;
  if (classList.contains(CLASSES.nodeHover)) {
    classList.remove(CLASSES.nodeHover);
  }
};
/** Attaches the hover highlight handlers to the body of `doc`. */
function registerHoverListeners(doc: Document) {
  const { body } = doc;
  body.addEventListener("mouseout", onMouseLeave);
  body.addEventListener("mouseover", onMouseOver);
}
/** Detaches the hover highlight handlers from the body of `doc`. */
function removeHoverListeners(doc: Document) {
  const { body } = doc;
  body.removeEventListener("mouseout", onMouseLeave);
  body.removeEventListener("mouseover", onMouseOver);
}
/**
 * Toggles the selected state of the clicked element. The default click action
 * is always suppressed; non-selectable targets are left untouched.
 */
const onMouseClick = (event: MouseEvent) => {
  event.preventDefault();

  const clicked = event.target as HTMLElement;
  const { classList } = clicked;
  if (classList.contains(CLASSES.nodeSelected)) {
    classList.remove(CLASSES.nodeSelected);
  } else if (canSelect(clicked)) {
    classList.add(CLASSES.nodeSelected);
  }
};
/** Attaches the selection click handler to the body of `doc`. */
function registerClickListeners(doc: Document) {
  const { body } = doc;
  body.addEventListener("click", onMouseClick);
}
/** Detaches the selection click handler from the body of `doc`. */
function removeClickHandlers(doc: Document) {
  const { body } = doc;
  body.removeEventListener("click", onMouseClick);
}
/**
 * Lets the user pick elements on the page and clips them.
 *
 * Registers the hover/click handlers, injects the selection styles and the
 * "Done"/"Cancel" controls, then waits for the user.
 *
 * @returns A promise that resolves with the markup of a `<div>` wrapping the
 * inlined copies of all selected nodes, rejects with `"Cancelled."` when the
 * user cancels, and rejects with the underlying error when inlining fails.
 */
function enterNodeSelectionMode(doc: Document) {
  setTimeout(() => {
    registerClickListeners(doc);
    registerHoverListeners(doc);
  }, 0);

  injectStyles();

  return new Promise((resolve, reject) => {
    injectNodeSelectionControls(
      async () => {
        try {
          const selectedNodes = document.querySelectorAll(
            `.${CLASSES.nodeSelected}`
          );
          const div = document.createElement("div");
          for (const node of selectedNodes) {
            // Drop the marker class first so it is not part of the clip.
            node.classList.remove(CLASSES.nodeSelected);
            const inlined = await getInlinedNode(node as HTMLElement, {
              raster: false,
              fetchOptions,
              inlineOptions
            });
            if (!inlined) continue;
            div.appendChild(inlined);
          }
          resolve(div.outerHTML);
        } catch (e) {
          // Without this the returned promise would stay pending forever.
          reject(e);
        } finally {
          cleanup();
        }
      },
      () => reject("Cancelled.")
    );
  });
}
export {
clipPage,
clipArticle,
cleanup,
clipNode,
clipScreenshot,
enterNodeSelectionMode
};
// Object bundling the exported functions; it exists so that the `Clipper`
// type below can be derived from it with `typeof`.
const mod = {
clipPage,
clipArticle,
cleanup,
clipNode,
clipScreenshot,
enterNodeSelectionMode
};
// Shape of the clipper API (the functions listed in `mod`).
export type Clipper = typeof mod;
/**
 * Injects the style sheet used by node-selection mode (hover and selected
 * outlines plus the fixed-position control container).
 */
function injectStyles() {
  // `z-index` only accepts a CSS <integer>. `Number.MAX_VALUE` stringifies to
  // "1.7976931348623157e+308", which is not one, so the declaration would be
  // invalid. 2147483647 is the largest 32-bit signed integer.
  const css = `.${CLASSES.nodeHover} {
  border: 1px solid green;
  background-color: rgb(0,0,0,0.05);
  cursor: pointer;
}
.${CLASSES.nodeSelected} {
  border: 2px solid green;
  cursor: pointer;
}
.${CLASSES.nodeSelectionContainer} {
  position: fixed;
  bottom: 0px;
  right: 0px;
  z-index: 2147483647;
}`;
  injectCss(css, "nn-clipper-styles");
}
/**
 * Renders the floating "Done" / "Cancel" panel used during node selection.
 *
 * @param onDone Invoked when "Done" is clicked.
 * @param onCancel Invoked, after `cleanup()`, when "Cancel" is clicked.
 */
function injectNodeSelectionControls(
onDone?: () => void,
onCancel?: () => void
) {
// Host element for the panel; its class positions it (see `injectStyles`) and
// makes `canSelect` ignore clicks inside it.
const controlContainer = document.createElement("div");
controlContainer.classList.add(CLASSES.nodeSelectionContainer);
// The container is attached to the page on the next tick.
setTimeout(() => {
document.body.appendChild(controlContainer);
}, 0);
// The hyperapp view is static: it never reads the app state.
app({
init: {},
view: () =>
h(
"div",
{
style: {
padding: "10px",
backgroundColor: "white",
borderRadius: "5px",
boxShadow: "0px 0px 10px 0px #00000038"
}
},
[
h("p", { style: { marginBottom: "0px", fontSize: "18px" } }, [
text("Notesnook Web Clipper")
]),
h(
"p",
{
style: {
margin: "0px",
marginBottom: "5px",
fontStyle: "italic"
}
},
[text("Click on any element to select it.")]
),
h(
"div",
{
style: {
display: "flex",
alignItems: "center"
}
},
[
h(
"button",
{
onclick: (_state) => onDone?.(),
style: { marginRight: "5px" }
},
[text("Done")]
),
h(
"button",
{
// Cancelling tears the selection UI down before notifying the caller.
onclick: (_state) => {
cleanup();
onCancel?.();
}
},
[text("Cancel")]
)
]
)
]
),
node: controlContainer
});
}
/**
 * Tells whether `el` is at least partially inside the viewport. Text nodes,
 * and nodes without `getBoundingClientRect`, are judged by their parent
 * element when they have one.
 */
function isElementInViewport(el: HTMLElement) {
  const lacksRect =
    el.nodeType === Node.TEXT_NODE || !el.getBoundingClientRect;
  const subject = lacksRect && el.parentElement ? el.parentElement : el;
  return getElementViewportInfo(subject).isInViewport;
}
// Position of an element relative to the viewport, as computed by
// `getElementViewportInfo`.
type ViewportInfo = {
// The element overlaps the viewport on both axes.
isInViewport: boolean;
// In the viewport while crossing at least one of its edges.
isPartiallyInViewport: boolean;
// The element's rectangle lies completely within the viewport.
isInsideViewport: boolean;
// The element extends past the viewport on both sides of both axes.
isAroundViewport: boolean;
// Any of the four edge flags below is set.
isOnEdge: boolean;
// The element crosses the named viewport edge while overlapping the
// viewport along the other axis.
isOnTopEdge: boolean;
isOnRightEdge: boolean;
isOnBottomEdge: boolean;
isOnLeftEdge: boolean;
};
/**
 * Classifies the bounding rectangle of `el` against the current viewport
 * (`window.innerWidth/innerHeight`, falling back to the root element's client
 * size) and reports how the two overlap.
 */
function getElementViewportInfo(el: HTMLElement) {
  const { left, top, width, height } = el.getBoundingClientRect();
  const right = left + width;
  const bottom = top + height;

  const viewportHeight =
    window.innerHeight || document.documentElement.clientHeight;
  const viewportWidth =
    window.innerWidth || document.documentElement.clientWidth;

  // Per-axis containment: fully inside, or spanning beyond both sides.
  const insideX = left >= 0 && right <= viewportWidth;
  const insideY = top >= 0 && bottom <= viewportHeight;
  const aroundX = left < 0 && right > viewportWidth;
  const aroundY = top < 0 && bottom > viewportHeight;

  // Which viewport borders the rectangle crosses.
  const onTop = top < 0 && bottom > 0;
  const onRight = left < viewportWidth && right > viewportWidth;
  const onLeft = left < 0 && right > 0;
  const onBottom = top < viewportHeight && bottom > viewportHeight;

  const onY = insideY || aroundY || onTop || onBottom;
  const onX = insideX || aroundX || onLeft || onRight;

  // A crossed border only counts when the other axis overlaps as well.
  const isOnTopEdge = onTop && onX;
  const isOnRightEdge = onRight && onY;
  const isOnBottomEdge = onBottom && onX;
  const isOnLeftEdge = onLeft && onY;
  const isOnEdge =
    isOnLeftEdge || isOnRightEdge || isOnTopEdge || isOnBottomEdge;

  const isInX = insideX || aroundX || isOnLeftEdge || isOnRightEdge;
  const isInY = insideY || aroundY || isOnTopEdge || isOnBottomEdge;
  const isInViewport = isInX && isInY;

  const result: ViewportInfo = {
    isInViewport,
    isPartiallyInViewport: isInViewport && isOnEdge,
    isInsideViewport: insideX && insideY,
    isAroundViewport: aroundX && aroundY,
    isOnEdge,
    isOnTopEdge,
    isOnRightEdge,
    isOnBottomEdge,
    isOnLeftEdge
  };
  return result;
}

View File

@@ -0,0 +1,71 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2022 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { FetchOptions, fetchResource } from "./fetch";
import { isDataUrl, resolveUrl, escape } from "./utils";
// Matches `url(...)` with optional quotes; group 1 is the URL itself.
const URL_REGEX = /url\(['"]?([^'"]+?)['"]?\)/g;
/** Tells whether `string` contains at least one `url(...)` reference. */
function shouldProcess(string: string) {
  const firstMatchAt = string.search(URL_REGEX);
  return firstMatchAt >= 0;
}
/**
 * Lists the targets of all `url(...)` references in `string`, in order of
 * appearance, leaving out data URLs.
 */
function readUrls(string: string) {
  const urls: string[] = [];
  for (
    let match = URL_REGEX.exec(string);
    match !== null;
    match = URL_REGEX.exec(string)
  ) {
    const candidate = match[1];
    if (!isDataUrl(candidate)) urls.push(candidate);
  }
  return urls;
}
/**
 * Replaces every `url(<url>)` occurrence in `string` with the data URL of the
 * referenced resource.
 *
 * @param string CSS text containing the reference.
 * @param url The URL exactly as it is written in `string`.
 * @param options Fetch options forwarded to `fetchResource`.
 * @param baseUrl When given, `url` is resolved against it before fetching.
 * @returns The CSS text with the reference inlined, or `string` unchanged when
 * the resource could not be downloaded.
 */
async function inline(
  string: string,
  url: string,
  options: FetchOptions,
  baseUrl?: string
) {
  // Keep `url` as written: it is what has to be located in `string`, while the
  // resolved form is only what gets downloaded.
  const resolvedUrl = baseUrl ? resolveUrl(url, baseUrl) : url;
  const dataUrl = await fetchResource(resolvedUrl, options);
  // Leave the reference alone rather than writing `url()` / `url(null)`.
  if (!dataUrl) return string;
  // A replacer function keeps `$` sequences in the payload from being
  // interpreted as replacement patterns.
  return string.replace(
    urlAsRegex(url),
    (_match: string, prefix: string, _url: string, suffix: string) =>
      prefix + dataUrl + suffix
  );
}
/**
 * Builds a global regex matching `url(<urlValue>)` with optional quotes.
 * Groups: 1 = opening `url(` (plus quote), 2 = the URL, 3 = closing (quote
 * plus) `)`.
 */
function urlAsRegex(urlValue: string) {
  const opening = "(url\\(['\"]?)";
  const closing = "(['\"]?\\))";
  return new RegExp(`${opening}(${escape(urlValue)})${closing}`, "g");
}
/**
 * Inlines all non-data `url(...)` references of `string`, one after another.
 * Text without any `url(...)` is returned as is.
 */
async function inlineAll(
  string: string,
  options: FetchOptions,
  baseUrl?: string
) {
  if (!shouldProcess(string)) return string;

  let inlined = string;
  for (const url of readUrls(string)) {
    inlined = await inline(inlined, url, options, baseUrl);
  }
  return inlined;
}
export { shouldProcess, inlineAll, readUrls };

View File

@@ -0,0 +1,425 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2022 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { constructUrl, FetchOptions } from "./fetch";
import { compare, calculate, SpecificityArray } from "specificity";
import { tokenize } from "./css-tokenizer";
import { stringify, parse, SelectorType } from "css-what";
// CSS shorthand property names. `getElementStyles` looks these up on every
// rule in addition to the property names the rule itself enumerates.
const SHORTHANDS = [
"animation",
"background",
"border",
"border-block-end",
"border-block-start",
"border-bottom",
"border-color",
"border-image",
"border-inline-end",
"border-inline-start",
"border-left",
"border-radius",
"border-right",
"border-style",
"border-top",
"border-width",
"column-rule",
"columns",
"contain-intrinsic-size",
"flex",
"flex-flow",
"font",
"gap",
"grid",
"grid-area",
"grid-column",
"grid-row",
"grid-template",
"grid-gap",
"list-style",
"margin",
"mask",
"offset",
"outline",
"overflow",
"padding",
"place-content",
"place-items",
"place-self",
"scroll-margin",
"scroll-padding",
"text-decoration",
"text-emphasis",
"transition"
];
/**
 * Makes linked style sheets readable: every non-print `<link>` sheet whose
 * rules cannot be accessed is downloaded and swapped for an inline `<style>`
 * element. `@import` rules are resolved afterwards.
 */
export async function inlineStylesheets(options: FetchOptions) {
  for (const sheet of document.styleSheets) {
    if (skipStyleSheet(sheet)) continue;

    const owner = sheet.ownerNode;
    if (!sheet.href || !(owner instanceof HTMLLinkElement)) continue;

    try {
      // Merely touching `cssRules` throws when the sheet is not accessible.
      void sheet.cssRules.length;
    } catch (error) {
      const replacement = await downloadStylesheet(owner.href, options);
      if (replacement) owner.replaceWith(replacement);
      console.error("Failed to access sheet", owner.href, error);
    }
  }
  await resolveImports(options);
}
/**
 * Replaces `@import` rules of all non-print style sheets with downloaded
 * `<style>` elements.
 *
 * Each successfully downloaded import is inserted before the importing
 * sheet's owner node (or appended to `<head>` when there is none) and the
 * `@import` rule is removed. Imports that fail to download are left in place.
 */
async function resolveImports(options: FetchOptions) {
  for (const sheet of document.styleSheets) {
    if (skipStyleSheet(sheet)) continue;

    // The rule index is local to each sheet and must not advance after a
    // deletion, because the following rule then moves into the same slot.
    let index = 0;
    while (index < sheet.cssRules.length) {
      const rule = sheet.cssRules[index];
      if (rule.type === CSSRule.IMPORT_RULE) {
        const href = (rule as CSSImportRule).href;
        const result = await downloadStylesheet(href, options);
        if (result) {
          if (sheet.ownerNode) sheet.ownerNode.before(result);
          else document.head.appendChild(result);
          sheet.deleteRule(index);
          continue;
        }
      }
      ++index;
    }
  }
}
/**
 * Downloads a style sheet and wraps its text in a detached `<style>` element.
 *
 * The original location is stored in the element's `href` attribute.
 *
 * @returns The `<style>` element, or `false` when the download failed.
 */
async function downloadStylesheet(href: string, options: FetchOptions) {
  try {
    const response = await fetch(constructUrl(href, options));
    if (!response.ok) return false;

    const style = document.createElement("style");
    // `textContent` stores the CSS verbatim; `innerText` would turn every
    // newline into a <br> element and thereby drop it from the style text.
    style.textContent = await response.text();
    style.setAttribute("href", href);
    return style;
  } catch (e) {
    console.error("Failed to inline stylesheet", href, e);
    return false;
  }
}
// Elements that expose a `style` declaration.
type StyleableElement = HTMLElement | SVGElement;
// A declaration block together with the base URL of the sheet it came from.
type BaseStyle = {
// Declarations of the matching rule (or the element's inline style).
rule: CSSStyleDeclaration;
// Base URL produced by `getBaseUrl`; `null` when there is none.
href: URL | null;
};
// A style plus the specificity of the selector it was recorded under.
type SpecifiedStyle = BaseStyle & {
specificity: SpecificityArray;
};
// A style that applies to a pseudo element of the host element.
type PseudoElementStyle = BaseStyle & {
// Pseudo-element part of the selector, as produced by `parsePseudoSelector`.
pseudoElement: string;
};
// Per-element lists of matching rules, filled by `walkRules`.
type CSSStyledElements = Map<StyleableElement, SpecifiedStyle[]>;
type CSSPseudoElements = Map<StyleableElement, PseudoElementStyle[]>;
/**
 * Indexes the rules of all non-print style sheets by the elements they match
 * and returns accessors for the resulting per-element styles.
 *
 * @param documentStyles Declaration used as fallback source when resolving
 * CSS variables.
 * @returns `get(element)` — the merged styles of an element (matching rules
 * ordered by specificity, then the inline style), and
 * `getPseudo(element, pseudoElement)` — the merged styles of one of its
 * pseudo elements. Both return `undefined` when nothing matched.
 */
export function cacheStylesheets(documentStyles: CSSStyleDeclaration) {
  const styledElements: CSSStyledElements = new Map();
  const styledPseudoElements: CSSPseudoElements = new Map();

  for (const sheet of document.styleSheets) {
    if (skipStyleSheet(sheet)) continue;

    // Downloaded sheets are inline <style> elements that carry their original
    // location in an `href` attribute.
    let href = sheet.href || undefined;
    if (!href && sheet.ownerNode instanceof HTMLElement)
      href = sheet.ownerNode.getAttribute("href") || undefined;

    walkRules(
      sheet.cssRules,
      documentStyles,
      styledElements,
      styledPseudoElements,
      href
    );
  }

  return {
    getPseudo(element: StyleableElement, pseudoElement: string) {
      const styles = styledPseudoElements
        .get(element)
        ?.filter((s) => s.pseudoElement.includes(pseudoElement));
      if (!styles || !styles.length) return;
      return getElementStyles(element, styles, documentStyles);
    },
    get(element: StyleableElement) {
      const styles = styledElements.get(element);
      if (!styles) return;

      // Sort a copy: sorting and pushing into the cached array would append
      // another inline-style entry on every call and reorder the cache.
      const allStyles = [...styles].sort((a, b) =>
        compare(a.specificity, b.specificity)
      );
      // The inline style goes last so that it is applied after all rules.
      allStyles.push({
        rule: element.style,
        specificity: [0, 0, 0, 0],
        href: null
      });
      return getElementStyles(element, allStyles, documentStyles);
    }
  };
}
/**
 * Records, for every element of the document, which style rules of `cssRules`
 * apply to it.
 *
 * @param cssRules Rules to inspect; matching `@media` and supported
 * `@supports` blocks are walked recursively.
 * @param documentStyles Only forwarded to the recursive calls.
 * @param styled Receives the rules matching each element, with specificity.
 * @param pseudoElements Receives the `before`/`after` rules per host element.
 * @param href Location of the owning sheet; stored per style as a base URL.
 */
function walkRules(
cssRules: CSSRuleList,
documentStyles: CSSStyleDeclaration,
styled: CSSStyledElements,
pseudoElements: CSSPseudoElements,
href?: string
) {
for (const rule of cssRules) {
if (rule instanceof CSSStyleRule) {
// Selectors mentioning before/after are split into a host selector and the
// pseudo-element part; the rule is then recorded on every host element.
if (isPseudoSelector(rule.selectorText)) {
const selectors = parsePseudoSelector(rule.selectorText);
for (const selector of selectors) {
if (!selector || !selector.selector.trim()) continue;
const elements = document.querySelectorAll(
selector.selector
) as NodeListOf<StyleableElement>;
for (const element of elements) {
if (
!(element instanceof HTMLElement) &&
!(element instanceof SVGElement)
)
continue;
const styles: PseudoElementStyle[] =
pseudoElements.get(element) || [];
pseudoElements.set(element, styles);
styles.push({
rule: rule.style,
href: getBaseUrl(href),
pseudoElement: selector.pseudoElement
});
}
}
}
// Regular matching: this runs for every style rule, including the ones
// handled above.
const elements = document.querySelectorAll(
rule.selectorText
) as NodeListOf<StyleableElement>;
for (const element of elements) {
if (
!(element instanceof HTMLElement) &&
!(element instanceof SVGElement)
)
continue;
const parts = rule.selectorText.split(",");
const styles: SpecifiedStyle[] = styled.get(element) || [];
styled.set(element, styles);
// The rule is recorded once, with the specificity of the first
// comma-separated selector part that `calculate` accepts.
for (const part of parts) {
try {
const specificity = calculate(part)[0];
styles.push({
specificity: specificity.specificityArray,
rule: rule.style,
href: getBaseUrl(href)
});
break;
} catch (e) {
console.error(e, href && getBaseUrl(href));
// ignore
}
}
}
} else if (
rule instanceof CSSMediaRule &&
window.matchMedia(rule.conditionText).matches
) {
// Only media blocks that currently match are taken into account.
walkRules(rule.cssRules, documentStyles, styled, pseudoElements, href);
} else if (
rule instanceof CSSSupportsRule &&
CSS.supports(rule.conditionText)
) {
walkRules(rule.cssRules, documentStyles, styled, pseudoElements, href);
}
}
}
/**
 * Merges a list of styles into one fresh style declaration.
 *
 * Styles are applied in the given order, so later entries win. CSS variables
 * are resolved against the element's computed style (falling back to
 * `documentStyles`) and `url(...)` references are made absolute using the
 * base URL of the sheet each style came from.
 *
 * @param element Element the styles belong to.
 * @param styles Styles to merge, lowest precedence first.
 * @param documentStyles Fallback source for CSS variable values.
 */
function getElementStyles(
  element: StyleableElement,
  styles: BaseStyle[],
  documentStyles: CSSStyleDeclaration
) {
  const newStyles = newStyleDeclaration();
  const computedStyle = lazyComputedStyle(element);
  // Properties whose value is always taken from the computed style.
  const overrides = ["display"];

  for (const style of styles) {
    const resolveVariable = (variable: string) =>
      computedStyle.style.getPropertyValue(variable) ||
      documentStyles.getPropertyValue(variable);

    const resolveUrl = (url: string) => {
      if (url.startsWith("data:") || !style.href) return url;
      // Resolving against the base handles relative, root-relative,
      // protocol-relative and already absolute URLs alike; plain string
      // concatenation would mangle the last two.
      return new URL(url, style.href).href;
    };

    for (const property of [...style.rule, ...SHORTHANDS]) {
      let value = style.rule.getPropertyValue(property);
      if (overrides.includes(property))
        value = computedStyle.style.getPropertyValue(property);
      if (!value.trim()) continue;

      setStyle(
        newStyles,
        property,
        value,
        resolveVariable,
        resolveUrl,
        style.rule.getPropertyPriority(property)
      );
    }
  }
  return newStyles;
}
/**
 * Writes `property` to `target` after resolving the CSS variables and the
 * `url(...)` references contained in `value`.
 *
 * @param get Looks up the value of a CSS variable.
 * @param resolveUrl Maps a URL found in the value to its final form.
 * @param priority Priority passed on to `setProperty`.
 */
function setStyle(
  target: CSSStyleDeclaration,
  property: string,
  value: string,
  get: (variable: string) => string,
  resolveUrl: (variable: string) => string,
  priority?: string
) {
  const withVariables = resolveCssVariables(value, get);
  const withUrls = resolveCssUrl(withVariables, resolveUrl);
  target.setProperty(property, withUrls, priority);
}
/**
 * Creates an empty style declaration that is not attached to any element, by
 * inserting a dummy rule into a throw-away style sheet.
 */
function newStyleDeclaration() {
  const scratchSheet = new CSSStyleSheet();
  const ruleIndex = scratchSheet.insertRule(".dummy{}");
  const dummyRule = scratchSheet.cssRules[ruleIndex] as CSSStyleRule;
  return dummyRule.style;
}
/**
 * Returns an object whose `style` property yields the element's computed
 * style. `getComputedStyle` is called on first access only; the result is
 * reused afterwards.
 */
function lazyComputedStyle(element: StyleableElement) {
  let cached: CSSStyleDeclaration | undefined;
  const read = () => {
    cached = cached || getComputedStyle(element);
    return cached;
  };

  const holder = {};
  Object.defineProperty(holder, "style", { get: read });
  return holder as { style: CSSStyleDeclaration };
}
/**
 * Tells whether a style sheet should be ignored, which is the case when its
 * comma-separated media list contains the bare `print` type.
 */
function skipStyleSheet(sheet: StyleSheet) {
  const mediaTypes = sheet.media.mediaText.split(",");
  return mediaTypes.some((mediaType) => mediaType.trim() === "print");
}
/**
 * Replaces `var(...)` references in a CSS value with concrete values.
 *
 * @param css CSS value to process.
 * @param get Returns the value of a CSS variable, or a falsy value when the
 * variable is unknown.
 * @returns The value with the tokens re-joined.
 */
function resolveCssVariables(css: string, get: (variable: string) => string) {
const tokens = tokenize(css);
const finalTokens: string[] = [];
for (let i = 0; i < tokens.length; ++i) {
const token = tokens[i];
if (token === "var") {
// The token after `var` is consumed as its argument list; the first and last
// characters are dropped — presumably the enclosing parentheses (TODO confirm
// against the tokenizer).
const args = tokenize(tokens[++i].slice(1, -1));
// NOTE(review): assumes the arguments tokenize as
// [name, separator, whitespace, ...fallback] — verify.
const [variable, operator, space, ...restArgs] = args;
const value = get(variable);
if (value) {
finalTokens.push(value);
} else if (operator && restArgs.length <= 1) {
// Unknown variable with a short fallback: emit the fallback token (or the
// third token when there is none).
finalTokens.push(restArgs[0] || space);
} else if (operator && restArgs.length === 2) {
// Two fallback tokens are joined and resolved recursively.
finalTokens.push(resolveCssVariables(restArgs.join(""), get));
}
} else if (token.startsWith("(") && token.endsWith(")")) {
// Parenthesized groups may contain further var() references.
finalTokens.push("(", resolveCssVariables(token.slice(1, -1), get), ")");
} else finalTokens.push(token);
}
return finalTokens.join("");
}
/**
 * Rewrites the `url(...)` references of a CSS value.
 *
 * @param css CSS value to process.
 * @param get Maps the URL found in the value to its replacement.
 * @returns The value with each rewritten reference emitted as `url("...")`.
 */
function resolveCssUrl(css: string, get: (url: string) => string) {
const tokens = tokenize(css);
const finalTokens: string[] = [];
for (let i = 0; i < tokens.length; ++i) {
const token = tokens[i];
// References whose argument token starts with `(data` are passed through
// untouched.
if (token === "url" && !tokens[i + 1].startsWith("(data")) {
// Drops two characters on each side of the argument token — presumably the
// parenthesis and the quote of `("...")` (TODO confirm the tokenizer output).
const url = tokens[++i].slice(2, -2);
const resolvedUrl = get(url);
// When `get` returns a falsy value the whole reference is omitted.
if (resolvedUrl) {
finalTokens.push(token);
finalTokens.push('("');
finalTokens.push(resolvedUrl);
finalTokens.push('")');
}
} else finalTokens.push(token);
}
return finalTokens.join("");
}
/**
 * Derives the directory URL of a style sheet location, i.e. the location with
 * its last path segment (and any query or fragment) removed.
 *
 * @param href Sheet location; values starting with "/" are taken relative to
 * the current document's origin.
 * @returns The directory URL (ending in "/"), or `null` for an empty `href`.
 */
function getBaseUrl(href?: string | null) {
  if (!href) return null;

  const absolute = href.startsWith("/")
    ? `${document.location.origin}${href}`
    : href;
  const { origin, pathname } = new URL(absolute);
  const directory = pathname.substring(0, pathname.lastIndexOf("/"));
  return new URL(`${origin}${directory}/`);
}
/**
 * Tells whether a selector text mentions a `before` or `after` pseudo
 * element, in either the one-colon or the two-colon notation.
 */
function isPseudoSelector(text: string) {
  const markers = [":before", ":after", "::after", "::before"];
  return markers.some((marker) => text.includes(marker));
}
/**
 * Splits each comma-separated part of `selector` at its first `before` /
 * `after` pseudo token.
 *
 * @returns One `{ selector, pseudoElement }` entry per part that contains
 * such a token: `selector` is everything before it, `pseudoElement` the token
 * and everything after it. Parts without one are left out.
 */
function parsePseudoSelector(selector: string) {
  return parse(selector).flatMap((part) => {
    const pseudoAt = part.findIndex(
      (s) =>
        (s.type === SelectorType.Pseudo ||
          s.type === SelectorType.PseudoElement) &&
        (s.name === "after" || s.name === "before")
    );
    if (pseudoAt <= -1) return [];

    return [
      {
        selector: stringify([part.slice(0, pseudoAt)]),
        pseudoElement: stringify([part.slice(pseudoAt)])
      }
    ];
  });
}

View File

@@ -0,0 +1,52 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2022 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { FetchOptions } from "./fetch";
// Part of the page a clip covers.
export type ClipArea = "full-page" | "visible" | "selection" | "article";
// Form in which the clipped content is produced.
export type ClipMode = "simplified" | "screenshot" | "complete";
// | "full"
// | "article"
// | "simple-article"
// | "full-screenshot"
// | "screenshot"
// | "manual";
// Clipped content in serialized form.
export type ClipData = string;
// Predicate over a node; returns a boolean.
export type Filter = (node: HTMLElement) => boolean;
// Switches for the kinds of external resources that get inlined.
export type InlineOptions = {
stylesheets?: boolean;
fonts?: boolean;
images?: boolean;
};
// Settings accepted by the DOM-to-image helpers.
export type Options = {
// Node predicate; see `Filter`.
filter?: Filter;
// Callback that receives an element.
onCloned?: (document: HTMLElement) => void;
backgroundColor?: CSSStyleDeclaration["backgroundColor"];
width?: number;
height?: number;
style?: CSSStyleDeclaration;
quality?: number;
raster?: boolean;
scale?: number;
// How remote resources are requested.
fetchOptions?: FetchOptions;
// Which resource kinds are inlined.
inlineOptions?: InlineOptions;
};

View File

@@ -0,0 +1,183 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2022 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Only WOFF and EOT mime types for fonts are 'real'
* see http://www.iana.org/assignments/media-types/media-types.xhtml
*/
// MIME types shared by more than one extension below.
const WOFF = "application/font-woff";
const JPEG = "image/jpeg";
// Lookup from lowercase file extension to MIME type, used by `mimeType`.
// Note that `woff2` is mapped to the same type as `woff`.
const mimes = {
woff: WOFF,
woff2: WOFF,
ttf: "application/font-truetype",
eot: "application/vnd.ms-fontobject",
png: "image/png",
jpg: JPEG,
jpeg: JPEG,
gif: "image/gif",
tiff: "image/tiff",
svg: "image/svg+xml"
};
/**
 * Extracts the file extension of a URL: the text after the last dot that is
 * followed only by non-dot, non-slash characters up to a `?` or the end.
 *
 * @returns The extension without the dot, or `""` when there is none.
 */
function parseExtension(url: string) {
  const found = /\.([^./]*?)(\?|$)/g.exec(url);
  return found ? found[1] : "";
}
/**
 * Looks up the MIME type for the extension of `url` (case-insensitively).
 *
 * @returns The MIME type, or `""` for unknown extensions.
 */
function mimeType(url: string) {
  const key = parseExtension(url).toLowerCase() as keyof typeof mimes;
  return mimes[key] || "";
}
/** Tells whether `url` starts with the `data:` scheme. */
function isDataUrl(url: string) {
  return url.startsWith("data:");
}
/**
 * Converts a canvas to a PNG blob by decoding the base64 payload of its data
 * URL.
 */
function asBlob(canvas: HTMLCanvasElement) {
  const [, base64Payload] = canvas.toDataURL().split(",");
  const bytes = Uint8Array.from(atob(base64Payload), (char) =>
    char.charCodeAt(0)
  );
  return new Blob([bytes], {
    type: "image/png"
  });
}
/**
 * Serialises a canvas to a Blob, asynchronously.
 *
 * Uses the canvas' own `toBlob` when it is available and falls back to the
 * data-URL based `asBlob` conversion otherwise.
 *
 * @param canvas Canvas to serialise.
 * @returns A promise for the Blob; the `toBlob` path may resolve to `null`.
 */
function canvasToBlob(canvas: HTMLCanvasElement) {
  // Fallback for canvases without toBlob: convert synchronously.
  if (!canvas.toBlob) return Promise.resolve(asBlob(canvas));

  return new Promise<Blob | null>((resolve) => {
    canvas.toBlob(resolve);
  });
}
/**
 * Resolves `url` against `baseUrl` with the help of a scratch HTML document.
 *
 * A <base> element carrying `baseUrl` is placed in the scratch document's
 * head and an <a> element carrying `url` in its body; the anchor's `href`
 * is then read back and returned.
 *
 * @param url URL to resolve (may be relative).
 * @param baseUrl URL that relative references are resolved against.
 * @returns The `href` reported by the anchor element.
 */
function resolveUrl(url: string, baseUrl: string) {
  const scratchDocument = document.implementation.createHTMLDocument();

  const baseElement = scratchDocument.createElement("base");
  scratchDocument.head.appendChild(baseElement);

  const anchor = scratchDocument.createElement("a");
  scratchDocument.body.appendChild(anchor);

  baseElement.href = baseUrl;
  anchor.href = url;

  return anchor.href;
}
// Module-level counter appended to every id handed out by uid().
let index = 0;

/**
 * Produces an identifier of the form "u" + four random base-36 characters +
 * the current counter value; the counter is incremented on every call.
 *
 * @returns The generated identifier.
 */
function uid() {
  /* see http://stackoverflow.com/a/6248722/2519373 */
  const randomPart = Math.floor(Math.random() * 36 ** 4)
    .toString(36)
    .padStart(4, "0");
  return "u" + randomPart + index++;
}
/**
 * Assembles a base64 data URL from its parts.
 *
 * @param content Payload, inserted verbatim after ";base64,".
 * @param type MIME type, inserted verbatim after "data:".
 * @returns The string "data:<type>;base64,<content>".
 */
function dataAsUrl(content: string, type: string) {
  return `data:${type};base64,${content}`;
}
/**
 * Prefixes every regular-expression metacharacter (and "/") in `string`
 * with a backslash.
 *
 * @param string Text to escape.
 * @returns The text with each of . * + ? ^ $ { } ( ) | [ ] / \ escaped.
 */
function escape(string: string) {
  const specialCharacters = /([.*+?^${}()|[\]/\\])/g;
  const escaped = string.replace(specialCharacters, "\\$1");
  return escaped;
}
/**
 * Creates a pass-through step that waits before forwarding its argument.
 *
 * @param ms Timeout handed to `setTimeout`.
 * @returns A function that takes a value and returns a promise which
 *          resolves with that same value once the timeout fires.
 */
function delay(ms: number) {
  return <T>(arg: T) =>
    new Promise<T>((resolve) => {
      setTimeout(() => resolve(arg), ms);
    });
}
/**
 * Copies an array-like object into a real array by reading indices
 * 0 .. length - 1.
 *
 * @param arrayLike Any object with a numeric `length` and indexed entries.
 * @returns A new array holding the indexed entries in order.
 */
function asArray<T>(arrayLike: ArrayLike<T>) {
  // Index-based on purpose: the source is read by position, not iterated.
  return Array.from(
    { length: arrayLike.length },
    (_unused, position) => arrayLike[position]
  );
}
/**
 * Percent-encodes the characters "%", "#" and newline.
 *
 * "%" is handled first so that the percent signs introduced by the other
 * two replacements are not encoded a second time.
 *
 * @param string Text to encode.
 * @returns The text with "%" -> "%25", "#" -> "%23" and "\n" -> "%0A".
 */
function escapeXhtml(string: string) {
  const percentEncoded = string.replace(/%/g, "%25");
  const hashEncoded = percentEncoded.replace(/#/g, "%23");
  return hashEncoded.replace(/\n/g, "%0A");
}
/**
 * Measures the width of `node`: its `scrollWidth` plus the computed widths
 * of its left and right borders.
 *
 * @param node Element to measure.
 * @returns The sum of the three values.
 */
function width(node: HTMLElement) {
  const borderLeft = px(node, "border-left-width");
  const borderRight = px(node, "border-right-width");
  const contentWidth = node.scrollWidth;
  return contentWidth + borderLeft + borderRight;
}
/**
 * Measures the height of `node`: its `scrollHeight` plus the computed widths
 * of its top and bottom borders.
 *
 * @param node Element to measure.
 * @returns The sum of the three values.
 */
function height(node: HTMLElement) {
  const borderTop = px(node, "border-top-width");
  const borderBottom = px(node, "border-bottom-width");
  const contentHeight = node.scrollHeight;
  return contentHeight + borderTop + borderBottom;
}
/**
 * Reads a computed style property of `node` and parses it as a number.
 *
 * The first "px" occurrence is removed from the computed value before it is
 * handed to `parseFloat`.
 *
 * @param node Element whose computed style is read.
 * @param styleProperty CSS property name, e.g. "border-left-width".
 * @returns The parsed number; NaN when the value does not start with one.
 */
function px(node: HTMLElement, styleProperty: string) {
  const computedStyle = getComputedStyle(node);
  const rawValue = computedStyle.getPropertyValue(styleProperty);
  const withoutUnit = rawValue.replace("px", "");
  return parseFloat(withoutUnit);
}
/**
 * Installs `rules` as a `<style>` element with the given id inside the
 * document's `<head>`, replacing any element that already carries that id.
 *
 * The new element is placed directly before the stylesheet returned by
 * `getRootStylesheet()`; when there is no such stylesheet, or it does not
 * live directly inside `<head>`, the element is appended to the end of
 * `<head>` instead.
 *
 * @param rules CSS text to install.
 * @param id Id given to the created `<style>` element.
 */
function injectCss(rules: string, id: string) {
  const head = document.getElementsByTagName("head")[0];

  // Detach the previous copy through its real parent: removeChild() on
  // `head` throws when the element with this id sits anywhere else.
  const previous = document.getElementById(id);
  if (previous && previous.parentNode) {
    previous.parentNode.removeChild(previous);
  }

  const css = document.createElement("style");
  css.type = "text/css";
  css.id = id;
  css.appendChild(document.createTextNode(rules));

  // insertBefore() only accepts a reference node that is a child of `head`
  // (it throws otherwise); a null reference appends at the end.
  const rootStylesheet = getRootStylesheet();
  const reference =
    rootStylesheet && rootStylesheet.parentNode === head
      ? rootStylesheet
      : null;
  head.insertBefore(css, reference);
}
/**
 * Finds the first `<style>` element in the document whose `innerHTML`
 * contains the text "#root".
 *
 * @returns That element, or `null` when no `<style>` element matches.
 */
function getRootStylesheet() {
  const styleElements = Array.from(document.querySelectorAll("style"));
  const rootStylesheet = styleElements.find((styleElement) =>
    styleElement.innerHTML.includes("#root")
  );
  return rootStylesheet ?? null;
}
// Public surface of this module. `asBlob`, `px`, `getRootStylesheet`, the
// `mimes` table and the `index` counter used by `uid` are not listed here
// and therefore stay module-private.
export {
injectCss,
escape,
parseExtension,
mimeType,
dataAsUrl,
isDataUrl,
canvasToBlob,
resolveUrl,
uid,
delay,
asArray,
escapeXhtml,
width,
height
};

View File

@@ -0,0 +1,8 @@
{
"extends": "../../tsconfig",
"compilerOptions": {
"outDir": "./dist",
"module": "CommonJS"
},
"include": ["src/"]
}