mobile: clip dynamic websites

This commit is contained in:
ammarahm-ed
2023-09-18 07:06:58 +05:00
committed by Ammar Ahmed
parent 674d8672f0
commit 788429efb7
12 changed files with 2622 additions and 112 deletions

File diff suppressed because one or more lines are too long

View File

@@ -21,6 +21,12 @@
background-color: transparent !important;
}
img {
max-width: 100% !important;
background-color: transparent !important;
height: unset !important;
}
.editor {
overflow-x: hidden;
overflow-y: scroll;

View File

@@ -167,10 +167,9 @@ export const Editor = ({ onChange, onLoad }) => {
}}
nestedScrollEnabled
javaScriptEnabled={true}
focusable={true}
setSupportMultipleWindows={false}
overScrollMode="never"
scrollEnabled={false}
scrollEnabled={Platform.OS === "ios"}
keyboardDisplayRequiresUserAction={false}
cacheMode="LOAD_DEFAULT"
cacheEnabled={true}

View File

@@ -0,0 +1,237 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2023 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import React, { useEffect } from "react";
import { createRef, useImperativeHandle, useRef, useState } from "react";
import WebView from "react-native-webview";
import { Config } from "./store";
import RNFetchBlob from "react-native-blob-util";
import { Platform } from "react-native";
export const fetchHandle = createRef();
export const HtmlLoadingWebViewAgent = React.memo(
() => {
const [source, setSource] = useState(null);
const [clipper, setClipper] = useState(null);
const loadHandler = useRef();
const htmlHandler = useRef();
const webview = useRef();
useImperativeHandle(
fetchHandle,
() => ({
processUrl: (url) => {
return new Promise((resolve) => {
setSource(url);
console.log("processing...", url);
let resolved = false;
htmlHandler.current = (html) => {
if (resolved) return;
resolved = true;
setSource(null);
resolve(html);
};
loadHandler.current = (result) => {
if (resolved) return;
if (!result) {
resolved = true;
setSource(null);
resolve(null);
return;
}
console.log("loaded event fired");
};
});
}
}),
[]
);
useEffect(() => {
const clipperPath =
Platform.OS === "ios"
? RNFetchBlob.fs.dirs.MainBundleDir +
"/extension.bundle/clipper.bundle.js"
: RNFetchBlob.fs.asset("clipper.bundle.js");
RNFetchBlob.fs.readFile(clipperPath, "utf8").then((clipper) => {
setClipper(clipper);
});
}, []);
return !source || !clipper ? null : (
<WebView
ref={webview}
onLoad={() => {
console.log("Webview is loaded");
loadHandler.current?.(true);
}}
style={{
width: 100,
height: 100,
position: "absolute",
opacity: 0,
zIndex: -1
}}
pointerEvents="none"
onMessage={(event) => {
try {
const data = JSON.parse(event.nativeEvent.data);
if (data && data.type === "html") {
console.log("message recieved page loaded");
htmlHandler.current?.(data.value);
} else {
if (data.type === "error") {
htmlHandler.current?.(null);
}
}
} catch (e) {
console.log("Error handling webview message", e);
}
}}
injectedJavaScript={`
${clipper}
window.onload = () => {
function postMessage(type, value) {
if (window.ReactNativeWebView) {
window.ReactNativeWebView.postMessage(
JSON.stringify({
type: type,
value: value
})
);
}
}
globalThis.Clipper.clipArticle(document, {
images: true,
corsProxy: false
}).then(result => {
postMessage("html", result);
}).catch(e => {
postMessage("error");
})
};`}
onError={() => {
console.log("Error loading page");
loadHandler.current?.();
}}
source={{
uri: source
}}
/>
);
},
() => true
);
HtmlLoadingWebViewAgent.displayName = "HtmlLoadingWebViewAgent";
// Earlier hand-written extraction script, kept as a plain string. `old` is
// not exported and is not referenced anywhere else in this file as shown, so
// it is effectively dead code. The script removed a fixed list of tags,
// stripped attributes, rewrote relative URLs to absolute ones and posted the
// resulting document HTML back as an "html" message.
// NOTE(review): `removeAttribute("data-*")` below removes an attribute
// literally named "data-*"; it does not act as a wildcard.
const old = `
window.onload = () => {
// Function to convert relative URLs to absolute URLs
function fixRelativeUrls(baseUrl, elements, attribute) {
elements.forEach((element) => {
const relativeUrl = element.getAttribute(attribute);
if (relativeUrl) {
const absoluteUrl = new URL(relativeUrl, baseUrl).href;
element.setAttribute(attribute, absoluteUrl);
}
});
}
// Function to remove unnecessary attributes from elements
function removeUnnecessaryAttributes(elements) {
elements.forEach((element) => {
// Remove unnecessary attributes
const unnecessaryAttributes = ["class", "id", "style", "data-*"];
unnecessaryAttributes.forEach((attr) => element.removeAttribute(attr));
});
}
// Function to exclude specific tags
function excludeTags(elements) {
elements.forEach((element) => {
element.remove();
});
}
// Extract the HTML content and modify it
function extractAndModifyHtml() {
const baseUrl = window.location.href;
const htmlContent = document.documentElement.outerHTML;
// Exclude specific tags (e.g., styles, scripts, and others)
const tagsToExclude = [
"style",
"script",
"head",
"button",
"select",
"form",
"link",
"canvas",
"nav",
"svg",
"audio",
"video",
"iframe",
"object",
"input",
"textarea",
"footer",
"dialog"
];
const elementsToExclude = tagsToExclude
.map((tagName) => [...document.querySelectorAll(tagName)])
.flat();
excludeTags(elementsToExclude);
// Select the remaining elements after excluding specific tags
const remainingElements = [...document.querySelectorAll("*")];
// Remove unnecessary attributes from the remaining elements
removeUnnecessaryAttributes(remainingElements);
// Convert relative URLs to absolute URLs in links, images, and other attributes
const elementsToFixUrls = [
...document.querySelectorAll(
"a[href], img[src], link[href], script[src], iframe[src], form[action], object[data]"
)
];
fixRelativeUrls(baseUrl, elementsToFixUrls, "href");
fixRelativeUrls(baseUrl, elementsToFixUrls, "src");
fixRelativeUrls(baseUrl, elementsToFixUrls, "action");
fixRelativeUrls(baseUrl, elementsToFixUrls, "data");
return document.documentElement.outerHTML;
}
function postMessage(type, value) {
if (window.ReactNativeWebView) {
window.ReactNativeWebView.postMessage(
JSON.stringify({
type: type,
value: value
})
);
}
}
const html = extractAndModifyHtml();
postMessage("html", html);
};
`;

View File

@@ -56,15 +56,15 @@ import { Editor } from "./editor";
import { Search } from "./search";
import { initDatabase, useShareStore } from "./store";
import { useThemeColors } from "@notesnook/theme";
import { HtmlLoadingWebViewAgent, fetchHandle } from "./fetch-webview";
const getLinkPreview = (url) => {
return getPreviewData(url, 5000);
};
async function sanitizeHtml(site) {
try {
let html = await fetch(site);
html = await html.text();
return sanitize(html, site);
let html = await fetchHandle.current?.processUrl(site);
return html;
} catch (e) {
return "";
}
@@ -82,93 +82,6 @@ function makeHtmlFromPlainText(text) {
.replace(/(?:\r\n|\r|\n)/g, "</p><p>")}</p>`;
}
/**
 * Keeps at most the first three "/"-separated segments of `site` and joins
 * them back together, e.g. "https://host/a/b" becomes "https://host".
 */
function getBaseUrl(site) {
  const segments = site.split("/");
  if (segments.length > 3) {
    segments.length = 3;
  }
  return segments.join("/");
}
/**
 * Replaces every <table> in `document` with a <div class="table-container">
 * holding a copy of that table, then returns the same `document`.
 *
 * The wrapper div is marked contenteditable="false" while the table itself is
 * marked contenteditable="true".
 */
function wrapTablesWithDiv(document) {
const tables = document.getElementsByTagName("table");
for (let table of tables) {
// Set the attribute before serializing so the copy made via outerHTML
// below carries it as well.
table.setAttribute("contenteditable", "true");
const div = document.createElement("div");
div.setAttribute("contenteditable", "false");
// The table is copied by markup rather than moved; the original node is
// then swapped out for the wrapper.
div.innerHTML = table.outerHTML;
div.classList.add("table-container");
table.replaceWith(div);
}
return document;
}
let elementBlacklist = [
"script",
"button",
"input",
"textarea",
"style",
"form",
"link",
"head",
"nav",
"iframe",
"canvas",
"select",
"dialog",
"footer"
];
function removeInvalidElements(document) {
let elements = document.querySelectorAll(elementBlacklist.join(","));
for (let element of elements) {
element.remove();
}
return document;
}
function replaceSrcWithAbsoluteUrls(document, baseUrl) {
let images = document.querySelectorAll("img");
for (var i = 0; i < images.length; i++) {
let img = images[i];
let url = getBaseUrl(baseUrl);
let src = img.getAttribute("src");
if (src.startsWith("/")) {
if (src.startsWith("//")) {
src = src.replace("//", "https://");
} else {
src = url + src;
}
}
if (src.startsWith("data:")) {
img.remove();
} else {
img.setAttribute("src", src);
}
}
return document;
}
function fixCodeBlocks(document) {
let elements = document.querySelectorAll("code,pre");
for (let element of elements) {
element.classList.add(".hljs");
}
return document;
}
function sanitize(html, baseUrl) {
let parser = parseHTML(html);
parser = wrapTablesWithDiv(parser);
parser = removeInvalidElements(parser);
parser = replaceSrcWithAbsoluteUrls(parser, baseUrl);
parser = fixCodeBlocks(parser);
let htmlString = parser.body.outerHTML;
htmlString = htmlString + `<hr>${makeHtmlFromUrl(baseUrl)}`;
return htmlString;
}
let defaultNote = {
title: null,
id: null,
@@ -210,6 +123,7 @@ const ShareView = ({ quicknote = false }) => {
const [mode, setMode] = useState(1);
const keyboardHeight = useRef(0);
const { width, height } = useWindowDimensions();
const [loadingPage, setLoadingPage] = useState(false);
const insets =
Platform.OS === "android"
? { top: StatusBar.currentHeight }
@@ -260,6 +174,10 @@ const ShareView = ({ quicknote = false }) => {
const loadData = useCallback(async () => {
try {
if (noteContent.current) {
onLoad();
return;
}
defaultNote.content.data = null;
setNote({ ...defaultNote });
const data = await ShareExtension.data();
@@ -310,6 +228,7 @@ const ShareView = ({ quicknote = false }) => {
}, [onLoad]);
const onLoad = useCallback(() => {
console.log("sending event...");
eSendEvent(eOnLoadNote + "shareEditor", {
id: null,
content: {
@@ -397,11 +316,13 @@ const ShareView = ({ quicknote = false }) => {
setLoading(true);
try {
if (m === 2) {
setLoadingPage(true);
let html = await sanitizeHtml(rawData.value);
noteContent.current = html;
setLoadingPage(false);
onLoad();
setNote((note) => {
note.content.data = html;
noteContent.current = html;
onLoad();
return { ...note };
});
} else {
@@ -423,6 +344,7 @@ const ShareView = ({ quicknote = false }) => {
};
const onLoadEditor = useCallback(() => {
console.log("ON LOAD");
Storage.write("shareExtensionOpened", "opened");
loadData();
}, [loadData]);
@@ -450,6 +372,8 @@ const ShareView = ({ quicknote = false }) => {
justifyContent: quicknote ? "flex-start" : "flex-end"
}}
>
<HtmlLoadingWebViewAgent />
{quicknote && !searchMode ? (
<View
style={{
@@ -716,16 +640,27 @@ const ShareView = ({ quicknote = false }) => {
<SafeAreaProvider
style={{
flex: 1,
paddingTop: 10
paddingTop: 10,
justifyContent: loadingPage ? "center" : undefined,
alignItems: loadingPage ? "center" : undefined
}}
>
{!loadingExtension && (
{!loadingExtension && !loadingPage ? (
<Editor
onLoad={onLoadEditor}
onChange={(html) => {
noteContent.current = html;
}}
/>
) : (
<>
{loadingPage ? (
<>
<ActivityIndicator />
<Text>Preparing web clip...</Text>
</>
) : null}
</>
)}
</SafeAreaProvider>
</View>

View File

@@ -90,3 +90,7 @@ export const useShareStore = create((set) => ({
set({ selectedTags });
}
}));
// Share-extension configuration object. `corsProxy` is copied from
// `appSettings` when this module is evaluated, and is undefined when
// `appSettings` is nullish.
// NOTE(review): `appSettings` is defined outside this hunk — confirm it is
// initialised before this module loads.
export const Config = {
corsProxy: appSettings?.corsProxy
};

File diff suppressed because it is too large Load Diff

View File

@@ -11,7 +11,9 @@
"main": "dist/index.js",
"devDependencies": {
"@playwright/test": "^1.27.1",
"slugify": "^1.6.5"
"slugify": "^1.6.5",
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4"
},
"publishConfig": {
"access": "public"
@@ -21,7 +23,7 @@
"url": "git+https://github.com/streetwriters/notesnook.git"
},
"scripts": {
"build": "tsc",
"build": "tsc && yarn webpack -c webpack.config.js",
"test": "playwright test",
"postinstall": "patch-package"
},

View File

@@ -0,0 +1,27 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2023 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { clipArticle } from "./index";
declare global {
  /**
   * Clipper API exposed on the global object so that scripts evaluated in the
   * same page as this bundle can call it as `globalThis.Clipper`.
   */
  // eslint-disable-next-line no-var
  var Clipper: { clipArticle: typeof clipArticle };
}

// Assign through `globalThis` (the name consumers read it from) rather than
// `global`, which only exists in browsers when the bundler polyfills it.
globalThis.Clipper = {
  clipArticle
};

View File

@@ -44,16 +44,10 @@ const inlineOptions: InlineOptions = {
async function clipPage(
document: Document,
withStyles: boolean,
onlyVisible: boolean,
config?: Config
): Promise<string | null> {
const { body, head } = await getPage(
document,
withStyles,
config,
onlyVisible
);
const { body, head } = await getPage(document, config, onlyVisible);
if (!body || !head) return null;
const result = toDocument(head, body).documentElement.outerHTML;
return `<!doctype html>\n${result}`;
@@ -61,10 +55,9 @@ async function clipPage(
async function clipArticle(
doc: Document,
withStyles: boolean,
config?: Config
): Promise<string | null> {
const { body, head } = await getPage(doc, withStyles, config);
const { body, head } = await getPage(doc, config);
if (!body || !head) return null;
const newDoc = toDocument(head, body);
@@ -454,7 +447,6 @@ function cleanup() {
async function getPage(
document: Document,
styles: boolean,
config?: Config,
onlyVisible = false
) {
@@ -463,10 +455,10 @@ async function getPage(
fetchOptions: resolveFetchOptions(config),
inlineOptions: {
fonts: false,
images: styles,
stylesheets: styles
images: config?.images,
stylesheets: config?.styles
},
styles,
styles: config?.styles,
filter: (node) => {
return !onlyVisible || isElementInViewport(node);
}

View File

@@ -52,4 +52,6 @@ export type Options = {
/**
 * Optional settings passed to the clipper entry points.
 */
export type Config = {
/** Presumably the CORS proxy used when fetching page resources — TODO confirm against resolveFetchOptions. */
corsProxy?: string;
/** Forwarded by getPage as the `images` inline option. */
images?: boolean;
/** Forwarded by getPage as both the `stylesheets` inline option and the `styles` option. */
styles?: boolean;
};

View File

@@ -0,0 +1,28 @@
/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2023 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
const path = require("path");
module.exports = {
entry: ["./dist/index.global.js"],
mode: "production",
output: {
filename: "clipper.bundle.js",
path: path.resolve(__dirname, "../../apps/mobile/native/ios")
}
};