editor: add support for pasting as markdown

This commit is contained in:
Abdullah Atta
2025-02-04 13:39:19 +05:00
committed by Abdullah Atta
parent 49ffcbea03
commit c16bbcaad5
5 changed files with 3324 additions and 23 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -30,6 +30,7 @@
}, },
"license": "GPL-3.0-or-later", "license": "GPL-3.0-or-later",
"dependencies": { "dependencies": {
"@notesnook-importer/core": "^2.1.1",
"@notesnook/common": "file:../common", "@notesnook/common": "file:../common",
"@notesnook/intl": "file:../intl", "@notesnook/intl": "file:../intl",
"@notesnook/theme": "file:../theme", "@notesnook/theme": "file:../theme",

View File

@@ -22,7 +22,7 @@ import {
ParseOptions ParseOptions
} from "@tiptap/pm/model"; } from "@tiptap/pm/model";
import { encodeNonAsciiHTML } from "entities"; import { encodeNonAsciiHTML } from "entities";
import { Schema, Slice } from "prosemirror-model"; import { Node as PMNode, Schema, Slice } from "prosemirror-model";
import { inferLanguage } from "../code-block/index.js"; import { inferLanguage } from "../code-block/index.js";
import { hasPermission } from "../../types.js"; import { hasPermission } from "../../types.js";
@@ -47,6 +47,17 @@ export class ClipboardDOMParser extends ProsemirrorDOMParser {
} }
return super.parseSlice(dom, options); return super.parseSlice(dom, options);
} }
/**
 * Parses `dom` into a ProseMirror node via the base parser.
 *
 * When `dom` is an `HTMLElement` or a `Document`, the clipboard clean-up
 * helpers are run on it first, in a fixed order. Their return values are
 * ignored, so they are presumably relied on to mutate `dom` in place.
 * Any other kind of node is handed to the base parser untouched.
 *
 * @param dom DOM node to parse.
 * @param options Parse options forwarded unchanged to the base parser.
 * @returns The node produced by `super.parse`.
 */
parse(dom: Node, options?: ParseOptions): PMNode {
  // Nothing to clean up for other node kinds; defer to the base parser.
  if (!(dom instanceof HTMLElement) && !(dom instanceof Document)) {
    return super.parse(dom, options);
  }

  // Order is kept exactly as-is: each helper may depend on earlier clean-up.
  convertGoogleDocsChecklist(dom);
  formatCodeblocks(dom);
  convertBrToSingleSpacedParagraphs(dom);
  removeImages(dom);
  removeBlockId(dom);

  return super.parse(dom, options);
}
} }
export function removeBlockId(dom: HTMLElement | Document) { export function removeBlockId(dom: HTMLElement | Document) {

View File

@@ -21,19 +21,34 @@ import { ResolvedPos, Slice } from "@tiptap/pm/model";
import { encodeNonAsciiHTML } from "entities"; import { encodeNonAsciiHTML } from "entities";
import { ClipboardDOMParser } from "./clipboard-dom-parser.js"; import { ClipboardDOMParser } from "./clipboard-dom-parser.js";
import { EditorView } from "@tiptap/pm/view"; import { EditorView } from "@tiptap/pm/view";
import { markdowntoHTML } from "@notesnook-importer/core/dist/src/utils/to-html.js";
export function clipboardTextParser( export function clipboardTextParser(
text: string, text: string,
_$context: ResolvedPos, $context: ResolvedPos,
_plain: boolean, plain: boolean,
view: EditorView view: EditorView
): Slice { ): Slice {
if (!plain && isProbablyMarkdown(text)) {
const node = ClipboardDOMParser.fromSchema(view.state.schema).parse(
new DOMParser().parseFromString(
markdowntoHTML(text, { allowDangerousHtml: false }),
"text/html"
),
{
context: $context
}
);
return node.slice(0);
}
const doc = new DOMParser().parseFromString( const doc = new DOMParser().parseFromString(
convertTextToHTML(text), convertTextToHTML(text),
"text/html" "text/html"
); );
return ClipboardDOMParser.fromSchema(view.state.schema).parseSlice(doc, { return ClipboardDOMParser.fromSchema(view.state.schema).parseSlice(doc, {
preserveWhitespace: "full" preserveWhitespace: "full",
context: $context
}); });
} }
@@ -58,3 +73,166 @@ function encodeLine(line: string) {
}); });
return line; return line;
} }
/**
 * A single regex-based heuristic used when guessing whether pasted text is
 * Markdown.
 */
interface MarkdownPattern {
  /** Label of the construct this entry detects; shown in debug output. */
  type: string;
  /** Expression that is tested against the pasted text. */
  pattern: RegExp;
  /** Weight contributed per match when scores are tallied; may be negative. */
  score: number;
}
/**
 * Patterns that are treated as conclusive: a single hit is enough for the
 * text to be classified as Markdown, so `score` is unused here (always 0).
 *
 * None of these carry the `m` flag, which means `^` and `$` anchor to the
 * start and end of the whole text rather than to individual lines.
 */
const DEFINITE_PATTERNS: MarkdownPattern[] = [
  { pattern: /^[\t ]*#{1,6}\s+\S/, score: 0, type: "header" },
  // GFM accepts both `[x]` and `[X]` for a checked task item.
  { pattern: /^\s*[-*+]\s+\[[ xX]\]/, score: 0, type: "task" },
  { pattern: /^\|.+\|.+\|$/, score: 0, type: "table" },
  { pattern: /^[\t ]*>\s+.+/, score: 0, type: "blockquote" },
  { pattern: /!\[[^\]]+\]\([^)\s]+(?:\s+"[^"]*")?\)/, score: 0, type: "image" }
];
/**
 * Positive indicators of Markdown. Each entry contributes
 * `score * <number of matches>` to the overall tally, so stronger indicators
 * carry a larger weight.
 */
const MARKDOWN_PATTERNS: MarkdownPattern[] = [
  // Strong indicators
  { pattern: /^[\t ]*#{1,6}\s+\S/m, score: 3, type: "header" },
  { pattern: /^`{3}.*\n[\s\S]*?\n`{3}$/m, score: 3, type: "codeblock" },
  // GFM accepts both `[x]` and `[X]` for a checked task item.
  { pattern: /^\s*[-*+]\s+\[[ xX]\]/m, score: 3, type: "tasklist" },
  // Medium indicators
  { pattern: /\[[^\]]+\]\([^)\s]+(?:\s+"[^"]*")?\)/, score: 2, type: "link" },
  { pattern: /^\s*\[[^\]]+\]:\s+\S+/m, score: 2, type: "reference" },
  { pattern: /\[[^\]]+\]\[\w*\]/, score: 2, type: "referenceLink" },
  { pattern: /^[\t ]*>\s+.+/m, score: 2, type: "blockquote" },
  // A thematic break is a whole line made of 3+ of the SAME marker, optionally
  // separated by spaces/tabs. Requiring end-of-line keeps lines such as
  // "-----BEGIN CERTIFICATE-----" from being counted as horizontal rules.
  {
    pattern: /^[ \t]{0,3}([-*_])(?:[ \t]*\1){2,}[ \t]*$/m,
    score: 2,
    type: "hr"
  },
  { pattern: /^\|.+\|.+\|$/m, score: 2, type: "table" },
  { pattern: /^\s{0,3}[-*+]\s+\S/m, score: 2, type: "unorderedList" },
  { pattern: /^\s*\d+\.\s+\S/m, score: 2, type: "orderedList" },
  { pattern: /\$\$.+\$\$/m, score: 2, type: "math" },
  { pattern: /\{:.+\}/m, score: 2, type: "attribute" },
  { pattern: /\[:(.+?)\]/, score: 2, type: "footnote" },
  // Weak indicators
  { pattern: /(?<!\*)\*\*[^*\n]+\*\*(?!\*)/m, score: 1, type: "bold" },
  { pattern: /(?<!\*)\*[^*\n]+\*(?!\*)/m, score: 1, type: "italic" },
  { pattern: /(?<!_)__[^_\n]+__(?!_)/m, score: 1, type: "boldUnderscore" },
  { pattern: /(?<!_)_[^_\n]+_(?!_)/m, score: 1, type: "italicUnderscore" },
  { pattern: /`[^`\n]+`/m, score: 1, type: "inlineCode" },
  { pattern: /~~[^~\n]+~~/m, score: 1, type: "strikethrough" },
  { pattern: /!\[[^\]]+\]\([^)\s]+(?:\s+"[^"]*")?\)/, score: 1, type: "image" }
];
/**
 * Indicators that the text is NOT Markdown (markup documents, bare numbers,
 * e-mail addresses, URLs, mail headers, ...). Every entry has a negative
 * `score`, which is multiplied by its number of matches and added to the
 * overall tally.
 */
const NEGATIVE_PATTERNS: MarkdownPattern[] = [
  // Also catches an opening tag with attributes, e.g. `<html lang="en">`.
  { pattern: /<html[\s>]/i, score: -5, type: "html" },
  // `?` must be escaped: unescaped it makes `<` optional, so the bare word
  // "xml" anywhere in the text would be penalised.
  { pattern: /<\?xml/, score: -5, type: "xml" },
  { pattern: /^\s*[0-9,.]+$/, score: -3, type: "numbers" },
  {
    pattern: /^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$/,
    score: -5,
    type: "email"
  },
  { pattern: /^https?:\/\/\S+$/, score: -3, type: "url" },
  { pattern: /^\s*[{[\]},;]\s*$/, score: -3, type: "punctuation" },
  { pattern: /^(Subject|From|To|Date):/, score: -4, type: "emailHeader" },
  { pattern: /^[A-Z]{2,}:/, score: -2, type: "capsHeader" },
  { pattern: /<script[\s\S]*?<\/script>/i, score: -4, type: "script" },
  { pattern: /<style[\s\S]*?<\/style>/i, score: -4, type: "style" }
];
/**
 * Heuristically decides whether a piece of plain text is written in Markdown.
 *
 * A hit on any entry of `DEFINITE_PATTERNS` returns `true` immediately.
 * Otherwise a score is accumulated:
 * - every `MARKDOWN_PATTERNS` / `NEGATIVE_PATTERNS` entry adds
 *   `score * <number of matches>`,
 * - +1 for more than one line, +1 for a blank line between paragraphs,
 *   +1 for a line indented by 1-4 whitespace characters,
 * - +2 when more than half of the lines keep the previous line's indentation
 *   (or are indented by exactly two more columns).
 *
 * The text counts as Markdown when the score is strictly greater than the
 * threshold: 4 for texts longer than 100 characters, 3 otherwise.
 *
 * @param text Text to classify.
 * @param debug When `true`, the matched patterns and the score breakdown are
 * printed with `console.log`.
 * @returns `true` if the text is probably Markdown, `false` otherwise.
 */
export function isProbablyMarkdown(text: string, debug = false): boolean {
  function log(...args: unknown[]) {
    if (debug) console.log(...args);
  }

  // Check definite patterns first
  const definiteMatch = DEFINITE_PATTERNS.find((p) => p.pattern.test(text));
  if (definiteMatch) {
    log("Definite markdown match:", definiteMatch.type);
    return true;
  }

  let score = 0;
  const matches: string[] = [];
  const lines = text.split("\n");

  // The breakdown is only needed for the debug report.
  function pushMatch(match: string) {
    if (debug) matches.push(match);
  }

  // Check positive patterns
  for (const pattern of MARKDOWN_PATTERNS) {
    const matchCount = getPatternMatches(text, pattern.pattern);
    if (matchCount > 0) {
      const patternScore = pattern.score * matchCount;
      score += patternScore;
      pushMatch(`+${patternScore} ${pattern.type} (${matchCount} matches)`);
    }
  }

  // Check negative patterns
  for (const pattern of NEGATIVE_PATTERNS) {
    const matchCount = getPatternMatches(text, pattern.pattern);
    if (matchCount > 0) {
      const patternScore = pattern.score * matchCount;
      score += patternScore;
      pushMatch(`${patternScore} ${pattern.type} (${matchCount} matches)`);
    }
  }

  // Structure indicators
  if (lines.length > 1) {
    score += 1;
    pushMatch("+1 multiline");
  }
  if (/\n\n/.test(text)) {
    score += 1;
    pushMatch("+1 paragraphs");
  }
  if (/^\s{1,4}[^\s]/m.test(text)) {
    score += 1;
    pushMatch("+1 indentation");
  }

  // Check formatting consistency
  let consistentFormatting = 0;
  let prevLineIndent = -1;
  for (const line of lines) {
    // -1 for blank lines, which also resets the comparison for the next line.
    const indent = line.search(/\S/);
    if (prevLineIndent !== -1) {
      if (indent === prevLineIndent || indent === prevLineIndent + 2) {
        consistentFormatting++;
      }
    }
    prevLineIndent = indent;
  }
  if (consistentFormatting > lines.length / 2) {
    score += 2;
    pushMatch("+2 consistentFormatting");
  }

  const threshold = text.length > 100 ? 4 : 3;
  const isLikelyMarkdown = score > threshold;

  if (debug) {
    // Confidence is purely informational, so it is only computed for the report.
    const confidence = Math.min(
      100,
      Math.max(0, score * (text.length > 100 ? 8 : 12))
    );
    const result = {
      score,
      isLikelyMarkdown,
      confidence,
      details: {
        length: text.length,
        lines: lines.length,
        consistentFormatting,
        threshold,
        matches
      }
    };
    log("Markdown detection result:", result);
  }

  return isLikelyMarkdown;
}
/**
 * Counts how many times `pattern` occurs in `text`.
 *
 * The pattern is re-created with the `g` and `m` flags added so that every
 * occurrence is counted and `^`/`$` anchor to individual lines. The pattern's
 * own flags (for example `i`) are kept; passing a fixed flag string to the
 * `RegExp` constructor would silently discard them.
 *
 * @param text Text to search.
 * @param pattern Expression to count; it is not modified.
 * @returns Number of matches, `0` when there are none.
 */
function getPatternMatches(text: string, pattern: RegExp): number {
  // De-duplicate so a pattern that already has `g` or `m` stays valid.
  const flags = Array.from(new Set(`${pattern.flags}gm`)).join("");
  return text.match(new RegExp(pattern.source, flags))?.length ?? 0;
}

View File

@@ -18,16 +18,240 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
import { test } from "vitest"; import { test } from "vitest";
import { convertTextToHTML } from "../clipboard-text-parser.js"; import {
convertTextToHTML,
isProbablyMarkdown
} from "../clipboard-text-parser.js";
const cases = [ const textCases = [
`Hello\r\nworld`, `Hello\r\nworld`,
`What\n\n\nNO!`, `What\n\n\nNO!`,
`Hello\r\nWorld\nwhat?\nNay!` `Hello\r\nWorld\nwhat?\nNay!`
]; ];
for (const testCase of cases) { for (const testCase of textCases) {
test(`convert text to html`, (t) => { test(`convert text to html`, (t) => {
t.expect(convertTextToHTML(testCase)).toMatchSnapshot(); t.expect(convertTextToHTML(testCase)).toMatchSnapshot();
}); });
} }
// Fixtures for `isProbablyMarkdown`: each entry pairs an input `text` with the
// classification the detector is expected to return for it (`isMarkdown`).
// The inputs are exact; whitespace and line breaks inside them are significant.
const markdownCases = [
// Single-line markdown elements
{
text: "# Header",
isMarkdown: true
},
{
text: "## Second level",
isMarkdown: true
},
{
text: "#Not a header",
isMarkdown: false
},
{
text: "- [ ] Task",
isMarkdown: true
},
{
text: "- [x] Completed task",
isMarkdown: true
},
{
text: "- []Not a task",
isMarkdown: false
},
// Tables
{
text: "|Column 1|Column 2|",
isMarkdown: true
},
{
text: "| Name | Age |",
isMarkdown: true
},
{
text: "|Not|A|Table",
isMarkdown: false
},
// Blockquotes
{
text: "> Quoted text",
isMarkdown: true
},
{
text: ">Not a quote",
isMarkdown: false
},
// Multi-line content
{
text: `# Header
Some paragraph text
- List item 1
- List item 2`,
isMarkdown: true
},
{
text: `\`\`\`javascript
const x = 1;
console.log(x);
\`\`\``,
isMarkdown: true
},
// Inline formatting
{
text: "This is **bold** text with some other **bold** content and some *italic* text and some `code` text and ~~strikethough~~ is ~~awesome~~ just okey!",
isMarkdown: true
},
// Lists
{
text: `- Item 1
- Item 2`,
isMarkdown: true
},
{
text: `1. First
2. Second`,
isMarkdown: true
},
// Plain text (negative cases)
{
text: "Just plain text",
isMarkdown: false
},
{
text: "Hello world",
isMarkdown: false
},
{
text: "12345",
isMarkdown: false
},
// HTML (negative cases)
{
text: "<html><body>Test</body></html>",
isMarkdown: false
},
{
text: "<div>Content</div>",
isMarkdown: false
},
// Special cases (negative)
{
text: "test@example.com",
isMarkdown: false
},
{
text: "https://example.com",
isMarkdown: false
},
// Edge cases
{
text: "",
isMarkdown: false
},
{
text: " ",
isMarkdown: false
},
{
text: "*",
isMarkdown: false
},
{
text: "#",
isMarkdown: false
},
// Mixed content
{
text: `# Header
Regular paragraph with **bold** and *italic*.
\`\`\`
code block
\`\`\`
1. List item
2. Another item
> Blockquote`,
isMarkdown: true
},
// Ambiguous cases
{
text: "2 * 3 = 6",
isMarkdown: false
},
{
text: "c:\\path\\to\\file",
isMarkdown: false
},
{
text: "From: user@example.com",
isMarkdown: false
},
// Formatting combinations
{
text: "[Link](https://example.com) with some **bold** text and other _stuff_ **Bold _and italic_**.",
isMarkdown: true
},
{
text: "![Image](image.jpg)",
isMarkdown: true
},
// Reference-style links
{
text: `[link][1]
[1]: https://example.com`,
isMarkdown: true
},
// Whitespace variations
{
text: " # Header with spaces",
isMarkdown: true
},
{
text: "\t> Tabbed quote",
isMarkdown: true
},
// Common user input patterns
{
text: "Hello\nWorld",
isMarkdown: false
},
{
text: "Item one\nItem two\nItem three",
isMarkdown: false
},
{
text: "YES",
isMarkdown: false
},
{
text: "OK",
isMarkdown: false
}
];
// Register one test per fixture: the detector's verdict for `text` must equal
// the expected `isMarkdown` flag.
for (const { text, isMarkdown } of markdownCases) {
  test(`detect as markdown ${text}`, (t) => {
    const detected = isProbablyMarkdown(text);
    t.expect(detected).toBe(isMarkdown);
  });
}