mirror of
https://github.com/streetwriters/notesnook.git
synced 2025-12-16 11:47:54 +01:00
editor: add support for pasting as markdown
This commit is contained in:
committed by
Abdullah Atta
parent
49ffcbea03
commit
c16bbcaad5
2917
packages/editor/package-lock.json
generated
2917
packages/editor/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -30,6 +30,7 @@
|
||||
},
|
||||
"license": "GPL-3.0-or-later",
|
||||
"dependencies": {
|
||||
"@notesnook-importer/core": "^2.1.1",
|
||||
"@notesnook/common": "file:../common",
|
||||
"@notesnook/intl": "file:../intl",
|
||||
"@notesnook/theme": "file:../theme",
|
||||
|
||||
@@ -22,7 +22,7 @@ import {
|
||||
ParseOptions
|
||||
} from "@tiptap/pm/model";
|
||||
import { encodeNonAsciiHTML } from "entities";
|
||||
import { Schema, Slice } from "prosemirror-model";
|
||||
import { Node as PMNode, Schema, Slice } from "prosemirror-model";
|
||||
import { inferLanguage } from "../code-block/index.js";
|
||||
import { hasPermission } from "../../types.js";
|
||||
|
||||
@@ -47,6 +47,17 @@ export class ClipboardDOMParser extends ProsemirrorDOMParser {
|
||||
}
|
||||
return super.parseSlice(dom, options);
|
||||
}
|
||||
|
||||
/**
 * Parses a DOM tree into a ProseMirror node.
 *
 * When `dom` is an `HTMLElement` or a `Document`, the clipboard clean-up
 * passes below are run on it (in this order) before the base parser is
 * invoked; any other kind of node is handed to the base parser untouched.
 */
parse(dom: Node, options?: ParseOptions): PMNode {
  // Nothing to clean up unless we were given an element or a whole document.
  if (!(dom instanceof HTMLElement || dom instanceof Document)) {
    return super.parse(dom, options);
  }

  convertGoogleDocsChecklist(dom);
  formatCodeblocks(dom);
  convertBrToSingleSpacedParagraphs(dom);
  removeImages(dom);
  removeBlockId(dom);

  return super.parse(dom, options);
}
|
||||
}
|
||||
|
||||
export function removeBlockId(dom: HTMLElement | Document) {
|
||||
|
||||
@@ -21,19 +21,34 @@ import { ResolvedPos, Slice } from "@tiptap/pm/model";
|
||||
import { encodeNonAsciiHTML } from "entities";
|
||||
import { ClipboardDOMParser } from "./clipboard-dom-parser.js";
|
||||
import { EditorView } from "@tiptap/pm/view";
|
||||
import { markdowntoHTML } from "@notesnook-importer/core/dist/src/utils/to-html.js";
|
||||
|
||||
/**
 * Converts pasted text into a ProseMirror slice.
 *
 * If the paste is not flagged as plain and the text looks like markdown
 * (see `isProbablyMarkdown`), the text is rendered to HTML with
 * `markdowntoHTML` (dangerous HTML disabled) and parsed as a document.
 * Otherwise the text is converted with `convertTextToHTML` and parsed with
 * whitespace fully preserved.
 *
 * @param text - The raw clipboard text.
 * @param $context - Resolved position passed to the parser as its `context`.
 * @param plain - When true, markdown detection is skipped.
 * @param view - Editor view whose schema is used to build the parser.
 * @returns The slice to insert into the document.
 */
export function clipboardTextParser(
  text: string,
  $context: ResolvedPos,
  plain: boolean,
  view: EditorView
): Slice {
  const parser = ClipboardDOMParser.fromSchema(view.state.schema);

  if (!plain && isProbablyMarkdown(text)) {
    const markdownDoc = new DOMParser().parseFromString(
      markdowntoHTML(text, { allowDangerousHtml: false }),
      "text/html"
    );
    const node = parser.parse(markdownDoc, { context: $context });
    return node.slice(0);
  }

  const doc = new DOMParser().parseFromString(
    convertTextToHTML(text),
    "text/html"
  );
  return parser.parseSlice(doc, {
    preserveWhitespace: "full",
    context: $context
  });
}
|
||||
|
||||
@@ -58,3 +73,166 @@ function encodeLine(line: string) {
|
||||
});
|
||||
return line;
|
||||
}
|
||||
|
||||
/** One heuristic used when guessing whether a piece of text is markdown. */
interface MarkdownPattern {
  /** Expression tested against the text. */
  readonly pattern: RegExp;
  /**
   * Weight added to the running score for every match of `pattern`;
   * negative weights count against the text being markdown.
   */
  readonly score: number;
  /** Short label for the heuristic, reported in debug output. */
  readonly type: string;
}
|
||||
|
||||
/**
 * Patterns that, on their own, are treated as proof that the text is
 * markdown. None of them carries the `m` flag, so `^` / `$` anchor to the
 * start / end of the whole text. The `score` field is not consulted for
 * these entries; only `pattern` and `type` are used.
 */
const DEFINITE_PATTERNS: MarkdownPattern[] = [
  { pattern: /^[\t ]*#{1,6}\s+\S/, score: 0, type: "header" },
  // Task list markers may be checked with either "x" or "X".
  { pattern: /^\s*[-*+]\s+\[[ xX]\]/, score: 0, type: "task" },
  { pattern: /^\|.+\|.+\|$/, score: 0, type: "table" },
  { pattern: /^[\t ]*>\s+.+/, score: 0, type: "blockquote" },
  { pattern: /!\[[^\]]+\]\([^)\s]+(?:\s+"[^"]*")?\)/, score: 0, type: "image" }
];
|
||||
|
||||
/**
 * Weighted markdown indicators. Each entry's `score` is multiplied by the
 * number of matches found in the text and added to the running total.
 */
const MARKDOWN_PATTERNS: MarkdownPattern[] = [
  // Strong indicators
  { pattern: /^[\t ]*#{1,6}\s+\S/m, score: 3, type: "header" },
  { pattern: /^`{3}.*\n[\s\S]*?\n`{3}$/m, score: 3, type: "codeblock" },
  // Task list markers may be checked with either "x" or "X".
  { pattern: /^\s*[-*+]\s+\[[ xX]\]/m, score: 3, type: "tasklist" },

  // Medium indicators
  { pattern: /\[[^\]]+\]\([^)\s]+(?:\s+"[^"]*")?\)/, score: 2, type: "link" },
  { pattern: /^\s*\[[^\]]+\]:\s+\S+/m, score: 2, type: "reference" },
  { pattern: /\[[^\]]+\]\[\w*\]/, score: 2, type: "referenceLink" },
  { pattern: /^[\t ]*>\s+.+/m, score: 2, type: "blockquote" },
  { pattern: /^[-*_]{3,}/m, score: 2, type: "hr" },
  { pattern: /^\|.+\|.+\|$/m, score: 2, type: "table" },
  { pattern: /^\s{0,3}[-*+]\s+\S/m, score: 2, type: "unorderedList" },
  { pattern: /^\s*\d+\.\s+\S/m, score: 2, type: "orderedList" },
  { pattern: /\$\$.+\$\$/m, score: 2, type: "math" },
  { pattern: /\{:.+\}/m, score: 2, type: "attribute" },
  // NOTE(review): this matches "[:id]"; common footnote syntax is "[^id]" —
  // confirm which form is intended before changing it.
  { pattern: /\[:(.+?)\]/, score: 2, type: "footnote" },

  // Weak indicators
  { pattern: /(?<!\*)\*\*[^*\n]+\*\*(?!\*)/m, score: 1, type: "bold" },
  { pattern: /(?<!\*)\*[^*\n]+\*(?!\*)/m, score: 1, type: "italic" },
  { pattern: /(?<!_)__[^_\n]+__(?!_)/m, score: 1, type: "boldUnderscore" },
  { pattern: /(?<!_)_[^_\n]+_(?!_)/m, score: 1, type: "italicUnderscore" },
  { pattern: /`[^`\n]+`/m, score: 1, type: "inlineCode" },
  { pattern: /~~[^~\n]+~~/m, score: 1, type: "strikethrough" },

  { pattern: /!\[[^\]]+\]\([^)\s]+(?:\s+"[^"]*")?\)/, score: 1, type: "image" }
];
|
||||
|
||||
/**
 * Indicators that the text is NOT markdown. Scores are negative and, like
 * the positive patterns, are multiplied by the number of matches. Matching
 * is done with the `m` flag added, so `^` / `$` anchor to individual lines.
 */
const NEGATIVE_PATTERNS: MarkdownPattern[] = [
  { pattern: /<html>/, score: -5, type: "html" },
  // The "?" must be escaped: unescaped, it makes "<" optional and the
  // pattern matches any text that merely contains "xml".
  { pattern: /<\?xml/, score: -5, type: "xml" },
  { pattern: /^\s*[0-9,.]+$/, score: -3, type: "numbers" },
  {
    pattern: /^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$/,
    score: -5,
    type: "email"
  },
  { pattern: /^https?:\/\/\S+$/, score: -3, type: "url" },
  { pattern: /^\s*[{[\]},;]\s*$/, score: -3, type: "punctuation" },
  { pattern: /^(Subject|From|To|Date):/, score: -4, type: "emailHeader" },
  { pattern: /^[A-Z]{2,}:/, score: -2, type: "capsHeader" },
  { pattern: /<script[\s\S]*?<\/script>/i, score: -4, type: "script" },
  { pattern: /<style[\s\S]*?<\/style>/i, score: -4, type: "style" }
];
|
||||
|
||||
/**
 * Heuristically decides whether `text` is markdown.
 *
 * The text is accepted immediately if any of `DEFINITE_PATTERNS` matches.
 * Otherwise a score is accumulated from `MARKDOWN_PATTERNS` (positive),
 * `NEGATIVE_PATTERNS` (negative), a few structural hints (multiple lines,
 * blank-line separated paragraphs, shallow indentation) and indentation
 * consistency between consecutive lines. The text is considered markdown
 * when the score is strictly greater than the threshold (4 for texts longer
 * than 100 characters, 3 otherwise).
 *
 * @param text - The text to inspect.
 * @param debug - When true, the individual score contributions and the final
 *   result are written to the console.
 * @returns `true` if the text is probably markdown.
 */
export function isProbablyMarkdown(text: string, debug = false): boolean {
  const log = (...args: unknown[]): void => {
    if (debug) console.log(...args);
  };

  // Check definite patterns first
  const definiteMatch = DEFINITE_PATTERNS.find((p) => p.pattern.test(text));
  if (definiteMatch) {
    log("Definite markdown match:", definiteMatch.type);
    return true;
  }

  let score = 0;
  const matches: string[] = [];
  const lines = text.split("\n");

  // Adds `points` to the score; the label is only retained for debug output.
  const award = (points: number, label: string): void => {
    score += points;
    if (debug) matches.push(label);
  };

  // Check positive patterns
  for (const { pattern, score: weight, type } of MARKDOWN_PATTERNS) {
    const matchCount = getPatternMatches(text, pattern);
    if (matchCount > 0) {
      const patternScore = weight * matchCount;
      award(patternScore, `+${patternScore} ${type} (${matchCount} matches)`);
    }
  }

  // Check negative patterns
  for (const { pattern, score: weight, type } of NEGATIVE_PATTERNS) {
    const matchCount = getPatternMatches(text, pattern);
    if (matchCount > 0) {
      const patternScore = weight * matchCount;
      award(patternScore, `${patternScore} ${type} (${matchCount} matches)`);
    }
  }

  // Structure indicators
  if (lines.length > 1) award(1, "+1 multiline");
  if (/\n\n/.test(text)) award(1, "+1 paragraphs");
  if (/^\s{1,4}[^\s]/m.test(text)) award(1, "+1 indentation");

  // Check formatting consistency: count lines whose indentation equals the
  // previous line's, or is exactly two columns deeper. A blank line has an
  // indent of -1, which also resets the comparison for the line after it.
  let consistentFormatting = 0;
  let prevLineIndent = -1;
  for (const line of lines) {
    const indent = line.search(/\S/);
    if (
      prevLineIndent !== -1 &&
      (indent === prevLineIndent || indent === prevLineIndent + 2)
    ) {
      consistentFormatting++;
    }
    prevLineIndent = indent;
  }

  if (consistentFormatting > lines.length / 2) {
    award(2, "+2 consistentFormatting");
  }

  const isLongText = text.length > 100;
  const threshold = isLongText ? 4 : 3;
  const isLikelyMarkdown = score > threshold;

  if (debug) {
    // Confidence is purely informational, so it is only computed here.
    const confidence = Math.min(
      100,
      Math.max(0, score * (isLongText ? 8 : 12))
    );
    log("Markdown detection result:", {
      score,
      isLikelyMarkdown,
      confidence,
      details: {
        length: text.length,
        lines: lines.length,
        consistentFormatting,
        threshold,
        matches
      }
    });
  }

  return isLikelyMarkdown;
}
|
||||
|
||||
/**
 * Counts how many times `pattern` occurs in `text`.
 *
 * The pattern is re-compiled with the `g` and `m` flags added so that every
 * occurrence is counted and `^` / `$` anchor to individual lines. Flags the
 * pattern already carries (for example `i`) are kept; passing only "gm" to
 * the `RegExp` constructor would silently discard them.
 *
 * @param text - The text to search.
 * @param pattern - The expression to count matches of.
 * @returns The number of matches (0 when there are none).
 */
function getPatternMatches(text: string, pattern: RegExp): number {
  const flags = Array.from(new Set(`${pattern.flags}gm`)).join("");
  const matches = text.match(new RegExp(pattern.source, flags)) ?? [];
  return matches.length;
}
|
||||
|
||||
@@ -18,16 +18,240 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import { test } from "vitest";
|
||||
import { convertTextToHTML } from "../clipboard-text-parser.js";
|
||||
import {
|
||||
convertTextToHTML,
|
||||
isProbablyMarkdown
|
||||
} from "../clipboard-text-parser.js";
|
||||
|
||||
// Plain-text inputs mixing CRLF and LF line endings and runs of blank lines.
const textCases = [
  `Hello\r\nworld`,
  `What\n\n\nNO!`,
  `Hello\r\nWorld\nwhat?\nNay!`
];

// One snapshot test per input; the test title is intentionally the same for
// every case.
for (const testCase of textCases) {
  test(`convert text to html`, (t) => {
    t.expect(convertTextToHTML(testCase)).toMatchSnapshot();
  });
}
|
||||
|
||||
// Fixtures for markdown detection: each entry pairs an input text with
// whether it is expected to be recognised as markdown.
const markdownCases = [
  // Single-line markdown elements
  {
    text: "# Header",
    isMarkdown: true
  },
  {
    text: "## Second level",
    isMarkdown: true
  },
  {
    text: "#Not a header",
    isMarkdown: false
  },
  {
    text: "- [ ] Task",
    isMarkdown: true
  },
  {
    text: "- [x] Completed task",
    isMarkdown: true
  },
  {
    text: "- []Not a task",
    isMarkdown: false
  },

  // Tables
  {
    text: "|Column 1|Column 2|",
    isMarkdown: true
  },
  {
    text: "| Name | Age |",
    isMarkdown: true
  },
  {
    text: "|Not|A|Table",
    isMarkdown: false
  },

  // Blockquotes
  {
    text: "> Quoted text",
    isMarkdown: true
  },
  {
    text: ">Not a quote",
    isMarkdown: false
  },

  // Multi-line content
  {
    text: `# Header
Some paragraph text

- List item 1
- List item 2`,
    isMarkdown: true
  },
  {
    text: `\`\`\`javascript
const x = 1;
console.log(x);
\`\`\``,
    isMarkdown: true
  },

  // Inline formatting
  {
    text: "This is **bold** text with some other **bold** content and some *italic* text and some `code` text and ~~strikethough~~ is ~~awesome~~ just okey!",
    isMarkdown: true
  },

  // Lists
  {
    text: `- Item 1
- Item 2`,
    isMarkdown: true
  },
  {
    text: `1. First
2. Second`,
    isMarkdown: true
  },

  // Plain text (negative cases)
  {
    text: "Just plain text",
    isMarkdown: false
  },
  {
    text: "Hello world",
    isMarkdown: false
  },
  {
    text: "12345",
    isMarkdown: false
  },

  // HTML (negative cases)
  {
    text: "<html><body>Test</body></html>",
    isMarkdown: false
  },
  {
    text: "<div>Content</div>",
    isMarkdown: false
  },

  // Special cases (negative)
  {
    text: "test@example.com",
    isMarkdown: false
  },
  {
    text: "https://example.com",
    isMarkdown: false
  },

  // Edge cases
  {
    text: "",
    isMarkdown: false
  },
  {
    text: " ",
    isMarkdown: false
  },
  {
    text: "*",
    isMarkdown: false
  },
  {
    text: "#",
    isMarkdown: false
  },

  // Mixed content
  {
    text: `# Header
Regular paragraph with **bold** and *italic*.

\`\`\`
code block
\`\`\`

1. List item
2. Another item

> Blockquote`,
    isMarkdown: true
  },

  // Ambiguous cases
  {
    text: "2 * 3 = 6",
    isMarkdown: false
  },
  {
    text: "c:\\path\\to\\file",
    isMarkdown: false
  },
  {
    text: "From: user@example.com",
    isMarkdown: false
  },

  // Formatting combinations

  {
    text: "[Link](https://example.com) with some **bold** text and other _stuff_ **Bold _and italic_**.",
    isMarkdown: true
  },
  {
    // NOTE(review): the literal for this case was empty in the copy at hand,
    // which contradicts the empty-string edge case above. It is presumed to
    // have been an image; confirm against the upstream fixture.
    text: "![Image](https://example.com/image.png)",
    isMarkdown: true
  },

  // Reference-style links
  {
    text: `[link][1]
[1]: https://example.com`,
    isMarkdown: true
  },

  // Whitespace variations
  {
    text: " # Header with spaces",
    isMarkdown: true
  },
  {
    text: "\t> Tabbed quote",
    isMarkdown: true
  },

  // Common user input patterns
  {
    text: "Hello\nWorld",
    isMarkdown: false
  },
  {
    text: "Item one\nItem two\nItem three",
    isMarkdown: false
  },
  {
    text: "YES",
    isMarkdown: false
  },
  {
    text: "OK",
    isMarkdown: false
  }
];
|
||||
|
||||
// Register one test per fixture, asserting that detection agrees with the
// fixture's expected outcome. The fixture text is part of the test title.
markdownCases.forEach(({ text, isMarkdown }) => {
  test(`detect as markdown ${text}`, (t) => {
    t.expect(isProbablyMarkdown(text)).toBe(isMarkdown);
  });
});
|
||||
|
||||
Reference in New Issue
Block a user