医学道
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
video/src/utils/marked/src/rules.ts

384 lines
15 KiB

import {
noopTest,
edit
} from './helpers';
export type Rule = RegExp | string;
export interface Rules {
[ruleName: string]: Pick<RegExp, 'exec'> | Rule | Rules;
}
type BlockRuleNames =
| 'newline'
| 'code'
| 'fences'
| 'hr'
| 'heading'
| 'blockquote'
| 'list'
| 'html'
| 'def'
| 'lheading'
| '_paragraph'
| 'text'
| '_label'
| '_title'
| 'bullet'
| 'listItemStart'
| '_tag'
| '_comment'
| 'paragraph'
| 'uote' ;
type BlockSubRuleNames = 'normal' | 'gfm' | 'pedantic';
type InlineRuleNames =
| 'escape'
| 'autolink'
| 'tag'
| 'link'
| 'reflink'
| 'nolink'
| 'reflinkSearch'
| 'code'
| 'br'
| 'text'
| '_punctuation'
| 'punctuation'
| 'blockSkip'
| 'escapedEmSt'
| '_comment'
| '_escapes'
| '_scheme'
| '_email'
| '_attribute'
| '_label'
| '_href'
| '_title'
| 'strong'
| '_extended_email'
| '_backpedal';
type InlineSubRuleNames = 'gfm' | 'emStrong' | 'normal' | 'pedantic'| 'breaks';
/**
* Block-Level Grammar
*/
// Not all rules are defined in the object literal
// @ts-expect-error
export const block: Record<BlockRuleNames, Rule> & Record<BlockSubRuleNames, Rules> & Rules = {
newline: /^(?: *(?:\n|$))+/,
code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
fences: /^ {0,3}(`{3,}(?=[^`\n]*(?:\n|$))|~{3,})([^\n]*)(?:\n|$)(?:|([\s\S]*?)(?:\n|$))(?: {0,3}\1[~`]* *(?=\n|$)|$)/,
hr: /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/,
heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
list: /^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/,
html: '^ {0,3}(?:' // optional indentation
+ '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1)
+ '|comment[^\\n]*(\\n+|$)' // (2)
+ '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3)
+ '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4)
+ '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5)
+ '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (6)
+ '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) open tag
+ '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag
+ ')',
def: /^ {0,3}\[(label)\]: *(?:\n *)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/,
table: noopTest,
lheading: /^((?:(?!^bull ).|\n(?!\n|bull ))+?)\n {0,3}(=+|-+) *(?:\n+|$)/,
// regex template, placeholders will be replaced according to different paragraph
// interruption rules of commonmark and the original markdown spec:
_paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/,
text: /^[^\n]+/
};
block._label = /(?!\s*\])(?:\\.|[^\[\]\\])+/;
block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/;
block.def = edit(block.def)
.replace('label', block._label)
.replace('title', block._title)
.getRegex();
block.bullet = /(?:[*+-]|\d{1,9}[.)])/;
block.listItemStart = edit(/^( *)(bull) */)
.replace('bull', block.bullet)
.getRegex();
block.list = edit(block.list)
.replace(/bull/g, block.bullet)
.replace('hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))')
.replace('def', '\\n+(?=' + block.def.source + ')')
.getRegex();
block._tag = 'address|article|aside|base|basefont|blockquote|body|caption'
+ '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption'
+ '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe'
+ '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option'
+ '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr'
+ '|track|ul';
block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/;
block.html = edit(block.html, 'i')
.replace('comment', block._comment)
.replace('tag', block._tag)
.replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/)
.getRegex();
block.lheading = edit(block.lheading)
.replace(/bull/g, block.bullet) // lists can interrupt
.getRegex();
block.paragraph = edit(block._paragraph)
.replace('hr', block.hr)
.replace('heading', ' {0,3}#{1,6} ')
.replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs
.replace('|table', '')
.replace('blockquote', ' {0,3}>')
.replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
.replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
.replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
.replace('tag', block._tag) // pars can be interrupted by type (6) html blocks
.getRegex();
block.blockquote = edit(block.blockquote)
.replace('paragraph', block.paragraph)
.getRegex();
/**
* Normal Block Grammar
*/
block.normal = { ...block };
/**
* GFM Block Grammar
*/
block.gfm = {
...block.normal,
table: '^ *([^\\n ].*\\|.*)\\n' // Header
+ ' {0,3}(?:\\| *)?(:?-+:? *(?:\\| *:?-+:? *)*)(?:\\| *)?' // Align
+ '(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells
};
block.gfm.table = edit(block.gfm.table as Rule)
.replace('hr', block.hr)
.replace('heading', ' {0,3}#{1,6} ')
.replace('blockquote', ' {0,3}>')
.replace('code', ' {4}[^\\n]')
.replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
.replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
.replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
.replace('tag', block._tag) // tables can be interrupted by type (6) html blocks
.getRegex();
block.gfm.paragraph = edit(block._paragraph)
.replace('hr', block.hr)
.replace('heading', ' {0,3}#{1,6} ')
.replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs
.replace('table', block.gfm.table as RegExp) // interrupt paragraphs with table
.replace('blockquote', ' {0,3}>')
.replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
.replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
.replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
.replace('tag', block._tag) // pars can be interrupted by type (6) html blocks
.getRegex();
/**
* Pedantic grammar (original John Gruber's loose markdown specification)
*/
block.pedantic = {
...block.normal,
html: edit(
'^ *(?:comment *(?:\\n|\\s*$)'
+ '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag
+ '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))')
.replace('comment', block._comment)
.replace(/tag/g, '(?!(?:'
+ 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub'
+ '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)'
+ '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b')
.getRegex(),
def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
heading: /^(#{1,6})(.*)(?:\n+|$)/,
fences: noopTest, // fences not supported
lheading: /^(.+?)\n {0,3}(=+|-+) *(?:\n+|$)/,
paragraph: edit(block.normal._paragraph as Rule)
.replace('hr', block.hr)
.replace('heading', ' *#{1,6} *[^\n]')
.replace('lheading', block.lheading)
.replace('blockquote', ' {0,3}>')
.replace('|fences', '')
.replace('|list', '')
.replace('|html', '')
.getRegex()
};
/**
* Inline-Level Grammar
*/
// Not all rules are defined in the object literal
// @ts-expect-error
export const inline: Record<InlineRuleNames, Rule> & Record<InlineSubRuleNames, Rules> & Rules = {
escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
url: noopTest,
tag: '^comment'
+ '|^</[a-zA-Z][\\w:-]*\\s*>' // self-closing tag
+ '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag
+ '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?>
+ '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html>
+ '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>', // CDATA section
link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
reflink: /^!?\[(label)\]\[(ref)\]/,
nolink: /^!?\[(ref)\](?:\[\])?/,
reflinkSearch: 'reflink|nolink(?!\\()',
emStrong: {
lDelim: /^(?:\*+(?:((?!\*)[punct])|[^\s*]))|^_+(?:((?!_)[punct])|([^\s_]))/,
// (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right.
// | Skip orphan inside strong | Consume to delim | (1) #*** | (2) a***#, a*** | (3) #***a, ***a | (4) ***# | (5) #***# | (6) a***a
rDelimAst: /^[^_*]*?__[^_*]*?\*[^_*]*?(?=__)|[^*]+(?=[^*])|(?!\*)[punct](\*+)(?=[\s]|$)|[^punct\s](\*+)(?!\*)(?=[punct\s]|$)|(?!\*)[punct\s](\*+)(?=[^punct\s])|[\s](\*+)(?!\*)(?=[punct])|(?!\*)[punct](\*+)(?!\*)(?=[punct])|[^punct\s](\*+)(?=[^punct\s])/,
rDelimUnd: /^[^_*]*?\*\*[^_*]*?_[^_*]*?(?=\*\*)|[^_]+(?=[^_])|(?!_)[punct](_+)(?=[\s]|$)|[^punct\s](_+)(?!_)(?=[punct\s]|$)|(?!_)[punct\s](_+)(?=[^punct\s])|[\s](_+)(?!_)(?=[punct])|(?!_)[punct](_+)(?!_)(?=[punct])/ // ^- Not allowed for _
},
code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
br: /^( {2,}|\\)\n(?!\s*$)/,
del: noopTest,
text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/,
punctuation: /^((?![*_])[\spunctuation])/
};
// // list of unicode punctuation marks, plus any missing characters from CommonMark spec
// inline._punctuation = '\\p{P}$+<=>`^|~';
// list of punctuation marks from CommonMark spec
// without * and _ to handle the different emphasis markers * and _
inline._uc_punctuation = '\\u00A1\\u00A7\\u00AB\\u00B6\\u00B7\\u00BB\\u00BF\\u037E\\u0387\\u055A-\\u055F\\u0589\\u058A\\u05BE\\u05C0\\u05C3\\u05C6\\u05F3\\u05F4\\u0609\\u060A\\u060C\\u060D\\u061B\\u061E\\u061F\\u066A-\\u066D\\u06D4\\u0700-\\u070D\\u07F7-\\u07F9\\u0830-\\u083E\\u085E\\u0964\\u0965\\u0970\\u0AF0\\u0DF4\\u0E4F\\u0E5A\\u0E5B\\u0F04-\\u0F12\\u0F14\\u0F3A-\\u0F3D\\u0F85\\u0FD0-\\u0FD4\\u0FD9\\u0FDA\\u104A-\\u104F\\u10FB\\u1360-\\u1368\\u1400\\u166D\\u166E\\u169B\\u169C\\u16EB-\\u16ED\\u1735\\u1736\\u17D4-\\u17D6\\u17D8-\\u17DA\\u1800-\\u180A\\u1944\\u1945\\u1A1E\\u1A1F\\u1AA0-\\u1AA6\\u1AA8-\\u1AAD\\u1B5A-\\u1B60\\u1BFC-\\u1BFF\\u1C3B-\\u1C3F\\u1C7E\\u1C7F\\u1CC0-\\u1CC7\\u1CD3\\u2010-\\u2027\\u2030-\\u2043\\u2045-\\u2051\\u2053-\\u205E\\u207D\\u207E\\u208D\\u208E\\u2308-\\u230B\\u2329\\u232A\\u2768-\\u2775\\u27C5\\u27C6\\u27E6-\\u27EF\\u2983-\\u2998\\u29D8-\\u29DB\\u29FC\\u29FD\\u2CF9-\\u2CFC\\u2CFE\\u2CFF\\u2D70\\u2E00-\\u2E2E\\u2E30-\\u2E42\\u3001-\\u3003\\u3008-\\u3011\\u3014-\\u301F\\u3030\\u303D\\u30A0\\u30FB\\uA4FE\\uA4FF\\uA60D-\\uA60F\\uA673\\uA67E\\uA6F2-\\uA6F7\\uA874-\\uA877\\uA8CE\\uA8CF\\uA8F8-\\uA8FA\\uA8FC\\uA92E\\uA92F\\uA95F\\uA9C1-\\uA9CD\\uA9DE\\uA9DF\\uAA5C-\\uAA5F\\uAADE\\uAADF\\uAAF0\\uAAF1\\uABEB\\uFD3E\\uFD3F\\uFE10-\\uFE19\\uFE30-\\uFE52\\uFE54-\\uFE61\\uFE63\\uFE68\\uFE6A\\uFE6B\\uFF01-\\uFF03\\uFF05-\\uFF0A\\uFF0C-\\uFF0F\\uFF1A\\uFF1B\\uFF1F\\uFF20\\uFF3B-\\uFF3D\\uFF3F\\uFF5B\\uFF5D\\uFF5F-\\uFF65';
inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~\\\\' + inline._uc_punctuation;
inline.punctuation = edit(inline.punctuation, 'u').replace(/punctuation/g, inline._punctuation).getRegex();
// sequences em should skip over [title](link), `code`, <html>
inline.blockSkip = /\[[^[\]]*?\]\([^\(\)]*?\)|`[^`]*?`|<[^<>]*?>/g;
inline.anyPunctuation = /\\[punct]/g;
inline._escapes = /\\([punct])/g;
inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex();
inline.emStrong.lDelim = edit(inline.emStrong.lDelim as Rule, 'u')
.replace(/punct/g, inline._punctuation)
.getRegex();
inline.emStrong.rDelimAst = edit(inline.emStrong.rDelimAst as Rule, 'gu')
.replace(/punct/g, inline._punctuation)
.getRegex();
inline.emStrong.rDelimUnd = edit(inline.emStrong.rDelimUnd as Rule, 'gu')
.replace(/punct/g, inline._punctuation)
.getRegex();
inline.anyPunctuation = edit(inline.anyPunctuation as Rule, 'gu')
.replace(/punct/g, inline._punctuation)
.getRegex();
inline._escapes = edit(inline._escapes, 'gu')
.replace(/punct/g, inline._punctuation)
.getRegex();
inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;
inline.autolink = edit(inline.autolink)
.replace('scheme', inline._scheme)
.replace('email', inline._email)
.getRegex();
inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/;
inline.tag = edit(inline.tag)
.replace('comment', inline._comment)
.replace('attribute', inline._attribute)
.getRegex();
inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/;
inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/;
inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/;
inline.link = edit(inline.link)
.replace('label', inline._label)
.replace('href', inline._href)
.replace('title', inline._title)
.getRegex();
inline.reflink = edit(inline.reflink)
.replace('label', inline._label)
.replace('ref', block._label)
.getRegex();
inline.nolink = edit(inline.nolink)
.replace('ref', block._label)
.getRegex();
inline.reflinkSearch = edit(inline.reflinkSearch, 'g')
.replace('reflink', inline.reflink)
.replace('nolink', inline.nolink)
.getRegex();
/**
* Normal Inline Grammar
*/
inline.normal = { ...inline };
/**
* Pedantic Inline Grammar
*/
inline.pedantic = {
...inline.normal,
strong: {
start: /^__|\*\*/,
middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
endAst: /\*\*(?!\*)/g,
endUnd: /__(?!_)/g
},
em: {
start: /^_|\*/,
middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
endAst: /\*(?!\*)/g,
endUnd: /_(?!_)/g
},
link: edit(/^!?\[(label)\]\((.*?)\)/)
.replace('label', inline._label)
.getRegex(),
reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/)
.replace('label', inline._label)
.getRegex()
};
/**
* GFM Inline Grammar
*/
inline.gfm = {
...inline.normal,
escape: edit(inline.escape).replace('])', '~|])').getRegex(),
_extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,
url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/,
_backpedal: /(?:[^?!.,:;*_'"~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_'"~)]+(?!$))+/,
del: /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/,
text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/
};
inline.gfm.url = edit(inline.gfm.url as Rule, 'i')
.replace('email', inline.gfm._extended_email as RegExp)
.getRegex();
/**
* GFM + Line Breaks Inline Grammar
*/
inline.breaks = {
...inline.gfm,
br: edit(inline.br).replace('{2,}', '*').getRegex(),
text: edit(inline.gfm.text as Rule)
.replace('\\b_', '\\b_| {2,}\\n')
.replace(/\{2,\}/g, '*')
.getRegex()
};