Coverage for gws-app/gws/lib/vendor/dog/markdown.py: 0%
309 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-17 01:37 +0200
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-17 01:37 +0200
1import re
2from typing import List
4import mistune
5from mistune import Markdown
7import pygments
8import pygments.util
9import pygments.lexers
10import pygments.formatters.html
12from . import util
class Element(util.Data):
    """A node of the parsed Markdown tree.

    Nodes are created by the ``*_parse`` methods of ``Renderer`` and turned
    into HTML by the matching ``*_render`` methods. Which attributes are set
    depends on the node ``type``.
    """

    type: str  # node kind; Renderer.render_element dispatches on it

    align: str  # table_cell: value emitted as ``text-align``
    alt: str  # image: alternative text
    children: List['Element']  # nested nodes, if any
    id: str  # heading: emitted as the ``id`` attribute; not set by any *_parse method here
    info: str  # block_code: info string ("lang", optionally followed by attributes)
    is_head: bool  # table_cell: rendered as <th> when true, <td> otherwise
    level: int  # heading, list, list_item: nesting/heading level
    link: str  # link: target, emitted as ``href``
    ordered: bool  # list: rendered as <ol> when true, <ul> otherwise
    sid: str  # NOTE(review): not used anywhere in this module -- confirm purpose
    src: str  # image: source URL
    start: str  # list: emitted as the ``start`` attribute
    text: str  # text, codespan, block_code, inline_decoration: raw text
    html: str  # block_html, inline_html: markup emitted verbatim
    title: str  # image, link: emitted as the ``title`` attribute

    classname: str  # inline_decoration_plugin
    attributes: dict  # link_attributes_plugin

    def __repr__(self):
        """Return the repr of the instance attribute dict."""
        return repr(vars(self))
def parser() -> Markdown:
    """Create a mistune Markdown object wired to ``AstRenderer``.

    Enables the built-in ``table`` and ``url`` plugins together with the
    two plugins defined in this module.
    """
    plugins = ['table', 'url', inline_decoration_plugin, link_attributes_plugin]
    return mistune.create_markdown(renderer=AstRenderer(), plugins=plugins)
# plugin API reference: https://mistune.lepture.com/en/v2.0.5/advanced.html#create-plugins

# plugin: inline decorations
# {someclass some text} => <span class="decoration_someclass">some text</span>
def inline_decoration_plugin(md):
    """Register the ``inline_decoration`` inline rule on a mistune parser.

    The rule matches ``{someclass some text}`` and produces a token
    ``('inline_decoration', classname, text)``.

    Args:
        md: the mistune Markdown object; its ``inline`` parser is modified.
    """
    name = 'inline_decoration'
    pattern = r'\{(\w+ .+?)\}'

    def parser(inline, m, state):
        # The pattern also accepts a body that is blank after the class name
        # (e.g. ``{cls  }``). Whitespace splitting then yields one item only,
        # so pad with an empty text to keep the token arity constant.
        parts = m.group(1).split(None, 1)
        text = parts[1] if len(parts) > 1 else ''
        return name, parts[0], text

    md.inline.register_rule(name, pattern, parser)
    md.inline.rules.append(name)
# plugin: link attributes
# https://pandoc.org/MANUAL.html#extension-link_attributes
def link_attributes_plugin(md):
    """Register the ``link_attributes`` inline rule on a mistune parser.

    The rule matches a ``{...}`` group directly following ``)`` or a
    backtick. If the group parses as attributes, the token is
    ``('link_attributes', raw_text, attributes)``; otherwise the raw text is
    passed through as a plain ``text`` token.
    """
    rule_name = 'link_attributes'
    rule_pattern = r'(?<=[)`]){.+?}'

    def parse_match(inline, m, state):
        raw = m.group(0)
        # drop the surrounding braces before parsing
        parsed = parse_attributes(raw[1:-1])
        if not parsed:
            return 'text', raw
        return rule_name, raw, parsed

    md.inline.register_rule(rule_name, rule_pattern, parse_match)
    md.inline.rules.append(rule_name)
84##
def process(text):
    """Convert Markdown *text* to an HTML string.

    The text is parsed into top-level elements with ``parser()`` and each
    element is rendered with a fresh ``Renderer``.
    """
    elements = parser()(text)
    renderer = Renderer()
    return ''.join(map(renderer.render_element, elements))
def strip_text_content(el: Element):
    """Remove trailing whitespace from the end of *el*'s children, in place.

    The last child's text is right-stripped; if it becomes empty the child
    is removed and the new last child is processed. Stops at the first
    child that has no text or that keeps non-whitespace text.
    """
    kids = el.children
    while kids:
        last = kids[-1]
        if not last.text:
            return
        last.text = last.text.rstrip()
        if last.text:
            return
        kids.pop()
def text_from_element(el: Element) -> str:
    """Return the plain text of *el*, stripped of surrounding whitespace.

    An element's own text takes precedence; otherwise the texts of its
    children are collected recursively and joined with single spaces.
    """
    if el.text:
        return el.text.strip()
    if not el.children:
        return ''
    parts = [text_from_element(child) for child in el.children]
    return ' '.join(parts).strip()
# based on mistune/renderers.AstRenderer
class AstRenderer:
    """Renderer passed to mistune that builds ``Element`` nodes, not HTML.

    Token names are resolved to the ``<name>_parse`` methods of a
    ``Renderer`` instance.
    """

    NAME = 'ast'

    def __init__(self):
        self.renderer = Renderer()

    def register(self, name, method):
        """Accept a renderer registration and ignore it."""
        return None

    def _get_method(self, name):
        """Return the ``<name>_parse`` method of the wrapped renderer."""
        return getattr(self.renderer, '{}_parse'.format(name))

    def finalize(self, elements: List[Element]):
        """Return *elements* as a list with 'link attributes' merged.

        A ``link_attributes`` element that directly follows an image, link
        or codespan hands its attributes to that element and is dropped.
        Any other ``link_attributes`` element is kept and retyped as text.
        """
        merged = []
        for el in elements:
            if el.type != 'link_attributes':
                merged.append(el)
                continue
            if merged and merged[-1].type in {'image', 'link', 'codespan'}:
                merged[-1].attributes = el.attributes
                continue
            el.type = 'text'
            merged.append(el)
        return merged
139##
class Renderer:
    """Builds ``Element`` nodes and renders them to HTML.

    For every node type ``X`` there is an ``X_parse`` method that creates
    the element and, except for ``link_attributes``, an ``X_render`` method
    that returns its HTML. Both are looked up by name, so the method names
    are part of the contract.
    """

    def render_children(self, el: Element):
        """Return the concatenated HTML of *el*'s children ('' if none)."""
        if not el.children:
            return ''
        return ''.join(map(self.render_element, el.children))

    def render_element(self, el: Element):
        """Render *el* with the ``<type>_render`` method for its type."""
        handler = getattr(self, f'{el.type}_render')
        return handler(el)

    def render_a(self, href, title, content, el):
        """Return an ``<a>`` tag; *content* falls back to *href* if empty.

        Attributes stored on *el* override ``href`` and ``title``.
        """
        atts = {'href': href}
        if title:
            atts['title'] = escape(title)
        if el.attributes:
            atts.update(el.attributes)
        body = content or href
        return f'<a{attributes(atts)}>{body}</a>'

    ##

    def block_code_parse(self, text, info=None):
        return Element(type='block_code', text=text, info=info)

    def block_code_render(self, el: Element):
        """Return the code block highlighted by pygments.

        The info string may hold a language name, attributes, or both.
        ``numbers=N`` turns on line numbers starting at N; ``title=...``
        adds a title paragraph above the block.
        """
        # right-strip every line, then drop blank lines at both ends
        lines = [line.rstrip() for line in el.text.split('\n')]
        first, last = 0, len(lines)
        while first < last and not lines[first]:
            first += 1
        while last > first and not lines[last - 1]:
            last -= 1
        text = '\n'.join(lines[first:last])

        lang = ''
        atts = {}
        if el.info:
            # 'javascript' or 'javascript title=...' or 'title=...'
            m = re.match(r'^(\w+(?=(\s|$)))?(.*)$', el.info.strip())
            if m:
                lang = m.group(1)
                atts = parse_attributes(m.group(3))
        lang = lang or 'text'

        try:
            lexer = pygments.lexers.get_lexer_by_name(lang, stripall=True)
        except pygments.util.ClassNotFound:
            # unknown language: warn and fall back to plain text
            util.log.warning(f'pygments lexer {lang!r} not found')
            lexer = pygments.lexers.get_lexer_by_name('text', stripall=True)

        options = {'noclasses': True, 'nobackground': True}
        if 'numbers' in atts:
            options['linenos'] = 'table'
            options['linenostart'] = atts['numbers']

        formatter = pygments.formatters.html.HtmlFormatter(**options)
        html = pygments.highlight(text, lexer, formatter)

        if 'title' in atts:
            html = f'<p class="highlighttitle">{escape(atts["title"])}</p>' + html

        return html

    def block_error_parse(self, children=None):
        return Element(type='block_error', children=children)

    def block_error_render(self, el: Element):
        return f'<div class="error">{self.render_children(el)}</div>\n'

    def block_html_parse(self, html):
        return Element(type='block_html', html=html)

    def block_html_render(self, el: Element):
        # raw HTML is passed through unchanged
        return el.html

    def block_quote_parse(self, children=None):
        return Element(type='block_quote', children=children)

    def block_quote_render(self, el: Element):
        return f'<blockquote>\n{self.render_children(el)}</blockquote>\n'

    def block_text_parse(self, children=None):
        return Element(type='block_text', children=children)

    def block_text_render(self, el: Element):
        return self.render_children(el)

    def codespan_parse(self, text):
        return Element(type='codespan', text=text)

    def codespan_render(self, el: Element):
        inner = escape(el.text)
        return f'<code{attributes(el.attributes)}>{inner}</code>'

    def emphasis_parse(self, children):
        return Element(type='emphasis', children=children)

    def emphasis_render(self, el: Element):
        return f'<em>{self.render_children(el)}</em>'

    def heading_parse(self, children, level):
        return Element(type='heading', children=children, level=level)

    def heading_render(self, el: Element):
        """Return an ``<hN>`` tag, with an ``id`` attribute if *el* has one."""
        inner = self.render_children(el)
        tag = f'h{el.level}'
        id_attr = f' id="{el.id}"' if el.id else ''
        return f'<{tag}{id_attr}>{inner}</{tag}>\n'

    def image_parse(self, src, alt="", title=None):
        return Element(type='image', src=src, alt=alt, title=title)

    def image_render(self, el: Element):
        """Return an ``<img/>`` tag.

        ``width`` and ``height`` attributes are moved into ``style``;
        purely numeric values get a ``px`` suffix.
        """
        atts = {}
        if el.src:
            atts['src'] = el.src
        if el.alt:
            atts['alt'] = escape(el.alt)
        if el.title:
            atts['title'] = escape(el.title)
        if el.attributes:
            atts.update(el.attributes)
            # each dimension is prepended to whatever style exists so far
            for dim in ('width', 'height'):
                size = atts.pop(dim, '')
                if not size:
                    continue
                if size.isdigit():
                    size += 'px'
                atts['style'] = f"{dim}:{size};" + atts.get('style', '')

        return f'<img{attributes(atts)}/>'

    def inline_decoration_parse(self, classname, text):
        return Element(type='inline_decoration', classname=classname, text=text)

    def inline_decoration_render(self, el: Element):
        inner = escape(el.text)
        return f'<span class="decoration_{el.classname}">{inner}</span>'

    def inline_html_parse(self, html):
        return Element(type='inline_html', html=html)

    def inline_html_render(self, el: Element):
        # raw HTML is passed through unchanged
        return el.html

    def linebreak_parse(self):
        return Element(type='linebreak')

    def linebreak_render(self, el: Element):
        return '<br/>\n'

    def link_parse(self, link, children=None, title=None):
        # a plain string as content is wrapped into a single text node
        if isinstance(children, str):
            children = [Element(type='text', text=children)]
        return Element(type='link', link=link, children=children, title=title)

    def link_render(self, el: Element):
        inner = self.render_children(el)
        return self.render_a(el.link, el.title, inner, el)

    def link_attributes_parse(self, text, attributes):
        return Element(type='link_attributes', text=text, attributes=attributes)

    def list_item_parse(self, children, level):
        return Element(type='list_item', children=children, level=level)

    def list_item_render(self, el: Element):
        return f'<li>{self.render_children(el)}</li>\n'

    def list_parse(self, children, ordered, level, start=None):
        return Element(type='list', children=children, ordered=ordered, level=level, start=start)

    def list_render(self, el: Element):
        """Return an ``<ol>`` or ``<ul>`` tag, with ``start`` if it is set."""
        inner = self.render_children(el)
        tag = 'ol' if el.ordered else 'ul'
        atts = {'start': el.start} if el.start else {}
        return f'<{tag}{attributes(atts)}>\n{inner}\n</{tag}>\n'

    def newline_parse(self):
        return Element(type='newline')

    def newline_render(self, el: Element):
        return ''

    def paragraph_parse(self, children=None):
        return Element(type='paragraph', children=children)

    def paragraph_render(self, el: Element):
        return f'<p>{self.render_children(el)}</p>\n'

    def strong_parse(self, children=None):
        return Element(type='strong', children=children)

    def strong_render(self, el: Element):
        return f'<strong>{self.render_children(el)}</strong>'

    def table_body_parse(self, children=None):
        return Element(type='table_body', children=children)

    def table_body_render(self, el: Element):
        return f'<tbody>\n{self.render_children(el)}</tbody>\n'

    def table_cell_parse(self, children, align=None, is_head=False):
        return Element(type='table_cell', children=children, align=align, is_head=is_head)

    def table_cell_render(self, el: Element):
        """Return a ``<th>`` or ``<td>`` tag, with ``text-align`` if set."""
        inner = self.render_children(el)
        tag = 'th' if el.is_head else 'td'
        atts = {'style': f'text-align:{el.align}'} if el.align else {}
        return f'<{tag}{attributes(atts)}>{inner}</{tag}>'

    def table_head_parse(self, children=None):
        return Element(type='table_head', children=children)

    def table_head_render(self, el: Element):
        return f'<thead>\n<tr>{self.render_children(el)}</tr>\n</thead>\n'

    def table_parse(self, children=None):
        return Element(type='table', children=children)

    def table_render(self, el: Element):
        return f'<table class="markdown-table">{self.render_children(el)}</table>\n'

    def table_row_parse(self, children=None):
        return Element(type='table_row', children=children)

    def table_row_render(self, el: Element):
        return f'<tr>{self.render_children(el)}</tr>\n'

    def text_parse(self, text):
        return Element(type='text', text=text)

    def text_render(self, el: Element):
        return escape(el.text)

    def thematic_break_parse(self):
        return Element(type='thematic_break')

    def thematic_break_render(self, el: Element):
        return '<hr/>\n'
def escape(s, quote=True):
    """Return *s* with HTML special characters replaced by entities.

    Args:
        s: the string to escape.
        quote: if true, double quotes are escaped as well, which makes the
            result safe inside a double-quoted attribute value.

    Returns:
        The escaped string.
    """
    # '&' must be replaced first, otherwise the entities produced below
    # would be escaped a second time
    s = s.replace("&", "&amp;")
    s = s.replace("<", "&lt;")
    s = s.replace(">", "&gt;")
    if quote:
        s = s.replace('"', "&quot;")
    return s
def attributes(attrs):
    """Format a mapping as HTML attributes.

    Each item becomes `` key="value"`` with a leading space; values are
    inserted as given, without escaping. Returns '' for an empty or
    missing mapping.
    """
    if not attrs:
        return ''
    return ''.join(f' {key}="{value}"' for key, value in attrs.items())
422##
# One attribute token followed by a single space (verbose regex):
#   #ident            -> group 'id'
#   .ident            -> group 'class'
#   key="some value"  -> groups 'key' and 'quoted'
#   key=value         -> groups 'key' and 'simple'
_ATTRIBUTE_RE = r'''(?x)
    (
        (\# (?P<id> [\w-]+) )
        |
        (\. (?P<class> [\w-]+) )
        |
        (
            (?P<key> \w+)
            =
            (
                " (?P<quoted> [^"]*) "
                |
                (?P<simple> \S+)
            )
        )
    )
    \x20
'''


def parse_attributes(text):
    """Parse a pandoc-style attribute string into a dict.

    Accepts space-separated ``#id``, ``.class`` and ``key=value`` /
    ``key="quoted value"`` tokens. Several classes are joined with spaces
    under the ``class`` key; a later ``#id`` or key replaces an earlier one.

    Returns:
        The attribute dict, or an empty dict if any part of *text* is not a
        valid token (this includes empty input).
    """
    # every token must be followed by a space, so terminate the last one too
    remaining = text.strip() + ' '
    result = {}

    while remaining:
        found = re.match(_ATTRIBUTE_RE, remaining)
        if found is None:
            # all or nothing: one bad token invalidates the whole string
            return {}

        remaining = remaining[found.end():].lstrip()

        groups = found.groupdict()
        if groups['id']:
            result['id'] = groups['id']
            continue
        if groups['class']:
            known = result.get('class', '')
            result['class'] = (known + ' ' + groups['class']).strip()
            continue
        result[groups['key']] = groups['simple'] or groups['quoted'].strip()

    return result