Coverage for gws-app/gws/lib/vendor/dog/markdown.py: 0%

309 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-17 01:37 +0200

1import re 

2from typing import List 

3 

4import mistune 

5from mistune import Markdown 

6 

7import pygments 

8import pygments.util 

9import pygments.lexers 

10import pygments.formatters.html 

11 

12from . import util 

13 

14 

class Element(util.Data):
    """One node of the parsed markdown tree.

    A single class is used for every node kind; ``type`` names the kind
    (e.g. ``'paragraph'``, ``'link'``) and only the fields relevant to that
    kind are filled in by the corresponding ``Renderer.<type>_parse`` method.
    """

    # node kind, also used to pick the '<type>_render' method
    type: str

    align: str
    alt: str
    children: List['Element']
    info: str
    is_head: bool
    level: int
    link: str
    ordered: bool
    sid: str
    src: str
    start: str
    text: str
    html: str
    title: str

    classname: str  # inline_decoration_plugin
    attributes: dict  # link_attributes_plugin

    def __repr__(self):
        # show exactly the attributes stored on this instance
        return repr(self.__dict__)

38 

39 

def parser() -> Markdown:
    """Build the mistune parser used by this module.

    The parser is wired to ``AstRenderer`` and enables the built-in
    ``table`` and ``url`` plugins plus the two plugins defined in this file.
    """
    plugins = ['table', 'url', inline_decoration_plugin, link_attributes_plugin]
    return mistune.create_markdown(renderer=AstRenderer(), plugins=plugins)

46 

47 

48# plugin API reference: https://mistune.lepture.com/en/v2.0.5/advanced.html#create-plugins 

49 

50# plugin: inline decorations 

51# {someclass some text} => <span class="decoration_someclass">some text</span> 

52 

53 

def inline_decoration_plugin(md):
    """Register the ``{classname some text}`` inline rule on a mistune parser.

    A successful match yields the token
    ``('inline_decoration', classname, text)``.

    Args:
        md: the markdown object being configured; only ``md.inline`` is used.
    """
    name = 'inline_decoration'
    pattern = r'\{(\w+ .+?)\}'

    def parse_decoration(inline, m, state):
        parts = m.group(1).split(None, 1)
        if len(parts) != 2:
            # The pattern also accepts input like '{red  }', where the part
            # after the class name is whitespace only. Splitting then gives a
            # single item and the token would lack its text argument, so the
            # source is passed through as plain text instead.
            return 'text', m.group(0)
        return name, *parts

    md.inline.register_rule(name, pattern, parse_decoration)
    md.inline.rules.append(name)

63 

64 

65# plugin: link attributes 

66# https://pandoc.org/MANUAL.html#extension-link_attributes 

67 

68 

def link_attributes_plugin(md):
    """Register the ``{...}`` attribute-list inline rule on a mistune parser.

    The rule only fires directly after a ``)`` or a backtick. If the braces
    contain a valid attribute list, the token
    ``('link_attributes', source, attributes)`` is produced; otherwise the
    source is returned as a plain ``'text'`` token.

    Args:
        md: the markdown object being configured; only ``md.inline`` is used.
    """
    name = 'link_attributes'
    pattern = r'(?<=[)`]){.+?}'

    def parse_braces(inline, m, state):
        raw = m.group(0)
        # drop the surrounding braces before parsing
        atts = parse_attributes(raw[1:-1])
        return (name, raw, atts) if atts else ('text', raw)

    md.inline.register_rule(name, pattern, parse_braces)
    md.inline.rules.append(name)

82 

83 

84## 

85 

def process(text):
    """Convert markdown ``text`` to an HTML string.

    The text is parsed into top-level elements, each of which is rendered
    and the results are concatenated.
    """
    elements = parser()(text)
    renderer = Renderer()
    return ''.join(map(renderer.render_element, elements))

91 

92 

def strip_text_content(el: Element):
    """Remove trailing whitespace from the end of ``el``'s children, in place.

    Working backwards from the last child: its text is right-stripped; a
    child whose text becomes empty is removed and the one before it is
    examined. Processing stops at the first child that has no text at all
    or that still has text after stripping.
    """
    while el.children:
        last = el.children[-1]
        if not last.text:
            break
        last.text = last.text.rstrip()
        if last.text:
            break
        # whitespace-only child: discard it and look at the previous one
        el.children.pop()

101 

102 

def text_from_element(el: Element) -> str:
    """Return the plain text of ``el``, stripped of surrounding whitespace.

    The element's own text wins if present; otherwise the texts of its
    children are collected recursively and joined with single spaces.
    An element with neither yields ``''``.
    """
    if el.text:
        return el.text.strip()
    if not el.children:
        return ''
    return ' '.join(map(text_from_element, el.children)).strip()

109 

110 

111# based on mistune/renderers.AstRenderer 

112 

class AstRenderer:
    """Renderer object handed to mistune that builds ``Element`` nodes.

    Token names are resolved to the ``<name>_parse`` methods of an internal
    ``Renderer`` instance.
    """

    NAME = 'ast'

    def __init__(self):
        self.renderer = Renderer()

    def register(self, name, method):
        """Accept and ignore a method registration (intentional no-op)."""
        pass

    def _get_method(self, name):
        """Return the ``<name>_parse`` method of the wrapped renderer."""
        return getattr(self.renderer, f'{name}_parse')

    def finalize(self, elements: List[Element]):
        """Merge 'link attributes' elements into the element before them.

        A ``link_attributes`` element that directly follows an image, link
        or codespan hands its attributes to that element and is dropped.
        Any other ``link_attributes`` element is kept and retyped as plain
        ``text``. All other elements pass through unchanged.
        """
        merged = []
        for el in elements:
            if el.type != 'link_attributes':
                merged.append(el)
                continue

            target = merged[-1] if merged else None
            if target is not None and target.type in {'image', 'link', 'codespan'}:
                target.attributes = el.attributes
            else:
                # nothing to attach to: show the braces as ordinary text
                el.type = 'text'
                merged.append(el)
        return merged

137 

138 

139## 

140 

class Renderer:
    """Builds ``Element`` nodes and renders them to HTML.

    For an element type ``X`` the method ``X_parse`` creates the ``Element``
    from the token arguments and ``X_render`` returns its HTML. The
    ``link_attributes`` type has a parse method only.
    """

    def render_children(self, el: Element):
        """Return the concatenated HTML of ``el``'s children ('' if none)."""
        if not el.children:
            return ''
        return ''.join(map(self.render_element, el.children))

    def render_element(self, el: Element):
        """Render ``el`` with the ``<type>_render`` method for its type."""
        return getattr(self, f'{el.type}_render')(el)

    def render_a(self, href, title, content, el):
        """Return an ``<a>`` tag; ``href`` is used as the label if ``content`` is empty.

        Attributes stored on ``el`` are applied last and so override
        ``href`` and ``title``.
        """
        atts = {'href': href}
        if title:
            atts['title'] = escape(title)
        if el.attributes:
            atts.update(el.attributes)
        label = content or href
        return f'<a{attributes(atts)}>{label}</a>'

    ##

    def block_code_parse(self, text, info=None):
        return Element(type='block_code', text=text, info=info)

    def block_code_render(self, el: Element):
        """Return the code block highlighted by pygments.

        The info string may hold a language name, an attribute list, or
        both. Recognized attributes are ``numbers`` (line numbering, value
        is the first line number) and ``title`` (a caption paragraph put in
        front of the highlighted code).
        """
        # right-trim every line, then drop blank lines at both ends
        rows = [row.rstrip() for row in el.text.split('\n')]
        while rows and not rows[0]:
            del rows[0]
        while rows and not rows[-1]:
            del rows[-1]
        code = '\n'.join(rows)

        lang, atts = '', {}
        if el.info:
            # 'javascript' or 'javascript title=...' or 'title=...'
            m = re.match(r'^(\w+(?=(\s|$)))?(.*)$', el.info.strip())
            if m:
                lang, atts = m.group(1), parse_attributes(m.group(3))

        lang = lang or 'text'
        try:
            lexer = pygments.lexers.get_lexer_by_name(lang, stripall=True)
        except pygments.util.ClassNotFound:
            # unknown language: warn and highlight as plain text
            util.log.warning(f'pygments lexer {lang!r} not found')
            lexer = pygments.lexers.get_lexer_by_name('text', stripall=True)

        options = {'noclasses': True, 'nobackground': True}
        if 'numbers' in atts:
            options.update(linenos='table', linenostart=atts['numbers'])

        formatter = pygments.formatters.html.HtmlFormatter(**options)
        html = pygments.highlight(code, lexer, formatter)
        if 'title' in atts:
            html = f'<p class="highlighttitle">{escape(atts["title"])}</p>' + html
        return html

    def block_error_parse(self, children=None):
        return Element(type='block_error', children=children)

    def block_error_render(self, el: Element):
        return f'<div class="error">{self.render_children(el)}</div>\n'

    def block_html_parse(self, html):
        return Element(type='block_html', html=html)

    def block_html_render(self, el: Element):
        # raw HTML is passed through untouched
        return el.html

    def block_quote_parse(self, children=None):
        return Element(type='block_quote', children=children)

    def block_quote_render(self, el: Element):
        return f'<blockquote>\n{self.render_children(el)}</blockquote>\n'

    def block_text_parse(self, children=None):
        return Element(type='block_text', children=children)

    def block_text_render(self, el: Element):
        return self.render_children(el)

    def codespan_parse(self, text):
        return Element(type='codespan', text=text)

    def codespan_render(self, el: Element):
        body = escape(el.text)
        return f'<code{attributes(el.attributes)}>{body}</code>'

    def emphasis_parse(self, children):
        return Element(type='emphasis', children=children)

    def emphasis_render(self, el: Element):
        return f'<em>{self.render_children(el)}</em>'

    def heading_parse(self, children, level):
        return Element(type='heading', children=children, level=level)

    def heading_render(self, el: Element):
        """Return an ``<hN>`` tag, with an ``id`` attribute if ``el.id`` is set."""
        inner = self.render_children(el)
        tag = 'h' + str(el.level)
        id_attr = f' id="{el.id}"' if el.id else ''
        return f'<{tag}{id_attr}>{inner}</{tag}>\n'

    def image_parse(self, src, alt="", title=None):
        return Element(type='image', src=src, alt=alt, title=title)

    def image_render(self, el: Element):
        """Return an ``<img/>`` tag.

        ``width`` and ``height`` attributes are not emitted as such but
        converted to CSS and prepended to ``style``; values consisting of
        digits only get a ``px`` unit.
        """
        atts = {}
        if el.src:
            atts['src'] = el.src
        for key in ('alt', 'title'):
            value = getattr(el, key)
            if value:
                atts[key] = escape(value)
        if el.attributes:
            atts.update(el.attributes)

        for dim in ('width', 'height'):
            size = atts.pop(dim, '')
            if not size:
                continue
            if size.isdigit():
                size += 'px'
            atts['style'] = f"{dim}:{size};" + atts.get('style', '')

        return f'<img{attributes(atts)}/>'

    def inline_decoration_parse(self, classname, text):
        return Element(type='inline_decoration', classname=classname, text=text)

    def inline_decoration_render(self, el: Element):
        body = escape(el.text)
        return f'<span class="decoration_{el.classname}">{body}</span>'

    def inline_html_parse(self, html):
        return Element(type='inline_html', html=html)

    def inline_html_render(self, el: Element):
        # raw HTML is passed through untouched
        return el.html

    def linebreak_parse(self):
        return Element(type='linebreak')

    def linebreak_render(self, el: Element):
        return '<br/>\n'

    def link_parse(self, link, children=None, title=None):
        # a bare string label is wrapped into a single text child
        kids = [Element(type='text', text=children)] if isinstance(children, str) else children
        return Element(type='link', link=link, children=kids, title=title)

    def link_render(self, el: Element):
        inner = self.render_children(el)
        return self.render_a(el.link, el.title, inner, el)

    def link_attributes_parse(self, text, attributes):
        return Element(type='link_attributes', text=text, attributes=attributes)

    def list_item_parse(self, children, level):
        return Element(type='list_item', children=children, level=level)

    def list_item_render(self, el: Element):
        return f'<li>{self.render_children(el)}</li>\n'

    def list_parse(self, children, ordered, level, start=None):
        return Element(type='list', children=children, ordered=ordered, level=level, start=start)

    def list_render(self, el: Element):
        """Return an ``<ol>`` or ``<ul>`` tag, with ``start`` if one is set."""
        inner = self.render_children(el)
        tag = 'ol' if el.ordered else 'ul'
        atts = {'start': el.start} if el.start else {}
        return f'<{tag}{attributes(atts)}>\n{inner}\n</{tag}>\n'

    def newline_parse(self):
        return Element(type='newline')

    def newline_render(self, el: Element):
        return ''

    def paragraph_parse(self, children=None):
        return Element(type='paragraph', children=children)

    def paragraph_render(self, el: Element):
        return f'<p>{self.render_children(el)}</p>\n'

    def strong_parse(self, children=None):
        return Element(type='strong', children=children)

    def strong_render(self, el: Element):
        return f'<strong>{self.render_children(el)}</strong>'

    def table_body_parse(self, children=None):
        return Element(type='table_body', children=children)

    def table_body_render(self, el: Element):
        return f'<tbody>\n{self.render_children(el)}</tbody>\n'

    def table_cell_parse(self, children, align=None, is_head=False):
        return Element(type='table_cell', children=children, align=align, is_head=is_head)

    def table_cell_render(self, el: Element):
        """Return a ``<th>`` or ``<td>`` tag, aligned via inline CSS if requested."""
        inner = self.render_children(el)
        tag = 'th' if el.is_head else 'td'
        atts = {'style': f'text-align:{el.align}'} if el.align else {}
        return f'<{tag}{attributes(atts)}>{inner}</{tag}>'

    def table_head_parse(self, children=None):
        return Element(type='table_head', children=children)

    def table_head_render(self, el: Element):
        return f'<thead>\n<tr>{self.render_children(el)}</tr>\n</thead>\n'

    def table_parse(self, children=None):
        return Element(type='table', children=children)

    def table_render(self, el: Element):
        return f'<table class="markdown-table">{self.render_children(el)}</table>\n'

    def table_row_parse(self, children=None):
        return Element(type='table_row', children=children)

    def table_row_render(self, el: Element):
        return f'<tr>{self.render_children(el)}</tr>\n'

    def text_parse(self, text):
        return Element(type='text', text=text)

    def text_render(self, el: Element):
        return escape(el.text)

    def thematic_break_parse(self):
        return Element(type='thematic_break')

    def thematic_break_render(self, el: Element):
        return '<hr/>\n'

403 

404 

def escape(s, quote=True):
    """Replace ``&``, ``<`` and ``>`` in ``s`` with HTML entities.

    Double quotes are replaced as well unless ``quote`` is false.
    """
    # '&' has to go first so the entities inserted below are not re-escaped
    replacements = [("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")]
    if quote:
        replacements.append(('"', "&quot;"))
    for raw, entity in replacements:
        s = s.replace(raw, entity)
    return s

412 

413 

def attributes(attrs):
    """Format a dict as HTML attributes, each preceded by a space.

    Values are inserted as they are, without escaping. A missing or empty
    dict gives ``''``.
    """
    if not attrs:
        return ''
    return ''.join(f' {k}="{v}"' for k, v in attrs.items())

420 

421 

422## 

423 

424 

# One attribute followed by a single space:
# '#id', '.class', 'key=value' or 'key="quoted value"'.
_ATTRIBUTE_RE = r'''(?x)
    (
        (\# (?P<id> [\w-]+) )
        |
        (\. (?P<class> [\w-]+) )
        |
        (
            (?P<key> \w+)
            =
            (
                " (?P<quoted> [^"]*) "
                |
                (?P<simple> \S+)
            )
        )
    )
    \x20
'''


def parse_attributes(text):
    """Parse an attribute list such as ``#id .cls key=val key2="a b"``.

    Returns a dict of the attributes found. Several ``.class`` items are
    combined into one space-separated ``class`` value. If any part of the
    text is not a valid attribute, the whole list is rejected and an empty
    dict is returned.
    """
    # every attribute must be followed by a space, so make sure the last one is
    rest = text.strip() + ' '
    found = {}

    while rest:
        m = re.match(_ATTRIBUTE_RE, rest)
        if m is None:
            return {}
        rest = rest[m.end():].lstrip()

        groups = m.groupdict()
        if groups['id']:
            found['id'] = groups['id']
        elif groups['class']:
            found['class'] = (found.get('class', '') + ' ' + groups['class']).strip()
        else:
            found[groups['key']] = groups['simple'] or groups['quoted'].strip()

    return found