Coverage for gws-app/gws/spec/generator/parser.py: 10%
430 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-17 01:37 +0200
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-17 01:37 +0200
1"""Parse py source files and create a list of units of interest"""
3import ast
4import re
6from typing import cast
8from . import base, util
def parse(gen: base.Generator, parse_all=False):
    """Parse every python source file listed in the generator's chunks.

    The parser is initialized first (see ``init_parser``), then each path
    found under ``chunk['paths']['python']`` is handed to ``parse_path``
    together with the chunk's ``name`` and ``sourceDir``.

    Args:
        gen: Generator providing ``chunks`` and receiving the parsed types.
        parse_all: Passed through unchanged to ``parse_path``.
    """
    init_parser(gen)
    for source_chunk in gen.chunks:
        python_paths = source_chunk['paths']['python']
        for file_path in python_paths:
            parse_path(
                gen,
                file_path,
                source_chunk['name'],
                source_chunk['sourceDir'],
                parse_all,
            )
def init_parser(gen: base.Generator):
    """Register an ATOM type in ``gen.types`` for each name in ``base.BUILTINS``."""
    for builtin_name in base.BUILTINS:
        atom = gen.new_type(base.C.ATOM, name=builtin_name)
        gen.types[atom.uid] = atom
def parse_path(gen: base.Generator, path: str, base_name: str, base_dir: str, parse_all=True):
    """Parse a single python file and register its types with the generator.

    The module name is derived from the file location relative to ``base_dir``:

        <base_dir>/a/b/__init__.py => <base_name>.a.b
        <base_dir>/a/b/c.py        => <base_name>.a.b.c

    Args:
        gen: Generator that receives the parsed types.
        path: Path of the python file; must start with ``base_dir``.
        base_name: Dotted name prefix that corresponds to ``base_dir``.
        base_dir: Directory prefix to strip from ``path``.
        parse_all: Passed through to ``_PythonParser``.

    Raises:
        base.Error: On any failure. The message contains the original error
            text, the file path and, when known, the line number of the node
            being processed. The original exception is attached as the cause.
    """
    pp = None

    base.log.debug(f'parsing {path=}')

    try:
        if not path.startswith(base_dir):
            raise ValueError(f'invalid path {path!r}')
        p = path[len(base_dir):].split('/')
        f = p.pop().split(DOT)[0]
        if f != '__init__':
            p.append(f)
        mod_name = base_name + DOT.join(p)

        text = util.read_file(path)
        pp = _PythonParser(gen, mod_name, path, text, parse_all)
        pp.run()

    except Exception as exc:
        lineno = '?'
        if pp and pp.context:
            # the parser leaves the node it was working on at the top of its context stack
            lineno = pp.context[-1].lineno
        # every exception has 'args', but the tuple may be empty (e.g. 'raise ValueError()')
        msg = str(exc.args[0]) if exc.args else repr(exc)
        raise base.Error(f'{msg} in {path}:{lineno}') from exc
53##
class _PythonParser:
    """Parse the source text of one python module and register the found types.

    Types (modules, classes, properties, methods, enums, aliases, constants,
    ext declarations) are created via ``gen.new_type`` and stored in ``gen.types``.
    """

    lines: list[str]
    module_node: ast.Module
    module_name: str
    docs: dict[int, str]
    imports: dict[str, str]

    def __init__(self, gen: base.Generator, module_name: str, path: str, text: str, parse_all: bool):
        self.gen = gen
        self.module_name = module_name
        self.module_path = path
        # uid of the MODULE type, set in run()
        self.tModule = ''
        self.text = text
        # a dummy first element is prepended to the split source text
        self.source_lines = [''] + self.text.splitlines()
        self.is_init = path.endswith('__init__.py')
        # stack of the nodes currently being processed, see nodes()
        self.context: list = []
        self.parse_all = parse_all

    def run(self):
        """Parse the module text and register everything of interest.

        Does nothing if any source line contains the '# gws:nospec' marker.
        """

        if any('# gws:nospec' in ln for ln in self.source_lines):
            return

        tree = ast.parse(self.text)

        for node in ast.walk(tree):
            if _cls(node) == 'Module':
                self.module_node = cast(ast.Module, node)
                break
        else:
            raise ValueError('module node not found')

        typ = self.add(base.C.MODULE, name=self.module_name, path=self.module_path, doc=self.inner_doc(self.module_node))
        self.tModule = typ.uid

        self.imports = self.prepare_imports()

        for node in self.nodes(self.module_node.body):
            cc = _cls(node)
            if cc == 'Expr':
                self.parse_ext_declaration(node)
            elif cc == 'ClassDef':
                self.parse_class(node)
            elif cc in {'Assign', 'AnnAssign'}:
                self.parse_assign(node, self.outer_doc(node, self.module_node.body), annotated=(cc == 'AnnAssign'))

    def prepare_imports(self):
        """Map names imported at module level to fully qualified names.

        As a side effect, an entry in ``gen.aliases`` is created for each
        imported name that looks like a type name and is not a builtin.

        Returns:
            A dict ``{local name: qualified name}``.
        """

        imp = {}

        # "import a.b.c as foo" => {foo: a.b.c}
        for node in self.nodes(self.module_node.body, 'Import'):
            for nn in node.names:
                imp[nn.asname or nn.name] = nn.name

        for node in self.nodes(self.module_node.body, 'ImportFrom'):

            # "from a.b.c import foo" => {foo: a.b.c.foo}
            if node.level == 0:
                for nn in node.names:
                    imp[nn.asname or nn.name] = node.module + DOT + nn.name
                continue

            # "from . import foo"     => {foo: "<mod-name-before-dot>.foo"}
            # "from .. import foo"    => {foo: "<mod-name-before-two-dots>.foo"}
            # "from ..abc import foo" => {foo: "<mod-name-before-two-dots>.abc.foo"}
            # in an __init__ file the module name is already the package name,
            # so one component less is stripped

            m = self.module_name.split(DOT)
            level = node.level - self.is_init
            if level:
                m = m[:-level]
            m = DOT.join(m)
            if node.module:
                m += DOT + node.module
            for nn in node.names:
                imp[nn.asname or nn.name] = m + DOT + nn.name

        # create aliases for imported types
        for alias, target in imp.items():
            if _is_type_name(alias) and not _builtin_name(alias):
                self.gen.aliases[self.module_name + DOT + alias] = target

        return imp

    def parse_ext_declaration(self, node):
        """Parse a module level expression statement that may be an ext declaration.

        Only calls whose dotted callee name starts with ``base.EXT_DECL_PREFIX``
        are considered. For such a call three EXT types are added (object,
        config, props); their targets are taken from the 2nd, 3rd and 4th
        positional arguments, with ``base.EXT_*_CLASS`` as defaults.

        Raises:
            ValueError: If the declaration has no positional arguments.
        """

        if _cls(node.value) != 'Call':
            return
        call = cast(ast.Call, node.value)
        try:
            decl = _name(call.func)
        except ValueError:
            # the callee is not a dotted name
            return
        if not decl.startswith(base.EXT_DECL_PREFIX):
            return
        if not call.args:
            raise ValueError('invalid gws.ext declaration')

        args = list(call.args)
        tail = decl.split(DOT).pop() + DOT + _name(args.pop(0))
        self.add(
            base.C.EXT,
            extName=base.EXT_OBJECT_PREFIX + tail,
            tTarget=self.qname(args.pop(0) if args else base.EXT_OBJECT_CLASS)
        )
        self.add(
            base.C.EXT,
            extName=base.EXT_CONFIG_PREFIX + tail,
            tTarget=self.qname(args.pop(0) if args else base.EXT_CONFIG_CLASS)
        )
        self.add(
            base.C.EXT,
            extName=base.EXT_PROPS_PREFIX + tail,
            tTarget=self.qname(args.pop(0) if args else base.EXT_PROPS_CLASS)
        )

    def parse_assign(self, node, doc, annotated):
        """Parse a module level assignment, possibly a type alias or a constant.

        Assignments with multiple targets, and targets that are not plain
        names starting with an uppercase letter, are ignored.

        Raises:
            ValueError: If an alias is marked as a Variant but is not a Union.
        """

        if annotated:
            name_node = node.target
        else:
            if len(node.targets) > 1:
                return
            name_node = node.targets[0]

        if _cls(name_node) != 'Name' or not _is_type_name(name_node.id):
            return

        typ = None
        if hasattr(node, 'annotation'):
            typ = self.type_from_node(node.annotation)

        if typ and typ.name == 'TypeAlias':
            # type alias
            target_type = self.type_from_node(node.value)
            if doc.startswith(base.VARIANT_COMMENT_PREFIX):
                # variant
                if target_type.c != base.C.UNION:
                    raise ValueError('a Variant must be a Union')
                doc = doc.partition(base.VARIANT_COMMENT_PREFIX)[-1].strip()
                target_type = self.add(base.C.VARIANT, tItems=target_type.tItems)
            self.add(base.C.TYPE, doc=doc, ident=name_node.id, name=self.qname(name_node), tTarget=target_type.uid)
            return

        # possibly, a constant

        c, value = self.parse_const_value(node.value)
        if c == base.C.LITERAL:
            self.add(base.C.CONSTANT, doc=doc, ident=name_node.id, name=self.qname(name_node), value=value)

    def parse_class(self, node):
        """Parse a class definition with its properties and methods.

        Classes whose name does not start with an uppercase letter are ignored.
        A class whose first base resolves to the builtin 'Enum' is parsed as an enum.
        """

        if not _is_type_name(node.name):
            return

        supers = [self.qname(b) for b in node.bases]
        if supers and _builtin_name(supers[0]) == 'Enum':
            return self.parse_enum(node)

        typ = self.add(
            base.C.CLASS,
            doc=self.inner_doc(node),
            ident=node.name,
            name=self.qname(node),
            tSupers=[self.type_from_name(s).uid for s in supers if not _builtin_name(s)],
        )

        for nn in self.nodes(node.body):
            cc = _cls(nn)
            if cc in {'Assign', 'AnnAssign'}:
                doc = self.outer_doc(nn, node.body)
                self.parse_property(typ, nn, doc, annotated=(cc == 'AnnAssign'))
            elif cc == 'FunctionDef':
                self.parse_method(typ, nn)

    def parse_enum(self, node):
        """Parse an Enum class: collect item values and their docstrings.

        Raises:
            ValueError: If an item value is not a literal.
        """

        docs = {}
        vals = {}

        for nn in self.nodes(node.body):
            if _cls(nn) == 'Assign':
                target = nn.targets[0]
                if _cls(target) != 'Name':
                    # e.g. a tuple or attribute target, which has no 'id'
                    continue
                ident = target.id
                c, value = self.parse_const_value(nn.value)
                if c != base.C.LITERAL:
                    raise ValueError(f'invalid Enum item {ident!r}')
                docs[ident] = self.outer_doc(nn, node.body)
                vals[ident] = value

        self.add(
            base.C.ENUM,
            doc=self.inner_doc(node),
            ident=node.name,
            name=self.qname(node),
            enumDocs=docs,
            enumValues=vals,
        )

    def parse_property(self, owner_typ: base.Type, node, doc: str, annotated: bool):
        """Parse a class level assignment as a property of ``owner_typ``.

        Names starting with an underscore and targets that are not plain
        names are ignored. The property type is taken from the annotation if
        present, otherwise from the type of the literal default, otherwise 'any'.
        """

        target = node.target if annotated else node.targets[0]
        if _cls(target) != 'Name':
            # e.g. 'a, b = 1, 2' or 'x.y = 1', these targets have no 'id'
            return
        ident = target.id
        if ident.startswith('_'):
            return

        typ = self.add(
            base.C.PROPERTY,
            name=owner_typ.name + DOT + ident,
            doc=doc,
            ident=ident,
            tOwner=owner_typ.uid,
            tValue='any',
            defaultValue=None,
            hasDefault=False,
        )

        c, value = self.parse_const_value(node.value)
        if c == base.C.LITERAL:
            typ.defaultValue = value
            typ.hasDefault = True
        if c == base.C.EXPR:
            # see normalizer._evaluate_defaults
            typ.EVAL_DEFAULT = [c, value]

        property_type = None
        if hasattr(node, 'annotation'):
            property_type = self.type_from_node(node.annotation)

        if not property_type:
            t = 'any'
            if typ.hasDefault:
                t = type(typ.defaultValue).__name__
            property_type = self.type_from_name(t)

        if property_type:
            if property_type.c == base.C.OPTIONAL:
                # an optional property gets the target type and an implicit None default
                typ.tValue = property_type.tTarget
                if not typ.hasDefault:
                    typ.defaultValue = None
                    typ.hasDefault = True
            else:
                typ.tValue = property_type.uid

    def parse_method(self, owner_typ: base.Type, node):
        """Parse a method of ``owner_typ``.

        Unless ``parse_all`` is set, only methods with a gws.ext decorator
        are added, and only their last argument is recorded.
        """

        ext = self.gws_decorator(node, 'method')

        if not ext and not self.parse_all:
            return

        typ = self.add(
            base.C.METHOD,
            doc=self.inner_doc(node),
            ident=node.name,
            name=owner_typ.name + DOT + node.name,
            tOwner=owner_typ.uid,
            tArgs=[],
            tReturn='any',
            extName=ext,
        )

        args = node.args.args
        if not self.parse_all:
            # ext methods have only one spec'able arg (the last one)
            args = args[-1:]

        for arg_node in args:
            t = 'any'
            if arg_node.annotation:
                arg_type = self.type_from_node(arg_node.annotation)
                if arg_type:
                    t = arg_type.uid
            typ.tArgs.append(t)

        if node.returns:
            ret_type = self.type_from_node(node.returns)
            typ.tReturn = ret_type.uid if ret_type else 'any'

    def gws_decorator(self, node, kind):
        """Return the full ext name from a gws.ext decorator of the node, or ''.

        Only decorator calls with exactly one positional argument and a dotted
        callee name starting with ``base.EXT_PREFIX`` are considered.

        Raises:
            ValueError: For kind 'method', if the decorator name does not
                consist of exactly five dot-separated parts.
        """

        for d in getattr(node, 'decorator_list', []):

            if _cls(d) != 'Call' or len(d.args) != 1:
                continue

            try:
                name = _name(d.func)
            except ValueError:
                # the callee is not a dotted name, so it cannot be a gws.ext decorator
                continue
            if not name.startswith(base.EXT_PREFIX):
                continue

            name = name + DOT + _name(d.args[0])
            ns = name.split(DOT)

            if kind == 'method':
                if len(ns) == 5:
                    # gws.ext.command.api.mapGetBox
                    return name
                raise ValueError(f'invalid function decorator {name!r}')

        return ''

    ##

    def type_from_node(self, node) -> base.Type:
        """Create or fetch a type for a type declaration node (an alias or an annotation).

        Raises:
            ValueError: If the node kind is not supported.
        """

        cc = _cls(node)

        # foo: SomeType
        if cc in {'Str', 'Name', 'Attribute', 'Constant'}:
            return self.type_from_name(self.qname(node))

        # foo: Generic[SomeType]
        if cc == 'Subscript':
            # Subscript(slice=Index(value=Name... in py3.8
            # Subscript(slice=Name... in py3.9
            return self.type_from_name(
                self.qname(node.value),
                node.slice.value if _cls(node.slice) == 'Index' else node.slice)

        # foo: [SomeType, SomeType]
        if cc in {'List', 'Tuple'}:
            item_types = [self.type_from_node(e) for e in node.elts]
            return self.add(base.C.TUPLE, tItems=[typ.uid for typ in item_types])

        # foo: SomeType | SomeType | ...
        if cc == 'BinOp' and _cls(node.op) == 'BitOr':
            item_types = []
            # 'a | b | c' is nested to the left, so walk down and prepend
            while _cls(node) == 'BinOp' and _cls(node.op) == 'BitOr':
                item_types.insert(0, self.type_from_node(node.right))
                node = node.left
            item_types.insert(0, self.type_from_node(node))
            return self.add(base.C.UNION, tItems=[typ.uid for typ in item_types])

        raise ValueError(f'unsupported type: {cc!r}')

    def type_from_name(self, name: str, param=None) -> base.Type:
        """Create or fetch a type for a (qualified) name with an optional generic parameter.

        Args:
            name: Type name.
            param: AST node of the subscript parameter, if any.

        Raises:
            ValueError: If the parameter is missing or not valid for the given generic.
        """

        if not param and name in self.gen.types:
            return self.gen.types[name]

        g = _builtin_name(name)

        if g == 'Any':
            return self.gen.types['any']

        # literal - 'param' is a value or a tuple of values
        if g == 'Literal':
            if not param:
                raise ValueError('invalid literal')
            elts = param.elts if _cls(param) == 'Tuple' else [param]
            vals = [self.parse_literal_value(e) for e in elts]
            return self.add(base.C.LITERAL, literalValues=vals)

        # in other cases, 'param' is a type or a tuple of types

        param_typ = param_items = None
        if param:
            param_typ = self.type_from_node(param)
            if param_typ.c == base.C.TUPLE:
                param_items = param_typ.tItems

        if g == 'Optional':
            if not param_typ:
                raise ValueError('invalid optional type')
            return self.add(base.C.OPTIONAL, tTarget=param_typ.uid)

        if g.lower() == 'list':
            return self.add(base.C.LIST, tItem=param_typ.uid if param_typ else 'any')

        if g.lower() == 'set':
            return self.add(base.C.SET, tItem=param_typ.uid if param_typ else 'any')

        if g.lower() == 'dict':
            if param_items:
                if len(param_items) != 2:
                    raise ValueError('invalid dict arguments')
                key, val = param_items
            elif param_typ:
                # a single parameter is taken as the value type
                key = 'str'
                val = param_typ.uid
            else:
                key = 'str'
                val = 'any'
            return self.add(base.C.DICT, tKey=key, tValue=val)

        if g == 'Union':
            if not param_items:
                raise ValueError('invalid Union')
            return self.add(base.C.UNION, tItems=sorted(param_items))

        if g.lower() == 'tuple':
            if not param_typ:
                return self.add(base.C.TUPLE, tItems=[])
            if not param_items:
                raise ValueError('invalid Tuple')
            return self.add(base.C.TUPLE, tItems=list(param_items))

        if g.lower() == 'callable':
            if not param_typ:
                return self.add(base.C.CALLABLE, tItems=[])
            if not param_items:
                raise ValueError('invalid Callable')
            return self.add(base.C.CALLABLE, tItems=list(param_items))

        if param:
            raise ValueError('invalid generic type')

        if g:
            base.log.debug(f'created ATOM for {name!r}, builtin {g!r}')
            return self.add(base.C.ATOM, name=name)

        return self.add(base.C.UNDEFINED, name=name)

    ##

    @property
    def pos(self):
        """Position string '<module path>:<line>' of the node being processed (line 0 if none)."""
        return self.module_path + ':' + str(self.context[-1].lineno if self.context else 0)

    def add(self, c: str, **kwargs) -> base.Type:
        """Create a type of category ``c``, register it in ``gen.types`` and return it.

        The current position and the module uid are added to the type arguments.
        """

        kwargs['pos'] = self.pos
        kwargs['tModule'] = self.tModule
        typ = self.gen.new_type(c, **kwargs)
        base.log.debug(f'added {typ.uid=} {typ=}')
        self.gen.types[typ.uid] = typ
        return typ

    def inner_doc(self, node):
        """Returns a normal docstring (first child of the node)."""

        return self.docstring_from(node.body[0]) if node.body else ''

    def outer_doc(self, node, nodes):
        """Returns a docstring which immediately follows this node in a list of nodes."""

        try:
            nxt = nodes[nodes.index(node) + 1]
        except IndexError:
            # the node is the last one in the list
            return ''
        return self.docstring_from(nxt)

    def docstring_from(self, node):
        """If node is a docstring, return its content, otherwise an empty string."""

        if _cls(node) == 'Expr':
            if _cls(node.value) == 'Constant':
                v = node.value.value
                if isinstance(v, str):
                    return v.strip()
            if _cls(node.value) == 'Str':
                return node.value.s.strip()
        return ''

    def qname(self, node):
        """Return the builtin name or the fully qualified name for a node or a string."""

        name = _name(node)
        b = _builtin_name(name)
        if b:
            return b
        name = self.qualified(name)
        return name

    def qualified(self, name):
        """Qualify a name using the module imports, falling back to the current module."""

        for alias, mod in self.imports.items():
            if name == mod or name.startswith(mod + DOT):
                # already qualified
                return name
            if name == alias:
                return mod
            if name.startswith(alias + DOT):
                return mod + DOT + name[(len(alias) + 1):]
        return self.module_name + DOT + name

    def nodes(self, where, *cls):
        """Iterate over nodes in ``where``, optionally only those of the given class names.

        While a node is being consumed, it is on top of ``self.context``.
        There is intentionally no try/finally around the yield: if the
        consumer raises, the node stays on the stack, and the error handler
        in ``parse_path`` reads its line number from there.
        """

        for node in where:
            if not cls or _cls(node) in cls:
                self.context.append(node)
                yield node
                self.context.pop()

    ##

    def parse_literal_value(self, node):
        """Return the scalar literal value of a node.

        Raises:
            ValueError: If the node is not a scalar literal.
        """

        c, value = self.parse_const_value(node)
        if c == base.C.LITERAL and _is_scalar(value):
            return value
        raise ValueError('invalid literal value')

    def parse_const_value(self, node):
        """Try to evaluate a node as a constant.

        Returns:
            A pair ``(category, value)``:

            - ``(base.C.LITERAL, value)`` for literals and for lists, tuples
              and dicts that consist of literals only (tuples become lists);
            - ``(base.C.EXPR, ...)`` for names and attributes (the value is
              the qualified name) and for containers with at least one such item
              (the value is a container of ``[category, value]`` pairs);
            - a falsy category and ``None`` otherwise.
        """

        if node is None:
            return None, None

        cc = _cls(node)

        if cc == 'Num':
            return base.C.LITERAL, node.n

        if cc in ('Str', 'Bytes'):
            return base.C.LITERAL, node.s

        if cc in ('Constant', 'NameConstant'):
            return base.C.LITERAL, node.value

        if cc in {'Name', 'Attribute'}:
            # SomeConstant or Something.someKey - possible constant/enum value
            return base.C.EXPR, self.qname(node)

        if cc in {'List', 'Tuple'}:
            exprlst, lst = [], []
            for elt in node.elts:
                c, value = self.parse_const_value(elt)
                if not c:
                    return False, None
                if c == base.C.LITERAL:
                    lst.append(value)
                exprlst.append([c, value])
            if len(lst) == len(exprlst):
                return base.C.LITERAL, lst
            return base.C.EXPR, exprlst

        if cc == 'Dict':
            exprdct, dct = {}, {}
            for k, v in zip(node.keys, node.values):
                c, key = self.parse_const_value(k)
                # container keys (e.g. a tuple, parsed into a list above) are not hashable
                if c != base.C.LITERAL or isinstance(key, (list, dict)):
                    return False, None
                c, value = self.parse_const_value(v)
                if not c:
                    return False, None
                if c == base.C.LITERAL:
                    dct[key] = value
                exprdct[key] = [c, value]
            if len(dct) == len(exprdct):
                return base.C.LITERAL, dct
            return base.C.EXPR, exprdct

        return None, None
585##
588def _is_scalar(val):
589 return isinstance(val, (str, bytes, int, float, bool))
592def _is_type_name(name: str) -> bool:
593 return bool(name) and bool(re.match(r'^[A-Z]', name))
def _builtin_name(name: str) -> str:
    """Return the builtin name that ``name`` refers to, or an empty string.

    A name listed in ``base.BUILTINS`` or ``base.BUILTIN_TYPES`` is returned
    as is. Otherwise, the first entry ``b`` of ``base.BUILTIN_TYPES`` is
    returned for which ``name`` ends with ``.b`` or starts with ``b.``.
    """
    if name in base.BUILTINS or name in base.BUILTIN_TYPES:
        return name
    return next(
        (
            b for b in base.BUILTIN_TYPES
            if name.endswith(DOT + b) or name.startswith(b + DOT)
        ),
        '',
    )
609def _is_a(full_name: str, name: str) -> bool:
610 # if the name is like 'Object', check if the full name ends with it
611 # if the name is like 'some.module', check if the full name starts with it
612 if name[0].isupper():
613 return full_name == name or full_name.endswith(DOT + name)
614 return full_name == name or full_name.startswith(name + DOT)
617def _cls(node):
618 return node.__class__.__name__
621def _name(node):
622 if isinstance(node, str):
623 return node
625 cc = _cls(node)
627 if cc == 'Name':
628 return node.id
629 if cc == 'Attribute':
630 return _name(node.value) + DOT + node.attr
631 if cc == 'Str':
632 return node.s
633 if cc == 'Constant':
634 v = node.value
635 return v if isinstance(v, str) else repr(v)
636 if cc == 'ClassDef':
637 return node.name
638 if cc == 'FunctionDef':
639 return node.name
641 raise ValueError(f'node name missing in {cc!r}')
644def _camelize(name):
645 p = name.split('_')
646 return p[0] + ''.join(_ucfirst(s) for s in p[1:])
649def _ucfirst(s):
650 return s[0].upper() + s[1:]
# Separator used in dotted module and type names.
DOT: str = '.'