Coverage for gws-app/gws/lib/vendor/slon/__init__.py: 13%
484 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-17 01:37 +0200
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-17 01:37 +0200
1"""slon - Simple Lightweight Object Notation.
3Data exchange and configuration notation, similar to JSON.
5See <https://github.com/gebrkn/slon>
6"""
# Package metadata.
__version__ = '0.2.2'  # version string of this module
__author__ = 'Georg Barikin <georg@merribithouse.net>'  # author name and contact address
def loads(text, as_array=False, as_object=False, hooks=None):
    """Parse SLON ``text`` and return the resulting Python value.

    Args:
        text: The source text to parse.
        as_array: When true and the text does not start with ``[``, the whole
            text is read as the items of an array that runs to end of input.
        as_object: Same idea for objects: when true (and ``as_array`` is not)
            and the text does not start with ``{``, the whole text is read as
            the members of an object that runs to end of input.
        hooks: Callables available to ``name(value)`` expressions; either a
            container supporting ``in`` and indexing (e.g. a dict) or an
            object whose callable attributes are used.

    Returns:
        The parsed value.

    Raises:
        SlonError: On any syntax error, including non-whitespace text that
            follows the parsed value.
    """
    buf = _Buf(text, _prepare_hooks(hooks))
    _ws(buf)

    first = _chr(buf)
    if as_array and first != '[':
        # bracket-less top-level array
        result = _array(buf, buf.p, term=_EOF)
    elif not as_array and as_object and first != '{':
        # brace-less top-level object
        result = _object(buf, buf.p, term=_EOF)
    else:
        result = _value(buf)

    _ws(buf)
    if _chr(buf) != _EOF:
        _err(buf, 'TRAILING_GARBAGE')
    return result


parse = loads
34###
36_ERRORS = {
37 'INVALID_ESCAPE_SEQUENCE': 'Invalid escape sequence',
38 'INVALID_HOOK': 'Invalid hook name',
39 'INVALID_KEY_TYPE': 'Invalid key type',
40 'INVALID_NUMBER': 'Invalid number',
41 'INVALID_TOKEN': 'Invalid token',
42 'MISSING_DELIMITER': 'Whitespace or "," expected',
43 'MISSING_KEY_DELIMITER': 'Whitespace or ":" expected',
44 'MUST_BE_ARRAY': 'Unable to assign index (array expected)',
45 'MUST_BE_OBJECT': 'Unable to assign index (object expected)',
46 'TRAILING_GARBAGE': 'Trailing garbage',
47 'UNTERMINATED_ARRAY': 'Unterminated array',
48 'UNTERMINATED_CALL': '")" expected',
49 'UNTERMINATED_COMMENT': 'Unterminated block comment',
50 'UNTERMINATED_OBJECT': 'Unterminated object',
51 'UNTERMINATED_STRING': 'Unterminated string',
52}
class SlonError(ValueError):
    """Error raised when SLON text cannot be parsed.

    Attributes:
        code: Short error identifier (the key used to look up the message).
        message: Human-readable description of the error.
        pos, row, col: Offset, line and column where the error was detected.
        start_pos, start_row, start_col: Offset, line and column where the
            offending construct started.

    ``args`` stays ``(message, pos, row, col, start_pos, start_row,
    start_col)``; ``code`` is only available as an attribute.
    """

    def __init__(self, code, message, pos, row, col, start_pos, start_row, start_col):
        super().__init__(message, pos, row, col, start_pos, start_row, start_col)
        self.code = code
        self.message = message
        self.pos = pos
        self.row = row
        self.col = col
        self.start_pos = start_pos
        self.start_row = start_row
        self.start_col = start_col

    def __str__(self):
        # The default would render the whole ``args`` tuple; show the message only.
        return str(self.message)

    def __reduce__(self):
        # ``code`` is not part of ``args``, so the default reconstruction would
        # call ``__init__`` with one argument too few. Supply the full list so
        # that pickling and copying work.
        return (
            type(self),
            (
                self.code,
                self.message,
                self.pos,
                self.row,
                self.col,
                self.start_pos,
                self.start_row,
                self.start_col,
            ),
        )
68###
70_EOF = '\U0001FFFF'
71_EOL = '\n'
72_SQ = "'"
73_DQ = '"'
74_SLASH = "\\"
75_BACKTICK = '`'
77_P_WS = 1 << 1
78_P_PUNCT = 1 << 2
79_P_LIST_DELIM = 1 << 3
80_P_KEY_DELIM = 1 << 4
81_P_DEC = 1 << 5
82_P_HEX = 1 << 6
83_P_OCT = 1 << 7
84_P_BIN = 1 << 8
86_P_NON_WORD = _P_WS | _P_PUNCT
# Character -> bit mask of `_P_*` classes. Characters not listed have no class (0).
_CMAP = {
    # whitespace
    **dict.fromkeys(' \n\r\t\f', _P_WS),

    # brackets
    **dict.fromkeys('[]{}()', _P_PUNCT),

    # delimiters
    ',': _P_PUNCT | _P_LIST_DELIM,
    **dict.fromkeys('=:', _P_PUNCT | _P_KEY_DELIM),

    # comment starters
    **dict.fromkeys('#/', _P_PUNCT),

    # backslash and quotes
    **dict.fromkeys((_SLASH, _SQ, _DQ, _BACKTICK), _P_PUNCT),

    # digits, grouped by the numeric bases they are valid in
    **dict.fromkeys('01', _P_HEX | _P_DEC | _P_OCT | _P_BIN),
    **dict.fromkeys('234567', _P_HEX | _P_DEC | _P_OCT),
    **dict.fromkeys('89', _P_HEX | _P_DEC),
    **dict.fromkeys('abcdefABCDEF', _P_HEX),
}
137_ESCAPES = {
138 "'": "'",
139 '"': '"',
140 '0': '\0',
141 '/': '/',
142 '\\': '\\',
143 'b': '\b',
144 'f': '\f',
145 'n': '\n',
146 'r': '\r',
147 't': '\t',
148}
150_WORDS = {
151 'true': True,
152 'on': True,
153 'yes': True,
154 'false': False,
155 'off': False,
156 'no': False,
157 'null': None,
158 'none': None,
159}
161_SURR_1_START = 0xD800
162_SURR_1_END = 0xDBFF
163_SURR_2_START = 0xDC00
164_SURR_2_END = 0xDFFF
166_MAX_UNICODE = 0x110000
169###
171class _Buf:
172 def __init__(self, text, hooks):
173 self.text = text
174 self.tlen = len(text)
175 self.p = 0
176 self.hooks = hooks
179###
181def _prepare_hooks(hooks):
182 if not hooks:
183 return {}
185 # 'hooks' can be an iterable, e.g. a dict
186 try:
187 _ = "test" in hooks
188 return hooks
189 except TypeError:
190 pass
192 # ...or an object
193 d = {}
194 for key in dir(hooks):
195 if not key.startswith('__'):
196 f = getattr(hooks, key, None)
197 if callable(f):
198 d[key] = f
199 return d
202###
def _value(b):
    """Parse one value at the current position and return it.

    Dispatches on the first character: quoted strings, signed and unsigned
    numbers, arrays, objects, and finally bare words. Raises `SlonError`
    (via `_err`) if the character cannot start a value.
    """
    ch = _chr(b)
    begin = b.p

    # single- and double-quoted strings, plain or tripled
    if ch in (_SQ, _DQ):
        tripled = _chr(b, 1) == ch and _chr(b, 2) == ch
        b.p += 3 if tripled else 1
        if ch == _SQ:
            return _single3(b, begin) if tripled else _single1(b, begin)
        return _double3(b, begin, backtick=False) if tripled else _double1(b, begin)

    if ch == _BACKTICK:
        b.p += 1
        return _double3(b, begin, backtick=True)

    # explicitly signed number
    if ch == '+' or ch == '-':
        b.p += 1
        magnitude = _number(b, begin)
        return magnitude if ch == '+' else -1 * magnitude

    props = _CMAP.get(ch, 0)

    if ch == '.' or props & _P_DEC:
        return _number(b, begin)

    if ch == '[':
        b.p += 1
        return _array(b, begin, term=']')

    if ch == '{':
        b.p += 1
        return _object(b, begin, term='}')

    if props & _P_NON_WORD:
        _err(b, 'INVALID_TOKEN')

    return _word(b, begin)
252###
def _single1(b, start):
    """Read a single-quoted string body up to and including the closing quote.

    The opening quote has already been consumed; ``start`` is its offset and
    is used for error reporting. No escape processing is done. A line break or
    end of input before the closing quote is an error.
    """
    chars = []
    ch = _chr(b)

    while ch != _SQ:
        if ch in (_EOL, _EOF):
            _err(b, 'UNTERMINATED_STRING', start)
        chars.append(ch)
        b.p += 1
        ch = _chr(b)

    # step over the closing quote
    b.p += 1
    return ''.join(chars)
def _single3(b, start):
    """Read a triple-single-quoted string body up to and including the closing quotes.

    The opening quotes have already been consumed; ``start`` is their offset
    and is used for error reporting. No escape processing is done. If the body
    begins with whitespace it is dedented, otherwise its whitespace is
    compressed.
    """
    # decided by the very first character of the body
    starts_with_ws = _CMAP.get(_chr(b), 0) & _P_WS
    chars = []

    while True:
        ch = _chr(b)
        if ch == _EOF:
            _err(b, 'UNTERMINATED_STRING', start)
        if ch == _SQ and (_chr(b, 1), _chr(b, 2)) == (_SQ, _SQ):
            b.p += 3
            break
        chars.append(ch)
        b.p += 1

    body = ''.join(chars)
    if starts_with_ws:
        return _dedent(body)
    return _compress(body)
def _double1(b, start):
    """Read a double-quoted string body up to and including the closing quote.

    The opening quote has already been consumed; ``start`` is its offset and
    is used for error reporting. Backslash escapes are decoded. A line break
    or end of input before the closing quote is an error.
    """
    pieces = []

    while True:
        ch = _chr(b)
        if ch in (_EOL, _EOF):
            _err(b, 'UNTERMINATED_STRING', start)

        # every remaining case consumes the current character
        b.p += 1
        if ch == _DQ:
            break
        pieces.append(_escape(b, b.p - 1) if ch == _SLASH else ch)

    return ''.join(pieces)
def _double3(b, start, backtick):
    """Read a multi-line string body with escape processing.

    Handles both triple-double-quoted strings (``backtick`` false, ends at
    three double quotes) and backtick strings (``backtick`` true, ends at one
    backtick). The opening delimiter has already been consumed; ``start`` is
    its offset and is used for error reporting.

    If the body begins with whitespace it is dedented, otherwise its
    whitespace is compressed. Escapes are decoded, but their results are
    inserted only after that reflow so that they cannot influence it.
    """
    placeholder = _EOF
    starts_with_ws = _CMAP.get(_chr(b), 0) & _P_WS
    pieces = []
    decoded = []

    while True:
        ch = _chr(b)
        if ch == _EOF:
            _err(b, 'UNTERMINATED_STRING', start)

        if backtick:
            if ch == _BACKTICK:
                b.p += 1
                break
        elif ch == _DQ and (_chr(b, 1), _chr(b, 2)) == (_DQ, _DQ):
            b.p += 3
            break

        if ch == _SLASH:
            # keep a placeholder in the text, remember the decoded escape
            pieces.append(placeholder)
            b.p += 1
            decoded.append(_escape(b, b.p - 1))
        else:
            pieces.append(ch)
            b.p += 1

    body = ''.join(pieces)
    body = _dedent(body) if starts_with_ws else _compress(body)
    if not decoded:
        return body

    # put the decoded escapes where the placeholders are
    segments = body.split(placeholder)
    restored = [segments[0]]
    for escape, tail in zip(decoded, segments[1:]):
        restored.append(escape)
        restored.append(tail)
    return ''.join(restored)
def _dedent(s):
    """Strip the common leading whitespace from the lines of ``s``.

    Trailing whitespace is removed from every line. One empty first line and
    one empty last line, if present, are dropped. The smallest indentation
    found among the non-empty lines is then removed from all lines.

    Returns:
        The re-joined text.
    """
    lines = [ln.rstrip() for ln in s.split(_EOL)]

    if lines and not lines[0]:
        lines.pop(0)
    if lines and not lines[-1]:
        lines.pop()

    # Lines are already right-stripped, so the length lost by lstrip() is the
    # indentation. `default=0` covers the case of no non-empty lines and
    # removes the need for an arbitrary "very large" starting value.
    indent = min(
        (len(ln) - len(ln.lstrip()) for ln in lines if ln),
        default=0,
    )

    return _EOL.join(ln[indent:] for ln in lines)
367def _compress(s):
368 return ' '.join(s.strip().split())
def _escape(b, start):
    """Decode the escape sequence following a backslash and return its text.

    The backslash has already been consumed; ``start`` is its offset and is
    used for error reporting. Single-character escapes come from `_ESCAPES`,
    everything else is handed to `_unicode_escape`.
    """
    simple = _ESCAPES.get(_chr(b))
    if simple is not None:
        b.p += 1
        return simple

    cp = _unicode_escape(b)
    if not 0 <= cp < _MAX_UNICODE:
        _err(b, 'INVALID_ESCAPE_SEQUENCE', pos=start)

    return chr(cp)
def _unicode_escape(b):
    """Read a numeric escape and return its code point, or -1 if it is malformed.

    Supported forms (the backslash is already consumed): ``xXX``,
    ``UXXXXXXXX``, ``u{X...}`` with 1 to 8 hex digits, and ``uXXXX``. A
    ``uXXXX`` high surrogate directly followed by a ``\\uXXXX`` low surrogate
    is combined into one code point.
    """
    kind = _chr(b)

    # \xXX and \UXXXXXXXX: fixed number of digits
    if kind == 'x' or kind == 'U':
        b.p += 1
        width = 2 if kind == 'x' else 8
        return _hexval(b, width, width)

    if kind != 'u':
        return -1

    # \u{XX...}
    if _chr(b, 1) == '{':
        b.p += 2
        cp = _hexval(b, 1, 8)
        if cp >= 0 and _chr(b) == '}':
            b.p += 1
            return cp
        return -1

    # \uXXXX
    b.p += 1
    high = _hexval(b, 4, 4)
    if high < 0:
        return -1
    if not _SURR_1_START <= high <= _SURR_1_END:
        return high

    # high surrogate: look for a low surrogate right behind it
    rewind = b.p
    if _chr(b) == _SLASH and _chr(b, 1) == 'u':
        b.p += 2
        low = _hexval(b, 4, 4)
        if _SURR_2_START <= low <= _SURR_2_END:
            return 0x10000 + (((high - _SURR_1_START) << 10) | (low - _SURR_2_START))
        # not a pair: leave the second escape unread
        b.p = rewind

    return high
def _hexval(b, minlen, maxlen):
    """Read up to ``maxlen`` hex digits and return their value.

    Returns -1 if the number of digits read is not within
    ``minlen..maxlen``. The digits read are consumed either way.
    """
    digits = []

    while len(digits) < maxlen:
        ch = _chr(b)
        if not _CMAP.get(ch, 0) & _P_HEX:
            break
        digits.append(ch)
        b.p += 1

    if not minlen <= len(digits) <= maxlen:
        return -1

    return _int(''.join(digits), 16)
451###
def _number(b, start):
    """Parse an unsigned number at the current position.

    A leading ``0x``, ``0o`` or ``0b`` (either letter case) selects a
    hexadecimal, octal or binary integer; anything else is parsed as a decimal
    number. ``start`` is the offset reported in errors.
    """
    if _chr(b) == '0':
        marker = _chr(b, 1)
        prefixes = (
            (('x', 'X'), _P_HEX, 16),
            (('o', 'O'), _P_OCT, 8),
            (('b', 'B'), _P_BIN, 2),
        )
        for letters, prop, base in prefixes:
            if marker in letters:
                b.p += 2
                return _nondec(b, start, prop, base)

    return _decnum(b, start)
def _decnum(b, start):
    """Parse a decimal integer or float at the current position.

    Accepts an optional integer part, an optional ``.`` followed by at least
    one digit, and an optional exponent (``e``/``E``, optional sign, at least
    one digit). Returns a float when a fraction or exponent is present,
    otherwise an int. ``start`` is the offset reported in errors.
    """
    whole = _intseq(b, _P_DEC)

    frac = ''
    if _chr(b) == '.':
        b.p += 1
        frac = _intseq(b, _P_DEC)
        if not frac:
            _err(b, 'INVALID_NUMBER', pos=start)

    if not (whole or frac):
        _err(b, 'INVALID_NUMBER', pos=start)

    exp_sign = exp = ''
    if _chr(b) in ('e', 'E'):
        b.p += 1
        sign = _chr(b)
        if sign in ('+', '-'):
            exp_sign = sign
            b.p += 1
        exp = _intseq(b, _P_DEC)
        if not exp:
            _err(b, 'INVALID_NUMBER', pos=start)

    if not (frac or exp):
        return _int(whole, 10)

    return _float(whole, frac, exp_sign, exp)
def _nondec(b, start, prop, base):
    """Parse the digits of a prefixed (non-decimal) integer.

    ``prop`` is the character class of valid digits and ``base`` the radix.
    At least one digit is required; ``start`` is the offset reported in errors.
    """
    digits = _intseq(b, prop)
    if not digits:
        _err(b, 'INVALID_NUMBER', pos=start)
    value = _int(digits, base)
    return value
def _intseq(b, prop):
    """Consume a run of digits of class ``prop`` and return them as a string.

    Underscores inside the run are consumed but left out of the result.
    """
    digits = []

    while True:
        ch = _chr(b)
        if _CMAP.get(ch, 0) & prop:
            digits.append(ch)
        elif ch != '_':
            break
        b.p += 1

    return ''.join(digits)
525###
def _array(b, start, term):
    """Parse array items until ``term`` and return them as a list.

    Items must be separated by whitespace, comments and/or commas. ``term``
    is the closing character (``]``, or `_EOF` for a bracket-less top-level
    array); ``start`` is the offset reported if the array is unterminated.
    """
    separators = _P_WS | _P_LIST_DELIM
    items = []

    _ws(b, separators)
    separated = True

    while True:
        ch = _chr(b)
        if ch == term:
            break
        if ch == _EOF:
            _err(b, 'UNTERMINATED_ARRAY', start)
        if not separated:
            _err(b, 'MISSING_DELIMITER')

        items.append(_value(b))
        separated = _ws(b, separators)

    # step over the terminator
    b.p += 1
    return items
552###
def _object(b, start, term):
    """Parse ``key value`` members until ``term`` and return them as a dict.

    Keys may be numbers, booleans or strings. Key and value are separated by
    whitespace, comments, ``:`` and/or ``=``; members by whitespace, comments
    and/or commas. An unquoted string key containing ``.`` or ``+`` is treated
    as a compound key and stored through `_store`.

    ``term`` is the closing character (``}``, or `_EOF` for a brace-less
    top-level object); ``start`` is the offset reported if the object is
    unterminated.
    """
    member_sep = _P_WS | _P_LIST_DELIM
    key_sep = _P_WS | _P_KEY_DELIM
    result = {}

    _ws(b, member_sep)
    separated = True

    while True:
        ch = _chr(b)
        if ch == term:
            break
        if ch == _EOF:
            _err(b, 'UNTERMINATED_OBJECT', start)
        if not separated:
            _err(b, 'MISSING_DELIMITER')

        key_pos = b.p
        quoted = ch in (_SQ, _DQ, _BACKTICK)
        key = _value(b)

        if not (_is_number(key) or _is_bool(key) or _is_str(key)):
            _err(b, 'INVALID_KEY_TYPE', pos=key_pos)

        if not _ws(b, key_sep):
            _err(b, 'MISSING_KEY_DELIMITER')

        val = _value(b)

        compound = _is_str(key) and not quoted and ('.' in key or '+' in key)
        if compound:
            _store(b, result, key, val, key_pos)
        else:
            result[key] = val

        separated = _ws(b, member_sep)

    # step over the terminator
    b.p += 1
    return result
def _store(b, obj, cmp_key, val, key_pos):
    """Store ``val`` in ``obj`` under the compound key ``cmp_key``.

    The key is split at ``.`` and ``+``. A part made of decimal digits is an
    array index, a ``+`` means "append to the array", and any other part is an
    object key. Containers along the path are created when missing; the last
    part receives ``val``, overwriting what is there.

    Args:
        b: Parser state, used for error reporting.
        obj: The dict to store into.
        cmp_key: The compound key, e.g. ``a.b.0`` or ``items+``.
        val: The value to store.
        key_pos: Offset of the key in the text, reported in errors.

    Raises:
        SlonError: If an existing container on the path has the wrong type.
    """
    # split into parts; '+' starts a new part and stays in it as a marker
    parts = ['']
    for ch in cmp_key:
        if ch == '.':
            parts.append('')
        elif ch == '+':
            parts.append('+')
        else:
            parts[-1] += ch

    # Wrap the root in a one-element list so that the first step can be
    # handled like every other one (as "index 0 of a list").
    container = [obj]
    key = 0
    is_int = True

    for part in parts:
        # isdecimal(), not isdigit(): isdigit() also accepts characters such
        # as superscript digits, which int() rejects with a bare ValueError.
        is_index = part.isdecimal()
        is_append = part == '+'

        # the pending key must lead to an array for indexes/appends,
        # to an object otherwise
        default = [] if (is_index or is_append) else {}
        container = _store_one(b, container, key, is_int, default, False, key_pos)

        if is_index:
            key, is_int = _int(part, 10), True
        elif is_append:
            key, is_int = -1, True
        else:
            key, is_int = part, False

    _store_one(b, container, key, is_int, val, True, key_pos)
def _store_one(b, obj, key, is_int, val, force, key_pos):
    """Store ``val`` at ``obj[key]`` and return what is stored there afterwards.

    With ``is_int`` true ``obj`` must be a list; a key of -1 appends, and the
    list is padded with ``None`` as needed to make the index valid. Otherwise
    ``obj`` must be a dict. Unless ``force`` is true, an existing entry is
    kept (for lists: an entry that is not ``None``).

    Raises:
        SlonError: If ``obj`` is not of the required container type.
    """
    if not is_int:
        if not _is_object(obj):
            _err(b, 'MUST_BE_OBJECT', pos=key_pos)
        if force or key not in obj:
            obj[key] = val
        return obj[key]

    if not _is_array(obj):
        _err(b, 'MUST_BE_ARRAY', pos=key_pos)

    size = len(obj)
    if key == -1:
        key = size
    if key >= size:
        # pad so that obj[key] exists
        obj.extend([None] * (key - size + 1))

    if force or obj[key] is None:
        obj[key] = val
    return obj[key]
654###
def _word(b, start):
    """Parse a bare word and return the value it stands for.

    The word runs until whitespace, punctuation or end of input. A keyword
    from `_WORDS` (matched case-insensitively) yields its value. A word
    directly followed by ``(`` is a hook call: the single argument is parsed
    and the hook of that name is called with it. Any other word is returned as
    a string. ``start`` is the offset reported for an unknown hook.
    """
    chars = []
    ch = _chr(b)
    while ch != _EOF and not _CMAP.get(ch, 0) & _P_NON_WORD:
        chars.append(ch)
        b.p += 1
        ch = _chr(b)
    word = ''.join(chars)

    # keyword?
    lowered = word.lower()
    if lowered in _WORDS:
        return _WORDS[lowered]

    # simple string unless a call follows
    if ch != '(':
        return word

    # hook call
    call_pos = b.p
    b.p += 1

    _ws(b)
    arg = _value(b)
    _ws(b)

    if _chr(b) != ')':
        _err(b, 'UNTERMINATED_CALL', start=call_pos)
    b.p += 1

    if word not in b.hooks:
        _err(b, 'INVALID_HOOK', pos=start)
    return b.hooks[word](arg)
695###
def _ws(b, prop=_P_WS):
    """Skip characters of class ``prop`` as well as comments.

    Comments are ``#`` and ``//`` to end of line, and ``/* ... */`` blocks.

    Returns:
        True if anything was skipped.
    """
    origin = b.p

    while True:
        ch = _chr(b)

        if _CMAP.get(ch, 0) & prop:
            b.p += 1
            continue

        if ch == '#':
            opened_at = b.p
            b.p += 1
            _line_comment(b, opened_at)
            continue

        if ch != '/':
            break

        follower = _chr(b, 1)
        if follower == '/':
            opened_at = b.p
            b.p += 2
            _line_comment(b, opened_at)
        elif follower == '*':
            opened_at = b.p
            b.p += 2
            _block_comment(b, opened_at)
        else:
            break

    return b.p > origin
def _line_comment(b, start):
    """Skip the rest of a line comment, including the line break that ends it.

    Stops without consuming anything further at end of input. ``start`` is
    not used.
    """
    while True:
        ch = _chr(b)
        if ch == _EOF:
            return
        b.p += 1
        if ch == _EOL:
            return
def _block_comment(b, start):
    """Skip the rest of a block comment, including the closing ``*/``.

    ``start`` is the offset of the comment's opening, reported if the input
    ends before the comment is closed.
    """
    while not (_chr(b) == '*' and _chr(b, 1) == '/'):
        if _chr(b) == _EOF:
            _err(b, 'UNTERMINATED_COMMENT', start)
        b.p += 1

    # step over the closing '*/'
    b.p += 2
741def _chr(b, d=0):
742 try:
743 return b.text[b.p + d]
744 except IndexError:
745 return _EOF
748def _int(s, base):
749 return int(s, base)
752def _float(i, f, esign, e):
753 return float((i or '0') + '.' + (f or '0') + 'E' + (esign or '') + (e or '0'))
756def _is_number(x):
757 return isinstance(x, (int, float))
760def _is_str(x):
761 return isinstance(x, str)
764def _is_bool(x):
765 return isinstance(x, bool)
768def _is_array(x):
769 return isinstance(x, list)
772def _is_object(x):
773 return isinstance(x, dict)
776###
def _err(b, code, start=None, pos=None):
    """Raise a `SlonError` for the error ``code``.

    Args:
        b: Parser state.
        code: A key of `_ERRORS`.
        start: Offset where the offending construct started; when given it is
            added to the message. Defaults to ``pos``.
        pos: Offset of the error. Defaults to the current read position.

    Raises:
        SlonError: Always.
    """
    pos = b.p if pos is None else pos

    text = _ERRORS[code]
    row, col = _rowcol(b, pos)
    text = '%s: line %d column %d (offset %d)' % (text, row, col, pos)

    if start is None:
        # no separate start: report the error position for both
        start, start_row, start_col = pos, row, col
    else:
        start_row, start_col = _rowcol(b, start)
        text += ', started at line %d column %d (offset %d)' % (start_row, start_col, start)

    raise SlonError(code, text, pos, row, col, start, start_row, start_col)
def _rowcol(b, pos):
    """Return the 1-based ``(row, column)`` of offset ``pos`` in the text."""
    text = b.text
    row = col = 1

    for index in range(pos):
        if text[index] == _EOL:
            # a line break starts a new row
            row += 1
            col = 1
        else:
            col += 1

    return row, col