ssc_codegen 0.25.2__tar.gz → 0.26.1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/PKG-INFO +1 -1
  2. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/pyproject.toml +1 -1
  3. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/converters/request_spec.py +1 -1
  4. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/core/expressions.py +17 -4
  5. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/core/linting.py +12 -0
  6. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/core/module_handler.py +11 -5
  7. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/core/reader.py +34 -2
  8. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/core/struct_parser.py +21 -8
  9. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/main.py +10 -7
  10. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/.gitignore +0 -0
  11. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/LICENSE +0 -0
  12. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/README.md +0 -0
  13. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/__init__.py +0 -0
  14. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/_logging.py +0 -0
  15. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/__init__.py +0 -0
  16. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/array.py +0 -0
  17. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/base.py +0 -0
  18. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/cast.py +0 -0
  19. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/control.py +0 -0
  20. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/extract.py +0 -0
  21. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/helpers.py +0 -0
  22. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/jsondef.py +0 -0
  23. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/module.py +0 -0
  24. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/predicate_containers.py +0 -0
  25. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/predicate_ops.py +0 -0
  26. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/regex.py +0 -0
  27. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/selectors.py +0 -0
  28. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/string.py +0 -0
  29. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/struct.py +0 -0
  30. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/transform.py +0 -0
  31. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/typedef.py +0 -0
  32. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/ast/types.py +0 -0
  33. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/converters/base.py +0 -0
  34. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/converters/helpers.py +0 -0
  35. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/converters/js_pure.py +0 -0
  36. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/converters/py_bs4.py +0 -0
  37. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/converters/py_helpers.py +0 -0
  38. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/converters/py_lxml.py +0 -0
  39. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/converters/py_parsel.py +0 -0
  40. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/converters/py_render.py +0 -0
  41. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/converters/py_slax.py +0 -0
  42. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/core/__init__.py +0 -0
  43. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/core/adapter.py +0 -0
  44. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/core/contexts.py +0 -0
  45. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/core/format.py +0 -0
  46. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/core/predicates.py +0 -0
  47. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/core/type_checking.py +0 -0
  48. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/document_utils.py +0 -0
  49. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/exceptions.py +0 -0
  50. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/health.py +0 -0
  51. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/kdl/__init__.py +0 -0
  52. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/kdl/dict_reader.py +0 -0
  53. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/kdl/parser.py +0 -0
  54. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/kdl/reader.py +0 -0
  55. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/parsers/__init__.py +0 -0
  56. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/parsers/curl.py +0 -0
  57. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/parsers/http.py +0 -0
  58. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/pseudo_selectors.py +0 -0
  59. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/regex_utils.py +0 -0
  60. {ssc_codegen-0.25.2 → ssc_codegen-0.26.1}/ssc_codegen/selector_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ssc_codegen
3
- Version: 0.25.2
3
+ Version: 0.26.1
4
4
  Summary: Python-dsl code converter to html parser for web scraping
5
5
  Project-URL: Documentation, https://github.com/vypivshiy/selector_schema_codegen#readme
6
6
  Project-URL: Issues, https://github.com/vypivshiy/selector_schema_codegen/issues
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ssc_codegen"
3
- version = "0.25.2"
3
+ version = "0.26.1"
4
4
  description = "Python-dsl code converter to html parser for web scraping "
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -20,7 +20,7 @@ from ssc_codegen.parsers.http import parse_http_request
20
20
  # Groups: 1=NAME, 2=PRIM, 3="[]", 4="?", 5=STYLE.
21
21
  _PH = re.compile(
22
22
  r"\{\{"
23
- r"([A-Za-z][A-Za-z0-9_-]*)"
23
+ r"([a-z][a-z0-9_-]*)"
24
24
  r"(?::(str|int|float|bool))?"
25
25
  r"(\[\])?"
26
26
  r"(\?)?"
@@ -167,7 +167,7 @@ _FLOAT_RE = _re.compile(
167
167
  r"[+-]?(?:\d(?:[\d_])*\.\d(?:[\d_])*|\d(?:[\d_])*[eE][+-]?\d(?:[\d_])*|\d(?:[\d_]*)\.\d(?:[\d_]*)[eE][+-]?\d(?:[\d_])*)\Z"
168
168
  )
169
169
 
170
- _DEFINE_REF_RE = _re.compile(r"\{\{([A-Za-z][A-Za-z0-9_-]*)\}\}")
170
+ _DEFINE_REF_RE = _re.compile(r"\{\{([A-Z_][A-Z0-9_-]*)\}\}")
171
171
 
172
172
 
173
173
  def _resolve_define_references(value: str, ctx: ParseContext) -> str:
@@ -175,7 +175,7 @@ def _resolve_define_references(value: str, ctx: ParseContext) -> str:
175
175
  name = m.group(1)
176
176
  resolved = ctx.property_defines.get(name)
177
177
  if resolved is None:
178
- raise ParseError(f"define references undefined name {name!r}")
178
+ return m.group(0)
179
179
  return str(resolved)
180
180
 
181
181
  return _DEFINE_REF_RE.sub(_replacer, value)
@@ -438,6 +438,9 @@ def parse_expressions(
438
438
  lint.push(node.name)
439
439
  lint_pipeline_op(node, lint)
440
440
  expr = handler(node, parent, ctx, lint) # type: ignore[assignment,arg-type]
441
+ if expr is None:
442
+ lint.pop()
443
+ continue
441
444
  if isinstance(expr, Fallback):
442
445
  lint.pop()
443
446
  continue
@@ -920,7 +923,12 @@ def _expr_jsonify(
920
923
  )
921
924
  json_def = ctx.json_defs.get(schema_name)
922
925
  if json_def is None:
923
- raise ParseError(f"jsonify: JSON schema '{schema_name}' not found")
926
+ lint.error(
927
+ node,
928
+ message=f"jsonify: JSON schema '{schema_name}' not found",
929
+ code="E300",
930
+ )
931
+ return None
924
932
  ret_type, is_array = resolve_jsonify_type(json_def, path, ctx)
925
933
  return Jsonify(
926
934
  parent=parent,
@@ -938,7 +946,12 @@ def _expr_nested(
938
946
  struct_name = str(node.args[0].value)
939
947
  struct = ctx.structs.get(struct_name)
940
948
  if struct is None:
941
- raise ParseError(f"nested: struct '{struct_name}' not found")
949
+ lint.error(
950
+ node,
951
+ message=f"nested: struct '{struct_name}' not found",
952
+ code="E300",
953
+ )
954
+ return None
942
955
  is_array = struct.struct_type in (StructType.FLAT, StructType.LIST)
943
956
  return Nested(parent=parent, struct_name=struct_name, is_array=is_array)
944
957
 
@@ -1111,12 +1111,24 @@ def lint_rest_cross_refs(ctx: ParseContext, lint: LintContext) -> None:
1111
1111
  )
1112
1112
 
1113
1113
 
1114
+ _DEFINE_NAME_RE = _re.compile(r"^[A-Z_][A-Z0-9_-]*\Z")
1115
+
1116
+
1114
1117
  def lint_define_node(
1115
1118
  node: KdlNode, ctx: ParseContext, lint: LintContext
1116
1119
  ) -> None:
1117
1120
  """Validate module-level define."""
1118
1121
  children = lint.get_children_nodes(node)
1119
1122
  args = lint.get_args(node)
1123
+ if args:
1124
+ name = args[0]
1125
+ if not _DEFINE_NAME_RE.match(name):
1126
+ lint.error(
1127
+ node,
1128
+ message=f"define name '{name}' must be UPPER_CASE ([A-Z_][A-Z0-9_-]*)",
1129
+ code="E002",
1130
+ hint="use UPPER_CASE: define MY-VAR=... or define MY_BLOCK { ... }",
1131
+ )
1120
1132
  if children:
1121
1133
  if not args:
1122
1134
  lint.error(
@@ -12,7 +12,7 @@ from ssc_codegen.ast import (
12
12
  TransformDef,
13
13
  TransformTarget,
14
14
  )
15
- from ssc_codegen.exceptions import BuildTimeError, ParseError
15
+ from ssc_codegen.exceptions import BuildTimeError
16
16
  from ssc_codegen.kdl import (
17
17
  KDL2CSTParser,
18
18
  KdlArg,
@@ -142,13 +142,19 @@ def handle_transform(
142
142
  ).value
143
143
  )
144
144
  if accept_str not in _VAR_TYPE_MAP:
145
- raise ParseError(
146
- f"transform '{name}': invalid accept type '{accept_str}' (AUTO not allowed)"
145
+ lint.error(
146
+ node,
147
+ message=f"transform '{name}': invalid accept type '{accept_str}' (AUTO not allowed)",
148
+ code="E002",
147
149
  )
150
+ return
148
151
  if ret_str not in _VAR_TYPE_MAP:
149
- raise ParseError(
150
- f"transform '{name}': invalid return type '{ret_str}' (AUTO not allowed)"
152
+ lint.error(
153
+ node,
154
+ message=f"transform '{name}': invalid return type '{ret_str}' (AUTO not allowed)",
155
+ code="E002",
151
156
  )
157
+ return
152
158
  accept_type = _VAR_TYPE_MAP[accept_str]
153
159
  ret_type = _VAR_TYPE_MAP[ret_str]
154
160
  transform_def = TransformDef(name=name, accept=accept_type, ret=ret_type)
@@ -6,13 +6,17 @@ from collections.abc import Mapping
6
6
  from pathlib import Path
7
7
 
8
8
  from ssc_codegen.ast import Module
9
+ from ssc_codegen.exceptions import BuildTimeError, ParseError
9
10
  from ssc_codegen.kdl import (
10
11
  KDL2CSTParser,
12
+ KDLParseError,
11
13
  KdlArg,
12
14
  KdlNode,
15
+ Position,
13
16
  ReadDiagnostic,
14
17
  Reader,
15
18
  Severity,
19
+ Span,
16
20
  WalkContext,
17
21
  parse_into,
18
22
  )
@@ -128,6 +132,34 @@ def parse_module(
128
132
  ) -> tuple[Module, list[ReadDiagnostic]]:
129
133
  """Parse KDL source -> Module AST + diagnostics."""
130
134
  parser = KDL2CSTParser()
131
- doc = parser.parse(src)
135
+ try:
136
+ doc = parser.parse(src)
137
+ except KDLParseError as exc:
138
+ pos = Position(offset=exc.offset, line=exc.line, column=exc.column)
139
+ span = Span(start=pos, end=pos)
140
+ return Module(), [
141
+ ReadDiagnostic(
142
+ message=exc.message,
143
+ severity=Severity.ERROR,
144
+ span=span,
145
+ path=str(source_path) if source_path else "",
146
+ hint="Fix the syntax error and try again.",
147
+ code="E000",
148
+ label="syntax error",
149
+ )
150
+ ]
132
151
  reader = SscReader(source_path=source_path)
133
- return parse_into(doc, reader)
152
+ try:
153
+ return parse_into(doc, reader)
154
+ except (ParseError, BuildTimeError) as exc:
155
+ pos = Position(offset=0, line=0, column=0)
156
+ span = Span(start=pos, end=pos)
157
+ return Module(), [
158
+ ReadDiagnostic(
159
+ message=str(exc),
160
+ severity=Severity.ERROR,
161
+ span=span,
162
+ path=str(source_path) if source_path else "",
163
+ code="E000",
164
+ )
165
+ ]
@@ -27,7 +27,6 @@ from ssc_codegen.ast import (
27
27
  Value,
28
28
  VariableType,
29
29
  )
30
- from ssc_codegen.exceptions import ParseError
31
30
  from ssc_codegen.kdl import KdlArg, KdlNode
32
31
 
33
32
  from ssc_codegen.core.contexts import LintContext, ParseContext, WalkCtx
@@ -56,9 +55,12 @@ def parse_struct(
56
55
  parent.body.append(expr)
57
56
  elif node.name == "@check":
58
57
  if not node.args:
59
- raise ParseError(
60
- "@check requires a name: @check <name> { ... }"
58
+ lint.error(
59
+ node,
60
+ message="@check requires a name: @check <name> { ... }",
61
+ code="E001",
61
62
  )
63
+ continue
62
64
  check_name = str(node.args[0].value)
63
65
  expr = CheckMethod(parent=parent, name=check_name)
64
66
  parse_expressions(node.children, expr, ctx, lint)
@@ -89,9 +91,12 @@ def parse_struct(
89
91
  parent.body.append(expr)
90
92
  elif node.name == "@request":
91
93
  if not node.args:
92
- raise ParseError(
93
- "@request requires a multiline string argument"
94
+ lint.error(
95
+ node,
96
+ message="@request requires a multiline string argument",
97
+ code="E001",
94
98
  )
99
+ continue
95
100
  raw_payload = str(
96
101
  ctx.property_defines.get(node.args[0].value, node.args[0].value)
97
102
  )
@@ -135,14 +140,22 @@ def parse_struct(
135
140
  parent.body.append(req)
136
141
  elif node.name == "@error":
137
142
  if not node.args or len(node.args) < 2:
138
- raise ParseError("@error requires both status and schema name")
143
+ lint.error(
144
+ node,
145
+ message="@error requires both status and schema name",
146
+ code="E001",
147
+ )
148
+ continue
139
149
  status_raw = node.args[0].value
140
150
  try:
141
151
  status_int = int(status_raw)
142
152
  except (TypeError, ValueError):
143
- raise ParseError(
144
- f"@error status must be integer, got {status_raw!r}"
153
+ lint.error(
154
+ node,
155
+ message=f"@error status must be integer, got {status_raw!r}",
156
+ code="E002",
145
157
  )
158
+ continue
146
159
  schema_name = str(
147
160
  ctx.property_defines.get(node.args[1].value, node.args[1].value)
148
161
  )
@@ -270,7 +270,9 @@ def generate(
270
270
  if skip_lint:
271
271
  pass
272
272
  else:
273
- lint_output = format_diagnostics(err, fmt=fmt.value)
273
+ lint_output = format_diagnostics(
274
+ err, filepath=kdl_file, fmt=fmt.value
275
+ )
274
276
  if lint_output:
275
277
  typer.echo(lint_output, err=True)
276
278
  errors.append(lint_output)
@@ -377,9 +379,9 @@ def check(
377
379
 
378
380
  if errs:
379
381
  total_errors += len(errs)
380
- # TODO: json output
381
- for e in format_diagnostics(errs, fmt=fmt.value):
382
- typer.echo(e, err=True)
382
+ output = format_diagnostics(errs, filepath=kdl_file, fmt=fmt.value)
383
+ if output:
384
+ typer.echo(output, err=True)
383
385
 
384
386
  if total_errors > 0:
385
387
  if fmt == FmtType.TEXT:
@@ -495,9 +497,10 @@ def run(
495
497
  kdl_path.read_text(encoding="utf-8"), source_path=kdl_path
496
498
  )
497
499
  if errs:
498
- for e in format_diagnostics(errs, fmt=fmt.value):
499
- typer.echo(e, err=True)
500
- raise typer.Exit(1)
500
+ output = format_diagnostics(errs, filepath=kdl_path, fmt=fmt.value)
501
+ if output:
502
+ typer.echo(output, err=True)
503
+ raise typer.Exit(1)
501
504
 
502
505
  except Exception as exc:
503
506
  if verbose:
File without changes
File without changes
File without changes