ssc_codegen 0.26.0__tar.gz → 0.26.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/PKG-INFO +1 -1
  2. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/pyproject.toml +1 -1
  3. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/converters/py_bs4.py +2 -14
  4. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/converters/py_helpers.py +12 -0
  5. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/converters/py_lxml.py +2 -0
  6. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/converters/py_parsel.py +1 -0
  7. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/converters/py_slax.py +1 -0
  8. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/core/expressions.py +15 -2
  9. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/core/module_handler.py +11 -5
  10. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/core/reader.py +34 -2
  11. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/core/struct_parser.py +21 -8
  12. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/main.py +10 -7
  13. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/.gitignore +0 -0
  14. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/LICENSE +0 -0
  15. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/README.md +0 -0
  16. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/__init__.py +0 -0
  17. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/_logging.py +0 -0
  18. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/__init__.py +0 -0
  19. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/array.py +0 -0
  20. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/base.py +0 -0
  21. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/cast.py +0 -0
  22. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/control.py +0 -0
  23. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/extract.py +0 -0
  24. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/helpers.py +0 -0
  25. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/jsondef.py +0 -0
  26. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/module.py +0 -0
  27. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/predicate_containers.py +0 -0
  28. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/predicate_ops.py +0 -0
  29. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/regex.py +0 -0
  30. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/selectors.py +0 -0
  31. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/string.py +0 -0
  32. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/struct.py +0 -0
  33. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/transform.py +0 -0
  34. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/typedef.py +0 -0
  35. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/ast/types.py +0 -0
  36. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/converters/base.py +0 -0
  37. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/converters/helpers.py +0 -0
  38. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/converters/js_pure.py +0 -0
  39. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/converters/py_render.py +0 -0
  40. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/converters/request_spec.py +0 -0
  41. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/core/__init__.py +0 -0
  42. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/core/adapter.py +0 -0
  43. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/core/contexts.py +0 -0
  44. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/core/format.py +0 -0
  45. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/core/linting.py +0 -0
  46. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/core/predicates.py +0 -0
  47. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/core/type_checking.py +0 -0
  48. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/document_utils.py +0 -0
  49. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/exceptions.py +0 -0
  50. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/health.py +0 -0
  51. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/kdl/__init__.py +0 -0
  52. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/kdl/dict_reader.py +0 -0
  53. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/kdl/parser.py +0 -0
  54. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/kdl/reader.py +0 -0
  55. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/parsers/__init__.py +0 -0
  56. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/parsers/curl.py +0 -0
  57. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/parsers/http.py +0 -0
  58. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/pseudo_selectors.py +0 -0
  59. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/regex_utils.py +0 -0
  60. {ssc_codegen-0.26.0 → ssc_codegen-0.26.2}/ssc_codegen/selector_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ssc_codegen
3
- Version: 0.26.0
3
+ Version: 0.26.2
4
4
  Summary: Python-dsl code converter to html parser for web scraping
5
5
  Project-URL: Documentation, https://github.com/vypivshiy/selector_schema_codegen#readme
6
6
  Project-URL: Issues, https://github.com/vypivshiy/selector_schema_codegen/issues
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ssc_codegen"
3
- version = "0.26.0"
3
+ version = "0.26.2"
4
4
  description = "Python-dsl code converter to html parser for web scraping "
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -62,12 +62,8 @@ def pre_imports(node: a.Imports, ctx: ConverterContext):
62
62
  "import re",
63
63
  "import sys",
64
64
  "from typing import TypedDict, Optional, Any, List, Dict, Union, Literal",
65
- "",
66
- "if sys.version_info >= (3, 11):",
67
- " from typing import NotRequired",
68
- "else:",
69
- " from typing_extensions import NotRequired",
70
65
  ]
66
+ base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
71
67
  else:
72
68
  base_imports = [
73
69
  "import json",
@@ -77,15 +73,7 @@ def pre_imports(node: a.Imports, ctx: ConverterContext):
77
73
  ]
78
74
  if not py_helpers.module_is_rest_only(node):
79
75
  base_imports.append("from html import unescape as _html_unescape")
80
- base_imports.extend(
81
- [
82
- "",
83
- "if sys.version_info >= (3, 11):",
84
- " from typing import NotRequired",
85
- "else:",
86
- " from typing_extensions import NotRequired",
87
- ]
88
- )
76
+ base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
89
77
  base_imports.extend(py_helpers.rest_imports(node))
90
78
 
91
79
  # Get transform imports for Python (already collected during parsing)
@@ -10,9 +10,21 @@ import ssc_codegen.ast as a
10
10
  from ssc_codegen.ast.struct import PlaceholderSpec
11
11
 
12
12
  from ssc_codegen.converters.base import ConverterContext
13
+ import sys
14
+
13
15
  from ssc_codegen.converters.helpers import to_pascal_case
14
16
 
17
+ NOT_REQUIRED_IMPORT: list[str] = [
18
+ "",
19
+ "if sys.version_info >= (3, 11):",
20
+ " from typing import NotRequired",
21
+ "else:",
22
+ " from typing_extensions import NotRequired",
23
+ ]
24
+
15
25
  __all__ = [
26
+ # Shared import fragments
27
+ "NOT_REQUIRED_IMPORT",
16
28
  # Runtime module generation
17
29
  "base_utility_lines",
18
30
  "runtime_export_names",
@@ -49,6 +49,8 @@ def pre_imports(node: a.Imports, ctx: ConverterContext):
49
49
  base_imports.append("from html import unescape as _html_unescape")
50
50
  base_imports.extend(py_helpers.rest_imports(node))
51
51
 
52
+ base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
53
+
52
54
  # Get transform imports for Python (already collected during parsing)
53
55
  transform_imports = sorted(node.transform_imports.get("py", set()))
54
56
 
@@ -30,6 +30,7 @@ def pre_imports(node: a.Imports, _: ConverterContext):
30
30
  if not py_helpers.module_is_rest_only(node):
31
31
  base_imports.append("from html import unescape as _html_unescape")
32
32
  base_imports.extend(py_helpers.rest_imports(node))
33
+ base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
33
34
 
34
35
  transform_imports = sorted(node.transform_imports.get("py", set()))
35
36
 
@@ -29,6 +29,7 @@ def pre_imports(node: a.Imports, _: ConverterContext):
29
29
  if not py_helpers.module_is_rest_only(node):
30
30
  base_imports.append("from html import unescape as _html_unescape")
31
31
  base_imports.extend(py_helpers.rest_imports(node))
32
+ base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
32
33
 
33
34
  transform_imports = sorted(node.transform_imports.get("py", set()))
34
35
 
@@ -438,6 +438,9 @@ def parse_expressions(
438
438
  lint.push(node.name)
439
439
  lint_pipeline_op(node, lint)
440
440
  expr = handler(node, parent, ctx, lint) # type: ignore[assignment,arg-type]
441
+ if expr is None:
442
+ lint.pop()
443
+ continue
441
444
  if isinstance(expr, Fallback):
442
445
  lint.pop()
443
446
  continue
@@ -920,7 +923,12 @@ def _expr_jsonify(
920
923
  )
921
924
  json_def = ctx.json_defs.get(schema_name)
922
925
  if json_def is None:
923
- raise ParseError(f"jsonify: JSON schema '{schema_name}' not found")
926
+ lint.error(
927
+ node,
928
+ message=f"jsonify: JSON schema '{schema_name}' not found",
929
+ code="E300",
930
+ )
931
+ return None
924
932
  ret_type, is_array = resolve_jsonify_type(json_def, path, ctx)
925
933
  return Jsonify(
926
934
  parent=parent,
@@ -938,7 +946,12 @@ def _expr_nested(
938
946
  struct_name = str(node.args[0].value)
939
947
  struct = ctx.structs.get(struct_name)
940
948
  if struct is None:
941
- raise ParseError(f"nested: struct '{struct_name}' not found")
949
+ lint.error(
950
+ node,
951
+ message=f"nested: struct '{struct_name}' not found",
952
+ code="E300",
953
+ )
954
+ return None
942
955
  is_array = struct.struct_type in (StructType.FLAT, StructType.LIST)
943
956
  return Nested(parent=parent, struct_name=struct_name, is_array=is_array)
944
957
 
@@ -12,7 +12,7 @@ from ssc_codegen.ast import (
12
12
  TransformDef,
13
13
  TransformTarget,
14
14
  )
15
- from ssc_codegen.exceptions import BuildTimeError, ParseError
15
+ from ssc_codegen.exceptions import BuildTimeError
16
16
  from ssc_codegen.kdl import (
17
17
  KDL2CSTParser,
18
18
  KdlArg,
@@ -142,13 +142,19 @@ def handle_transform(
142
142
  ).value
143
143
  )
144
144
  if accept_str not in _VAR_TYPE_MAP:
145
- raise ParseError(
146
- f"transform '{name}': invalid accept type '{accept_str}' (AUTO not allowed)"
145
+ lint.error(
146
+ node,
147
+ message=f"transform '{name}': invalid accept type '{accept_str}' (AUTO not allowed)",
148
+ code="E002",
147
149
  )
150
+ return
148
151
  if ret_str not in _VAR_TYPE_MAP:
149
- raise ParseError(
150
- f"transform '{name}': invalid return type '{ret_str}' (AUTO not allowed)"
152
+ lint.error(
153
+ node,
154
+ message=f"transform '{name}': invalid return type '{ret_str}' (AUTO not allowed)",
155
+ code="E002",
151
156
  )
157
+ return
152
158
  accept_type = _VAR_TYPE_MAP[accept_str]
153
159
  ret_type = _VAR_TYPE_MAP[ret_str]
154
160
  transform_def = TransformDef(name=name, accept=accept_type, ret=ret_type)
@@ -6,13 +6,17 @@ from collections.abc import Mapping
6
6
  from pathlib import Path
7
7
 
8
8
  from ssc_codegen.ast import Module
9
+ from ssc_codegen.exceptions import BuildTimeError, ParseError
9
10
  from ssc_codegen.kdl import (
10
11
  KDL2CSTParser,
12
+ KDLParseError,
11
13
  KdlArg,
12
14
  KdlNode,
15
+ Position,
13
16
  ReadDiagnostic,
14
17
  Reader,
15
18
  Severity,
19
+ Span,
16
20
  WalkContext,
17
21
  parse_into,
18
22
  )
@@ -128,6 +132,34 @@ def parse_module(
128
132
  ) -> tuple[Module, list[ReadDiagnostic]]:
129
133
  """Parse KDL source -> Module AST + diagnostics."""
130
134
  parser = KDL2CSTParser()
131
- doc = parser.parse(src)
135
+ try:
136
+ doc = parser.parse(src)
137
+ except KDLParseError as exc:
138
+ pos = Position(offset=exc.offset, line=exc.line, column=exc.column)
139
+ span = Span(start=pos, end=pos)
140
+ return Module(), [
141
+ ReadDiagnostic(
142
+ message=exc.message,
143
+ severity=Severity.ERROR,
144
+ span=span,
145
+ path=str(source_path) if source_path else "",
146
+ hint="Fix the syntax error and try again.",
147
+ code="E000",
148
+ label="syntax error",
149
+ )
150
+ ]
132
151
  reader = SscReader(source_path=source_path)
133
- return parse_into(doc, reader)
152
+ try:
153
+ return parse_into(doc, reader)
154
+ except (ParseError, BuildTimeError) as exc:
155
+ pos = Position(offset=0, line=0, column=0)
156
+ span = Span(start=pos, end=pos)
157
+ return Module(), [
158
+ ReadDiagnostic(
159
+ message=str(exc),
160
+ severity=Severity.ERROR,
161
+ span=span,
162
+ path=str(source_path) if source_path else "",
163
+ code="E000",
164
+ )
165
+ ]
@@ -27,7 +27,6 @@ from ssc_codegen.ast import (
27
27
  Value,
28
28
  VariableType,
29
29
  )
30
- from ssc_codegen.exceptions import ParseError
31
30
  from ssc_codegen.kdl import KdlArg, KdlNode
32
31
 
33
32
  from ssc_codegen.core.contexts import LintContext, ParseContext, WalkCtx
@@ -56,9 +55,12 @@ def parse_struct(
56
55
  parent.body.append(expr)
57
56
  elif node.name == "@check":
58
57
  if not node.args:
59
- raise ParseError(
60
- "@check requires a name: @check <name> { ... }"
58
+ lint.error(
59
+ node,
60
+ message="@check requires a name: @check <name> { ... }",
61
+ code="E001",
61
62
  )
63
+ continue
62
64
  check_name = str(node.args[0].value)
63
65
  expr = CheckMethod(parent=parent, name=check_name)
64
66
  parse_expressions(node.children, expr, ctx, lint)
@@ -89,9 +91,12 @@ def parse_struct(
89
91
  parent.body.append(expr)
90
92
  elif node.name == "@request":
91
93
  if not node.args:
92
- raise ParseError(
93
- "@request requires a multiline string argument"
94
+ lint.error(
95
+ node,
96
+ message="@request requires a multiline string argument",
97
+ code="E001",
94
98
  )
99
+ continue
95
100
  raw_payload = str(
96
101
  ctx.property_defines.get(node.args[0].value, node.args[0].value)
97
102
  )
@@ -135,14 +140,22 @@ def parse_struct(
135
140
  parent.body.append(req)
136
141
  elif node.name == "@error":
137
142
  if not node.args or len(node.args) < 2:
138
- raise ParseError("@error requires both status and schema name")
143
+ lint.error(
144
+ node,
145
+ message="@error requires both status and schema name",
146
+ code="E001",
147
+ )
148
+ continue
139
149
  status_raw = node.args[0].value
140
150
  try:
141
151
  status_int = int(status_raw)
142
152
  except (TypeError, ValueError):
143
- raise ParseError(
144
- f"@error status must be integer, got {status_raw!r}"
153
+ lint.error(
154
+ node,
155
+ message=f"@error status must be integer, got {status_raw!r}",
156
+ code="E002",
145
157
  )
158
+ continue
146
159
  schema_name = str(
147
160
  ctx.property_defines.get(node.args[1].value, node.args[1].value)
148
161
  )
@@ -270,7 +270,9 @@ def generate(
270
270
  if skip_lint:
271
271
  pass
272
272
  else:
273
- lint_output = format_diagnostics(err, fmt=fmt.value)
273
+ lint_output = format_diagnostics(
274
+ err, filepath=kdl_file, fmt=fmt.value
275
+ )
274
276
  if lint_output:
275
277
  typer.echo(lint_output, err=True)
276
278
  errors.append(lint_output)
@@ -377,9 +379,9 @@ def check(
377
379
 
378
380
  if errs:
379
381
  total_errors += len(errs)
380
- # TODO: json output
381
- for e in format_diagnostics(errs, fmt=fmt.value):
382
- typer.echo(e, err=True)
382
+ output = format_diagnostics(errs, filepath=kdl_file, fmt=fmt.value)
383
+ if output:
384
+ typer.echo(output, err=True)
383
385
 
384
386
  if total_errors > 0:
385
387
  if fmt == FmtType.TEXT:
@@ -495,9 +497,10 @@ def run(
495
497
  kdl_path.read_text(encoding="utf-8"), source_path=kdl_path
496
498
  )
497
499
  if errs:
498
- for e in format_diagnostics(errs, fmt=fmt.value):
499
- typer.echo(e, err=True)
500
- raise typer.Exit(1)
500
+ output = format_diagnostics(errs, filepath=kdl_path, fmt=fmt.value)
501
+ if output:
502
+ typer.echo(output, err=True)
503
+ raise typer.Exit(1)
501
504
 
502
505
  except Exception as exc:
503
506
  if verbose:
File without changes
File without changes
File without changes