ssc_codegen 0.26.1__tar.gz → 0.26.3__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/PKG-INFO +1 -1
  2. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/pyproject.toml +1 -1
  3. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/py_bs4.py +2 -14
  4. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/py_helpers.py +11 -0
  5. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/py_lxml.py +2 -0
  6. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/py_parsel.py +21 -9
  7. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/py_slax.py +21 -9
  8. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/expressions.py +1 -1
  9. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/main.py +40 -16
  10. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/.gitignore +0 -0
  11. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/LICENSE +0 -0
  12. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/README.md +0 -0
  13. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/__init__.py +0 -0
  14. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/_logging.py +0 -0
  15. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/__init__.py +0 -0
  16. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/array.py +0 -0
  17. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/base.py +0 -0
  18. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/cast.py +0 -0
  19. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/control.py +0 -0
  20. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/extract.py +0 -0
  21. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/helpers.py +0 -0
  22. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/jsondef.py +0 -0
  23. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/module.py +0 -0
  24. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/predicate_containers.py +0 -0
  25. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/predicate_ops.py +0 -0
  26. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/regex.py +0 -0
  27. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/selectors.py +0 -0
  28. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/string.py +0 -0
  29. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/struct.py +0 -0
  30. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/transform.py +0 -0
  31. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/typedef.py +0 -0
  32. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/types.py +0 -0
  33. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/base.py +0 -0
  34. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/helpers.py +0 -0
  35. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/js_pure.py +0 -0
  36. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/py_render.py +0 -0
  37. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/request_spec.py +0 -0
  38. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/__init__.py +0 -0
  39. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/adapter.py +0 -0
  40. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/contexts.py +0 -0
  41. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/format.py +0 -0
  42. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/linting.py +0 -0
  43. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/module_handler.py +0 -0
  44. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/predicates.py +0 -0
  45. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/reader.py +0 -0
  46. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/struct_parser.py +0 -0
  47. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/type_checking.py +0 -0
  48. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/document_utils.py +0 -0
  49. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/exceptions.py +0 -0
  50. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/health.py +0 -0
  51. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/kdl/__init__.py +0 -0
  52. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/kdl/dict_reader.py +0 -0
  53. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/kdl/parser.py +0 -0
  54. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/kdl/reader.py +0 -0
  55. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/parsers/__init__.py +0 -0
  56. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/parsers/curl.py +0 -0
  57. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/parsers/http.py +0 -0
  58. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/pseudo_selectors.py +0 -0
  59. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/regex_utils.py +0 -0
  60. {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/selector_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ssc_codegen
3
- Version: 0.26.1
3
+ Version: 0.26.3
4
4
  Summary: Python-dsl code converter to html parser for web scraping
5
5
  Project-URL: Documentation, https://github.com/vypivshiy/selector_schema_codegen#readme
6
6
  Project-URL: Issues, https://github.com/vypivshiy/selector_schema_codegen/issues
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ssc_codegen"
3
- version = "0.26.1"
3
+ version = "0.26.3"
4
4
  description = "Python-dsl code converter to html parser for web scraping "
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -62,12 +62,8 @@ def pre_imports(node: a.Imports, ctx: ConverterContext):
62
62
  "import re",
63
63
  "import sys",
64
64
  "from typing import TypedDict, Optional, Any, List, Dict, Union, Literal",
65
- "",
66
- "if sys.version_info >= (3, 11):",
67
- " from typing import NotRequired",
68
- "else:",
69
- " from typing_extensions import NotRequired",
70
65
  ]
66
+ base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
71
67
  else:
72
68
  base_imports = [
73
69
  "import json",
@@ -77,15 +73,7 @@ def pre_imports(node: a.Imports, ctx: ConverterContext):
77
73
  ]
78
74
  if not py_helpers.module_is_rest_only(node):
79
75
  base_imports.append("from html import unescape as _html_unescape")
80
- base_imports.extend(
81
- [
82
- "",
83
- "if sys.version_info >= (3, 11):",
84
- " from typing import NotRequired",
85
- "else:",
86
- " from typing_extensions import NotRequired",
87
- ]
88
- )
76
+ base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
89
77
  base_imports.extend(py_helpers.rest_imports(node))
90
78
 
91
79
  # Get transform imports for Python (already collected during parsing)
@@ -10,9 +10,20 @@ import ssc_codegen.ast as a
10
10
  from ssc_codegen.ast.struct import PlaceholderSpec
11
11
 
12
12
  from ssc_codegen.converters.base import ConverterContext
13
+
13
14
  from ssc_codegen.converters.helpers import to_pascal_case
14
15
 
16
+ NOT_REQUIRED_IMPORT: list[str] = [
17
+ "",
18
+ "if sys.version_info >= (3, 11):",
19
+ " from typing import NotRequired",
20
+ "else:",
21
+ " from typing_extensions import NotRequired",
22
+ ]
23
+
15
24
  __all__ = [
25
+ # Shared import fragments
26
+ "NOT_REQUIRED_IMPORT",
16
27
  # Runtime module generation
17
28
  "base_utility_lines",
18
29
  "runtime_export_names",
@@ -49,6 +49,8 @@ def pre_imports(node: a.Imports, ctx: ConverterContext):
49
49
  base_imports.append("from html import unescape as _html_unescape")
50
50
  base_imports.extend(py_helpers.rest_imports(node))
51
51
 
52
+ base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
53
+
52
54
  # Get transform imports for Python (already collected during parsing)
53
55
  transform_imports = sorted(node.transform_imports.get("py", set()))
54
56
 
@@ -21,15 +21,27 @@ PY_TYPES[VT.LIST_DOCUMENT] = "SelectorList"
21
21
 
22
22
 
23
23
  @PY_PARSEL_CONVERTER(a.Imports)
24
- def pre_imports(node: a.Imports, _: ConverterContext):
25
- base_imports = [
26
- "import re",
27
- "import sys",
28
- "from typing import TypedDict, Optional, Any, List, Dict, Union, Literal",
29
- ]
30
- if not py_helpers.module_is_rest_only(node):
31
- base_imports.append("from html import unescape as _html_unescape")
32
- base_imports.extend(py_helpers.rest_imports(node))
24
+ def pre_imports(node: a.Imports, ctx: ConverterContext):
25
+ runtime = ctx.meta.get("runtime_module")
26
+ if runtime:
27
+ base_imports = [
28
+ "import json",
29
+ "import re",
30
+ "import sys",
31
+ "from typing import TypedDict, Optional, Any, List, Dict, Union, Literal",
32
+ ]
33
+ base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
34
+ else:
35
+ base_imports = [
36
+ "import json",
37
+ "import re",
38
+ "import sys",
39
+ "from typing import TypedDict, Optional, Any, List, Dict, Union, Literal",
40
+ ]
41
+ if not py_helpers.module_is_rest_only(node):
42
+ base_imports.append("from html import unescape as _html_unescape")
43
+ base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
44
+ base_imports.extend(py_helpers.rest_imports(node))
33
45
 
34
46
  transform_imports = sorted(node.transform_imports.get("py", set()))
35
47
 
@@ -20,15 +20,27 @@ PY_TYPES[VT.LIST_DOCUMENT] = "List[Node]"
20
20
 
21
21
 
22
22
  @PY_SLAX_CONVERTER(a.Imports)
23
- def pre_imports(node: a.Imports, _: ConverterContext):
24
- base_imports = [
25
- "import re",
26
- "import sys",
27
- "from typing import TypedDict, Optional, Any, List, Dict, Union, Literal",
28
- ]
29
- if not py_helpers.module_is_rest_only(node):
30
- base_imports.append("from html import unescape as _html_unescape")
31
- base_imports.extend(py_helpers.rest_imports(node))
23
+ def pre_imports(node: a.Imports, ctx: ConverterContext):
24
+ runtime = ctx.meta.get("runtime_module")
25
+ if runtime:
26
+ base_imports = [
27
+ "import json",
28
+ "import re",
29
+ "import sys",
30
+ "from typing import TypedDict, Optional, Any, List, Dict, Union, Literal",
31
+ ]
32
+ base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
33
+ else:
34
+ base_imports = [
35
+ "import json",
36
+ "import re",
37
+ "import sys",
38
+ "from typing import TypedDict, Optional, Any, List, Dict, Union, Literal",
39
+ ]
40
+ if not py_helpers.module_is_rest_only(node):
41
+ base_imports.append("from html import unescape as _html_unescape")
42
+ base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
43
+ base_imports.extend(py_helpers.rest_imports(node))
32
44
 
33
45
  transform_imports = sorted(node.transform_imports.get("py", set()))
34
46
 
@@ -74,7 +74,7 @@ from ssc_codegen.ast import (
74
74
  XpathSelect,
75
75
  XpathSelectAll,
76
76
  )
77
- from ssc_codegen.exceptions import BuildTimeError, ParseError
77
+ from ssc_codegen.exceptions import BuildTimeError
78
78
  from ssc_codegen.kdl import KdlArg, KdlNode
79
79
  from ssc_codegen.regex_utils import normalize_regex_pattern
80
80
  from typing import cast
@@ -260,16 +260,16 @@ def generate(
260
260
  meta["_include_fallback_html"] = target == Target.PY_LXML
261
261
  register_runtime_file(converter, _runtime_name)
262
262
 
263
+ # Phase 1: parse all KDL files
264
+ from ssc_codegen.ast import Module, Struct
265
+
266
+ parsed: list[tuple[Path, Module]] = []
263
267
  for kdl_file in kdl_files:
264
- out_file = output / kdl_file.with_suffix(ext).name
265
- logger.debug("processing: %s -> %s", kdl_file, out_file)
266
268
  try:
267
269
  ast, err = parse_module(
268
270
  kdl_file.read_text(encoding="utf-8"), source_path=kdl_file
269
271
  )
270
- if skip_lint:
271
- pass
272
- else:
272
+ if not skip_lint:
273
273
  lint_output = format_diagnostics(
274
274
  err, filepath=kdl_file, fmt=fmt.value
275
275
  )
@@ -278,18 +278,42 @@ def generate(
278
278
  errors.append(lint_output)
279
279
  continue
280
280
  logger.debug("AST built for %s", kdl_file)
281
-
282
- if converter.has_support_files:
283
- generated: dict[str, str] = converter.convert_all(ast, **meta)
284
- for name, content in generated.items():
285
- target_path = out_file if name == "" else output / name
286
- target_path.write_text(content, encoding="utf-8")
287
- if name:
288
- typer.echo(f" -> {target_path}")
289
- code = generated[""]
281
+ parsed.append((kdl_file, ast))
282
+ except Exception as exc:
283
+ if verbose:
284
+ typer.echo(traceback.format_exc(), err=True)
290
285
  else:
291
- code = converter.convert(ast, **meta)
292
- out_file.write_text(code, encoding="utf-8")
286
+ typer.echo(f" ERROR {kdl_file}: {exc}", err=True)
287
+ errors.append(str(kdl_file))
288
+
289
+ # Phase 2: write runtime file once (if -R is used)
290
+ if separate_runtime and parsed:
291
+ from ssc_codegen.converters.py_helpers import runtime_module_content
292
+
293
+ include_fallback = target == Target.PY_LXML
294
+ # Pick an AST that has REST structs so the runtime includes helpers
295
+ ref_ast = next(
296
+ (
297
+ ast
298
+ for _, ast in parsed
299
+ if any(isinstance(n, Struct) and n.is_rest for n in ast.body)
300
+ ),
301
+ parsed[0][1],
302
+ )
303
+ runtime_path = output / f"{_runtime_name}.py"
304
+ runtime_path.write_text(
305
+ runtime_module_content(ref_ast, include_fallback),
306
+ encoding="utf-8",
307
+ )
308
+ typer.echo(f" -> {runtime_path}")
309
+
310
+ # Phase 3: generate and write each main module
311
+ for kdl_file, ast in parsed:
312
+ out_file = output / kdl_file.with_suffix(ext).name
313
+ logger.debug("processing: %s -> %s", kdl_file, out_file)
314
+ try:
315
+ code = converter.convert(ast, **meta)
316
+ out_file.write_text(code, encoding="utf-8")
293
317
 
294
318
  logger.debug(
295
319
  "code generated for %s (%d chars)", kdl_file, len(code)
File without changes
File without changes
File without changes