ssc_codegen 0.26.1__tar.gz → 0.26.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/PKG-INFO +1 -1
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/pyproject.toml +1 -1
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/py_bs4.py +2 -14
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/py_helpers.py +11 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/py_lxml.py +2 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/py_parsel.py +21 -9
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/py_slax.py +21 -9
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/expressions.py +1 -1
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/main.py +40 -16
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/.gitignore +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/LICENSE +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/README.md +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/__init__.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/_logging.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/__init__.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/array.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/base.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/cast.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/control.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/extract.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/helpers.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/jsondef.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/module.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/predicate_containers.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/predicate_ops.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/regex.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/selectors.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/string.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/struct.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/transform.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/typedef.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/ast/types.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/base.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/helpers.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/js_pure.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/py_render.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/converters/request_spec.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/__init__.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/adapter.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/contexts.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/format.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/linting.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/module_handler.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/predicates.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/reader.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/struct_parser.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/core/type_checking.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/document_utils.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/exceptions.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/health.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/kdl/__init__.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/kdl/dict_reader.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/kdl/parser.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/kdl/reader.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/parsers/__init__.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/parsers/curl.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/parsers/http.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/pseudo_selectors.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/regex_utils.py +0 -0
- {ssc_codegen-0.26.1 → ssc_codegen-0.26.3}/ssc_codegen/selector_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ssc_codegen
|
|
3
|
-
Version: 0.26.1
|
|
3
|
+
Version: 0.26.3
|
|
4
4
|
Summary: Python-dsl code converter to html parser for web scraping
|
|
5
5
|
Project-URL: Documentation, https://github.com/vypivshiy/selector_schema_codegen#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/vypivshiy/selector_schema_codegen/issues
|
|
@@ -62,12 +62,8 @@ def pre_imports(node: a.Imports, ctx: ConverterContext):
|
|
|
62
62
|
"import re",
|
|
63
63
|
"import sys",
|
|
64
64
|
"from typing import TypedDict, Optional, Any, List, Dict, Union, Literal",
|
|
65
|
-
"",
|
|
66
|
-
"if sys.version_info >= (3, 11):",
|
|
67
|
-
" from typing import NotRequired",
|
|
68
|
-
"else:",
|
|
69
|
-
" from typing_extensions import NotRequired",
|
|
70
65
|
]
|
|
66
|
+
base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
|
|
71
67
|
else:
|
|
72
68
|
base_imports = [
|
|
73
69
|
"import json",
|
|
@@ -77,15 +73,7 @@ def pre_imports(node: a.Imports, ctx: ConverterContext):
|
|
|
77
73
|
]
|
|
78
74
|
if not py_helpers.module_is_rest_only(node):
|
|
79
75
|
base_imports.append("from html import unescape as _html_unescape")
|
|
80
|
-
base_imports.extend(
|
|
81
|
-
[
|
|
82
|
-
"",
|
|
83
|
-
"if sys.version_info >= (3, 11):",
|
|
84
|
-
" from typing import NotRequired",
|
|
85
|
-
"else:",
|
|
86
|
-
" from typing_extensions import NotRequired",
|
|
87
|
-
]
|
|
88
|
-
)
|
|
76
|
+
base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
|
|
89
77
|
base_imports.extend(py_helpers.rest_imports(node))
|
|
90
78
|
|
|
91
79
|
# Get transform imports for Python (already collected during parsing)
|
|
@@ -10,9 +10,20 @@ import ssc_codegen.ast as a
|
|
|
10
10
|
from ssc_codegen.ast.struct import PlaceholderSpec
|
|
11
11
|
|
|
12
12
|
from ssc_codegen.converters.base import ConverterContext
|
|
13
|
+
|
|
13
14
|
from ssc_codegen.converters.helpers import to_pascal_case
|
|
14
15
|
|
|
16
|
+
NOT_REQUIRED_IMPORT: list[str] = [
|
|
17
|
+
"",
|
|
18
|
+
"if sys.version_info >= (3, 11):",
|
|
19
|
+
" from typing import NotRequired",
|
|
20
|
+
"else:",
|
|
21
|
+
" from typing_extensions import NotRequired",
|
|
22
|
+
]
|
|
23
|
+
|
|
15
24
|
__all__ = [
|
|
25
|
+
# Shared import fragments
|
|
26
|
+
"NOT_REQUIRED_IMPORT",
|
|
16
27
|
# Runtime module generation
|
|
17
28
|
"base_utility_lines",
|
|
18
29
|
"runtime_export_names",
|
|
@@ -49,6 +49,8 @@ def pre_imports(node: a.Imports, ctx: ConverterContext):
|
|
|
49
49
|
base_imports.append("from html import unescape as _html_unescape")
|
|
50
50
|
base_imports.extend(py_helpers.rest_imports(node))
|
|
51
51
|
|
|
52
|
+
base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
|
|
53
|
+
|
|
52
54
|
# Get transform imports for Python (already collected during parsing)
|
|
53
55
|
transform_imports = sorted(node.transform_imports.get("py", set()))
|
|
54
56
|
|
|
@@ -21,15 +21,27 @@ PY_TYPES[VT.LIST_DOCUMENT] = "SelectorList"
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
@PY_PARSEL_CONVERTER(a.Imports)
|
|
24
|
-
def pre_imports(node: a.Imports,
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
24
|
+
def pre_imports(node: a.Imports, ctx: ConverterContext):
|
|
25
|
+
runtime = ctx.meta.get("runtime_module")
|
|
26
|
+
if runtime:
|
|
27
|
+
base_imports = [
|
|
28
|
+
"import json",
|
|
29
|
+
"import re",
|
|
30
|
+
"import sys",
|
|
31
|
+
"from typing import TypedDict, Optional, Any, List, Dict, Union, Literal",
|
|
32
|
+
]
|
|
33
|
+
base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
|
|
34
|
+
else:
|
|
35
|
+
base_imports = [
|
|
36
|
+
"import json",
|
|
37
|
+
"import re",
|
|
38
|
+
"import sys",
|
|
39
|
+
"from typing import TypedDict, Optional, Any, List, Dict, Union, Literal",
|
|
40
|
+
]
|
|
41
|
+
if not py_helpers.module_is_rest_only(node):
|
|
42
|
+
base_imports.append("from html import unescape as _html_unescape")
|
|
43
|
+
base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
|
|
44
|
+
base_imports.extend(py_helpers.rest_imports(node))
|
|
33
45
|
|
|
34
46
|
transform_imports = sorted(node.transform_imports.get("py", set()))
|
|
35
47
|
|
|
@@ -20,15 +20,27 @@ PY_TYPES[VT.LIST_DOCUMENT] = "List[Node]"
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
@PY_SLAX_CONVERTER(a.Imports)
|
|
23
|
-
def pre_imports(node: a.Imports,
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
23
|
+
def pre_imports(node: a.Imports, ctx: ConverterContext):
|
|
24
|
+
runtime = ctx.meta.get("runtime_module")
|
|
25
|
+
if runtime:
|
|
26
|
+
base_imports = [
|
|
27
|
+
"import json",
|
|
28
|
+
"import re",
|
|
29
|
+
"import sys",
|
|
30
|
+
"from typing import TypedDict, Optional, Any, List, Dict, Union, Literal",
|
|
31
|
+
]
|
|
32
|
+
base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
|
|
33
|
+
else:
|
|
34
|
+
base_imports = [
|
|
35
|
+
"import json",
|
|
36
|
+
"import re",
|
|
37
|
+
"import sys",
|
|
38
|
+
"from typing import TypedDict, Optional, Any, List, Dict, Union, Literal",
|
|
39
|
+
]
|
|
40
|
+
if not py_helpers.module_is_rest_only(node):
|
|
41
|
+
base_imports.append("from html import unescape as _html_unescape")
|
|
42
|
+
base_imports.extend(py_helpers.NOT_REQUIRED_IMPORT)
|
|
43
|
+
base_imports.extend(py_helpers.rest_imports(node))
|
|
32
44
|
|
|
33
45
|
transform_imports = sorted(node.transform_imports.get("py", set()))
|
|
34
46
|
|
|
@@ -74,7 +74,7 @@ from ssc_codegen.ast import (
|
|
|
74
74
|
XpathSelect,
|
|
75
75
|
XpathSelectAll,
|
|
76
76
|
)
|
|
77
|
-
from ssc_codegen.exceptions import BuildTimeError
|
|
77
|
+
from ssc_codegen.exceptions import BuildTimeError
|
|
78
78
|
from ssc_codegen.kdl import KdlArg, KdlNode
|
|
79
79
|
from ssc_codegen.regex_utils import normalize_regex_pattern
|
|
80
80
|
from typing import cast
|
|
@@ -260,16 +260,16 @@ def generate(
|
|
|
260
260
|
meta["_include_fallback_html"] = target == Target.PY_LXML
|
|
261
261
|
register_runtime_file(converter, _runtime_name)
|
|
262
262
|
|
|
263
|
+
# Phase 1: parse all KDL files
|
|
264
|
+
from ssc_codegen.ast import Module, Struct
|
|
265
|
+
|
|
266
|
+
parsed: list[tuple[Path, Module]] = []
|
|
263
267
|
for kdl_file in kdl_files:
|
|
264
|
-
out_file = output / kdl_file.with_suffix(ext).name
|
|
265
|
-
logger.debug("processing: %s -> %s", kdl_file, out_file)
|
|
266
268
|
try:
|
|
267
269
|
ast, err = parse_module(
|
|
268
270
|
kdl_file.read_text(encoding="utf-8"), source_path=kdl_file
|
|
269
271
|
)
|
|
270
|
-
if skip_lint:
|
|
271
|
-
pass
|
|
272
|
-
else:
|
|
272
|
+
if not skip_lint:
|
|
273
273
|
lint_output = format_diagnostics(
|
|
274
274
|
err, filepath=kdl_file, fmt=fmt.value
|
|
275
275
|
)
|
|
@@ -278,18 +278,42 @@ def generate(
|
|
|
278
278
|
errors.append(lint_output)
|
|
279
279
|
continue
|
|
280
280
|
logger.debug("AST built for %s", kdl_file)
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
target_path = out_file if name == "" else output / name
|
|
286
|
-
target_path.write_text(content, encoding="utf-8")
|
|
287
|
-
if name:
|
|
288
|
-
typer.echo(f" -> {target_path}")
|
|
289
|
-
code = generated[""]
|
|
281
|
+
parsed.append((kdl_file, ast))
|
|
282
|
+
except Exception as exc:
|
|
283
|
+
if verbose:
|
|
284
|
+
typer.echo(traceback.format_exc(), err=True)
|
|
290
285
|
else:
|
|
291
|
-
|
|
292
|
-
|
|
286
|
+
typer.echo(f" ERROR {kdl_file}: {exc}", err=True)
|
|
287
|
+
errors.append(str(kdl_file))
|
|
288
|
+
|
|
289
|
+
# Phase 2: write runtime file once (if -R is used)
|
|
290
|
+
if separate_runtime and parsed:
|
|
291
|
+
from ssc_codegen.converters.py_helpers import runtime_module_content
|
|
292
|
+
|
|
293
|
+
include_fallback = target == Target.PY_LXML
|
|
294
|
+
# Pick an AST that has REST structs so the runtime includes helpers
|
|
295
|
+
ref_ast = next(
|
|
296
|
+
(
|
|
297
|
+
ast
|
|
298
|
+
for _, ast in parsed
|
|
299
|
+
if any(isinstance(n, Struct) and n.is_rest for n in ast.body)
|
|
300
|
+
),
|
|
301
|
+
parsed[0][1],
|
|
302
|
+
)
|
|
303
|
+
runtime_path = output / f"{_runtime_name}.py"
|
|
304
|
+
runtime_path.write_text(
|
|
305
|
+
runtime_module_content(ref_ast, include_fallback),
|
|
306
|
+
encoding="utf-8",
|
|
307
|
+
)
|
|
308
|
+
typer.echo(f" -> {runtime_path}")
|
|
309
|
+
|
|
310
|
+
# Phase 3: generate and write each main module
|
|
311
|
+
for kdl_file, ast in parsed:
|
|
312
|
+
out_file = output / kdl_file.with_suffix(ext).name
|
|
313
|
+
logger.debug("processing: %s -> %s", kdl_file, out_file)
|
|
314
|
+
try:
|
|
315
|
+
code = converter.convert(ast, **meta)
|
|
316
|
+
out_file.write_text(code, encoding="utf-8")
|
|
293
317
|
|
|
294
318
|
logger.debug(
|
|
295
319
|
"code generated for %s (%d chars)", kdl_file, len(code)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|