ssc_codegen 0.15.2__tar.gz → 0.17.0a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ssc_codegen-0.17.0a0/MANIFEST.in +3 -0
- ssc_codegen-0.17.0a0/PKG-INFO +111 -0
- ssc_codegen-0.17.0a0/README.md +78 -0
- {ssc_codegen-0.15.2 → ssc_codegen-0.17.0a0}/pyproject.toml +16 -19
- ssc_codegen-0.17.0a0/setup.cfg +4 -0
- ssc_codegen-0.17.0a0/setup.py +68 -0
- ssc_codegen-0.17.0a0/ssc_codegen/__init__.py +27 -0
- ssc_codegen-0.17.0a0/ssc_codegen/_logging.py +102 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/__init__.py +169 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/array.py +56 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/base.py +24 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/cast.py +72 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/control.py +82 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/extract.py +51 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/helpers.py +7 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/jsondef.py +37 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/module.py +92 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/predicate_containers.py +44 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/predicate_ops.py +280 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/regex.py +57 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/selectors.py +51 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/string.py +158 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/struct.py +232 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/transform.py +63 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/typedef.py +41 -0
- ssc_codegen-0.17.0a0/ssc_codegen/ast/types.py +79 -0
- ssc_codegen-0.17.0a0/ssc_codegen/converters/base.py +279 -0
- ssc_codegen-0.17.0a0/ssc_codegen/converters/helpers.py +76 -0
- ssc_codegen-0.17.0a0/ssc_codegen/converters/js_pure.py +1353 -0
- ssc_codegen-0.17.0a0/ssc_codegen/converters/py_bs4.py +1324 -0
- ssc_codegen-0.17.0a0/ssc_codegen/converters/py_lxml.py +480 -0
- ssc_codegen-0.17.0a0/ssc_codegen/converters/py_parsel.py +429 -0
- ssc_codegen-0.17.0a0/ssc_codegen/converters/py_slax.py +403 -0
- ssc_codegen-0.17.0a0/ssc_codegen/document_utils.py +94 -0
- ssc_codegen-0.17.0a0/ssc_codegen/exceptions.py +11 -0
- ssc_codegen-0.17.0a0/ssc_codegen/health.py +400 -0
- ssc_codegen-0.17.0a0/ssc_codegen/linter/__init__.py +26 -0
- ssc_codegen-0.17.0a0/ssc_codegen/linter/_kdl_lang.py +12 -0
- ssc_codegen-0.17.0a0/ssc_codegen/linter/base.py +786 -0
- ssc_codegen-0.17.0a0/ssc_codegen/linter/errors.py +78 -0
- ssc_codegen-0.17.0a0/ssc_codegen/linter/format_errors.py +285 -0
- ssc_codegen-0.17.0a0/ssc_codegen/linter/metadata.py +18 -0
- ssc_codegen-0.17.0a0/ssc_codegen/linter/navigation.py +240 -0
- ssc_codegen-0.17.0a0/ssc_codegen/linter/path.py +36 -0
- ssc_codegen-0.17.0a0/ssc_codegen/linter/rule_keywords.py +323 -0
- ssc_codegen-0.17.0a0/ssc_codegen/linter/rules.py +713 -0
- ssc_codegen-0.17.0a0/ssc_codegen/linter/rules_struct.py +536 -0
- ssc_codegen-0.17.0a0/ssc_codegen/linter/type_rules.py +523 -0
- ssc_codegen-0.17.0a0/ssc_codegen/linter/types.py +188 -0
- ssc_codegen-0.17.0a0/ssc_codegen/main.py +596 -0
- ssc_codegen-0.17.0a0/ssc_codegen/parser.py +2195 -0
- ssc_codegen-0.17.0a0/ssc_codegen/pseudo_selectors.py +39 -0
- ssc_codegen-0.17.0a0/ssc_codegen/regex_utils.py +204 -0
- ssc_codegen-0.17.0a0/ssc_codegen/selector_utils.py +8 -0
- ssc_codegen-0.17.0a0/ssc_codegen.egg-info/PKG-INFO +111 -0
- ssc_codegen-0.17.0a0/ssc_codegen.egg-info/SOURCES.txt +68 -0
- ssc_codegen-0.17.0a0/ssc_codegen.egg-info/dependency_links.txt +1 -0
- ssc_codegen-0.17.0a0/ssc_codegen.egg-info/entry_points.txt +2 -0
- ssc_codegen-0.17.0a0/ssc_codegen.egg-info/requires.txt +13 -0
- ssc_codegen-0.17.0a0/ssc_codegen.egg-info/top_level.txt +1 -0
- ssc_codegen-0.17.0a0/tests/test_css_to_xpath.py +84 -0
- ssc_codegen-0.17.0a0/tests/test_imports.py +210 -0
- ssc_codegen-0.17.0a0/tests/test_parser.py +472 -0
- ssc_codegen-0.17.0a0/vendor/tree-sitter-kdl/bindings/python/tree_sitter_kdl/binding.c +35 -0
- ssc_codegen-0.17.0a0/vendor/tree-sitter-kdl/src/parser.c +7268 -0
- ssc_codegen-0.17.0a0/vendor/tree-sitter-kdl/src/scanner.c +120 -0
- ssc_codegen-0.17.0a0/vendor/tree-sitter-kdl/src/tree_sitter/alloc.h +54 -0
- ssc_codegen-0.17.0a0/vendor/tree-sitter-kdl/src/tree_sitter/array.h +291 -0
- ssc_codegen-0.17.0a0/vendor/tree-sitter-kdl/src/tree_sitter/parser.h +286 -0
- ssc_codegen-0.15.2/.gitignore +0 -143
- ssc_codegen-0.15.2/PKG-INFO +0 -255
- ssc_codegen-0.15.2/README.md +0 -220
- ssc_codegen-0.15.2/ssc_codegen/__init__.py +0 -99
- ssc_codegen-0.15.2/ssc_codegen/_compat.py +0 -31
- ssc_codegen-0.15.2/ssc_codegen/ast_/__init__.py +0 -131
- ssc_codegen-0.15.2/ssc_codegen/ast_/base.py +0 -242
- ssc_codegen-0.15.2/ssc_codegen/ast_/nodes_array.py +0 -87
- ssc_codegen-0.15.2/ssc_codegen/ast_/nodes_cast.py +0 -154
- ssc_codegen-0.15.2/ssc_codegen/ast_/nodes_core.py +0 -591
- ssc_codegen-0.15.2/ssc_codegen/ast_/nodes_filter.py +0 -659
- ssc_codegen-0.15.2/ssc_codegen/ast_/nodes_selectors.py +0 -305
- ssc_codegen-0.15.2/ssc_codegen/ast_/nodes_string.py +0 -495
- ssc_codegen-0.15.2/ssc_codegen/ast_/nodes_validate.py +0 -207
- ssc_codegen-0.15.2/ssc_codegen/ast_build/__init__.py +0 -1
- ssc_codegen-0.15.2/ssc_codegen/ast_build/builder.py +0 -464
- ssc_codegen-0.15.2/ssc_codegen/ast_build/main.py +0 -94
- ssc_codegen-0.15.2/ssc_codegen/ast_build/utils.py +0 -155
- ssc_codegen-0.15.2/ssc_codegen/ast_grep_rules/js_rules.yml +0 -20
- ssc_codegen-0.15.2/ssc_codegen/ast_grep_rules/py_drop_prefix_suffix_backport.yml +0 -47
- ssc_codegen-0.15.2/ssc_codegen/ast_grep_rules/py_rules.yml +0 -20
- ssc_codegen-0.15.2/ssc_codegen/cli/__init__.py +0 -0
- ssc_codegen-0.15.2/ssc_codegen/cli/ast_grep.py +0 -10
- ssc_codegen-0.15.2/ssc_codegen/cli/cli_callbacks.py +0 -34
- ssc_codegen-0.15.2/ssc_codegen/cli/cli_utils.py +0 -84
- ssc_codegen-0.15.2/ssc_codegen/cli/code_callbacks.py +0 -32
- ssc_codegen-0.15.2/ssc_codegen/cli/consts.py +0 -61
- ssc_codegen-0.15.2/ssc_codegen/cli/main.py +0 -643
- ssc_codegen-0.15.2/ssc_codegen/cli/runtime_parse_runners.py +0 -72
- ssc_codegen-0.15.2/ssc_codegen/compiler.py +0 -113
- ssc_codegen-0.15.2/ssc_codegen/converters/__init__.py +0 -0
- ssc_codegen-0.15.2/ssc_codegen/converters/base.py +0 -577
- ssc_codegen-0.15.2/ssc_codegen/converters/go_goquery.py +0 -2022
- ssc_codegen-0.15.2/ssc_codegen/converters/helpers.py +0 -257
- ssc_codegen-0.15.2/ssc_codegen/converters/js_pure.py +0 -1534
- ssc_codegen-0.15.2/ssc_codegen/converters/lua_htmlparser.py +0 -1510
- ssc_codegen-0.15.2/ssc_codegen/converters/py_base.py +0 -1359
- ssc_codegen-0.15.2/ssc_codegen/converters/py_bs4.py +0 -504
- ssc_codegen-0.15.2/ssc_codegen/converters/py_lxml.py +0 -551
- ssc_codegen-0.15.2/ssc_codegen/converters/py_parsel.py +0 -533
- ssc_codegen-0.15.2/ssc_codegen/converters/py_selectolax.py +0 -501
- ssc_codegen-0.15.2/ssc_codegen/converters/templates/__init__.py +0 -3
- ssc_codegen-0.15.2/ssc_codegen/converters/templates/go_goquery.py +0 -473
- ssc_codegen-0.15.2/ssc_codegen/converters/templates/js_pure.py +0 -38
- ssc_codegen-0.15.2/ssc_codegen/converters/templates/lua_base.py +0 -710
- ssc_codegen-0.15.2/ssc_codegen/converters/templates/lua_css_compat.py +0 -157
- ssc_codegen-0.15.2/ssc_codegen/converters/templates/lua_re_compat.py +0 -517
- ssc_codegen-0.15.2/ssc_codegen/converters/templates/py_base.py +0 -60
- ssc_codegen-0.15.2/ssc_codegen/document.py +0 -2661
- ssc_codegen-0.15.2/ssc_codegen/document_utlis.py +0 -201
- ssc_codegen-0.15.2/ssc_codegen/json_struct.py +0 -272
- ssc_codegen-0.15.2/ssc_codegen/json_to_scc.py +0 -180
- ssc_codegen-0.15.2/ssc_codegen/logs.py +0 -49
- ssc_codegen-0.15.2/ssc_codegen/pseudo_selectors.py +0 -70
- ssc_codegen-0.15.2/ssc_codegen/schema.py +0 -368
- ssc_codegen-0.15.2/ssc_codegen/selector_utils.py +0 -78
- ssc_codegen-0.15.2/ssc_codegen/static_checker/__init__.py +0 -62
- ssc_codegen-0.15.2/ssc_codegen/static_checker/base.py +0 -91
- ssc_codegen-0.15.2/ssc_codegen/static_checker/callbacks.py +0 -484
- ssc_codegen-0.15.2/ssc_codegen/str_utils.py +0 -230
- ssc_codegen-0.15.2/ssc_codegen/tokens.py +0 -291
- ssc_codegen-0.15.2/ssc_codegen/transform.py +0 -95
- {ssc_codegen-0.15.2 → ssc_codegen-0.17.0a0}/LICENSE +0 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ssc_codegen
|
|
3
|
+
Version: 0.17.0a0
|
|
4
|
+
Summary: Python-dsl code converter to html parser for web scraping
|
|
5
|
+
Project-URL: Documentation, https://github.com/vypivshiy/selector_schema_codegen#readme
|
|
6
|
+
Project-URL: Issues, https://github.com/vypivshiy/selector_schema_codegen/issues
|
|
7
|
+
Project-URL: Source, https://github.com/vypivshiy/selector_schema_codegen
|
|
8
|
+
Project-URL: Examples, https://github.com/vypivshiy/selector_schema_codegen/examples
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Environment :: Console
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
14
|
+
Classifier: Topic :: Text Processing :: Markup :: HTML
|
|
15
|
+
Classifier: Topic :: Utilities
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: bs4>=0.0.2
|
|
24
|
+
Requires-Dist: colorama>=0.4.6; sys_platform == "win32"
|
|
25
|
+
Requires-Dist: cssselect>=1.2.0
|
|
26
|
+
Requires-Dist: lxml>=5.3.0
|
|
27
|
+
Requires-Dist: soupsieve>=2.6
|
|
28
|
+
Requires-Dist: typer>=0.15.1
|
|
29
|
+
Requires-Dist: typing_extensions; python_version < "3.11"
|
|
30
|
+
Requires-Dist: click<8.2.0
|
|
31
|
+
Requires-Dist: tree-sitter>=0.25.2
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# Selector Schema codegen
|
|
35
|
+
|
|
36
|
+
Experimental PoC implementation of a code generator based on KDL2.0 syntax DSL.
|
|
37
|
+
|
|
38
|
+
## install
|
|
39
|
+
|
|
40
|
+
### From git (requires C/C++ compiler)
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
# Clone with submodules
|
|
44
|
+
git clone --recursive https://github.com/vypivshiy/selector_schema_codegen
|
|
45
|
+
cd selector_schema_codegen
|
|
46
|
+
|
|
47
|
+
# Install with pip (builds tree-sitter-kdl extension)
|
|
48
|
+
pip install .
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
**Requirements:**
|
|
52
|
+
- Linux/macOS: `gcc` or `clang`
|
|
53
|
+
- Windows: MSVC (Visual Studio Build Tools or Visual Studio)
|
|
54
|
+
|
|
55
|
+
### Via uv tool
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
uv tool install git+https://github.com/vypivshiy/selector_schema_codegen@features-kdl
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## usage
|
|
62
|
+
|
|
63
|
+
### generate modules
|
|
64
|
+
|
|
65
|
+
```
|
|
66
|
+
ssc-gen generate examples/ -t js-pure -o .
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### lint syntax
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
ssc-gen check examples/
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### test schema by html output
|
|
76
|
+
|
|
77
|
+
from file:
|
|
78
|
+
```
|
|
79
|
+
python main.py run .\examples\booksToScrape.kdl:MainCatalogue -t py-bs4 -i index.html
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
from stdin:
|
|
83
|
+
```
|
|
84
|
+
curl https://books.toscrape.com/ | python main.py run .\examples\booksToScrape.kdl:MainCatalogue -t py-bs4
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### test selectors:
|
|
88
|
+
|
|
89
|
+
from file
|
|
90
|
+
```
|
|
91
|
+
python main.py health .\examples\booksToScrape.kdl:MainCatalogue -i index.html
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
from stdin
|
|
95
|
+
```
|
|
96
|
+
curl https://books.toscrape.com/catalogue/page-2.html | python main.py health .\examples\booksToScrape.kdl:MainCatalogue
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
## syntax
|
|
101
|
+
|
|
102
|
+
See [docs](docs) and [examples](examples) for how to use the syntax.
|
|
103
|
+
|
|
104
|
+
## LLM generate dsl config (experimental, not ready)
|
|
105
|
+
|
|
106
|
+
### prompt
|
|
107
|
+
|
|
108
|
+
Use [SYSTEM_PROMPT](SYSTEM_PROMPT.md) in API pipelines or chats. Before generating, run the linter with `ssc-gen check [FILES...] -f json` and send its error output, if any.
|
|
109
|
+
|
|
110
|
+
### skill
|
|
111
|
+
Use [kdl-schema-dsl](.agents/skills/kdl-schema-dsl) to generate the config.
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Selector Schema codegen
|
|
2
|
+
|
|
3
|
+
Experimental PoC implementation of a code generator based on KDL2.0 syntax DSL.
|
|
4
|
+
|
|
5
|
+
## install
|
|
6
|
+
|
|
7
|
+
### From git (requires C/C++ compiler)
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
# Clone with submodules
|
|
11
|
+
git clone --recursive https://github.com/vypivshiy/selector_schema_codegen
|
|
12
|
+
cd selector_schema_codegen
|
|
13
|
+
|
|
14
|
+
# Install with pip (builds tree-sitter-kdl extension)
|
|
15
|
+
pip install .
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
**Requirements:**
|
|
19
|
+
- Linux/macOS: `gcc` or `clang`
|
|
20
|
+
- Windows: MSVC (Visual Studio Build Tools or Visual Studio)
|
|
21
|
+
|
|
22
|
+
### Via uv tool
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
uv tool install git+https://github.com/vypivshiy/selector_schema_codegen@features-kdl
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## usage
|
|
29
|
+
|
|
30
|
+
### generate modules
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
ssc-gen generate examples/ -t js-pure -o .
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### lint syntax
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
ssc-gen check examples/
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### test schema by html output
|
|
43
|
+
|
|
44
|
+
from file:
|
|
45
|
+
```
|
|
46
|
+
python main.py run .\examples\booksToScrape.kdl:MainCatalogue -t py-bs4 -i index.html
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
from stdin:
|
|
50
|
+
```
|
|
51
|
+
curl https://books.toscrape.com/ | python main.py run .\examples\booksToScrape.kdl:MainCatalogue -t py-bs4
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### test selectors:
|
|
55
|
+
|
|
56
|
+
from file
|
|
57
|
+
```
|
|
58
|
+
python main.py health .\examples\booksToScrape.kdl:MainCatalogue -i index.html
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
from stdin
|
|
62
|
+
```
|
|
63
|
+
curl https://books.toscrape.com/catalogue/page-2.html | python main.py health .\examples\booksToScrape.kdl:MainCatalogue
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
## syntax
|
|
68
|
+
|
|
69
|
+
See [docs](docs) and [examples](examples) for how to use the syntax.
|
|
70
|
+
|
|
71
|
+
## LLM generate dsl config (experimental, not ready)
|
|
72
|
+
|
|
73
|
+
### prompt
|
|
74
|
+
|
|
75
|
+
Use [SYSTEM_PROMPT](SYSTEM_PROMPT.md) in API pipelines or chats. Before generating, run the linter with `ssc-gen check [FILES...] -f json` and send its error output, if any.
|
|
76
|
+
|
|
77
|
+
### skill
|
|
78
|
+
Use [kdl-schema-dsl](.agents/skills/kdl-schema-dsl) to generate the config.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "ssc_codegen"
|
|
3
|
-
version = "0.
|
|
3
|
+
version = "0.17.0a"
|
|
4
4
|
description = "Python-dsl code converter to html parser for web scraping "
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.10"
|
|
@@ -8,8 +8,6 @@ dependencies = [
|
|
|
8
8
|
"bs4>=0.0.2",
|
|
9
9
|
"colorama>=0.4.6 ; sys_platform == 'win32'",
|
|
10
10
|
"cssselect>=1.2.0",
|
|
11
|
-
"httpx>=0.28.1",
|
|
12
|
-
"ichrome>=4.0.4",
|
|
13
11
|
"lxml>=5.3.0",
|
|
14
12
|
"soupsieve>=2.6",
|
|
15
13
|
"typer>=0.15.1",
|
|
@@ -17,9 +15,10 @@ dependencies = [
|
|
|
17
15
|
# https://github.com/fastapi/typer/discussions/1215
|
|
18
16
|
# https://github.com/fastapi/typer/pull/1145
|
|
19
17
|
"click<8.2.0",
|
|
20
|
-
"
|
|
21
|
-
"tinycss2>=1.4.0",
|
|
18
|
+
"tree-sitter>=0.25.2",
|
|
22
19
|
]
|
|
20
|
+
# Windows requires MSVC; Linux/macOS require gcc/clang
|
|
21
|
+
# Install C/C++ compiler: https://wiki.python.org/moin/CompilingPython
|
|
23
22
|
|
|
24
23
|
classifiers = [
|
|
25
24
|
"Development Status :: 3 - Alpha",
|
|
@@ -43,33 +42,31 @@ Examples = "https://github.com/vypivshiy/selector_schema_codegen/examples"
|
|
|
43
42
|
|
|
44
43
|
|
|
45
44
|
[project.scripts]
|
|
46
|
-
ssc-gen = 'ssc_codegen.
|
|
45
|
+
ssc-gen = 'ssc_codegen.main:main'
|
|
47
46
|
|
|
48
47
|
[build-system]
|
|
49
|
-
requires = ["
|
|
50
|
-
build-backend = "
|
|
48
|
+
requires = ["setuptools>=62.4.0", "wheel"]
|
|
49
|
+
build-backend = "setuptools.build_meta"
|
|
51
50
|
|
|
52
|
-
[tool.
|
|
53
|
-
include = [
|
|
54
|
-
|
|
55
|
-
]
|
|
51
|
+
[tool.setuptools.packages.find]
|
|
52
|
+
include = ["ssc_codegen*"]
|
|
53
|
+
|
|
54
|
+
[tool.setuptools.package-data]
|
|
55
|
+
ssc_codegen = ["py.typed"]
|
|
56
56
|
|
|
57
57
|
[dependency-groups]
|
|
58
58
|
dev = [
|
|
59
|
+
"bs4>=0.0.2",
|
|
59
60
|
"coverage>=7.6.12",
|
|
60
61
|
"httpx>=0.28.1",
|
|
62
|
+
"hypothesis>=6.151.9",
|
|
63
|
+
"lxml>=5.3.0",
|
|
61
64
|
"mypy>=1.14.1",
|
|
62
|
-
"parsel>=1.
|
|
65
|
+
"parsel>=1.9.1",
|
|
63
66
|
"pytest>=8.3.4",
|
|
64
67
|
"ruff>=0.9.3",
|
|
65
68
|
"selectolax>=0.3.27",
|
|
66
69
|
]
|
|
67
|
-
# python generated code tests
|
|
68
|
-
tests = [
|
|
69
|
-
"bs4>=0.0.2",
|
|
70
|
-
"parsel>=1.10.0",
|
|
71
|
-
"selectolax>=0.3.27",
|
|
72
|
-
]
|
|
73
70
|
|
|
74
71
|
[tool.ruff]
|
|
75
72
|
target-version = "py310"
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Setup script for ssc_codegen with tree-sitter-kdl extension.
|
|
3
|
+
Requires C/C++ compiler (MSVC on Windows, gcc/clang on Linux/macOS).
|
|
4
|
+
|
|
5
|
+
Before installing, initialize the submodule:
|
|
6
|
+
git submodule update --init
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from setuptools import setup, Extension
|
|
11
|
+
from setuptools.command.build_ext import build_ext
|
|
12
|
+
|
|
13
|
+
# Relative paths from setup.py location.
# Root of the vendored tree-sitter-kdl checkout.
VENDOR_KDL_REL = Path("vendor") / "tree-sitter-kdl"
# Directory with the grammar's C sources (parser.c and, when present, scanner.c).
VENDOR_KDL_SRC_REL = VENDOR_KDL_REL / "src"
# C source of the Python binding module that ships with tree-sitter-kdl.
VENDOR_BINDING_C = VENDOR_KDL_REL / "bindings" / "python" / "tree_sitter_kdl" / "binding.c"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class KdlBuildExt(build_ext):
    """Custom ``build_ext`` that builds tree-sitter-kdl from copied sources."""

    def build_extension(self, ext: Extension):
        """Configure compiler flags, sources and macros on *ext*, then build it.

        The extension object is mutated in place. Source and macro additions
        are guarded so that building the same ``Extension`` more than once
        does not accumulate duplicate entries.
        """
        # MSVC has its own flag syntax; every other compiler type gets the
        # gcc/clang-style flags.
        if self.compiler.compiler_type != "msvc":
            ext.extra_compile_args = ["-std=c11", "-fvisibility=hidden", "-O2"]
        else:
            ext.extra_compile_args = ["/std:c11", "/utf-8", "/O2"]

        # Add scanner.c if present (relative to setup.py)
        scanner_c = VENDOR_KDL_SRC_REL / "scanner.c"
        scanner_src = str(scanner_c)
        if scanner_c.exists() and scanner_src not in ext.sources:
            ext.sources.append(scanner_src)

        # Py_LIMITED_API for abi3 wheels (0x030A0000 is the CPython 3.10 hex version)
        if ext.py_limited_api and not any(
            macro[0] == "Py_LIMITED_API" for macro in ext.define_macros
        ):
            ext.define_macros.append(("Py_LIMITED_API", "0x030A0000"))

        super().build_extension(ext)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def get_kdl_extension():
    """Build the ``Extension`` describing the tree-sitter-kdl binding.

    Raises:
        RuntimeError: if ``parser.c`` or ``binding.c`` cannot be found under
            the vendored tree-sitter-kdl directory.
    """
    src_dir = VENDOR_KDL_SRC_REL
    parser_source = src_dir / "parser.c"

    # Fail early with an actionable hint when the submodule was never fetched.
    if not parser_source.exists():
        message = (
            f"tree-sitter-kdl sources not found at {VENDOR_KDL_SRC_REL}.\n"
            "Initialize the git submodule before installing:\n"
            " git submodule update --init"
        )
        raise RuntimeError(message)

    binding_source = VENDOR_BINDING_C
    if not binding_source.exists():
        raise RuntimeError(f"binding.c not found at {VENDOR_BINDING_C}")

    macros = [
        ("PY_SSIZE_T_CLEAN", None),
        ("TREE_SITTER_HIDE_SYMBOLS", None),
    ]
    return Extension(
        name="ssc_codegen.linter._binding",
        sources=[str(binding_source), str(parser_source)],
        include_dirs=[str(src_dir)],
        define_macros=macros,
    )
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Register the custom build_ext command and the tree-sitter-kdl C extension.
# No project metadata is passed here; presumably it is taken from the
# [project] table in pyproject.toml.
setup(
    cmdclass={"build_ext": KdlBuildExt},
    ext_modules=[get_kdl_extension()],
)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from ssc_codegen.parser import Module, PARSER
|
|
4
|
+
|
|
5
|
+
# Encoding used when reading schema files from disk; "utf-8-sig" decodes UTF-8
# and drops a leading byte-order mark when one is present.
_KDL_TEXT_ENCODING = "utf-8-sig"
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def parse_ast(
    src: str | None = None,
    path: str | None = None,
    *,
    css_to_xpath: bool = False,
) -> Module:
    """Parse schema source text (or a schema file) into a ``Module``.

    Args:
        src: Source text to parse. Ignored when ``path`` is also given,
            because the file content replaces it.
        path: File to read the source from, decoded with
            ``_KDL_TEXT_ENCODING``. The resolved path is forwarded to the
            parser as ``source_path``.
        css_to_xpath: When true, ``convert_css_to_xpath_module`` is applied
            to the parsed module before it is returned.

    Returns:
        The module produced by ``PARSER.parse``.

    Raises:
        AttributeError: if neither ``src`` nor ``path`` is provided, or if
            the file at ``path`` is empty.
    """
    source_path: Path | None = None
    if path:
        source_path = Path(path).resolve()
        src = source_path.read_text(encoding=_KDL_TEXT_ENCODING)
        if not src:
            # The caller did pass a path; report the real problem instead of
            # claiming that an argument is missing.
            raise AttributeError(f"source file is empty: {source_path}")
    elif not src:
        raise AttributeError("required src or path argument")

    module = PARSER.parse(src, source_path=source_path)
    if css_to_xpath:
        # Imported lazily, as in the original, so the converter is only
        # loaded when the conversion is requested.
        from ssc_codegen.document_utils import convert_css_to_xpath_module

        convert_css_to_xpath_module(module)
    return module
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Logging configuration for ssc_codegen.
|
|
2
|
+
|
|
3
|
+
Usage
|
|
4
|
+
-----
|
|
5
|
+
Import the logger in any submodule::
|
|
6
|
+
|
|
7
|
+
from ssc_codegen._logging import logger
|
|
8
|
+
|
|
9
|
+
To enable DEBUG output from the CLI, pass ``--verbose`` / ``-v`` flag,
|
|
10
|
+
or configure it manually::
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
logging.getLogger("ssc_codegen").setLevel(logging.DEBUG)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import logging
|
|
19
|
+
import sys
|
|
20
|
+
|
|
21
|
+
# Windows ANSI backport.
# colorama is treated as optional: a failed import only clears the
# availability flag instead of breaking the import of this module.
try:
    import colorama

    colorama.init(autoreset=False)
    _COLORAMA_AVAILABLE = True
except ImportError:
    _COLORAMA_AVAILABLE = False

# ANSI color codes (used on all platforms; colorama translates them on Windows)
_RESET = "\033[0m"  # reset all attributes
_BOLD = "\033[1m"  # bold
# Log level number -> ANSI foreground color used for the level name.
_COLORS: dict[int, str] = {
    logging.DEBUG: "\033[36m",  # cyan
    logging.INFO: "\033[32m",  # green
    logging.WARNING: "\033[33m",  # yellow
    logging.ERROR: "\033[31m",  # red
    logging.CRITICAL: "\033[35m",  # magenta
}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class _ColorFormatter(logging.Formatter):
    """Formatter that wraps the level name in ANSI color codes.

    Output shape: ``[<color><bold>LEVEL<reset>] logger.name: message``,
    followed by the traceback and/or the stack trace on their own lines when
    the record carries them.
    """

    _FMT = "[{color}{bold}{level}{reset}] {name}: {message}"

    def format(self, record: logging.LogRecord) -> str:  # noqa: A003
        """Render *record* as one colored log entry and return it."""
        # Levels without an entry in _COLORS get no color prefix.
        color = _COLORS.get(record.levelno, "")
        level = record.levelname
        name = record.name
        # format the message part the normal way (handles exc_info etc.)
        record.message = record.getMessage()
        if record.exc_info and not record.exc_text:
            # Store the rendered traceback on the record so it is only
            # formatted once.
            record.exc_text = self.formatException(record.exc_info)

        msg = self._FMT.format(
            color=color,
            bold=_BOLD,
            level=level,
            reset=_RESET,
            name=name,
            message=record.message,
        )
        if record.exc_text:
            msg = f"{msg}\n{record.exc_text}"
        if record.stack_info:
            # Set when a logging call passes stack_info=True; the base
            # Formatter appends it as well, so it must not be dropped here.
            msg = f"{msg}\n{self.formatStack(record.stack_info)}"
        return msg
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# Single named logger for the entire ssc_codegen package.
# All child loggers (parser, main, …) are children of this one,
# so a single ``logging.getLogger("ssc_codegen").setLevel(DEBUG)``
# enables everything at once.
# Import it from submodules with: from ssc_codegen._logging import logger
logger = logging.getLogger("ssc_codegen")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def setup_debug_logging() -> None:
    """Switch the ``ssc_codegen`` logger to DEBUG and attach a stream handler.

    Invoked by the CLI when ``--verbose`` is passed. Repeated calls are safe:
    the level is only changed when it is unset or above DEBUG, and a handler
    is only attached while the logger has no ``StreamHandler`` yet.
    """
    pkg_logger = logging.getLogger("ssc_codegen")
    current_level = pkg_logger.level
    if current_level == logging.NOTSET or current_level > logging.DEBUG:
        pkg_logger.setLevel(logging.DEBUG)

    # A stream handler is already attached: nothing more to configure.
    for existing in pkg_logger.handlers:
        if isinstance(existing, logging.StreamHandler):
            return

    # Colorize when colorama was imported; otherwise only when stderr reports
    # itself as a TTY.
    if _COLORAMA_AVAILABLE:
        colorize = True
    else:
        colorize = hasattr(sys.stderr, "isatty") and sys.stderr.isatty()

    handler = logging.StreamHandler()
    handler.setLevel(logging.DEBUG)
    if colorize:
        formatter: logging.Formatter = _ColorFormatter()
    else:
        formatter = logging.Formatter("[%(levelname)s] %(name)s: %(message)s")
    handler.setFormatter(formatter)
    pkg_logger.addHandler(handler)
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AST nodes for the KDL Schema DSL.
|
|
3
|
+
|
|
4
|
+
Import everything from here:
|
|
5
|
+
    from ssc_codegen.ast import Module, Field, CssSelect, ...
|
|
6
|
+
"""
|
|
7
|
+
from .types import VariableType, StructType
|
|
8
|
+
|
|
9
|
+
from .base import Node
|
|
10
|
+
|
|
11
|
+
from .module import (
|
|
12
|
+
Module,
|
|
13
|
+
CodeStartHook,
|
|
14
|
+
CodeEndHook,
|
|
15
|
+
Docstring,
|
|
16
|
+
Imports,
|
|
17
|
+
Utilities,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from .typedef import TypeDef, TypeDefField
|
|
21
|
+
|
|
22
|
+
from .jsondef import JsonDef, JsonDefField
|
|
23
|
+
|
|
24
|
+
from .struct import (
|
|
25
|
+
Struct,
|
|
26
|
+
StructDocstring,
|
|
27
|
+
PreValidate,
|
|
28
|
+
Init,
|
|
29
|
+
InitField,
|
|
30
|
+
SplitDoc,
|
|
31
|
+
Key,
|
|
32
|
+
Value,
|
|
33
|
+
TableConfig,
|
|
34
|
+
TableRow,
|
|
35
|
+
TableMatchKey,
|
|
36
|
+
Field,
|
|
37
|
+
StartParse
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
from .selectors import (
|
|
41
|
+
CssSelect,
|
|
42
|
+
CssSelectAll,
|
|
43
|
+
XpathSelect,
|
|
44
|
+
XpathSelectAll,
|
|
45
|
+
CssRemove,
|
|
46
|
+
XpathRemove,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
from .extract import Text, Raw, Attr
|
|
50
|
+
|
|
51
|
+
from .string import (
|
|
52
|
+
Trim,
|
|
53
|
+
Ltrim,
|
|
54
|
+
Rtrim,
|
|
55
|
+
NormalizeSpace,
|
|
56
|
+
RmPrefix,
|
|
57
|
+
RmSuffix,
|
|
58
|
+
RmPrefixSuffix,
|
|
59
|
+
Fmt,
|
|
60
|
+
Repl,
|
|
61
|
+
ReplMap,
|
|
62
|
+
Lower,
|
|
63
|
+
Upper,
|
|
64
|
+
Split,
|
|
65
|
+
Join,
|
|
66
|
+
Unescape,
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
from .regex import Re, ReAll, ReSub
|
|
70
|
+
|
|
71
|
+
from .array import Index, Slice, Len, Unique
|
|
72
|
+
|
|
73
|
+
from .cast import ToInt, ToFloat, ToBool, Jsonify, Nested
|
|
74
|
+
|
|
75
|
+
from .control import Self, Fallback, FallbackStart, FallbackEnd, Return
|
|
76
|
+
|
|
77
|
+
from .predicate_containers import Filter, Assert, Match
|
|
78
|
+
|
|
79
|
+
from .predicate_ops import (
|
|
80
|
+
PredEq,
|
|
81
|
+
PredNe,
|
|
82
|
+
PredGt,
|
|
83
|
+
PredLt,
|
|
84
|
+
PredGe,
|
|
85
|
+
PredLe,
|
|
86
|
+
PredRange,
|
|
87
|
+
PredStarts,
|
|
88
|
+
PredEnds,
|
|
89
|
+
PredContains,
|
|
90
|
+
PredIn,
|
|
91
|
+
PredRe,
|
|
92
|
+
PredReAny,
|
|
93
|
+
PredReAll,
|
|
94
|
+
PredCss,
|
|
95
|
+
PredXpath,
|
|
96
|
+
PredHasAttr,
|
|
97
|
+
PredCountEq,
|
|
98
|
+
PredCountGt,
|
|
99
|
+
PredCountLt,
|
|
100
|
+
PredAttrEnds,
|
|
101
|
+
PredAttrEq,
|
|
102
|
+
PredAttrNe,
|
|
103
|
+
PredAttrRe,
|
|
104
|
+
PredAttrStarts,
|
|
105
|
+
PredAttrContains,
|
|
106
|
+
PredTextContains,
|
|
107
|
+
PredTextEnds,
|
|
108
|
+
PredTextRe,
|
|
109
|
+
PredTextStarts,
|
|
110
|
+
LogicNot,
|
|
111
|
+
LogicAnd,
|
|
112
|
+
LogicOr,
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
from .transform import TransformDef, TransformTarget, TransformCall
|
|
116
|
+
|
|
117
|
+
__all__ = [
|
|
118
|
+
# types
|
|
119
|
+
"VariableType", "StructType",
|
|
120
|
+
# base
|
|
121
|
+
"Node",
|
|
122
|
+
# module
|
|
123
|
+
"Module", "CodeStartHook", "CodeEndHook",
|
|
124
|
+
"Docstring", "Imports", "Utilities",
|
|
125
|
+
# typedef
|
|
126
|
+
"TypeDef", "TypeDefField",
|
|
127
|
+
# jsondef
|
|
128
|
+
"JsonDef", "JsonDefField",
|
|
129
|
+
# struct
|
|
130
|
+
"Struct", "StructDocstring", "PreValidate",
|
|
131
|
+
"Init", "InitField", "SplitDoc",
|
|
132
|
+
"Key", "Value",
|
|
133
|
+
"TableConfig", "TableRow", "TableMatchKey",
|
|
134
|
+
"Field", "StartParse",
|
|
135
|
+
# selectors
|
|
136
|
+
"CssSelect", "CssSelectAll",
|
|
137
|
+
"XpathSelect", "XpathSelectAll",
|
|
138
|
+
"CssRemove", "XpathRemove",
|
|
139
|
+
# extract
|
|
140
|
+
"Text", "Raw", "Attr",
|
|
141
|
+
# string
|
|
142
|
+
"Trim", "Ltrim", "Rtrim", "NormalizeSpace",
|
|
143
|
+
"RmPrefix", "RmSuffix", "RmPrefixSuffix",
|
|
144
|
+
"Fmt", "Repl", "ReplMap",
|
|
145
|
+
"Lower", "Upper", "Split", "Join", "Unescape",
|
|
146
|
+
# regex
|
|
147
|
+
"Re", "ReAll", "ReSub",
|
|
148
|
+
# array
|
|
149
|
+
"Index", "Slice", "Len", "Unique",
|
|
150
|
+
# cast
|
|
151
|
+
"ToInt", "ToFloat", "ToBool", "Jsonify", "Nested",
|
|
152
|
+
# control
|
|
153
|
+
"Self", "Fallback", "FallbackStart", "FallbackEnd", "Return",
|
|
154
|
+
# predicate containers
|
|
155
|
+
"Filter", "Assert", "Match",
|
|
156
|
+
# predicate ops
|
|
157
|
+
"PredEq", "PredNe",
|
|
158
|
+
"PredGt", "PredLt", "PredGe", "PredLe", "PredRange",
|
|
159
|
+
"PredStarts", "PredEnds", "PredContains", "PredIn",
|
|
160
|
+
"PredRe", "PredReAny", "PredReAll",
|
|
161
|
+
"PredCss", "PredXpath", "PredHasAttr",
|
|
162
|
+
"PredAttrEq", "PredAttrNe",
|
|
163
|
+
"PredAttrStarts", "PredAttrEnds", "PredAttrContains", "PredAttrRe",
|
|
164
|
+
"PredTextStarts", "PredTextEnds", "PredTextContains", "PredTextRe",
|
|
165
|
+
"PredCountEq", "PredCountGt", "PredCountLt",
|
|
166
|
+
"LogicNot", "LogicAnd", "LogicOr",
|
|
167
|
+
# transform
|
|
168
|
+
"TransformDef", "TransformTarget", "TransformCall",
|
|
169
|
+
]
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
|
|
4
|
+
from .base import Node
|
|
5
|
+
from .types import VariableType
|
|
6
|
+
|
|
7
|
+
# Index, First, Last, Slice accept LIST_AUTO / return AUTO or LIST_AUTO.
|
|
8
|
+
# Concrete types are resolved by the builder from the cursor type via
|
|
9
|
+
# VariableType.scalar / VariableType.as_list helpers.
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class Index(Node):
    """
    Picks a single element out of a list by position.

    ``i`` is the position; a negative value counts from the end.
    Types: LIST_AUTO → AUTO (resolved by builder).
    """

    i: int = 0
    accept: VariableType = VariableType.LIST_AUTO
    ret: VariableType = VariableType.AUTO
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class Slice(Node):
    """
    Takes the sublist ``[start:end]`` of a list.

    Types: LIST_AUTO → LIST_AUTO (resolved by builder).
    """

    start: int = 0
    end: int = 0
    accept: VariableType = VariableType.LIST_AUTO
    ret: VariableType = VariableType.LIST_AUTO
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class Len(Node):
    """Length of a list. Types: LIST_AUTO → INT."""

    accept: VariableType = VariableType.LIST_AUTO
    ret: VariableType = VariableType.INT
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class Unique(Node):
    """
    Drops duplicate strings from a list.

    ``keep_order=True`` preserves the original order; the default is False.
    Types: LIST_STRING → LIST_STRING.
    """

    keep_order: bool = False
    accept: VariableType = VariableType.LIST_STRING
    ret: VariableType = VariableType.LIST_STRING
|