ssc_codegen 0.15.2__tar.gz → 0.17.0a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. ssc_codegen-0.17.0a0/MANIFEST.in +3 -0
  2. ssc_codegen-0.17.0a0/PKG-INFO +111 -0
  3. ssc_codegen-0.17.0a0/README.md +78 -0
  4. {ssc_codegen-0.15.2 → ssc_codegen-0.17.0a0}/pyproject.toml +16 -19
  5. ssc_codegen-0.17.0a0/setup.cfg +4 -0
  6. ssc_codegen-0.17.0a0/setup.py +68 -0
  7. ssc_codegen-0.17.0a0/ssc_codegen/__init__.py +27 -0
  8. ssc_codegen-0.17.0a0/ssc_codegen/_logging.py +102 -0
  9. ssc_codegen-0.17.0a0/ssc_codegen/ast/__init__.py +169 -0
  10. ssc_codegen-0.17.0a0/ssc_codegen/ast/array.py +56 -0
  11. ssc_codegen-0.17.0a0/ssc_codegen/ast/base.py +24 -0
  12. ssc_codegen-0.17.0a0/ssc_codegen/ast/cast.py +72 -0
  13. ssc_codegen-0.17.0a0/ssc_codegen/ast/control.py +82 -0
  14. ssc_codegen-0.17.0a0/ssc_codegen/ast/extract.py +51 -0
  15. ssc_codegen-0.17.0a0/ssc_codegen/ast/helpers.py +7 -0
  16. ssc_codegen-0.17.0a0/ssc_codegen/ast/jsondef.py +37 -0
  17. ssc_codegen-0.17.0a0/ssc_codegen/ast/module.py +92 -0
  18. ssc_codegen-0.17.0a0/ssc_codegen/ast/predicate_containers.py +44 -0
  19. ssc_codegen-0.17.0a0/ssc_codegen/ast/predicate_ops.py +280 -0
  20. ssc_codegen-0.17.0a0/ssc_codegen/ast/regex.py +57 -0
  21. ssc_codegen-0.17.0a0/ssc_codegen/ast/selectors.py +51 -0
  22. ssc_codegen-0.17.0a0/ssc_codegen/ast/string.py +158 -0
  23. ssc_codegen-0.17.0a0/ssc_codegen/ast/struct.py +232 -0
  24. ssc_codegen-0.17.0a0/ssc_codegen/ast/transform.py +63 -0
  25. ssc_codegen-0.17.0a0/ssc_codegen/ast/typedef.py +41 -0
  26. ssc_codegen-0.17.0a0/ssc_codegen/ast/types.py +79 -0
  27. ssc_codegen-0.17.0a0/ssc_codegen/converters/base.py +279 -0
  28. ssc_codegen-0.17.0a0/ssc_codegen/converters/helpers.py +76 -0
  29. ssc_codegen-0.17.0a0/ssc_codegen/converters/js_pure.py +1353 -0
  30. ssc_codegen-0.17.0a0/ssc_codegen/converters/py_bs4.py +1324 -0
  31. ssc_codegen-0.17.0a0/ssc_codegen/converters/py_lxml.py +480 -0
  32. ssc_codegen-0.17.0a0/ssc_codegen/converters/py_parsel.py +429 -0
  33. ssc_codegen-0.17.0a0/ssc_codegen/converters/py_slax.py +403 -0
  34. ssc_codegen-0.17.0a0/ssc_codegen/document_utils.py +94 -0
  35. ssc_codegen-0.17.0a0/ssc_codegen/exceptions.py +11 -0
  36. ssc_codegen-0.17.0a0/ssc_codegen/health.py +400 -0
  37. ssc_codegen-0.17.0a0/ssc_codegen/linter/__init__.py +26 -0
  38. ssc_codegen-0.17.0a0/ssc_codegen/linter/_kdl_lang.py +12 -0
  39. ssc_codegen-0.17.0a0/ssc_codegen/linter/base.py +786 -0
  40. ssc_codegen-0.17.0a0/ssc_codegen/linter/errors.py +78 -0
  41. ssc_codegen-0.17.0a0/ssc_codegen/linter/format_errors.py +285 -0
  42. ssc_codegen-0.17.0a0/ssc_codegen/linter/metadata.py +18 -0
  43. ssc_codegen-0.17.0a0/ssc_codegen/linter/navigation.py +240 -0
  44. ssc_codegen-0.17.0a0/ssc_codegen/linter/path.py +36 -0
  45. ssc_codegen-0.17.0a0/ssc_codegen/linter/rule_keywords.py +323 -0
  46. ssc_codegen-0.17.0a0/ssc_codegen/linter/rules.py +713 -0
  47. ssc_codegen-0.17.0a0/ssc_codegen/linter/rules_struct.py +536 -0
  48. ssc_codegen-0.17.0a0/ssc_codegen/linter/type_rules.py +523 -0
  49. ssc_codegen-0.17.0a0/ssc_codegen/linter/types.py +188 -0
  50. ssc_codegen-0.17.0a0/ssc_codegen/main.py +596 -0
  51. ssc_codegen-0.17.0a0/ssc_codegen/parser.py +2195 -0
  52. ssc_codegen-0.17.0a0/ssc_codegen/pseudo_selectors.py +39 -0
  53. ssc_codegen-0.17.0a0/ssc_codegen/regex_utils.py +204 -0
  54. ssc_codegen-0.17.0a0/ssc_codegen/selector_utils.py +8 -0
  55. ssc_codegen-0.17.0a0/ssc_codegen.egg-info/PKG-INFO +111 -0
  56. ssc_codegen-0.17.0a0/ssc_codegen.egg-info/SOURCES.txt +68 -0
  57. ssc_codegen-0.17.0a0/ssc_codegen.egg-info/dependency_links.txt +1 -0
  58. ssc_codegen-0.17.0a0/ssc_codegen.egg-info/entry_points.txt +2 -0
  59. ssc_codegen-0.17.0a0/ssc_codegen.egg-info/requires.txt +13 -0
  60. ssc_codegen-0.17.0a0/ssc_codegen.egg-info/top_level.txt +1 -0
  61. ssc_codegen-0.17.0a0/tests/test_css_to_xpath.py +84 -0
  62. ssc_codegen-0.17.0a0/tests/test_imports.py +210 -0
  63. ssc_codegen-0.17.0a0/tests/test_parser.py +472 -0
  64. ssc_codegen-0.17.0a0/vendor/tree-sitter-kdl/bindings/python/tree_sitter_kdl/binding.c +35 -0
  65. ssc_codegen-0.17.0a0/vendor/tree-sitter-kdl/src/parser.c +7268 -0
  66. ssc_codegen-0.17.0a0/vendor/tree-sitter-kdl/src/scanner.c +120 -0
  67. ssc_codegen-0.17.0a0/vendor/tree-sitter-kdl/src/tree_sitter/alloc.h +54 -0
  68. ssc_codegen-0.17.0a0/vendor/tree-sitter-kdl/src/tree_sitter/array.h +291 -0
  69. ssc_codegen-0.17.0a0/vendor/tree-sitter-kdl/src/tree_sitter/parser.h +286 -0
  70. ssc_codegen-0.15.2/.gitignore +0 -143
  71. ssc_codegen-0.15.2/PKG-INFO +0 -255
  72. ssc_codegen-0.15.2/README.md +0 -220
  73. ssc_codegen-0.15.2/ssc_codegen/__init__.py +0 -99
  74. ssc_codegen-0.15.2/ssc_codegen/_compat.py +0 -31
  75. ssc_codegen-0.15.2/ssc_codegen/ast_/__init__.py +0 -131
  76. ssc_codegen-0.15.2/ssc_codegen/ast_/base.py +0 -242
  77. ssc_codegen-0.15.2/ssc_codegen/ast_/nodes_array.py +0 -87
  78. ssc_codegen-0.15.2/ssc_codegen/ast_/nodes_cast.py +0 -154
  79. ssc_codegen-0.15.2/ssc_codegen/ast_/nodes_core.py +0 -591
  80. ssc_codegen-0.15.2/ssc_codegen/ast_/nodes_filter.py +0 -659
  81. ssc_codegen-0.15.2/ssc_codegen/ast_/nodes_selectors.py +0 -305
  82. ssc_codegen-0.15.2/ssc_codegen/ast_/nodes_string.py +0 -495
  83. ssc_codegen-0.15.2/ssc_codegen/ast_/nodes_validate.py +0 -207
  84. ssc_codegen-0.15.2/ssc_codegen/ast_build/__init__.py +0 -1
  85. ssc_codegen-0.15.2/ssc_codegen/ast_build/builder.py +0 -464
  86. ssc_codegen-0.15.2/ssc_codegen/ast_build/main.py +0 -94
  87. ssc_codegen-0.15.2/ssc_codegen/ast_build/utils.py +0 -155
  88. ssc_codegen-0.15.2/ssc_codegen/ast_grep_rules/js_rules.yml +0 -20
  89. ssc_codegen-0.15.2/ssc_codegen/ast_grep_rules/py_drop_prefix_suffix_backport.yml +0 -47
  90. ssc_codegen-0.15.2/ssc_codegen/ast_grep_rules/py_rules.yml +0 -20
  91. ssc_codegen-0.15.2/ssc_codegen/cli/__init__.py +0 -0
  92. ssc_codegen-0.15.2/ssc_codegen/cli/ast_grep.py +0 -10
  93. ssc_codegen-0.15.2/ssc_codegen/cli/cli_callbacks.py +0 -34
  94. ssc_codegen-0.15.2/ssc_codegen/cli/cli_utils.py +0 -84
  95. ssc_codegen-0.15.2/ssc_codegen/cli/code_callbacks.py +0 -32
  96. ssc_codegen-0.15.2/ssc_codegen/cli/consts.py +0 -61
  97. ssc_codegen-0.15.2/ssc_codegen/cli/main.py +0 -643
  98. ssc_codegen-0.15.2/ssc_codegen/cli/runtime_parse_runners.py +0 -72
  99. ssc_codegen-0.15.2/ssc_codegen/compiler.py +0 -113
  100. ssc_codegen-0.15.2/ssc_codegen/converters/__init__.py +0 -0
  101. ssc_codegen-0.15.2/ssc_codegen/converters/base.py +0 -577
  102. ssc_codegen-0.15.2/ssc_codegen/converters/go_goquery.py +0 -2022
  103. ssc_codegen-0.15.2/ssc_codegen/converters/helpers.py +0 -257
  104. ssc_codegen-0.15.2/ssc_codegen/converters/js_pure.py +0 -1534
  105. ssc_codegen-0.15.2/ssc_codegen/converters/lua_htmlparser.py +0 -1510
  106. ssc_codegen-0.15.2/ssc_codegen/converters/py_base.py +0 -1359
  107. ssc_codegen-0.15.2/ssc_codegen/converters/py_bs4.py +0 -504
  108. ssc_codegen-0.15.2/ssc_codegen/converters/py_lxml.py +0 -551
  109. ssc_codegen-0.15.2/ssc_codegen/converters/py_parsel.py +0 -533
  110. ssc_codegen-0.15.2/ssc_codegen/converters/py_selectolax.py +0 -501
  111. ssc_codegen-0.15.2/ssc_codegen/converters/templates/__init__.py +0 -3
  112. ssc_codegen-0.15.2/ssc_codegen/converters/templates/go_goquery.py +0 -473
  113. ssc_codegen-0.15.2/ssc_codegen/converters/templates/js_pure.py +0 -38
  114. ssc_codegen-0.15.2/ssc_codegen/converters/templates/lua_base.py +0 -710
  115. ssc_codegen-0.15.2/ssc_codegen/converters/templates/lua_css_compat.py +0 -157
  116. ssc_codegen-0.15.2/ssc_codegen/converters/templates/lua_re_compat.py +0 -517
  117. ssc_codegen-0.15.2/ssc_codegen/converters/templates/py_base.py +0 -60
  118. ssc_codegen-0.15.2/ssc_codegen/document.py +0 -2661
  119. ssc_codegen-0.15.2/ssc_codegen/document_utlis.py +0 -201
  120. ssc_codegen-0.15.2/ssc_codegen/json_struct.py +0 -272
  121. ssc_codegen-0.15.2/ssc_codegen/json_to_scc.py +0 -180
  122. ssc_codegen-0.15.2/ssc_codegen/logs.py +0 -49
  123. ssc_codegen-0.15.2/ssc_codegen/pseudo_selectors.py +0 -70
  124. ssc_codegen-0.15.2/ssc_codegen/schema.py +0 -368
  125. ssc_codegen-0.15.2/ssc_codegen/selector_utils.py +0 -78
  126. ssc_codegen-0.15.2/ssc_codegen/static_checker/__init__.py +0 -62
  127. ssc_codegen-0.15.2/ssc_codegen/static_checker/base.py +0 -91
  128. ssc_codegen-0.15.2/ssc_codegen/static_checker/callbacks.py +0 -484
  129. ssc_codegen-0.15.2/ssc_codegen/str_utils.py +0 -230
  130. ssc_codegen-0.15.2/ssc_codegen/tokens.py +0 -291
  131. ssc_codegen-0.15.2/ssc_codegen/transform.py +0 -95
  132. {ssc_codegen-0.15.2 → ssc_codegen-0.17.0a0}/LICENSE +0 -0
@@ -0,0 +1,3 @@
1
+ # Include the tree-sitter-kdl C sources from the submodule in the sdist
2
+ recursive-include vendor/tree-sitter-kdl/src *.c *.h
3
+ recursive-include vendor/tree-sitter-kdl/bindings/python/tree_sitter_kdl *.c
@@ -0,0 +1,111 @@
1
+ Metadata-Version: 2.4
2
+ Name: ssc_codegen
3
+ Version: 0.17.0a0
4
+ Summary: Python-dsl code converter to html parser for web scraping
5
+ Project-URL: Documentation, https://github.com/vypivshiy/selector_schema_codegen#readme
6
+ Project-URL: Issues, https://github.com/vypivshiy/selector_schema_codegen/issues
7
+ Project-URL: Source, https://github.com/vypivshiy/selector_schema_codegen
8
+ Project-URL: Examples, https://github.com/vypivshiy/selector_schema_codegen/examples
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Environment :: Console
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Topic :: Software Development :: Code Generators
14
+ Classifier: Topic :: Text Processing :: Markup :: HTML
15
+ Classifier: Topic :: Utilities
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: bs4>=0.0.2
24
+ Requires-Dist: colorama>=0.4.6; sys_platform == "win32"
25
+ Requires-Dist: cssselect>=1.2.0
26
+ Requires-Dist: lxml>=5.3.0
27
+ Requires-Dist: soupsieve>=2.6
28
+ Requires-Dist: typer>=0.15.1
29
+ Requires-Dist: typing_extensions; python_version < "3.11"
30
+ Requires-Dist: click<8.2.0
31
+ Requires-Dist: tree-sitter>=0.25.2
32
+ Dynamic: license-file
33
+
34
+ # Selector Schema codegen
35
+
36
+ Experimental PoC implementation of a code generator based on KDL2.0 syntax DSL.
37
+
38
+ ## install
39
+
40
+ ### From git (requires C/C++ compiler)
41
+
42
+ ```bash
43
+ # Clone with submodules
44
+ git clone --recursive https://github.com/vypivshiy/selector_schema_codegen
45
+ cd selector_schema_codegen
46
+
47
+ # Install with pip (builds tree-sitter-kdl extension)
48
+ pip install .
49
+ ```
50
+
51
+ **Requirements:**
52
+ - Linux/macOS: `gcc` or `clang`
53
+ - Windows: MSVC (Visual Studio Build Tools or Visual Studio)
54
+
55
+ ### Via uv tool
56
+
57
+ ```bash
58
+ uv tool install git+https://github.com/vypivshiy/selector_schema_codegen@features-kdl
59
+ ```
60
+
61
+ ## usage
62
+
63
+ ### generate modules
64
+
65
+ ```
66
+ ssc-gen generate examples/ -t js-pure -o .
67
+ ```
68
+
69
+ ### lint syntax
70
+
71
+ ```
72
+ ssc-gen check examples/
73
+ ```
74
+
75
+ ### test schema by html output
76
+
77
+ from file:
78
+ ```
79
+ python main.py run .\examples\booksToScrape.kdl:MainCatalogue -t py-bs4 -i index.html
80
+ ```
81
+
82
+ from stdin:
83
+ ```
84
+ curl https://books.toscrape.com/ | python main.py run .\examples\booksToScrape.kdl:MainCatalogue -t py-bs4
85
+ ```
86
+
87
+ ### test selectors:
88
+
89
+ from file
90
+ ```
91
+ python main.py health .\examples\booksToScrape.kdl:MainCatalogue -i index.html
92
+ ```
93
+
94
+ from stdin
95
+ ```
96
+ curl https://books.toscrape.com/catalogue/page-2.html | python main.py health .\examples\booksToScrape.kdl:MainCatalogue
97
+ ```
98
+
99
+
100
+ ## syntax
101
+
102
+ See [docs](docs) and [examples](examples) for how to use the syntax.
103
+
104
+ ## LLM generate dsl config (experimental, not ready)
105
+
106
+ ### prompt
107
+
108
+ Use [SYSTEM_PROMPT](SYSTEM_PROMPT.md) in API pipelines or chats. Before generating, run the linter with `ssc-gen check [FILES...] -f json` and send its error output, if any.
109
+
110
+ ### skill
111
+ Use [kdl-schema-dsl](.agents/skills/kdl-schema-dsl) to generate a config.
@@ -0,0 +1,78 @@
1
+ # Selector Schema codegen
2
+
3
+ Experimental PoC implementation of a code generator based on KDL2.0 syntax DSL.
4
+
5
+ ## install
6
+
7
+ ### From git (requires C/C++ compiler)
8
+
9
+ ```bash
10
+ # Clone with submodules
11
+ git clone --recursive https://github.com/vypivshiy/selector_schema_codegen
12
+ cd selector_schema_codegen
13
+
14
+ # Install with pip (builds tree-sitter-kdl extension)
15
+ pip install .
16
+ ```
17
+
18
+ **Requirements:**
19
+ - Linux/macOS: `gcc` or `clang`
20
+ - Windows: MSVC (Visual Studio Build Tools or Visual Studio)
21
+
22
+ ### Via uv tool
23
+
24
+ ```bash
25
+ uv tool install git+https://github.com/vypivshiy/selector_schema_codegen@features-kdl
26
+ ```
27
+
28
+ ## usage
29
+
30
+ ### generate modules
31
+
32
+ ```
33
+ ssc-gen generate examples/ -t js-pure -o .
34
+ ```
35
+
36
+ ### lint syntax
37
+
38
+ ```
39
+ ssc-gen check examples/
40
+ ```
41
+
42
+ ### test schema by html output
43
+
44
+ from file:
45
+ ```
46
+ python main.py run .\examples\booksToScrape.kdl:MainCatalogue -t py-bs4 -i index.html
47
+ ```
48
+
49
+ from stdin:
50
+ ```
51
+ curl https://books.toscrape.com/ | python main.py run .\examples\booksToScrape.kdl:MainCatalogue -t py-bs4
52
+ ```
53
+
54
+ ### test selectors:
55
+
56
+ from file
57
+ ```
58
+ python main.py health .\examples\booksToScrape.kdl:MainCatalogue -i index.html
59
+ ```
60
+
61
+ from stdin
62
+ ```
63
+ curl https://books.toscrape.com/catalogue/page-2.html | python main.py health .\examples\booksToScrape.kdl:MainCatalogue
64
+ ```
65
+
66
+
67
+ ## syntax
68
+
69
+ See [docs](docs) and [examples](examples) for how to use the syntax.
70
+
71
+ ## LLM generate dsl config (experimental, not ready)
72
+
73
+ ### prompt
74
+
75
+ Use [SYSTEM_PROMPT](SYSTEM_PROMPT.md) in API pipelines or chats. Before generating, run the linter with `ssc-gen check [FILES...] -f json` and send its error output, if any.
76
+
77
+ ### skill
78
+ Use [kdl-schema-dsl](.agents/skills/kdl-schema-dsl) to generate a config.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ssc_codegen"
3
- version = "0.15.2"
3
+ version = "0.17.0a"
4
4
  description = "Python-dsl code converter to html parser for web scraping "
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -8,8 +8,6 @@ dependencies = [
8
8
  "bs4>=0.0.2",
9
9
  "colorama>=0.4.6 ; sys_platform == 'win32'",
10
10
  "cssselect>=1.2.0",
11
- "httpx>=0.28.1",
12
- "ichrome>=4.0.4",
13
11
  "lxml>=5.3.0",
14
12
  "soupsieve>=2.6",
15
13
  "typer>=0.15.1",
@@ -17,9 +15,10 @@ dependencies = [
17
15
  # https://github.com/fastapi/typer/discussions/1215
18
16
  # https://github.com/fastapi/typer/pull/1145
19
17
  "click<8.2.0",
20
- "ast-grep-cli>=0.38.1",
21
- "tinycss2>=1.4.0",
18
+ "tree-sitter>=0.25.2",
22
19
  ]
20
+ # Windows requires MSVC; Linux/macOS require gcc/clang
21
+ # Install C/C++ compiler: https://wiki.python.org/moin/CompilingPython
23
22
 
24
23
  classifiers = [
25
24
  "Development Status :: 3 - Alpha",
@@ -43,33 +42,31 @@ Examples = "https://github.com/vypivshiy/selector_schema_codegen/examples"
43
42
 
44
43
 
45
44
  [project.scripts]
46
- ssc-gen = 'ssc_codegen.cli.main:main'
45
+ ssc-gen = 'ssc_codegen.main:main'
47
46
 
48
47
  [build-system]
49
- requires = ["hatchling"]
50
- build-backend = "hatchling.build"
48
+ requires = ["setuptools>=62.4.0", "wheel"]
49
+ build-backend = "setuptools.build_meta"
51
50
 
52
- [tool.hatch.build.targets.sdist]
53
- include = [
54
- "ssc_codegen/*",
55
- ]
51
+ [tool.setuptools.packages.find]
52
+ include = ["ssc_codegen*"]
53
+
54
+ [tool.setuptools.package-data]
55
+ ssc_codegen = ["py.typed"]
56
56
 
57
57
  [dependency-groups]
58
58
  dev = [
59
+ "bs4>=0.0.2",
59
60
  "coverage>=7.6.12",
60
61
  "httpx>=0.28.1",
62
+ "hypothesis>=6.151.9",
63
+ "lxml>=5.3.0",
61
64
  "mypy>=1.14.1",
62
- "parsel>=1.10.0",
65
+ "parsel>=1.9.1",
63
66
  "pytest>=8.3.4",
64
67
  "ruff>=0.9.3",
65
68
  "selectolax>=0.3.27",
66
69
  ]
67
- # python generated code tests
68
- tests = [
69
- "bs4>=0.0.2",
70
- "parsel>=1.10.0",
71
- "selectolax>=0.3.27",
72
- ]
73
70
 
74
71
  [tool.ruff]
75
72
  target-version = "py310"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,68 @@
1
+ """
2
+ Setup script for ssc_codegen with tree-sitter-kdl extension.
3
+ Requires C/C++ compiler (MSVC on Windows, gcc/clang on Linux/macOS).
4
+
5
+ Before installing, initialize the submodule:
6
+ git submodule update --init
7
+ """
8
+
9
+ from pathlib import Path
10
+ from setuptools import setup, Extension
11
+ from setuptools.command.build_ext import build_ext
12
+
13
+ # Relative paths from setup.py location
14
+ VENDOR_KDL_REL = Path("vendor") / "tree-sitter-kdl"
15
+ VENDOR_KDL_SRC_REL = VENDOR_KDL_REL / "src"
16
+ VENDOR_BINDING_C = VENDOR_KDL_REL / "bindings" / "python" / "tree_sitter_kdl" / "binding.c"
17
+
18
+
19
+ class KdlBuildExt(build_ext):
20
+ """Custom build_ext that builds tree-sitter-kdl from copied sources."""
21
+
22
+ def build_extension(self, ext: Extension):
23
+ # Compiler flags
24
+ if self.compiler.compiler_type != "msvc":
25
+ ext.extra_compile_args = ["-std=c11", "-fvisibility=hidden", "-O2"]
26
+ else:
27
+ ext.extra_compile_args = ["/std:c11", "/utf-8", "/O2"]
28
+
29
+ # Add scanner.c if present (relative to setup.py)
30
+ scanner_c = VENDOR_KDL_SRC_REL / "scanner.c"
31
+ if scanner_c.exists():
32
+ ext.sources.append(str(scanner_c))
33
+
34
+ # Py_LIMITED_API for abi3 wheels
35
+ if ext.py_limited_api:
36
+ ext.define_macros.append(("Py_LIMITED_API", "0x030A0000"))
37
+
38
+ super().build_extension(ext)
39
+
40
+
41
+ def get_kdl_extension():
42
+ """Create Extension for tree-sitter-kdl."""
43
+ parser_c = VENDOR_KDL_SRC_REL / "parser.c"
44
+ if not parser_c.exists():
45
+ raise RuntimeError(
46
+ f"tree-sitter-kdl sources not found at {VENDOR_KDL_SRC_REL}.\n"
47
+ "Initialize the git submodule before installing:\n"
48
+ " git submodule update --init"
49
+ )
50
+
51
+ if not VENDOR_BINDING_C.exists():
52
+ raise RuntimeError(f"binding.c not found at {VENDOR_BINDING_C}")
53
+
54
+ return Extension(
55
+ name="ssc_codegen.linter._binding",
56
+ sources=[str(VENDOR_BINDING_C), str(parser_c)],
57
+ include_dirs=[str(VENDOR_KDL_SRC_REL)],
58
+ define_macros=[
59
+ ("PY_SSIZE_T_CLEAN", None),
60
+ ("TREE_SITTER_HIDE_SYMBOLS", None),
61
+ ],
62
+ )
63
+
64
+
65
+ setup(
66
+ cmdclass={"build_ext": KdlBuildExt},
67
+ ext_modules=[get_kdl_extension()],
68
+ )
@@ -0,0 +1,27 @@
1
+ from pathlib import Path
2
+
3
+ from ssc_codegen.parser import Module, PARSER
4
+
5
+ _KDL_TEXT_ENCODING = "utf-8-sig"
6
+
7
+
8
+ def parse_ast(
9
+ src: str | None = None,
10
+ path: str | None = None,
11
+ *,
12
+ css_to_xpath: bool = False,
13
+ ) -> Module:
14
+ if not src and not path:
15
+ raise AttributeError("required src or path argument")
16
+ source_path: Path | None = None
17
+ if path:
18
+ source_path = Path(path).resolve()
19
+ src = source_path.read_text(encoding=_KDL_TEXT_ENCODING)
20
+ if not src:
21
+ raise AttributeError("required src or path argument")
22
+ module = PARSER.parse(src, source_path=source_path)
23
+ if css_to_xpath:
24
+ from ssc_codegen.document_utils import convert_css_to_xpath_module
25
+
26
+ convert_css_to_xpath_module(module)
27
+ return module
@@ -0,0 +1,102 @@
1
+ """Logging configuration for ssc_codegen.
2
+
3
+ Usage
4
+ -----
5
+ Import the logger in any submodule::
6
+
7
+ from ssc_codegen._logging import logger
8
+
9
+ To enable DEBUG output from the CLI, pass ``--verbose`` / ``-v`` flag,
10
+ or configure it manually::
11
+
12
+ import logging
13
+ logging.getLogger("ssc_codegen").setLevel(logging.DEBUG)
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import logging
19
+ import sys
20
+
21
+ # Windows ANSI backport
22
+ try:
23
+ import colorama
24
+
25
+ colorama.init(autoreset=False)
26
+ _COLORAMA_AVAILABLE = True
27
+ except ImportError:
28
+ _COLORAMA_AVAILABLE = False
29
+
30
+ # ANSI color codes (used on all platforms; colorama translates them on Windows)
31
+ _RESET = "\033[0m"
32
+ _BOLD = "\033[1m"
33
+ _COLORS: dict[int, str] = {
34
+ logging.DEBUG: "\033[36m", # cyan
35
+ logging.INFO: "\033[32m", # green
36
+ logging.WARNING: "\033[33m", # yellow
37
+ logging.ERROR: "\033[31m", # red
38
+ logging.CRITICAL: "\033[35m", # magenta
39
+ }
40
+
41
+
42
+ class _ColorFormatter(logging.Formatter):
43
+ """Formatter that wraps the level name in ANSI color codes."""
44
+
45
+ _FMT = "[{color}{bold}{level}{reset}] {name}: {message}"
46
+
47
+ def format(self, record: logging.LogRecord) -> str: # noqa: A003
48
+ color = _COLORS.get(record.levelno, "")
49
+ level = record.levelname
50
+ name = record.name
51
+ # format the message part the normal way (handles exc_info etc.)
52
+ record.message = record.getMessage()
53
+ if record.exc_info and not record.exc_text:
54
+ record.exc_text = self.formatException(record.exc_info)
55
+
56
+ msg = self._FMT.format(
57
+ color=color,
58
+ bold=_BOLD,
59
+ level=level,
60
+ reset=_RESET,
61
+ name=name,
62
+ message=record.message,
63
+ )
64
+ if record.exc_text:
65
+ msg = f"{msg}\n{record.exc_text}"
66
+ return msg
67
+
68
+
69
+ # Single named logger for the entire ssc_codegen package.
70
+ # All child loggers (parser, main, …) are children of this one,
71
+ # so a single ``logging.getLogger("ssc_codegen").setLevel(DEBUG)``
72
+ # enables everything at once.
73
+ logger = logging.getLogger("ssc_codegen")
74
+
75
+
76
+ def setup_debug_logging() -> None:
77
+ """Enable DEBUG-level logging to stderr for the ssc_codegen logger.
78
+
79
+ Called by the CLI when ``--verbose`` is passed.
80
+ Idempotent: calling multiple times is safe.
81
+ """
82
+ pkg_logger = logging.getLogger("ssc_codegen")
83
+ if pkg_logger.level > logging.DEBUG or pkg_logger.level == logging.NOTSET:
84
+ pkg_logger.setLevel(logging.DEBUG)
85
+
86
+ # Avoid adding duplicate handlers if already configured
87
+ if not any(
88
+ isinstance(h, logging.StreamHandler) for h in pkg_logger.handlers
89
+ ):
90
+ # Use colors only when stderr is a real TTY or colorama is available
91
+ use_color = _COLORAMA_AVAILABLE or (
92
+ hasattr(sys.stderr, "isatty") and sys.stderr.isatty()
93
+ )
94
+ handler = logging.StreamHandler()
95
+ handler.setLevel(logging.DEBUG)
96
+ formatter: logging.Formatter = (
97
+ _ColorFormatter()
98
+ if use_color
99
+ else logging.Formatter("[%(levelname)s] %(name)s: %(message)s")
100
+ )
101
+ handler.setFormatter(formatter)
102
+ pkg_logger.addHandler(handler)
@@ -0,0 +1,169 @@
1
+ """
2
+ AST nodes for the KDL Schema DSL.
3
+
4
+ Import everything from here:
5
+ from ssc_codegen.ast import Module, Field, CssSelect, ...
6
+ """
7
+ from .types import VariableType, StructType
8
+
9
+ from .base import Node
10
+
11
+ from .module import (
12
+ Module,
13
+ CodeStartHook,
14
+ CodeEndHook,
15
+ Docstring,
16
+ Imports,
17
+ Utilities,
18
+ )
19
+
20
+ from .typedef import TypeDef, TypeDefField
21
+
22
+ from .jsondef import JsonDef, JsonDefField
23
+
24
+ from .struct import (
25
+ Struct,
26
+ StructDocstring,
27
+ PreValidate,
28
+ Init,
29
+ InitField,
30
+ SplitDoc,
31
+ Key,
32
+ Value,
33
+ TableConfig,
34
+ TableRow,
35
+ TableMatchKey,
36
+ Field,
37
+ StartParse
38
+ )
39
+
40
+ from .selectors import (
41
+ CssSelect,
42
+ CssSelectAll,
43
+ XpathSelect,
44
+ XpathSelectAll,
45
+ CssRemove,
46
+ XpathRemove,
47
+ )
48
+
49
+ from .extract import Text, Raw, Attr
50
+
51
+ from .string import (
52
+ Trim,
53
+ Ltrim,
54
+ Rtrim,
55
+ NormalizeSpace,
56
+ RmPrefix,
57
+ RmSuffix,
58
+ RmPrefixSuffix,
59
+ Fmt,
60
+ Repl,
61
+ ReplMap,
62
+ Lower,
63
+ Upper,
64
+ Split,
65
+ Join,
66
+ Unescape,
67
+ )
68
+
69
+ from .regex import Re, ReAll, ReSub
70
+
71
+ from .array import Index, Slice, Len, Unique
72
+
73
+ from .cast import ToInt, ToFloat, ToBool, Jsonify, Nested
74
+
75
+ from .control import Self, Fallback, FallbackStart, FallbackEnd, Return
76
+
77
+ from .predicate_containers import Filter, Assert, Match
78
+
79
+ from .predicate_ops import (
80
+ PredEq,
81
+ PredNe,
82
+ PredGt,
83
+ PredLt,
84
+ PredGe,
85
+ PredLe,
86
+ PredRange,
87
+ PredStarts,
88
+ PredEnds,
89
+ PredContains,
90
+ PredIn,
91
+ PredRe,
92
+ PredReAny,
93
+ PredReAll,
94
+ PredCss,
95
+ PredXpath,
96
+ PredHasAttr,
97
+ PredCountEq,
98
+ PredCountGt,
99
+ PredCountLt,
100
+ PredAttrEnds,
101
+ PredAttrEq,
102
+ PredAttrNe,
103
+ PredAttrRe,
104
+ PredAttrStarts,
105
+ PredAttrContains,
106
+ PredTextContains,
107
+ PredTextEnds,
108
+ PredTextRe,
109
+ PredTextStarts,
110
+ LogicNot,
111
+ LogicAnd,
112
+ LogicOr,
113
+ )
114
+
115
+ from .transform import TransformDef, TransformTarget, TransformCall
116
+
117
+ __all__ = [
118
+ # types
119
+ "VariableType", "StructType",
120
+ # base
121
+ "Node",
122
+ # module
123
+ "Module", "CodeStartHook", "CodeEndHook",
124
+ "Docstring", "Imports", "Utilities",
125
+ # typedef
126
+ "TypeDef", "TypeDefField",
127
+ # jsondef
128
+ "JsonDef", "JsonDefField",
129
+ # struct
130
+ "Struct", "StructDocstring", "PreValidate",
131
+ "Init", "InitField", "SplitDoc",
132
+ "Key", "Value",
133
+ "TableConfig", "TableRow", "TableMatchKey",
134
+ "Field", "StartParse",
135
+ # selectors
136
+ "CssSelect", "CssSelectAll",
137
+ "XpathSelect", "XpathSelectAll",
138
+ "CssRemove", "XpathRemove",
139
+ # extract
140
+ "Text", "Raw", "Attr",
141
+ # string
142
+ "Trim", "Ltrim", "Rtrim", "NormalizeSpace",
143
+ "RmPrefix", "RmSuffix", "RmPrefixSuffix",
144
+ "Fmt", "Repl", "ReplMap",
145
+ "Lower", "Upper", "Split", "Join", "Unescape",
146
+ # regex
147
+ "Re", "ReAll", "ReSub",
148
+ # array
149
+ "Index", "Slice", "Len", "Unique",
150
+ # cast
151
+ "ToInt", "ToFloat", "ToBool", "Jsonify", "Nested",
152
+ # control
153
+ "Self", "Fallback", "FallbackStart", "FallbackEnd", "Return",
154
+ # predicate containers
155
+ "Filter", "Assert", "Match",
156
+ # predicate ops
157
+ "PredEq", "PredNe",
158
+ "PredGt", "PredLt", "PredGe", "PredLe", "PredRange",
159
+ "PredStarts", "PredEnds", "PredContains", "PredIn",
160
+ "PredRe", "PredReAny", "PredReAll",
161
+ "PredCss", "PredXpath", "PredHasAttr",
162
+ "PredAttrEq", "PredAttrNe",
163
+ "PredAttrStarts", "PredAttrEnds", "PredAttrContains", "PredAttrRe",
164
+ "PredTextStarts", "PredTextEnds", "PredTextContains", "PredTextRe",
165
+ "PredCountEq", "PredCountGt", "PredCountLt",
166
+ "LogicNot", "LogicAnd", "LogicOr",
167
+ # transform
168
+ "TransformDef", "TransformTarget", "TransformCall",
169
+ ]
@@ -0,0 +1,56 @@
1
+ from __future__ import annotations
2
+ from dataclasses import dataclass, field
3
+
4
+ from .base import Node
5
+ from .types import VariableType
6
+
7
+ # Index, First, Last, Slice accept LIST_AUTO / return AUTO or LIST_AUTO.
8
+ # Concrete types are resolved by the builder from the cursor type via
9
+ # VariableType.scalar / VariableType.as_list helpers.
10
+
11
+
12
+ @dataclass
13
+ class Index(Node):
14
+ """
15
+ Returns element at position i.
16
+ Negative index counts from end.
17
+ LIST_AUTO → AUTO (resolved by builder).
18
+ """
19
+
20
+ i: int = 0
21
+ accept: VariableType = field(default=VariableType.LIST_AUTO)
22
+ ret: VariableType = field(default=VariableType.AUTO)
23
+
24
+
25
+ @dataclass
26
+ class Slice(Node):
27
+ """
28
+ Returns sublist [start:end].
29
+ LIST_AUTO → LIST_AUTO (resolved by builder).
30
+ """
31
+
32
+ start: int = 0
33
+ end: int = 0
34
+ accept: VariableType = field(default=VariableType.LIST_AUTO)
35
+ ret: VariableType = field(default=VariableType.LIST_AUTO)
36
+
37
+
38
+ @dataclass
39
+ class Len(Node):
40
+ """Returns list length as INT."""
41
+
42
+ accept: VariableType = field(default=VariableType.LIST_AUTO)
43
+ ret: VariableType = field(default=VariableType.INT)
44
+
45
+
46
+ @dataclass
47
+ class Unique(Node):
48
+ """
49
+ Removes duplicate strings from list.
50
+ keep_order=True — preserves original order (default: False).
51
+ LIST_STRING → LIST_STRING.
52
+ """
53
+
54
+ keep_order: bool = False
55
+ accept: VariableType = field(default=VariableType.LIST_STRING)
56
+ ret: VariableType = field(default=VariableType.LIST_STRING)