ssc_codegen 0.15.3__tar.gz → 0.17.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. {ssc_codegen-0.15.3 → ssc_codegen-0.17.1}/.gitignore +5 -1
  2. ssc_codegen-0.17.1/PKG-INFO +155 -0
  3. ssc_codegen-0.17.1/README.md +124 -0
  4. {ssc_codegen-0.15.3 → ssc_codegen-0.17.1}/pyproject.toml +10 -14
  5. ssc_codegen-0.17.1/ssc_codegen/__init__.py +27 -0
  6. ssc_codegen-0.17.1/ssc_codegen/_logging.py +102 -0
  7. ssc_codegen-0.17.1/ssc_codegen/ast/__init__.py +174 -0
  8. ssc_codegen-0.17.1/ssc_codegen/ast/array.py +56 -0
  9. ssc_codegen-0.17.1/ssc_codegen/ast/base.py +24 -0
  10. ssc_codegen-0.17.1/ssc_codegen/ast/cast.py +72 -0
  11. ssc_codegen-0.17.1/ssc_codegen/ast/control.py +82 -0
  12. ssc_codegen-0.17.1/ssc_codegen/ast/extract.py +51 -0
  13. ssc_codegen-0.17.1/ssc_codegen/ast/helpers.py +7 -0
  14. ssc_codegen-0.17.1/ssc_codegen/ast/jsondef.py +37 -0
  15. ssc_codegen-0.17.1/ssc_codegen/ast/module.py +92 -0
  16. ssc_codegen-0.17.1/ssc_codegen/ast/predicate_containers.py +44 -0
  17. ssc_codegen-0.17.1/ssc_codegen/ast/predicate_ops.py +309 -0
  18. ssc_codegen-0.17.1/ssc_codegen/ast/regex.py +57 -0
  19. ssc_codegen-0.17.1/ssc_codegen/ast/selectors.py +55 -0
  20. ssc_codegen-0.17.1/ssc_codegen/ast/string.py +158 -0
  21. ssc_codegen-0.17.1/ssc_codegen/ast/struct.py +232 -0
  22. ssc_codegen-0.17.1/ssc_codegen/ast/transform.py +63 -0
  23. ssc_codegen-0.17.1/ssc_codegen/ast/typedef.py +41 -0
  24. ssc_codegen-0.17.1/ssc_codegen/ast/types.py +79 -0
  25. ssc_codegen-0.17.1/ssc_codegen/converters/base.py +327 -0
  26. ssc_codegen-0.17.1/ssc_codegen/converters/helpers.py +76 -0
  27. ssc_codegen-0.17.1/ssc_codegen/converters/js_pure.py +1445 -0
  28. ssc_codegen-0.17.1/ssc_codegen/converters/py_bs4.py +1384 -0
  29. ssc_codegen-0.17.1/ssc_codegen/converters/py_lxml.py +538 -0
  30. ssc_codegen-0.17.1/ssc_codegen/converters/py_parsel.py +475 -0
  31. ssc_codegen-0.17.1/ssc_codegen/converters/py_slax.py +428 -0
  32. ssc_codegen-0.17.1/ssc_codegen/document_utils.py +107 -0
  33. ssc_codegen-0.17.1/ssc_codegen/exceptions.py +11 -0
  34. ssc_codegen-0.17.1/ssc_codegen/health.py +418 -0
  35. ssc_codegen-0.17.1/ssc_codegen/kdl/__init__.py +35 -0
  36. ssc_codegen-0.17.1/ssc_codegen/kdl/parser.py +954 -0
  37. ssc_codegen-0.17.1/ssc_codegen/linter/__init__.py +26 -0
  38. ssc_codegen-0.17.1/ssc_codegen/linter/_kdl_lang.py +415 -0
  39. ssc_codegen-0.17.1/ssc_codegen/linter/base.py +818 -0
  40. ssc_codegen-0.17.1/ssc_codegen/linter/errors.py +78 -0
  41. ssc_codegen-0.17.1/ssc_codegen/linter/format_errors.py +285 -0
  42. ssc_codegen-0.17.1/ssc_codegen/linter/metadata.py +18 -0
  43. ssc_codegen-0.17.1/ssc_codegen/linter/navigation.py +262 -0
  44. ssc_codegen-0.17.1/ssc_codegen/linter/path.py +36 -0
  45. ssc_codegen-0.17.1/ssc_codegen/linter/rule_keywords.py +320 -0
  46. ssc_codegen-0.17.1/ssc_codegen/linter/rules.py +848 -0
  47. ssc_codegen-0.17.1/ssc_codegen/linter/rules_struct.py +571 -0
  48. ssc_codegen-0.17.1/ssc_codegen/linter/type_rules.py +523 -0
  49. ssc_codegen-0.17.1/ssc_codegen/linter/types.py +188 -0
  50. ssc_codegen-0.17.1/ssc_codegen/main.py +616 -0
  51. ssc_codegen-0.17.1/ssc_codegen/parser.py +2298 -0
  52. ssc_codegen-0.17.1/ssc_codegen/pseudo_selectors.py +39 -0
  53. ssc_codegen-0.17.1/ssc_codegen/regex_utils.py +204 -0
  54. ssc_codegen-0.17.1/ssc_codegen/selector_utils.py +8 -0
  55. ssc_codegen-0.15.3/PKG-INFO +0 -255
  56. ssc_codegen-0.15.3/README.md +0 -220
  57. ssc_codegen-0.15.3/ssc_codegen/__init__.py +0 -99
  58. ssc_codegen-0.15.3/ssc_codegen/_compat.py +0 -31
  59. ssc_codegen-0.15.3/ssc_codegen/ast_/__init__.py +0 -131
  60. ssc_codegen-0.15.3/ssc_codegen/ast_/base.py +0 -242
  61. ssc_codegen-0.15.3/ssc_codegen/ast_/nodes_array.py +0 -87
  62. ssc_codegen-0.15.3/ssc_codegen/ast_/nodes_cast.py +0 -154
  63. ssc_codegen-0.15.3/ssc_codegen/ast_/nodes_core.py +0 -591
  64. ssc_codegen-0.15.3/ssc_codegen/ast_/nodes_filter.py +0 -659
  65. ssc_codegen-0.15.3/ssc_codegen/ast_/nodes_selectors.py +0 -305
  66. ssc_codegen-0.15.3/ssc_codegen/ast_/nodes_string.py +0 -495
  67. ssc_codegen-0.15.3/ssc_codegen/ast_/nodes_validate.py +0 -207
  68. ssc_codegen-0.15.3/ssc_codegen/ast_build/__init__.py +0 -1
  69. ssc_codegen-0.15.3/ssc_codegen/ast_build/builder.py +0 -464
  70. ssc_codegen-0.15.3/ssc_codegen/ast_build/main.py +0 -94
  71. ssc_codegen-0.15.3/ssc_codegen/ast_build/utils.py +0 -155
  72. ssc_codegen-0.15.3/ssc_codegen/ast_grep_rules/js_rules.yml +0 -20
  73. ssc_codegen-0.15.3/ssc_codegen/ast_grep_rules/py_drop_prefix_suffix_backport.yml +0 -47
  74. ssc_codegen-0.15.3/ssc_codegen/ast_grep_rules/py_rules.yml +0 -20
  75. ssc_codegen-0.15.3/ssc_codegen/cli/__init__.py +0 -0
  76. ssc_codegen-0.15.3/ssc_codegen/cli/ast_grep.py +0 -10
  77. ssc_codegen-0.15.3/ssc_codegen/cli/cli_callbacks.py +0 -34
  78. ssc_codegen-0.15.3/ssc_codegen/cli/cli_utils.py +0 -84
  79. ssc_codegen-0.15.3/ssc_codegen/cli/code_callbacks.py +0 -32
  80. ssc_codegen-0.15.3/ssc_codegen/cli/consts.py +0 -61
  81. ssc_codegen-0.15.3/ssc_codegen/cli/main.py +0 -643
  82. ssc_codegen-0.15.3/ssc_codegen/cli/runtime_parse_runners.py +0 -72
  83. ssc_codegen-0.15.3/ssc_codegen/compiler.py +0 -113
  84. ssc_codegen-0.15.3/ssc_codegen/converters/__init__.py +0 -0
  85. ssc_codegen-0.15.3/ssc_codegen/converters/base.py +0 -577
  86. ssc_codegen-0.15.3/ssc_codegen/converters/go_goquery.py +0 -2022
  87. ssc_codegen-0.15.3/ssc_codegen/converters/helpers.py +0 -257
  88. ssc_codegen-0.15.3/ssc_codegen/converters/js_pure.py +0 -1534
  89. ssc_codegen-0.15.3/ssc_codegen/converters/lua_htmlparser.py +0 -1510
  90. ssc_codegen-0.15.3/ssc_codegen/converters/py_base.py +0 -1359
  91. ssc_codegen-0.15.3/ssc_codegen/converters/py_bs4.py +0 -504
  92. ssc_codegen-0.15.3/ssc_codegen/converters/py_lxml.py +0 -551
  93. ssc_codegen-0.15.3/ssc_codegen/converters/py_parsel.py +0 -533
  94. ssc_codegen-0.15.3/ssc_codegen/converters/py_selectolax.py +0 -501
  95. ssc_codegen-0.15.3/ssc_codegen/converters/templates/__init__.py +0 -3
  96. ssc_codegen-0.15.3/ssc_codegen/converters/templates/go_goquery.py +0 -473
  97. ssc_codegen-0.15.3/ssc_codegen/converters/templates/js_pure.py +0 -38
  98. ssc_codegen-0.15.3/ssc_codegen/converters/templates/lua_base.py +0 -710
  99. ssc_codegen-0.15.3/ssc_codegen/converters/templates/lua_css_compat.py +0 -157
  100. ssc_codegen-0.15.3/ssc_codegen/converters/templates/lua_re_compat.py +0 -517
  101. ssc_codegen-0.15.3/ssc_codegen/converters/templates/py_base.py +0 -60
  102. ssc_codegen-0.15.3/ssc_codegen/document.py +0 -2661
  103. ssc_codegen-0.15.3/ssc_codegen/document_utlis.py +0 -201
  104. ssc_codegen-0.15.3/ssc_codegen/json_struct.py +0 -272
  105. ssc_codegen-0.15.3/ssc_codegen/json_to_scc.py +0 -180
  106. ssc_codegen-0.15.3/ssc_codegen/logs.py +0 -49
  107. ssc_codegen-0.15.3/ssc_codegen/pseudo_selectors.py +0 -70
  108. ssc_codegen-0.15.3/ssc_codegen/schema.py +0 -368
  109. ssc_codegen-0.15.3/ssc_codegen/selector_utils.py +0 -78
  110. ssc_codegen-0.15.3/ssc_codegen/static_checker/__init__.py +0 -62
  111. ssc_codegen-0.15.3/ssc_codegen/static_checker/base.py +0 -91
  112. ssc_codegen-0.15.3/ssc_codegen/static_checker/callbacks.py +0 -484
  113. ssc_codegen-0.15.3/ssc_codegen/str_utils.py +0 -230
  114. ssc_codegen-0.15.3/ssc_codegen/tokens.py +0 -291
  115. ssc_codegen-0.15.3/ssc_codegen/transform.py +0 -133
  116. {ssc_codegen-0.15.3 → ssc_codegen-0.17.1}/LICENSE +0 -0
@@ -3,8 +3,12 @@ __pycache__/
3
3
  *.py[cod]
4
4
  *$py.class
5
5
 
6
- # C extensions
6
+ # C extensions / native shared libs
7
7
  *.so
8
+ *.dll
9
+ *.dylib
10
+ *.exp
11
+ *.lib
8
12
 
9
13
  # Distribution / packaging
10
14
  .Python
@@ -0,0 +1,155 @@
1
+ Metadata-Version: 2.4
2
+ Name: ssc_codegen
3
+ Version: 0.17.1
4
+ Summary: Python-dsl code converter to html parser for web scraping
5
+ Project-URL: Documentation, https://github.com/vypivshiy/selector_schema_codegen#readme
6
+ Project-URL: Issues, https://github.com/vypivshiy/selector_schema_codegen/issues
7
+ Project-URL: Source, https://github.com/vypivshiy/selector_schema_codegen
8
+ Project-URL: Examples, https://github.com/vypivshiy/selector_schema_codegen/examples
9
+ License-File: LICENSE
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Software Development :: Code Generators
19
+ Classifier: Topic :: Text Processing :: Markup :: HTML
20
+ Classifier: Topic :: Utilities
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: bs4>=0.0.2
23
+ Requires-Dist: click<8.2.0
24
+ Requires-Dist: colorama>=0.4.6; sys_platform == 'win32'
25
+ Requires-Dist: cssselect>=1.2.0
26
+ Requires-Dist: lxml>=5.3.0
27
+ Requires-Dist: soupsieve>=2.6
28
+ Requires-Dist: typer>=0.15.1
29
+ Requires-Dist: typing-extensions; python_version < '3.11'
30
+ Description-Content-Type: text/markdown
31
+
32
+ # ssc-codegen
33
+
34
+ Code generator for web scraping parsers. Describe HTML extraction rules in a declarative KDL 2.0 DSL, then generate ready-to-use parser code for multiple languages and libraries.
35
+
36
+ ```
37
+ .kdl schema --> [kdl parser] --> AST --> [linter] --> [converter] --> output code
38
+ ```
39
+
40
+ ## Features
41
+
42
+ - Declarative DSL based on KDL 2.0 syntax
43
+ - Static type checking and linting before code generation
44
+ - Multiple output targets: Python (bs4, lxml, parsel, selectolax), JavaScript (DOM API)
45
+ - Struct types: `item`, `list`, `dict`, `table`, `flat`
46
+ - LLM-friendly: system prompt + linter loop for AI-assisted schema generation
47
+
48
+ ## Install
49
+
50
+ ```bash
51
+ uv tool install ssc_codegen
52
+ ```
53
+
54
+ ## Quick example
55
+
56
+ `books.kdl`:
57
+
58
+ ```kdl
59
+ struct Book type=list {
60
+ @split-doc { css-all ".product-card" }
61
+
62
+ title { css ".title"; text }
63
+ price { css ".price"; text; re #"(\d+\.\d+)"#; to-float }
64
+ url { css "a[href]"; attr "href"; fallback #null }
65
+ }
66
+ ```
67
+
68
+ Generate Python parser:
69
+
70
+ ```bash
71
+ ssc-gen generate books.kdl -t py-bs4 -o ./output
72
+ ```
73
+
74
+ ## Usage
75
+
76
+ ### Generate code
77
+
78
+ ```bash
79
+ # single file
80
+ ssc-gen generate schema.kdl -t py-bs4 -o ./output
81
+
82
+ # all .kdl files in a directory
83
+ ssc-gen generate examples/ -t js-pure -o ./output
84
+
85
+ # with custom package name (for Go and other targets)
86
+ ssc-gen generate schema.kdl -t go-goquery -o ./parsers --package scraper
87
+ ```
88
+
89
+ Targets: `py-bs4`, `py-lxml`, `py-parsel`, `py-slax`, `js-pure`
90
+
91
+ ### Lint schemas
92
+
93
+ ```bash
94
+ # human-readable output
95
+ ssc-gen check schema.kdl
96
+
97
+ # JSON output (for LLM pipelines)
98
+ ssc-gen check schema.kdl -f json
99
+
100
+ # check all files in a directory
101
+ ssc-gen check examples/
102
+ ```
103
+
104
+ ### Test schema against HTML
105
+
106
+ ```bash
107
+ # from file
108
+ ssc-gen run examples/booksToScrape.kdl:MainCatalogue -t py-bs4 -i page.html
109
+
110
+ # from stdin
111
+ curl https://books.toscrape.com/ | ssc-gen run examples/booksToScrape.kdl:MainCatalogue -t py-bs4
112
+ ```
113
+
114
+ ### Health check (verify selectors match elements)
115
+
116
+ ```bash
117
+ # from file
118
+ ssc-gen health examples/booksToScrape.kdl:MainCatalogue -i page.html
119
+
120
+ # from stdin
121
+ curl https://books.toscrape.com/ | ssc-gen health examples/booksToScrape.kdl:MainCatalogue
122
+ ```
123
+
124
+ ## Documentation
125
+
126
+ - [Quick start](docs2/guide.md)
127
+ - [Syntax and file structure](docs2/syntax.md)
128
+ - [Type system](docs2/types.md)
129
+ - [Pipeline operations](docs2/operations.md)
130
+ - [Predicates and logic](docs2/predicates.md)
131
+ - [JSON schemas and jsonify](docs2/json.md)
132
+ - [Transforms and dsl blocks](docs2/transforms.md)
133
+ - [LLM-compact reference](docs2/llm.txt) -- full DSL spec in one file for LLM context
134
+ - [Examples](examples/)
135
+
136
+ ## LLM integration
137
+
138
+ LLM agents can generate and validate `.kdl` schemas automatically using the linter feedback loop.
139
+
140
+ ### In chats (ChatGPT, Claude, etc.)
141
+
142
+ Use [SYSTEM_PROMPT.md](SYSTEM_PROMPT.md) as system prompt. After generation, run `ssc-gen check -f json` and send errors back to the LLM for correction.
143
+
144
+ ### In AI-powered IDEs (Claude Code, Cursor, etc.)
145
+
146
+ Use the [kdl-schema-dsl](.agents/skills/kdl-schema-dsl) skill for automatic generation, validation, and iteration.
147
+
148
+ ## Development
149
+
150
+ ```bash
151
+ uv sync # install dependencies
152
+ uv build --wheel # build wheel
153
+ uv run pytest # run tests
154
+ uv run ruff check ssc_codegen/
155
+ ```
@@ -0,0 +1,124 @@
1
+ # ssc-codegen
2
+
3
+ Code generator for web scraping parsers. Describe HTML extraction rules in a declarative KDL 2.0 DSL, then generate ready-to-use parser code for multiple languages and libraries.
4
+
5
+ ```
6
+ .kdl schema --> [kdl parser] --> AST --> [linter] --> [converter] --> output code
7
+ ```
8
+
9
+ ## Features
10
+
11
+ - Declarative DSL based on KDL 2.0 syntax
12
+ - Static type checking and linting before code generation
13
+ - Multiple output targets: Python (bs4, lxml, parsel, selectolax), JavaScript (DOM API)
14
+ - Struct types: `item`, `list`, `dict`, `table`, `flat`
15
+ - LLM-friendly: system prompt + linter loop for AI-assisted schema generation
16
+
17
+ ## Install
18
+
19
+ ```bash
20
+ uv tool install ssc_codegen
21
+ ```
22
+
23
+ ## Quick example
24
+
25
+ `books.kdl`:
26
+
27
+ ```kdl
28
+ struct Book type=list {
29
+ @split-doc { css-all ".product-card" }
30
+
31
+ title { css ".title"; text }
32
+ price { css ".price"; text; re #"(\d+\.\d+)"#; to-float }
33
+ url { css "a[href]"; attr "href"; fallback #null }
34
+ }
35
+ ```
36
+
37
+ Generate Python parser:
38
+
39
+ ```bash
40
+ ssc-gen generate books.kdl -t py-bs4 -o ./output
41
+ ```
42
+
43
+ ## Usage
44
+
45
+ ### Generate code
46
+
47
+ ```bash
48
+ # single file
49
+ ssc-gen generate schema.kdl -t py-bs4 -o ./output
50
+
51
+ # all .kdl files in a directory
52
+ ssc-gen generate examples/ -t js-pure -o ./output
53
+
54
+ # with custom package name (for Go and other targets)
55
+ ssc-gen generate schema.kdl -t go-goquery -o ./parsers --package scraper
56
+ ```
57
+
58
+ Targets: `py-bs4`, `py-lxml`, `py-parsel`, `py-slax`, `js-pure`
59
+
60
+ ### Lint schemas
61
+
62
+ ```bash
63
+ # human-readable output
64
+ ssc-gen check schema.kdl
65
+
66
+ # JSON output (for LLM pipelines)
67
+ ssc-gen check schema.kdl -f json
68
+
69
+ # check all files in a directory
70
+ ssc-gen check examples/
71
+ ```
72
+
73
+ ### Test schema against HTML
74
+
75
+ ```bash
76
+ # from file
77
+ ssc-gen run examples/booksToScrape.kdl:MainCatalogue -t py-bs4 -i page.html
78
+
79
+ # from stdin
80
+ curl https://books.toscrape.com/ | ssc-gen run examples/booksToScrape.kdl:MainCatalogue -t py-bs4
81
+ ```
82
+
83
+ ### Health check (verify selectors match elements)
84
+
85
+ ```bash
86
+ # from file
87
+ ssc-gen health examples/booksToScrape.kdl:MainCatalogue -i page.html
88
+
89
+ # from stdin
90
+ curl https://books.toscrape.com/ | ssc-gen health examples/booksToScrape.kdl:MainCatalogue
91
+ ```
92
+
93
+ ## Documentation
94
+
95
+ - [Quick start](docs2/guide.md)
96
+ - [Syntax and file structure](docs2/syntax.md)
97
+ - [Type system](docs2/types.md)
98
+ - [Pipeline operations](docs2/operations.md)
99
+ - [Predicates and logic](docs2/predicates.md)
100
+ - [JSON schemas and jsonify](docs2/json.md)
101
+ - [Transforms and dsl blocks](docs2/transforms.md)
102
+ - [LLM-compact reference](docs2/llm.txt) -- full DSL spec in one file for LLM context
103
+ - [Examples](examples/)
104
+
105
+ ## LLM integration
106
+
107
+ LLM agents can generate and validate `.kdl` schemas automatically using the linter feedback loop.
108
+
109
+ ### In chats (ChatGPT, Claude, etc.)
110
+
111
+ Use [SYSTEM_PROMPT.md](SYSTEM_PROMPT.md) as system prompt. After generation, run `ssc-gen check -f json` and send errors back to the LLM for correction.
112
+
113
+ ### In AI-powered IDEs (Claude Code, Cursor, etc.)
114
+
115
+ Use the [kdl-schema-dsl](.agents/skills/kdl-schema-dsl) skill for automatic generation, validation, and iteration.
116
+
117
+ ## Development
118
+
119
+ ```bash
120
+ uv sync # install dependencies
121
+ uv build --wheel # build wheel
122
+ uv run pytest # run tests
123
+ uv run ruff check ssc_codegen/
124
+ ```
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ssc_codegen"
3
- version = "0.15.3"
3
+ version = "0.17.1"
4
4
  description = "Python-dsl code converter to html parser for web scraping "
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -8,8 +8,6 @@ dependencies = [
8
8
  "bs4>=0.0.2",
9
9
  "colorama>=0.4.6 ; sys_platform == 'win32'",
10
10
  "cssselect>=1.2.0",
11
- "httpx>=0.28.1",
12
- "ichrome>=4.0.4",
13
11
  "lxml>=5.3.0",
14
12
  "soupsieve>=2.6",
15
13
  "typer>=0.15.1",
@@ -17,8 +15,6 @@ dependencies = [
17
15
  # https://github.com/fastapi/typer/discussions/1215
18
16
  # https://github.com/fastapi/typer/pull/1145
19
17
  "click<8.2.0",
20
- "ast-grep-cli>=0.38.1",
21
- "tinycss2>=1.4.0",
22
18
  ]
23
19
 
24
20
  classifiers = [
@@ -43,7 +39,7 @@ Examples = "https://github.com/vypivshiy/selector_schema_codegen/examples"
43
39
 
44
40
 
45
41
  [project.scripts]
46
- ssc-gen = 'ssc_codegen.cli.main:main'
42
+ ssc-gen = 'ssc_codegen.main:main'
47
43
 
48
44
  [build-system]
49
45
  requires = ["hatchling"]
@@ -51,25 +47,25 @@ build-backend = "hatchling.build"
51
47
 
52
48
  [tool.hatch.build.targets.sdist]
53
49
  include = [
54
- "ssc_codegen/*",
50
+ "ssc_codegen/**",
55
51
  ]
56
52
 
53
+ [tool.hatch.build.targets.wheel]
54
+ packages = ["ssc_codegen"]
55
+
57
56
  [dependency-groups]
58
57
  dev = [
58
+ "bs4>=0.0.2",
59
59
  "coverage>=7.6.12",
60
60
  "httpx>=0.28.1",
61
+ "hypothesis>=6.151.9",
62
+ "lxml>=5.3.0",
61
63
  "mypy>=1.14.1",
62
- "parsel>=1.10.0",
64
+ "parsel>=1.9.1",
63
65
  "pytest>=8.3.4",
64
66
  "ruff>=0.9.3",
65
67
  "selectolax>=0.3.27",
66
68
  ]
67
- # python generated code tests
68
- tests = [
69
- "bs4>=0.0.2",
70
- "parsel>=1.10.0",
71
- "selectolax>=0.3.27",
72
- ]
73
69
 
74
70
  [tool.ruff]
75
71
  target-version = "py310"
@@ -0,0 +1,27 @@
1
+ from pathlib import Path
2
+
3
+ from ssc_codegen.parser import Module, PARSER
4
+
5
+ _KDL_TEXT_ENCODING = "utf-8-sig"
6
+
7
+
8
+ def parse_ast(
9
+ src: str | None = None,
10
+ path: str | None = None,
11
+ *,
12
+ css_to_xpath: bool = False,
13
+ ) -> Module:
14
+ if not src and not path:
15
+ raise AttributeError("required src or path argument")
16
+ source_path: Path | None = None
17
+ if path:
18
+ source_path = Path(path).resolve()
19
+ src = source_path.read_text(encoding=_KDL_TEXT_ENCODING)
20
+ if not src:
21
+ raise AttributeError("required src or path argument")
22
+ module = PARSER.parse(src, source_path=source_path)
23
+ if css_to_xpath:
24
+ from ssc_codegen.document_utils import convert_css_to_xpath_module
25
+
26
+ convert_css_to_xpath_module(module)
27
+ return module
@@ -0,0 +1,102 @@
1
+ """Logging configuration for ssc_codegen.
2
+
3
+ Usage
4
+ -----
5
+ Import the logger in any submodule::
6
+
7
+ from ssc_codegen._logging import logger
8
+
9
+ To enable DEBUG output from the CLI, pass the ``--verbose`` / ``-v`` flag,
10
+ or configure it manually::
11
+
12
+ import logging
13
+ logging.getLogger("ssc_codegen").setLevel(logging.DEBUG)
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import logging
19
+ import sys
20
+
21
+ # Windows ANSI backport
22
+ try:
23
+ import colorama
24
+
25
+ colorama.init(autoreset=False)
26
+ _COLORAMA_AVAILABLE = True
27
+ except ImportError:
28
+ _COLORAMA_AVAILABLE = False
29
+
30
+ # ANSI color codes (used on all platforms; colorama translates them on Windows)
31
+ _RESET = "\033[0m"
32
+ _BOLD = "\033[1m"
33
+ _COLORS: dict[int, str] = {
34
+ logging.DEBUG: "\033[36m", # cyan
35
+ logging.INFO: "\033[32m", # green
36
+ logging.WARNING: "\033[33m", # yellow
37
+ logging.ERROR: "\033[31m", # red
38
+ logging.CRITICAL: "\033[35m", # magenta
39
+ }
40
+
41
+
42
+ class _ColorFormatter(logging.Formatter):
43
+ """Formatter that wraps the level name in ANSI color codes."""
44
+
45
+ _FMT = "[{color}{bold}{level}{reset}] {name}: {message}"
46
+
47
+ def format(self, record: logging.LogRecord) -> str: # noqa: A003
48
+ color = _COLORS.get(record.levelno, "")
49
+ level = record.levelname
50
+ name = record.name
51
+ # format the message part the normal way (handles exc_info etc.)
52
+ record.message = record.getMessage()
53
+ if record.exc_info and not record.exc_text:
54
+ record.exc_text = self.formatException(record.exc_info)
55
+
56
+ msg = self._FMT.format(
57
+ color=color,
58
+ bold=_BOLD,
59
+ level=level,
60
+ reset=_RESET,
61
+ name=name,
62
+ message=record.message,
63
+ )
64
+ if record.exc_text:
65
+ msg = f"{msg}\n{record.exc_text}"
66
+ return msg
67
+
68
+
69
+ # Single named logger for the entire ssc_codegen package.
70
+ # All child loggers (parser, main, …) are children of this one,
71
+ # so a single ``logging.getLogger("ssc_codegen").setLevel(DEBUG)``
72
+ # enables everything at once.
73
+ logger = logging.getLogger("ssc_codegen")
74
+
75
+
76
+ def setup_debug_logging() -> None:
77
+ """Enable DEBUG-level logging to stderr for the ssc_codegen logger.
78
+
79
+ Called by the CLI when ``--verbose`` is passed.
80
+ Idempotent: calling multiple times is safe.
81
+ """
82
+ pkg_logger = logging.getLogger("ssc_codegen")
83
+ if pkg_logger.level > logging.DEBUG or pkg_logger.level == logging.NOTSET:
84
+ pkg_logger.setLevel(logging.DEBUG)
85
+
86
+ # Avoid adding duplicate handlers if already configured
87
+ if not any(
88
+ isinstance(h, logging.StreamHandler) for h in pkg_logger.handlers
89
+ ):
90
+ # Use colors only when stderr is a real TTY or colorama is available
91
+ use_color = _COLORAMA_AVAILABLE or (
92
+ hasattr(sys.stderr, "isatty") and sys.stderr.isatty()
93
+ )
94
+ handler = logging.StreamHandler()
95
+ handler.setLevel(logging.DEBUG)
96
+ formatter: logging.Formatter = (
97
+ _ColorFormatter()
98
+ if use_color
99
+ else logging.Formatter("[%(levelname)s] %(name)s: %(message)s")
100
+ )
101
+ handler.setFormatter(formatter)
102
+ pkg_logger.addHandler(handler)
@@ -0,0 +1,174 @@
1
+ """
2
+ AST nodes for the KDL Schema DSL.
3
+
4
+ Import everything from here:
5
+ from ssc_codegen.ast import Module, Field, CssSelect, ...
6
+ """
7
+ from .types import VariableType, StructType
8
+
9
+ from .base import Node
10
+
11
+ from .module import (
12
+ Module,
13
+ CodeStartHook,
14
+ CodeEndHook,
15
+ Docstring,
16
+ Imports,
17
+ Utilities,
18
+ )
19
+
20
+ from .typedef import TypeDef, TypeDefField
21
+
22
+ from .jsondef import JsonDef, JsonDefField
23
+
24
+ from .struct import (
25
+ Struct,
26
+ StructDocstring,
27
+ PreValidate,
28
+ Init,
29
+ InitField,
30
+ SplitDoc,
31
+ Key,
32
+ Value,
33
+ TableConfig,
34
+ TableRow,
35
+ TableMatchKey,
36
+ Field,
37
+ StartParse
38
+ )
39
+
40
+ from .selectors import (
41
+ CssSelect,
42
+ CssSelectAll,
43
+ XpathSelect,
44
+ XpathSelectAll,
45
+ CssRemove,
46
+ XpathRemove,
47
+ )
48
+
49
+ from .extract import Text, Raw, Attr
50
+
51
+ from .string import (
52
+ Trim,
53
+ Ltrim,
54
+ Rtrim,
55
+ NormalizeSpace,
56
+ RmPrefix,
57
+ RmSuffix,
58
+ RmPrefixSuffix,
59
+ Fmt,
60
+ Repl,
61
+ ReplMap,
62
+ Lower,
63
+ Upper,
64
+ Split,
65
+ Join,
66
+ Unescape,
67
+ )
68
+
69
+ from .regex import Re, ReAll, ReSub
70
+
71
+ from .array import Index, Slice, Len, Unique
72
+
73
+ from .cast import ToInt, ToFloat, ToBool, Jsonify, Nested
74
+
75
+ from .control import Self, Fallback, FallbackStart, FallbackEnd, Return
76
+
77
+ from .predicate_containers import Filter, Assert, Match
78
+
79
+ from .predicate_ops import (
80
+ PredEq,
81
+ PredNe,
82
+ PredGt,
83
+ PredLt,
84
+ PredGe,
85
+ PredLe,
86
+ PredRange,
87
+ PredStarts,
88
+ PredEnds,
89
+ PredContains,
90
+ PredIn,
91
+ PredRe,
92
+ PredReAny,
93
+ PredReAll,
94
+ PredCss,
95
+ PredXpath,
96
+ PredHasAttr,
97
+ PredCountEq,
98
+ PredCountGt,
99
+ PredCountLt,
100
+ PredCountNe,
101
+ PredCountGe,
102
+ PredCountLe,
103
+ PredCountRange,
104
+ PredAttrEnds,
105
+ PredAttrEq,
106
+ PredAttrNe,
107
+ PredAttrRe,
108
+ PredAttrStarts,
109
+ PredAttrContains,
110
+ PredTextContains,
111
+ PredTextEnds,
112
+ PredTextRe,
113
+ PredTextStarts,
114
+ LogicNot,
115
+ LogicAnd,
116
+ LogicOr,
117
+ )
118
+
119
+ from .transform import TransformDef, TransformTarget, TransformCall
120
+
121
+ __all__ = [
122
+ # types
123
+ "VariableType", "StructType",
124
+ # base
125
+ "Node",
126
+ # module
127
+ "Module", "CodeStartHook", "CodeEndHook",
128
+ "Docstring", "Imports", "Utilities",
129
+ # typedef
130
+ "TypeDef", "TypeDefField",
131
+ # jsondef
132
+ "JsonDef", "JsonDefField",
133
+ # struct
134
+ "Struct", "StructDocstring", "PreValidate",
135
+ "Init", "InitField", "SplitDoc",
136
+ "Key", "Value",
137
+ "TableConfig", "TableRow", "TableMatchKey",
138
+ "Field", "StartParse",
139
+ # selectors
140
+ "CssSelect", "CssSelectAll",
141
+ "XpathSelect", "XpathSelectAll",
142
+ "CssRemove", "XpathRemove",
143
+ # extract
144
+ "Text", "Raw", "Attr",
145
+ # string
146
+ "Trim", "Ltrim", "Rtrim", "NormalizeSpace",
147
+ "RmPrefix", "RmSuffix", "RmPrefixSuffix",
148
+ "Fmt", "Repl", "ReplMap",
149
+ "Lower", "Upper", "Split", "Join", "Unescape",
150
+ # regex
151
+ "Re", "ReAll", "ReSub",
152
+ # array
153
+ "Index", "Slice", "Len", "Unique",
154
+ # cast
155
+ "ToInt", "ToFloat", "ToBool", "Jsonify", "Nested",
156
+ # control
157
+ "Self", "Fallback", "FallbackStart", "FallbackEnd", "Return",
158
+ # predicate containers
159
+ "Filter", "Assert", "Match",
160
+ # predicate ops
161
+ "PredEq", "PredNe",
162
+ "PredGt", "PredLt", "PredGe", "PredLe", "PredRange",
163
+ "PredStarts", "PredEnds", "PredContains", "PredIn",
164
+ "PredRe", "PredReAny", "PredReAll",
165
+ "PredCss", "PredXpath", "PredHasAttr",
166
+ "PredAttrEq", "PredAttrNe",
167
+ "PredAttrStarts", "PredAttrEnds", "PredAttrContains", "PredAttrRe",
168
+ "PredTextStarts", "PredTextEnds", "PredTextContains", "PredTextRe",
169
+ "PredCountEq", "PredCountGt", "PredCountLt",
170
+ "PredCountNe", "PredCountGe", "PredCountLe", "PredCountRange",
171
+ "LogicNot", "LogicAnd", "LogicOr",
172
+ # transform
173
+ "TransformDef", "TransformTarget", "TransformCall",
174
+ ]