ssc_codegen 0.17.1__tar.gz → 0.18.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/.gitignore +147 -147
  2. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/LICENSE +20 -20
  3. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/PKG-INFO +2 -2
  4. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/README.md +124 -124
  5. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/pyproject.toml +112 -112
  6. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/__init__.py +27 -27
  7. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/_logging.py +102 -102
  8. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/__init__.py +175 -174
  9. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/array.py +56 -56
  10. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/base.py +24 -24
  11. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/cast.py +72 -72
  12. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/control.py +82 -82
  13. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/extract.py +51 -51
  14. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/helpers.py +7 -7
  15. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/jsondef.py +37 -37
  16. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/module.py +92 -92
  17. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/predicate_containers.py +44 -44
  18. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/predicate_ops.py +309 -309
  19. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/regex.py +57 -57
  20. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/selectors.py +55 -55
  21. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/string.py +158 -158
  22. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/struct.py +283 -232
  23. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/transform.py +63 -63
  24. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/typedef.py +41 -41
  25. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/ast/types.py +79 -79
  26. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/converters/base.py +331 -327
  27. ssc_codegen-0.18.0/ssc_codegen/converters/go_goquery.py +1872 -0
  28. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/converters/helpers.py +76 -76
  29. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/converters/js_pure.py +1610 -1445
  30. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/converters/py_bs4.py +1509 -1384
  31. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/converters/py_lxml.py +548 -538
  32. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/converters/py_parsel.py +478 -475
  33. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/converters/py_slax.py +433 -428
  34. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/document_utils.py +107 -107
  35. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/exceptions.py +11 -11
  36. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/health.py +426 -418
  37. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/kdl/__init__.py +35 -35
  38. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/kdl/parser.py +1366 -954
  39. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/linter/__init__.py +26 -26
  40. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/linter/_kdl_lang.py +435 -415
  41. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/linter/base.py +822 -818
  42. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/linter/errors.py +78 -78
  43. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/linter/format_errors.py +285 -285
  44. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/linter/metadata.py +18 -18
  45. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/linter/navigation.py +262 -262
  46. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/linter/path.py +36 -36
  47. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/linter/rule_keywords.py +321 -320
  48. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/linter/rules.py +848 -848
  49. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/linter/rules_struct.py +611 -571
  50. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/linter/type_rules.py +523 -523
  51. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/linter/types.py +188 -188
  52. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/main.py +668 -616
  53. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/parser.py +2357 -2298
  54. ssc_codegen-0.18.0/ssc_codegen/parsers/__init__.py +23 -0
  55. ssc_codegen-0.18.0/ssc_codegen/parsers/curl.py +240 -0
  56. ssc_codegen-0.18.0/ssc_codegen/parsers/http.py +218 -0
  57. ssc_codegen-0.18.0/ssc_codegen/parsers/spec.py +327 -0
  58. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/pseudo_selectors.py +39 -39
  59. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/regex_utils.py +204 -204
  60. {ssc_codegen-0.17.1 → ssc_codegen-0.18.0}/ssc_codegen/selector_utils.py +8 -8
@@ -1,147 +1,147 @@
1
- # Byte-compiled / optimized / DLL files
2
- __pycache__/
3
- *.py[cod]
4
- *$py.class
5
-
6
- # C extensions / native shared libs
7
- *.so
8
- *.dll
9
- *.dylib
10
- *.exp
11
- *.lib
12
-
13
- # Distribution / packaging
14
- .Python
15
- build/
16
- develop-eggs/
17
- dist/
18
- downloads/
19
- eggs/
20
- .eggs/
21
- lib/
22
- lib64/
23
- parts/
24
- sdist/
25
- var/
26
- wheels/
27
- pip-wheel-metadata/
28
- share/python-wheels/
29
- .ruff_cache/
30
- *.egg-info/
31
- .installed.cfg
32
- *.egg
33
- MANIFEST
34
-
35
- # PyInstaller
36
- # Usually these files are written by a python script from a template
37
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
38
- *.manifest
39
- *.spec
40
-
41
- # Installer logs
42
- pip-log.txt
43
- pip-delete-this-directory.txt
44
-
45
- # Unit tests / coverage reports
46
- htmlcov/
47
- .tox/
48
- .nox/
49
- .coverage
50
- .coverage.*
51
- .cache
52
- nosetests.xml
53
- coverage.xml
54
- *.cover
55
- *.py,cover
56
- .hypothesis/
57
- .pytest_cache/
58
-
59
- # Translations
60
- *.mo
61
- *.pot
62
-
63
- # Django stuff:
64
- *.log
65
- local_settings.py
66
- db.sqlite3
67
- db.sqlite3-journal
68
-
69
- # Flask stuff:
70
- instance/
71
- .webassets-cache
72
-
73
- # Scrapy stuff:
74
- .scrapy
75
-
76
- # Sphinx documentation
77
- docs/source/_build/
78
-
79
- # PyBuilder
80
- target/
81
-
82
- # Jupyter Notebook
83
- .ipynb_checkpoints
84
-
85
- # IPython
86
- profile_default/
87
- ipython_config.py
88
-
89
- # pyenv
90
- .python-version
91
-
92
- # pipenv
93
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
- # install all needed dependencies.
97
- #Pipfile.lock
98
-
99
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow
100
- __pypackages__/
101
-
102
- # Celery stuff
103
- celerybeat-schedule
104
- celerybeat.pid
105
-
106
- # SageMath parsed files
107
- *.sage.py
108
-
109
- # Environments
110
- .env
111
- .venv
112
- env/
113
- venv/
114
- ENV/
115
- env.bak/
116
- venv.bak/
117
-
118
- # Spyder project settings
119
- .spyderproject
120
- .spyproject
121
-
122
- # Rope project settings
123
- .ropeproject
124
-
125
- # mkdocs documentation
126
- /site
127
-
128
- # mypy
129
- .mypy_cache/
130
- .dmypy.json
131
- dmypy.json
132
-
133
- # Pyre type checker
134
- .pyre/
135
-
136
- # project
137
- test_schemas/
138
- .idea/.gitignore
139
- .idea/.name
140
- .idea/misc.xml
141
- .idea/modules.xml
142
- .idea/selector_schema_codegen.iml
143
- .idea/vcs.xml
144
- .idea/yh_parser.iml
145
- .idea/inspectionProfiles/profiles_settings.xml
146
- .idea/inspectionProfiles/Project_Default.xml
147
- .idea/libraries/Dart_SDK.xml
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions / native shared libs
7
+ *.so
8
+ *.dll
9
+ *.dylib
10
+ *.exp
11
+ *.lib
12
+
13
+ # Distribution / packaging
14
+ .Python
15
+ build/
16
+ develop-eggs/
17
+ dist/
18
+ downloads/
19
+ eggs/
20
+ .eggs/
21
+ lib/
22
+ lib64/
23
+ parts/
24
+ sdist/
25
+ var/
26
+ wheels/
27
+ pip-wheel-metadata/
28
+ share/python-wheels/
29
+ .ruff_cache/
30
+ *.egg-info/
31
+ .installed.cfg
32
+ *.egg
33
+ MANIFEST
34
+
35
+ # PyInstaller
36
+ # Usually these files are written by a python script from a template
37
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
38
+ *.manifest
39
+ *.spec
40
+
41
+ # Installer logs
42
+ pip-log.txt
43
+ pip-delete-this-directory.txt
44
+
45
+ # Unit tests / coverage reports
46
+ htmlcov/
47
+ .tox/
48
+ .nox/
49
+ .coverage
50
+ .coverage.*
51
+ .cache
52
+ nosetests.xml
53
+ coverage.xml
54
+ *.cover
55
+ *.py,cover
56
+ .hypothesis/
57
+ .pytest_cache/
58
+
59
+ # Translations
60
+ *.mo
61
+ *.pot
62
+
63
+ # Django stuff:
64
+ *.log
65
+ local_settings.py
66
+ db.sqlite3
67
+ db.sqlite3-journal
68
+
69
+ # Flask stuff:
70
+ instance/
71
+ .webassets-cache
72
+
73
+ # Scrapy stuff:
74
+ .scrapy
75
+
76
+ # Sphinx documentation
77
+ docs/source/_build/
78
+
79
+ # PyBuilder
80
+ target/
81
+
82
+ # Jupyter Notebook
83
+ .ipynb_checkpoints
84
+
85
+ # IPython
86
+ profile_default/
87
+ ipython_config.py
88
+
89
+ # pyenv
90
+ .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
100
+ __pypackages__/
101
+
102
+ # Celery stuff
103
+ celerybeat-schedule
104
+ celerybeat.pid
105
+
106
+ # SageMath parsed files
107
+ *.sage.py
108
+
109
+ # Environments
110
+ .env
111
+ .venv
112
+ env/
113
+ venv/
114
+ ENV/
115
+ env.bak/
116
+ venv.bak/
117
+
118
+ # Spyder project settings
119
+ .spyderproject
120
+ .spyproject
121
+
122
+ # Rope project settings
123
+ .ropeproject
124
+
125
+ # mkdocs documentation
126
+ /site
127
+
128
+ # mypy
129
+ .mypy_cache/
130
+ .dmypy.json
131
+ dmypy.json
132
+
133
+ # Pyre type checker
134
+ .pyre/
135
+
136
+ # project
137
+ test_schemas/
138
+ .idea/.gitignore
139
+ .idea/.name
140
+ .idea/misc.xml
141
+ .idea/modules.xml
142
+ .idea/selector_schema_codegen.iml
143
+ .idea/vcs.xml
144
+ .idea/yh_parser.iml
145
+ .idea/inspectionProfiles/profiles_settings.xml
146
+ .idea/inspectionProfiles/Project_Default.xml
147
+ .idea/libraries/Dart_SDK.xml
@@ -1,21 +1,21 @@
1
- MIT License
2
-
3
- Copyright (c) 2023 Georgiy
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Georgiy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
21
  SOFTWARE.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ssc_codegen
3
- Version: 0.17.1
3
+ Version: 0.18.0
4
4
  Summary: Python-dsl code converter to html parser for web scraping
5
5
  Project-URL: Documentation, https://github.com/vypivshiy/selector_schema_codegen#readme
6
6
  Project-URL: Issues, https://github.com/vypivshiy/selector_schema_codegen/issues
@@ -86,7 +86,7 @@ ssc-gen generate examples/ -t js-pure -o ./output
86
86
  ssc-gen generate schema.kdl -t go-goquery -o ./parsers --package scraper
87
87
  ```
88
88
 
89
- Targets: `py-bs4`, `py-lxml`, `py-parsel`, `py-slax`, `js-pure`
89
+ Targets: `py-bs4`, `py-lxml`, `py-parsel`, `py-slax`, `js-pure`, `go-goquery`
90
90
 
91
91
  ### Lint schemas
92
92
 
@@ -1,124 +1,124 @@
1
- # ssc-codegen
2
-
3
- Code generator for web scraping parsers. Describe HTML extraction rules in a declarative KDL 2.0 DSL, then generate ready-to-use parser code for multiple languages and libraries.
4
-
5
- ```
6
- .kdl schema --> [kdl parser] --> AST --> [linter] --> [converter] --> output code
7
- ```
8
-
9
- ## Features
10
-
11
- - Declarative DSL based on KDL 2.0 syntax
12
- - Static type checking and linting before code generation
13
- - Multiple output targets: Python (bs4, lxml, parsel, selectolax), JavaScript (DOM API)
14
- - Struct types: `item`, `list`, `dict`, `table`, `flat`
15
- - LLM-friendly: system prompt + linter loop for AI-assisted schema generation
16
-
17
- ## Install
18
-
19
- ```bash
20
- uv tool install ssc_codegen
21
- ```
22
-
23
- ## Quick example
24
-
25
- `books.kdl`:
26
-
27
- ```kdl
28
- struct Book type=list {
29
- @split-doc { css-all ".product-card" }
30
-
31
- title { css ".title"; text }
32
- price { css ".price"; text; re #"(\d+\.\d+)"#; to-float }
33
- url { css "a[href]"; attr "href"; fallback #null }
34
- }
35
- ```
36
-
37
- Generate Python parser:
38
-
39
- ```bash
40
- ssc-gen generate books.kdl -t py-bs4 -o ./output
41
- ```
42
-
43
- ## Usage
44
-
45
- ### Generate code
46
-
47
- ```bash
48
- # single file
49
- ssc-gen generate schema.kdl -t py-bs4 -o ./output
50
-
51
- # all .kdl files in a directory
52
- ssc-gen generate examples/ -t js-pure -o ./output
53
-
54
- # with custom package name (for Go and other targets)
55
- ssc-gen generate schema.kdl -t go-goquery -o ./parsers --package scraper
56
- ```
57
-
58
- Targets: `py-bs4`, `py-lxml`, `py-parsel`, `py-slax`, `js-pure`
59
-
60
- ### Lint schemas
61
-
62
- ```bash
63
- # human-readable output
64
- ssc-gen check schema.kdl
65
-
66
- # JSON output (for LLM pipelines)
67
- ssc-gen check schema.kdl -f json
68
-
69
- # check all files in a directory
70
- ssc-gen check examples/
71
- ```
72
-
73
- ### Test schema against HTML
74
-
75
- ```bash
76
- # from file
77
- ssc-gen run examples/booksToScrape.kdl:MainCatalogue -t py-bs4 -i page.html
78
-
79
- # from stdin
80
- curl https://books.toscrape.com/ | ssc-gen run examples/booksToScrape.kdl:MainCatalogue -t py-bs4
81
- ```
82
-
83
- ### Health check (verify selectors match elements)
84
-
85
- ```bash
86
- # from file
87
- ssc-gen health examples/booksToScrape.kdl:MainCatalogue -i page.html
88
-
89
- # from stdin
90
- curl https://books.toscrape.com/ | ssc-gen health examples/booksToScrape.kdl:MainCatalogue
91
- ```
92
-
93
- ## Documentation
94
-
95
- - [Quick start](docs2/guide.md)
96
- - [Syntax and file structure](docs2/syntax.md)
97
- - [Type system](docs2/types.md)
98
- - [Pipeline operations](docs2/operations.md)
99
- - [Predicates and logic](docs2/predicates.md)
100
- - [JSON schemas and jsonify](docs2/json.md)
101
- - [Transforms and dsl blocks](docs2/transforms.md)
102
- - [LLM-compact reference](docs2/llm.txt) -- full DSL spec in one file for LLM context
103
- - [Examples](examples/)
104
-
105
- ## LLM integration
106
-
107
- LLM agents can generate and validate `.kdl` schemas automatically using the linter feedback loop.
108
-
109
- ### In chats (ChatGPT, Claude, etc.)
110
-
111
- Use [SYSTEM_PROMPT.md](SYSTEM_PROMPT.md) as system prompt. After generation, run `ssc-gen check -f json` and send errors back to the LLM for correction.
112
-
113
- ### In AI-powered IDEs (Claude Code, Cursor, etc.)
114
-
115
- Use the [kdl-schema-dsl](.agents/skills/kdl-schema-dsl) skill for automatic generation, validation, and iteration.
116
-
117
- ## Development
118
-
119
- ```bash
120
- uv sync # install dependencies
121
- uv build --wheel # build wheel
122
- uv run pytest # run tests
123
- uv run ruff check ssc_codegen/
124
- ```
1
+ # ssc-codegen
2
+
3
+ Code generator for web scraping parsers. Describe HTML extraction rules in a declarative KDL 2.0 DSL, then generate ready-to-use parser code for multiple languages and libraries.
4
+
5
+ ```
6
+ .kdl schema --> [kdl parser] --> AST --> [linter] --> [converter] --> output code
7
+ ```
8
+
9
+ ## Features
10
+
11
+ - Declarative DSL based on KDL 2.0 syntax
12
+ - Static type checking and linting before code generation
13
+ - Multiple output targets: Python (bs4, lxml, parsel, selectolax), JavaScript (DOM API), Go (goquery)
14
+ - Struct types: `item`, `list`, `dict`, `table`, `flat`
15
+ - LLM-friendly: system prompt + linter loop for AI-assisted schema generation
16
+
17
+ ## Install
18
+
19
+ ```bash
20
+ uv tool install ssc_codegen
21
+ ```
22
+
23
+ ## Quick example
24
+
25
+ `books.kdl`:
26
+
27
+ ```kdl
28
+ struct Book type=list {
29
+ @split-doc { css-all ".product-card" }
30
+
31
+ title { css ".title"; text }
32
+ price { css ".price"; text; re #"(\d+\.\d+)"#; to-float }
33
+ url { css "a[href]"; attr "href"; fallback #null }
34
+ }
35
+ ```
36
+
37
+ Generate Python parser:
38
+
39
+ ```bash
40
+ ssc-gen generate books.kdl -t py-bs4 -o ./output
41
+ ```
42
+
43
+ ## Usage
44
+
45
+ ### Generate code
46
+
47
+ ```bash
48
+ # single file
49
+ ssc-gen generate schema.kdl -t py-bs4 -o ./output
50
+
51
+ # all .kdl files in a directory
52
+ ssc-gen generate examples/ -t js-pure -o ./output
53
+
54
+ # with custom package name (for Go and other targets)
55
+ ssc-gen generate schema.kdl -t go-goquery -o ./parsers --package scraper
56
+ ```
57
+
58
+ Targets: `py-bs4`, `py-lxml`, `py-parsel`, `py-slax`, `js-pure`, `go-goquery`
59
+
60
+ ### Lint schemas
61
+
62
+ ```bash
63
+ # human-readable output
64
+ ssc-gen check schema.kdl
65
+
66
+ # JSON output (for LLM pipelines)
67
+ ssc-gen check schema.kdl -f json
68
+
69
+ # check all files in a directory
70
+ ssc-gen check examples/
71
+ ```
72
+
73
+ ### Test schema against HTML
74
+
75
+ ```bash
76
+ # from file
77
+ ssc-gen run examples/booksToScrape.kdl:MainCatalogue -t py-bs4 -i page.html
78
+
79
+ # from stdin
80
+ curl https://books.toscrape.com/ | ssc-gen run examples/booksToScrape.kdl:MainCatalogue -t py-bs4
81
+ ```
82
+
83
+ ### Health check (verify selectors match elements)
84
+
85
+ ```bash
86
+ # from file
87
+ ssc-gen health examples/booksToScrape.kdl:MainCatalogue -i page.html
88
+
89
+ # from stdin
90
+ curl https://books.toscrape.com/ | ssc-gen health examples/booksToScrape.kdl:MainCatalogue
91
+ ```
92
+
93
+ ## Documentation
94
+
95
+ - [Quick start](docs2/guide.md)
96
+ - [Syntax and file structure](docs2/syntax.md)
97
+ - [Type system](docs2/types.md)
98
+ - [Pipeline operations](docs2/operations.md)
99
+ - [Predicates and logic](docs2/predicates.md)
100
+ - [JSON schemas and jsonify](docs2/json.md)
101
+ - [Transforms and dsl blocks](docs2/transforms.md)
102
+ - [LLM-compact reference](docs2/llm.txt) -- full DSL spec in one file for LLM context
103
+ - [Examples](examples/)
104
+
105
+ ## LLM integration
106
+
107
+ LLM agents can generate and validate `.kdl` schemas automatically using the linter feedback loop.
108
+
109
+ ### In chats (ChatGPT, Claude, etc.)
110
+
111
+ Use [SYSTEM_PROMPT.md](SYSTEM_PROMPT.md) as the system prompt. After generation, run `ssc-gen check -f json` and send any errors back to the LLM for correction.
112
+
113
+ ### In AI-powered IDEs (Claude Code, Cursor, etc.)
114
+
115
+ Use the [kdl-schema-dsl](.agents/skills/kdl-schema-dsl) skill for automatic generation, validation, and iteration.
116
+
117
+ ## Development
118
+
119
+ ```bash
120
+ uv sync # install dependencies
121
+ uv build --wheel # build wheel
122
+ uv run pytest # run tests
123
+ uv run ruff check ssc_codegen/
124
+ ```