ssc_codegen 0.18.0__tar.gz → 0.19.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/.gitignore +3 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/PKG-INFO +1 -1
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/pyproject.toml +1 -1
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/__init__.py +3 -1
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/struct.py +87 -9
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/types.py +1 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/js_pure.py +539 -9
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/py_bs4.py +402 -18
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/py_lxml.py +4 -1
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/py_parsel.py +1 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/py_slax.py +1 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/rules_struct.py +371 -11
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/parser.py +52 -2
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/parsers/__init__.py +4 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/parsers/spec.py +196 -34
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/LICENSE +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/README.md +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/__init__.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/_logging.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/array.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/base.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/cast.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/control.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/extract.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/helpers.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/jsondef.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/module.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/predicate_containers.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/predicate_ops.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/regex.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/selectors.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/string.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/transform.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/typedef.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/base.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/go_goquery.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/helpers.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/document_utils.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/exceptions.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/health.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/kdl/__init__.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/kdl/parser.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/__init__.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/_kdl_lang.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/base.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/errors.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/format_errors.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/metadata.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/navigation.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/path.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/rule_keywords.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/rules.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/type_rules.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/types.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/main.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/parsers/curl.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/parsers/http.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/pseudo_selectors.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/regex_utils.py +0 -0
- {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/selector_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ssc_codegen
|
|
3
|
-
Version: 0.18.0
|
|
3
|
+
Version: 0.19.1
|
|
4
4
|
Summary: Python-dsl code converter to html parser for web scraping
|
|
5
5
|
Project-URL: Documentation, https://github.com/vypivshiy/selector_schema_codegen#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/vypivshiy/selector_schema_codegen/issues
|
|
@@ -34,6 +34,8 @@ from .struct import (
|
|
|
34
34
|
TableRow,
|
|
35
35
|
TableMatchKey,
|
|
36
36
|
RequestConfig,
|
|
37
|
+
ErrorResponse,
|
|
38
|
+
PlaceholderSpec,
|
|
37
39
|
Field,
|
|
38
40
|
StartParse,
|
|
39
41
|
)
|
|
@@ -136,7 +138,7 @@ __all__ = [
|
|
|
136
138
|
"Init", "InitField", "SplitDoc",
|
|
137
139
|
"Key", "Value",
|
|
138
140
|
"TableConfig", "TableRow", "TableMatchKey",
|
|
139
|
-
"RequestConfig", "Field", "StartParse",
|
|
141
|
+
"RequestConfig", "ErrorResponse", "PlaceholderSpec", "Field", "StartParse",
|
|
140
142
|
# selectors
|
|
141
143
|
"CssSelect", "CssSelectAll",
|
|
142
144
|
"XpathSelect", "XpathSelectAll",
|
|
@@ -6,7 +6,46 @@ from typing import cast
|
|
|
6
6
|
from .base import Node
|
|
7
7
|
from .types import VariableType, StructType
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
# Typed placeholder grammar:
|
|
10
|
+
# {{ NAME ( : PRIM )? ( [] )? ( ? )? ( | STYLE )? }}
|
|
11
|
+
# NAME = [A-Za-z][A-Za-z0-9_-]* (first char must be a letter)
|
|
12
|
+
# PRIM = str | int | float | bool (default: str)
|
|
13
|
+
# STYLE = repeat | csv | bracket | pipe | space (arrays only; default: repeat)
|
|
14
|
+
# Legacy `{{name}}` remains valid (groups 2-5 = None → str, scalar, required).
|
|
15
|
+
_PLACEHOLDER_RE = _re.compile(
|
|
16
|
+
r"\{\{"
|
|
17
|
+
r"([A-Za-z][A-Za-z0-9_-]*)"
|
|
18
|
+
r"(?::(str|int|float|bool))?"
|
|
19
|
+
r"(\[\])?"
|
|
20
|
+
r"(\?)?"
|
|
21
|
+
r"(?:\|(repeat|csv|bracket|pipe|space))?"
|
|
22
|
+
r"\}\}"
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Widened pattern — any `{{…}}`-shaped token. Used by the linter to flag
|
|
26
|
+
# malformed placeholders that the strict _PLACEHOLDER_RE would silently skip.
|
|
27
|
+
_PLACEHOLDER_WIDE_RE = _re.compile(r"\{\{([^{}]*)\}\}")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
|
|
31
|
+
class PlaceholderSpec:
|
|
32
|
+
"""Parsed `{{…}}` token from an @request payload."""
|
|
33
|
+
|
|
34
|
+
name: str = ""
|
|
35
|
+
type_name: str = "str" # "str" | "int" | "float" | "bool"
|
|
36
|
+
is_array: bool = False
|
|
37
|
+
is_optional: bool = False
|
|
38
|
+
style: str | None = None # None == default "repeat" when is_array
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _parse_placeholder(match: "_re.Match[str]") -> PlaceholderSpec:
|
|
42
|
+
return PlaceholderSpec(
|
|
43
|
+
name=match.group(1),
|
|
44
|
+
type_name=match.group(2) or "str",
|
|
45
|
+
is_array=bool(match.group(3)),
|
|
46
|
+
is_optional=bool(match.group(4)),
|
|
47
|
+
style=match.group(5) or None,
|
|
48
|
+
)
|
|
10
49
|
|
|
11
50
|
|
|
12
51
|
@dataclass
|
|
@@ -51,6 +90,14 @@ class Struct(Node):
|
|
|
51
90
|
def use_request(self) -> bool:
|
|
52
91
|
return bool(self.request_configs)
|
|
53
92
|
|
|
93
|
+
@property
|
|
94
|
+
def is_rest(self) -> bool:
|
|
95
|
+
return self.struct_type == StructType.REST
|
|
96
|
+
|
|
97
|
+
@property
|
|
98
|
+
def errors(self) -> "list[ErrorResponse]":
|
|
99
|
+
return [n for n in self.body if isinstance(n, ErrorResponse)]
|
|
100
|
+
|
|
54
101
|
|
|
55
102
|
@dataclass
|
|
56
103
|
class StructDocstring(Node):
|
|
@@ -183,7 +230,8 @@ class TableMatchKey(Node):
|
|
|
183
230
|
class RequestConfig(Node):
|
|
184
231
|
"""
|
|
185
232
|
Optional transport layer config for a struct.
|
|
186
|
-
DSL: @request [name="suffix"] [response-path="..."] [response-join="..."]
|
|
233
|
+
DSL: @request [name="suffix"] [response-path="..."] [response-join="..."]
|
|
234
|
+
[response=JsonSchema] [doc="..."] \"""...\"""
|
|
187
235
|
|
|
188
236
|
raw_payload stores the verbatim curl or raw HTTP string (with {{placeholders}}).
|
|
189
237
|
Transport normalization (curl/HTTP parse → kwargs) happens at converter stage.
|
|
@@ -191,6 +239,10 @@ class RequestConfig(Node):
|
|
|
191
239
|
name="" (unnamed) generates fetch()/async_fetch().
|
|
192
240
|
name="by-id" generates fetch_by_id()/async_fetch_by_id() (Python)
|
|
193
241
|
or fetchById() (JS).
|
|
242
|
+
|
|
243
|
+
response_schema (type=rest only): json schema name for typed 2xx response.
|
|
244
|
+
Empty string = void return.
|
|
245
|
+
doc (type=rest only): per-method docstring.
|
|
194
246
|
"""
|
|
195
247
|
|
|
196
248
|
raw_payload: str = ""
|
|
@@ -199,18 +251,44 @@ class RequestConfig(Node):
|
|
|
199
251
|
"" # join separator when response-path resolves to list[str]
|
|
200
252
|
)
|
|
201
253
|
name: str = "" # method name suffix; "" = default fetch()
|
|
254
|
+
response_schema: str = "" # type=rest: json schema for 2xx body
|
|
255
|
+
doc: str = "" # type=rest: per-method docstring
|
|
202
256
|
|
|
203
257
|
@property
|
|
204
|
-
def placeholders(self) -> list[str]:
|
|
205
|
-
"""Unique
|
|
258
|
+
def placeholders(self) -> list[PlaceholderSpec]:
|
|
259
|
+
"""Unique placeholders in declaration order (dedup by name)."""
|
|
206
260
|
seen: set[str] = set()
|
|
207
|
-
result: list[str] = []
|
|
208
|
-
for
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
261
|
+
result: list[PlaceholderSpec] = []
|
|
262
|
+
for m in _PLACEHOLDER_RE.finditer(self.raw_payload):
|
|
263
|
+
spec = _parse_placeholder(m)
|
|
264
|
+
if spec.name not in seen:
|
|
265
|
+
seen.add(spec.name)
|
|
266
|
+
result.append(spec)
|
|
212
267
|
return result
|
|
213
268
|
|
|
269
|
+
@property
|
|
270
|
+
def placeholder_names(self) -> list[str]:
|
|
271
|
+
"""Unique placeholder names in declaration order."""
|
|
272
|
+
return [p.name for p in self.placeholders]
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
@dataclass
|
|
276
|
+
class ErrorResponse(Node):
|
|
277
|
+
"""
|
|
278
|
+
Error response mapping for type=rest struct.
|
|
279
|
+
DSL: @error <status> [field="<name>"] <JsonSchema>
|
|
280
|
+
|
|
281
|
+
status: HTTP status code [100..599].
|
|
282
|
+
schema_name: json schema reference for deserialised error body.
|
|
283
|
+
discriminator_field: optional body field name. When set, the error triggers
|
|
284
|
+
on 2xx responses where <field> is present in the parsed JSON body
|
|
285
|
+
(used for APIs that return 200 + error payload).
|
|
286
|
+
"""
|
|
287
|
+
|
|
288
|
+
status: int = 0
|
|
289
|
+
schema_name: str = ""
|
|
290
|
+
discriminator_field: str | None = None
|
|
291
|
+
|
|
214
292
|
|
|
215
293
|
@dataclass
|
|
216
294
|
class Field(Node):
|