ssc_codegen 0.18.0__tar.gz → 0.19.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/.gitignore +3 -0
  2. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/PKG-INFO +1 -1
  3. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/pyproject.toml +1 -1
  4. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/__init__.py +3 -1
  5. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/struct.py +87 -9
  6. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/types.py +1 -0
  7. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/js_pure.py +539 -9
  8. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/py_bs4.py +402 -18
  9. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/py_lxml.py +4 -1
  10. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/py_parsel.py +1 -0
  11. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/py_slax.py +1 -0
  12. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/rules_struct.py +371 -11
  13. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/parser.py +52 -2
  14. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/parsers/__init__.py +4 -0
  15. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/parsers/spec.py +196 -34
  16. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/LICENSE +0 -0
  17. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/README.md +0 -0
  18. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/__init__.py +0 -0
  19. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/_logging.py +0 -0
  20. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/array.py +0 -0
  21. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/base.py +0 -0
  22. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/cast.py +0 -0
  23. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/control.py +0 -0
  24. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/extract.py +0 -0
  25. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/helpers.py +0 -0
  26. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/jsondef.py +0 -0
  27. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/module.py +0 -0
  28. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/predicate_containers.py +0 -0
  29. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/predicate_ops.py +0 -0
  30. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/regex.py +0 -0
  31. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/selectors.py +0 -0
  32. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/string.py +0 -0
  33. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/transform.py +0 -0
  34. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/ast/typedef.py +0 -0
  35. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/base.py +0 -0
  36. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/go_goquery.py +0 -0
  37. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/converters/helpers.py +0 -0
  38. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/document_utils.py +0 -0
  39. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/exceptions.py +0 -0
  40. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/health.py +0 -0
  41. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/kdl/__init__.py +0 -0
  42. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/kdl/parser.py +0 -0
  43. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/__init__.py +0 -0
  44. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/_kdl_lang.py +0 -0
  45. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/base.py +0 -0
  46. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/errors.py +0 -0
  47. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/format_errors.py +0 -0
  48. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/metadata.py +0 -0
  49. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/navigation.py +0 -0
  50. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/path.py +0 -0
  51. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/rule_keywords.py +0 -0
  52. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/rules.py +0 -0
  53. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/type_rules.py +0 -0
  54. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/linter/types.py +0 -0
  55. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/main.py +0 -0
  56. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/parsers/curl.py +0 -0
  57. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/parsers/http.py +0 -0
  58. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/pseudo_selectors.py +0 -0
  59. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/regex_utils.py +0 -0
  60. {ssc_codegen-0.18.0 → ssc_codegen-0.19.1}/ssc_codegen/selector_utils.py +0 -0
@@ -145,3 +145,6 @@ test_schemas/
145
145
  .idea/inspectionProfiles/profiles_settings.xml
146
146
  .idea/inspectionProfiles/Project_Default.xml
147
147
  .idea/libraries/Dart_SDK.xml
148
+
149
+ # slopmachines
150
+ .claude/*
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ssc_codegen
3
- Version: 0.18.0
3
+ Version: 0.19.1
4
4
  Summary: Python-dsl code converter to html parser for web scraping
5
5
  Project-URL: Documentation, https://github.com/vypivshiy/selector_schema_codegen#readme
6
6
  Project-URL: Issues, https://github.com/vypivshiy/selector_schema_codegen/issues
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ssc_codegen"
3
- version = "0.18.0"
3
+ version = "0.19.1"
4
4
  description = "Python-dsl code converter to html parser for web scraping "
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -34,6 +34,8 @@ from .struct import (
34
34
  TableRow,
35
35
  TableMatchKey,
36
36
  RequestConfig,
37
+ ErrorResponse,
38
+ PlaceholderSpec,
37
39
  Field,
38
40
  StartParse,
39
41
  )
@@ -136,7 +138,7 @@ __all__ = [
136
138
  "Init", "InitField", "SplitDoc",
137
139
  "Key", "Value",
138
140
  "TableConfig", "TableRow", "TableMatchKey",
139
- "RequestConfig", "Field", "StartParse",
141
+ "RequestConfig", "ErrorResponse", "PlaceholderSpec", "Field", "StartParse",
140
142
  # selectors
141
143
  "CssSelect", "CssSelectAll",
142
144
  "XpathSelect", "XpathSelectAll",
@@ -6,7 +6,46 @@ from typing import cast
6
6
  from .base import Node
7
7
  from .types import VariableType, StructType
8
8
 
9
- _PLACEHOLDER_RE = _re.compile(r"\{\{([\w-]+)\}\}")
9
+ # Typed placeholder grammar:
10
+ # {{ NAME ( : PRIM )? ( [] )? ( ? )? ( | STYLE )? }}
11
+ # NAME = [A-Za-z][A-Za-z0-9_-]* (first char must be a letter)
12
+ # PRIM = str | int | float | bool (default: str)
13
+ # STYLE = repeat | csv | bracket | pipe | space (arrays only; default: repeat)
14
+ # Legacy `{{name}}` remains valid (groups 2-5 = None → str, scalar, required).
15
+ _PLACEHOLDER_RE = _re.compile(
16
+ r"\{\{"
17
+ r"([A-Za-z][A-Za-z0-9_-]*)"
18
+ r"(?::(str|int|float|bool))?"
19
+ r"(\[\])?"
20
+ r"(\?)?"
21
+ r"(?:\|(repeat|csv|bracket|pipe|space))?"
22
+ r"\}\}"
23
+ )
24
+
25
+ # Widened pattern — any `{{…}}`-shaped token. Used by the linter to flag
26
+ # malformed placeholders that the strict _PLACEHOLDER_RE would silently skip.
27
+ _PLACEHOLDER_WIDE_RE = _re.compile(r"\{\{([^{}]*)\}\}")
28
+
29
+
30
+ @dataclass
31
+ class PlaceholderSpec:
32
+ """Parsed `{{…}}` token from an @request payload."""
33
+
34
+ name: str = ""
35
+ type_name: str = "str" # "str" | "int" | "float" | "bool"
36
+ is_array: bool = False
37
+ is_optional: bool = False
38
+ style: str | None = None # None == default "repeat" when is_array
39
+
40
+
41
+ def _parse_placeholder(match: "_re.Match[str]") -> PlaceholderSpec:
42
+ return PlaceholderSpec(
43
+ name=match.group(1),
44
+ type_name=match.group(2) or "str",
45
+ is_array=bool(match.group(3)),
46
+ is_optional=bool(match.group(4)),
47
+ style=match.group(5) or None,
48
+ )
10
49
 
11
50
 
12
51
  @dataclass
@@ -51,6 +90,14 @@ class Struct(Node):
51
90
  def use_request(self) -> bool:
52
91
  return bool(self.request_configs)
53
92
 
93
+ @property
94
+ def is_rest(self) -> bool:
95
+ return self.struct_type == StructType.REST
96
+
97
+ @property
98
+ def errors(self) -> "list[ErrorResponse]":
99
+ return [n for n in self.body if isinstance(n, ErrorResponse)]
100
+
54
101
 
55
102
  @dataclass
56
103
  class StructDocstring(Node):
@@ -183,7 +230,8 @@ class TableMatchKey(Node):
183
230
  class RequestConfig(Node):
184
231
  """
185
232
  Optional transport layer config for a struct.
186
- DSL: @request [name="suffix"] [response-path="..."] [response-join="..."] \"""...\"""
233
+ DSL: @request [name="suffix"] [response-path="..."] [response-join="..."]
234
+ [response=JsonSchema] [doc="..."] \"""...\"""
187
235
 
188
236
  raw_payload stores the verbatim curl or raw HTTP string (with {{placeholders}}).
189
237
  Transport normalization (curl/HTTP parse → kwargs) happens at converter stage.
@@ -191,6 +239,10 @@ class RequestConfig(Node):
191
239
  name="" (unnamed) generates fetch()/async_fetch().
192
240
  name="by-id" generates fetch_by_id()/async_fetch_by_id() (Python)
193
241
  or fetchById() (JS).
242
+
243
+ response_schema (type=rest only): json schema name for typed 2xx response.
244
+ Empty string = void return.
245
+ doc (type=rest only): per-method docstring.
194
246
  """
195
247
 
196
248
  raw_payload: str = ""
@@ -199,18 +251,44 @@ class RequestConfig(Node):
199
251
  "" # join separator when response-path resolves to list[str]
200
252
  )
201
253
  name: str = "" # method name suffix; "" = default fetch()
254
+ response_schema: str = "" # type=rest: json schema for 2xx body
255
+ doc: str = "" # type=rest: per-method docstring
202
256
 
203
257
  @property
204
- def placeholders(self) -> list[str]:
205
- """Unique placeholder names in declaration order."""
258
+ def placeholders(self) -> list[PlaceholderSpec]:
259
+ """Unique placeholders in declaration order (dedup by name)."""
206
260
  seen: set[str] = set()
207
- result: list[str] = []
208
- for name in _PLACEHOLDER_RE.findall(self.raw_payload):
209
- if name not in seen:
210
- seen.add(name)
211
- result.append(name)
261
+ result: list[PlaceholderSpec] = []
262
+ for m in _PLACEHOLDER_RE.finditer(self.raw_payload):
263
+ spec = _parse_placeholder(m)
264
+ if spec.name not in seen:
265
+ seen.add(spec.name)
266
+ result.append(spec)
212
267
  return result
213
268
 
269
+ @property
270
+ def placeholder_names(self) -> list[str]:
271
+ """Unique placeholder names in declaration order."""
272
+ return [p.name for p in self.placeholders]
273
+
274
+
275
+ @dataclass
276
+ class ErrorResponse(Node):
277
+ """
278
+ Error response mapping for type=rest struct.
279
+ DSL: @error <status> [field="<name>"] <JsonSchema>
280
+
281
+ status: HTTP status code [100..599].
282
+ schema_name: json schema reference for deserialised error body.
283
+ discriminator_field: optional body field name. When set, the error triggers
284
+ on 2xx responses where <field> is present in the parsed JSON body
285
+ (used for APIs that return 200 + error payload).
286
+ """
287
+
288
+ status: int = 0
289
+ schema_name: str = ""
290
+ discriminator_field: str | None = None
291
+
214
292
 
215
293
  @dataclass
216
294
  class Field(Node):
@@ -77,3 +77,4 @@ class StructType(IntEnum):
77
77
  DICT = auto()
78
78
  TABLE = auto()
79
79
  FLAT = auto()
80
+ REST = auto()