ssc_codegen 0.20.0__tar.gz → 0.21.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/PKG-INFO +4 -1
  2. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/pyproject.toml +119 -113
  3. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/main.py +146 -1
  4. ssc_codegen-0.21.0/ssc_codegen/repl.py +769 -0
  5. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/.gitignore +0 -0
  6. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/LICENSE +0 -0
  7. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/README.md +0 -0
  8. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/__init__.py +0 -0
  9. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/_logging.py +0 -0
  10. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/__init__.py +0 -0
  11. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/array.py +0 -0
  12. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/base.py +0 -0
  13. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/cast.py +0 -0
  14. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/control.py +0 -0
  15. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/extract.py +0 -0
  16. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/helpers.py +0 -0
  17. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/jsondef.py +0 -0
  18. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/module.py +0 -0
  19. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/predicate_containers.py +0 -0
  20. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/predicate_ops.py +0 -0
  21. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/regex.py +0 -0
  22. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/selectors.py +0 -0
  23. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/string.py +0 -0
  24. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/struct.py +0 -0
  25. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/transform.py +0 -0
  26. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/typedef.py +0 -0
  27. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/ast/types.py +0 -0
  28. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/converters/base.py +0 -0
  29. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/converters/go_goquery.py +0 -0
  30. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/converters/helpers.py +0 -0
  31. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/converters/js_pure.py +0 -0
  32. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/converters/py_bs4.py +0 -0
  33. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/converters/py_helpers.py +0 -0
  34. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/converters/py_lxml.py +0 -0
  35. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/converters/py_parsel.py +0 -0
  36. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/converters/py_slax.py +0 -0
  37. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/document_utils.py +0 -0
  38. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/exceptions.py +0 -0
  39. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/health.py +0 -0
  40. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/kdl/__init__.py +0 -0
  41. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/kdl/parser.py +0 -0
  42. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/linter/__init__.py +0 -0
  43. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/linter/_kdl_lang.py +0 -0
  44. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/linter/base.py +0 -0
  45. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/linter/errors.py +0 -0
  46. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/linter/format_errors.py +0 -0
  47. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/linter/metadata.py +0 -0
  48. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/linter/navigation.py +0 -0
  49. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/linter/path.py +0 -0
  50. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/linter/rule_keywords.py +0 -0
  51. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/linter/rules.py +0 -0
  52. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/linter/rules_struct.py +0 -0
  53. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/linter/type_rules.py +0 -0
  54. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/linter/types.py +0 -0
  55. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/openapi/__init__.py +0 -0
  56. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/openapi/converter.py +0 -0
  57. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/openapi/emitter.py +0 -0
  58. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/openapi/parser.py +0 -0
  59. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/parser.py +0 -0
  60. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/parsers/__init__.py +0 -0
  61. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/parsers/curl.py +0 -0
  62. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/parsers/http.py +0 -0
  63. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/parsers/spec.py +0 -0
  64. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/pseudo_selectors.py +0 -0
  65. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/regex_utils.py +0 -0
  66. {ssc_codegen-0.20.0 → ssc_codegen-0.21.0}/ssc_codegen/selector_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ssc_codegen
3
- Version: 0.20.0
3
+ Version: 0.21.0
4
4
  Summary: Python-dsl code converter to html parser for web scraping
5
5
  Project-URL: Documentation, https://github.com/vypivshiy/selector_schema_codegen#readme
6
6
  Project-URL: Issues, https://github.com/vypivshiy/selector_schema_codegen/issues
@@ -28,6 +28,9 @@ Requires-Dist: pyyaml>=6.0
28
28
  Requires-Dist: soupsieve>=2.6
29
29
  Requires-Dist: typer>=0.15.1
30
30
  Requires-Dist: typing-extensions; python_version < '3.11'
31
+ Provides-Extra: repl
32
+ Requires-Dist: httpx>=0.28.1; extra == 'repl'
33
+ Requires-Dist: ipython>=8.39.0; extra == 'repl'
31
34
  Description-Content-Type: text/markdown
32
35
 
33
36
  # ssc-codegen
@@ -1,113 +1,119 @@
1
- [project]
2
- name = "ssc_codegen"
3
- version = "0.20.0"
4
- description = "Python-dsl code converter to html parser for web scraping "
5
- readme = "README.md"
6
- requires-python = ">=3.10"
7
- dependencies = [
8
- "bs4>=0.0.2",
9
- "colorama>=0.4.6 ; sys_platform == 'win32'",
10
- "cssselect>=1.2.0",
11
- "lxml>=5.3.0",
12
- "soupsieve>=2.6",
13
- "typer>=0.15.1",
14
- "typing_extensions;python_version<'3.11'",
15
- # https://github.com/fastapi/typer/discussions/1215
16
- # https://github.com/fastapi/typer/pull/1145
17
- "click<8.2.0",
18
- "pyyaml>=6.0",
19
- ]
20
-
21
- classifiers = [
22
- "Development Status :: 3 - Alpha",
23
- "Environment :: Console",
24
- "Intended Audience :: Developers",
25
- "License :: OSI Approved :: MIT License",
26
- "Topic :: Software Development :: Code Generators",
27
- "Topic :: Text Processing :: Markup :: HTML",
28
- "Topic :: Utilities",
29
- "Programming Language :: Python :: 3.10",
30
- "Programming Language :: Python :: 3.11",
31
- "Programming Language :: Python :: 3.12",
32
- "Programming Language :: Python :: 3.13",
33
- ]
34
-
35
- [project.urls]
36
- Documentation = "https://github.com/vypivshiy/selector_schema_codegen#readme"
37
- Issues = "https://github.com/vypivshiy/selector_schema_codegen/issues"
38
- Source = "https://github.com/vypivshiy/selector_schema_codegen"
39
- Examples = "https://github.com/vypivshiy/selector_schema_codegen/examples"
40
-
41
-
42
- [project.scripts]
43
- ssc-gen = 'ssc_codegen.main:main'
44
-
45
- [build-system]
46
- requires = ["hatchling"]
47
- build-backend = "hatchling.build"
48
-
49
- [tool.hatch.build.targets.sdist]
50
- include = [
51
- "ssc_codegen/**",
52
- ]
53
-
54
- [tool.hatch.build.targets.wheel]
55
- packages = ["ssc_codegen"]
56
-
57
- [dependency-groups]
58
- dev = [
59
- "bs4>=0.0.2",
60
- "coverage>=7.6.12",
61
- "httpx>=0.28.1",
62
- "hypothesis>=6.151.9",
63
- "lxml>=5.3.0",
64
- "mypy>=1.14.1",
65
- "parsel>=1.9.1",
66
- "pytest>=8.3.4",
67
- "ruff>=0.9.3",
68
- "selectolax>=0.3.27",
69
- ]
70
-
71
- [tool.ruff]
72
- target-version = "py310"
73
- line-length = 80
74
- exclude = ["__init__.py"]
75
-
76
- [tool.mypy]
77
- python_version = "3.10"
78
- pretty = true
79
- ignore_missing_imports = true
80
- disallow_untyped_defs = true
81
- show_error_codes = true
82
- no_implicit_optional = true
83
- mypy_path = 'ssc_codegen'
84
- exclude = [
85
- "converters/.*\\.py$", # to tired typing
86
- "examples/.*\\.py$"
87
- ]
88
- [[tool.mypy.overrides]]
89
- module="tests.*"
90
- disallow_untyped_defs = false
91
-
92
-
93
- [tool.coverage.report]
94
- exclude_also = [
95
- 'def __repr__',
96
- 'if self.debug:',
97
- 'if settings.DEBUG',
98
- 'raise AssertionError',
99
- 'raise NotImplementedError',
100
- 'if 0:',
101
- 'if __name__ == .__main__.:',
102
- 'if TYPE_CHECKING:',
103
- 'class .*\bProtocol\):',
104
- '@(abc\.)?abstractmethod',
105
- ]
106
-
107
- [tool.coverage.run]
108
- source = ["ssc_codegen"]
109
- omit = [
110
- "*/.venv/*",
111
- "*/scripts/*",
112
- "*/tests/*"
113
- ]
1
+ [project]
2
+ name = "ssc_codegen"
3
+ version = "0.21.0"
4
+ description = "Python-dsl code converter to html parser for web scraping "
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ dependencies = [
8
+ "bs4>=0.0.2",
9
+ "colorama>=0.4.6 ; sys_platform == 'win32'",
10
+ "cssselect>=1.2.0",
11
+ "lxml>=5.3.0",
12
+ "soupsieve>=2.6",
13
+ "typer>=0.15.1",
14
+ "typing_extensions;python_version<'3.11'",
15
+ # https://github.com/fastapi/typer/discussions/1215
16
+ # https://github.com/fastapi/typer/pull/1145
17
+ "click<8.2.0",
18
+ "pyyaml>=6.0",
19
+ ]
20
+
21
+ classifiers = [
22
+ "Development Status :: 3 - Alpha",
23
+ "Environment :: Console",
24
+ "Intended Audience :: Developers",
25
+ "License :: OSI Approved :: MIT License",
26
+ "Topic :: Software Development :: Code Generators",
27
+ "Topic :: Text Processing :: Markup :: HTML",
28
+ "Topic :: Utilities",
29
+ "Programming Language :: Python :: 3.10",
30
+ "Programming Language :: Python :: 3.11",
31
+ "Programming Language :: Python :: 3.12",
32
+ "Programming Language :: Python :: 3.13",
33
+ ]
34
+
35
+ [project.urls]
36
+ Documentation = "https://github.com/vypivshiy/selector_schema_codegen#readme"
37
+ Issues = "https://github.com/vypivshiy/selector_schema_codegen/issues"
38
+ Source = "https://github.com/vypivshiy/selector_schema_codegen"
39
+ Examples = "https://github.com/vypivshiy/selector_schema_codegen/examples"
40
+
41
+
42
+ [project.scripts]
43
+ ssc-gen = 'ssc_codegen.main:main'
44
+
45
+ [project.optional-dependencies]
46
+ repl = [
47
+ "httpx>=0.28.1",
48
+ "ipython>=8.39.0",
49
+ ]
50
+
51
+ [build-system]
52
+ requires = ["hatchling"]
53
+ build-backend = "hatchling.build"
54
+
55
+ [tool.hatch.build.targets.sdist]
56
+ include = [
57
+ "ssc_codegen/**",
58
+ ]
59
+
60
+ [tool.hatch.build.targets.wheel]
61
+ packages = ["ssc_codegen"]
62
+
63
+ [dependency-groups]
64
+ dev = [
65
+ "bs4>=0.0.2",
66
+ "coverage>=7.6.12",
67
+ "httpx>=0.28.1",
68
+ "hypothesis>=6.151.9",
69
+ "lxml>=5.3.0",
70
+ "mypy>=1.14.1",
71
+ "parsel>=1.9.1",
72
+ "pytest>=8.3.4",
73
+ "ruff>=0.9.3",
74
+ "selectolax>=0.3.27",
75
+ ]
76
+
77
+ [tool.ruff]
78
+ target-version = "py310"
79
+ line-length = 80
80
+ exclude = ["__init__.py"]
81
+
82
+ [tool.mypy]
83
+ python_version = "3.10"
84
+ pretty = true
85
+ ignore_missing_imports = true
86
+ disallow_untyped_defs = true
87
+ show_error_codes = true
88
+ no_implicit_optional = true
89
+ mypy_path = 'ssc_codegen'
90
+ exclude = [
91
+ "converters/.*\\.py$", # to tired typing
92
+ "examples/.*\\.py$"
93
+ ]
94
+ [[tool.mypy.overrides]]
95
+ module="tests.*"
96
+ disallow_untyped_defs = false
97
+
98
+
99
+ [tool.coverage.report]
100
+ exclude_also = [
101
+ 'def __repr__',
102
+ 'if self.debug:',
103
+ 'if settings.DEBUG',
104
+ 'raise AssertionError',
105
+ 'raise NotImplementedError',
106
+ 'if 0:',
107
+ 'if __name__ == .__main__.:',
108
+ 'if TYPE_CHECKING:',
109
+ 'class .*\bProtocol\):',
110
+ '@(abc\.)?abstractmethod',
111
+ ]
112
+
113
+ [tool.coverage.run]
114
+ source = ["ssc_codegen"]
115
+ omit = [
116
+ "*/.venv/*",
117
+ "*/scripts/*",
118
+ "*/tests/*"
119
+ ]
@@ -660,7 +660,152 @@ def health(
660
660
  raise typer.Exit(code=1)
661
661
 
662
662
 
663
- @app.command(name="openapi-to-ssc", help="Experimental converter swagger openapi 3.0 to kdl DSL")
663
+ @app.command()
664
+ def shell(
665
+ schema: Annotated[
666
+ str | None,
667
+ typer.Argument(
668
+ help="Schema target in format 'path/to/schema.kdl:StructName'. "
669
+ "If omitted, starts empty REPL.",
670
+ ),
671
+ ] = None,
672
+ target: Annotated[
673
+ _PyTarget,
674
+ typer.Option(
675
+ "--target",
676
+ "-t",
677
+ help="Python backend for the REPL.",
678
+ case_sensitive=False,
679
+ ),
680
+ ] = _PyTarget.PY_BS4,
681
+ input_file: Annotated[
682
+ Path | None,
683
+ typer.Option(
684
+ "--input",
685
+ "-i",
686
+ help="HTML input file.",
687
+ exists=True,
688
+ file_okay=True,
689
+ dir_okay=False,
690
+ readable=True,
691
+ ),
692
+ ] = None,
693
+ url: Annotated[
694
+ str | None,
695
+ typer.Option(
696
+ "--url",
697
+ help="URL to fetch HTML from.",
698
+ ),
699
+ ] = None,
700
+ http_client: Annotated[
701
+ str | None,
702
+ typer.Option(
703
+ "--http-client",
704
+ help="HTTP client for REST structs: httpx (default) or requests.",
705
+ ),
706
+ ] = None,
707
+ verbose: Annotated[
708
+ bool,
709
+ typer.Option(
710
+ "--verbose",
711
+ "-v",
712
+ help="Print generated code and enable DEBUG logging.",
713
+ ),
714
+ ] = False,
715
+ css_to_xpath: Annotated[
716
+ bool,
717
+ typer.Option(
718
+ "--css-to-xpath",
719
+ help="Convert CSS selectors to XPath.",
720
+ ),
721
+ ] = False,
722
+ ) -> None:
723
+ """Launch an interactive REPL shell for testing KDL schema parsers.
724
+
725
+ \b
726
+ Examples:
727
+ ssc-gen shell examples/booksToScrape.kdl:Book --url https://books.toscrape.com/
728
+ ssc-gen shell schema.kdl:Product -i page.html -t py-lxml
729
+ ssc-gen shell examples/restApiLike.kdl:DummyJsonApi --http-client httpx
730
+ ssc-gen shell
731
+ """
732
+ from ssc_codegen.repl import Repl, ReplState
733
+
734
+ if verbose:
735
+ setup_debug_logging()
736
+
737
+ state = ReplState(
738
+ target=target.value,
739
+ http_client=http_client or "httpx",
740
+ verbose=verbose,
741
+ css_to_xpath=css_to_xpath,
742
+ )
743
+
744
+ if input_file is not None:
745
+ state.html = input_file.read_text(encoding="utf-8")
746
+
747
+ if schema is not None:
748
+ if ":" in schema:
749
+ path_part, struct_name = schema.rsplit(":", 1)
750
+ kdl_path = Path(path_part)
751
+ else:
752
+ kdl_path = Path(schema)
753
+ struct_name = ""
754
+ if not kdl_path.is_file():
755
+ typer.echo(f"ERROR: file not found: {kdl_path}", err=True)
756
+ raise typer.Exit(code=1)
757
+ from ssc_codegen import parse_ast
758
+ from ssc_codegen.ast import Struct as StructNode
759
+
760
+ try:
761
+ state.module_ast = parse_ast(
762
+ path=str(kdl_path), css_to_xpath=css_to_xpath
763
+ )
764
+ except Exception as exc:
765
+ if verbose:
766
+ typer.echo(traceback.format_exc(), err=True)
767
+ else:
768
+ typer.echo(
769
+ f"ERROR: failed to parse {kdl_path}: {exc}", err=True
770
+ )
771
+ raise typer.Exit(code=1)
772
+
773
+ state.schema_path = kdl_path
774
+ state.kdl_source = kdl_path.read_text(encoding="utf-8-sig")
775
+
776
+ structs = [
777
+ n for n in state.module_ast.body if isinstance(n, StructNode)
778
+ ]
779
+ if not struct_name:
780
+ if structs:
781
+ struct_name = structs[0].name
782
+ struct_names = [s.name for s in structs]
783
+ if struct_name and struct_name not in struct_names:
784
+ typer.echo(
785
+ f"ERROR: struct '{struct_name}' not found. "
786
+ f"Available: {', '.join(struct_names)}",
787
+ err=True,
788
+ )
789
+ raise typer.Exit(code=1)
790
+ state.struct_name = struct_name
791
+
792
+ if url is not None and not state.html:
793
+ try:
794
+ from ssc_codegen.repl import _fetch_html
795
+
796
+ state.html = _fetch_html(url)
797
+ except Exception as exc:
798
+ typer.echo(f"ERROR fetching URL: {exc}", err=True)
799
+ raise typer.Exit(code=1)
800
+
801
+ repl = Repl(state)
802
+ repl.cmdloop()
803
+
804
+
805
+ @app.command(
806
+ name="openapi-to-ssc",
807
+ help="Experimental converter swagger openapi 3.0 to kdl DSL",
808
+ )
664
809
  def openapi_to_ssc(
665
810
  spec: Annotated[
666
811
  Path,