table2rules 0.4.0__tar.gz → 0.4.1__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {table2rules-0.4.0/src/table2rules.egg-info → table2rules-0.4.1}/PKG-INFO +1 -1
  2. {table2rules-0.4.0 → table2rules-0.4.1}/pyproject.toml +1 -1
  3. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/grid_parser.py +5 -0
  4. {table2rules-0.4.0 → table2rules-0.4.1/src/table2rules.egg-info}/PKG-INFO +1 -1
  5. {table2rules-0.4.0 → table2rules-0.4.1}/tests/test_public_api.py +49 -0
  6. {table2rules-0.4.0 → table2rules-0.4.1}/LICENSE +0 -0
  7. {table2rules-0.4.0 → table2rules-0.4.1}/README.md +0 -0
  8. {table2rules-0.4.0 → table2rules-0.4.1}/setup.cfg +0 -0
  9. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/__init__.py +0 -0
  10. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/__main__.py +0 -0
  11. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/_core.py +0 -0
  12. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/cleanup.py +0 -0
  13. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/errors.py +0 -0
  14. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/exporters/__init__.py +0 -0
  15. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/exporters/base.py +0 -0
  16. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/exporters/rules.py +0 -0
  17. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/maze_pathfinder.py +0 -0
  18. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/models.py +0 -0
  19. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/py.typed +0 -0
  20. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/quality_gate.py +0 -0
  21. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/report.py +0 -0
  22. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/simple_repair.py +0 -0
  23. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/spans.py +0 -0
  24. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules.egg-info/SOURCES.txt +0 -0
  25. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules.egg-info/dependency_links.txt +0 -0
  26. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules.egg-info/entry_points.txt +0 -0
  27. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules.egg-info/requires.txt +0 -0
  28. {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules.egg-info/top_level.txt +0 -0
  29. {table2rules-0.4.0 → table2rules-0.4.1}/tests/test_correctness_oracle.py +0 -0
  30. {table2rules-0.4.0 → table2rules-0.4.1}/tests/test_determinism.py +0 -0
  31. {table2rules-0.4.0 → table2rules-0.4.1}/tests/test_regression_golds.py +0 -0
  32. {table2rules-0.4.0 → table2rules-0.4.1}/tests/test_robustness_mutations.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: table2rules
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Convert HTML tables to flat, LLM-friendly rules using spatial pathfinding.
5
5
  Author: PebbleRoad Pte Ltd
6
6
  License-Expression: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "table2rules"
7
- version = "0.4.0"
7
+ version = "0.4.1"
8
8
  description = "Convert HTML tables to flat, LLM-friendly rules using spatial pathfinding."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -78,6 +78,11 @@ def extract_cell_text(cell) -> str:
78
78
  if parent is None:
79
79
  continue
80
80
 
81
+ # Skip text content of <style> and <script> tags embedded in cells
82
+ # (Wikipedia injects inline <style> blocks for multi-column templates).
83
+ if isinstance(parent, Tag) and parent.name in ("style", "script"):
84
+ continue
85
+
81
86
  nearest_cell: Optional[Tag]
82
87
  if parent.name in ("td", "th"):
83
88
  nearest_cell = parent
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: table2rules
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Convert HTML tables to flat, LLM-friendly rules using spatial pathfinding.
5
5
  Author: PebbleRoad Pte Ltd
6
6
  License-Expression: MIT
@@ -452,3 +452,52 @@ def test_reasons_by_severity_partitions_catalogue() -> None:
452
452
  def test_reasons_by_severity_has_expected_buckets() -> None:
453
453
  # Renaming a bucket is a breaking change — guard it.
454
454
  assert set(REASONS_BY_SEVERITY) == {"defensive", "confidence", "input"}
455
+
456
+
457
+ # --- Cell text extraction: inline <style> / <script> noise -----------------
458
+
459
+
460
+ def test_inline_style_tag_excluded_from_cell_text() -> None:
461
+ """Inline <style> blocks injected by Wikipedia templates must not appear
462
+ in emitted rule values. Regression for the CSS-noise silent failure."""
463
+ html = """
464
+ <table>
465
+ <thead>
466
+ <tr><th>District</th><th>Talukas</th></tr>
467
+ </thead>
468
+ <tbody>
469
+ <tr>
470
+ <th scope="row">Bagalkot</th>
471
+ <td><style>.div-col{column-width:30em}</style>Badami Bagalkot Bilagi</td>
472
+ </tr>
473
+ </tbody>
474
+ </table>
475
+ """
476
+ text, report = process_tables_with_stats(html, strict=False)
477
+
478
+ assert report.tables[0].render_mode == "rules"
479
+ assert ".div-col" not in text
480
+ assert "column-width" not in text
481
+ assert "Badami" in text
482
+
483
+
484
+ def test_inline_script_tag_excluded_from_cell_text() -> None:
485
+ """Inline <script> blocks must not bleed into emitted rule values."""
486
+ html = """
487
+ <table>
488
+ <thead>
489
+ <tr><th>Region</th><th>Population</th></tr>
490
+ </thead>
491
+ <tbody>
492
+ <tr>
493
+ <th scope="row">South</th>
494
+ <td><script>var x=1;</script>4,200,000</td>
495
+ </tr>
496
+ </tbody>
497
+ </table>
498
+ """
499
+ text, report = process_tables_with_stats(html, strict=False)
500
+
501
+ assert report.tables[0].render_mode == "rules"
502
+ assert "var x" not in text
503
+ assert "4,200,000" in text
File without changes
File without changes
File without changes