table2rules 0.4.0__tar.gz → 0.4.1__tar.gz
This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- {table2rules-0.4.0/src/table2rules.egg-info → table2rules-0.4.1}/PKG-INFO +1 -1
- {table2rules-0.4.0 → table2rules-0.4.1}/pyproject.toml +1 -1
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/grid_parser.py +5 -0
- {table2rules-0.4.0 → table2rules-0.4.1/src/table2rules.egg-info}/PKG-INFO +1 -1
- {table2rules-0.4.0 → table2rules-0.4.1}/tests/test_public_api.py +49 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/LICENSE +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/README.md +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/setup.cfg +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/__init__.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/__main__.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/_core.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/cleanup.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/errors.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/exporters/__init__.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/exporters/base.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/exporters/rules.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/maze_pathfinder.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/models.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/py.typed +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/quality_gate.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/report.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/simple_repair.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules/spans.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules.egg-info/SOURCES.txt +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules.egg-info/dependency_links.txt +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules.egg-info/entry_points.txt +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules.egg-info/requires.txt +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/src/table2rules.egg-info/top_level.txt +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/tests/test_correctness_oracle.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/tests/test_determinism.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/tests/test_regression_golds.py +0 -0
- {table2rules-0.4.0 → table2rules-0.4.1}/tests/test_robustness_mutations.py +0 -0
|
@@ -78,6 +78,11 @@ def extract_cell_text(cell) -> str:
|
|
|
78
78
|
if parent is None:
|
|
79
79
|
continue
|
|
80
80
|
|
|
81
|
+
# Skip text content of <style> and <script> tags embedded in cells
|
|
82
|
+
# (Wikipedia injects inline <style> blocks for multi-column templates).
|
|
83
|
+
if isinstance(parent, Tag) and parent.name in ("style", "script"):
|
|
84
|
+
continue
|
|
85
|
+
|
|
81
86
|
nearest_cell: Optional[Tag]
|
|
82
87
|
if parent.name in ("td", "th"):
|
|
83
88
|
nearest_cell = parent
|
|
@@ -452,3 +452,52 @@ def test_reasons_by_severity_partitions_catalogue() -> None:
|
|
|
452
452
|
def test_reasons_by_severity_has_expected_buckets() -> None:
|
|
453
453
|
# Renaming a bucket is a breaking change — guard it.
|
|
454
454
|
assert set(REASONS_BY_SEVERITY) == {"defensive", "confidence", "input"}
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
# --- Cell text extraction: inline <style> / <script> noise -----------------
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def test_inline_style_tag_excluded_from_cell_text() -> None:
|
|
461
|
+
"""Inline <style> blocks injected by Wikipedia templates must not appear
|
|
462
|
+
in emitted rule values. Regression for the CSS-noise silent failure."""
|
|
463
|
+
html = """
|
|
464
|
+
<table>
|
|
465
|
+
<thead>
|
|
466
|
+
<tr><th>District</th><th>Talukas</th></tr>
|
|
467
|
+
</thead>
|
|
468
|
+
<tbody>
|
|
469
|
+
<tr>
|
|
470
|
+
<th scope="row">Bagalkot</th>
|
|
471
|
+
<td><style>.div-col{column-width:30em}</style>Badami Bagalkot Bilagi</td>
|
|
472
|
+
</tr>
|
|
473
|
+
</tbody>
|
|
474
|
+
</table>
|
|
475
|
+
"""
|
|
476
|
+
text, report = process_tables_with_stats(html, strict=False)
|
|
477
|
+
|
|
478
|
+
assert report.tables[0].render_mode == "rules"
|
|
479
|
+
assert ".div-col" not in text
|
|
480
|
+
assert "column-width" not in text
|
|
481
|
+
assert "Badami" in text
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
def test_inline_script_tag_excluded_from_cell_text() -> None:
|
|
485
|
+
"""Inline <script> blocks must not bleed into emitted rule values."""
|
|
486
|
+
html = """
|
|
487
|
+
<table>
|
|
488
|
+
<thead>
|
|
489
|
+
<tr><th>Region</th><th>Population</th></tr>
|
|
490
|
+
</thead>
|
|
491
|
+
<tbody>
|
|
492
|
+
<tr>
|
|
493
|
+
<th scope="row">South</th>
|
|
494
|
+
<td><script>var x=1;</script>4,200,000</td>
|
|
495
|
+
</tr>
|
|
496
|
+
</tbody>
|
|
497
|
+
</table>
|
|
498
|
+
"""
|
|
499
|
+
text, report = process_tables_with_stats(html, strict=False)
|
|
500
|
+
|
|
501
|
+
assert report.tables[0].render_mode == "rules"
|
|
502
|
+
assert "var x" not in text
|
|
503
|
+
assert "4,200,000" in text
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|