techreg-parser 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. techreg_parser-0.1.0/LICENSE +21 -0
  2. techreg_parser-0.1.0/MANIFEST.in +9 -0
  3. techreg_parser-0.1.0/PKG-INFO +181 -0
  4. techreg_parser-0.1.0/README.md +148 -0
  5. techreg_parser-0.1.0/TechRegParser/__init__.py +48 -0
  6. techreg_parser-0.1.0/TechRegParser/agents/__init__.py +7 -0
  7. techreg_parser-0.1.0/TechRegParser/agents/orchestrator.py +1525 -0
  8. techreg_parser-0.1.0/TechRegParser/config.py +216 -0
  9. techreg_parser-0.1.0/TechRegParser/evaluation/__init__.py +5 -0
  10. techreg_parser-0.1.0/TechRegParser/evaluation/eval_framework.py +235 -0
  11. techreg_parser-0.1.0/TechRegParser/main.py +383 -0
  12. techreg_parser-0.1.0/TechRegParser/memory/__init__.py +5 -0
  13. techreg_parser-0.1.0/TechRegParser/memory/session_store.py +195 -0
  14. techreg_parser-0.1.0/TechRegParser/models/__init__.py +24 -0
  15. techreg_parser-0.1.0/TechRegParser/models/citation.py +60 -0
  16. techreg_parser-0.1.0/TechRegParser/models/requirement.py +103 -0
  17. techreg_parser-0.1.0/TechRegParser/models/statute_structure.py +395 -0
  18. techreg_parser-0.1.0/TechRegParser/skills/statute-guide/SKILL.md +33 -0
  19. techreg_parser-0.1.0/TechRegParser/skills/statute-guide/assets/images/A-Guide-to-Reading-Interpreting-and-Applying-Statutes-1_page1_img1.jpeg +0 -0
  20. techreg_parser-0.1.0/TechRegParser/skills/statute-guide/references/a_guide_to_reading_interpreting_and_applying_statutes_1.md +318 -0
  21. techreg_parser-0.1.0/TechRegParser/skills/statute-guide/references/extraction_playbook.md +54 -0
  22. techreg_parser-0.1.0/TechRegParser/skills/statute-guide/references/index.md +13 -0
  23. techreg_parser-0.1.0/TechRegParser/tools/__init__.py +11 -0
  24. techreg_parser-0.1.0/TechRegParser/tools/citation_verify.py +624 -0
  25. techreg_parser-0.1.0/TechRegParser/tools/definition_lookup.py +231 -0
  26. techreg_parser-0.1.0/pyproject.toml +76 -0
  27. techreg_parser-0.1.0/setup.cfg +4 -0
  28. techreg_parser-0.1.0/techreg_parser.egg-info/PKG-INFO +181 -0
  29. techreg_parser-0.1.0/techreg_parser.egg-info/SOURCES.txt +31 -0
  30. techreg_parser-0.1.0/techreg_parser.egg-info/dependency_links.txt +1 -0
  31. techreg_parser-0.1.0/techreg_parser.egg-info/entry_points.txt +2 -0
  32. techreg_parser-0.1.0/techreg_parser.egg-info/requires.txt +13 -0
  33. techreg_parser-0.1.0/techreg_parser.egg-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 rafal-fryc
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,9 @@
1
+ include README.md
2
+ include LICENSE
3
+ recursive-include TechRegParser/skills **/*
4
+ recursive-exclude * __pycache__
5
+ recursive-exclude * *.pyc
6
+ exclude *.pdf
7
+ exclude *.json
8
+ exclude viewer.html
9
+ exclude nul
@@ -0,0 +1,181 @@
1
+ Metadata-Version: 2.4
2
+ Name: techreg-parser
3
+ Version: 0.1.0
4
+ Summary: Multi-agent system for extracting requirements from data privacy and tech regulation statutes
5
+ Author: rafal-fryc
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/rafal-fryc/TechRegParser
8
+ Project-URL: Documentation, https://github.com/rafal-fryc/TechRegParser#readme
9
+ Project-URL: Repository, https://github.com/rafal-fryc/TechRegParser
10
+ Keywords: statute,legal,privacy,compliance,ai,agent,tech-regulation
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Legal Industry
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Text Processing :: General
18
+ Requires-Python: >=3.11
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: claude-agent-sdk>=0.1.0
22
+ Requires-Dist: pydantic>=2.0.0
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
25
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
26
+ Requires-Dist: black>=23.0.0; extra == "dev"
27
+ Requires-Dist: isort>=5.12.0; extra == "dev"
28
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
29
+ Provides-Extra: pdf
30
+ Requires-Dist: pypdf>=3.0.0; extra == "pdf"
31
+ Requires-Dist: pdfplumber>=0.9.0; extra == "pdf"
32
+ Dynamic: license-file
33
+
34
+ # TechRegParser
35
+
36
+ A multi-agent system for extracting requirements from data privacy and tech regulation statutes using the Anthropic Agent SDK.
37
+
38
+ ## Features
39
+
40
+ - **Multi-agent architecture**: Specialized agents for different tasks:
41
+ - **Statute Reader**: Parses statute structure (definitions, applicability, rights, duties, exemptions, enforcement)
42
+ - **Section Analyzer**: Extracts specific requirements with exact citations
43
+ - **Citation Verifier**: Validates all citations against the original text
44
+ - **Requirement Classifier**: Categorizes requirements (disclosure, operational, technical, legal framework)
45
+
46
+ - **Model Configuration**:
47
+ - **Orchestrator**: Uses Opus for complex coordination
48
+ - **Subagents**: Use Sonnet for specialized tasks
49
+
50
+ - **Anti-hallucination measures**:
51
+ - Every requirement must have a direct quote from the statute
52
+ - Two-pass verification (extract then verify)
53
+ - Confidence scoring for citations
54
+ - Flagging of unverified requirements
55
+
56
+ - **Diagnostics and logging**:
57
+ - Phase 1 logs section/definition counts on success, or a clear warning on failure
58
+ - Parse failures log the raw agent response details for debugging
59
+
60
+ - **Statute interpretation skill**: Incorporates statutory interpretation guidance from legal experts
61
+
62
+ - **PDF Support**: Can parse both text files and PDFs (with pdfplumber or pypdf)
63
+
64
+ ## Installation
65
+
66
+ ```bash
67
+ # Install from PyPI
68
+ pip install techreg-parser
69
+
70
+ # With PDF support
71
+ pip install "techreg-parser[pdf]"
72
+
73
+ # Or install locally for development
74
+ pip install -e .
75
+ ```
76
+
77
+ ## Usage
78
+
79
+ ### Command Line
80
+
81
+ ```bash
82
+ # Analyze a statute and output JSON
83
+ techreg-parser path/to/statute.txt --output results.json
84
+
85
+ # Analyze a PDF statute
86
+ techreg-parser path/to/statute.pdf --output results.json
87
+
88
+ # Output markdown report
89
+ techreg-parser path/to/statute.txt --output analysis.md --format markdown
90
+
91
+ # Skip citation verification (faster but less reliable)
92
+ techreg-parser path/to/statute.txt --no-verify
93
+ ```
94
+
95
+ ### Python API
96
+
97
+ ```python
98
+ import asyncio
99
+ from TechRegParser import TechRegParserOrchestrator, OrchestratorConfig
100
+
101
+ async def main():
102
+     config = OrchestratorConfig(
103
+         verify_citations=True,
104
+         classify_requirements=True,
105
+     )
106
+
107
+     parser = TechRegParserOrchestrator(config=config)
108
+
109
+     result = await parser.analyze_statute(
110
+         statute_path="path/to/texas_privacy_law.txt",
111
+         output_format="json"
112
+     )
113
+
114
+     # Access results
115
+     for req in result.requirements:
116
+         print(f"Requirement: {req.description}")
117
+         print(f" Citation: {req.citation.section}")
118
+         print(f" Category: {req.category.value}")
119
+         print(f" Verified: {req.verified}")
120
+         print()
121
+
122
+     # Export to file
123
+     await parser.export_results(result, "output.json", format="json")
124
+
125
+ asyncio.run(main())
126
+ ```
127
+
128
+ ## Architecture
129
+
130
+ ```
131
+                      +-------------------+
132
+                      |   Orchestrator    |
133
+                      |   (Opus Model)    |
134
+                      +--------+----------+
135
+                               |
136
+         +------------+--------+-----+---------------+
137
+         |            |              |               |
138
+ +-------v----+  +----v-----+  +-----v------+  +-----v------+
139
+ |  Statute   |  | Section  |  |  Citation  |  |Requirement |
140
+ |   Reader   |  | Analyzer |  |  Verifier  |  | Classifier |
141
+ |  (Sonnet)  |  | (Sonnet) |  |  (Python)  |  |  (Sonnet)  |
142
+ +------------+  +----------+  +------------+  +------------+
143
+ ```
144
+
145
+ ## Requirement Categories
146
+
147
+ - **DISCLOSURE**: Must be stated in privacy policy/notice
148
+ - **OPERATIONAL**: Internal compliance processes (response times, procedures)
149
+ - **TECHNICAL**: System/UI implementation (GPC signals, security measures, link placement, UI elements)
150
+ - **LEGAL FRAMEWORK**: Enforcement mechanisms, penalties, AG authority, cure periods
151
+
152
+ ## Output
153
+
154
+ The analysis produces:
155
+ - **Requirements**: List of all extracted requirements with citations
156
+ - **Definitions**: All defined terms from the statute
157
+ - **Structure**: Full statute section tree (IDs, types, titles, line ranges) — included by default in JSON export for the viewer's Structure tab
158
+ - **Verification**: Status of citation verification
159
+ - **Classification**: Category for each requirement
160
+
161
+ ## Key Principles
162
+
163
+ Based on lessons from analyzing tech regulation statutes:
164
+
165
+ 1. Start with definitions sections to anchor interpretation — defined terms control meaning throughout
166
+ 2. Separate disclosure requirements from operational and technical requirements
167
+ 3. Tech regulation statutes follow predictable architecture (definitions, scope, rights, duties, exemptions, enforcement)
168
+ 4. Obligations and defined terms vary across jurisdictions and regulatory domains — never assume uniformity
169
+ 5. Work section by section, not requirement by requirement — structure drives accurate extraction
170
+ 6. Every extracted requirement must trace back to a specific statutory provision with a verbatim quote
171
+
172
+ ## Requirements
173
+
174
+ - Python 3.11+
175
+ - Anthropic Agent SDK (`claude-agent-sdk`)
176
+ - Pydantic 2.0+
177
+ - Optional: pdfplumber or pypdf for PDF support
178
+
179
+ ## License
180
+
181
+ MIT
@@ -0,0 +1,148 @@
1
+ # TechRegParser
2
+
3
+ A multi-agent system for extracting requirements from data privacy and tech regulation statutes using the Anthropic Agent SDK.
4
+
5
+ ## Features
6
+
7
+ - **Multi-agent architecture**: Specialized agents for different tasks:
8
+ - **Statute Reader**: Parses statute structure (definitions, applicability, rights, duties, exemptions, enforcement)
9
+ - **Section Analyzer**: Extracts specific requirements with exact citations
10
+ - **Citation Verifier**: Validates all citations against the original text
11
+ - **Requirement Classifier**: Categorizes requirements (disclosure, operational, technical, legal framework)
12
+
13
+ - **Model Configuration**:
14
+ - **Orchestrator**: Uses Opus for complex coordination
15
+ - **Subagents**: Use Sonnet for specialized tasks
16
+
17
+ - **Anti-hallucination measures**:
18
+ - Every requirement must have a direct quote from the statute
19
+ - Two-pass verification (extract then verify)
20
+ - Confidence scoring for citations
21
+ - Flagging of unverified requirements
22
+
23
+ - **Diagnostics and logging**:
24
+ - Phase 1 logs section/definition counts on success, or a clear warning on failure
25
+ - Parse failures log the raw agent response details for debugging
26
+
27
+ - **Statute interpretation skill**: Incorporates statutory interpretation guidance from legal experts
28
+
29
+ - **PDF Support**: Can parse both text files and PDFs (with pdfplumber or pypdf)
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ # Install from PyPI
35
+ pip install techreg-parser
36
+
37
+ # With PDF support
38
+ pip install "techreg-parser[pdf]"
39
+
40
+ # Or install locally for development
41
+ pip install -e .
42
+ ```
43
+
44
+ ## Usage
45
+
46
+ ### Command Line
47
+
48
+ ```bash
49
+ # Analyze a statute and output JSON
50
+ techreg-parser path/to/statute.txt --output results.json
51
+
52
+ # Analyze a PDF statute
53
+ techreg-parser path/to/statute.pdf --output results.json
54
+
55
+ # Output markdown report
56
+ techreg-parser path/to/statute.txt --output analysis.md --format markdown
57
+
58
+ # Skip citation verification (faster but less reliable)
59
+ techreg-parser path/to/statute.txt --no-verify
60
+ ```
61
+
62
+ ### Python API
63
+
64
+ ```python
65
+ import asyncio
66
+ from TechRegParser import TechRegParserOrchestrator, OrchestratorConfig
67
+
68
+ async def main():
69
+     config = OrchestratorConfig(
70
+         verify_citations=True,
71
+         classify_requirements=True,
72
+     )
73
+
74
+     parser = TechRegParserOrchestrator(config=config)
75
+
76
+     result = await parser.analyze_statute(
77
+         statute_path="path/to/texas_privacy_law.txt",
78
+         output_format="json"
79
+     )
80
+
81
+     # Access results
82
+     for req in result.requirements:
83
+         print(f"Requirement: {req.description}")
84
+         print(f" Citation: {req.citation.section}")
85
+         print(f" Category: {req.category.value}")
86
+         print(f" Verified: {req.verified}")
87
+         print()
88
+
89
+     # Export to file
90
+     await parser.export_results(result, "output.json", format="json")
91
+
92
+ asyncio.run(main())
93
+ ```
94
+
95
+ ## Architecture
96
+
97
+ ```
98
+                      +-------------------+
99
+                      |   Orchestrator    |
100
+                      |   (Opus Model)    |
101
+                      +--------+----------+
102
+                               |
103
+         +------------+--------+-----+---------------+
104
+         |            |              |               |
105
+ +-------v----+  +----v-----+  +-----v------+  +-----v------+
106
+ |  Statute   |  | Section  |  |  Citation  |  |Requirement |
107
+ |   Reader   |  | Analyzer |  |  Verifier  |  | Classifier |
108
+ |  (Sonnet)  |  | (Sonnet) |  |  (Python)  |  |  (Sonnet)  |
109
+ +------------+  +----------+  +------------+  +------------+
110
+ ```
111
+
112
+ ## Requirement Categories
113
+
114
+ - **DISCLOSURE**: Must be stated in privacy policy/notice
115
+ - **OPERATIONAL**: Internal compliance processes (response times, procedures)
116
+ - **TECHNICAL**: System/UI implementation (GPC signals, security measures, link placement, UI elements)
117
+ - **LEGAL FRAMEWORK**: Enforcement mechanisms, penalties, AG authority, cure periods
118
+
119
+ ## Output
120
+
121
+ The analysis produces:
122
+ - **Requirements**: List of all extracted requirements with citations
123
+ - **Definitions**: All defined terms from the statute
124
+ - **Structure**: Full statute section tree (IDs, types, titles, line ranges) — included by default in JSON export for the viewer's Structure tab
125
+ - **Verification**: Status of citation verification
126
+ - **Classification**: Category for each requirement
127
+
128
+ ## Key Principles
129
+
130
+ Based on lessons from analyzing tech regulation statutes:
131
+
132
+ 1. Start with definitions sections to anchor interpretation — defined terms control meaning throughout
133
+ 2. Separate disclosure requirements from operational and technical requirements
134
+ 3. Tech regulation statutes follow predictable architecture (definitions, scope, rights, duties, exemptions, enforcement)
135
+ 4. Obligations and defined terms vary across jurisdictions and regulatory domains — never assume uniformity
136
+ 5. Work section by section, not requirement by requirement — structure drives accurate extraction
137
+ 6. Every extracted requirement must trace back to a specific statutory provision with a verbatim quote
138
+
139
+ ## Requirements
140
+
141
+ - Python 3.11+
142
+ - Anthropic Agent SDK (`claude-agent-sdk`)
143
+ - Pydantic 2.0+
144
+ - Optional: pdfplumber or pypdf for PDF support
145
+
146
+ ## License
147
+
148
+ MIT
@@ -0,0 +1,48 @@
1
+ """TechRegParser - Multi-agent system for extracting requirements from tech regulation statutes.
2
+
3
+ This package provides a multi-agent system using the Anthropic Agent SDK
4
+ to read data privacy and tech regulation statutes, extract significant
5
+ requirements, and provide verified citations to prevent hallucinations.
6
+
7
+ Example usage:
8
+     from TechRegParser import TechRegParserOrchestrator
9
+
10
+     async def main():
11
+         parser = TechRegParserOrchestrator()
12
+         results = await parser.analyze_statute("path/to/statute.txt")
13
+
14
+         for req in results.requirements:
15
+             print(f"{req.description}")
16
+             print(f" Citation: {req.citation.section}")
17
+             print(f" Verified: {req.verified}")
18
+ """
19
+
20
+ from .agents import TechRegParserOrchestrator
21
+ from .models import (
22
+ AnalysisResult,
23
+ Citation,
24
+ Definition,
25
+ LegislativeIntent,
26
+ Requirement,
27
+ RequirementCategory,
28
+ StatuteStructure,
29
+ StatuteSection,
30
+ SectionType,
31
+ )
32
+ from .config import OrchestratorConfig
33
+
34
+ __version__ = "0.1.0"
35
+
36
+ __all__ = [
37
+ "TechRegParserOrchestrator",
38
+ "OrchestratorConfig",
39
+ "AnalysisResult",
40
+ "Citation",
41
+ "Definition",
42
+ "LegislativeIntent",
43
+ "Requirement",
44
+ "RequirementCategory",
45
+ "StatuteStructure",
46
+ "StatuteSection",
47
+ "SectionType",
48
+ ]
@@ -0,0 +1,7 @@
1
+ """Agent definitions for the TechRegParser system."""
2
+
3
+ from .orchestrator import TechRegParserOrchestrator
4
+
5
+ __all__ = [
6
+ "TechRegParserOrchestrator",
7
+ ]