usdm4-protocol 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- usdm4_protocol/__info__.py +3 -0
- usdm4_protocol/__init__.py +104 -0
- usdm4_protocol/common/__init__.py +1 -0
- usdm4_protocol/common/ai/__init__.py +1 -0
- usdm4_protocol/common/ai/base_ai.py +53 -0
- usdm4_protocol/common/ai/claude_provider.py +175 -0
- usdm4_protocol/common/ai/fallback_provider.py +34 -0
- usdm4_protocol/common/assemble/__init__.py +1 -0
- usdm4_protocol/common/assemble/assemble_usdm.py +52 -0
- usdm4_protocol/common/base_import.py +60 -0
- usdm4_protocol/common/extract/__init__.py +1 -0
- usdm4_protocol/common/extract/combined_row_classifier.py +254 -0
- usdm4_protocol/common/extract/content_extractor.py +67 -0
- usdm4_protocol/common/extract/ie_extractor.py +328 -0
- usdm4_protocol/common/extract/section_finder.py +49 -0
- usdm4_protocol/common/extract/soa_row_classifier.py +201 -0
- usdm4_protocol/common/extract/utility.py +155 -0
- usdm4_protocol/common/html/__init__.py +1 -0
- usdm4_protocol/common/html/clean_html.py +1 -0
- usdm4_protocol/common/html/expand_table.py +133 -0
- usdm4_protocol/common/html/soup_utils.py +35 -0
- usdm4_protocol/common/load/__init__.py +18 -0
- usdm4_protocol/cpt/__init__.py +66 -0
- usdm4_protocol/cpt/import_/__init__.py +0 -0
- usdm4_protocol/cpt/import_/cpt_import.py +53 -0
- usdm4_protocol/cpt/import_/extract/__init__.py +55 -0
- usdm4_protocol/cpt/import_/extract/lab_tests.py +397 -0
- usdm4_protocol/cpt/import_/extract/title_page.py +197 -0
- usdm4_protocol/cpt/import_/load/__init__.py +2 -0
- usdm4_protocol/cpt/views/__init__.py +0 -0
- usdm4_protocol/cpt/views/document_view.py +49 -0
- usdm4_protocol/legacy/__init__.py +41 -0
- usdm4_protocol/legacy/import_/__init__.py +0 -0
- usdm4_protocol/legacy/import_/extract/__init__.py +132 -0
- usdm4_protocol/legacy/import_/extract/inclusion_exclusion.py +182 -0
- usdm4_protocol/legacy/import_/extract/schedule_of_activities.py +277 -0
- usdm4_protocol/legacy/import_/extract/title_page.py +87 -0
- usdm4_protocol/legacy/import_/legacy_import.py +54 -0
- usdm4_protocol/legacy/import_/load/__init__.py +19 -0
- usdm4_protocol/legacy/import_/load/clean_html.py +216 -0
- usdm4_protocol/legacy/import_/load/split_html.py +307 -0
- usdm4_protocol/legacy/import_/load/to_html.py +54 -0
- usdm4_protocol/legacy/import_/load/to_html_base.py +30 -0
- usdm4_protocol/legacy/import_/load/to_html_docling.py +49 -0
- usdm4_protocol/legacy/import_/load/to_html_pymupdf.py +203 -0
- usdm4_protocol/legacy/views/__init__.py +0 -0
- usdm4_protocol/m11/__init__.py +101 -0
- usdm4_protocol/m11/data/mapping/title_page_mapping.yaml +1488 -0
- usdm4_protocol/m11/data/specification/elements/title_page_elements.yaml +3153 -0
- usdm4_protocol/m11/data/specification/sections.yaml +10 -0
- usdm4_protocol/m11/data/specification/templates/title_page_template.html +260 -0
- usdm4_protocol/m11/elements/__init__.py +3 -0
- usdm4_protocol/m11/elements/elements.py +479 -0
- usdm4_protocol/m11/export/__init__.py +0 -0
- usdm4_protocol/m11/export/m11_export.py +88 -0
- usdm4_protocol/m11/import_/__init__.py +0 -0
- usdm4_protocol/m11/import_/extract/__init__.py +46 -0
- usdm4_protocol/m11/import_/extract/amendments.py +381 -0
- usdm4_protocol/m11/import_/extract/document.py +86 -0
- usdm4_protocol/m11/import_/extract/inclusion_exclusion.py +155 -0
- usdm4_protocol/m11/import_/extract/title_page.py +708 -0
- usdm4_protocol/m11/import_/extract/utility.py +10 -0
- usdm4_protocol/m11/import_/load/__init__.py +2 -0
- usdm4_protocol/m11/import_/m11_import.py +20 -0
- usdm4_protocol/m11/specification/__init__.py +46 -0
- usdm4_protocol/m11/specification/files.py +30 -0
- usdm4_protocol/m11/specification/section.py +54 -0
- usdm4_protocol/m11/specification/sections.py +24 -0
- usdm4_protocol/m11/views/__init__.py +0 -0
- usdm4_protocol/m11/views/data_view.py +37 -0
- usdm4_protocol/m11/views/document_view.py +20 -0
- usdm4_protocol/soa/__init__.py +1 -0
- usdm4_protocol/soa/decode_soa.py +197 -0
- usdm4_protocol/soa/features/__init__.py +1 -0
- usdm4_protocol/soa/features/activities.py +167 -0
- usdm4_protocol/soa/features/activity_row.py +80 -0
- usdm4_protocol/soa/features/conditions.py +49 -0
- usdm4_protocol/soa/features/epochs.py +241 -0
- usdm4_protocol/soa/features/notes.py +171 -0
- usdm4_protocol/soa/features/row_classifier.py +318 -0
- usdm4_protocol/soa/features/timepoints.py +475 -0
- usdm4_protocol/soa/features/utility.py +47 -0
- usdm4_protocol/soa/features/visits.py +222 -0
- usdm4_protocol/soa/features/windows.py +293 -0
- usdm4_protocol/soa/soa_extractor.py +103 -0
- usdm4_protocol/soa/soa_model.py +236 -0
- usdm4_protocol-0.1.0.dist-info/METADATA +286 -0
- usdm4_protocol-0.1.0.dist-info/RECORD +91 -0
- usdm4_protocol-0.1.0.dist-info/WHEEL +5 -0
- usdm4_protocol-0.1.0.dist-info/licenses/LICENSE +661 -0
- usdm4_protocol-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
from usdm4_protocol.__info__ import (
|
|
2
|
+
__package_version__ as __package_version__,
|
|
3
|
+
__model_version__ as __model_version__,
|
|
4
|
+
__system_name__ as __system_name__,
|
|
5
|
+
)
|
|
6
|
+
from usdm4_protocol.m11 import USDM4M11
|
|
7
|
+
from usdm4_protocol.cpt import USDM4CPT
|
|
8
|
+
from usdm4_protocol.legacy import USDM4Legacy
|
|
9
|
+
from simple_error_log.errors import Errors
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class USDM4Protocol:
|
|
13
|
+
"""Unified entry point for importing clinical trial protocols into USDM4.
|
|
14
|
+
|
|
15
|
+
Supports three protocol formats:
|
|
16
|
+
- M11 (ICH M11 template, DOCX)
|
|
17
|
+
- CPT (TransCelerate Common Protocol Template, DOCX)
|
|
18
|
+
- Legacy (unknown sponsor format, PDF)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
def __init__(self):
|
|
22
|
+
self._errors = Errors()
|
|
23
|
+
self._handler = None
|
|
24
|
+
|
|
25
|
+
def from_m11(self, filepath: str, use_ai: bool = False):
|
|
26
|
+
"""Import an M11-formatted protocol (DOCX)."""
|
|
27
|
+
self._handler = USDM4M11()
|
|
28
|
+
return self._handler.from_docx(filepath, use_ai=use_ai)
|
|
29
|
+
|
|
30
|
+
def from_cpt(self, filepath: str):
|
|
31
|
+
"""Import a CPT-formatted protocol (DOCX)."""
|
|
32
|
+
self._handler = USDM4CPT()
|
|
33
|
+
return self._handler.from_docx(filepath)
|
|
34
|
+
|
|
35
|
+
def from_pdf(self, filepath: str, pdf_converter: str = "auto"):
|
|
36
|
+
"""Import a legacy PDF protocol.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
filepath: Path to the PDF file.
|
|
40
|
+
pdf_converter: Which converter to use.
|
|
41
|
+
- "auto": Use docling if available, otherwise pymupdf.
|
|
42
|
+
- "docling": Use docling (pip install usdm4_protocol[pdf-docling]).
|
|
43
|
+
- "pymupdf": Use pymupdf (pip install usdm4_protocol[pdf]).
|
|
44
|
+
"""
|
|
45
|
+
self._handler = USDM4Legacy()
|
|
46
|
+
return self._handler.from_pdf(filepath, pdf_converter=pdf_converter)
|
|
47
|
+
|
|
48
|
+
def from_file(self, filepath: str, use_ai: bool = False):
|
|
49
|
+
"""Import a protocol, detecting format from the file extension and content.
|
|
50
|
+
|
|
51
|
+
For .pdf files, uses the Legacy handler.
|
|
52
|
+
For .docx files, attempts M11 first (if use_ai is requested or M11 markers
|
|
53
|
+
are detected), otherwise falls back to CPT.
|
|
54
|
+
"""
|
|
55
|
+
lower = filepath.lower()
|
|
56
|
+
if lower.endswith(".pdf"):
|
|
57
|
+
return self.from_pdf(filepath, pdf_converter="auto")
|
|
58
|
+
elif lower.endswith(".docx"):
|
|
59
|
+
if use_ai:
|
|
60
|
+
return self.from_m11(filepath, use_ai=True)
|
|
61
|
+
return self.from_cpt(filepath)
|
|
62
|
+
else:
|
|
63
|
+
self._errors.error(f"Unsupported file format: {filepath}")
|
|
64
|
+
return None
|
|
65
|
+
|
|
66
|
+
def to_html(self, file_path: str, template: str = "M11") -> str | None:
|
|
67
|
+
"""Export USDM4 data to HTML.
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
file_path: Path to USDM4 JSON file.
|
|
71
|
+
template: Template to use ("M11" or "CPT").
|
|
72
|
+
"""
|
|
73
|
+
if template.upper() == "M11":
|
|
74
|
+
handler = USDM4M11()
|
|
75
|
+
return handler.to_html(file_path)
|
|
76
|
+
elif template.upper() == "CPT":
|
|
77
|
+
handler = USDM4CPT()
|
|
78
|
+
return handler.to_html(file_path)
|
|
79
|
+
else:
|
|
80
|
+
self._errors.error(f"Unsupported template: {template}")
|
|
81
|
+
return None
|
|
82
|
+
|
|
83
|
+
def data_views(self, file_path: str) -> dict:
|
|
84
|
+
"""Generate data views from USDM4 data (M11 format)."""
|
|
85
|
+
handler = USDM4M11()
|
|
86
|
+
return handler.data_views(file_path)
|
|
87
|
+
|
|
88
|
+
@property
|
|
89
|
+
def source(self) -> dict:
|
|
90
|
+
if self._handler:
|
|
91
|
+
return self._handler.source
|
|
92
|
+
return {}
|
|
93
|
+
|
|
94
|
+
@property
|
|
95
|
+
def source_no_sections(self) -> dict:
|
|
96
|
+
if self._handler:
|
|
97
|
+
return self._handler.source_no_sections
|
|
98
|
+
return {}
|
|
99
|
+
|
|
100
|
+
@property
|
|
101
|
+
def errors(self):
|
|
102
|
+
if self._handler:
|
|
103
|
+
return self._handler.errors
|
|
104
|
+
return self._errors
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Common utilities for all packages
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# AI providers
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class BaseAIProvider(ABC):
|
|
5
|
+
"""Abstract base class for AI providers."""
|
|
6
|
+
|
|
7
|
+
@property
|
|
8
|
+
@abstractmethod
|
|
9
|
+
def available(self) -> bool:
|
|
10
|
+
"""Check if the AI provider is available."""
|
|
11
|
+
pass
|
|
12
|
+
|
|
13
|
+
@abstractmethod
|
|
14
|
+
def prompt(self, text: str, system: str = "") -> str:
|
|
15
|
+
"""
|
|
16
|
+
Send a prompt to the AI model and get a response.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
text: The prompt text
|
|
20
|
+
system: Optional system message for context
|
|
21
|
+
|
|
22
|
+
Returns:
|
|
23
|
+
The response text from the AI model, or None if unavailable
|
|
24
|
+
"""
|
|
25
|
+
pass
|
|
26
|
+
|
|
27
|
+
@abstractmethod
|
|
28
|
+
def streaming_prompt(self, text: str, system_message: str = "") -> str:
|
|
29
|
+
"""
|
|
30
|
+
Send a prompt to the AI model with streaming response.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
text: The prompt text
|
|
34
|
+
system_message: Optional system message for context
|
|
35
|
+
|
|
36
|
+
Returns:
|
|
37
|
+
The full response text from the AI model, or None if unavailable
|
|
38
|
+
"""
|
|
39
|
+
pass
|
|
40
|
+
|
|
41
|
+
@abstractmethod
|
|
42
|
+
def extract_json(self, text: str, dict: bool = True) -> dict | list | None:
|
|
43
|
+
"""
|
|
44
|
+
Extract JSON from AI response text.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
text: The response text containing JSON
|
|
48
|
+
dict: If True, extract dictionary; if False, extract list
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
Parsed JSON object/list, or None if extraction failed
|
|
52
|
+
"""
|
|
53
|
+
pass
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from simple_error_log.errors import Errors
|
|
3
|
+
from simple_error_log.error_location import KlassMethodLocation
|
|
4
|
+
from usdm4_protocol.common.ai.base_ai import BaseAIProvider
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
from anthropic import Anthropic
|
|
8
|
+
from d4k_ms_base.service_environment import ServiceEnvironment
|
|
9
|
+
|
|
10
|
+
ANTHROPIC_AVAILABLE = True
|
|
11
|
+
except ImportError:
|
|
12
|
+
ANTHROPIC_AVAILABLE = False
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ClaudeProvider(BaseAIProvider):
    """Claude AI provider using Anthropic API.

    The provider never raises from its public methods: problems are recorded
    on the supplied error log and signalled to the caller by returning None.
    """

    MODULE = "usdm4_protocol.common.ai.claude_provider.ClaudeProvider"
    DEFAULT_MODEL = "claude-haiku-4-5-20251001"
    # NOTE(review): not read anywhere in this class; confirm the figures and
    # their unit against the current price list for DEFAULT_MODEL.
    MODEL_PRICING = {DEFAULT_MODEL: {"input": 15.0, "output": 75.0}}

    def __init__(self, errors: Errors, model: str | None = None):
        """
        Initialize the Claude provider.

        The client is only created when the optional packages were imported
        successfully and the ANTHROPIC_API_KEY setting is non-empty; in every
        other case the provider stays unavailable and the reason is logged.

        Args:
            errors: Error logging object
            model: Model name (defaults to DEFAULT_MODEL)
        """
        self._errors = errors
        self._model = model or self.DEFAULT_MODEL
        self._client = None
        self._available = False

        if not ANTHROPIC_AVAILABLE:
            errors.warning(
                "Anthropic package not installed. Install with 'pip install anthropic d4k_ms_base'",
                KlassMethodLocation(self.MODULE, "__init__"),
            )
            return

        try:
            api_key = ServiceEnvironment().get("ANTHROPIC_API_KEY")
            if not api_key:
                errors.error(
                    "Anthropic API key environment variable is not set",
                    KlassMethodLocation(self.MODULE, "__init__"),
                )
            else:
                self._client = Anthropic(api_key=api_key)
                self._available = True
        except Exception as e:
            errors.exception(
                "Failed to initialize Anthropic client",
                e,
                KlassMethodLocation(self.MODULE, "__init__"),
            )

    @property
    def available(self) -> bool:
        """Check if the Claude provider is available."""
        return self._available

    def prompt(self, text: str, system: str = "") -> str | None:
        """Send a prompt to Claude and get a response.

        Args:
            text: The prompt text, sent as a single user message.
            system: Optional system message; an empty string is sent when
                none is given.

        Returns:
            The text of the first content block of the reply, or None when
            there is no client or the request fails (the failure is logged).
        """
        if not self._client:
            self._errors.error(
                "No client object found",
                KlassMethodLocation(self.MODULE, "prompt"),
            )
            return None

        try:
            message = self._client.messages.create(
                max_tokens=1024,
                system=system if system else "",
                messages=[
                    {
                        "role": "user",
                        "content": text,
                    }
                ],
                model=self._model,
            )
            return message.content[0].text
        except Exception as e:
            self._errors.exception(
                "Error executing prompt",
                e,
                KlassMethodLocation(self.MODULE, "prompt"),
            )
            return None

    def streaming_prompt(self, text: str, system_message: str = "") -> str | None:
        """Send a prompt to Claude with streaming response.

        Args:
            text: The prompt text, sent as a single user message.
            system_message: Optional system message; an empty string is sent
                when none is given.

        Returns:
            The concatenated streamed text, or None when there is no client
            or the request/stream fails (the failure is logged).
        """
        if not self._client:
            self._errors.error(
                "No client object found",
                KlassMethodLocation(self.MODULE, "streaming_prompt"),
            )
            return None

        try:
            with self._client.messages.stream(
                model=self._model,
                max_tokens=16384,
                temperature=0,
                system=system_message if system_message else "",
                messages=[
                    {
                        "role": "user",
                        "content": text,
                    }
                ],
            ) as stream:
                return self._streaming_response(stream)
        except Exception as e:
            self._errors.exception(
                "Error executing streaming prompt",
                e,
                KlassMethodLocation(self.MODULE, "streaming_prompt"),
            )
            return None

    def extract_json(self, text: str, dict: bool = True) -> dict | list | None:
        """Extract JSON from response text.

        Newline characters are removed first; the span from the first opening
        bracket to the last closing bracket is then parsed with ``json.loads``.

        Args:
            text: The response text containing JSON.
            dict: If True look for ``{...}``; if False look for ``[...]``.

        Returns:
            The parsed value, or None when the text is empty, no bracketed
            span is found, or parsing fails (each case is logged).
        """
        if not text:
            self._errors.error(
                "Error decoding Claude response - empty text",
                KlassMethodLocation(self.MODULE, "extract_json"),
            )
            return None

        try:
            result = text.replace("\n", "")
            opener, closer = ("{", "}") if dict else ("[", "]")
            s_index = result.find(opener)
            e_index = result.rfind(closer)

            # find/rfind return -1 when absent, so this single chained test
            # covers "both present" and "closer after opener".
            if 0 <= s_index < e_index:
                return json.loads(result[s_index : e_index + 1])

            self._errors.error(
                "Error decoding Claude response - no JSON found",
                KlassMethodLocation(self.MODULE, "extract_json"),
            )
            return None
        except Exception as e:
            self._errors.exception(
                "Error decoding Claude JSON",
                e,
                KlassMethodLocation(self.MODULE, "extract_json"),
            )
            return None

    def _streaming_response(self, stream) -> str | None:
        """Process streamed response from Claude.

        Collects ``delta.text`` from every streamed item that carries one and
        joins the pieces in arrival order.

        Returns:
            The joined text, or None when reading the stream fails (logged).
        """
        pieces = []
        try:
            for chunk in stream:
                # Items without a text delta are skipped.
                if hasattr(chunk, "delta") and hasattr(chunk.delta, "text"):
                    pieces.append(chunk.delta.text)
            # Joined inside the try so a non-string piece is still reported
            # through the error log rather than raised.
            return "".join(pieces)
        except Exception as e:
            self._errors.exception(
                "Error decoding Claude stream",
                e,
                KlassMethodLocation(self.MODULE, "_streaming_response"),
            )
            return None
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from simple_error_log.errors import Errors
|
|
2
|
+
from usdm4_protocol.common.ai.base_ai import BaseAIProvider
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class FallbackProvider(BaseAIProvider):
    """No-op AI provider: reports itself unavailable and answers None to everything."""

    MODULE = "usdm4_protocol.common.ai.fallback_provider.FallbackProvider"

    def __init__(self, errors: Errors):
        """
        Create the fallback provider.

        Args:
            errors: Error logging object (stored; not used by any method here)
        """
        self._errors = errors

    @property
    def available(self) -> bool:
        """Always False: the fallback can never service a prompt."""
        return False

    def prompt(self, text: str, system: str = "") -> None:
        """Ignore the prompt; the result is always None."""

    def streaming_prompt(self, text: str, system_message: str = "") -> None:
        """Ignore the prompt; the result is always None."""

    def extract_json(self, text: str, dict: bool = True) -> None:
        """Ignore the text; the result is always None."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Assembly utilities
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from usdm4 import USDM4
|
|
2
|
+
from simple_error_log.errors import Errors
|
|
3
|
+
from simple_error_log.error_location import KlassMethodLocation
|
|
4
|
+
from usdm4.assembler.assembler import Assembler
|
|
5
|
+
from usdm4.api.wrapper import Wrapper
|
|
6
|
+
from usdm4_protocol.__info__ import (
|
|
7
|
+
__package_version__ as system_version,
|
|
8
|
+
__system_name__ as system_name,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AssembleUSDM:
    """Unified USDM assembler for all source formats."""

    MODULE = "usdm4_protocol.common.assemble.assemble_usdm.AssembleUSDM"

    def __init__(self, source_data: dict, errors: Errors):
        """
        Set up the assembler for one source dictionary.

        Args:
            source_data: Source data dictionary to be assembled into USDM4
            errors: Error logging object, shared with the underlying assembler
        """
        self._source_data = source_data
        self._errors = errors
        self._usdm4 = USDM4()
        self._assembler: Assembler = self._usdm4.assembler(errors)

    def process(self) -> Wrapper:
        """
        Run the assembler over the source data.

        Returns:
            The wrapper produced by the assembler (stamped with this
            package's system name and version), or an empty dict when
            assembly raises; the exception is recorded on the error log.
        """
        try:
            self._assembler.execute(self._source_data)
            outcome = self._assembler.wrapper(system_name, system_version)
        except Exception as exc:
            self._errors.exception(
                "Exception raised assembling USDM",
                exc,
                KlassMethodLocation(self.MODULE, "process"),
            )
            outcome = {}
        return outcome

    @property
    def source(self):
        """The source data dictionary handed to the constructor."""
        return self._source_data
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from simple_error_log.errors import Errors
|
|
4
|
+
from simple_error_log.error_location import KlassMethodLocation
|
|
5
|
+
from usdm4_protocol.common.assemble.assemble_usdm import AssembleUSDM
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseImport(ABC):
    """Base class for format-specific import handlers.

    Provides the shared Load -> Extract -> Assemble -> Wrapper pipeline and
    common properties (source, source_no_sections, extra). Subclasses must
    implement _load() and _extract() to supply format-specific behaviour.
    """

    MODULE = "usdm4_protocol.common.base_import.BaseImport"

    def __init__(self, file_path: str, errors: Errors):
        """
        Args:
            file_path: Path of the file to import (used by subclasses and in
                error messages).
            errors: Error logging object shared with the assembler.
        """
        self._file_path = file_path
        self._errors = errors
        # Extracted study dictionary; stays None until _extract() succeeds.
        self._study = None

    def process(self):
        """Run the load, extract and assemble steps in order.

        Returns:
            The result of ``AssembleUSDM.process()``, or None when any step
            raises; the exception is recorded on the error log.
        """
        try:
            loaded = self._load()
            self._study = self._extract(loaded)
            assembler = AssembleUSDM(self._study, self._errors)
            return assembler.process()
        except Exception as e:
            location = KlassMethodLocation(self.MODULE, "process")
            self._errors.exception(
                f"Exception raised processing '{self._file_path}'",
                e,
                location,
            )
            return None

    @abstractmethod
    def _load(self):
        """Load the source file. Returns format-specific data."""

    @abstractmethod
    def _extract(self, loaded) -> dict:
        """Extract study data from the loaded source. Returns study dict."""

    @property
    def source(self) -> dict | None:
        """The extracted study dictionary, or None if nothing was extracted yet."""
        return self._study

    @property
    def source_no_sections(self) -> dict:
        """A deep copy of the study with ``document.sections`` emptied.

        The stored study is never modified. Returns ``{}`` when no study
        dictionary is available (before ``process()`` or after a failed run);
        a study without a "document" dictionary is returned as a plain copy.
        """
        if not isinstance(self._study, dict):
            return {}
        the_copy = copy.deepcopy(self._study)
        document = the_copy.get("document")
        if isinstance(document, dict):
            document["sections"] = []
        return the_copy

    @property
    def extra(self) -> dict:
        """Override in subclasses that need extra metadata."""
        return {}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Extract utilities
|