xfmr-zem 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
xfmr_zem/cli.py CHANGED
@@ -193,15 +193,52 @@ def run(config_file, params, verbose):
 
     try:
         client = PipelineClient(abs_config, params_path=params)
+
+        # Dashboard URL (Pre-run)
+        try:
+            workspace_name = "default"
+            try:
+                from zenml.client import Client
+                zn_client = Client()
+                workspace_name = getattr(zn_client, "active_workspace_name",
+                                         getattr(zn_client.active_workspace, "name", "default"))
+            except:
+                pass
+            pre_run_url = f"http://127.0.0.1:8871/projects/{workspace_name}/runs"
+            console.print(f"[bold blue]Dashboard URL (Pre-run):[/bold blue] [link={pre_run_url}]{pre_run_url}[/link]")
+        except:
+            pass
+
         run_response = client.run()
 
         console.print(f"\n[bold blue]Pipeline Execution Finished![/bold blue]")
         console.print(f"Run Name: [cyan]{run_response.name}[/cyan]")
         console.print(f"Status: [yellow]{run_response.status}[/yellow]")
 
+        # ZenML dashboard URL
+        try:
+            run_id = getattr(run_response, "id", None)
+            if run_id:
+                workspace_name = "default"
+                try:
+                    from zenml.client import Client
+                    client = Client()
+                    # Try to get active workspace name
+                    if hasattr(client, "active_workspace_name"):
+                        workspace_name = client.active_workspace_name
+                    elif hasattr(client, "active_workspace"):
+                        workspace_name = client.active_workspace.name
+                except:
+                    pass
+
+                dashboard_url = f"http://127.0.0.1:8871/projects/{workspace_name}/runs/{run_id}/dag"
+                console.print(f"Dashboard URL (Run): [link={dashboard_url}]{dashboard_url}[/link]")
+        except Exception as e:
+            logger.debug(f"Could not generate dashboard URL: {e}")
+
         console.print(f"\n[dim]To visualize this run, ensure ZenML dashboard is running:[/dim]")
         console.print(f"[dim]uv run zenml up --port 8871[/dim]")
-        console.print(f"[dim]Or view runs via: zem dashboard[/dim]")  # Future proofing hint
+        console.print(f"[dim]Or view runs via: zem dashboard[/dim]")
 
     except Exception as e:
         console.print(f"\n[bold red]Pipeline Failed:[/bold red] {e}")
xfmr_zem/client.py CHANGED
@@ -51,22 +51,48 @@ class PipelineClient:
                 items.append((new_key, v))
         return dict(items)
 
+    def _unflatten_params(self, flat_dict: Dict[str, Any]) -> Dict[str, Any]:
+        """Expand dot-notation keys into nested dictionaries."""
+        nested = {}
+        for key, value in flat_dict.items():
+            if "." in key:
+                parts = key.split(".")
+                d = nested
+                for part in parts[:-1]:
+                    if part not in d or not isinstance(d[part], dict):
+                        d[part] = {}
+                    d = d[part]
+                d[parts[-1]] = value
+            else:
+                if isinstance(value, dict) and key in nested and isinstance(nested[key], dict):
+                    nested[key].update(value)
+                else:
+                    nested[key] = value
+        return nested
+
     def _load_config_dict(self, path: Path) -> Dict[str, Any]:
         """Load YAML config and perform substitution."""
         with open(path, "r") as f:
             raw_content = f.read()
 
-        self.params = self._load_params(None)
+        # 1. Load parameters from file
+        base_params = self._load_params(None)
+
+        # 2. Add custom parameters file if provided
+        if self.params_path:
+            custom_params = self._load_params(self.params_path)
+            base_params.update(custom_params)
+
+        # 3. Load internal parameters from the config file itself
         preliminary_dict = yaml.safe_load(raw_content) or {}
         internal_params = preliminary_dict.get("parameters", {})
         if internal_params:
-            self.params.update(internal_params)
-
-        if self.params_path:
-            custom_params = self._load_params(self.params_path)
-            self.params.update(custom_params)
+            base_params.update(internal_params)
 
-        # Flatten params for template substitution
+        # Store unflattened parameters for hierarchical lookup
+        self.params = self._unflatten_params(base_params)
+
+        # 4. Flatten all params for template substitution ({{ key }})
         flat_params = self._flatten_params(self.params)
 
         content = raw_content
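
To make the new parameter flow concrete, here is a standalone sketch (simplified, with made-up keys and values) of what the dot-notation expansion in `_unflatten_params` does:

    # Hypothetical flat params, e.g. merged from parameters.yml files and --params overrides.
    flat = {"ocr.engine": "deepdoc", "ocr.batch_size": 4, "log_level": "INFO"}

    # Simplified equivalent of the expansion step above.
    nested = {}
    for key, value in flat.items():
        d = nested
        parts = key.split(".")
        for part in parts[:-1]:
            d = d.setdefault(part, {})
        d[parts[-1]] = value

    print(nested)
    # {'ocr': {'engine': 'deepdoc', 'batch_size': 4}, 'log_level': 'INFO'}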
@@ -105,11 +131,12 @@ class PipelineClient:
         env["PYTHONPATH"] = f"{src_path}:{current_pythonpath}" if current_pythonpath else src_path
 
         server_specific_params = {}
-        prefix = f"{name}."
         for key, value in self.params.items():
-            if key.startswith(prefix):
-                server_specific_params[key[len(prefix):]] = value
-            else:
+            if key == name and isinstance(value, dict):
+                # Direct match: ocr -> { ... }
+                server_specific_params.update(value)
+            elif not isinstance(value, dict):
+                # Global scalars
                 server_specific_params[key] = value
 
         env["ZEM_PARAMETERS"] = yaml.dump(server_specific_params)
xfmr_zem/servers/ocr/server.py CHANGED
@@ -43,6 +43,7 @@ def extract_pdf_pages(
         # Temporary save for engine compatibility (engines expect path)
         temp_path = os.path.join(temp_dir, f"ocr_page_{os.getpid()}_{page_num}.png")
         img.save(temp_path)
+        logger.debug(f"Saved temporary page image to: {temp_path}")
 
         try:
             ocr_result = ocr_engine.process(temp_path)
xfmr_zem/servers/voice/engines.py ADDED
@@ -0,0 +1,66 @@
+import os
+import abc
+from typing import Dict, Any, List
+from loguru import logger
+
+class VoiceEngineBase(abc.ABC):
+    """
+    Abstract Base Class for Voice Engines.
+    """
+    @abc.abstractmethod
+    def transcribe(self, audio_path: str) -> Dict[str, Any]:
+        """Transcribe an audio file and return text and metadata."""
+        pass
+
+class WhisperEngine(VoiceEngineBase):
+    """
+    ASR using OpenAI Whisper.
+    """
+    def __init__(self, model_size: str = "base"):
+        self.model_size = model_size or "base"
+        self.model = None
+
+    def _lazy_load(self):
+        if self.model is None:
+            try:
+                import whisper
+                import torch
+
+                device = "cuda" if torch.cuda.is_available() else "cpu"
+                logger.info(f"Loading Whisper model: {self.model_size} on {device}...")
+                self.model = whisper.load_model(self.model_size, device=device)
+                logger.debug("Whisper model loaded successfully")
+            except ImportError:
+                logger.error("openai-whisper not installed. Please install with 'pip install openai-whisper'")
+                raise
+            except Exception as e:
+                logger.error(f"Error loading Whisper model: {e}")
+                raise
+
+    def transcribe(self, audio_path: str) -> Dict[str, Any]:
+        self._lazy_load()
+        logger.info(f"Using Whisper ({self.model_size}) to transcribe: {audio_path}")
+
+        result = self.model.transcribe(audio_path)
+
+        return {
+            "text": result["text"],
+            "segments": result.get("segments", []),
+            "language": result.get("language"),
+            "engine": f"whisper-{self.model_size}",
+            "metadata": {
+                "model_size": self.model_size,
+                "file": audio_path
+            }
+        }
+
+class VoiceEngineFactory:
+    """
+    Factory to create Voice engines.
+    """
+    @staticmethod
+    def get_engine(engine_type: str, **kwargs) -> VoiceEngineBase:
+        if engine_type == "whisper":
+            return WhisperEngine(model_size=kwargs.get("model_size"))
+        else:
+            raise ValueError(f"Unknown voice engine type: {engine_type}")
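
A minimal usage sketch for the factory above, assuming the voice dependencies (openai-whisper, torch) are installed; "sample.wav" is a placeholder path, not a file shipped with the package:

    from xfmr_zem.servers.voice.engines import VoiceEngineFactory

    engine = VoiceEngineFactory.get_engine("whisper", model_size="base")
    result = engine.transcribe("sample.wav")   # downloads the Whisper model on first use
    print(result["text"], result["language"], result["engine"])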
xfmr_zem/servers/voice/parameters.yml ADDED
@@ -0,0 +1,2 @@
+engine: whisper
+model_size: base
xfmr_zem/servers/voice/server.py ADDED
@@ -0,0 +1,54 @@
+import os
+import pandas as pd
+from xfmr_zem.server import ZemServer
+from xfmr_zem.servers.voice.engines import VoiceEngineFactory
+from loguru import logger
+
+# Initialize ZemServer for Voice
+mcp = ZemServer("voice")
+
+@mcp.tool()
+async def transcribe(
+    file_path: str,
+    engine: str = "whisper",
+    model_size: str = "base"
+) -> pd.DataFrame:
+    """
+    Transcribes an audio file using the specified voice engine.
+
+    Args:
+        file_path: Path to the audio file (wav, mp3, m4a, etc.).
+        engine: The voice engine to use (currently only "whisper"). Defaults to "whisper".
+        model_size: Whisper model size ("tiny", "base", "small", "medium", "large"). Defaults to "base".
+    """
+    logger.info(f"Voice Transcription: {file_path} using {engine} ({model_size})")
+
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"File not found: {file_path}")
+
+    try:
+        # Get engine from factory
+        voice_engine = VoiceEngineFactory.get_engine(engine, model_size=model_size)
+
+        # Transcribe
+        result = voice_engine.transcribe(file_path)
+
+        # Format as DataFrame
+        df = pd.DataFrame([{
+            "text": result["text"].strip(),
+            "language": result["language"],
+            "engine": result["engine"],
+            "metadata": result["metadata"]
+        }])
+
+        logger.info(f"Successfully transcribed {file_path}")
+        return df.to_dict(orient="records")
+
+    except Exception as e:
+        logger.error(f"Voice Error with {engine}: {e}")
+        import traceback
+        logger.error(traceback.format_exc())
+        raise RuntimeError(f"Transcription failed: {str(e)}")
+
+if __name__ == "__main__":
+    mcp.run()
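
Note that although the tool is annotated `-> pd.DataFrame`, it returns the frame converted with `to_dict(orient="records")`, i.e. a list with one dict per transcription. A tiny standalone sketch of that shape, with made-up values:

    import pandas as pd

    # Mirrors the fields assembled in the tool above; values are illustrative only.
    df = pd.DataFrame([{
        "text": "hello world",
        "language": "en",
        "engine": "whisper-base",
        "metadata": {"model_size": "base", "file": "sample.wav"},
    }])
    records = df.to_dict(orient="records")
    print(records[0]["text"])   # "hello world"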
xfmr_zem-0.2.7.dist-info/METADATA → xfmr_zem-0.2.9.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xfmr-zem
-Version: 0.2.7
+Version: 0.2.9
 Summary: Zem: Unified Data Pipeline Framework (ZenML + NeMo Curator + DataJuicer) for multi-domain processing
 Project-URL: Homepage, https://github.com/OAI-Labs/xfmr-zem
 Project-URL: Repository, https://github.com/OAI-Labs/xfmr-zem
@@ -58,6 +58,10 @@ Requires-Dist: shapely; extra == 'ocr'
 Requires-Dist: torch==2.5.1; extra == 'ocr'
 Requires-Dist: torchvision==0.20.1; extra == 'ocr'
 Requires-Dist: transformers>=4.40.0; extra == 'ocr'
+Provides-Extra: voice
+Requires-Dist: librosa; extra == 'voice'
+Requires-Dist: openai-whisper; extra == 'voice'
+Requires-Dist: soundfile; extra == 'voice'
 Provides-Extra: zenml
 Requires-Dist: zenml>=0.75.0; extra == 'zenml'
 Description-Content-Type: text/markdown
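
The new extra keeps the voice server's dependencies opt-in: installing with the voice extra, e.g. `pip install "xfmr-zem[voice]"` (or the equivalent uv command), pulls in librosa, openai-whisper, and soundfile on top of the base package.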
xfmr_zem-0.2.7.dist-info/RECORD → xfmr_zem-0.2.9.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
 xfmr_zem/__init__.py,sha256=Abx2BepsZu-e7E93N2lOgu9w0b4TBZLN6MEzCzDCn_A,1138
-xfmr_zem/cli.py,sha256=5oz4qxXthU4mXu7bSbfKreVkAvCqrieXpGoKhJBXBvk,12538
-xfmr_zem/client.py,sha256=wf9N_fILDBvWd-08TnNq3B1PqKQPhR0pvVuJq0vidk0,11435
+xfmr_zem/cli.py,sha256=-X44G4mVApz2NW5nbgrS9O9y9g51ZDyxXns54nATXnM,14132
+xfmr_zem/client.py,sha256=2PkJavZ8kMVq0dXoeZvpRODO96tWiXyT1alZLcw5RH0,12601
 xfmr_zem/schemas.py,sha256=0tHM0ftOWTWxNiqmAZn_MyIYJwF2p9brHK0MHlOMlKY,494
 xfmr_zem/server.py,sha256=EeohfqhUiCm0cGnV85H2ODZ4FLXjcTjbkdHrHuGHW4I,8363
 xfmr_zem/zenml_wrapper.py,sha256=LHgDewuPBjCl4EiU6JZVU-_lyEi-ATURDSG9Vf7PbEY,6739
@@ -18,7 +18,7 @@ xfmr_zem/servers/nemo_curator/server.py,sha256=zcHoSwxxoK_rMaDIAbEy1s8qfdp68Ue4B
 xfmr_zem/servers/ocr/engines.py,sha256=zScn4Qjxbpl2nB8UXEf3kd9l8z84TEwGs6bV5ka8Lks,10295
 xfmr_zem/servers/ocr/install_models.py,sha256=t02zpoy8djVhITOLEaRJ2mjiMrFfA9H6fpeHD3hXuio,2135
 xfmr_zem/servers/ocr/parameters.yml,sha256=UTMwtTu0Eeit0tFkYcZOxpuzD78UBlpONXZIx6STYwc,144
-xfmr_zem/servers/ocr/server.py,sha256=eJtQnMVBFX6PLZMxZITNlNEXGarjsvkz003-uT1iIo0,4369
+xfmr_zem/servers/ocr/server.py,sha256=wfk9L1776TOpFNlmc73jknEMDDobfcFgqBUhcVX2elc,4441
 xfmr_zem/servers/ocr/deepdoc_vietocr/__init__.py,sha256=XJE7RnOu5oo5p902HPWPDBd7FhVQXetmnr2-kWEG0nI,2419
 xfmr_zem/servers/ocr/deepdoc_vietocr/implementations.py,sha256=79fYr76fx8yZda3HaFcK1d5G-4sDVf1JFHNW_OBQAk8,47348
 xfmr_zem/servers/ocr/deepdoc_vietocr/layout_recognizer.py,sha256=7BeLHzf9FQUkkHMb5jDpggruJmfXVMU78MF_EeZ9PG4,10462
@@ -51,8 +51,11 @@ xfmr_zem/servers/sinks/parameters.yml,sha256=9HAnv84Utw2qWsVZH8uOjVE62lnAKBkzv4P
 xfmr_zem/servers/sinks/server.py,sha256=jI_r4sq_U_avNwF1PiE0alpaDrYpzOI-qPeLU7hgHP0,1589
 xfmr_zem/servers/unstructured/parameters.yml,sha256=N31cmc56GTr3rkVhbni4yOpbnHISReN8f-KnRZTDbBc,118
 xfmr_zem/servers/unstructured/server.py,sha256=0XmXWMAUNEJboX-J4bn_8EBUfMHIqu_ylNC_s9YOZdk,1996
-xfmr_zem-0.2.7.dist-info/METADATA,sha256=Iv77eb-eHw6rdJhG1LfoNY4Hf9I7oFlIsx1K3K7_sH0,6379
-xfmr_zem-0.2.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-xfmr_zem-0.2.7.dist-info/entry_points.txt,sha256=uxs-IXFxpSakHivpFN3mEr13cz-z-0vkeSF_4dEBMa4,65
-xfmr_zem-0.2.7.dist-info/licenses/LICENSE,sha256=kf_ILr0zLkSy5-EBu0VF2PGaOykYo83z3UijI-bZeAE,11342
-xfmr_zem-0.2.7.dist-info/RECORD,,
+xfmr_zem/servers/voice/engines.py,sha256=bF_wMJCNue3JQ6otYASoan7O70s4rqSHL1MbXB2Mlyo,2235
+xfmr_zem/servers/voice/parameters.yml,sha256=oM9hidow8nY6N6G80jjBCGczIbDGuGa9rmRVDXFREIs,33
+xfmr_zem/servers/voice/server.py,sha256=FCrVyj2mDRJTjEYCRtZ9D1ZwDemiDMO075DDqP_KcW0,1736
+xfmr_zem-0.2.9.dist-info/METADATA,sha256=so_5K_Da2QateoVzsJ_b2BZK5Rm8bMa0PzldHprg9-M,6533
+xfmr_zem-0.2.9.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+xfmr_zem-0.2.9.dist-info/entry_points.txt,sha256=uxs-IXFxpSakHivpFN3mEr13cz-z-0vkeSF_4dEBMa4,65
+xfmr_zem-0.2.9.dist-info/licenses/LICENSE,sha256=kf_ILr0zLkSy5-EBu0VF2PGaOykYo83z3UijI-bZeAE,11342
+xfmr_zem-0.2.9.dist-info/RECORD,,