xfmr-zem 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
xfmr_zem/cli.py CHANGED
@@ -193,15 +193,52 @@ def run(config_file, params, verbose):
 
     try:
         client = PipelineClient(abs_config, params_path=params)
+
+        # Dashboard URL (Pre-run)
+        try:
+            workspace_name = "default"
+            try:
+                from zenml.client import Client
+                zn_client = Client()
+                workspace_name = getattr(zn_client, "active_workspace_name",
+                                         getattr(zn_client.active_workspace, "name", "default"))
+            except:
+                pass
+            pre_run_url = f"http://127.0.0.1:8871/projects/{workspace_name}/runs"
+            console.print(f"[bold blue]Dashboard URL (Pre-run):[/bold blue] [link={pre_run_url}]{pre_run_url}[/link]")
+        except:
+            pass
+
         run_response = client.run()
 
         console.print(f"\n[bold blue]Pipeline Execution Finished![/bold blue]")
         console.print(f"Run Name: [cyan]{run_response.name}[/cyan]")
         console.print(f"Status: [yellow]{run_response.status}[/yellow]")
 
+        # ZenML dashboard URL
+        try:
+            run_id = getattr(run_response, "id", None)
+            if run_id:
+                workspace_name = "default"
+                try:
+                    from zenml.client import Client
+                    client = Client()
+                    # Try to get active workspace name
+                    if hasattr(client, "active_workspace_name"):
+                        workspace_name = client.active_workspace_name
+                    elif hasattr(client, "active_workspace"):
+                        workspace_name = client.active_workspace.name
+                except:
+                    pass
+
+                dashboard_url = f"http://127.0.0.1:8871/projects/{workspace_name}/runs/{run_id}/dag"
+                console.print(f"Dashboard URL (Run): [link={dashboard_url}]{dashboard_url}[/link]")
+        except Exception as e:
+            logger.debug(f"Could not generate dashboard URL: {e}")
+
         console.print(f"\n[dim]To visualize this run, ensure ZenML dashboard is running:[/dim]")
         console.print(f"[dim]uv run zenml up --port 8871[/dim]")
-        console.print(f"[dim]Or view runs via: zem dashboard[/dim]")  # Future proofing hint
+        console.print(f"[dim]Or view runs via: zem dashboard[/dim]")
 
     except Exception as e:
         console.print(f"\n[bold red]Pipeline Failed:[/bold red] {e}")
xfmr_zem/client.py CHANGED
@@ -51,22 +51,48 @@ class PipelineClient:
                 items.append((new_key, v))
         return dict(items)
 
+    def _unflatten_params(self, flat_dict: Dict[str, Any]) -> Dict[str, Any]:
+        """Expand dot-notation keys into nested dictionaries."""
+        nested = {}
+        for key, value in flat_dict.items():
+            if "." in key:
+                parts = key.split(".")
+                d = nested
+                for part in parts[:-1]:
+                    if part not in d or not isinstance(d[part], dict):
+                        d[part] = {}
+                    d = d[part]
+                d[parts[-1]] = value
+            else:
+                if isinstance(value, dict) and key in nested and isinstance(nested[key], dict):
+                    nested[key].update(value)
+                else:
+                    nested[key] = value
+        return nested
+
     def _load_config_dict(self, path: Path) -> Dict[str, Any]:
         """Load YAML config and perform substitution."""
         with open(path, "r") as f:
             raw_content = f.read()
 
-        self.params = self._load_params(None)
+        # 1. Load parameters from file
+        base_params = self._load_params(None)
+
+        # 2. Add custom parameters file if provided
+        if self.params_path:
+            custom_params = self._load_params(self.params_path)
+            base_params.update(custom_params)
+
+        # 3. Load internal parameters from the config file itself
         preliminary_dict = yaml.safe_load(raw_content) or {}
         internal_params = preliminary_dict.get("parameters", {})
         if internal_params:
-            self.params.update(internal_params)
-
-        if self.params_path:
-            custom_params = self._load_params(self.params_path)
-            self.params.update(custom_params)
+            base_params.update(internal_params)
 
-        # Flatten params for template substitution
+        # Store unflattened parameters for hierarchical lookup
+        self.params = self._unflatten_params(base_params)
+
+        # 4. Flatten all params for template substitution ({{ key }})
         flat_params = self._flatten_params(self.params)
 
         content = raw_content
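
To make the new parameter flow concrete, here is a standalone sketch (simplified, with made-up keys and values) of what the dot-notation expansion in `_unflatten_params` does:

    # Hypothetical flat params, e.g. merged from parameters.yml files and --params overrides.
    flat = {"ocr.engine": "deepdoc", "ocr.batch_size": 4, "log_level": "INFO"}

    # Simplified equivalent of the expansion step above.
    nested = {}
    for key, value in flat.items():
        d = nested
        parts = key.split(".")
        for part in parts[:-1]:
            d = d.setdefault(part, {})
        d[parts[-1]] = value

    print(nested)
    # {'ocr': {'engine': 'deepdoc', 'batch_size': 4}, 'log_level': 'INFO'}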
@@ -105,11 +131,12 @@ class PipelineClient:
         env["PYTHONPATH"] = f"{src_path}:{current_pythonpath}" if current_pythonpath else src_path
 
         server_specific_params = {}
-        prefix = f"{name}."
         for key, value in self.params.items():
-            if key.startswith(prefix):
-                server_specific_params[key[len(prefix):]] = value
-            else:
+            if key == name and isinstance(value, dict):
+                # Direct match: ocr -> { ... }
+                server_specific_params.update(value)
+            elif not isinstance(value, dict):
+                # Global scalars
                 server_specific_params[key] = value
 
         env["ZEM_PARAMETERS"] = yaml.dump(server_specific_params)
xfmr_zem/servers/ocr/server.py CHANGED
@@ -43,6 +43,7 @@ def extract_pdf_pages(
         # Temporary save for engine compatibility (engines expect path)
         temp_path = os.path.join(temp_dir, f"ocr_page_{os.getpid()}_{page_num}.png")
         img.save(temp_path)
+        logger.debug(f"Saved temporary page image to: {temp_path}")
 
         try:
             ocr_result = ocr_engine.process(temp_path)
xfmr_zem/servers/voice/engines.py ADDED
@@ -0,0 +1,66 @@
+import os
+import abc
+from typing import Dict, Any, List
+from loguru import logger
+
+class VoiceEngineBase(abc.ABC):
+    """
+    Abstract Base Class for Voice Engines.
+    """
+    @abc.abstractmethod
+    def transcribe(self, audio_path: str) -> Dict[str, Any]:
+        """Transcribe an audio file and return text and metadata."""
+        pass
+
+class WhisperEngine(VoiceEngineBase):
+    """
+    ASR using OpenAI Whisper.
+    """
+    def __init__(self, model_size: str = "base"):
+        self.model_size = model_size or "base"
+        self.model = None
+
+    def _lazy_load(self):
+        if self.model is None:
+            try:
+                import whisper
+                import torch
+
+                device = "cuda" if torch.cuda.is_available() else "cpu"
+                logger.info(f"Loading Whisper model: {self.model_size} on {device}...")
+                self.model = whisper.load_model(self.model_size, device=device)
+                logger.debug("Whisper model loaded successfully")
+            except ImportError:
+                logger.error("openai-whisper not installed. Please install with 'pip install openai-whisper'")
+                raise
+            except Exception as e:
+                logger.error(f"Error loading Whisper model: {e}")
+                raise
+
+    def transcribe(self, audio_path: str) -> Dict[str, Any]:
+        self._lazy_load()
+        logger.info(f"Using Whisper ({self.model_size}) to transcribe: {audio_path}")
+
+        result = self.model.transcribe(audio_path)
+
+        return {
+            "text": result["text"],
+            "segments": result.get("segments", []),
+            "language": result.get("language"),
+            "engine": f"whisper-{self.model_size}",
+            "metadata": {
+                "model_size": self.model_size,
+                "file": audio_path
+            }
+        }
+
+class VoiceEngineFactory:
+    """
+    Factory to create Voice engines.
+    """
+    @staticmethod
+    def get_engine(engine_type: str, **kwargs) -> VoiceEngineBase:
+        if engine_type == "whisper":
+            return WhisperEngine(model_size=kwargs.get("model_size"))
+        else:
+            raise ValueError(f"Unknown voice engine type: {engine_type}")
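
A minimal usage sketch for the factory above, assuming the voice dependencies (openai-whisper, torch) are installed; "sample.wav" is a placeholder path, not a file shipped with the package:

    from xfmr_zem.servers.voice.engines import VoiceEngineFactory

    engine = VoiceEngineFactory.get_engine("whisper", model_size="base")
    result = engine.transcribe("sample.wav")   # downloads the Whisper model on first use
    print(result["text"], result["language"], result["engine"])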
xfmr_zem/servers/voice/parameters.yml ADDED
@@ -0,0 +1,2 @@
+engine: whisper
+model_size: base
xfmr_zem/servers/voice/server.py ADDED
@@ -0,0 +1,54 @@
+import os
+import pandas as pd
+from xfmr_zem.server import ZemServer
+from xfmr_zem.servers.voice.engines import VoiceEngineFactory
+from loguru import logger
+
+# Initialize ZemServer for Voice
+mcp = ZemServer("voice")
+
+@mcp.tool()
+async def transcribe(
+    file_path: str,
+    engine: str = "whisper",
+    model_size: str = "base"
+) -> pd.DataFrame:
+    """
+    Transcribes an audio file using the specified voice engine.
+
+    Args:
+        file_path: Path to the audio file (wav, mp3, m4a, etc.).
+        engine: The voice engine to use (currently only "whisper"). Defaults to "whisper".
+        model_size: Whisper model size ("tiny", "base", "small", "medium", "large"). Defaults to "base".
+    """
+    logger.info(f"Voice Transcription: {file_path} using {engine} ({model_size})")
+
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"File not found: {file_path}")
+
+    try:
+        # Get engine from factory
+        voice_engine = VoiceEngineFactory.get_engine(engine, model_size=model_size)
+
+        # Transcribe
+        result = voice_engine.transcribe(file_path)
+
+        # Format as DataFrame
+        df = pd.DataFrame([{
+            "text": result["text"].strip(),
+            "language": result["language"],
+            "engine": result["engine"],
+            "metadata": result["metadata"]
+        }])
+
+        logger.info(f"Successfully transcribed {file_path}")
+        return df.to_dict(orient="records")
+
+    except Exception as e:
+        logger.error(f"Voice Error with {engine}: {e}")
+        import traceback
+        logger.error(traceback.format_exc())
+        raise RuntimeError(f"Transcription failed: {str(e)}")
+
+if __name__ == "__main__":
+    mcp.run()
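
Note that although the tool is annotated `-> pd.DataFrame`, it returns the frame converted with `to_dict(orient="records")`, i.e. a list with one dict per transcription. A tiny standalone sketch of that shape, with made-up values:

    import pandas as pd

    # Mirrors the fields assembled in the tool above; values are illustrative only.
    df = pd.DataFrame([{
        "text": "hello world",
        "language": "en",
        "engine": "whisper-base",
        "metadata": {"model_size": "base", "file": "sample.wav"},
    }])
    records = df.to_dict(orient="records")
    print(records[0]["text"])   # "hello world"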
xfmr_zem-0.2.7.dist-info/METADATA → xfmr_zem-0.2.9.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xfmr-zem
-Version: 0.2.7
+Version: 0.2.9
 Summary: Zem: Unified Data Pipeline Framework (ZenML + NeMo Curator + DataJuicer) for multi-domain processing
 Project-URL: Homepage, https://github.com/OAI-Labs/xfmr-zem
 Project-URL: Repository, https://github.com/OAI-Labs/xfmr-zem
@@ -58,6 +58,10 @@ Requires-Dist: shapely; extra == 'ocr'
 Requires-Dist: torch==2.5.1; extra == 'ocr'
 Requires-Dist: torchvision==0.20.1; extra == 'ocr'
 Requires-Dist: transformers>=4.40.0; extra == 'ocr'
+Provides-Extra: voice
+Requires-Dist: librosa; extra == 'voice'
+Requires-Dist: openai-whisper; extra == 'voice'
+Requires-Dist: soundfile; extra == 'voice'
 Provides-Extra: zenml
 Requires-Dist: zenml>=0.75.0; extra == 'zenml'
 Description-Content-Type: text/markdown
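
The new extra keeps the voice server's dependencies opt-in: installing with the voice extra, e.g. `pip install "xfmr-zem[voice]"` (or the equivalent uv command), pulls in librosa, openai-whisper, and soundfile on top of the base package.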
xfmr_zem-0.2.7.dist-info/RECORD → xfmr_zem-0.2.9.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
 xfmr_zem/__init__.py,sha256=Abx2BepsZu-e7E93N2lOgu9w0b4TBZLN6MEzCzDCn_A,1138
-xfmr_zem/cli.py,sha256=5oz4qxXthU4mXu7bSbfKreVkAvCqrieXpGoKhJBXBvk,12538
-xfmr_zem/client.py,sha256=wf9N_fILDBvWd-08TnNq3B1PqKQPhR0pvVuJq0vidk0,11435
+xfmr_zem/cli.py,sha256=-X44G4mVApz2NW5nbgrS9O9y9g51ZDyxXns54nATXnM,14132
+xfmr_zem/client.py,sha256=2PkJavZ8kMVq0dXoeZvpRODO96tWiXyT1alZLcw5RH0,12601
 xfmr_zem/schemas.py,sha256=0tHM0ftOWTWxNiqmAZn_MyIYJwF2p9brHK0MHlOMlKY,494
 xfmr_zem/server.py,sha256=EeohfqhUiCm0cGnV85H2ODZ4FLXjcTjbkdHrHuGHW4I,8363
 xfmr_zem/zenml_wrapper.py,sha256=LHgDewuPBjCl4EiU6JZVU-_lyEi-ATURDSG9Vf7PbEY,6739
@@ -18,7 +18,7 @@ xfmr_zem/servers/nemo_curator/server.py,sha256=zcHoSwxxoK_rMaDIAbEy1s8qfdp68Ue4B
 xfmr_zem/servers/ocr/engines.py,sha256=zScn4Qjxbpl2nB8UXEf3kd9l8z84TEwGs6bV5ka8Lks,10295
 xfmr_zem/servers/ocr/install_models.py,sha256=t02zpoy8djVhITOLEaRJ2mjiMrFfA9H6fpeHD3hXuio,2135
 xfmr_zem/servers/ocr/parameters.yml,sha256=UTMwtTu0Eeit0tFkYcZOxpuzD78UBlpONXZIx6STYwc,144
-xfmr_zem/servers/ocr/server.py,sha256=eJtQnMVBFX6PLZMxZITNlNEXGarjsvkz003-uT1iIo0,4369
+xfmr_zem/servers/ocr/server.py,sha256=wfk9L1776TOpFNlmc73jknEMDDobfcFgqBUhcVX2elc,4441
 xfmr_zem/servers/ocr/deepdoc_vietocr/__init__.py,sha256=XJE7RnOu5oo5p902HPWPDBd7FhVQXetmnr2-kWEG0nI,2419
 xfmr_zem/servers/ocr/deepdoc_vietocr/implementations.py,sha256=79fYr76fx8yZda3HaFcK1d5G-4sDVf1JFHNW_OBQAk8,47348
 xfmr_zem/servers/ocr/deepdoc_vietocr/layout_recognizer.py,sha256=7BeLHzf9FQUkkHMb5jDpggruJmfXVMU78MF_EeZ9PG4,10462
@@ -51,8 +51,11 @@ xfmr_zem/servers/sinks/parameters.yml,sha256=9HAnv84Utw2qWsVZH8uOjVE62lnAKBkzv4P
 xfmr_zem/servers/sinks/server.py,sha256=jI_r4sq_U_avNwF1PiE0alpaDrYpzOI-qPeLU7hgHP0,1589
 xfmr_zem/servers/unstructured/parameters.yml,sha256=N31cmc56GTr3rkVhbni4yOpbnHISReN8f-KnRZTDbBc,118
 xfmr_zem/servers/unstructured/server.py,sha256=0XmXWMAUNEJboX-J4bn_8EBUfMHIqu_ylNC_s9YOZdk,1996
-xfmr_zem-0.2.7.dist-info/METADATA,sha256=Iv77eb-eHw6rdJhG1LfoNY4Hf9I7oFlIsx1K3K7_sH0,6379
-xfmr_zem-0.2.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-xfmr_zem-0.2.7.dist-info/entry_points.txt,sha256=uxs-IXFxpSakHivpFN3mEr13cz-z-0vkeSF_4dEBMa4,65
-xfmr_zem-0.2.7.dist-info/licenses/LICENSE,sha256=kf_ILr0zLkSy5-EBu0VF2PGaOykYo83z3UijI-bZeAE,11342
-xfmr_zem-0.2.7.dist-info/RECORD,,
+xfmr_zem/servers/voice/engines.py,sha256=bF_wMJCNue3JQ6otYASoan7O70s4rqSHL1MbXB2Mlyo,2235
+xfmr_zem/servers/voice/parameters.yml,sha256=oM9hidow8nY6N6G80jjBCGczIbDGuGa9rmRVDXFREIs,33
+xfmr_zem/servers/voice/server.py,sha256=FCrVyj2mDRJTjEYCRtZ9D1ZwDemiDMO075DDqP_KcW0,1736
+xfmr_zem-0.2.9.dist-info/METADATA,sha256=so_5K_Da2QateoVzsJ_b2BZK5Rm8bMa0PzldHprg9-M,6533
+xfmr_zem-0.2.9.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+xfmr_zem-0.2.9.dist-info/entry_points.txt,sha256=uxs-IXFxpSakHivpFN3mEr13cz-z-0vkeSF_4dEBMa4,65
+xfmr_zem-0.2.9.dist-info/licenses/LICENSE,sha256=kf_ILr0zLkSy5-EBu0VF2PGaOykYo83z3UijI-bZeAE,11342
+xfmr_zem-0.2.9.dist-info/RECORD,,