xfmr-zem 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xfmr_zem/cli.py +38 -1
- xfmr_zem/servers/voice/engines.py +66 -0
- xfmr_zem/servers/voice/parameters.yml +2 -0
- xfmr_zem/servers/voice/server.py +54 -0
- {xfmr_zem-0.2.8.dist-info → xfmr_zem-0.2.9.dist-info}/METADATA +5 -1
- {xfmr_zem-0.2.8.dist-info → xfmr_zem-0.2.9.dist-info}/RECORD +9 -6
- {xfmr_zem-0.2.8.dist-info → xfmr_zem-0.2.9.dist-info}/WHEEL +0 -0
- {xfmr_zem-0.2.8.dist-info → xfmr_zem-0.2.9.dist-info}/entry_points.txt +0 -0
- {xfmr_zem-0.2.8.dist-info → xfmr_zem-0.2.9.dist-info}/licenses/LICENSE +0 -0
xfmr_zem/cli.py
CHANGED
|
@@ -193,15 +193,52 @@ def run(config_file, params, verbose):
|
|
|
193
193
|
|
|
194
194
|
try:
|
|
195
195
|
client = PipelineClient(abs_config, params_path=params)
|
|
196
|
+
|
|
197
|
+
# Dashboard URL (Pre-run)
|
|
198
|
+
try:
|
|
199
|
+
workspace_name = "default"
|
|
200
|
+
try:
|
|
201
|
+
from zenml.client import Client
|
|
202
|
+
zn_client = Client()
|
|
203
|
+
workspace_name = getattr(zn_client, "active_workspace_name",
|
|
204
|
+
getattr(zn_client.active_workspace, "name", "default"))
|
|
205
|
+
except:
|
|
206
|
+
pass
|
|
207
|
+
pre_run_url = f"http://127.0.0.1:8871/projects/{workspace_name}/runs"
|
|
208
|
+
console.print(f"[bold blue]Dashboard URL (Pre-run):[/bold blue] [link={pre_run_url}]{pre_run_url}[/link]")
|
|
209
|
+
except:
|
|
210
|
+
pass
|
|
211
|
+
|
|
196
212
|
run_response = client.run()
|
|
197
213
|
|
|
198
214
|
console.print(f"\n[bold blue]Pipeline Execution Finished![/bold blue]")
|
|
199
215
|
console.print(f"Run Name: [cyan]{run_response.name}[/cyan]")
|
|
200
216
|
console.print(f"Status: [yellow]{run_response.status}[/yellow]")
|
|
201
217
|
|
|
218
|
+
# ZenML dashboard URL
|
|
219
|
+
try:
|
|
220
|
+
run_id = getattr(run_response, "id", None)
|
|
221
|
+
if run_id:
|
|
222
|
+
workspace_name = "default"
|
|
223
|
+
try:
|
|
224
|
+
from zenml.client import Client
|
|
225
|
+
client = Client()
|
|
226
|
+
# Try to get active workspace name
|
|
227
|
+
if hasattr(client, "active_workspace_name"):
|
|
228
|
+
workspace_name = client.active_workspace_name
|
|
229
|
+
elif hasattr(client, "active_workspace"):
|
|
230
|
+
workspace_name = client.active_workspace.name
|
|
231
|
+
except:
|
|
232
|
+
pass
|
|
233
|
+
|
|
234
|
+
dashboard_url = f"http://127.0.0.1:8871/projects/{workspace_name}/runs/{run_id}/dag"
|
|
235
|
+
console.print(f"Dashboard URL (Run): [link={dashboard_url}]{dashboard_url}[/link]")
|
|
236
|
+
except Exception as e:
|
|
237
|
+
logger.debug(f"Could not generate dashboard URL: {e}")
|
|
238
|
+
|
|
202
239
|
console.print(f"\n[dim]To visualize this run, ensure ZenML dashboard is running:[/dim]")
|
|
203
240
|
console.print(f"[dim]uv run zenml up --port 8871[/dim]")
|
|
204
|
-
console.print(f"[dim]Or view runs via: zem dashboard[/dim]")
|
|
241
|
+
console.print(f"[dim]Or view runs via: zem dashboard[/dim]")
|
|
205
242
|
|
|
206
243
|
except Exception as e:
|
|
207
244
|
console.print(f"\n[bold red]Pipeline Failed:[/bold red] {e}")
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import abc
|
|
3
|
+
from typing import Dict, Any, List
|
|
4
|
+
from loguru import logger
|
|
5
|
+
|
|
6
|
+
class VoiceEngineBase(abc.ABC):
    """Common interface that every voice engine implements.

    Concrete engines subclass this and provide ``transcribe``. The class
    cannot be instantiated directly because ``transcribe`` is abstract.
    """

    @abc.abstractmethod
    def transcribe(self, audio_path: str) -> Dict[str, Any]:
        """Transcribe the audio file at ``audio_path``.

        Args:
            audio_path: Path of the audio file to transcribe.

        Returns:
            A dictionary holding the transcribed text and metadata.
        """
        ...
|
|
14
|
+
|
|
15
|
+
class WhisperEngine(VoiceEngineBase):
    """Speech-recognition engine backed by OpenAI Whisper.

    The Whisper model is not loaded at construction time. It is loaded on
    the first call to ``transcribe`` and then reused by later calls on the
    same instance.
    """

    def __init__(self, model_size: str = "base"):
        """Remember the requested model size without loading the model.

        Args:
            model_size: Model name handed to ``whisper.load_model``. A falsy
                value (``None`` or ``""``) falls back to ``"base"``.
        """
        self.model = None
        self.model_size = model_size if model_size else "base"

    def _lazy_load(self):
        """Load the Whisper model on first use; do nothing if already loaded.

        Raises:
            ImportError: If ``whisper`` or ``torch`` cannot be imported.
            Exception: Any other loading error is logged and re-raised
                unchanged.
        """
        if self.model is not None:
            return

        try:
            # Imported here so the module can be imported without the
            # optional voice dependencies installed.
            import whisper
            import torch

            # Use the GPU when torch reports one, otherwise the CPU.
            if torch.cuda.is_available():
                target_device = "cuda"
            else:
                target_device = "cpu"

            logger.info(f"Loading Whisper model: {self.model_size} on {target_device}...")
            self.model = whisper.load_model(self.model_size, device=target_device)
            logger.debug("Whisper model loaded successfully")
        except ImportError:
            logger.error("openai-whisper not installed. Please install with 'pip install openai-whisper'")
            raise
        except Exception as load_error:
            logger.error(f"Error loading Whisper model: {load_error}")
            raise

    def transcribe(self, audio_path: str) -> Dict[str, Any]:
        """Transcribe ``audio_path`` with the (lazily loaded) Whisper model.

        Args:
            audio_path: Path of the audio file to transcribe.

        Returns:
            A dictionary with the keys ``text``, ``segments`` (empty list
            when the model output has none), ``language`` (``None`` when
            absent), ``engine`` (``"whisper-<model_size>"``) and
            ``metadata`` (the model size and the input file path).
        """
        self._lazy_load()
        logger.info(f"Using Whisper ({self.model_size}) to transcribe: {audio_path}")

        raw = self.model.transcribe(audio_path)

        details = {
            "model_size": self.model_size,
            "file": audio_path,
        }
        return {
            "text": raw["text"],
            "segments": raw.get("segments", []),
            "language": raw.get("language"),
            "engine": f"whisper-{self.model_size}",
            "metadata": details,
        }
|
|
56
|
+
|
|
57
|
+
class VoiceEngineFactory:
    """Builds voice engine instances from an engine-type name."""

    @staticmethod
    def get_engine(engine_type: str, **kwargs) -> VoiceEngineBase:
        """Return a new engine for ``engine_type``.

        Args:
            engine_type: Name of the engine; only ``"whisper"`` is handled.
            **kwargs: Engine options. ``model_size`` is forwarded to
                ``WhisperEngine``; when missing, ``None`` is forwarded.

        Returns:
            A freshly constructed ``WhisperEngine``.

        Raises:
            ValueError: If ``engine_type`` is not ``"whisper"``.
        """
        if engine_type != "whisper":
            raise ValueError(f"Unknown voice engine type: {engine_type}")

        requested_size = kwargs.get("model_size")
        return WhisperEngine(model_size=requested_size)
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from xfmr_zem.server import ZemServer
|
|
4
|
+
from xfmr_zem.servers.voice.engines import VoiceEngineFactory
|
|
5
|
+
from loguru import logger
|
|
6
|
+
|
|
7
|
+
# Initialize ZemServer for Voice
|
|
8
|
+
mcp = ZemServer("voice")
|
|
9
|
+
|
|
10
|
+
@mcp.tool()
async def transcribe(
    file_path: str,
    engine: str = "whisper",
    model_size: str = "base"
) -> pd.DataFrame:
    """
    Transcribes an audio file using the specified voice engine.

    Args:
        file_path: Path to the audio file (wav, mp3, m4a, etc.).
        engine: The voice engine to use (currently only "whisper"). Defaults to "whisper".
        model_size: Whisper model size ("tiny", "base", "small", "medium", "large"). Defaults to "base".

    Returns:
        A one-element list of records (``DataFrame.to_dict(orient="records")``)
        with the keys ``text``, ``language``, ``engine`` and ``metadata``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist. This check runs
            before the ``try`` block, so it is not wrapped in RuntimeError.
        RuntimeError: If engine creation or transcription fails; the original
            exception is attached as ``__cause__``.
    """
    # NOTE(review): the return annotation says pd.DataFrame but the function
    # returns a list of dicts. The annotation is left untouched in case the
    # tool decorator reads it; confirm and align.
    logger.info(f"Voice Transcription: {file_path} using {engine} ({model_size})")

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    try:
        # Get engine from factory (a new engine instance on every call).
        voice_engine = VoiceEngineFactory.get_engine(engine, model_size=model_size)

        # Transcribe.
        # NOTE(review): this is a synchronous call inside an async function;
        # confirm that blocking here is acceptable.
        result = voice_engine.transcribe(file_path)

        # Format as a single-row DataFrame.
        df = pd.DataFrame([{
            "text": result["text"].strip(),
            "language": result["language"],
            "engine": result["engine"],
            "metadata": result["metadata"]
        }])

        logger.info(f"Successfully transcribed {file_path}")
        return df.to_dict(orient="records")

    except Exception as e:
        logger.error(f"Voice Error with {engine}: {e}")
        import traceback
        logger.error(traceback.format_exc())
        # Chain explicitly so callers can still inspect the underlying error.
        raise RuntimeError(f"Transcription failed: {str(e)}") from e
|
|
52
|
+
|
|
53
|
+
if __name__ == "__main__":
|
|
54
|
+
mcp.run()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xfmr-zem
|
|
3
|
-
Version: 0.2.8
|
|
3
|
+
Version: 0.2.9
|
|
4
4
|
Summary: Zem: Unified Data Pipeline Framework (ZenML + NeMo Curator + DataJuicer) for multi-domain processing
|
|
5
5
|
Project-URL: Homepage, https://github.com/OAI-Labs/xfmr-zem
|
|
6
6
|
Project-URL: Repository, https://github.com/OAI-Labs/xfmr-zem
|
|
@@ -58,6 +58,10 @@ Requires-Dist: shapely; extra == 'ocr'
|
|
|
58
58
|
Requires-Dist: torch==2.5.1; extra == 'ocr'
|
|
59
59
|
Requires-Dist: torchvision==0.20.1; extra == 'ocr'
|
|
60
60
|
Requires-Dist: transformers>=4.40.0; extra == 'ocr'
|
|
61
|
+
Provides-Extra: voice
|
|
62
|
+
Requires-Dist: librosa; extra == 'voice'
|
|
63
|
+
Requires-Dist: openai-whisper; extra == 'voice'
|
|
64
|
+
Requires-Dist: soundfile; extra == 'voice'
|
|
61
65
|
Provides-Extra: zenml
|
|
62
66
|
Requires-Dist: zenml>=0.75.0; extra == 'zenml'
|
|
63
67
|
Description-Content-Type: text/markdown
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
xfmr_zem/__init__.py,sha256=Abx2BepsZu-e7E93N2lOgu9w0b4TBZLN6MEzCzDCn_A,1138
|
|
2
|
-
xfmr_zem/cli.py,sha256
|
|
2
|
+
xfmr_zem/cli.py,sha256=-X44G4mVApz2NW5nbgrS9O9y9g51ZDyxXns54nATXnM,14132
|
|
3
3
|
xfmr_zem/client.py,sha256=2PkJavZ8kMVq0dXoeZvpRODO96tWiXyT1alZLcw5RH0,12601
|
|
4
4
|
xfmr_zem/schemas.py,sha256=0tHM0ftOWTWxNiqmAZn_MyIYJwF2p9brHK0MHlOMlKY,494
|
|
5
5
|
xfmr_zem/server.py,sha256=EeohfqhUiCm0cGnV85H2ODZ4FLXjcTjbkdHrHuGHW4I,8363
|
|
@@ -51,8 +51,11 @@ xfmr_zem/servers/sinks/parameters.yml,sha256=9HAnv84Utw2qWsVZH8uOjVE62lnAKBkzv4P
|
|
|
51
51
|
xfmr_zem/servers/sinks/server.py,sha256=jI_r4sq_U_avNwF1PiE0alpaDrYpzOI-qPeLU7hgHP0,1589
|
|
52
52
|
xfmr_zem/servers/unstructured/parameters.yml,sha256=N31cmc56GTr3rkVhbni4yOpbnHISReN8f-KnRZTDbBc,118
|
|
53
53
|
xfmr_zem/servers/unstructured/server.py,sha256=0XmXWMAUNEJboX-J4bn_8EBUfMHIqu_ylNC_s9YOZdk,1996
|
|
54
|
-
xfmr_zem
|
|
55
|
-
xfmr_zem
|
|
56
|
-
xfmr_zem
|
|
57
|
-
xfmr_zem-0.2.
|
|
58
|
-
xfmr_zem-0.2.
|
|
54
|
+
xfmr_zem/servers/voice/engines.py,sha256=bF_wMJCNue3JQ6otYASoan7O70s4rqSHL1MbXB2Mlyo,2235
|
|
55
|
+
xfmr_zem/servers/voice/parameters.yml,sha256=oM9hidow8nY6N6G80jjBCGczIbDGuGa9rmRVDXFREIs,33
|
|
56
|
+
xfmr_zem/servers/voice/server.py,sha256=FCrVyj2mDRJTjEYCRtZ9D1ZwDemiDMO075DDqP_KcW0,1736
|
|
57
|
+
xfmr_zem-0.2.9.dist-info/METADATA,sha256=so_5K_Da2QateoVzsJ_b2BZK5Rm8bMa0PzldHprg9-M,6533
|
|
58
|
+
xfmr_zem-0.2.9.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
59
|
+
xfmr_zem-0.2.9.dist-info/entry_points.txt,sha256=uxs-IXFxpSakHivpFN3mEr13cz-z-0vkeSF_4dEBMa4,65
|
|
60
|
+
xfmr_zem-0.2.9.dist-info/licenses/LICENSE,sha256=kf_ILr0zLkSy5-EBu0VF2PGaOykYo83z3UijI-bZeAE,11342
|
|
61
|
+
xfmr_zem-0.2.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|