PyPI - transcribe-studio - Versions diffs - 0.2.0__py3-none-any.whl - Mend

transcribe-studio 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

app/__init__.py +0 -0
app/analytics.py +131 -0
app/config/evaluation.toml +45 -0
app/config/languages/en.toml +242 -0
app/config/transcript_formats.toml +47 -0
app/database.py +126 -0
app/evaluation/__init__.py +18 -0
app/evaluation/config.py +197 -0
app/evaluation/engine.py +289 -0
app/evaluation/matchers/__init__.py +7 -0
app/evaluation/matchers/base.py +44 -0
app/evaluation/matchers/semantic.py +244 -0
app/evaluation/metrics/__init__.py +7 -0
app/evaluation/metrics/base.py +61 -0
app/evaluation/metrics/wer.py +195 -0
app/evaluation/models.py +133 -0
app/evaluation/normalizers/__init__.py +7 -0
app/evaluation/normalizers/base.py +49 -0
app/evaluation/normalizers/en.py +137 -0
app/export_formats.py +178 -0
app/main.py +854 -0
app/navigation.py +103 -0
app/paths.py +22 -0
app/services/__init__.py +1 -0
app/services/projects.py +182 -0
app/static/css/style.css +1892 -0
app/static/js/editor.js +1001 -0
app/static/js/evaluation.js +361 -0
app/templates/analysis.html +97 -0
app/templates/base.html +31 -0
app/templates/dashboard.html +77 -0
app/templates/editor.html +223 -0
app/templates/evaluation.html +129 -0
app/templates/partials/breadcrumbs.html +15 -0
app/templates/partials/recording_header.html +32 -0
app/templates/partials/recording_tabs.html +10 -0
app/templates/partials/sidebar.html +47 -0
app/templates/partials/upload_form.html +26 -0
app/templates/recordings.html +66 -0
app/templates/screens/dashboard.html +80 -0
app/templates/screens/project_detail.html +89 -0
app/templates/screens/project_form.html +98 -0
app/templates/screens/project_upload.html +47 -0
app/templates/upload.html +64 -0
app/transcript_formats/__init__.py +21 -0
app/transcript_formats/align.py +77 -0
app/transcript_formats/config.py +86 -0
app/transcript_formats/models.py +42 -0
app/transcript_formats/parsers/__init__.py +9 -0
app/transcript_formats/parsers/base.py +49 -0
app/transcript_formats/parsers/json_segments.py +81 -0
app/transcript_formats/parsers/plain_text.py +21 -0
app/transcript_formats/parsers/timestamp_speaker.py +80 -0
app/transcript_formats/registry.py +103 -0
app/web/__init__.py +1 -0
app/web/context.py +87 -0
app/web/deps.py +43 -0
app/web/routes/__init__.py +16 -0
app/web/routes/dashboard.py +34 -0
app/web/routes/legacy.py +66 -0
app/web/routes/projects.py +116 -0
app/web/routes/recordings_pages.py +86 -0
transcribe_studio-0.2.0.dist-info/METADATA +128 -0
transcribe_studio-0.2.0.dist-info/RECORD +68 -0
transcribe_studio-0.2.0.dist-info/WHEEL +5 -0
transcribe_studio-0.2.0.dist-info/entry_points.txt +2 -0
transcribe_studio-0.2.0.dist-info/licenses/LICENSE +21 -0
transcribe_studio-0.2.0.dist-info/top_level.txt +1 -0

app/__init__.py ADDED Viewed

File without changes

app/analytics.py ADDED Viewed

@@ -0,0 +1,131 @@
+"""Dashboard and recording-level transcription analytics."""
+from __future__ import annotations
+import re
+from typing import Any
+def _word_count(text: str) -> int:
+    """Count whitespace-delimited tokens in *text*; falsy input counts as zero words."""
+    source = text if text else ""
+    # Every maximal run of non-whitespace characters is one word.
+    return sum(1 for _ in re.finditer(r"\S+", source))
+def _segment_duration(seg: dict) -> int:
+    """Return the segment length (``end_ms`` minus ``start_ms``), never below zero."""
+    span = seg["end_ms"] - seg["start_ms"]
+    # An inverted segment (end before start) contributes no duration.
+    return span if span > 0 else 0
+def analyze_segments(segments: list[dict], duration_ms: int | None) -> dict[str, Any]:
+    """Summarise a list of segments: counts, word totals, coverage and per-speaker stats.
+    Args:
+        segments: Segment mappings; each needs ``start_ms`` and ``end_ms`` and may
+            carry ``speaker`` and ``transcript`` (missing or None counts as empty).
+        duration_ms: Length of the whole recording, used as the coverage
+            denominator; None or 0 yields a coverage of 0.
+    Returns:
+        A dict of totals and rounded percentages, plus ``speakers``: one entry per
+        speaker name, ordered by descending total duration.
+    """
+    segment_total = len(segments)
+    with_text = 0
+    with_speaker = 0
+    covered_ms = 0
+    word_total = 0
+    per_speaker: dict[str, dict[str, int]] = {}
+    for seg in segments:
+        has_text = bool((seg.get("transcript") or "").strip())
+        speaker = (seg.get("speaker") or "").strip()
+        length = _segment_duration(seg)
+        n_words = _word_count(seg.get("transcript") or "")
+        if has_text:
+            with_text += 1
+        if speaker:
+            with_speaker += 1
+        covered_ms += length
+        word_total += n_words
+        # Segments without a speaker label are pooled under "Unlabeled".
+        label = speaker or "Unlabeled"
+        bucket = per_speaker.get(label)
+        if bucket is None:
+            bucket = {"segments": 0, "words": 0, "duration_ms": 0}
+            per_speaker[label] = bucket
+        bucket["segments"] += 1
+        bucket["words"] += n_words
+        bucket["duration_ms"] += length
+    # Longest-speaking first; ties keep first-seen order because the sort is stable.
+    ranked = sorted(per_speaker.items(), key=lambda item: -item[1]["duration_ms"])
+    speakers = [{"name": label, **totals} for label, totals in ranked]
+    recording_ms = duration_ms or 0
+    if recording_ms:
+        # Overlapping segments can sum past the recording length, so cap at 100.
+        coverage_pct = min(100, round(covered_ms / recording_ms * 100))
+    else:
+        coverage_pct = 0
+    if segment_total:
+        transcript_pct = round(with_text / segment_total * 100)
+        speaker_pct = round(with_speaker / segment_total * 100)
+        avg_seg_ms = round(covered_ms / segment_total)
+    else:
+        transcript_pct = speaker_pct = avg_seg_ms = 0
+    return {
+        "segment_count": segment_total,
+        "transcribed_segments": with_text,
+        "speaker_labeled_segments": with_speaker,
+        "empty_segments": segment_total - with_text,
+        "total_words": word_total,
+        "segmented_duration_ms": covered_ms,
+        "coverage_pct": coverage_pct,
+        "transcript_pct": transcript_pct,
+        "speaker_label_pct": speaker_pct,
+        "avg_segment_ms": avg_seg_ms,
+        "speakers": speakers,
+    }
+def analyze_recording(rec: dict, segments: list[dict]) -> dict[str, Any]:
+    """Combine a recording's identifying fields with its segment statistics.
+    Args:
+        rec: Recording mapping; ``id`` and ``title`` are required, while
+            ``duration_ms``, ``created_at`` and ``notes`` may be missing or None.
+        segments: Segment mappings belonging to this recording.
+    Returns:
+        A dict holding ``id``, ``title``, ``duration_ms``, ``created_at`` (first
+        ten characters only), ``notes`` and every key from ``analyze_segments``.
+    Raises:
+        KeyError: If ``rec`` has no ``id`` or ``title``.
+    """
+    stats = analyze_segments(segments, rec.get("duration_ms"))
+    # ``created_at`` can be present but None (a NULL column); ``.get`` with a
+    # default would hand that None to the slice and raise TypeError.
+    created_at = rec.get("created_at") or ""
+    return {
+        "id": rec["id"],
+        "title": rec["title"],
+        "duration_ms": rec.get("duration_ms"),
+        "created_at": created_at[:10],
+        "notes": rec.get("notes") or "",
+        **stats,
+    }
+def dashboard_stats(conn) -> dict[str, Any]:
+    """Build library-wide totals plus per-recording statistics for the dashboard.
+    Args:
+        conn: Database connection whose rows support ``dict(row)`` and
+            ``row["column"]`` access (e.g. ``sqlite3.Row``).
+    Returns:
+        A dict of overall counts, durations and rounded percentages, with
+        ``recordings`` holding one ``analyze_recording`` result per recording,
+        newest id first.
+    """
+    recordings = conn.execute(
+        "SELECT id, title, duration_ms, created_at FROM recordings ORDER BY id DESC"
+    ).fetchall()
+    total_segments = conn.execute("SELECT COUNT(*) FROM segments").fetchone()[0]
+    total_duration = conn.execute(
+        "SELECT COALESCE(SUM(duration_ms), 0) FROM recordings"
+    ).fetchone()[0]
+    all_segments = conn.execute(
+        "SELECT recording_id, start_ms, end_ms, speaker, transcript FROM segments"
+    ).fetchall()
+    seg_list = [dict(s) for s in all_segments]
+    transcribed = sum(1 for s in seg_list if (s.get("transcript") or "").strip())
+    words = sum(_word_count(s.get("transcript") or "") for s in seg_list)
+    segmented_ms = sum(_segment_duration(s) for s in seg_list)
+    # Only segments carrying a transcript or a speaker label count towards the
+    # speaker set; entirely blank segments are ignored.
+    speakers = {
+        (s.get("speaker") or "").strip() or "Unlabeled"
+        for s in seg_list
+        if (s.get("transcript") or "").strip() or (s.get("speaker") or "").strip()
+    }
+    # Group segments by recording once, rather than rescanning the full segment
+    # list for every recording.
+    segments_by_recording: dict[Any, list[dict]] = {}
+    for s in seg_list:
+        segments_by_recording.setdefault(s["recording_id"], []).append(s)
+    recording_stats = [
+        analyze_recording(dict(rec), segments_by_recording.get(rec["id"], []))
+        for rec in recordings
+    ]
+    overall_transcript_pct = (
+        round(transcribed / total_segments * 100) if total_segments else 0
+    )
+    overall_coverage_pct = (
+        min(100, round(segmented_ms / total_duration * 100)) if total_duration else 0
+    )
+    return {
+        "recording_count": len(recordings),
+        "segment_count": total_segments,
+        "total_duration_ms": total_duration,
+        "segmented_duration_ms": segmented_ms,
+        "transcribed_segments": transcribed,
+        "total_words": words,
+        "unique_speakers": len(speakers),
+        "transcript_pct": overall_transcript_pct,
+        "coverage_pct": overall_coverage_pct,
+        "recordings": recording_stats,
+    }
+def fmt_duration(ms: int | None) -> str:
+    """Format a millisecond duration as a short human-readable string.
+    Args:
+        ms: Duration in milliseconds; None or 0 is rendered as an em dash.
+    Returns:
+        ``"12.3s"`` below one minute, ``"4m 5s"`` below one hour, otherwise
+        ``"1h 2m"``.
+    """
+    if not ms:
+        return "—"
+    seconds = ms / 1000
+    # Compare the value as it will be displayed, so 59.96s becomes "1m 0s"
+    # instead of "60.0s".
+    if round(seconds, 1) < 60:
+        return f"{seconds:.1f}s"
+    # Round to whole seconds before splitting into units; rounding afterwards
+    # could produce "1m 60s".
+    total = round(seconds)
+    h, remainder = divmod(total, 3600)
+    m, sec = divmod(remainder, 60)
+    if h:
+        return f"{h}h {m}m"
+    return f"{m}m {sec}s"

app/config/evaluation.toml ADDED Viewed

@@ -0,0 +1,45 @@
+# Transcribe Studio - Evaluation Configuration
+# https://github.com/Mishkat-Quantum-Labs/transcribe-studio
+[evaluation]
+version = "1.0"
+default_language = "en"
+# Metrics to compute
+# Set enabled = false to skip a metric
+# weight is used for weighted averaging in overall score
+[evaluation.metrics]
+[evaluation.metrics.wer]
+enabled = true
+weight = 1.0
+case_sensitive = false
+description = "Word Error Rate - standard ASR metric"
+[evaluation.metrics.cer]
+enabled = false
+weight = 0.0
+description = "Character Error Rate - useful for character-level languages"
+[evaluation.metrics.semantic_score]
+enabled = true
+weight = 0.5
+description = "Semantic equivalence score - partial credit for meaning"
+# Text normalization settings
+# These apply before metric calculation
+[evaluation.normalization]
+lowercase = true
+trim_whitespace = true
+remove_punctuation = false
+normalize_quotes = true
+remove_special_chars = false
+# UI Settings
+[evaluation.ui]
+show_detailed_breakdown = true
+highlight_errors = true
+color_scheme = "auto"

app/config/languages/en.toml ADDED Viewed

@@ -0,0 +1,242 @@
+# English Language Configuration
+# Semantic equivalence rules for English
+[language]
+code = "en"
+name = "English"
+normalizer_class = "en"
+# ============================================================
+# SEMANTIC MATCHING RULES
+# ============================================================
+# These rules define phrases that are semantically equivalent
+# even when they differ in exact wording.
+#
+# Each rule has:
+# - variants: list of alternative phrasings
+# - canonical: the "standard" form to compare against
+# - weight: 0.0-1.0, confidence of equivalence
+#
+# Matching works bidirectionally:
+# "gonna" matches "going to" and vice versa
+# ============================================================
+[[semantic_matchers.group]]
+name = "contractions_informal"
+description = "Contractions and informal speech → formal forms"
+enabled = true
+[[semantic_matchers.group.rule]]
+variants = ["gonna", "gon na", "gunna"]
+canonical = "going to"
+weight = 0.95
+[[semantic_matchers.group.rule]]
+variants = ["wanna", "wanner"]
+canonical = "want to"
+weight = 0.95
+[[semantic_matchers.group.rule]]
+variants = ["gotta", "got a"]
+canonical = "got to"
+weight = 0.9
+[[semantic_matchers.group.rule]]
+variants = ["kinda", "kind of"]
+canonical = "kind of"
+weight = 0.9
+[[semantic_matchers.group.rule]]
+variants = ["sorta", "sort of"]
+canonical = "sort of"
+weight = 0.9
+[[semantic_matchers.group.rule]]
+variants = ["outta", "out of"]
+canonical = "out of"
+weight = 0.95
+[[semantic_matchers.group.rule]]
+variants = ["lemme", "let me"]
+canonical = "let me"
+weight = 0.95
+[[semantic_matchers.group.rule]]
+variants = ["gimme", "give me"]
+canonical = "give me"
+weight = 0.95
+[[semantic_matchers.group.rule]]
+variants = ["dunno", "dont know", "do not know", "don't know"]
+canonical = "do not know"
+weight = 0.9
+[[semantic_matchers.group.rule]]
+variants = ["coulda", "could have", "could've"]
+canonical = "could have"
+weight = 0.95
+[[semantic_matchers.group.rule]]
+variants = ["woulda", "would have", "would've"]
+canonical = "would have"
+weight = 0.95
+[[semantic_matchers.group.rule]]
+variants = ["shoulda", "should have", "should've"]
+canonical = "should have"
+weight = 0.95
+[[semantic_matchers.group.rule]]
+variants = ["lotsa", "lots of"]
+canonical = "lots of"
+weight = 0.95
+[[semantic_matchers.group.rule]]
+variants = ["cause", "cos", "cuz"]
+canonical = "because"
+weight = 0.85
+[[semantic_matchers.group.rule]]
+variants = ["nvm", "nvr", "nevermind", "never mind"]
+canonical = "never mind"
+weight = 0.95
+[[semantic_matchers.group.rule]]
+variants = ["thru", "through"]
+canonical = "through"
+weight = 0.98
+[[semantic_matchers.group.rule]]
+variants = ["u", "you"]
+canonical = "you"
+weight = 0.8
+[[semantic_matchers.group.rule]]
+variants = ["ur", "you're", "your"]
+canonical = "your"
+weight = 0.7
+[[semantic_matchers.group.rule]]
+variants = ["ok", "okay"]
+canonical = "okay"
+weight = 1.0
+[[semantic_matchers.group.rule]]
+variants = ["yeah", "yes", "yea", "yah"]
+canonical = "yes"
+weight = 0.95
+[[semantic_matchers.group.rule]]
+variants = ["nope", "no", "nah"]
+canonical = "no"
+weight = 0.95
+[[semantic_matchers.group.rule]]
+variants = ["alright", "all right", "allright"]
+canonical = "all right"
+weight = 1.0
+[[semantic_matchers.group.rule]]
+variants = ["gonna", "goin", "goin to", "going"]
+canonical = "going"
+weight = 0.8
+[[semantic_matchers.group]]
+name = "repeated_sounds"
+description = "Stuttered/repeated sounds - common in spontaneous speech"
+enabled = true
+[[semantic_matchers.group.rule]]
+variants = ["um", "uh", "er", "erm"]
+canonical = ""
+weight = 0.5
+[[semantic_matchers.group]]
+name = "common_confusions"
+description = "Common ASR/LLM transcription confusions"
+enabled = true
+[[semantic_matchers.group.rule]]
+variants = ["i am", "i'm", "im"]
+canonical = "i am"
+weight = 1.0
+[[semantic_matchers.group.rule]]
+variants = ["you know", "yknow", "y'know"]
+canonical = "you know"
+weight = 0.9
+[[semantic_matchers.group.rule]]
+variants = ["like", "like like"]
+canonical = "like"
+weight = 0.7
+[[semantic_matchers.group]]
+name = "numbers"
+description = "Number word ↔ digit equivalence"
+enabled = true
+[[semantic_matchers.group.rule]]
+variants = ["for", "four"]
+canonical = "four"
+weight = 0.9
+[[semantic_matchers.group.rule]]
+variants = ["to", "two", "too"]
+canonical = "two"
+weight = 0.8
+[[semantic_matchers.group]]
+name = "contractions"
+description = "Standard English contractions"
+enabled = true
+[[semantic_matchers.group.rule]]
+variants = ["don't", "do not"]
+canonical = "do not"
+weight = 1.0
+[[semantic_matchers.group.rule]]
+variants = ["can't", "cannot"]
+canonical = "cannot"
+weight = 1.0
+[[semantic_matchers.group.rule]]
+variants = ["won't", "will not"]
+canonical = "will not"
+weight = 1.0
+[[semantic_matchers.group.rule]]
+variants = ["i've", "i have"]
+canonical = "i have"
+weight = 1.0
+[[semantic_matchers.group.rule]]
+variants = ["i'll", "i will"]
+canonical = "i will"
+weight = 1.0
+[[semantic_matchers.group.rule]]
+variants = ["it's", "it is", "its"]
+canonical = "it is"
+weight = 0.95
+[[semantic_matchers.group.rule]]
+variants = ["that's", "that is"]
+canonical = "that is"
+weight = 1.0
+[[semantic_matchers.group.rule]]
+variants = ["there's", "there is"]
+canonical = "there is"
+weight = 1.0
+[[semantic_matchers.group.rule]]
+variants = ["here's", "here is"]
+canonical = "here is"
+weight = 1.0
+[[semantic_matchers.group.rule]]
+variants = ["what's", "what is"]
+canonical = "what is"
+weight = 1.0

app/config/transcript_formats.toml ADDED Viewed

@@ -0,0 +1,47 @@
+# Transcribe Studio — transcript import formats
+# Contributors: add a parser module + entry here
+[transcript_formats]
+# First matching parser wins (auto-detect)
+detection_order = ["timestamp_speaker", "json_segments", "plain_text"]
+# File upload only — paste accepts any text (format auto-detected from content)
+[transcript_formats.upload]
+accepted_extensions = [".json", ".txt", ".transcript"]
+max_bytes = 5242880
+[transcript_formats.alignment]
+# When aligning [MM:SS] lines to audio chunks, prefix with speaker name?
+include_speaker = false
+[transcript_formats.formats.timestamp_speaker]
+name = "Timestamp + Speaker"
+description = "Lines like [MM:SS] Speaker: text — common in LLM / classroom exports"
+enabled = true
+extensions = [".txt", ".transcript"]
+example = "[00:07] Teacher: At the beginning of the year..."
+[transcript_formats.formats.json_segments]
+name = "JSON segments"
+description = "Structured JSON with segment id or start_ms"
+enabled = true
+extensions = [".json"]
+example = '{"segments": [{"id": 1, "text": "Hello"}]}'
+[transcript_formats.formats.plain_text]
+name = "Plain text"
+description = "Single block of text (applied to every chunk — quick test only)"
+enabled = true
+extensions = [".txt"]
+example = "Full transcript as one paragraph..."
+# Future formats (enable when parser exists):
+# [transcript_formats.formats.srt]
+# name = "SubRip subtitles"
+# enabled = false
+# extensions = [".srt"]
+# [transcript_formats.formats.vtt]
+# name = "WebVTT"
+# enabled = false
+# extensions = [".vtt"]

app/database.py ADDED Viewed

@@ -0,0 +1,126 @@
+import sqlite3
+from app.paths import data_dir
+DATA_DIR = data_dir()
+DB_PATH = DATA_DIR / "transcribe.db"
+def get_conn() -> sqlite3.Connection:
+    """Open a connection to the application database.
+    The data directory is created if needed. Rows are returned as
+    ``sqlite3.Row`` and foreign-key enforcement is switched on for this
+    connection. The returned connection is open; closing it is up to the caller.
+    """
+    DATA_DIR.mkdir(exist_ok=True, parents=True)
+    connection = sqlite3.connect(DB_PATH, check_same_thread=False)
+    connection.row_factory = sqlite3.Row
+    # Foreign-key enforcement is a per-connection setting in SQLite.
+    connection.execute("PRAGMA foreign_keys = ON")
+    return connection
+def init_db() -> None:
+    """Create the projects, recordings and segments tables if they do not exist."""
+    schema = """
+        CREATE TABLE IF NOT EXISTS projects (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            name TEXT NOT NULL,
+            description TEXT DEFAULT '',
+            created_at TEXT DEFAULT (datetime('now'))
+        );
+        CREATE TABLE IF NOT EXISTS recordings (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            project_id INTEGER REFERENCES projects(id) ON DELETE SET NULL,
+            title TEXT NOT NULL,
+            filename TEXT NOT NULL,
+            duration_ms INTEGER,
+            notes TEXT DEFAULT '',
+            llm_transcript_file TEXT DEFAULT '',
+            llm_transcript_lang TEXT DEFAULT 'en',
+            llm_transcript_format TEXT DEFAULT '',
+            created_at TEXT DEFAULT (datetime('now'))
+        );
+        CREATE TABLE IF NOT EXISTS segments (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            recording_id INTEGER NOT NULL REFERENCES recordings(id) ON DELETE CASCADE,
+            start_ms INTEGER NOT NULL,
+            end_ms INTEGER NOT NULL,
+            speaker TEXT NOT NULL DEFAULT '',
+            transcript TEXT NOT NULL DEFAULT '',
+            llm_transcript TEXT NOT NULL DEFAULT '',
+            sort_order INTEGER NOT NULL DEFAULT 0,
+            created_at TEXT DEFAULT (datetime('now')),
+            updated_at TEXT DEFAULT (datetime('now'))
+        );
+        """
+    conn = get_conn()
+    try:
+        conn.executescript(schema)
+        conn.commit()
+    finally:
+        # Close the connection even when schema creation fails.
+        conn.close()
+def migrate_add_llm_transcript() -> None:
+    """Add llm_transcript column to segments if it doesn't exist.
+    Does nothing when the column is already present or the ``segments`` table
+    has not been created yet. Any other database error (for example a locked
+    file) propagates instead of being silently ignored.
+    """
+    conn = get_conn()
+    try:
+        # PRAGMA table_info yields one row per column; index 1 is the column name.
+        columns = {row[1] for row in conn.execute("PRAGMA table_info(segments)")}
+        # An empty result means the table itself is missing: nothing to migrate.
+        if not columns or "llm_transcript" in columns:
+            return
+        conn.execute("ALTER TABLE segments ADD COLUMN llm_transcript TEXT NOT NULL DEFAULT ''")
+        conn.commit()
+    finally:
+        conn.close()
+def migrate_add_recording_llm_fields() -> None:
+    """Add LLM transcript fields to recordings table if they don't exist.
+    Columns already present are skipped, and nothing happens when the
+    ``recordings`` table has not been created yet. Any other database error
+    propagates instead of being silently ignored.
+    """
+    # Column name -> statement that adds it.
+    additions = {
+        "llm_transcript_file": "ALTER TABLE recordings ADD COLUMN llm_transcript_file TEXT DEFAULT ''",
+        "llm_transcript_lang": "ALTER TABLE recordings ADD COLUMN llm_transcript_lang TEXT DEFAULT 'en'",
+        "llm_transcript_format": "ALTER TABLE recordings ADD COLUMN llm_transcript_format TEXT DEFAULT ''",
+    }
+    conn = get_conn()
+    try:
+        # PRAGMA table_info yields one row per column; index 1 is the column name.
+        existing = {row[1] for row in conn.execute("PRAGMA table_info(recordings)")}
+        # An empty result means the table itself is missing: nothing to migrate.
+        if not existing:
+            return
+        for column, sql in additions.items():
+            if column in existing:
+                continue
+            conn.execute(sql)
+            conn.commit()
+    finally:
+        # Close the connection even when one of the statements fails.
+        conn.close()
+def migrate_add_projects() -> None:
+    """Ensure projects exist and recordings are assigned.
+    Steps: create the ``projects`` table if missing, add ``recordings.project_id``
+    if missing, make sure a project named 'Default project' exists, then attach
+    every recording without a project to it.
+    Raises:
+        sqlite3.OperationalError: If the ``recordings`` table does not exist or
+            the database cannot be modified.
+    """
+    conn = get_conn()
+    try:
+        conn.execute(
+            """
+        CREATE TABLE IF NOT EXISTS projects (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            name TEXT NOT NULL,
+            description TEXT DEFAULT '',
+            created_at TEXT DEFAULT (datetime('now'))
+        )
+        """
+        )
+        conn.commit()
+        # PRAGMA table_info yields one row per column; index 1 is the column name.
+        columns = {row[1] for row in conn.execute("PRAGMA table_info(recordings)")}
+        if "project_id" not in columns:
+            # Same ON DELETE rule as the schema in init_db, so deleting a project
+            # behaves identically on fresh and migrated databases.
+            conn.execute(
+                "ALTER TABLE recordings ADD COLUMN project_id INTEGER "
+                "REFERENCES projects(id) ON DELETE SET NULL"
+            )
+            conn.commit()
+        default = conn.execute(
+            "SELECT id FROM projects WHERE name = 'Default project' LIMIT 1"
+        ).fetchone()
+        if not default:
+            cur = conn.execute(
+                "INSERT INTO projects (name, description) VALUES (?, ?)",
+                ("Default project", "Imported and new recordings"),
+            )
+            default_id = cur.lastrowid
+        else:
+            default_id = default["id"]
+        conn.execute(
+            "UPDATE recordings SET project_id = ? WHERE project_id IS NULL",
+            (default_id,),
+        )
+        conn.commit()
+    finally:
+        # Close the connection even when a step fails part-way through.
+        conn.close()

app/evaluation/__init__.py ADDED Viewed

@@ -0,0 +1,18 @@
+"""
+Transcribe Studio Evaluation Module.
+Provides WER, semantic matching, and extensible evaluation metrics
+for comparing human transcriptions against LLM-generated transcripts.
+"""
+from app.evaluation.config import EvaluationConfig, get_config, reload_config
+from app.evaluation.engine import EvaluationEngine
+from app.evaluation.models import EvaluationResult, SegmentResult
+__all__ = [
+    "EvaluationEngine",
+    "EvaluationConfig",
+    "EvaluationResult",
+    "SegmentResult",
+    "get_config",
+    "reload_config",
+]