PyPI - zeno-cli - Versions diffs - 0.3.4__py3-none-any.whl - Mend

zeno-cli 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

zeno_adapters/__init__.py +17 -0
zeno_adapters/_common.py +38 -0
zeno_adapters/anthropic.py +68 -0
zeno_adapters/claude_code.py +101 -0
zeno_adapters/crewai.py +92 -0
zeno_adapters/langgraph.py +49 -0
zeno_adapters/openai.py +108 -0
zeno_cli/__init__.py +1 -0
zeno_cli/_hooks/cc_bridge.py +1016 -0
zeno_cli/doctor.py +535 -0
zeno_cli/hook_install.py +269 -0
zeno_cli/hud/__init__.py +1 -0
zeno_cli/hud/hud_install.py +652 -0
zeno_cli/hud/zeno_attention.py +288 -0
zeno_cli/hud/zeno_cognition.py +457 -0
zeno_cli/hud/zeno_hud.py +496 -0
zeno_cli/interview_invites.py +342 -0
zeno_cli/login.py +241 -0
zeno_cli/main.py +2534 -0
zeno_cli/onboard.py +206 -0
zeno_cli/outreach.py +456 -0
zeno_cli/version.py +67 -0
zeno_cli-0.3.4.dist-info/METADATA +161 -0
zeno_cli-0.3.4.dist-info/RECORD +69 -0
zeno_cli-0.3.4.dist-info/WHEEL +4 -0
zeno_cli-0.3.4.dist-info/entry_points.txt +4 -0
zeno_core/__init__.py +67 -0
zeno_core/analytics.py +193 -0
zeno_core/rtlx_s.py +460 -0
zeno_core/streak.py +178 -0
zeno_core/tlx_s.py +192 -0
zeno_sdk/__init__.py +6 -0
zeno_sdk/_generated/__init__.py +6 -0
zeno_sdk/_generated/client.py +819 -0
zeno_sdk/_migrations/alembic/env.py +33 -0
zeno_sdk/_migrations/alembic/script.py.mako +18 -0
zeno_sdk/_migrations/alembic/versions/0001_initial.py +79 -0
zeno_sdk/_migrations/alembic/versions/0002_cognition_samples.py +53 -0
zeno_sdk/_migrations/alembic/versions/0003_cognition_drivers.py +41 -0
zeno_sdk/_migrations/alembic/versions/0004_transcript_intelligence.py +248 -0
zeno_sdk/_migrations/alembic.ini +35 -0
zeno_sdk/_runtime.py +12 -0
zeno_sdk/adapters/__init__.py +15 -0
zeno_sdk/adapters/anthropic.py +5 -0
zeno_sdk/adapters/claude_code.py +5 -0
zeno_sdk/adapters/crewai.py +5 -0
zeno_sdk/adapters/langgraph.py +5 -0
zeno_sdk/adapters/openai.py +5 -0
zeno_sdk/auth.py +25 -0
zeno_sdk/client.py +87 -0
zeno_sdk/config.py +61 -0
zeno_sdk/daemon.py +72 -0
zeno_sdk/privacy.py +46 -0
zeno_sdk/session.py +179 -0
zeno_sdk/storage.py +487 -0
zeno_sdk/types/__init__.py +121 -0
zeno_session_intel/__init__.py +19 -0
zeno_session_intel/analytics.py +588 -0
zeno_session_intel/compression.py +123 -0
zeno_session_intel/ingest.py +376 -0
zeno_session_intel/model.py +129 -0
zeno_session_intel/parsers/__init__.py +31 -0
zeno_session_intel/parsers/claude_code.py +169 -0
zeno_session_intel/parsers/codex.py +265 -0
zeno_session_intel/parsers/cursor.py +198 -0
zeno_session_intel/prices.py +281 -0
zeno_session_intel/schema.py +277 -0
zeno_session_intel/signals.py +319 -0
zeno_session_intel/taxonomy.py +71 -0

zeno_cli/hud/zeno_cognition.py ADDED Viewed

@@ -0,0 +1,457 @@
+#!/usr/bin/env python3
+"""zeno_cognition - the v2 multi-dimensional cognition model (single source of truth).
+v1 (zeno_attention.py) produced ONE attention score by re-parsing the transcript
+tail at render time; in practice it sat flat near 50. v2 captures signal at EVENT
+TIME (in the cc-bridge hook) across five named drivers and composes them into one
+attention score, so both the terminal HUD and the dashboard read the same per-turn
+row and agree by construction.
+The five drivers (each a raw 0..1, then standardized against the user's OWN rolling
+baseline because absolute values vary 3-4x between people):
+  effort        how hard the human is driving (real prompt richness + reasoning level)
+  autonomy      AI-led vs human-driven  (tool activity vs human input) - drift risk
+  verification  supervision/review cost (review gap, accept/reject, "fix it", churn)
+  fatigue       accumulated time-on-task decay (vigilance decrement; a PENALTY)
+  flow          rhythm: steady cadence vs rapid-fire thrash vs irregular fatigue pauses
+Honesty: this is a transparent, tunable v1 HEURISTIC, not a validated instrument.
+Every weight is an env var; scores are relative to the user's own baseline and read
+"calibrating" until enough history exists; it is NOT calibrated against the RTLX-S
+survey (the pre-registered SCED stays untouched). The research behind each driver is
+real but qualified (behavioral proxies reach AUC ~72-80% for fatigue, not certainty)
+- so we show the drivers alongside the composite and never claim measurement.
+Design: stdlib-only, Python 3.9+ safe, never raises. Imported by the cc-bridge hook
+(the writer), the HUD (the reader), and the dashboard exporter.
+"""
+import json
+import math
+import os
+# ---------------------------------------------------------------------------
+# tunable weights (env-overridable; the composite is a transparent weighted sum)
+# effort/verification/flow raise attention; autonomy raises it (active hand-off
+# still counts as engaged supervision up to a point); fatigue is a penalty.
+# ---------------------------------------------------------------------------
+DRIVERS = ("effort", "autonomy", "verification", "fatigue", "flow")
+def _w(name, default):
+    """Read the float weight `name` from the environment.
+    Returns `default` when the variable is unset, cannot be parsed as a float,
+    or parses to nan/inf (a non-finite weight would poison every composite
+    score). Never raises."""
+    try:
+        value = float(os.environ.get(name, str(default)))
+    except Exception:
+        return default
+    return value if math.isfinite(value) else default
+def _wi(name, default):
+    """Integer counterpart of _w, used for the count/threshold settings below.
+    A malformed value (e.g. "abc" or "20.5") falls back to `default` instead of
+    raising ValueError while the module is being imported. Never raises."""
+    try:
+        return int(os.environ.get(name, str(default)))
+    except Exception:
+        return default
+W_EFFORT = _w("ZENO_COG_W_EFFORT", 0.30)
+W_VERIF = _w("ZENO_COG_W_VERIF", 0.25)
+W_AUTONOMY = _w("ZENO_COG_W_AUTONOMY", 0.20)
+W_FLOW = _w("ZENO_COG_W_FLOW", 0.15)
+W_FATIGUE = _w("ZENO_COG_W_FATIGUE", 0.10)  # applied as a penalty
+# How many baseline samples before a driver is "calibrated" (else "calibrating").
+CALIBRATION_MIN = _wi("ZENO_COG_CALIBRATION_MIN", 20)
+# How many of the most recent raw values are kept per driver baseline.
+BASELINE_WINDOW = _wi("ZENO_COG_BASELINE_WINDOW", 400)
+ATTENTION_RED = _wi("ZENO_ATTENTION_RED", 55)
+# ANSI color codes (shared with the HUD); dashboard maps the same bands to its theme.
+GREEN, YELLOW, RED, CYAN = "92", "93", "91", "96"
+# ---------------------------------------------------------------------------
+# canonical cognition_samples schema (single source of truth for the stdlib
+# writers - the HUD and the cc-bridge hook). Mirror any change here into the
+# alembic migration (packages/sdk-python/alembic/versions/0003_cognition_drivers.py);
+# the schema-drift test guards the two from diverging.
+# ---------------------------------------------------------------------------
+SAMPLE_COLUMNS = (
+    # (column name, SQLite declaration) pairs, in table order
+    # -- identity and timing --
+    ("id", "TEXT PRIMARY KEY"),
+    ("session_id", "TEXT NOT NULL"),
+    ("ts", "TEXT NOT NULL"),
+    # -- context window / token accounting --
+    ("context_pct", "REAL"),
+    ("input_tokens", "INTEGER"),
+    ("output_tokens", "INTEGER"),
+    ("cache_read_tokens", "INTEGER"),
+    ("cache_creation_tokens", "INTEGER"),
+    ("total_tokens", "INTEGER"),
+    # -- composite score and the columns that predate the v2 drivers --
+    ("attention_score", "INTEGER"),
+    ("attention_effort", "REAL"),
+    ("attention_deliberation", "REAL"),  # v1 legacy component, kept for back-compat
+    ("attention_trend", "REAL"),
+    ("model", "TEXT"),
+    ("harness", "TEXT"),
+    # -- columns added for the v2 drivers --
+    ("attention_autonomy", "REAL"),
+    ("attention_verification", "REAL"),
+    ("attention_fatigue", "REAL"),
+    ("attention_flow", "REAL"),
+)
+# one "attention_<driver>" column name per entry in DRIVERS, in DRIVERS order
+DRIVER_COLUMNS = tuple(f"attention_{driver}" for driver in DRIVERS)
+def ensure_schema(con):
+    """Bring the cognition_samples table up to the SAMPLE_COLUMNS layout.
+    Creates the table and its session_id index when they are absent, then reads
+    the live column list (PRAGMA table_info) and ALTERs in every SAMPLE_COLUMNS
+    entry that is missing, so a fresh DB and an older DB both end up with the
+    full column set. Safe to call repeatedly. Every failure is swallowed and
+    the function always returns None."""
+    try:
+        column_defs = [f"{col_name} {col_decl}" for col_name, col_decl in SAMPLE_COLUMNS]
+        cols_sql = ", ".join(column_defs)
+        con.execute(f"CREATE TABLE IF NOT EXISTS cognition_samples ({cols_sql})")
+        con.execute(
+            "CREATE INDEX IF NOT EXISTS ix_cognition_samples_session_id "
+            "ON cognition_samples (session_id)"
+        )
+        # PRAGMA table_info rows carry the column name at index 1
+        info_rows = con.execute("PRAGMA table_info(cognition_samples)").fetchall()
+        present = set()
+        for info in info_rows:
+            present.add(info[1])
+        for col_name, col_decl in SAMPLE_COLUMNS:
+            if col_name in present:
+                continue
+            # ALTER ADD cannot carry PK/NOT NULL, so only the bare type is used
+            bare_type = col_decl.split()[0]
+            try:
+                con.execute(f"ALTER TABLE cognition_samples ADD COLUMN {col_name} {bare_type}")
+            except Exception:
+                # one column failing must not stop the remaining ones
+                pass
+    except Exception:
+        pass
+# ---------------------------------------------------------------------------
+# raw per-turn driver scoring (each returns 0..1)
+# ---------------------------------------------------------------------------
+def effort_raw(prompt_chars, has_code=False, multiline=False, effort_level=None, lead_word=False):
+    """Score (0..1) how much work the human put into this turn's prompt.
+    The base term saturates with prompt length (about 0.8 at 300 characters).
+    A prompt containing code or several lines earns a flat 0.15 bonus, a
+    lead-word prompt of at most 6 characters has its base quartered, and the
+    reasoning effort level adds up to 0.15 more. Unknown or missing levels add
+    nothing. The sum is clamped to 0..1."""
+    chars = int(prompt_chars or 0)
+    if chars < 0:
+        chars = 0
+    richness = 1.0 - math.exp(-chars / 180.0)
+    structure_bonus = 0.15 if (has_code or multiline) else 0.0
+    if lead_word and chars <= 6:
+        # a very short "let the AI lead" prompt counts for a quarter
+        richness *= 0.25
+    level_bonus = {"low": 0.0, "medium": 0.04, "high": 0.08, "xhigh": 0.12, "max": 0.15}.get(
+        (effort_level or "").lower(), 0.0
+    )
+    total = richness + structure_bonus + level_bonus
+    return max(0.0, min(1.0, total))
+def autonomy_raw(tool_uses, human_prompt_chars, autonomous_seconds):
+    """Score (0..1) how much of the turn was driven by the agent.
+    Blends three saturating terms: tool activity (weight 0.5, about 0.8 at 10
+    tool uses), thinness of the human prompt (weight 0.3, 1.0 for an empty
+    prompt) and length of the autonomous stretch (weight 0.2, about 0.6 after
+    four minutes). A high value is an input for classify(), not a verdict.
+    Negative or missing inputs are treated as zero."""
+    n_tools = max(0, int(tool_uses or 0))
+    prompt_len = max(0.0, float(human_prompt_chars or 0))
+    run_secs = max(0.0, float(autonomous_seconds or 0))
+    tool_term = 0.5 * (1.0 - math.exp(-n_tools / 6.0))
+    thin_term = 0.3 * math.exp(-prompt_len / 120.0)
+    run_term = 0.2 * (1.0 - math.exp(-run_secs / 240.0))
+    return max(0.0, min(1.0, tool_term + thin_term + run_term))
+def verification_raw(review_gap_s, reprompts=0, churn=0.0, accepts=0, rejects=0):
+    """Score (0..1) how much reviewing the human did on the agent's output.
+    Four weighted parts: the pause before the next action (0.5), the number of
+    accept/reject decisions (0.2), correction re-prompts (0.15) and edit churn
+    clamped to 0..1 (0.15). Negative or missing counts are treated as zero."""
+    pause = max(0.0, float(review_gap_s or 0))
+    if pause > 180:
+        # very long gap: still engaged, possibly distracted
+        pause_part = 0.6
+    elif pause >= 4:
+        # grows linearly and tops out at 90 seconds
+        pause_part = min(1.0, pause / 90.0)
+    else:
+        # under four seconds is rubber-stamping
+        pause_part = 0.15
+    n_accepts = max(0, int(accepts or 0))
+    n_rejects = max(0, int(rejects or 0))
+    verdict_part = 1.0 - math.exp(-(n_accepts + n_rejects) / 3.0)
+    n_reprompts = max(0, int(reprompts or 0))
+    reprompt_part = 1.0 - math.exp(-n_reprompts / 2.0)
+    churn_part = max(0.0, min(1.0, float(churn or 0.0)))
+    blended = 0.5 * pause_part + 0.2 * verdict_part + 0.15 * reprompt_part + 0.15 * churn_part
+    return max(0.0, min(1.0, blended))
+def fatigue_raw(active_minutes, inter_turn_var=0.0, hour=None):
+    """Accumulated time-on-task fatigue as a 0..1 value (higher = more tired).
+    The vigilance literature gives an exponential decrement with a 20-30 min
+    inflection and a clear arousal drop past ~60 min, so fatigue rises with
+    continuous active minutes; high inter-turn timing variability and late hours
+    add to it. The result is used as a penalty on attention.
+    active_minutes: continuous active minutes; negative or missing counts as 0.
+    inter_turn_var: timing variability, clamped to 0..1.
+    hour: local hour of day; 22:00-05:59 adds a flat 0.15. None, non-numeric
+    and non-finite (nan/inf) values are ignored rather than raising."""
+    mins = max(0.0, float(active_minutes or 0))
+    # exponential approach to 1 with a 65-minute time constant:
+    # ~0.37 at 30 min, ~0.60 at 60 min, ~0.84 at 120 min
+    time_fat = 1.0 - math.exp(-mins / 65.0)
+    var = max(0.0, min(1.0, float(inter_turn_var or 0.0)))
+    late = 0.0
+    # int(nan) raises ValueError and int(inf) raises OverflowError, so a float
+    # hour is only used when finite; ints are checked separately because
+    # math.isfinite() overflows on very large ints.
+    hour_usable = isinstance(hour, int) or (isinstance(hour, float) and math.isfinite(hour))
+    if hour_usable:
+        h = int(hour) % 24
+        if h >= 22 or h < 6:
+            late = 0.15
+    return max(0.0, min(1.0, 0.75 * time_fat + 0.2 * var + late))
+def flow_raw(cadence_regularity, boundary_pause_ratio=0.5):
+    """Score (0..1) the quality of the working rhythm; higher is better.
+    Weighs cadence regularity at 0.6 and the share of pauses that fall on
+    semantic boundaries at 0.4. Both inputs are clamped to 0..1. A missing
+    regularity counts as 0 and a boundary ratio of None counts as 0.5."""
+    def unit(v):
+        # clamp a numeric input into the 0..1 range
+        return max(0.0, min(1.0, float(v)))
+    steadiness = unit(cadence_regularity or 0.0)
+    at_boundaries = unit(0.5 if boundary_pause_ratio is None else boundary_pause_ratio)
+    return max(0.0, min(1.0, 0.6 * steadiness + 0.4 * at_boundaries))
+# ---------------------------------------------------------------------------
+# per-user baselines (relative scoring; absolute values vary 3-4x between people)
+# ---------------------------------------------------------------------------
+class Baselines:
+    """Rolling per-driver baselines for robust standardization. Keeps the last
+    BASELINE_WINDOW raw values per driver and standardizes new values to a robust
+    z-score (median + IQR), so a driver reads 'relative to your own normal'. Stdlib,
+    JSON-backed, never raises into the caller.
+    `data` maps a driver name to the list of its recent raw values."""
+    def __init__(self, data=None):
+        # Anything that is not a dict (None, or e.g. a JSON list read from a
+        # damaged file) would make every later `.get` raise, so start empty.
+        self.data = data if isinstance(data, dict) else {}
+    @classmethod
+    def load(cls, path):
+        """Build an instance from the JSON file at `path`.
+        Returns an empty instance when the path is falsy, missing, unreadable,
+        or does not contain a JSON object."""
+        try:
+            if path and os.path.exists(path):
+                with open(path, encoding="utf-8") as f:
+                    return cls(json.load(f))
+        except Exception:
+            pass
+        return cls({})
+    def save(self, path):
+        """Write the baselines to `path` as JSON via a temp file + os.replace.
+        Best effort: does nothing for a falsy path and swallows I/O errors."""
+        try:
+            if not path:
+                return
+            parent = os.path.dirname(path)
+            # A bare filename has no directory part; os.makedirs("") raises,
+            # which previously made every such save a silent no-op.
+            if parent:
+                os.makedirs(parent, exist_ok=True)
+            tmp = path + ".tmp"
+            with open(tmp, "w", encoding="utf-8") as f:
+                json.dump(self.data, f)
+            os.replace(tmp, path)
+        except Exception:
+            pass
+    def _vals(self, driver):
+        """Return the stored list for `driver`, or a new empty list if absent/invalid."""
+        v = self.data.get(driver)
+        return v if isinstance(v, list) else []
+    def update(self, driver, value):
+        """Append `value` (rounded to 4 places) to the driver's history and trim
+        it to the last BASELINE_WINDOW entries. Values that are not numbers, or
+        are nan/inf, are ignored so they cannot corrupt the median/IQR."""
+        try:
+            x = float(value)
+            if not math.isfinite(x):
+                return
+            v = self._vals(driver)
+            v.append(round(x, 4))
+            if len(v) > BASELINE_WINDOW:
+                v = v[-BASELINE_WINDOW:]
+            self.data[driver] = v
+        except Exception:
+            pass
+    def n(self, driver):
+        """Number of stored baseline values for `driver`."""
+        return len(self._vals(driver))
+    def calibrating(self, driver):
+        """True while `driver` has fewer than CALIBRATION_MIN stored values."""
+        return self.n(driver) < CALIBRATION_MIN
+    def standardize(self, driver, value):
+        """Return a 0..100 score: relative to the user's baseline once calibrated,
+        else the raw value scaled (cold start). 50 == your median.
+        Returns None when `value` is not a number (including nan) or when the
+        stored history cannot be ordered (e.g. non-numeric entries)."""
+        try:
+            x = float(value)
+        except Exception:
+            return None
+        if math.isnan(x):
+            # nan slips through min()/max() clamping and would read as 100
+            return None
+        vals = self._vals(driver)
+        if len(vals) < CALIBRATION_MIN:
+            return round(max(0.0, min(1.0, x)) * 100.0, 1)  # cold start: raw 0..1 -> 0..100
+        try:
+            s = sorted(vals)
+            med = _median(s)
+            iqr = _percentile(s, 75) - _percentile(s, 25)
+            # IQR / 1.349 approximates a standard deviation; a flat history
+            # falls back to a fixed 0.15 scale to avoid dividing by ~0
+            scale = (iqr / 1.349) if iqr > 1e-6 else 0.15
+            z = (x - med) / scale
+            return round(_sigmoid(z) * 100.0, 1)
+        except (TypeError, ValueError):
+            return None
+# ---------------------------------------------------------------------------
+# stats helpers
+# ---------------------------------------------------------------------------
+def _sigmoid(x):
+    """Logistic function of x with x clamped to [-12, 12] before exponentiation.
+    Returns 0.5 if anything goes wrong (e.g. a non-numeric argument)."""
+    try:
+        bounded = max(-12.0, min(12.0, x))
+        denominator = 1.0 + math.exp(-bounded)
+        return 1.0 / denominator
+    except Exception:
+        return 0.5
+def _median(sorted_vals):
+    """Median of an already-sorted sequence; 0.0 for an empty one.
+    An odd length returns the middle element itself, an even length returns
+    the mean of the two middle elements."""
+    count = len(sorted_vals)
+    if not count:
+        return 0.0
+    mid, is_odd = divmod(count, 2)
+    if is_odd:
+        return sorted_vals[mid]
+    return (sorted_vals[mid - 1] + sorted_vals[mid]) / 2.0
+def _percentile(sorted_vals, p):
+    """p-th percentile (p in 0..100) of an already-sorted sequence.
+    Uses linear interpolation between the two nearest ranks. Returns 0.0 for
+    an empty sequence and the only element for a single-item one."""
+    count = len(sorted_vals)
+    if count == 0:
+        return 0.0
+    if count == 1:
+        return sorted_vals[0]
+    rank = (p / 100.0) * (count - 1)
+    below = int(math.floor(rank))
+    above = int(math.ceil(rank))
+    lower = sorted_vals[below]
+    if below == above:
+        # the rank lands exactly on an element
+        return lower
+    return lower + (sorted_vals[above] - lower) * (rank - below)
+# ---------------------------------------------------------------------------
+# compose + classify
+# ---------------------------------------------------------------------------
+def compose(raw, baselines=None, recent_scores=None, context=None):
+    """Turn the raw driver values into the renderable attention dict.
+    `raw`: mapping with any of effort/autonomy/verification/fatigue/flow, each
+    0..1; absent or None entries are left out of `drivers`.
+    `baselines`: Baselines instance used to standardize (an empty one if omitted).
+    `recent_scores`: earlier composite scores, used only for the trend.
+    `context`: {long_session, churn, errors}, forwarded to classify().
+    Returns {ok, score, drivers (0..100), top_driver, trend, label, nudge, glyph,
+    color, calibrating}, or {"ok": False} when no driver could be standardized
+    or anything fails. Never raises."""
+    try:
+        bl = baselines or Baselines({})
+        std = {}
+        for name in DRIVERS:
+            if name not in raw:
+                continue
+            value = raw[name]
+            if value is None:
+                continue
+            scaled = bl.standardize(name, value)
+            if scaled is not None:
+                std[name] = scaled
+        if not std:
+            return {"ok": False}
+        # a missing positive driver counts as neutral (50); missing fatigue as 0
+        effort = std.get("effort", 50.0)
+        verification = std.get("verification", 50.0)
+        autonomy = std.get("autonomy", 50.0)
+        flow = std.get("flow", 50.0)
+        fatigue = std.get("fatigue", 0.0)
+        # weighted mean of the four positive drivers, minus the fatigue penalty
+        weight_total = W_EFFORT + W_VERIF + W_AUTONOMY + W_FLOW
+        weighted = (
+            W_EFFORT * effort
+            + W_VERIF * verification
+            + W_AUTONOMY * autonomy
+            + W_FLOW * flow
+        )
+        pos = weighted / (weight_total if weight_total else 1.0)
+        penalty = W_FATIGUE * (fatigue / 100.0) * 100.0
+        bounded = max(0.0, min(100.0, pos - penalty))
+        score = int(round(bounded))
+        # top driver: the one furthest from 50 in either direction
+        def deviation(item):
+            return abs(item[1] - 50.0)
+        top = max(std.items(), key=deviation)[0]
+        # trend: mean of the newer half minus mean of the older half of the
+        # last (up to) six numeric scores, scaled to -1..1
+        trend = 0.0
+        if recent_scores:
+            numeric = [s for s in recent_scores if isinstance(s, (int, float))]
+            window = numeric[-6:]
+            if len(window) >= 4:
+                split = len(window) // 2
+                older = window[:split]
+                newer = window[split:]
+                delta = (sum(newer) / (len(window) - split)) - (sum(older) / split)
+                trend = max(-1.0, min(1.0, delta / 100.0))
+        calibrating = False
+        for name in std:
+            if bl.calibrating(name):
+                calibrating = True
+                break
+        label, nudge, glyph, color = classify(score, trend, std, context or {})
+        result = {
+            "ok": True,
+            "score": score,
+            "drivers": std,
+            "top_driver": top,
+            "trend": round(trend, 3),
+            "label": label,
+            "nudge": nudge,
+            "glyph": glyph,
+            "color": color,
+            "calibrating": calibrating,
+        }
+        return result
+    except Exception:
+        return {"ok": False}
+def classify(score, trend, drivers, context):
+    """Pick (label, nudge, glyph, color) for a composite score.
+    The score selects a band (sharp >= 70, steady >= 45, easing >= 25, else
+    fading) and the trend selects the glyph. Context then adjusts the result:
+    below 55, churn or errors with weak verification becomes a red
+    'drift - step in', while a clean, verified, highly autonomous run is
+    relabelled 'monitoring'; a long session with fatigue >= 70 and a score
+    under 70 is nudged to 'rest soon'."""
+    if trend > 0.08:
+        glyph = "▲"
+    elif trend < -0.08:
+        glyph = "▼"
+    else:
+        glyph = "─"
+    long_session = bool(context.get("long_session"))
+    churn = float(context.get("churn", 0.0) or 0.0)
+    errors = bool(context.get("errors"))
+    autonomy = float(drivers.get("autonomy", 50.0))
+    verification = float(drivers.get("verification", 50.0))
+    fatigue = float(drivers.get("fatigue", 0.0))
+    # base band, chosen by score alone
+    if score >= 70:
+        label, nudge, col = "sharp", "keep going", GREEN
+    elif score >= 45:
+        label, nudge, col = "steady", "in rhythm", CYAN
+    elif score >= 25:
+        label, col = "easing", YELLOW
+        nudge = "wrap soon" if long_session else "ride it out"
+    else:
+        label, nudge, col = "fading", "rest", RED
+    # low scores are re-read in the light of what the agent is doing
+    if score < 55:
+        trouble = churn > 0.5 or errors
+        if trouble and verification < 55:
+            nudge, col = "drift - step in", RED
+        elif autonomy >= 65 and verification >= 45 and churn <= 0.3 and not errors:
+            label, nudge, col = "monitoring", "agent on track", CYAN
+    # a long, tiring session overrides the nudge last
+    if long_session and fatigue >= 70 and score < 70:
+        nudge = "rest soon"
+        col = RED if score < 45 else YELLOW
+    return label, nudge, glyph, col
+def present_sample(row):
+    """Rebuild the renderable attention dict from a persisted cognition_samples row
+    (the v2 read path). Reads the stored composite + the 5 driver columns and
+    re-derives label/nudge/glyph via the same classify, so the HUD bar and the
+    dashboard agree by construction. Falls back gracefully on partial rows.
+    `row` is read with `.get`, so it is expected to be a dict-like mapping of
+    column name to value. Returns {"ok": False} when the row is empty or its
+    attention_score is missing or not a finite number; otherwise a dict with
+    ok, score, drivers, top_driver, trend, label, nudge, glyph and color."""
+    if not row:
+        return {"ok": False}
+    score = row.get("attention_score")
+    if score is None:
+        return {"ok": False}
+    try:
+        score = int(round(float(score)))
+    except (TypeError, ValueError, OverflowError):
+        # ValueError covers text and nan; OverflowError is what round() raises
+        # for +/-inf, which used to escape this function
+        return {"ok": False}
+    drivers = {}
+    for d in DRIVERS:
+        v = row.get("attention_" + d)
+        if isinstance(v, (int, float)):
+            drivers[d] = float(v)
+    trend = row.get("attention_trend")
+    trend = float(trend) if isinstance(trend, (int, float)) else 0.0
+    # classify() calls float() on churn; coerce it here so one unparseable
+    # value degrades to "no churn" instead of raising out of the read path
+    try:
+        churn = float(row.get("churn", 0.0) or 0.0)
+    except (TypeError, ValueError):
+        churn = 0.0
+    context = {
+        "long_session": bool(row.get("long_session")),
+        "churn": churn,
+        "errors": bool(row.get("errors")),
+    }
+    label, nudge, glyph, color = classify(score, trend, drivers, context)
+    top = None
+    if drivers:
+        # same rule as compose(): the driver furthest from 50
+        top = max(drivers.items(), key=lambda kv: abs(kv[1] - 50.0))[0]
+    return {
+        "ok": True,
+        "score": score,
+        "drivers": drivers,
+        "top_driver": top,
+        "trend": trend,
+        "label": label,
+        "nudge": nudge,
+        "glyph": glyph,
+        "color": color,
+    }