zerottmm 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zerottmm/store.py ADDED
@@ -0,0 +1,380 @@
1
+ """Persistence layer for ttmm.
2
+
3
+ This module defines an SQLite schema and helper functions for storing
4
+ information about files, symbols, call edges, metrics and trace runs.
5
+ The database is stored under ``.ttmm/ttmm.db`` in the repository root.
6
+
7
+ The tables are:
8
+
9
+ ``files``
10
+ One row per indexed file (Python module). Stores the relative path and
11
+ modification time.
12
+
13
+ ``symbols``
14
+ One row per function or method. Holds the file it belongs to, a
15
+ qualified name (``module:Function`` or ``module:Class.method``), the
16
+ line range, a type (``function`` or ``method``) and an optional
17
+ docstring.
18
+
19
+ ``edges``
20
+ Static call edges extracted from AST. Each row links a caller symbol
21
+ (``caller_id``) to a callee symbol (``callee_id``) or provides the
22
+ unresolved callee name if the call target could not be resolved
23
+ statically.
24
+
25
+ ``metrics``
26
+ Stores per‑symbol metrics such as cyclomatic complexity, lines of code
27
+ and churn. The final hotspot score should be computed outside this
28
+ module.
29
+
30
+ ``trace_runs`` and ``trace_events``
31
+ Used by ``ttmm.trace`` to record actual runtime call edges. Each run
32
+ gets an entry in ``trace_runs`` and individual call pairs are stored
33
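+ in ``trace_events`` referencing the run. Dynamic edges use the
34
+ same symbol IDs as static edges. ``trace_events`` declares a foreign key
35
+ only on ``run_id``: deleting a run cascades to its events, while ``caller_id`` and ``callee_id`` are plain integers that are not cleaned up when a symbol is removed or re‑indexed.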
36
+
37
+ The ``connect`` function creates the database on demand and ensures that
38
+ the schema is up to date.
39
+ """
40
+
41
+ from __future__ import annotations
42
+
43
+ import os
44
+ import sqlite3
45
+ from typing import Dict, List, Optional, Tuple
46
+
47
+
48
def get_db_path(repo_path: str) -> str:
    """Build the location of the ttmm SQLite database for a repository.

    The result is ``repo_path`` joined with ``.ttmm/ttmm.db``. The path is
    not normalised: a relative ``repo_path`` yields a relative result.
    """
    db_dir = os.path.join(repo_path, ".ttmm")
    return os.path.join(db_dir, "ttmm.db")
54
+
55
+
56
def connect(repo_path: str) -> sqlite3.Connection:
    """Open the ttmm database of a repository, creating it when missing.

    The directory that holds the database file is created if necessary and
    the schema is brought up via ``ensure_schema`` before the connection is
    handed back.

    Parameters
    ----------
    repo_path: str
        Absolute or relative path to the repository root.

    Returns
    -------
    sqlite3.Connection
        An open connection whose ``row_factory`` is ``sqlite3.Row``, so
        columns can be read by name.
    """
    database_file = get_db_path(repo_path)
    parent_dir = os.path.dirname(database_file)
    os.makedirs(parent_dir, exist_ok=True)

    connection = sqlite3.connect(database_file)
    connection.row_factory = sqlite3.Row
    ensure_schema(connection)
    return connection
77
+
78
+
79
def ensure_schema(conn: sqlite3.Connection) -> None:
    """Create every ttmm table and index that is not present yet.

    Foreign-key enforcement is switched on for this connection first, then
    the ``CREATE ... IF NOT EXISTS`` script is run, so calling the function
    repeatedly is harmless. Note that ``trace_events.caller_id`` and
    ``trace_events.callee_id`` are plain integers: only ``run_id`` is
    declared as a foreign key on that table.
    """
    schema_script = """
        CREATE TABLE IF NOT EXISTS files (
            id INTEGER PRIMARY KEY,
            path TEXT UNIQUE,
            mtime REAL
        );
        CREATE TABLE IF NOT EXISTS symbols (
            id INTEGER PRIMARY KEY,
            file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
            qualname TEXT NOT NULL,
            lineno INTEGER NOT NULL,
            endlineno INTEGER NOT NULL,
            type TEXT NOT NULL,
            doc TEXT
        );
        CREATE INDEX IF NOT EXISTS idx_symbols_qualname ON symbols(qualname);
        CREATE INDEX IF NOT EXISTS idx_symbols_file_id ON symbols(file_id);
        CREATE TABLE IF NOT EXISTS edges (
            id INTEGER PRIMARY KEY,
            caller_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
            callee_id INTEGER REFERENCES symbols(id) ON DELETE CASCADE,
            callee_name TEXT,
            unresolved INTEGER NOT NULL DEFAULT 0
        );
        CREATE INDEX IF NOT EXISTS idx_edges_caller ON edges(caller_id);
        CREATE INDEX IF NOT EXISTS idx_edges_callee ON edges(callee_id);
        CREATE TABLE IF NOT EXISTS metrics (
            symbol_id INTEGER PRIMARY KEY REFERENCES symbols(id) ON DELETE CASCADE,
            complexity REAL,
            loc INTEGER,
            churn REAL
        );
        CREATE TABLE IF NOT EXISTS trace_runs (
            id INTEGER PRIMARY KEY,
            ts REAL,
            description TEXT
        );
        CREATE TABLE IF NOT EXISTS trace_events (
            id INTEGER PRIMARY KEY,
            run_id INTEGER NOT NULL REFERENCES trace_runs(id) ON DELETE CASCADE,
            caller_id INTEGER,
            callee_id INTEGER
        );
    """
    cursor = conn.cursor()
    # SQLite leaves foreign keys off by default; this is a per-connection switch.
    cursor.execute("PRAGMA foreign_keys = ON")
    cursor.executescript(schema_script)
    conn.commit()
131
+
132
+
133
def reset_static_tables(conn: sqlite3.Connection) -> None:
    """Empty the static-analysis tables ahead of a full re-index.

    Rows are deleted from ``edges``, ``metrics``, ``symbols`` and ``files``
    (in that order) and the change is committed. No statement here touches
    ``trace_runs`` or ``trace_events``.
    """
    wipe_script = """
        DELETE FROM edges;
        DELETE FROM metrics;
        DELETE FROM symbols;
        DELETE FROM files;
    """
    conn.cursor().executescript(wipe_script)
    conn.commit()
149
+
150
+
151
def insert_static_data(
    conn: sqlite3.Connection,
    files_data: List[Tuple[str, float]],
    symbols_data: List[Dict[str, object]],
    calls_data: List[Dict[str, object]],
    metrics_data: Dict[str, Tuple[float, int, float]],
) -> None:
    """Insert static analysis results into the database.

    Files are written first, then symbols, call edges and metrics; a single
    commit is issued at the end.

    Parameters
    ----------
    conn: sqlite3.Connection
        An open connection with schema prepared. Rows are read by column
        name, so ``row_factory`` must be ``sqlite3.Row``.
    files_data: List[Tuple[str, float]]
        List of ``(relative_path, mtime)`` for each Python file indexed.
    symbols_data: List[Dict[str, object]]
        Each entry must contain ``qualname``, ``path`` (relative file path),
        ``lineno``, ``endlineno`` and ``type``; ``doc`` is optional.
    calls_data: List[Dict[str, object]]
        Each entry contains ``caller_qualname``, ``callee_name`` and
        optionally ``unresolved``. Calls whose caller is not a known symbol
        are skipped.
    metrics_data: Dict[str, Tuple[float, int, float]]
        Mapping from symbol qualname to a tuple ``(complexity, loc, churn)``.
        Entries for unknown qualnames are skipped.

    Raises
    ------
    KeyError
        If a symbol refers to a ``path`` that is not present in ``files``.
    """
    cur = conn.cursor()

    # Insert files, then read back the path -> id mapping for all files.
    for path, mtime in files_data:
        cur.execute(
            "INSERT OR REPLACE INTO files (path, mtime) VALUES (?, ?)",
            (path, mtime),
        )
    cur.execute("SELECT id, path FROM files")
    file_ids: Dict[str, int] = {row["path"]: row["id"] for row in cur.fetchall()}

    # Insert symbols.
    for sym in symbols_data:
        cur.execute(
            "INSERT INTO symbols (file_id, qualname, lineno, endlineno, type, doc) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            (
                file_ids[sym["path"]],
                sym["qualname"],
                sym["lineno"],
                sym["endlineno"],
                sym["type"],
                sym.get("doc"),
            ),
        )

    # qualname -> symbol id (covers every symbol row, not only the new ones).
    cur.execute("SELECT id, qualname FROM symbols")
    sym_map: Dict[str, int] = {row["qualname"]: row["id"] for row in cur.fetchall()}

    # Index symbols by every name a call may use for them, built once so that
    # resolving a call is a dictionary lookup instead of a scan of all symbols.
    # A symbol is reachable under:
    #   * its last dotted segment after the last colon ("mod:Cls.run" -> "run")
    #   * every suffix that follows a colon ("mod:Cls.run" -> "Cls.run"),
    #     which is exactly ``qualname.endswith(":" + name)``.
    callee_index: Dict[str, set] = {}
    for qualname, sid in sym_map.items():
        names = {qualname.split(":")[-1].split(".")[-1]}
        colon = qualname.find(":")
        while colon != -1:
            names.add(qualname[colon + 1:])
            colon = qualname.find(":", colon + 1)
        for candidate_name in names:
            callee_index.setdefault(candidate_name, set()).add(sid)

    # Insert edges.
    for call in calls_data:
        caller_id = sym_map.get(call["caller_qualname"])
        if caller_id is None:
            continue  # skip unknown caller (should not happen)
        callee_name = call["callee_name"]
        matches = callee_index.get(callee_name, ())
        callee_id: Optional[int] = None
        # Only an unambiguous match on a call not flagged unresolved is linked.
        if not call.get("unresolved", False) and len(matches) == 1:
            (callee_id,) = matches
        cur.execute(
            "INSERT INTO edges (caller_id, callee_id, callee_name, unresolved) VALUES (?, ?, ?, ?)",
            (caller_id, callee_id, callee_name, 1 if callee_id is None else 0),
        )

    # Insert metrics.
    for qualname, (complexity, loc, churn) in metrics_data.items():
        sid = sym_map.get(qualname)
        if sid is None:
            continue
        cur.execute(
            "INSERT INTO metrics (symbol_id, complexity, loc, churn) VALUES (?, ?, ?, ?)",
            (sid, complexity, loc, churn),
        )
    conn.commit()
241
+
242
+
243
def get_hotspots(conn: sqlite3.Connection, limit: int = 10) -> List[sqlite3.Row]:
    """Return top symbols by hotspot score.

    The hotspot score is ``complexity * (1 + sqrt(churn))``. It is computed
    in Python after fetching the rows, because ordering by complexity and
    then churn in SQL does not rank by that product (a moderately complex
    but heavily churned symbol can outrank a more complex, stable one).
    Symbols without a ``metrics`` row are ignored; ``NULL`` complexity or
    churn counts as ``0`` and negative churn is clamped to ``0``.

    Parameters
    ----------
    conn: sqlite3.Connection
        Open connection whose ``row_factory`` is ``sqlite3.Row``.
    limit: int
        Maximum number of rows to return. A negative value (or ``None``)
        means "no limit", mirroring SQLite's ``LIMIT`` semantics.

    Returns
    -------
    List[sqlite3.Row]
        Rows with ``qualname``, ``file_path``, ``lineno``, ``complexity``,
        ``churn`` and ``loc``, highest score first. Ties keep the
        complexity-then-churn order produced by the query.
    """
    import math  # kept local: the module does not import math at top level

    cur = conn.cursor()
    cur.execute(
        """
        SELECT symbols.qualname AS qualname,
               files.path AS file_path,
               symbols.lineno AS lineno,
               metrics.complexity AS complexity,
               metrics.churn AS churn,
               metrics.loc AS loc
        FROM symbols
        JOIN metrics ON metrics.symbol_id = symbols.id
        JOIN files ON files.id = symbols.file_id
        ORDER BY metrics.complexity DESC, metrics.churn DESC
        """
    )
    rows = cur.fetchall()

    def hotspot_score(row: sqlite3.Row) -> float:
        complexity = row["complexity"] or 0.0
        churn = row["churn"] or 0.0
        return complexity * (1.0 + math.sqrt(max(churn, 0.0)))

    # list.sort is stable, so equal scores keep the SQL ordering.
    rows.sort(key=hotspot_score, reverse=True)
    if limit is None or limit < 0:
        return rows
    return rows[:limit]
268
+
269
+
270
def resolve_symbol(conn: sqlite3.Connection, name: str) -> Optional[int]:
    """Resolve a user‑supplied symbol name to an ID.

    The name may be fully qualified (e.g. ``package.module:Class.method``)
    or just a bare name. An exact qualname match is tried first. Otherwise
    the function looks for symbols whose qualname ends with ``:name`` and,
    if there are none, with ``.name``. If several symbols match, the one
    with the highest hotspot score ``complexity * (1 + sqrt(churn))`` wins.

    The suffix comparison is exact and case-sensitive. It deliberately does
    not use ``LIKE``, where ``_`` and ``%`` inside ``name`` act as wildcards
    (so ``get_db`` would also match ``getXdb``) and ASCII case is ignored.
    Symbols without a ``metrics`` row are still resolvable and score ``0``;
    ``NULL`` metrics count as ``0`` and negative churn is clamped to ``0``.

    Parameters
    ----------
    conn: sqlite3.Connection
        Open connection whose ``row_factory`` is ``sqlite3.Row``.
    name: str
        Qualified or bare symbol name.

    Returns
    -------
    Optional[int]
        The symbol id, or ``None`` when nothing matches.
    """
    import math

    cur = conn.cursor()

    # Exact match on the full qualname.
    cur.execute("SELECT id FROM symbols WHERE qualname = ?", (name,))
    row = cur.fetchone()
    if row is not None:
        return row[0]

    def hotspot_score(candidate: sqlite3.Row) -> float:
        complexity = candidate["complexity"] or 0.0
        churn = candidate["churn"] or 0.0
        return complexity * (1.0 + math.sqrt(max(churn, 0.0)))

    # Suffix match: ":name" (functions / qualified tails) first, then ".name"
    # (methods). substr() with a negative start takes the trailing characters.
    for separator in (":", "."):
        suffix = separator + name
        cur.execute(
            """
            SELECT symbols.id AS id,
                   symbols.qualname AS qualname,
                   metrics.complexity AS complexity,
                   metrics.churn AS churn
            FROM symbols
            LEFT JOIN metrics ON metrics.symbol_id = symbols.id
            WHERE substr(symbols.qualname, -length(?)) = ?
            ORDER BY metrics.complexity DESC, metrics.churn DESC
            """,
            (suffix, suffix),
        )
        candidates = cur.fetchall()
        if candidates:
            return max(candidates, key=hotspot_score)["id"]
    return None
324
+
325
+
326
def get_callees(conn: sqlite3.Connection, symbol_id: int) -> List[Tuple[str, Optional[str], bool]]:
    """List what a symbol calls, in edge insertion order.

    Every outgoing edge of ``symbol_id`` yields one tuple
    ``(qualname_or_name, file_path, unresolved)``:

    * unresolved edge: ``(callee_name, None, True)``
    * resolved edge: ``(callee qualname, callee file path, False)``
    """
    query = """
        SELECT edges.callee_id AS callee_id,
               edges.callee_name AS callee_name,
               edges.unresolved AS unresolved,
               symbols.qualname AS qualname,
               files.path AS file_path
        FROM edges
        LEFT JOIN symbols ON edges.callee_id = symbols.id
        LEFT JOIN files ON symbols.file_id = files.id
        WHERE edges.caller_id = ?
        ORDER BY edges.id
    """
    cursor = conn.cursor()
    cursor.execute(query, (symbol_id,))
    return [
        (edge["callee_name"], None, True)
        if edge["unresolved"]
        else (edge["qualname"], edge["file_path"], False)
        for edge in cursor.fetchall()
    ]
355
+
356
+
357
def get_callers(conn: sqlite3.Connection, symbol_id: int) -> List[Tuple[str, str]]:
    """List the symbols that call ``symbol_id``, in edge insertion order.

    Only edges whose ``callee_id`` equals ``symbol_id`` are considered. Each
    result is ``(caller qualname, caller relative file path)``.
    """
    query = """
        SELECT symbols.qualname AS qualname,
               files.path AS file_path
        FROM edges
        JOIN symbols ON edges.caller_id = symbols.id
        JOIN files ON symbols.file_id = files.id
        WHERE edges.callee_id = ?
        ORDER BY edges.id
    """
    cursor = conn.cursor()
    cursor.execute(query, (symbol_id,))
    callers: List[Tuple[str, str]] = []
    for edge in cursor.fetchall():
        callers.append((edge["qualname"], edge["file_path"]))
    return callers
376
+
377
+
378
def close(conn: sqlite3.Connection) -> None:
    """Release the given SQLite connection.

    Thin wrapper around ``Connection.close``; nothing is committed here.
    """
    conn.close()
zerottmm/trace.py ADDED
@@ -0,0 +1,178 @@
1
+ """Runtime tracing for ttmm.
2
+
3
+ This module provides a function to trace the runtime call graph of a
4
+ Python module or script. It uses ``sys.settrace`` to record every
5
+ function call within the indexed repository and persists the edges in
6
+ the database under ``trace_runs`` and ``trace_events``. Trace data is
7
+ stored separately from static edges and does not affect index state.
8
+
9
+ Example usage:
10
+
11
+ ```
12
+ from ttmm.trace import run_tracing
13
+ run_tracing("./myrepo", module="mypkg.cli:main", args=["--help"])
14
+ ```
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import importlib
20
+ import os
21
+ import runpy
22
+ import sys
23
+ import time
24
+ from typing import Dict, List, Optional, Tuple
25
+
26
+ from . import store
27
+
28
+
29
+ def _build_symbol_intervals(conn, repo_path: str) -> Dict[str, List[Tuple[int, int, int]]]:
30
+ """Build a mapping from file path to sorted intervals for symbol lookup.
31
+
32
+ Each entry maps ``relative_path`` to a list of ``(start, end, symbol_id)``
33
+ sorted by descending start. This allows efficient lookup of the
34
+ innermost containing symbol during tracing.
35
+ """
36
+ cur = conn.cursor()
37
+ cur.execute(
38
+ """
39
+ SELECT symbols.id AS id, symbols.lineno AS start, symbols.endlineno AS end,
40
+ files.path AS file_path
41
+ FROM symbols
42
+ JOIN files ON files.id = symbols.file_id
43
+ ORDER BY symbols.lineno DESC
44
+ """
45
+ )
46
+ mapping: Dict[str, List[Tuple[int, int, int]]] = {}
47
+ for row in cur.fetchall():
48
+ mapping.setdefault(row["file_path"], []).append((row["start"], row["end"], row["id"]))
49
+ # Sort each list by descending start line for quick first match
50
+ for lst in mapping.values():
51
+ lst.sort(key=lambda t: t[0], reverse=True)
52
+ return mapping
53
+
54
+
55
+ def _lookup_symbol(
56
+ symbol_intervals: Dict[str, List[Tuple[int, int, int]]], rel_path: str, lineno: int
57
+ ) -> Optional[int]:
58
+ """Return the symbol id containing a given file/line position, or None."""
59
+ intervals = symbol_intervals.get(rel_path)
60
+ if not intervals:
61
+ return None
62
+ for start, end, sid in intervals:
63
+ if start <= lineno <= end:
64
+ return sid
65
+ return None
66
+
67
+
68
def _persist_call_pairs(conn, description: str, call_pairs: List[Tuple[int, int]]) -> None:
    """Store one trace run and its ``(caller_id, callee_id)`` events, then commit."""
    cur = conn.cursor()
    cur.execute(
        "INSERT INTO trace_runs (ts, description) VALUES (?, ?)",
        (time.time(), description),
    )
    run_id = cur.lastrowid
    cur.executemany(
        "INSERT INTO trace_events (run_id, caller_id, callee_id) VALUES (?, ?, ?)",
        [(run_id, caller_id, callee_id) for caller_id, callee_id in call_pairs],
    )
    conn.commit()


def run_tracing(
    repo_path: str,
    module: Optional[str] = None,
    script: Optional[str] = None,
    args: Optional[List[str]] = None,
) -> None:
    """Trace execution of a module or script and persist call edges.

    Exactly one of ``module`` or ``script`` must be provided. The target is
    run under ``sys.settrace`` and every call between two symbols known to
    the index of ``repo_path`` is recorded as a ``(caller_id, callee_id)``
    pair. If at least one pair was recorded, a row is written to
    ``trace_runs`` and one row per pair to ``trace_events``.

    The pairs collected so far are persisted even when the target raises
    (for example ``SystemExit`` from a CLI entry point); the exception then
    propagates to the caller. The trace function and ``sys.argv`` that were
    active before the call are restored afterwards.

    Parameters
    ----------
    repo_path: str
        Path to the root of the repository to trace. Should have been
        indexed previously.
    module: Optional[str]
        A string of the form ``pkg.mod:func`` pointing to an entry
        point to call. If the part after ``:`` is omitted the module is
        imported via ``importlib.import_module`` (its top‑level code only
        runs if the module has not been imported before).
    script: Optional[str]
        Path to a Python script relative to ``repo_path`` to execute.
        Cannot be used together with ``module``.
    args: Optional[List[str]]
        List of arguments to pass to the function or script. For a
        module function these are passed positionally; for scripts they
        populate ``sys.argv`` starting at index 1.

    Raises
    ------
    ValueError
        If neither or both of ``module`` and ``script`` are given.
    """
    if (module is None) == (script is None):
        raise ValueError("Exactly one of module or script must be specified")
    repo_path = os.path.abspath(repo_path)
    args = args or []
    conn = store.connect(repo_path)
    try:
        symbol_intervals = _build_symbol_intervals(conn, repo_path)
        # Absolute file path -> relative path as stored in the index.
        file_map: Dict[str, str] = {
            os.path.abspath(os.path.join(repo_path, rel_path)): rel_path
            for rel_path in symbol_intervals
        }
        call_pairs: List[Tuple[int, int]] = []
        # One entry per active frame; None marks frames outside the index so
        # that the stack depth stays in step with call/return events.
        call_stack: List[Optional[int]] = []

        def tracer(frame, event, arg):
            if event == "call":
                callee_id = None
                rel_path = file_map.get(os.path.abspath(frame.f_code.co_filename))
                if rel_path is not None:
                    callee_id = _lookup_symbol(symbol_intervals, rel_path, frame.f_lineno)
                    caller_id = call_stack[-1] if call_stack else None
                    if caller_id is not None and callee_id is not None:
                        call_pairs.append((caller_id, callee_id))
                call_stack.append(callee_id)
            elif event == "return" and call_stack:
                call_stack.pop()
            return tracer

        old_argv = sys.argv.copy()
        # Remember any tracer already installed (debugger, coverage tool) so
        # it can be reinstated instead of being dropped.
        previous_trace = sys.gettrace()
        try:
            sys.settrace(tracer)
            if module:
                if ":" in module:
                    mod_name, func_name = module.split(":", 1)
                    func = getattr(importlib.import_module(mod_name), func_name)
                    func(*args)
                else:
                    importlib.import_module(module)
            else:
                script_path = os.path.join(repo_path, script)
                sys.argv = [script_path] + args
                runpy.run_path(script_path, run_name="__main__")
        finally:
            # Stop tracing before touching the database so persistence itself
            # is never recorded.
            sys.settrace(previous_trace)
            sys.argv = old_argv
            if call_pairs:
                _persist_call_pairs(conn, module or script or "trace", call_pairs)
    finally:
        store.close(conn)