PyPI - threadcheck - Versions diffs - 0.0.1__py3-none-any.whl - Mend

threadcheck 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

threadcheck/__init__.py +13 -0
threadcheck/__main__.py +3 -0
threadcheck/_version.py +1 -0
threadcheck/cli.py +89 -0
threadcheck/dynamic/__init__.py +0 -0
threadcheck/dynamic/__main__.py +38 -0
threadcheck/dynamic/clock.py +31 -0
threadcheck/dynamic/hook.py +97 -0
threadcheck/dynamic/tracker.py +191 -0
threadcheck/dynamic/transform.py +192 -0
threadcheck/pytest_plugin.py +60 -0
threadcheck/reporting/__init__.py +0 -0
threadcheck/reporting/formatter.py +33 -0
threadcheck/reporting/sarif.py +100 -0
threadcheck/reporting/types.py +3 -0
threadcheck/static/__init__.py +0 -0
threadcheck/static/analyzer.py +104 -0
threadcheck/static/lock_tracker.py +42 -0
threadcheck/static/models.py +48 -0
threadcheck/static/visitors.py +324 -0
threadcheck-0.0.1.dist-info/METADATA +248 -0
threadcheck-0.0.1.dist-info/RECORD +25 -0
threadcheck-0.0.1.dist-info/WHEEL +4 -0
threadcheck-0.0.1.dist-info/entry_points.txt +5 -0
threadcheck-0.0.1.dist-info/licenses/LICENSE +21 -0

threadcheck/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""
+threadcheck — Data-race detector for multi-threaded Python.
+Supports both AST-based static analysis and runtime dynamic
+detection via bytecode instrumentation.
+Targets Python 3.14+ free-threading builds.
+"""
+from ._version import __version__
+from .static.analyzer import analyze_path, analyze_file
+from .static.models import RaceWarning, Severity, WarningCategory

threadcheck/__main__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .cli import main
+# Running the package with `python -m threadcheck` executes this module,
+# which hands control straight to the CLI entry point.
+main()

threadcheck/_version.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.0.1"

threadcheck/cli.py ADDED Viewed

@@ -0,0 +1,89 @@
+import argparse
+import json
+import sys
+from pathlib import Path
+from ._version import __version__
+from .static.analyzer import analyze_path
+from .reporting.formatter import format_report
+from .reporting.sarif import format_sarif
+from .dynamic.__main__ import run_script
+def main():
+    """Parse the command line and dispatch to the selected sub-command.
+    ``scan`` runs the static analysis, ``run`` executes a script under the
+    dynamic tracker, and ``check-compat`` is a placeholder that reports
+    "not implemented" on stderr and exits with status 1.
+    """
+    parser = argparse.ArgumentParser(
+        prog="threadcheck", description="Data Race Detector for Python"
+    )
+    parser.add_argument(
+        "--version", action="version", version=f"threadcheck {__version__}"
+    )
+    commands = parser.add_subparsers(dest="command", required=True)
+    # scan: static analysis with two mutually exclusive machine formats.
+    scan_cmd = commands.add_parser("scan", help="Static analysis for data races")
+    scan_cmd.add_argument("path", help="File or directory to scan")
+    formats = scan_cmd.add_mutually_exclusive_group()
+    formats.add_argument("--json", action="store_true", help="Output in JSON format")
+    formats.add_argument("--sarif", action="store_true", help="Output in SARIF v2.1.0 format")
+    scan_cmd.add_argument("-o", "--output", help="Write output to file (default: stdout)")
+    # run: dynamic detection on a single script.
+    run_cmd = commands.add_parser("run", help="Dynamic race detection (Phase 3)")
+    run_cmd.add_argument("script", help="Python script to execute")
+    # check-compat: accepted by the parser but not implemented yet.
+    compat_cmd = commands.add_parser(
+        "check-compat", help="Check free-threading compatibility (Phase 7)"
+    )
+    compat_cmd.add_argument("path", nargs="?", default=".", help="Project path")
+    options = parser.parse_args()
+    command = options.command
+    if command == "check-compat":
+        print("Not implemented: free-threading compatibility check (Phase 7)", file=sys.stderr)
+        sys.exit(1)
+    if command == "scan":
+        _do_scan(options)
+    elif command == "run":
+        run_script(options.script)
+def _do_scan(args):
+    """Run the static analyser on ``args.path`` and emit the report.
+    The report is rendered as JSON (``args.json``), SARIF (``args.sarif``) or
+    plain text, and written to ``args.output`` or stdout. A banner and a
+    one-line summary of issue counts by severity are printed around it.
+    Exits with status 1 when the path does not exist.
+    """
+    path = Path(args.path).resolve()
+    if not path.exists():
+        print(f"Path does not exist: {path}", file=sys.stderr)
+        sys.exit(1)
+    # When JSON/SARIF itself goes to stdout, the banner and summary would make
+    # the stream unparseable for consumers, so send those status lines to
+    # stderr in that case. Text mode and file output behave as before.
+    machine_to_stdout = (args.json or args.sarif) and not args.output
+    status = sys.stderr if machine_to_stdout else sys.stdout
+    print(f"threadcheck scan -- analysing {path}", file=status)
+    print(file=status)
+    warnings = analyze_path(str(path))
+    total = len(warnings)
+    errors = sum(1 for w in warnings if w.severity.value == "error")
+    warns = sum(1 for w in warnings if w.severity.value == "warning")
+    infos = sum(1 for w in warnings if w.severity.value == "info")
+    if args.json:
+        report = json.dumps(
+            [w.to_dict() for w in warnings], indent=2, ensure_ascii=False
+        )
+    elif args.sarif:
+        report = format_sarif(warnings)
+    else:
+        report = format_report(warnings)
+    _write_output(args.output, report)
+    print(file=status)
+    print(
+        f"Total: {total} issue(s) ({errors} error(s), {warns} warning(s), {infos} info)",
+        file=status,
+    )
+def _write_output(path_arg: str | None, content: str):
+    """Send ``content`` to the file ``path_arg`` (UTF-8), or to stdout if no path is given."""
+    if not path_arg:
+        print(content)
+        return
+    destination = Path(path_arg)
+    destination.write_text(content, encoding="utf-8")
+# Allow this module to be executed directly as a script.
+if __name__ == "__main__":
+    main()

threadcheck/dynamic/__init__.py ADDED Viewed

File without changes

threadcheck/dynamic/__main__.py ADDED Viewed

@@ -0,0 +1,38 @@
+import ast
+import sys
+from pathlib import Path
+from .tracker import ThreadCheckTracker
+from .transform import TrackInjector
+def run_script(script_path: str):
+    """Execute a Python script under the dynamic race tracker and print the report.
+    The script is parsed, instrumented by ``TrackInjector``, compiled, and run
+    as ``__main__`` while ``ThreadCheckTracker`` is active. Afterwards the
+    formatted race report is printed and the tracker is reset.
+    Args:
+        script_path: Path of the script to run.
+    Exits with status 1 when the file does not exist or cannot be parsed.
+    A ``SystemExit`` raised by the script is swallowed so the report is
+    still printed.
+    """
+    path = Path(script_path).resolve()
+    if not path.exists():
+        print(f"File not found: {path}", file=sys.stderr)
+        sys.exit(1)
+    source = path.read_text(encoding="utf-8")
+    filename = str(path)
+    try:
+        tree = ast.parse(source, filename=filename)
+    except SyntaxError as e:
+        print(f"Syntax error: {e}", file=sys.stderr)
+        sys.exit(1)
+    TrackInjector(filename=filename).transform(tree)
+    ast.fix_missing_locations(tree)
+    code = compile(tree, filename, "exec")
+    namespace = {
+        "_threadcheck_tracker": ThreadCheckTracker,
+        "__file__": filename,
+        # Without an explicit __name__, the lookup falls through to
+        # builtins.__name__ ("builtins"), so the usual
+        # `if __name__ == "__main__":` guard in the script would never run.
+        "__name__": "__main__",
+    }
+    ThreadCheckTracker.start()
+    try:
+        exec(code, namespace)
+    except SystemExit:
+        pass
+    finally:
+        ThreadCheckTracker.stop()
+    print(ThreadCheckTracker.format_races())
+    ThreadCheckTracker.reset()

threadcheck/dynamic/clock.py ADDED Viewed

@@ -0,0 +1,31 @@
+from collections import defaultdict
+class VectorClock:
+    """Vector clock mapping thread ids to logical event counters."""
+    def __init__(self):
+        self._clock: dict[int, int] = defaultdict(int)
+    def tick(self, thread_id: int) -> int:
+        """Advance the counter of ``thread_id`` by one and return the new value."""
+        advanced = self._clock[thread_id] + 1
+        self._clock[thread_id] = advanced
+        return advanced
+    def merge(self, other: "VectorClock"):
+        """Raise every component of this clock to at least the value in ``other``."""
+        for tid, theirs in other._clock.items():
+            ours = self._clock.get(tid, 0)
+            self._clock[tid] = ours if ours >= theirs else theirs
+    def conflicts_with(self, other: "VectorClock") -> bool:
+        """Return True when neither clock is component-wise <= the other."""
+        return not self._leq(other) and not other._leq(self)
+    def _leq(self, other: "VectorClock") -> bool:
+        """Return True when every component here is <= the matching one in ``other``."""
+        return all(
+            count <= other._clock.get(tid, 0)
+            for tid, count in self._clock.items()
+        )
+    def copy(self) -> "VectorClock":
+        """Return an independent clock with the same components."""
+        duplicate = VectorClock()
+        duplicate._clock.update(self._clock)
+        return duplicate
+    def __repr__(self) -> str:
+        return f"VectorClock({dict(self._clock)})"

threadcheck/dynamic/hook.py ADDED Viewed

@@ -0,0 +1,97 @@
+import sys
+import ast
+import builtins
+import importlib.util
+import importlib.abc
+from pathlib import Path
+from .transform import TrackInjector
+from .tracker import ThreadCheckTracker
+class ThreadCheckLoader(importlib.abc.Loader):
+    """Import loader that instruments a module's source before executing it."""
+    def __init__(self, tracker=None):
+        self.tracker = tracker if tracker else ThreadCheckTracker
+    def create_module(self, spec):
+        # None lets the import machinery create the module object itself.
+        return None
+    def exec_module(self, module):
+        """Read, instrument, compile and execute the source of ``module``.
+        Raises ImportError when no source text can be obtained.
+        """
+        spec = module.__spec__
+        text = self._get_source(spec, module)
+        if text is None:
+            raise ImportError(f"cannot load source for {spec.name}")
+        origin = spec.origin
+        module.__file__ = origin
+        tree = ast.parse(text, filename=origin)
+        injector = TrackInjector(filename=str(origin))
+        injector.transform(tree)
+        ast.fix_missing_locations(tree)
+        compiled = compile(tree, origin, "exec")
+        namespace = module.__dict__
+        # The tracker is exposed both in the module globals and in builtins.
+        namespace["_threadcheck_tracker"] = self.tracker
+        builtins._threadcheck_tracker = self.tracker
+        exec(compiled, namespace)
+    @staticmethod
+    def _get_source(spec, module=None):
+        """Return the module's source text, or None if it cannot be read.
+        Tries ``spec.origin`` and ``module.__file__`` (``.py`` files only),
+        then falls back to the spec loader's ``get_source``. Failures of any
+        single attempt are ignored so the next one can be tried.
+        """
+        locations = (spec.origin, getattr(module, "__file__", None))
+        for location in locations:
+            if not location or Path(location).suffix != ".py":
+                continue
+            try:
+                return Path(location).read_text(encoding="utf-8")
+            except Exception:
+                pass
+        loader = spec.loader
+        if hasattr(loader, "get_source"):
+            try:
+                return loader.get_source(spec.name)
+            except Exception:
+                pass
+        return None
+class ThreadCheckFinder(importlib.abc.MetaPathFinder):
+    """Meta-path finder that routes plain ``.py`` modules through ``ThreadCheckLoader``.
+    Args:
+        tracker: Tracker handed to each loader; defaults to ``ThreadCheckTracker``.
+        include_paths: Optional directories; when given, only files located
+            under one of them are instrumented. When empty, every ``.py``
+            module this finder locates is instrumented.
+    """
+    def __init__(self, tracker=None, include_paths=None):
+        self.tracker = tracker or ThreadCheckTracker
+        self._include_paths = (
+            [Path(p).resolve() for p in include_paths] if include_paths else []
+        )
+    def _should_instrument(self, filepath: Path) -> bool:
+        """Return True when ``filepath`` falls under the configured include paths."""
+        if not self._include_paths:
+            return True
+        resolved = filepath.resolve()
+        return any(_is_under(resolved, inc) for inc in self._include_paths)
+    def find_spec(self, fullname, path, target=None):
+        """Return a spec using ``ThreadCheckLoader`` for ``fullname``, or None.
+        Returning None lets the remaining finders on ``sys.meta_path`` handle
+        the import (packages, extension modules, excluded files).
+        """
+        if path:
+            # For a submodule, `path` is the parent package's __path__, so the
+            # file is named after the last component only. Appending the full
+            # dotted name would look for pkg/pkg/mod.py and never match.
+            entries = path
+            relative = f"{fullname.rpartition('.')[2]}.py"
+        else:
+            entries = sys.path
+            relative = f"{fullname.replace('.', '/')}.py"
+        for entry in entries:
+            if entry == "":
+                entry = "."
+            candidate = Path(entry) / relative
+            if candidate.exists() and self._should_instrument(candidate):
+                return importlib.util.spec_from_file_location(
+                    fullname,
+                    str(candidate),
+                    loader=ThreadCheckLoader(self.tracker),
+                )
+        return None
+def install_hook(tracker=None, include_paths=None):
+    """Create a ``ThreadCheckFinder``, put it first on ``sys.meta_path`` and return it."""
+    finder = ThreadCheckFinder(tracker, include_paths)
+    sys.meta_path[:0] = [finder]
+    return finder
+def uninstall_hook(hook):
+    """Remove ``hook`` from ``sys.meta_path``; do nothing if it is not installed."""
+    try:
+        sys.meta_path.remove(hook)
+    except ValueError:
+        pass
+def _is_under(child: Path, parent: Path) -> bool:
+    """Return True when ``child`` equals ``parent`` or lies beneath it."""
+    return child.is_relative_to(parent)

threadcheck/dynamic/tracker.py ADDED Viewed

@@ -0,0 +1,191 @@
+import os
+import sys
+import threading
+from collections import Counter, defaultdict
+from dataclasses import dataclass, field
+from .clock import VectorClock
+@dataclass
+class AccessRecord:
+    """One recorded access to a tracked variable."""
+    var_name: str  # name of the accessed variable
+    operation: str  # kind of access; the tracker stores "read" or "write"
+    thread_id: int  # identifier of the accessing thread
+    clock: VectorClock = field(default_factory=VectorClock)  # clock attached to this access
+    location: tuple = ("", 0)  # (file, line) pair describing where the access happened
+class ThreadCheckTracker:
+    """Process-wide recorder of variable accesses and lock events for race detection.
+    All state lives on the class and every method is a classmethod, so
+    instrumented code calls e.g. ``ThreadCheckTracker.write_before(...)``
+    directly. Each access is stamped with a copy of the calling thread's
+    vector clock; a lock release stores the releasing thread's clock and a
+    later acquire of the same lock name merges it in. Two accesses to the
+    same variable from different threads, at least one of them a write, are
+    reported as a race when neither clock is <= the other.
+    """
+    _lock = threading.Lock()  # guards the shared tables below
+    _access_log: dict[str, list[AccessRecord]] = defaultdict(list)
+    _thread_clocks: dict[int, VectorClock] = {}
+    _lock_clocks: dict[str, VectorClock] = {}
+    _active = False
+    _diag_count = 0  # number of diagnostic lines emitted by write_before
+    @classmethod
+    def start(cls):
+        """Enable recording."""
+        cls._active = True
+    @classmethod
+    def stop(cls):
+        """Disable recording; already collected data is kept."""
+        cls._active = False
+    @classmethod
+    def _get_clock(cls) -> VectorClock:
+        """Return the calling thread's clock, creating it on first use."""
+        tid = threading.get_ident()
+        if tid not in cls._thread_clocks:
+            with cls._lock:
+                # Re-check under the lock before inserting.
+                if tid not in cls._thread_clocks:
+                    cls._thread_clocks[tid] = VectorClock()
+        return cls._thread_clocks[tid]
+    @classmethod
+    def _new_record(cls, operation: str, var_name: str, file: str, line: int) -> AccessRecord:
+        """Tick the calling thread's clock and build a record holding a snapshot of it."""
+        clock = cls._get_clock()
+        tid = threading.get_ident()
+        clock.tick(tid)
+        return AccessRecord(
+            var_name=var_name,
+            operation=operation,
+            thread_id=tid,
+            clock=clock.copy(),
+            location=(file, line),
+        )
+    @classmethod
+    def write_before(cls, var_name: str, file: str = "", line: int = 0):
+        """Record a write to ``var_name`` at ``file``:``line`` (no-op when inactive)."""
+        if not cls._active:
+            return
+        record = cls._new_record("write", var_name, file, line)
+        with cls._lock:
+            cls._access_log[var_name].append(record)
+            # NOTE(review): traces the first 10 writes to stderr; this looks
+            # like leftover debugging output - confirm whether it should stay.
+            if cls._diag_count < 10:
+                cls._diag_count += 1
+                tid = record.thread_id
+                ct = threading.current_thread()
+                print(
+                    f"[TC_DIAG] write_before tid={tid} ct_name={ct.name} ct_ident={ct.ident} var={var_name}",
+                    file=sys.stderr, flush=True,
+                )
+    @classmethod
+    def read_before(cls, var_name: str, file: str = "", line: int = 0):
+        """Record a read of ``var_name`` at ``file``:``line`` (no-op when inactive)."""
+        if not cls._active:
+            return
+        record = cls._new_record("read", var_name, file, line)
+        with cls._lock:
+            cls._access_log[var_name].append(record)
+    @classmethod
+    def lock_acquire(cls, lock_name: str, file: str = "", line: int = 0):
+        """Merge the clock stored at the last release of ``lock_name`` into the caller's clock."""
+        if not cls._active:
+            return
+        tid = threading.get_ident()
+        clock = cls._get_clock()
+        with cls._lock:
+            if lock_name in cls._lock_clocks:
+                clock.merge(cls._lock_clocks[lock_name])
+        clock.tick(tid)
+    @classmethod
+    def lock_release(cls, lock_name: str, file: str = "", line: int = 0):
+        """Store a snapshot of the caller's clock under ``lock_name``."""
+        if not cls._active:
+            return
+        clock = cls._get_clock()
+        with cls._lock:
+            cls._lock_clocks[lock_name] = clock.copy()
+    @classmethod
+    def reset(cls):
+        """Clear all recorded data and deactivate the tracker."""
+        cls.reset_logs()
+        cls._active = False
+    @classmethod
+    def reset_logs(cls):
+        """Clear all recorded data without changing the active flag."""
+        with cls._lock:
+            cls._access_log.clear()
+            cls._thread_clocks.clear()
+            cls._lock_clocks.clear()
+    @classmethod
+    def _race_key(cls, r1: AccessRecord, r2: AccessRecord) -> tuple:
+        """Return an order-independent key identifying a race pair."""
+        tid1, tid2 = sorted([r1.thread_id, r2.thread_id])
+        loc1, loc2 = sorted([r1.location, r2.location])
+        return (r1.var_name, tid1, tid2, loc1, loc2)
+    @classmethod
+    def _conflicting_pairs(cls) -> list[tuple[str, AccessRecord, AccessRecord]]:
+        """Return every pair of logged accesses that constitutes a race.
+        A pair qualifies when the two accesses come from different threads,
+        at least one is a write, and their clocks conflict. Pairs are listed
+        in log order and are not de-duplicated.
+        """
+        pairs: list[tuple[str, AccessRecord, AccessRecord]] = []
+        with cls._lock:
+            for var_name, records in cls._access_log.items():
+                for i, r1 in enumerate(records):
+                    for r2 in records[i + 1 :]:
+                        if r1.thread_id == r2.thread_id:
+                            continue
+                        if r1.operation != "write" and r2.operation != "write":
+                            continue
+                        if r1.clock.conflicts_with(r2.clock):
+                            pairs.append((var_name, r1, r2))
+        return pairs
+    @classmethod
+    def detect_races(cls) -> list[tuple[str, AccessRecord, AccessRecord]]:
+        """Return one representative ``(var_name, r1, r2)`` entry per distinct race key."""
+        seen: set[tuple] = set()
+        races: list[tuple[str, AccessRecord, AccessRecord]] = []
+        for entry in cls._conflicting_pairs():
+            _, r1, r2 = entry
+            key = cls._race_key(r1, r2)
+            if key not in seen:
+                seen.add(key)
+                races.append(entry)
+        return races
+    @classmethod
+    def format_races(cls) -> str:
+        """Return a human-readable report of the detected races."""
+        races = cls.detect_races()
+        if not races:
+            return "No data races detected"
+        # How many raw conflicting pairs collapse onto each unique race key.
+        overlap = Counter(
+            cls._race_key(r1, r2) for _, r1, r2 in cls._conflicting_pairs()
+        )
+        lines = ["Data races detected:", ""]
+        for var_name, r1, r2 in races:
+            f1, l1 = r1.location
+            f2, l2 = r2.location
+            count = overlap.get(cls._race_key(r1, r2), 0)
+            lines.append(f"  [!] `{var_name}`")
+            lines.append(
+                f"      Thread-{r1.thread_id} ({r1.operation})"
+                f" at {f1}:{l1}"
+            )
+            lines.append(
+                f"      Thread-{r2.thread_id} ({r2.operation})"
+                f" at {f2}:{l2}"
+            )
+            if count > 1:
+                lines.append(f"      ({count} overlapping accesses)")
+            lines.append("")
+        total_unique = len(races)
+        total_overlap = sum(overlap.values())
+        lines.append(
+            f"Summary: {total_unique} unique race pair(s), "
+            f"{total_overlap} total overlapping access(es)"
+        )
+        return "\n".join(lines)

threadcheck/dynamic/transform.py ADDED Viewed

@@ -0,0 +1,192 @@
+import ast
+import types
+# Callable names that mark a called `with` context expression as a lock.
+_LOCK_NAMES = frozenset({"Lock", "RLock", "Semaphore", "BoundedSemaphore"})
+# Pre-parsed import statement binding the tracker class to the name
+# `_threadcheck_tracker`; it is inserted into every transformed module.
+_TRACKER_IMPORT = ast.parse(
+    "from threadcheck.dynamic.tracker import ThreadCheckTracker as _threadcheck_tracker"
+).body[0]
+class TrackInjector:
+    """AST rewriter that inserts tracker calls before shared writes and inside lock blocks.
+    Only names declared ``global`` or ``nonlocal`` within a function are
+    treated as shared. Statements outside any function are left unchanged.
+    Args:
+        filename: File name embedded in the generated tracker calls.
+    """
+    def __init__(self, filename: str = "<unknown>"):
+        self.filename = filename
+    def transform(self, tree: ast.Module) -> ast.Module:
+        """Instrument ``tree`` in place and return it."""
+        # Inserting at index 0 would push the module docstring out of first
+        # position (losing __doc__) and would place a statement ahead of any
+        # `from __future__ import ...`, which makes compilation fail.
+        tree.body.insert(self._import_insert_index(tree.body), _TRACKER_IMPORT)
+        scopes = {}
+        self._collect_scopes(tree, scopes)
+        self._inject(tree, scopes)
+        return tree
+    @staticmethod
+    def _import_insert_index(body) -> int:
+        """Return the index just past the docstring and leading ``__future__`` imports."""
+        index = 0
+        if body:
+            first = body[0]
+            if (
+                isinstance(first, ast.Expr)
+                and isinstance(first.value, ast.Constant)
+                and isinstance(first.value.value, str)
+            ):
+                index = 1
+        while (
+            index < len(body)
+            and isinstance(body[index], ast.ImportFrom)
+            and body[index].module == "__future__"
+        ):
+            index += 1
+        return index
+    def _collect_scopes(self, node, scopes):
+        """Fill ``scopes`` with the global/nonlocal names of every function, keyed by ``id(node)``."""
+        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+            info = {"globals": set(), "nonlocals": set()}
+            # ast.walk also visits nested functions, so their declarations
+            # are included in the enclosing function's sets as well.
+            for child in ast.walk(node):
+                if isinstance(child, ast.Global):
+                    info["globals"].update(child.names)
+                elif isinstance(child, ast.Nonlocal):
+                    info["nonlocals"].update(child.names)
+            scopes[id(node)] = info
+        for child in ast.iter_child_nodes(node):
+            self._collect_scopes(child, scopes)
+    def _inject(self, node, scopes, func_id=None):
+        """Rewrite every statement list below ``node``; ``func_id`` is the enclosing function."""
+        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+            func_id = id(node)
+        for field in ("body", "orelse", "finalbody"):
+            old = getattr(node, field, None)
+            if isinstance(old, list):
+                setattr(node, field, self._transform_list(old, scopes, func_id))
+        for handler in getattr(node, "handlers", []):
+            handler.body = self._transform_list(handler.body, scopes, func_id)
+        for child in ast.iter_child_nodes(node):
+            self._inject(child, scopes, func_id)
+    def _transform_list(self, stmts, scopes, func_id):
+        """Return ``stmts`` with tracker calls added for the function ``func_id``.
+        Statements that are not inside a known function are returned as-is.
+        """
+        if func_id is None or func_id not in scopes:
+            return stmts
+        info = scopes[func_id]
+        shared = info["globals"] | info["nonlocals"]
+        new: list[ast.stmt] = []
+        for stmt in stmts:
+            if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
+                # Nested definitions are handled when _inject descends into them.
+                new.append(stmt)
+            elif isinstance(stmt, (ast.Assign, ast.Delete)):
+                # Both carry a `targets` list; rebinding and deleting a shared
+                # name are each reported as a write.
+                for t in stmt.targets:
+                    if isinstance(t, ast.Name) and t.id in shared:
+                        new.append(
+                            _make_write_before(t.id, self.filename, stmt.lineno)
+                        )
+                new.append(stmt)
+            elif isinstance(stmt, ast.AugAssign):
+                if isinstance(stmt.target, ast.Name) and stmt.target.id in shared:
+                    new.append(
+                        _make_write_before(
+                            stmt.target.id, self.filename, stmt.lineno
+                        )
+                    )
+                new.append(stmt)
+            elif isinstance(stmt, ast.With):
+                lock_name = _resolve_lock_name(stmt)
+                new.append(stmt)
+                if lock_name:
+                    # The acquire call runs first inside the block; the release
+                    # call is appended as the block's last statement.
+                    stmt.body.insert(
+                        0,
+                        _make_lock_acquire(lock_name, self.filename, stmt.lineno),
+                    )
+                    stmt.body.append(
+                        _make_lock_release(lock_name, self.filename, stmt.lineno),
+                    )
+            else:
+                new.append(stmt)
+        return new
+def _make_tracker_call(method: str, name: str, filename: str, lineno: int) -> ast.Expr:
+    """Build the statement ``_threadcheck_tracker.<method>(name, filename, lineno)``."""
+    return ast.Expr(
+        value=ast.Call(
+            func=ast.Attribute(
+                value=ast.Name(id="_threadcheck_tracker", ctx=ast.Load()),
+                attr=method,
+                ctx=ast.Load(),
+            ),
+            args=[
+                ast.Constant(value=name),
+                ast.Constant(value=filename),
+                ast.Constant(value=lineno),
+            ],
+            keywords=[],
+        ),
+    )
+def _make_write_before(var_name: str, filename: str, lineno: int) -> ast.Expr:
+    """Build a ``write_before`` tracker call for ``var_name``."""
+    return _make_tracker_call("write_before", var_name, filename, lineno)
+def _make_lock_acquire(lock_name: str, filename: str, lineno: int) -> ast.Expr:
+    """Build a ``lock_acquire`` tracker call for ``lock_name``."""
+    return _make_tracker_call("lock_acquire", lock_name, filename, lineno)
+def _make_lock_release(lock_name: str, filename: str, lineno: int) -> ast.Expr:
+    """Build a ``lock_release`` tracker call for ``lock_name``."""
+    return _make_tracker_call("lock_release", lock_name, filename, lineno)
+def _resolve_lock_name(with_stmt: ast.With) -> str | None:
+    """Return a name for the first lock-like context manager of ``with_stmt``.
+    A bare name yields that name; a call whose callee name (plain or
+    attribute) is in ``_LOCK_NAMES`` yields the unparsed call expression.
+    Returns None when no item matches.
+    """
+    # NOTE(review): attribute expressions such as `self.lock` match neither
+    # case and fall through to None - confirm this is intended.
+    for item in with_stmt.items:
+        manager = item.context_expr
+        if isinstance(manager, ast.Name):
+            return manager.id
+        if not isinstance(manager, ast.Call):
+            continue
+        callee = manager.func
+        if isinstance(callee, ast.Name):
+            callee_name = callee.id
+        elif isinstance(callee, ast.Attribute):
+            callee_name = callee.attr
+        else:
+            continue
+        if callee_name in _LOCK_NAMES:
+            return ast.unparse(manager)
+    return None
+def transform_source(source: str, filename: str = "<unknown>") -> str:
+    """Return ``source`` with tracker instrumentation added, as Python source text."""
+    module = ast.parse(source, filename=filename)
+    injector = TrackInjector(filename=filename)
+    injector.transform(module)
+    return ast.unparse(module)
+def transform_and_compile(source: str, filename: str = "<unknown>") -> types.CodeType:
+    """Instrument ``source`` and compile it in ``"exec"`` mode.
+    Returns the code object produced by ``compile`` (not source text), ready
+    to be passed to ``exec``.
+    """
+    tree = ast.parse(source, filename=filename)
+    TrackInjector(filename=filename).transform(tree)
+    # Injected nodes carry no line/column info; compile() requires it.
+    ast.fix_missing_locations(tree)
+    return compile(tree, filename, "exec")