PyPI - source-graphh - Versions diffs - 0.0.1__py3-none-any.whl - Mend

source-graphh 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

source_graph/__init__.py +17 -0
source_graph/cli.py +193 -0
source_graph/data_builder.py +327 -0
source_graph/extractor.py +39 -0
source_graph/models.py +62 -0
source_graph/python_import_extractor.py +227 -0
source_graph/python_structure_extractor.py +224 -0
source_graph/server/__init__.py +5 -0
source_graph/server/app.py +33 -0
source_graph/server/config.py +14 -0
source_graph/store.py +370 -0
source_graph/web/app.js +634 -0
source_graph/web/index.html +43 -0
source_graph/web/style.css +173 -0
source_graphh-0.0.1.dist-info/METADATA +94 -0
source_graphh-0.0.1.dist-info/RECORD +18 -0
source_graphh-0.0.1.dist-info/WHEEL +4 -0
source_graphh-0.0.1.dist-info/entry_points.txt +2 -0

source_graph/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""source-graph: extract, store, and visualize code relationships."""
+from .models import Node, Relation, Range, NodeFilter, RelationFilter
+from .store import RelationStore
+from .extractor import Extractor
+from .python_structure_extractor import PythonStructureExtractor
+# Names re-exported as the package's public API (what `from source_graph import *` exposes).
+# NOTE(review): PythonImportExtractor, which the CLI uses, is not re-exported here — confirm this is intentional.
+__all__ = [
+    "Node",
+    "Relation",
+    "Range",
+    "NodeFilter",
+    "RelationFilter",
+    "RelationStore",
+    "Extractor",
+    "PythonStructureExtractor",
+]

source_graph/cli.py ADDED Viewed

@@ -0,0 +1,193 @@
+"""CLI entry point for source-graph."""
+import argparse
+import hashlib
+import importlib.metadata
+import logging
+import os
+import sys
+from pathlib import Path
+from typing import List, Optional
+from .models import NodeFilter
+from .python_import_extractor import PythonImportExtractor
+from .python_structure_extractor import PythonStructureExtractor
+from .store import RelationStore
+logger = logging.getLogger(__name__)
+def _collect_valid_paths(file_paths: List[str]) -> List[str]:
+    seen = set()
+    valid_paths = []
+    for file_path in file_paths:
+        path = Path(file_path)
+        if not path.exists():
+            logger.warning("File or directory not found: %s", file_path)
+            continue
+        if path.is_dir():
+            for root, _dirs, files in os.walk(path, followlinks=True):
+                for name in files:
+                    if name.endswith(".py"):
+                        py_path = Path(root) / name
+                        resolved = py_path.resolve()
+                        if resolved not in seen:
+                            seen.add(resolved)
+                            valid_paths.append(str(py_path))
+        elif path.is_file():
+            resolved = path.resolve()
+            if resolved not in seen:
+                seen.add(resolved)
+                valid_paths.append(str(path))
+        else:
+            logger.warning("Not a file or directory: %s", file_path)
+    return valid_paths
+def _setup_logging(verbose: bool) -> None:
+    logging.basicConfig(
+        level=logging.DEBUG if verbose else logging.INFO,
+        format="%(levelname)s: %(message)s",
+    )
+def extract_command(args: argparse.Namespace) -> int:
+    _setup_logging(args.verbose)
+    output_dir = Path(args.output)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    db_path = output_dir / "source_graph.db"
+    valid_paths = _collect_valid_paths(args.files)
+    if not valid_paths:
+        logger.error("No valid files to analyze.")
+        return 1
+    store = RelationStore(str(db_path))
+    structure_extractor = PythonStructureExtractor()
+    import_extractor = PythonImportExtractor(valid_paths)
+    for file_path in valid_paths:
+        logger.info("Extracting: %s", file_path)
+        # Read and archive source content
+        try:
+            with open(file_path, "r", encoding="utf-8") as f:
+                content = f.read()
+        except Exception as e:
+            logger.warning("Failed to read file %s: %s", file_path, e)
+            continue
+        content_hash = hashlib.sha256(content.encode("utf-8")).hexdigest()
+        store.add_source_content(content_hash, content, len(content))
+        store.add_source_path(file_path, content_hash, os.path.getmtime(file_path))
+        # Extract using archived content
+        nodes, relations = structure_extractor.extract(content, file_path)
+        for node in nodes:
+            node.source_content_id = content_hash
+        store.add_nodes(nodes)
+        store.add_relations(relations)
+        logger.info("  [structure] %d nodes, %d relations", len(nodes), len(relations))
+        nodes, relations = import_extractor.extract(content, file_path)
+        for node in nodes:
+            node.source_content_id = content_hash
+        store.add_nodes(nodes)
+        store.add_relations(relations)
+        logger.info("  [dependencies] %d nodes, %d relations", len(nodes), len(relations))
+    if not store.get_nodes(NodeFilter()):
+        logger.error("No nodes extracted. Nothing to save.")
+        return 1
+    logger.info("SQLite database written to: %s", db_path)
+    return 0
+def serve_command(args: argparse.Namespace) -> int:
+    _setup_logging(args.verbose)
+    db_path = Path(args.db_path)
+    if not db_path.exists():
+        logger.error("Database not found: %s", db_path)
+        return 1
+    from .server.config import configure
+    from .server.app import app
+    import uvicorn
+    configure(str(db_path))
+    uvicorn.run(app, host=args.host, port=args.port)
+    return 0
+def main(argv: Optional[List[str]] = None) -> int:
+    parser = argparse.ArgumentParser(
+        prog="source-graph",
+        description="Extract, store, and visualize code relationships.",
+    )
+    parser.add_argument(
+        "--version",
+        action="version",
+        version=f"%(prog)s {importlib.metadata.version('source-graphh')}",
+    )
+    parser.add_argument(
+        "--verbose",
+        "-v",
+        action="store_true",
+        help="Enable verbose logging.",
+    )
+    subparsers = parser.add_subparsers(dest="command")
+    extract_parser = subparsers.add_parser(
+        "extract",
+        help="Extract relationships from source files and save to a database.",
+    )
+    extract_parser.add_argument(
+        "files",
+        nargs="+",
+        help="One or more Python source files or directories to analyze.",
+    )
+    extract_parser.add_argument(
+        "--output",
+        "-o",
+        required=True,
+        help="Output directory for SQLite database.",
+    )
+    serve_parser = subparsers.add_parser(
+        "serve",
+        help="Serve an interactive HTML report from an existing database.",
+    )
+    serve_parser.add_argument(
+        "db_path",
+        help="Path to the source_graph.db file.",
+    )
+    serve_parser.add_argument(
+        "--host",
+        default="127.0.0.1",
+        help="Host to bind to (default: 127.0.0.1)",
+    )
+    serve_parser.add_argument(
+        "--port",
+        type=int,
+        default=8080,
+        help="Port to listen on (default: 8080)",
+    )
+    args = parser.parse_args(argv)
+    if args.command == "extract":
+        return extract_command(args)
+    elif args.command == "serve":
+        return serve_command(args)
+    else:
+        parser.print_usage()
+        return 1
+# When the module is executed as a script, main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())

source_graph/data_builder.py ADDED Viewed

@@ -0,0 +1,327 @@
+"""Data-building logic for source-graph visualization.
+Pure functions that transform GraphStore data into view-ready payloads.
+"""
+from typing import Any, Dict, List, Optional, Set, Tuple
+from .models import NodeFilter, RelationFilter
+from .store import GraphStore
+def build_tree_data(store: GraphStore, dimension: str) -> List[Dict[str, Any]]:
+    """Build hierarchical tree data for a given dimension."""
+    relations = store.get_relations(RelationFilter(dimension=dimension))
+    # Build parent -> children mapping
+    children_map: Dict[str, List[str]] = {}
+    all_node_ids: set = set()
+    for relation in relations:
+        all_node_ids.add(relation.source_id)
+        all_node_ids.add(relation.target_id)
+        children_map.setdefault(relation.source_id, []).append(relation.target_id)
+    # Find root nodes: nodes that are sources but never targets
+    target_ids = {r.target_id for r in relations}
+    root_ids = [nid for nid in all_node_ids if nid not in target_ids]
+    # Also add orphan nodes (nodes with no relations at all)
+    all_store_nodes = store.get_nodes(NodeFilter())
+    for node in all_store_nodes:
+        if node.id not in all_node_ids:
+            root_ids.append(node.id)
+    node_lookup = {n.id: n for n in all_store_nodes}
+    def build_node(node_id: str) -> Optional[Dict[str, Any]]:
+        node = node_lookup.get(node_id)
+        if node is None:
+            return None
+        children = []
+        for child_id in children_map.get(node_id, []):
+            child = build_node(child_id)
+            if child:
+                children.append(child)
+        result = {
+            "id": node.id,
+            "name": node.name,
+            "kind": node.kind,
+            "fqn": node.fqn,
+            "source_file": node.source_file,
+            "range": {
+                "start_line": node.range.start_line,
+                "end_line": node.range.end_line,
+            } if node.range else None,
+            "source_content_id": node.source_content_id,
+            "children": children,
+            "collapsed": False,
+        }
+        return result
+    trees = []
+    for root_id in root_ids:
+        tree = build_node(root_id)
+        if tree:
+            trees.append(tree)
+    return trees
+def build_dependency_data(store: GraphStore, dimension: str) -> Dict[str, Any]:
+    """Build structured dependency graph data for a given dimension."""
+    relations = store.get_relations(RelationFilter(dimension=dimension))
+    if not relations:
+        return {"nodes": {}, "edges": [], "roots": []}
+    node_ids = set()
+    for r in relations:
+        node_ids.add(r.source_id)
+        node_ids.add(r.target_id)
+    all_nodes = store.get_nodes(NodeFilter())
+    node_lookup = {n.id: n for n in all_nodes if n.id in node_ids}
+    edges = []
+    adj: Dict[str, List[Tuple[str, bool]]] = {}
+    for r in relations:
+        src = node_lookup.get(r.source_id)
+        tgt = node_lookup.get(r.target_id)
+        if not src or not tgt:
+            continue
+        is_external = tgt.kind == "external_module"
+        edges.append({"source": src.name, "target": tgt.name, "is_external": is_external})
+        adj.setdefault(src.name, []).append((tgt.name, is_external))
+    if not edges:
+        return {"nodes": {}, "edges": [], "roots": []}
+    # Sort children alphabetically for stable display
+    for src_name in adj:
+        adj[src_name].sort(key=lambda x: x[0])
+    # Build nodes metadata
+    nodes: Dict[str, Dict[str, Any]] = {}
+    for node in node_lookup.values():
+        internal_out = sum(
+            1 for e in edges if e["source"] == node.name and not e["is_external"]
+        )
+        nodes[node.name] = {
+            "is_external": node.kind == "external_module",
+            "internal_out_degree": internal_out,
+            "kind": node.kind,
+        }
+    # Root nodes: all nodes that have outgoing edges
+    root_names = list(adj.keys())
+    root_names.sort(key=lambda n: nodes[n]["internal_out_degree"])
+    return {
+        "nodes": nodes,
+        "edges": edges,
+        "roots": root_names,
+    }
+def build_dependency_map_data(store: GraphStore, dimension: str) -> Dict[str, Any]:
+    """Build layered dependency map data (internal files only) for a given dimension."""
+    relations = store.get_relations(RelationFilter(dimension=dimension))
+    if not relations:
+        return {"nodes": [], "edges": []}
+    all_nodes = store.get_nodes(NodeFilter())
+    node_lookup = {n.id: n for n in all_nodes}
+    # Filter to internal-only edges
+    internal_edges: List[Tuple[str, str]] = []
+    internal_node_names: Set[str] = set()
+    for r in relations:
+        src = node_lookup.get(r.source_id)
+        tgt = node_lookup.get(r.target_id)
+        if not src or not tgt:
+            continue
+        if src.kind == "external_module" or tgt.kind == "external_module":
+            continue
+        internal_edges.append((src.name, tgt.name))
+        internal_node_names.add(src.name)
+        internal_node_names.add(tgt.name)
+    if not internal_node_names:
+        return {"nodes": [], "edges": []}
+    # Build adjacency list
+    adj: Dict[str, List[str]] = {name: [] for name in internal_node_names}
+    for src, tgt in internal_edges:
+        adj[src].append(tgt)
+    # Tarjan SCC algorithm
+    index_counter = [0]
+    stack: List[str] = []
+    lowlinks: Dict[str, int] = {}
+    index: Dict[str, int] = {}
+    on_stack: Dict[str, bool] = {}
+    sccs: List[List[str]] = []
+    def strongconnect(v: str) -> None:
+        index[v] = index_counter[0]
+        lowlinks[v] = index_counter[0]
+        index_counter[0] += 1
+        stack.append(v)
+        on_stack[v] = True
+        for w in adj.get(v, []):
+            if w not in internal_node_names:
+                continue
+            if w not in index:
+                strongconnect(w)
+                lowlinks[v] = min(lowlinks[v], lowlinks[w])
+            elif on_stack.get(w, False):
+                lowlinks[v] = min(lowlinks[v], index[w])
+        if lowlinks[v] == index[v]:
+            scc: List[str] = []
+            while True:
+                w = stack.pop()
+                on_stack[w] = False
+                scc.append(w)
+                if w == v:
+                    break
+            sccs.append(scc)
+    for v in internal_node_names:
+        if v not in index:
+            strongconnect(v)
+    # Build node-to-SCC mapping
+    node_to_scc: Dict[str, int] = {}
+    for i, scc in enumerate(sccs):
+        for node in scc:
+            node_to_scc[node] = i
+    # Build compressed DAG edges
+    scc_adj: List[Set[int]] = [set() for _ in sccs]
+    for src, tgt in internal_edges:
+        src_scc = node_to_scc[src]
+        tgt_scc = node_to_scc[tgt]
+        if src_scc != tgt_scc:
+            scc_adj[src_scc].add(tgt_scc)
+    # Bottom-up layer assignment
+    layer: Dict[int, int] = {}
+    visited: Set[int] = set()
+    def assign_layer(scc_id: int) -> int:
+        if scc_id in visited:
+            return layer.get(scc_id, 0)
+        visited.add(scc_id)
+        max_child_layer = -1
+        for child_id in scc_adj[scc_id]:
+            child_layer = assign_layer(child_id)
+            max_child_layer = max(max_child_layer, child_layer)
+        layer[scc_id] = max_child_layer + 1
+        return layer[scc_id]
+    for i in range(len(sccs)):
+        assign_layer(i)
+    # Build output nodes
+    nodes = []
+    for i, scc in enumerate(sccs):
+        scc.sort()
+        nodes.append({
+            "id": f"scc_{i}",
+            "names": scc,
+            "layer": layer[i],
+        })
+    # Sort by layer descending, then alphabetically by first name
+    nodes.sort(key=lambda n: (-n["layer"], n["names"][0]))
+    # Build output edges
+    edges = []
+    seen_edges: Set[Tuple[int, int]] = set()
+    for src_id in range(len(sccs)):
+        for tgt_id in scc_adj[src_id]:
+            key = (src_id, tgt_id)
+            if key in seen_edges:
+                continue
+            seen_edges.add(key)
+            edges.append({
+                "source": f"scc_{src_id}",
+                "target": f"scc_{tgt_id}",
+            })
+    return {"nodes": nodes, "edges": edges}
+def build_graph_data(store: GraphStore, dimension: str) -> Dict[str, Any]:
+    """Build raw graph data (nodes and edges) for Cytoscape rendering."""
+    relations = store.get_relations(RelationFilter(dimension=dimension))
+    if not relations:
+        return {"nodes": [], "edges": []}
+    all_nodes = store.get_nodes(NodeFilter())
+    node_lookup = {n.id: n for n in all_nodes}
+    nodes: List[Dict[str, Any]] = []
+    node_names: Set[str] = set()
+    edges: List[Dict[str, str]] = []
+    for r in relations:
+        src = node_lookup.get(r.source_id)
+        tgt = node_lookup.get(r.target_id)
+        if not src or not tgt:
+            continue
+        if src.kind == "external_module" or tgt.kind == "external_module":
+            continue
+        if src.name not in node_names:
+            nodes.append({
+                "id": src.name,
+                "name": src.name,
+                "kind": src.kind,
+                "fqn": src.fqn,
+            })
+            node_names.add(src.name)
+        if tgt.name not in node_names:
+            nodes.append({
+                "id": tgt.name,
+                "name": tgt.name,
+                "kind": tgt.kind,
+                "fqn": tgt.fqn,
+            })
+            node_names.add(tgt.name)
+        edges.append({"source": src.name, "target": tgt.name})
+    return {"nodes": nodes, "edges": edges}
+def build_all_data(store: GraphStore) -> Dict[str, Any]:
+    """Assemble the complete payload for the frontend API.
+    Returns all dimension data, source contents, and source paths.
+    """
+    dimensions = store.get_dimensions()
+    if not dimensions:
+        dimensions = ["structure"]
+    result: Dict[str, Any] = {}
+    for dim in dimensions:
+        if dim == "dependencies":
+            result[dim] = {
+                "type": "dependency_graph",
+                "data": {
+                    "tree": build_dependency_data(store, dim),
+                    "map": build_dependency_map_data(store, dim),
+                    "graph": build_graph_data(store, dim),
+                },
+            }
+        else:
+            result[dim] = {
+                "type": "tree",
+                "data": build_tree_data(store, dim),
+            }
+    return {
+        "dimensions": result,
+        "sourceContents": store.get_source_contents(),
+        "sourcePaths": store.get_source_paths(),
+    }

source_graph/extractor.py ADDED Viewed

@@ -0,0 +1,39 @@
+"""Extractor abstract base class."""
+from abc import ABC, abstractmethod
+from typing import List, Tuple
+from .models import Node, Relation
+class Extractor(ABC):
+    """Abstract base class for source graph extractors.
+    Each extractor is responsible for extracting nodes and relations
+    from source files for a specific dimension.
+    Contract:
+    - `dimension` returns the dimension name this extractor produces.
+    - `extract(file_path)` reads a single file and returns (nodes, relations).
+    - All returned nodes must have globally unique IDs.
+    - All returned relations must reference valid node IDs.
+    """
+    @property
+    @abstractmethod
+    def dimension(self) -> str:
+        """Return the dimension name produced by this extractor, e.g. 'structure'."""
+        pass
+    @abstractmethod
+    def extract(self, content: str, file_path: str) -> Tuple[List[Node], List[Relation]]:
+        """Extract nodes and relations from a single file.
+        Args:
+            content: Raw source text of the file.
+            file_path: Path to the source file (for FQN generation and import resolution).
+        Returns:
+            A tuple of (nodes, relations) found in the file.
+        """
+        pass

source_graph/models.py ADDED Viewed

@@ -0,0 +1,62 @@
+"""Core data models for source-graph."""
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional
+@dataclass
+class Range:
+    """Source code location range, given as a start position and an end position."""
+    # Line and column where the range starts.
+    # NOTE(review): whether lines/columns are 0- or 1-based is not visible here — confirm in the extractors.
+    start_line: int
+    start_col: int
+    # Line and column where the range ends.
+    end_line: int
+    end_col: int
+@dataclass
+class Node:
+    """A node in the source graph."""
+    # Identifier that relations refer to through source_id / target_id.
+    id: str
+    # Short name; the dependency views use it as the display key.
+    name: str
+    # Category string; "external_module" is the value the dependency views treat as external.
+    kind: str
+    # Fully qualified name.
+    fqn: str
+    # Path of the source file — presumably the file the node was extracted from; confirm in the extractors.
+    source_file: str
+    # Location inside the source file; None when no location is recorded.
+    range: Optional[Range] = None
+    # Presumably the id of the enclosing node, None for top-level nodes — TODO confirm.
+    parent_id: Optional[str] = None
+    # Free-form extra data; default_factory gives every instance its own dict.
+    properties: Dict = field(default_factory=dict)
+    # Key of the archived source content; the CLI sets it to the SHA-256 hex digest of the file text.
+    source_content_id: Optional[str] = None
+@dataclass
+class Relation:
+    """A directed relation between two nodes, belonging to one dimension."""
+    # Identifier of the relation itself.
+    id: str
+    # Id of the node the relation starts from.
+    source_id: str
+    # Id of the node the relation points to.
+    target_id: str
+    # Relation type label.
+    type: str
+    # Dimension the relation belongs to, e.g. "structure" or "dependencies".
+    dimension: str
+    # Free-form extra data; default_factory gives every instance its own dict.
+    properties: Dict = field(default_factory=dict)
+@dataclass
+class NodeFilter:
+    """Filter conditions for querying nodes."""
+    # Every field defaults to None; callers pass NodeFilter() to fetch all nodes,
+    # so a None field presumably imposes no constraint — confirm in the store.
+    kind: Optional[str] = None
+    source_file: Optional[str] = None
+    dimension: Optional[str] = None
+    parent_id: Optional[str] = None
+@dataclass
+class RelationFilter:
+    """Filter conditions for querying relations."""
+    # Every field defaults to None; presumably a None field imposes no constraint — confirm in the store.
+    type: Optional[str] = None
+    # The data builders set only this field to select one dimension's relations.
+    dimension: Optional[str] = None
+    source_id: Optional[str] = None
+    target_id: Optional[str] = None
+    source_file: Optional[str] = None