source-graphh 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ """source-graph: extract, store, and visualize code relationships."""
2
+
3
+ from .models import Node, Relation, Range, NodeFilter, RelationFilter
4
+ from .store import RelationStore
5
+ from .extractor import Extractor
6
+ from .python_structure_extractor import PythonStructureExtractor
7
+
8
+ __all__ = [
9
+ "Node",
10
+ "Relation",
11
+ "Range",
12
+ "NodeFilter",
13
+ "RelationFilter",
14
+ "RelationStore",
15
+ "Extractor",
16
+ "PythonStructureExtractor",
17
+ ]
source_graph/cli.py ADDED
@@ -0,0 +1,193 @@
1
+ """CLI entry point for source-graph."""
2
+
3
+ import argparse
4
+ import hashlib
5
+ import importlib.metadata
6
+ import logging
7
+ import os
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import List, Optional
11
+
12
+ from .models import NodeFilter
13
+ from .python_import_extractor import PythonImportExtractor
14
+ from .python_structure_extractor import PythonStructureExtractor
15
+ from .store import RelationStore
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
def _collect_valid_paths(file_paths: List[str]) -> List[str]:
    """Expand files and directories into a de-duplicated list of files.

    Directories are walked recursively (following symlinks) and contribute
    every ``*.py`` file beneath them. Plain files are accepted as given,
    whatever their extension. Paths that do not exist, or that are neither
    a file nor a directory, are skipped with a warning.

    Args:
        file_paths: File or directory paths as supplied by the caller.

    Returns:
        The paths to analyze, as strings. Each underlying file appears at
        most once (de-duplicated by resolved path). Files discovered inside
        a directory are listed in sorted order so the result is stable
        across filesystems.
    """
    seen_files = set()
    seen_dirs = set()
    valid_paths = []

    def _add(candidate: Path) -> None:
        # De-duplicate on the resolved path, but report the path as given.
        resolved = candidate.resolve()
        if resolved not in seen_files:
            seen_files.add(resolved)
            valid_paths.append(str(candidate))

    for file_path in file_paths:
        path = Path(file_path)
        if not path.exists():
            logger.warning("File or directory not found: %s", file_path)
            continue
        if path.is_dir():
            for root, dirs, files in os.walk(path, followlinks=True):
                real_root = Path(root).resolve()
                if real_root in seen_dirs:
                    # Already walked via another route (for example a
                    # symlink cycle): prune so the walk cannot loop.
                    dirs[:] = []
                    continue
                seen_dirs.add(real_root)
                # Sorting in place makes os.walk descend deterministically.
                dirs.sort()
                for name in sorted(files):
                    if name.endswith(".py"):
                        _add(Path(root) / name)
        elif path.is_file():
            _add(path)
        else:
            logger.warning("Not a file or directory: %s", file_path)
    return valid_paths
45
+
46
+
47
def _setup_logging(verbose: bool) -> None:
    """Configure logging for the CLI via ``logging.basicConfig``.

    Args:
        verbose: When true the level is DEBUG; otherwise it is INFO.
    """
    if verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(level=level, format="%(levelname)s: %(message)s")
52
+
53
+
54
def extract_command(args: argparse.Namespace) -> int:
    """Run the ``extract`` subcommand.

    Creates the output directory, archives the content of every input file
    in the store, and runs the structure and import extractors over each
    file in that order.

    Args:
        args: Parsed arguments; reads ``verbose``, ``output`` and ``files``.

    Returns:
        0 on success; 1 when there are no valid input files or when the
        store holds no nodes after extraction.
    """
    _setup_logging(args.verbose)

    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)
    db_path = output_dir / "source_graph.db"

    valid_paths = _collect_valid_paths(args.files)
    if not valid_paths:
        logger.error("No valid files to analyze.")
        return 1

    store = RelationStore(str(db_path))
    # (extractor, summary log format) pairs, applied to each file in order.
    passes = (
        (PythonStructureExtractor(), " [structure] %d nodes, %d relations"),
        (PythonImportExtractor(valid_paths), " [dependencies] %d nodes, %d relations"),
    )

    for file_path in valid_paths:
        logger.info("Extracting: %s", file_path)

        # Read the source once; the same text is archived and extracted.
        try:
            with open(file_path, "r", encoding="utf-8") as handle:
                content = handle.read()
        except Exception as exc:
            logger.warning("Failed to read file %s: %s", file_path, exc)
            continue

        content_hash = hashlib.sha256(content.encode("utf-8")).hexdigest()
        # NOTE(review): len(content) is a character count, not the UTF-8
        # byte size - confirm which one the store expects.
        store.add_source_content(content_hash, content, len(content))
        store.add_source_path(file_path, content_hash, os.path.getmtime(file_path))

        for extractor, summary_format in passes:
            nodes, relations = extractor.extract(content, file_path)
            # Tie every node back to the archived content it came from.
            for node in nodes:
                node.source_content_id = content_hash
            store.add_nodes(nodes)
            store.add_relations(relations)
            logger.info(summary_format, len(nodes), len(relations))

    if not store.get_nodes(NodeFilter()):
        logger.error("No nodes extracted. Nothing to save.")
        return 1

    logger.info("SQLite database written to: %s", db_path)
    return 0
107
+
108
+
109
def serve_command(args: argparse.Namespace) -> int:
    """Run the ``serve`` subcommand.

    Args:
        args: Parsed arguments; reads ``verbose``, ``db_path``, ``host``
            and ``port``.

    Returns:
        1 when the database file does not exist; otherwise 0 once
        ``uvicorn.run`` returns.
    """
    _setup_logging(args.verbose)

    db_path = Path(args.db_path)
    if db_path.exists():
        # Imported here rather than at module level, so they are only
        # loaded when serving.
        from .server.config import configure
        from .server.app import app
        import uvicorn

        configure(str(db_path))
        uvicorn.run(app, host=args.host, port=args.port)
        return 0

    logger.error("Database not found: %s", db_path)
    return 1
124
+
125
+
126
def main(argv: Optional[List[str]] = None) -> int:
    """Parse command-line arguments and dispatch to the chosen subcommand.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use its
            default argument source.

    Returns:
        The subcommand's exit status, or 1 (after printing usage) when no
        subcommand was given.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if args.command == "extract":
        return extract_command(args)
    if args.command == "serve":
        return serve_command(args)

    parser.print_usage()
    return 1


def _package_version() -> str:
    """Return the installed distribution version, or ``"unknown"``.

    The lookup raises ``PackageNotFoundError`` when the distribution
    metadata is absent (for example when running from a source checkout);
    that must not prevent the CLI from starting.
    """
    try:
        return importlib.metadata.version("source-graphh")
    except importlib.metadata.PackageNotFoundError:
        return "unknown"


def _build_parser() -> argparse.ArgumentParser:
    """Build the top-level parser with the ``extract`` and ``serve`` subcommands."""
    parser = argparse.ArgumentParser(
        prog="source-graph",
        description="Extract, store, and visualize code relationships.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {_package_version()}",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Enable verbose logging.",
    )

    subparsers = parser.add_subparsers(dest="command")

    extract_parser = subparsers.add_parser(
        "extract",
        help="Extract relationships from source files and save to a database.",
    )
    extract_parser.add_argument(
        "files",
        nargs="+",
        help="One or more Python source files or directories to analyze.",
    )
    extract_parser.add_argument(
        "--output",
        "-o",
        required=True,
        help="Output directory for SQLite database.",
    )

    serve_parser = subparsers.add_parser(
        "serve",
        help="Serve an interactive HTML report from an existing database.",
    )
    serve_parser.add_argument(
        "db_path",
        help="Path to the source_graph.db file.",
    )
    serve_parser.add_argument(
        "--host",
        default="127.0.0.1",
        help="Host to bind to (default: 127.0.0.1)",
    )
    serve_parser.add_argument(
        "--port",
        type=int,
        default=8080,
        help="Port to listen on (default: 8080)",
    )
    return parser
190
+
191
+
192
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
@@ -0,0 +1,327 @@
1
+ """Data-building logic for source-graph visualization.
2
+
3
+ Pure functions that transform GraphStore data into view-ready payloads.
4
+ """
5
+
6
+ from typing import Any, Dict, List, Optional, Set, Tuple
7
+
8
+ from .models import NodeFilter, RelationFilter
9
+ from .store import GraphStore
10
+
11
+
12
def build_tree_data(store: GraphStore, dimension: str) -> List[Dict[str, Any]]:
    """Build hierarchical tree data for a given dimension.

    Each relation in the dimension is read as a parent -> child edge
    (``source_id`` -> ``target_id``). Roots are nodes that appear in a
    relation but never as a target, followed by every store node that
    takes part in no relation of this dimension.

    Args:
        store: Store queried for relations and nodes.
        dimension: Dimension name used to filter relations.

    Returns:
        One nested dict per root, each with ``id``, ``name``, ``kind``,
        ``fqn``, ``source_file``, ``range``, ``source_content_id``,
        ``children`` and ``collapsed`` keys. Node ids that the store does
        not know are omitted.
    """
    relations = store.get_relations(RelationFilter(dimension=dimension))

    children_map: Dict[str, List[str]] = {}
    target_ids: Set[str] = set()
    # A dict serves as an insertion-ordered set, so root order follows the
    # relation order instead of the per-process string hash order.
    related_ids: Dict[str, None] = {}
    for relation in relations:
        related_ids[relation.source_id] = None
        related_ids[relation.target_id] = None
        target_ids.add(relation.target_id)
        children_map.setdefault(relation.source_id, []).append(relation.target_id)

    # Roots: nodes that are sources but never targets.
    root_ids = [nid for nid in related_ids if nid not in target_ids]

    # Orphans: nodes with no relation in this dimension at all.
    # NOTE(review): the node query is unfiltered, so nodes that only take
    # part in other dimensions are listed here too - confirm this is intended.
    all_store_nodes = store.get_nodes(NodeFilter())
    root_ids.extend(n.id for n in all_store_nodes if n.id not in related_ids)

    node_lookup = {n.id: n for n in all_store_nodes}
    # Ids on the current root-to-node path, used to cut cycles.
    ancestors: Set[str] = set()

    def build_node(node_id: str) -> Optional[Dict[str, Any]]:
        node = node_lookup.get(node_id)
        if node is None:
            return None
        ancestors.add(node_id)
        children = []
        for child_id in children_map.get(node_id, []):
            if child_id in ancestors:
                # Back-edge to an ancestor: following it would recurse forever.
                continue
            child = build_node(child_id)
            if child is not None:
                children.append(child)
        ancestors.discard(node_id)
        return {
            "id": node.id,
            "name": node.name,
            "kind": node.kind,
            "fqn": node.fqn,
            "source_file": node.source_file,
            "range": {
                "start_line": node.range.start_line,
                "end_line": node.range.end_line,
            } if node.range else None,
            "source_content_id": node.source_content_id,
            "children": children,
            "collapsed": False,
        }

    trees = []
    for root_id in root_ids:
        tree = build_node(root_id)
        if tree is not None:
            trees.append(tree)

    return trees
68
+
69
+
70
def build_dependency_data(store: GraphStore, dimension: str) -> Dict[str, Any]:
    """Build structured dependency graph data for a given dimension.

    Args:
        store: Store queried for relations and nodes.
        dimension: Dimension name used to filter relations.

    Returns:
        A dict with three keys:

        * ``nodes``: node name -> ``{"is_external", "internal_out_degree",
          "kind"}`` for every node referenced by a relation.
        * ``edges``: one ``{"source", "target", "is_external"}`` dict per
          relation whose endpoints are both known, keyed by node name.
        * ``roots``: names of nodes with at least one outgoing edge, sorted
          by ascending internal out-degree (ties keep first-seen order).

        All three are empty when there are no usable relations.
    """
    empty: Dict[str, Any] = {"nodes": {}, "edges": [], "roots": []}

    relations = store.get_relations(RelationFilter(dimension=dimension))
    if not relations:
        return empty

    node_ids = set()
    for r in relations:
        node_ids.add(r.source_id)
        node_ids.add(r.target_id)

    node_lookup = {
        n.id: n for n in store.get_nodes(NodeFilter()) if n.id in node_ids
    }

    edges = []
    # Insertion-ordered set of names that have outgoing edges.
    root_order: Dict[str, None] = {}
    # Per-source count of edges to non-external targets, accumulated in the
    # same pass instead of rescanning every edge for every node.
    internal_out: Dict[str, int] = {}
    for r in relations:
        src = node_lookup.get(r.source_id)
        tgt = node_lookup.get(r.target_id)
        if src is None or tgt is None:
            continue
        is_external = tgt.kind == "external_module"
        edges.append({"source": src.name, "target": tgt.name, "is_external": is_external})
        root_order.setdefault(src.name)
        if not is_external:
            internal_out[src.name] = internal_out.get(src.name, 0) + 1

    if not edges:
        return empty

    # Nodes are keyed by name, so nodes sharing a name collapse to one entry.
    nodes: Dict[str, Dict[str, Any]] = {}
    for node in node_lookup.values():
        nodes[node.name] = {
            "is_external": node.kind == "external_module",
            "internal_out_degree": internal_out.get(node.name, 0),
            "kind": node.kind,
        }

    # sorted() is stable, so equal degrees keep first-seen order.
    root_names = sorted(root_order, key=lambda n: nodes[n]["internal_out_degree"])

    return {
        "nodes": nodes,
        "edges": edges,
        "roots": root_names,
    }
123
+
124
+
125
def build_dependency_map_data(store: GraphStore, dimension: str) -> Dict[str, Any]:
    """Build layered dependency map data (internal files only) for a given dimension.

    Relations touching an ``external_module`` node are dropped. The
    remaining graph (keyed by node name) is condensed into strongly
    connected components, and each component is assigned a layer: 0 for
    components with no outgoing edges, otherwise one more than the highest
    layer among the components it points to.

    Args:
        store: Store queried for relations and nodes.
        dimension: Dimension name used to filter relations.

    Returns:
        ``{"nodes": [...], "edges": [...]}``. Each node is ``{"id":
        "scc_<n>", "names": [sorted member names], "layer": int}``; nodes
        are ordered by descending layer, then by first member name. Each
        edge is ``{"source": "scc_<n>", "target": "scc_<m>"}`` between
        distinct components. Both lists are empty when no internal
        relations exist.
    """
    relations = store.get_relations(RelationFilter(dimension=dimension))
    if not relations:
        return {"nodes": [], "edges": []}

    node_lookup = {n.id: n for n in store.get_nodes(NodeFilter())}

    # Keep only edges whose endpoints are both known and internal.
    internal_edges: List[Tuple[str, str]] = []
    internal_node_names: Set[str] = set()
    for r in relations:
        src = node_lookup.get(r.source_id)
        tgt = node_lookup.get(r.target_id)
        if src is None or tgt is None:
            continue
        if src.kind == "external_module" or tgt.kind == "external_module":
            continue
        internal_edges.append((src.name, tgt.name))
        internal_node_names.update((src.name, tgt.name))

    if not internal_node_names:
        return {"nodes": [], "edges": []}

    # Traverse in sorted order so component numbering (and therefore the
    # "scc_<n>" ids) is the same on every run, independent of set ordering.
    ordered_names = sorted(internal_node_names)
    adj: Dict[str, List[str]] = {name: [] for name in ordered_names}
    for src_name, tgt_name in internal_edges:
        adj[src_name].append(tgt_name)
    for targets in adj.values():
        targets.sort()

    # Tarjan's strongly-connected-components algorithm (recursive form).
    index: Dict[str, int] = {}
    lowlinks: Dict[str, int] = {}
    stack: List[str] = []
    on_stack: Set[str] = set()
    sccs: List[List[str]] = []
    next_index = 0

    def strongconnect(v: str) -> None:
        nonlocal next_index
        index[v] = lowlinks[v] = next_index
        next_index += 1
        stack.append(v)
        on_stack.add(v)

        for w in adj[v]:
            if w not in index:
                strongconnect(w)
                lowlinks[v] = min(lowlinks[v], lowlinks[w])
            elif w in on_stack:
                lowlinks[v] = min(lowlinks[v], index[w])

        # v is the root of a component: pop its members off the stack.
        if lowlinks[v] == index[v]:
            scc: List[str] = []
            while True:
                w = stack.pop()
                on_stack.discard(w)
                scc.append(w)
                if w == v:
                    break
            sccs.append(scc)

    for v in ordered_names:
        if v not in index:
            strongconnect(v)

    node_to_scc: Dict[str, int] = {}
    for i, scc in enumerate(sccs):
        for name in scc:
            node_to_scc[name] = i

    # Condensed graph: edges between distinct components only. Sets remove
    # duplicates, so no further de-duplication is needed when emitting edges.
    scc_adj: List[Set[int]] = [set() for _ in sccs]
    for src_name, tgt_name in internal_edges:
        src_scc = node_to_scc[src_name]
        tgt_scc = node_to_scc[tgt_name]
        if src_scc != tgt_scc:
            scc_adj[src_scc].add(tgt_scc)

    # Bottom-up layer assignment, memoised in `layer`. The condensed graph
    # is acyclic, so the recursion terminates.
    layer: Dict[int, int] = {}

    def assign_layer(scc_id: int) -> int:
        if scc_id in layer:
            return layer[scc_id]
        deepest_child = -1
        for child_id in scc_adj[scc_id]:
            deepest_child = max(deepest_child, assign_layer(child_id))
        layer[scc_id] = deepest_child + 1
        return layer[scc_id]

    for i in range(len(sccs)):
        assign_layer(i)

    nodes = []
    for i, scc in enumerate(sccs):
        scc.sort()
        nodes.append({
            "id": f"scc_{i}",
            "names": scc,
            "layer": layer[i],
        })

    # Highest layer first, then alphabetically by first member name.
    nodes.sort(key=lambda n: (-n["layer"], n["names"][0]))

    edges = [
        {"source": f"scc_{src_id}", "target": f"scc_{tgt_id}"}
        for src_id, targets in enumerate(scc_adj)
        for tgt_id in sorted(targets)
    ]

    return {"nodes": nodes, "edges": edges}
254
+
255
+
256
def build_graph_data(store: GraphStore, dimension: str) -> Dict[str, Any]:
    """Build raw graph data (nodes and edges) for Cytoscape rendering.

    Relations with an unknown endpoint, or with an ``external_module``
    endpoint, are skipped. Nodes are keyed by name and emitted once, in
    first-seen order; one edge is emitted per remaining relation.

    Args:
        store: Store queried for relations and nodes.
        dimension: Dimension name used to filter relations.

    Returns:
        ``{"nodes": [...], "edges": [...]}``; both lists are empty when the
        dimension has no relations.
    """
    relations = store.get_relations(RelationFilter(dimension=dimension))
    if not relations:
        return {"nodes": [], "edges": []}

    node_lookup = {n.id: n for n in store.get_nodes(NodeFilter())}

    nodes: List[Dict[str, Any]] = []
    edges: List[Dict[str, str]] = []
    node_names: Set[str] = set()

    for relation in relations:
        source = node_lookup.get(relation.source_id)
        target = node_lookup.get(relation.target_id)
        if not (source and target):
            continue
        if "external_module" in (source.kind, target.kind):
            continue
        # Register the source before the target to keep first-seen order.
        for endpoint in (source, target):
            if endpoint.name in node_names:
                continue
            nodes.append({
                "id": endpoint.name,
                "name": endpoint.name,
                "kind": endpoint.kind,
                "fqn": endpoint.fqn,
            })
            node_names.add(endpoint.name)
        edges.append({"source": source.name, "target": target.name})

    return {"nodes": nodes, "edges": edges}
295
+
296
+
297
def build_all_data(store: GraphStore) -> Dict[str, Any]:
    """Assemble the complete payload for the frontend API.

    The ``dependencies`` dimension is rendered as a dependency graph with
    ``tree``, ``map`` and ``graph`` views; every other dimension is
    rendered as a tree. When the store reports no dimensions, a single
    ``structure`` dimension is assumed.

    Returns:
        A dict with ``dimensions``, ``sourceContents`` and ``sourcePaths``.
    """
    dimensions = store.get_dimensions() or ["structure"]

    per_dimension: Dict[str, Any] = {}
    for dim in dimensions:
        if dim != "dependencies":
            per_dimension[dim] = {
                "type": "tree",
                "data": build_tree_data(store, dim),
            }
            continue
        per_dimension[dim] = {
            "type": "dependency_graph",
            "data": {
                "tree": build_dependency_data(store, dim),
                "map": build_dependency_map_data(store, dim),
                "graph": build_graph_data(store, dim),
            },
        }

    payload: Dict[str, Any] = {"dimensions": per_dimension}
    payload["sourceContents"] = store.get_source_contents()
    payload["sourcePaths"] = store.get_source_paths()
    return payload
@@ -0,0 +1,39 @@
1
+ """Extractor abstract base class."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import List, Tuple
5
+
6
+ from .models import Node, Relation
7
+
8
+
9
class Extractor(ABC):
    """Abstract base class for source graph extractors.

    An extractor turns the text of one source file into nodes and
    relations for a single dimension.

    Contract:
    - `dimension` returns the dimension name this extractor produces.
    - `extract(content, file_path)` receives the file's text together with
      its path and returns (nodes, relations).
    - All returned nodes must have globally unique IDs.
    - All returned relations must reference valid node IDs.
    """

    @property
    @abstractmethod
    def dimension(self) -> str:
        """Return the dimension name produced by this extractor, e.g. 'structure'."""

    @abstractmethod
    def extract(self, content: str, file_path: str) -> Tuple[List[Node], List[Relation]]:
        """Extract nodes and relations from a single file.

        Args:
            content: Raw source text of the file.
            file_path: Path to the source file (for FQN generation and import resolution).

        Returns:
            A tuple of (nodes, relations) found in the file.
        """
source_graph/models.py ADDED
@@ -0,0 +1,62 @@
1
+ """Core data models for source-graph."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Dict, List, Optional
5
+
6
+
7
@dataclass
class Range:
    """Source code location range.

    A span described by a start position and an end position, each given
    as a line and a column.
    NOTE(review): whether lines/columns are 0- or 1-based, and whether the
    end is inclusive, is not established here - confirm against the extractors.
    """

    start_line: int  # line of the start position
    start_col: int  # column of the start position
    end_line: int  # line of the end position
    end_col: int  # column of the end position
15
+
16
+
17
@dataclass
class Node:
    """A node in the source graph.

    Instances are mutable: the CLI assigns ``source_content_id`` after
    extraction.
    """

    id: str  # identifier that relations refer to via source_id / target_id
    name: str  # short name; the dependency views key nodes by this value
    kind: str  # category string, e.g. "external_module"
    fqn: str  # presumably the fully qualified name - TODO confirm
    source_file: str  # source file the node is associated with
    range: Optional[Range] = None  # location in the source, when known
    parent_id: Optional[str] = None  # presumably the enclosing node's id - TODO confirm
    properties: Dict = field(default_factory=dict)  # extra data; a fresh dict per instance
    source_content_id: Optional[str] = None  # the CLI sets this to the SHA-256 hex digest of the file content
30
+
31
+
32
@dataclass
class Relation:
    """A relation between two nodes.

    A directed edge from ``source_id`` to ``target_id`` that belongs to
    one dimension.
    """

    id: str  # identifier of the relation itself
    source_id: str  # id of the node the relation starts from
    target_id: str  # id of the node the relation points to
    type: str  # relation type label
    dimension: str  # dimension name, e.g. "structure" or "dependencies"
    properties: Dict = field(default_factory=dict)  # extra data; a fresh dict per instance
42
+
43
+
44
@dataclass
class NodeFilter:
    """Filter conditions for querying nodes.

    Every field defaults to ``None``.
    NOTE(review): presumably a ``None`` field means "no constraint", so a
    bare ``NodeFilter()`` matches every node - confirm against the store.
    """

    kind: Optional[str] = None  # condition on the node's kind
    source_file: Optional[str] = None  # condition on the node's source_file
    dimension: Optional[str] = None  # condition on dimension
    parent_id: Optional[str] = None  # condition on the node's parent_id
52
+
53
+
54
@dataclass
class RelationFilter:
    """Filter conditions for querying relations.

    Every field defaults to ``None``.
    NOTE(review): presumably a ``None`` field means "no constraint" -
    confirm against the store.
    """

    type: Optional[str] = None  # condition on the relation's type
    dimension: Optional[str] = None  # condition on the relation's dimension
    source_id: Optional[str] = None  # condition on the relation's source_id
    target_id: Optional[str] = None  # condition on the relation's target_id
    source_file: Optional[str] = None  # condition on source file