source-graphh 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,227 @@
1
+ """Python import dependency extractor using the ast module."""
2
+
3
+ import ast
4
+ import hashlib
5
+ import logging
6
+ import os
7
+ from pathlib import Path
8
+ from typing import Dict, List, Optional, Tuple
9
+
10
+ from .extractor import Extractor
11
+ from .models import Node, Relation
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class PythonImportExtractor(Extractor):
    """Extracts cross-file import dependencies.

    Expects to be initialized with the full set of files being analyzed
    so that imported module names can be resolved to files within the set.
    Imports that cannot be resolved to an analyzed file are emitted as
    shared ``external_module`` nodes.
    """

    def __init__(self, all_file_paths: Optional[List[str]] = None) -> None:
        """Index the analyzed files and infer the package root.

        Args:
            all_file_paths: Paths of every file in the analysis set. The
                strings are kept as given because they are reused as node
                names and ID seeds. ``None`` means an empty set.
        """
        self._original_paths = list(all_file_paths or [])
        # Canonical (absolute, symlink-free) path -> path as given by caller.
        self._abs_to_orig: Dict[str, str] = {
            self._canonical_path(p): p for p in self._original_paths
        }
        self._all_files: set = set(self._abs_to_orig)
        # _compute_package_root reads self._all_files, so it must run last.
        self._package_root = self._compute_package_root(list(self._all_files))
        self._external_nodes: Dict[str, Node] = {}

    @property
    def dimension(self) -> str:
        """Name of the dimension the emitted relations belong to."""
        return "dependencies"

    @staticmethod
    def _canonical_path(path) -> str:
        """Return *path* as an absolute path with symlinks resolved.

        Every lookup into ``_abs_to_orig`` / ``_all_files`` must go through
        this helper so that keys and probes are normalized the same way.
        """
        return os.path.realpath(os.path.abspath(path))

    def _make_id(self, fqn: str) -> str:
        """Return the first 16 hex characters of the SHA-256 of *fqn*."""
        return hashlib.sha256(fqn.encode()).hexdigest()[:16]

    def _compute_lcp_of_dirs(self, dir_paths: List[str]) -> str:
        """Compute longest common directory path from directory paths.

        Returns an empty string when *dir_paths* is empty or the paths
        share no leading component.
        """
        if not dir_paths:
            return ""
        common: List[str] = []
        # zip() stops at the shortest path, so a directory that is a prefix
        # of the others bounds the result.
        for level_parts in zip(*(Path(p).parts for p in dir_paths)):
            if len(set(level_parts)) != 1:
                break
            common.append(level_parts[0])
        return str(Path(*common)) if common else ""

    def _compute_package_root(self, abs_paths: List[str]) -> str:
        """Infer package root from __init__.py locations.

        Finds directories containing __init__.py among analyzed files,
        then takes the LCP of their parent directories.
        Falls back to traditional file parent LCP if no __init__.py found.
        """
        package_dirs = set()
        for p in abs_paths:
            dir_path = Path(p).parent
            # Only an __init__.py that is itself part of the analyzed set
            # marks the directory as a package.
            if self._canonical_path(dir_path / "__init__.py") in self._all_files:
                package_dirs.add(str(dir_path))

        if package_dirs:
            parent_dirs = [str(Path(d).parent) for d in package_dirs]
            return self._compute_lcp_of_dirs(parent_dirs)

        # Fallback: LCP of file parent dirs
        return self._compute_lcp_of_dirs([str(Path(p).parent) for p in abs_paths])

    def _resolve_module(self, module_name: str) -> Optional[str]:
        """Resolve module name to a file path in the analyzed set.

        Tries ``<root>/a/b.py`` first and ``<root>/a/b/__init__.py`` second.
        Returns the original path (as passed to __init__) for consistent IDs,
        or None when the module is not part of the analyzed set.
        """
        if not module_name or not self._package_root:
            return None

        root = Path(self._package_root)
        relative = Path(*module_name.split("."))
        candidates = (
            root / relative.with_suffix(".py"),
            root / relative / "__init__.py",
        )
        for candidate in candidates:
            original = self._abs_to_orig.get(self._canonical_path(candidate))
            if original is not None:
                return original
        return None

    def _get_external_node(self, module_name: str) -> Node:
        """Get or create a deduplicated external module node."""
        if module_name not in self._external_nodes:
            fqn = f"<external>/{module_name}"
            self._external_nodes[module_name] = Node(
                id=self._make_id(fqn),
                name=module_name,
                kind="external_module",
                fqn=fqn,
                source_file="<external>",
            )
        return self._external_nodes[module_name]

    def _module_path_for_file(self, abs_path: str) -> List[str]:
        """Compute module path parts for a file relative to package root.

        Regular .py: root/pkg/a.py -> ['pkg', 'a']
        __init__.py: root/pkg/__init__.py -> ['pkg']

        Raises:
            ValueError: If *abs_path* is not located under the package root
                (propagated from ``Path.relative_to``).
        """
        rel = Path(abs_path).relative_to(self._package_root)
        parts = list(rel.with_suffix("").parts)
        if parts and parts[-1] == "__init__":
            parts = parts[:-1]
        return parts

    def _resolve_relative_module(
        self, file_path: str, level: int, module: Optional[str]
    ) -> Optional[str]:
        """Resolve a relative ImportFrom to an absolute module name.

        Returns the target module name (e.g., 'pkg.sub.module') or None
        if the relative import escapes the package root or the importing
        file itself lies outside the package root.
        """
        try:
            module_path = self._module_path_for_file(self._canonical_path(file_path))
        except ValueError:
            # The file is not under the inferred package root (or no root
            # could be inferred), so its relative imports cannot be mapped
            # to a module name. Skip the import instead of aborting extract().
            logger.debug(
                "Cannot resolve relative import in %s: file is outside package root %r",
                file_path,
                self._package_root,
            )
            return None

        # current_package: module path minus the file itself (if not __init__)
        if Path(file_path).name == "__init__.py":
            current_package = module_path
        else:
            current_package = module_path[:-1] if module_path else []

        # level dots = backtrack (level - 1) from current package
        backtrack = level - 1
        if backtrack > len(current_package):
            return None

        target_parts = current_package[: len(current_package) - backtrack]
        if module is not None:
            target_parts = target_parts + module.split(".")

        if not target_parts:
            return None

        return ".".join(target_parts)

    def extract(self, content: str, file_path: str) -> Tuple[List[Node], List[Relation]]:
        """Extract import relations from one Python source file.

        Args:
            content: Source text of the file.
            file_path: Path of the file; mapped back to the spelling given
                to ``__init__`` when it refers to an analyzed file.

        Returns:
            ``(nodes, relations)``: the source file node, one node per
            import (an analyzed file or an external module), and one
            ``file_imports_module`` relation per import. Both lists are
            empty if *content* fails to parse.
        """
        nodes: List[Node] = []
        relations: List[Relation] = []

        # Probe with the same normalization used for the dictionary keys;
        # plain abspath() would miss whenever a symlink is involved and the
        # file would get a different ID than the one importers point at.
        orig_path = self._abs_to_orig.get(self._canonical_path(file_path), file_path)

        try:
            tree = ast.parse(content)
        except SyntaxError as e:
            logger.warning("Syntax error in %s: %s", file_path, e)
            return nodes, relations

        # Source file node (consistent ID with PythonStructureExtractor)
        file_id = self._make_id(orig_path)
        nodes.append(
            Node(
                id=file_id,
                name=orig_path,
                kind="file",
                fqn=orig_path,
                source_file=orig_path,
            )
        )

        # Collect imported modules: (module_name, is_relative)
        imported_modules: List[Tuple[str, bool]] = []
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    imported_modules.append((alias.name, False))
            elif isinstance(node, ast.ImportFrom):
                if node.level == 0:
                    if node.module:
                        imported_modules.append((node.module, False))
                else:
                    module_name = self._resolve_relative_module(
                        file_path, node.level, node.module
                    )
                    if module_name is not None:
                        imported_modules.append((module_name, True))

        for module_name, is_relative in imported_modules:
            if not module_name:
                continue

            resolved_path = self._resolve_module(module_name)
            if resolved_path:
                target_id = self._make_id(resolved_path)
                target_node = Node(
                    id=target_id,
                    name=resolved_path,
                    kind="file",
                    fqn=resolved_path,
                    source_file=resolved_path,
                )
            else:
                target_node = self._get_external_node(module_name)
                target_id = target_node.id
            nodes.append(target_node)

            rel_props = {}
            if is_relative:
                rel_props["is_relative"] = True

            relations.append(
                Relation(
                    id=f"{file_id}-{target_id}",
                    source_id=file_id,
                    target_id=target_id,
                    type="file_imports_module",
                    dimension=self.dimension,
                    properties=rel_props,
                )
            )

        return nodes, relations
@@ -0,0 +1,224 @@
1
+ """Python structure extractor using the ast module."""
2
+
3
+ import ast
4
+ import hashlib
5
+ import logging
6
+ from typing import List, Tuple, Union
7
+
8
+ from .extractor import Extractor
9
+ from .models import Node, Range, Relation
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class PythonStructureExtractor(Extractor):
    """Extracts file/class/function/method containment relations.

    Walks the top level of a module and, recursively, class bodies and
    directly nested function definitions. Every discovered entity becomes
    a node whose FQN is ``<parent fqn>/<name>``, linked to its parent by a
    ``<parent kind>_contains_<child kind>`` relation.
    """

    @property
    def dimension(self) -> str:
        """Name of the dimension the emitted relations belong to."""
        return "structure"

    def _make_id(self, fqn: str) -> str:
        """Return the first 16 hex characters of the SHA-256 of *fqn*."""
        return hashlib.sha256(fqn.encode()).hexdigest()[:16]

    def _make_range(self, node: ast.AST) -> Range:
        """Build a Range from the position attributes of an AST node.

        Falls back to the start line / column 0 when the node carries no
        end position attributes.
        """
        end_lineno = getattr(node, "end_lineno", node.lineno)
        end_col_offset = getattr(node, "end_col_offset", 0)
        return Range(
            start_line=node.lineno,
            start_col=node.col_offset,
            end_line=end_lineno,
            end_col=end_col_offset,
        )

    def _add_child(
        self,
        ast_node: ast.AST,
        *,
        name: str,
        kind: str,
        relation_type: str,
        parent_id: str,
        parent_fqn: str,
        file_path: str,
        nodes: List[Node],
        relations: List[Relation],
    ) -> Tuple[str, str]:
        """Append one child node plus its containment relation.

        Returns the ``(id, fqn)`` of the new node so callers can recurse
        into the child's body.
        """
        child_fqn = f"{parent_fqn}/{name}"
        child_id = self._make_id(child_fqn)
        nodes.append(
            Node(
                id=child_id,
                name=name,
                kind=kind,
                fqn=child_fqn,
                source_file=file_path,
                range=self._make_range(ast_node),
                parent_id=parent_id,
            )
        )
        relations.append(
            Relation(
                id=f"{parent_id}-{child_id}",
                source_id=parent_id,
                target_id=child_id,
                type=relation_type,
                dimension=self.dimension,
            )
        )
        return child_id, child_fqn

    def _extract_variables(
        self,
        assign_node: ast.Assign,
        parent_id: str,
        parent_fqn: str,
        file_path: str,
        relation_type: str,
        nodes: List[Node],
        relations: List[Relation],
    ) -> None:
        """Emit a ``variable`` node for every plain-name assignment target.

        Targets that are not simple names (tuples, attributes, subscripts)
        are skipped.
        """
        for target in assign_node.targets:
            if isinstance(target, ast.Name):
                self._add_child(
                    assign_node,
                    name=target.id,
                    kind="variable",
                    relation_type=relation_type,
                    parent_id=parent_id,
                    parent_fqn=parent_fqn,
                    file_path=file_path,
                    nodes=nodes,
                    relations=relations,
                )

    def _extract_nested_functions(
        self,
        func_node: Union[ast.FunctionDef, ast.AsyncFunctionDef],
        func_id: str,
        func_fqn: str,
        file_path: str,
        nodes: List[Node],
        relations: List[Relation],
    ) -> None:
        """Recurse into function definitions placed directly in a body."""
        for body_node in func_node.body:
            if isinstance(body_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                self._extract_function(body_node, func_id, func_fqn, file_path, nodes, relations)

    def extract(self, content: str, file_path: str) -> Tuple[List[Node], List[Relation]]:
        """Extract structural nodes and containment relations from a file.

        Args:
            content: Source text of the file.
            file_path: Path of the file; used verbatim as the file node's
                name, FQN and ID seed.

        Returns:
            ``(nodes, relations)``; both lists are empty if *content* fails
            to parse.
        """
        nodes: List[Node] = []
        relations: List[Relation] = []

        try:
            tree = ast.parse(content)
        except SyntaxError as e:
            logger.warning("Syntax error in %s: %s", file_path, e)
            return nodes, relations

        file_fqn = file_path
        file_id = self._make_id(file_fqn)
        nodes.append(
            Node(
                id=file_id,
                name=file_path,
                kind="file",
                fqn=file_fqn,
                source_file=file_path,
                range=None,
                parent_id=None,
            )
        )

        for body_node in tree.body:
            if isinstance(body_node, ast.ClassDef):
                self._extract_class(body_node, file_id, file_fqn, file_path, nodes, relations)
            elif isinstance(body_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                self._extract_function(body_node, file_id, file_fqn, file_path, nodes, relations)
            elif isinstance(body_node, ast.Assign):
                self._extract_variables(
                    body_node,
                    file_id,
                    file_fqn,
                    file_path,
                    "file_contains_variable",
                    nodes,
                    relations,
                )

        return nodes, relations

    def _extract_class(
        self,
        class_node: ast.ClassDef,
        parent_id: str,
        parent_fqn: str,
        file_path: str,
        nodes: List[Node],
        relations: List[Relation],
    ) -> None:
        """Emit a class node and recurse into its body.

        Nested classes, methods and class-level variables are extracted;
        other statements in the class body are ignored.
        """
        # The file node's FQN equals file_path, so this comparison tells a
        # top-level class apart from a class nested inside another class.
        relation_type = (
            "file_contains_class" if parent_fqn == file_path else "class_contains_class"
        )
        class_id, class_fqn = self._add_child(
            class_node,
            name=class_node.name,
            kind="class",
            relation_type=relation_type,
            parent_id=parent_id,
            parent_fqn=parent_fqn,
            file_path=file_path,
            nodes=nodes,
            relations=relations,
        )

        for body_node in class_node.body:
            if isinstance(body_node, ast.ClassDef):
                self._extract_class(body_node, class_id, class_fqn, file_path, nodes, relations)
            elif isinstance(body_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                self._extract_method(body_node, class_id, class_fqn, file_path, nodes, relations)
            elif isinstance(body_node, ast.Assign):
                self._extract_variables(
                    body_node,
                    class_id,
                    class_fqn,
                    file_path,
                    "class_contains_variable",
                    nodes,
                    relations,
                )

    def _extract_function(
        self,
        func_node: Union[ast.FunctionDef, ast.AsyncFunctionDef],
        parent_id: str,
        parent_fqn: str,
        file_path: str,
        nodes: List[Node],
        relations: List[Relation],
    ) -> None:
        """Emit a function node and recurse into directly nested functions.

        A module-level function is linked with ``file_contains_function``.
        A function nested in another function or method is linked with
        ``function_contains_function``, because its parent is not the file.
        """
        # Same top-level test as _extract_class: only the file node has an
        # FQN equal to file_path.
        relation_type = (
            "file_contains_function"
            if parent_fqn == file_path
            else "function_contains_function"
        )
        func_id, func_fqn = self._add_child(
            func_node,
            name=func_node.name,
            kind="function",
            relation_type=relation_type,
            parent_id=parent_id,
            parent_fqn=parent_fqn,
            file_path=file_path,
            nodes=nodes,
            relations=relations,
        )

        # Handle nested functions
        self._extract_nested_functions(func_node, func_id, func_fqn, file_path, nodes, relations)

    def _extract_method(
        self,
        func_node: Union[ast.FunctionDef, ast.AsyncFunctionDef],
        parent_id: str,
        parent_fqn: str,
        file_path: str,
        nodes: List[Node],
        relations: List[Relation],
    ) -> None:
        """Emit a method node for a function defined directly in a class."""
        func_id, func_fqn = self._add_child(
            func_node,
            name=func_node.name,
            kind="method",
            relation_type="class_contains_method",
            parent_id=parent_id,
            parent_fqn=parent_fqn,
            file_path=file_path,
            nodes=nodes,
            relations=relations,
        )

        # Handle nested functions inside methods
        self._extract_nested_functions(func_node, func_id, func_fqn, file_path, nodes, relations)
@@ -0,0 +1,5 @@
1
+ """FastAPI server package for source-graph."""
2
+
3
+ from .app import app
4
+
5
+ __all__ = ["app"]
@@ -0,0 +1,33 @@
1
+ """FastAPI application for source-graph serve mode."""
2
+
3
+ import importlib.resources as resources
4
+
5
+ from fastapi import FastAPI
6
+ from fastapi.responses import FileResponse, JSONResponse
7
+ from fastapi.staticfiles import StaticFiles
8
+
9
+ from ..data_builder import build_all_data
10
+ from ..store import RelationStore
11
+ from . import config
12
+
13
app = FastAPI()

# Locate the packaged "web" directory once, when this module is imported.
_web_dir = resources.files("source_graph").joinpath("web")

# Expose the bundled front-end files under the /static URL prefix.
app.mount("/static", StaticFiles(directory=str(_web_dir)), name="static")
20
+
21
+
22
@app.get("/")
async def root() -> FileResponse:
    """Serve the bundled ``index.html`` from the package's web directory."""
    index_path = _web_dir.joinpath("index.html")
    return FileResponse(str(index_path))
26
+
27
+
28
@app.get("/api/data")
async def get_data() -> JSONResponse:
    """Return all dimension data, source contents, and source paths.

    Opens a RelationStore on the configured database path for each request
    and returns whatever ``build_all_data`` assembles from it as JSON.
    """
    # NOTE(review): build_all_data presumably performs blocking database
    # reads inside this async handler - confirm whether that is acceptable.
    db_path = config.get_db_path()
    store = RelationStore(db_path)
    return JSONResponse(content=build_all_data(store))
@@ -0,0 +1,14 @@
1
+ """Server configuration."""
2
+
3
# Module-level holder for the database location; empty until configure() runs.
_db_path: str = ""


def configure(db_path: str) -> None:
    """Remember *db_path* as the database location used by the server.

    Each call overwrites the previously stored value.
    """
    global _db_path
    _db_path = db_path


def get_db_path() -> str:
    """Return the path most recently passed to configure().

    An empty string is returned if configure() has not been called yet.
    """
    return _db_path