uce-engine 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uce_engine-0.1.0/PKG-INFO +86 -0
- uce_engine-0.1.0/README.md +69 -0
- uce_engine-0.1.0/pyproject.toml +30 -0
- uce_engine-0.1.0/setup.cfg +4 -0
- uce_engine-0.1.0/uce/__init__.py +3 -0
- uce_engine-0.1.0/uce/core/__init__.py +1 -0
- uce_engine-0.1.0/uce/core/config.py +103 -0
- uce_engine-0.1.0/uce/core/graph_db.py +61 -0
- uce_engine-0.1.0/uce/core/risk_model.py +54 -0
- uce_engine-0.1.0/uce/ingestion/__init__.py +1 -0
- uce_engine-0.1.0/uce/ingestion/code_parser.py +280 -0
- uce_engine-0.1.0/uce/ingestion/graph_builder.py +377 -0
- uce_engine-0.1.0/uce/ingestion/policy_parser.py +48 -0
- uce_engine-0.1.0/uce/ingestion/requirement_parser.py +55 -0
- uce_engine-0.1.0/uce/ingestion/schema_parser.py +276 -0
- uce_engine-0.1.0/uce/reasoning/__init__.py +1 -0
- uce_engine-0.1.0/uce/reasoning/impact_analysis.py +391 -0
- uce_engine-0.1.0/uce/reasoning/trace_engine.py +129 -0
- uce_engine-0.1.0/uce/run.py +112 -0
- uce_engine-0.1.0/uce/runtime/__init__.py +1 -0
- uce_engine-0.1.0/uce/runtime/updater.py +216 -0
- uce_engine-0.1.0/uce/runtime/watcher.py +90 -0
- uce_engine-0.1.0/uce/server/__init__.py +1 -0
- uce_engine-0.1.0/uce/server/mcp_server.py +301 -0
- uce_engine-0.1.0/uce_engine.egg-info/PKG-INFO +86 -0
- uce_engine-0.1.0/uce_engine.egg-info/SOURCES.txt +28 -0
- uce_engine-0.1.0/uce_engine.egg-info/dependency_links.txt +1 -0
- uce_engine-0.1.0/uce_engine.egg-info/entry_points.txt +2 -0
- uce_engine-0.1.0/uce_engine.egg-info/requires.txt +6 -0
- uce_engine-0.1.0/uce_engine.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: uce-engine
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Unified Context Engine
|
|
5
|
+
Author: UCE Contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Requires-Python: <3.13,>=3.10
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: neo4j>=5.14.0
|
|
12
|
+
Requires-Dist: watchdog>=3.0.0
|
|
13
|
+
Requires-Dist: PyYAML>=6.0.1
|
|
14
|
+
Requires-Dist: fastmcp>=0.1.0
|
|
15
|
+
Requires-Dist: tree_sitter==0.20.1
|
|
16
|
+
Requires-Dist: tree_sitter_languages==1.10.2
|
|
17
|
+
|
|
18
|
+
# Unified Context Engine (UCE)
|
|
19
|
+
|
|
20
|
+
## What It Does
|
|
21
|
+
UCE is a deterministic semantic governance engine for agentic systems. It builds a Neo4j knowledge graph across code, schema, requirements, policies, APIs, and services, then performs multi-hop reasoning to produce explainable impact analysis and risk scoring.
|
|
22
|
+
|
|
23
|
+
## Why It Matters
|
|
24
|
+
Traditional RAG is probabilistic and not auditable. UCE provides deterministic, traceable reasoning paths that enterprises can validate before allowing agents to execute changes.
|
|
25
|
+
|
|
26
|
+
## Architecture Overview
|
|
27
|
+
- File graph and import dependencies
|
|
28
|
+
- Function graph and call edges
|
|
29
|
+
- Table/column schema layer
|
|
30
|
+
- Requirement and policy governance
|
|
31
|
+
- API exposure and service ownership
|
|
32
|
+
- Deterministic risk scoring
|
|
33
|
+
|
|
34
|
+
## Quick Start
|
|
35
|
+
```
|
|
36
|
+
python ingest/file_graph.py
|
|
37
|
+
python ingest/function_graph.py
|
|
38
|
+
python ingest/db_schema.py
|
|
39
|
+
python ingest/requirements.py
|
|
40
|
+
python ingest/policies.py
|
|
41
|
+
python ingest/api_graph.py
|
|
42
|
+
python -c "from impact import explain_change; print(explain_change('table','meetings'))"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Example Output
|
|
46
|
+
```json
|
|
47
|
+
{
|
|
48
|
+
"entity": "meetings",
|
|
49
|
+
"affected_files": ["app/api/webhook/route.ts"],
|
|
50
|
+
"affected_functions": ["POST@app/api/webhook/route.ts"],
|
|
51
|
+
"affected_apis": ["POST /api/webhook"],
|
|
52
|
+
"affected_services": ["webhook"],
|
|
53
|
+
"violated_requirements": ["RQ-003"],
|
|
54
|
+
"enforced_policies": ["P-001"],
|
|
55
|
+
"risk_breakdown": {
|
|
56
|
+
"backend_files": 1,
|
|
57
|
+
"violated_requirements": 1,
|
|
58
|
+
"enforced_policies": 1,
|
|
59
|
+
"affected_apis": 1,
|
|
60
|
+
"risk_score": 15
|
|
61
|
+
},
|
|
62
|
+
"risk_score": 15,
|
|
63
|
+
"trace_paths": [
|
|
64
|
+
"Table meetings -> File app/api/webhook/route.ts -> Function POST -> API POST /api/webhook -> Service webhook",
|
|
65
|
+
"Table meetings -> Requirement RQ-003 -> Policy P-001"
|
|
66
|
+
]
|
|
67
|
+
}
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Risk Model
|
|
71
|
+
```
|
|
72
|
+
risk_score =
|
|
73
|
+
2 * backend_files
+ 4 * violated_requirements
+ 6 * enforced_policies
+ 3 * affected_apis
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Roadmap
|
|
80
|
+
1. Cross-repository dependency linking
|
|
81
|
+
2. Temporal graph snapshots
|
|
82
|
+
3. CI integration for pre-merge governance
|
|
83
|
+
4. Schema change impact automation
|
|
84
|
+
|
|
85
|
+
## License
|
|
86
|
+
MIT
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# Unified Context Engine (UCE)
|
|
2
|
+
|
|
3
|
+
## What It Does
|
|
4
|
+
UCE is a deterministic semantic governance engine for agentic systems. It builds a Neo4j knowledge graph across code, schema, requirements, policies, APIs, and services, then performs multi-hop reasoning to produce explainable impact analysis and risk scoring.
|
|
5
|
+
|
|
6
|
+
## Why It Matters
|
|
7
|
+
Traditional RAG is probabilistic and not auditable. UCE provides deterministic, traceable reasoning paths that enterprises can validate before allowing agents to execute changes.
|
|
8
|
+
|
|
9
|
+
## Architecture Overview
|
|
10
|
+
- File graph and import dependencies
|
|
11
|
+
- Function graph and call edges
|
|
12
|
+
- Table/column schema layer
|
|
13
|
+
- Requirement and policy governance
|
|
14
|
+
- API exposure and service ownership
|
|
15
|
+
- Deterministic risk scoring
|
|
16
|
+
|
|
17
|
+
## Quick Start
|
|
18
|
+
```
|
|
19
|
+
python ingest/file_graph.py
|
|
20
|
+
python ingest/function_graph.py
|
|
21
|
+
python ingest/db_schema.py
|
|
22
|
+
python ingest/requirements.py
|
|
23
|
+
python ingest/policies.py
|
|
24
|
+
python ingest/api_graph.py
|
|
25
|
+
python -c "from impact import explain_change; print(explain_change('table','meetings'))"
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Example Output
|
|
29
|
+
```json
|
|
30
|
+
{
|
|
31
|
+
"entity": "meetings",
|
|
32
|
+
"affected_files": ["app/api/webhook/route.ts"],
|
|
33
|
+
"affected_functions": ["POST@app/api/webhook/route.ts"],
|
|
34
|
+
"affected_apis": ["POST /api/webhook"],
|
|
35
|
+
"affected_services": ["webhook"],
|
|
36
|
+
"violated_requirements": ["RQ-003"],
|
|
37
|
+
"enforced_policies": ["P-001"],
|
|
38
|
+
"risk_breakdown": {
|
|
39
|
+
"backend_files": 1,
|
|
40
|
+
"violated_requirements": 1,
|
|
41
|
+
"enforced_policies": 1,
|
|
42
|
+
"affected_apis": 1,
|
|
43
|
+
"risk_score": 15
|
|
44
|
+
},
|
|
45
|
+
"risk_score": 15,
|
|
46
|
+
"trace_paths": [
|
|
47
|
+
"Table meetings -> File app/api/webhook/route.ts -> Function POST -> API POST /api/webhook -> Service webhook",
|
|
48
|
+
"Table meetings -> Requirement RQ-003 -> Policy P-001"
|
|
49
|
+
]
|
|
50
|
+
}
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Risk Model
|
|
54
|
+
```
|
|
55
|
+
risk_score =
|
|
56
|
+
2 * backend_files
+ 4 * violated_requirements
+ 6 * enforced_policies
+ 3 * affected_apis
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Roadmap
|
|
63
|
+
1. Cross-repository dependency linking
|
|
64
|
+
2. Temporal graph snapshots
|
|
65
|
+
3. CI integration for pre-merge governance
|
|
66
|
+
4. Schema change impact automation
|
|
67
|
+
|
|
68
|
+
## License
|
|
69
|
+
MIT
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "uce-engine"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Unified Context Engine"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10,<3.13"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
authors = [{name = "UCE Contributors"}]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
]
|
|
17
|
+
dependencies = [
|
|
18
|
+
"neo4j>=5.14.0",
|
|
19
|
+
"watchdog>=3.0.0",
|
|
20
|
+
"PyYAML>=6.0.1",
|
|
21
|
+
"fastmcp>=0.1.0",
|
|
22
|
+
"tree_sitter==0.20.1",
|
|
23
|
+
"tree_sitter_languages==1.10.2",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.scripts]
|
|
27
|
+
uce = "uce.run:main"
|
|
28
|
+
|
|
29
|
+
[tool.setuptools.packages.find]
|
|
30
|
+
include = ["uce*"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
import yaml
|
|
7
|
+
except ImportError as exc: # pragma: no cover - explicit runtime guidance
|
|
8
|
+
raise ImportError(
|
|
9
|
+
"PyYAML is required. Install with `pip install pyyaml`."
|
|
10
|
+
) from exc
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class Neo4jConfig:
    """Connection settings for the Neo4j graph database."""

    # Bolt URI, e.g. "bolt://localhost:7687".
    uri: str
    # Login user name.
    user: str
    # Login password.
    password: str
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(frozen=True)
class PathsConfig:
    """Project-relative search roots for each ingestion category."""

    # Directories scanned for source code.
    code: tuple[str, ...]
    # Directories containing database schema files.
    schema: tuple[str, ...]
    # Directories containing requirement documents.
    requirements: tuple[str, ...]
    # Directories containing policy documents.
    policies: tuple[str, ...]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(frozen=True)
class UceConfig:
    """Fully-resolved UCE configuration (see load_config)."""

    # Absolute path to the project being analyzed.
    project_root: str
    # Lower-cased, sorted, de-duplicated language names.
    languages: tuple[str, ...]
    # Search roots for each ingestion category.
    paths: PathsConfig
    # Glob/path patterns to skip during ingestion.
    ignore: tuple[str, ...]
    # Name aliases applied during graph building.
    aliases: dict[str, str]
    # Graph database connection settings.
    neo4j: Neo4jConfig
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _as_tuple(value: Any) -> tuple[str, ...]:
|
|
39
|
+
if value is None:
|
|
40
|
+
return tuple()
|
|
41
|
+
if isinstance(value, (list, tuple)):
|
|
42
|
+
return tuple(str(v) for v in value if v is not None)
|
|
43
|
+
return (str(value),)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _normalize_path(root: str, path: str) -> str:
|
|
47
|
+
joined = os.path.abspath(os.path.join(root, path))
|
|
48
|
+
return os.path.normpath(joined)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def load_config(config_path: str, project_root_override: str | None = None) -> UceConfig:
    """Load a UCE YAML config file and return a normalized UceConfig.

    Args:
        config_path: Path to the YAML configuration file.
        project_root_override: When given, takes precedence over the
            ``project_root`` key in the file.

    Raises:
        FileNotFoundError: If *config_path* does not exist.
    """
    if not os.path.exists(config_path):
        raise FileNotFoundError(f"Config not found: {config_path}")

    # An empty/blank YAML file yields None; treat it as an empty mapping.
    with open(config_path, "r", encoding="utf-8") as handle:
        raw = yaml.safe_load(handle) or {}

    # Precedence: explicit override > config value > current directory.
    project_root = project_root_override or raw.get("project_root") or "."
    project_root = os.path.abspath(project_root)

    # Lower-cased, de-duplicated, and sorted for deterministic output.
    languages = _as_tuple(raw.get("languages") or [])
    languages = tuple(sorted({lang.lower() for lang in languages}))

    # Each category falls back to a conventional default directory.
    paths_raw = raw.get("paths") or {}
    paths = PathsConfig(
        code=_as_tuple(paths_raw.get("code") or ["."]),
        schema=_as_tuple(paths_raw.get("schema") or ["db"]),
        requirements=_as_tuple(paths_raw.get("requirements") or ["requirements"]),
        policies=_as_tuple(paths_raw.get("policies") or ["policies"]),
    )

    ignore = _as_tuple(raw.get("ignore") or [])

    aliases = {str(k): str(v) for k, v in (raw.get("aliases") or {}).items()}

    # Environment variables win over config values; both fall back to
    # local-development defaults. NOTE: an empty-string env var also
    # falls through to the next candidate because of the `or` chain.
    neo4j_raw = raw.get("neo4j") or {}
    env_uri = os.getenv("NEO4J_URI")
    env_user = os.getenv("NEO4J_USER")
    env_pass = os.getenv("NEO4J_PASSWORD")
    neo4j = Neo4jConfig(
        uri=str(env_uri or neo4j_raw.get("uri") or "bolt://localhost:7687"),
        user=str(env_user or neo4j_raw.get("user") or "neo4j"),
        password=str(env_pass or neo4j_raw.get("password") or "password"),
    )

    return UceConfig(
        project_root=project_root,
        languages=languages,
        paths=paths,
        ignore=ignore,
        aliases=aliases,
        neo4j=neo4j,
    )
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def resolve_paths(config: UceConfig) -> dict[str, tuple[str, ...]]:
    """Map each ingestion category to its absolute, normalized directories."""
    root = config.project_root
    categories = {
        "code": config.paths.code,
        "schema": config.paths.schema,
        "requirements": config.paths.requirements,
        "policies": config.paths.policies,
    }
    return {
        name: tuple(_normalize_path(root, entry) for entry in entries)
        for name, entries in categories.items()
    }
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from neo4j import GraphDatabase
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class GraphDB:
    """Thin convenience wrapper around the Neo4j Python driver.

    Owns a single driver instance; every query runs in a short-lived
    session so callers never manage sessions themselves.
    """

    def __init__(self, uri: str, user: str, password: str):
        # Driver is created eagerly; connectivity errors surface on first query.
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self) -> None:
        """Release the underlying driver and its connection pool."""
        self.driver.close()

    def run(self, query: str, **params):
        """Execute *query* with *params*; return all records as a list.

        The result is materialized inside the session so records remain
        usable after the session closes.
        """
        with self.driver.session() as session:
            return list(session.run(query, **params))

    def ensure_file(self, path: str) -> None:
        """Create (or refresh) the File node for *path*, stamping last_seen."""
        self.run(
            "MERGE (f:File {path: $path}) SET f.last_seen = timestamp()",
            path=path,
        )

    def clear_file_relationships(self, path: str) -> None:
        """Delete outgoing relationships of a file and its code entities.

        Nodes themselves are kept; only outbound edges go, so the file can
        be re-ingested without duplicating relationships.
        """
        self.run(
            "MATCH (f:File {path: $path})-[r]->() DELETE r",
            path=path,
        )
        self.run(
            "MATCH (fn:Function {file_path: $path})-[r]->() DELETE r",
            path=path,
        )
        self.run(
            "MATCH (c:Class {file_path: $path})-[r]->() DELETE r",
            path=path,
        )
        self.run(
            "MATCH (m:Method {file_path: $path})-[r]->() DELETE r",
            path=path,
        )

    def delete_file(self, path: str) -> None:
        """Remove the File node and every code entity parsed from it."""
        self.run("MATCH (f:File {path: $path}) DETACH DELETE f", path=path)
        self.run("MATCH (fn:Function {file_path: $path}) DETACH DELETE fn", path=path)
        self.run("MATCH (c:Class {file_path: $path}) DETACH DELETE c", path=path)
        self.run("MATCH (m:Method {file_path: $path}) DETACH DELETE m", path=path)

    def cleanup_orphan_apis(self) -> None:
        """Delete API and Service nodes with no remaining inbound edges."""
        self.run(
            "MATCH (a:API) WHERE NOT ( ()-[:EXPOSED_AS]->(a) ) DETACH DELETE a"
        )
        self.run(
            "MATCH (s:Service) WHERE NOT ( ()-[:BELONGS_TO]->(s) ) DETACH DELETE s"
        )

    def cleanup_missing_files(self, known_paths: list[str]) -> None:
        """Delete File nodes whose path is not in *known_paths*.

        Deliberately a no-op when *known_paths* is empty, so an accidental
        empty scan cannot wipe the whole file graph.
        """
        if not known_paths:
            return
        self.run(
            "MATCH (f:File) WHERE NOT f.path IN $paths DETACH DELETE f",
            paths=known_paths,
        )
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
@dataclass(frozen=True)
class RiskAssessment:
    """Result of scoring a proposed change's impact."""

    # Weighted sum produced by score_from_counts.
    risk_score: int
    # One of "low", "moderate", "high" (see assess_risk thresholds).
    severity: str
    # Human-readable summary of the input counts.
    rationale: str
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def score_from_counts(
    affected_files: int,
    affected_functions: int,
    affected_apis: int,
    violated_requirements: int,
    enforced_policies: int,
) -> int:
    """Return the deterministic weighted sum of impact counts.

    NOTE(review): the package README quotes slightly different weights
    (no functions term, 6x for policies) — confirm which is canonical.
    """
    weighted_counts = (
        (2, affected_files),
        (1, affected_functions),
        (3, affected_apis),
        (4, violated_requirements),
        (3, enforced_policies),
    )
    return sum(weight * count for weight, count in weighted_counts)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def assess_risk(
    affected_files: int,
    affected_functions: int,
    affected_apis: int,
    violated_requirements: int,
    enforced_policies: int,
) -> RiskAssessment:
    """Score the impact counts and bucket the total into a severity band."""
    score = score_from_counts(
        affected_files,
        affected_functions,
        affected_apis,
        violated_requirements,
        enforced_policies,
    )

    # Thresholds: >= 20 high, >= 8 moderate, otherwise low.
    if score >= 20:
        band = "high"
    elif score >= 8:
        band = "moderate"
    else:
        band = "low"

    summary = (
        f"files={affected_files}, functions={affected_functions}, apis={affected_apis}, "
        f"violated_requirements={violated_requirements}, enforced_policies={enforced_policies}"
    )

    return RiskAssessment(risk_score=score, severity=band, rationale=summary)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Iterable
|
|
6
|
+
from importlib import metadata
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
from tree_sitter_languages import get_parser
|
|
10
|
+
except ImportError as exc: # pragma: no cover - explicit runtime guidance
|
|
11
|
+
raise ImportError(
|
|
12
|
+
"tree_sitter_languages is required. Install with `pip install tree_sitter_languages`."
|
|
13
|
+
) from exc
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# File extension -> tree-sitter language name. Extensions are matched
# lower-cased by detect_language().
LANGUAGE_BY_EXTENSION = {
    ".py": "python",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".js": "javascript",
    ".jsx": "javascript",
    ".go": "go",
    ".java": "java",
    ".c": "c",
    ".h": "c",
    ".cpp": "cpp",
    ".hpp": "cpp",
    ".cc": "cpp",
    ".cxx": "cpp",
}

# Node types treated as function definitions across the supported grammars.
# NOTE: method_declaration/method_definition also appear in METHOD_NODES;
# parse_source checks METHOD_NODES first, so methods are not double-counted.
FUNCTION_NODES = {
    "function_definition",
    "function_declaration",
    "function_item",
    "function",
    "function_expression",
    "function_signature",
    "arrow_function",
    "lambda_expression",
    "function_literal",
    "method_declaration",
    "method_definition",
}

# Node types treated as class/struct definitions.
CLASS_NODES = {
    "class_definition",
    "class_declaration",
    "class_specifier",
    "struct_specifier",
}

# Node types treated as methods (attributed to the enclosing class).
METHOD_NODES = {
    "method_definition",
    "method_declaration",
    "constructor_declaration",
}

# Node types representing call sites.
CALL_NODES = {
    "call_expression",
    "call",
    "function_call",
}

# Node types representing import/require constructs.
IMPORT_NODES = {
    "import_statement",
    "import_from_statement",
    "import_declaration",
    "import_clause",
    "import_spec",
    "import_specifier",
    "require_call",
}

# String-literal node types (used to pull module paths out of imports).
STRING_NODES = {"string", "string_literal", "interpreted_string_literal", "raw_string_literal"}
# Identifier-like node types across the supported grammars.
IDENTIFIER_NODES = {
    "identifier",
    "property_identifier",
    "type_identifier",
    "field_identifier",
}
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass(frozen=True)
class ParsedCode:
    """Normalized parse result for a single source file.

    All collections are sorted, de-duplicated tuples so results are
    deterministic and hashable.
    """

    # tree-sitter language the file was parsed as.
    language: str
    # Imported module paths / names.
    imports: tuple[str, ...]
    # Free-function names (methods are tracked separately).
    functions: tuple[str, ...]
    # Class/struct names.
    classes: tuple[str, ...]
    # (method name, enclosing class name or None) pairs.
    methods: tuple[tuple[str, str | None], ...]
    # Names of callees observed at call sites.
    calls: tuple[str, ...]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# Parsers are reused across files; one cached instance per language.
_PARSER_CACHE: dict[str, object] = {}
# First parser-load failure is remembered so later calls fail fast with
# the same diagnostic instead of retrying a known-broken installation.
_PARSER_ERROR: Exception | None = None
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def detect_language(path: str) -> str | None:
    """Map a file path to a tree-sitter language name, or None if unsupported."""
    extension = os.path.splitext(path)[1]
    return LANGUAGE_BY_EXTENSION.get(extension.lower())
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _node_text(node, source: bytes) -> str:
|
|
104
|
+
return source[node.start_byte : node.end_byte].decode("utf-8", errors="ignore")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _first_identifier(node, source: bytes) -> str | None:
    """Best-effort name of a definition node.

    Prefers the grammar's explicit "name" field; otherwise falls back to
    the first direct child whose type looks like an identifier.
    """
    named = node.child_by_field_name("name")
    if named is not None:
        return _node_text(named, source)

    return next(
        (
            _node_text(child, source)
            for child in node.children
            if child.type in IDENTIFIER_NODES
        ),
        None,
    )
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _last_identifier(node, source: bytes) -> str | None:
    """Text of the last identifier-typed direct child of *node*, or None."""
    result: str | None = None
    for child in node.children:
        if child.type in IDENTIFIER_NODES:
            result = _node_text(child, source)
    return result
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _extract_import(node, source: bytes) -> str | None:
    """Pull an import target out of an import-ish node.

    Preference order: a string child (module path literal), then an
    identifier child, then the node's own trimmed text; None when all
    of these are empty.
    """
    for child in node.children:
        if child.type in STRING_NODES:
            return _node_text(child, source).strip("\"'` ")

    for child in node.children:
        if child.type in IDENTIFIER_NODES:
            return _node_text(child, source)

    fallback = _node_text(node, source).strip()
    return fallback if fallback else None
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _extract_call_name(node, source: bytes) -> str | None:
    """Name of the callee at a call site, if recoverable."""
    callee = node.child_by_field_name("function") or node.child_by_field_name("callee")
    if callee is None:
        callee = node

    # Taking the LAST identifier handles dotted targets like `obj.method`.
    name = _last_identifier(callee, source)
    if name:
        return name

    for child in callee.children:
        if child.type in IDENTIFIER_NODES:
            return _node_text(child, source)

    return None
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _walk(node) -> Iterable:
|
|
159
|
+
yield node
|
|
160
|
+
for child in node.children:
|
|
161
|
+
yield from _walk(child)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def parse_source(source_bytes: bytes, language: str) -> ParsedCode:
    """Parse *source_bytes* with the tree-sitter grammar for *language*.

    Returns a ParsedCode whose collections are sorted and de-duplicated:
    imports, free functions, classes, methods (paired with their
    innermost enclosing class, or None), and callee names.
    """
    parser = _get_parser(language)
    tree = parser.parse(source_bytes)
    root = tree.root_node

    imports: set[str] = set()
    functions: set[str] = set()
    classes: set[str] = set()
    methods: set[tuple[str, str | None]] = set()
    calls: set[str] = set()

    # Lexical nesting of named classes, so methods can be attributed to
    # their innermost enclosing class.
    class_stack: list[str] = []

    def visit(node):
        nonlocal class_stack
        if node.type in CLASS_NODES:
            class_name = _first_identifier(node, source_bytes)
            if class_name:
                classes.add(class_name)
                class_stack.append(class_name)
            for child in node.children:
                visit(child)
            # Pop only if we pushed; anonymous classes leave the stack alone.
            if class_name:
                class_stack.pop()
            return

        # Checked BEFORE FUNCTION_NODES: the two sets overlap, and a
        # method node must be recorded as a method, not a free function.
        if node.type in METHOD_NODES:
            method_name = _first_identifier(node, source_bytes)
            if method_name:
                methods.add((method_name, class_stack[-1] if class_stack else None))
            for child in node.children:
                visit(child)
            return

        if node.type in FUNCTION_NODES:
            fn_name = _first_identifier(node, source_bytes)
            if fn_name:
                functions.add(fn_name)

        if node.type in CALL_NODES:
            call_name = _extract_call_name(node, source_bytes)
            if call_name:
                calls.add(call_name)

        if node.type in IMPORT_NODES:
            imported = _extract_import(node, source_bytes)
            if imported:
                imports.add(imported)

        for child in node.children:
            visit(child)

    visit(root)

    return ParsedCode(
        language=language,
        imports=tuple(sorted(imports)),
        functions=tuple(sorted(functions)),
        classes=tuple(sorted(classes)),
        methods=tuple(sorted(methods)),
        calls=tuple(sorted(calls)),
    )
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def parse_file(path: str) -> ParsedCode | None:
    """Parse the file at *path*; None when its extension is unsupported."""
    language = detect_language(path)
    if language is None:
        return None

    with open(path, "rb") as handle:
        raw = handle.read()
    return parse_source(raw, language)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _get_parser(language: str):
    """Return a cached tree-sitter parser for *language*.

    The first load failure is stored in the module-level _PARSER_ERROR so
    every subsequent call fails fast with the same diagnostic instead of
    re-attempting a load that is known to be broken.

    Raises:
        RuntimeError: If the parser cannot be loaded (or failed before).
    """
    global _PARSER_ERROR
    if _PARSER_ERROR is not None:
        raise RuntimeError(_parser_error_message(_PARSER_ERROR)) from _PARSER_ERROR

    if language in _PARSER_CACHE:
        return _PARSER_CACHE[language]

    try:
        parser = get_parser(language)
    except TypeError as exc:
        # TypeError is the typical symptom of a version mismatch between
        # tree_sitter and tree_sitter_languages, so it gets the detailed
        # version-aware message.
        _PARSER_ERROR = exc
        raise RuntimeError(_parser_error_message(exc)) from exc
    except Exception as exc:
        _PARSER_ERROR = exc
        raise RuntimeError(
            f"Failed to load tree-sitter parser for '{language}': {exc}"
        ) from exc

    _PARSER_CACHE[language] = parser
    return parser
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def validate_languages(languages: Iterable[str]) -> None:
    """Eagerly load a parser per distinct language, failing fast on errors."""
    unique = {language for language in languages if language}
    for language in sorted(unique):
        _get_parser(language)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _parser_error_message(exc: Exception) -> str:
|
|
266
|
+
try:
|
|
267
|
+
ts_version = metadata.version("tree_sitter")
|
|
268
|
+
except Exception:
|
|
269
|
+
ts_version = "unknown"
|
|
270
|
+
try:
|
|
271
|
+
tsl_version = metadata.version("tree_sitter_languages")
|
|
272
|
+
except Exception:
|
|
273
|
+
tsl_version = "unknown"
|
|
274
|
+
|
|
275
|
+
return (
|
|
276
|
+
"Tree-sitter parser load failed due to an incompatibility between "
|
|
277
|
+
f"tree_sitter ({ts_version}) and tree_sitter_languages ({tsl_version}). "
|
|
278
|
+
"Install a compatible pair, for example `pip install tree_sitter==0.20.1` "
|
|
279
|
+
"or upgrade tree_sitter_languages to match your tree_sitter version."
|
|
280
|
+
)
|