stravinsky 0.4.18__py3-none-any.whl → 0.4.66__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of stravinsky has been flagged as possibly problematic.
- mcp_bridge/__init__.py +1 -1
- mcp_bridge/auth/__init__.py +16 -6
- mcp_bridge/auth/cli.py +202 -11
- mcp_bridge/auth/oauth.py +1 -2
- mcp_bridge/auth/openai_oauth.py +4 -7
- mcp_bridge/auth/token_store.py +0 -1
- mcp_bridge/cli/__init__.py +1 -1
- mcp_bridge/cli/install_hooks.py +503 -107
- mcp_bridge/cli/session_report.py +0 -3
- mcp_bridge/config/__init__.py +2 -2
- mcp_bridge/config/hook_config.py +3 -5
- mcp_bridge/config/rate_limits.py +108 -13
- mcp_bridge/hooks/HOOKS_SETTINGS.json +17 -4
- mcp_bridge/hooks/__init__.py +14 -4
- mcp_bridge/hooks/agent_reminder.py +4 -4
- mcp_bridge/hooks/auto_slash_command.py +5 -5
- mcp_bridge/hooks/budget_optimizer.py +2 -2
- mcp_bridge/hooks/claude_limits_hook.py +114 -0
- mcp_bridge/hooks/comment_checker.py +3 -4
- mcp_bridge/hooks/compaction.py +2 -2
- mcp_bridge/hooks/context.py +2 -1
- mcp_bridge/hooks/context_monitor.py +2 -2
- mcp_bridge/hooks/delegation_policy.py +85 -0
- mcp_bridge/hooks/directory_context.py +3 -3
- mcp_bridge/hooks/edit_recovery.py +3 -2
- mcp_bridge/hooks/edit_recovery_policy.py +49 -0
- mcp_bridge/hooks/empty_message_sanitizer.py +2 -2
- mcp_bridge/hooks/events.py +160 -0
- mcp_bridge/hooks/git_noninteractive.py +4 -4
- mcp_bridge/hooks/keyword_detector.py +8 -10
- mcp_bridge/hooks/manager.py +35 -22
- mcp_bridge/hooks/notification_hook.py +13 -6
- mcp_bridge/hooks/parallel_enforcement_policy.py +67 -0
- mcp_bridge/hooks/parallel_enforcer.py +5 -5
- mcp_bridge/hooks/parallel_execution.py +22 -10
- mcp_bridge/hooks/post_tool/parallel_validation.py +103 -0
- mcp_bridge/hooks/pre_compact.py +8 -9
- mcp_bridge/hooks/pre_tool/agent_spawn_validator.py +115 -0
- mcp_bridge/hooks/preemptive_compaction.py +2 -3
- mcp_bridge/hooks/routing_notifications.py +80 -0
- mcp_bridge/hooks/rules_injector.py +11 -19
- mcp_bridge/hooks/session_idle.py +4 -4
- mcp_bridge/hooks/session_notifier.py +4 -4
- mcp_bridge/hooks/session_recovery.py +4 -5
- mcp_bridge/hooks/stravinsky_mode.py +1 -1
- mcp_bridge/hooks/subagent_stop.py +1 -3
- mcp_bridge/hooks/task_validator.py +2 -2
- mcp_bridge/hooks/tmux_manager.py +7 -8
- mcp_bridge/hooks/todo_delegation.py +4 -1
- mcp_bridge/hooks/todo_enforcer.py +180 -10
- mcp_bridge/hooks/truncation_policy.py +37 -0
- mcp_bridge/hooks/truncator.py +1 -2
- mcp_bridge/metrics/cost_tracker.py +115 -0
- mcp_bridge/native_search.py +93 -0
- mcp_bridge/native_watcher.py +118 -0
- mcp_bridge/notifications.py +3 -4
- mcp_bridge/orchestrator/enums.py +11 -0
- mcp_bridge/orchestrator/router.py +165 -0
- mcp_bridge/orchestrator/state.py +32 -0
- mcp_bridge/orchestrator/visualization.py +14 -0
- mcp_bridge/orchestrator/wisdom.py +34 -0
- mcp_bridge/prompts/__init__.py +1 -8
- mcp_bridge/prompts/dewey.py +1 -1
- mcp_bridge/prompts/planner.py +2 -4
- mcp_bridge/prompts/stravinsky.py +53 -31
- mcp_bridge/proxy/__init__.py +0 -0
- mcp_bridge/proxy/client.py +70 -0
- mcp_bridge/proxy/model_server.py +157 -0
- mcp_bridge/routing/__init__.py +43 -0
- mcp_bridge/routing/config.py +250 -0
- mcp_bridge/routing/model_tiers.py +135 -0
- mcp_bridge/routing/provider_state.py +261 -0
- mcp_bridge/routing/task_classifier.py +190 -0
- mcp_bridge/server.py +363 -34
- mcp_bridge/server_tools.py +298 -6
- mcp_bridge/tools/__init__.py +19 -8
- mcp_bridge/tools/agent_manager.py +549 -799
- mcp_bridge/tools/background_tasks.py +13 -17
- mcp_bridge/tools/code_search.py +54 -51
- mcp_bridge/tools/continuous_loop.py +0 -1
- mcp_bridge/tools/dashboard.py +19 -0
- mcp_bridge/tools/find_code.py +296 -0
- mcp_bridge/tools/init.py +1 -0
- mcp_bridge/tools/list_directory.py +42 -0
- mcp_bridge/tools/lsp/__init__.py +8 -8
- mcp_bridge/tools/lsp/manager.py +51 -28
- mcp_bridge/tools/lsp/tools.py +98 -65
- mcp_bridge/tools/model_invoke.py +1047 -152
- mcp_bridge/tools/mux_client.py +75 -0
- mcp_bridge/tools/project_context.py +1 -2
- mcp_bridge/tools/query_classifier.py +132 -49
- mcp_bridge/tools/read_file.py +84 -0
- mcp_bridge/tools/replace.py +45 -0
- mcp_bridge/tools/run_shell_command.py +38 -0
- mcp_bridge/tools/search_enhancements.py +347 -0
- mcp_bridge/tools/semantic_search.py +677 -92
- mcp_bridge/tools/session_manager.py +0 -2
- mcp_bridge/tools/skill_loader.py +0 -1
- mcp_bridge/tools/task_runner.py +5 -7
- mcp_bridge/tools/templates.py +3 -3
- mcp_bridge/tools/tool_search.py +331 -0
- mcp_bridge/tools/write_file.py +29 -0
- mcp_bridge/update_manager.py +33 -37
- mcp_bridge/update_manager_pypi.py +6 -8
- mcp_bridge/utils/cache.py +82 -0
- mcp_bridge/utils/process.py +71 -0
- mcp_bridge/utils/session_state.py +51 -0
- mcp_bridge/utils/truncation.py +76 -0
- {stravinsky-0.4.18.dist-info → stravinsky-0.4.66.dist-info}/METADATA +84 -35
- stravinsky-0.4.66.dist-info/RECORD +198 -0
- {stravinsky-0.4.18.dist-info → stravinsky-0.4.66.dist-info}/entry_points.txt +1 -0
- stravinsky_claude_assets/HOOKS_INTEGRATION.md +316 -0
- stravinsky_claude_assets/agents/HOOKS.md +437 -0
- stravinsky_claude_assets/agents/code-reviewer.md +210 -0
- stravinsky_claude_assets/agents/comment_checker.md +580 -0
- stravinsky_claude_assets/agents/debugger.md +254 -0
- stravinsky_claude_assets/agents/delphi.md +495 -0
- stravinsky_claude_assets/agents/dewey.md +248 -0
- stravinsky_claude_assets/agents/explore.md +1198 -0
- stravinsky_claude_assets/agents/frontend.md +472 -0
- stravinsky_claude_assets/agents/implementation-lead.md +164 -0
- stravinsky_claude_assets/agents/momus.md +464 -0
- stravinsky_claude_assets/agents/research-lead.md +141 -0
- stravinsky_claude_assets/agents/stravinsky.md +730 -0
- stravinsky_claude_assets/commands/delphi.md +9 -0
- stravinsky_claude_assets/commands/dewey.md +54 -0
- stravinsky_claude_assets/commands/git-master.md +112 -0
- stravinsky_claude_assets/commands/index.md +49 -0
- stravinsky_claude_assets/commands/publish.md +86 -0
- stravinsky_claude_assets/commands/review.md +73 -0
- stravinsky_claude_assets/commands/str/agent_cancel.md +70 -0
- stravinsky_claude_assets/commands/str/agent_list.md +56 -0
- stravinsky_claude_assets/commands/str/agent_output.md +92 -0
- stravinsky_claude_assets/commands/str/agent_progress.md +74 -0
- stravinsky_claude_assets/commands/str/agent_retry.md +94 -0
- stravinsky_claude_assets/commands/str/cancel.md +51 -0
- stravinsky_claude_assets/commands/str/clean.md +97 -0
- stravinsky_claude_assets/commands/str/continue.md +38 -0
- stravinsky_claude_assets/commands/str/index.md +199 -0
- stravinsky_claude_assets/commands/str/list_watchers.md +96 -0
- stravinsky_claude_assets/commands/str/search.md +205 -0
- stravinsky_claude_assets/commands/str/start_filewatch.md +136 -0
- stravinsky_claude_assets/commands/str/stats.md +71 -0
- stravinsky_claude_assets/commands/str/stop_filewatch.md +89 -0
- stravinsky_claude_assets/commands/str/unwatch.md +42 -0
- stravinsky_claude_assets/commands/str/watch.md +45 -0
- stravinsky_claude_assets/commands/strav.md +53 -0
- stravinsky_claude_assets/commands/stravinsky.md +292 -0
- stravinsky_claude_assets/commands/verify.md +60 -0
- stravinsky_claude_assets/commands/version.md +5 -0
- stravinsky_claude_assets/hooks/README.md +248 -0
- stravinsky_claude_assets/hooks/comment_checker.py +193 -0
- stravinsky_claude_assets/hooks/context.py +38 -0
- stravinsky_claude_assets/hooks/context_monitor.py +153 -0
- stravinsky_claude_assets/hooks/dependency_tracker.py +73 -0
- stravinsky_claude_assets/hooks/edit_recovery.py +46 -0
- stravinsky_claude_assets/hooks/execution_state_tracker.py +68 -0
- stravinsky_claude_assets/hooks/notification_hook.py +103 -0
- stravinsky_claude_assets/hooks/notification_hook_v2.py +96 -0
- stravinsky_claude_assets/hooks/parallel_execution.py +241 -0
- stravinsky_claude_assets/hooks/parallel_reinforcement.py +106 -0
- stravinsky_claude_assets/hooks/parallel_reinforcement_v2.py +112 -0
- stravinsky_claude_assets/hooks/pre_compact.py +123 -0
- stravinsky_claude_assets/hooks/ralph_loop.py +173 -0
- stravinsky_claude_assets/hooks/session_recovery.py +263 -0
- stravinsky_claude_assets/hooks/stop_hook.py +89 -0
- stravinsky_claude_assets/hooks/stravinsky_metrics.py +164 -0
- stravinsky_claude_assets/hooks/stravinsky_mode.py +146 -0
- stravinsky_claude_assets/hooks/subagent_stop.py +98 -0
- stravinsky_claude_assets/hooks/todo_continuation.py +111 -0
- stravinsky_claude_assets/hooks/todo_delegation.py +96 -0
- stravinsky_claude_assets/hooks/tool_messaging.py +281 -0
- stravinsky_claude_assets/hooks/truncator.py +23 -0
- stravinsky_claude_assets/rules/deployment_safety.md +51 -0
- stravinsky_claude_assets/rules/integration_wiring.md +89 -0
- stravinsky_claude_assets/rules/pypi_deployment.md +220 -0
- stravinsky_claude_assets/rules/stravinsky_orchestrator.md +32 -0
- stravinsky_claude_assets/settings.json +152 -0
- stravinsky_claude_assets/skills/chrome-devtools/SKILL.md +81 -0
- stravinsky_claude_assets/skills/sqlite/SKILL.md +77 -0
- stravinsky_claude_assets/skills/supabase/SKILL.md +74 -0
- stravinsky_claude_assets/task_dependencies.json +34 -0
- stravinsky-0.4.18.dist-info/RECORD +0 -88
- {stravinsky-0.4.18.dist-info → stravinsky-0.4.66.dist-info}/WHEEL +0 -0
The hunks below appear to come from `mcp_bridge/tools/semantic_search.py` (+677 −92, the largest single-file change listed above). Lines shown as `…` were truncated in the extracted page and are kept as placeholders rather than guessed.

```diff
@@ -18,20 +18,27 @@ Architecture:
 - Chunking strategy: function/class level with context
 """
 
+import asyncio
 import atexit
 import hashlib
 import logging
+import signal
 import sys
 import threading
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Literal
+from typing import TYPE_CHECKING, Literal
+
+if TYPE_CHECKING:
+    import pathspec
 
 import httpx
 from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
 
 from mcp_bridge.auth.token_store import TokenStore
 from mcp_bridge.tools.query_classifier import QueryCategory, classify_query
+from mcp_bridge.native_search import native_chunk_code
+from mcp_bridge.native_watcher import NativeFileWatcher
 
 logger = logging.getLogger(__name__)
 
```
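A note on the `TYPE_CHECKING` guard added above: it makes `pathspec` a type-check-only dependency, so the module still imports when `pathspec` is absent at runtime. A minimal sketch of the idiom; the `load` helper is hypothetical, not from the package:

```python
# Sketch of the TYPE_CHECKING import pattern used in the hunk above.
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import pathspec  # resolved by type checkers, never imported at runtime

def load(lines: list[str]) -> "pathspec.PathSpec":  # string annotation
    import pathspec  # real import deferred until the function is called
    return pathspec.PathSpec.from_lines("gitwildmatch", lines)
```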
```diff
@@ -295,7 +302,10 @@ class GitIgnoreManager:
             rel_path_str = str(rel_path).replace("\\", "/")
 
             # Check against patterns
-            …
+            spec = self.spec
+            if spec is None:
+                return False  # No patterns loaded, nothing is ignored
+            return spec.match_file(rel_path_str)
         except ValueError:
             # Path is outside project - not ignored by gitignore (but may be ignored for other reasons)
             return False
```
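The `spec.match_file(...)` call above is the `pathspec` library's gitignore-style matcher. A minimal sketch of how such a spec is typically built; the `build_spec` helper is illustrative only, since the manager's actual loading code is not part of this diff:

```python
# Illustrative sketch, assuming GitIgnoreManager.spec is built with pathspec.
import pathspec

def build_spec(gitignore_lines: list[str]) -> pathspec.PathSpec:
    # "gitwildmatch" implements .gitignore pattern semantics
    return pathspec.PathSpec.from_lines("gitwildmatch", gitignore_lines)

spec = build_spec(["__pycache__/", "*.log", "!keep.log"])
print(spec.match_file("src/app.log"))  # True  - matched by *.log
print(spec.match_file("keep.log"))     # False - re-included by !keep.log
```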
```diff
@@ -925,40 +935,120 @@ class CodebaseVectorStore:
         ".toml",
     }
 
-    # Directories to skip
-    …
-    …
-        ".git",
+    # Directories to skip (non-code related)
+    SKIP_DUW = {
+        # Python
         "__pycache__",
         ".venv",
         "venv",
         "env",
-        "…
-        "…
-        ".…
-        ".nuxt",
-        "target",
+        ".env",
+        "virtualenv",
+        ".virtualenv",
         ".tox",
+        ".nox",
         ".pytest_cache",
         ".mypy_cache",
         ".ruff_cache",
+        ".pytype",
+        ".pyre",
+        "*.egg-info",
+        ".eggs",
+        "pip-wheel-metadata",
+        # Node.js
+        "node_modules",
+        ".npm",
+        ".yarn",
+        ".pnpm-store",
+        "bower_components",
+        # Build outputs
+        "dist",
+        "build",
+        "out",
+        "_build",
+        ".next",
+        ".nuxt",
+        ".output",
+        ".cache",
+        ".parcel-cache",
+        ".turbo",
+        # Version control
+        ".git",
+        ".svn",
+        ".hg",
+        # IDE/Editor
+        ".idea",
+        ".vscode",
+        ".vs",
+        # Test/coverage
         "coverage",
+        "htmlcov",
+        ".coverage",
+        ".nyc_output",
+        # Rust/Go/Java
+        "target",
+        "vendor",
+        "Godeps",
+        # Misc
         ".stravinsky",
         "scratches",
         "consoles",
-        "…
+        "logs",
+        "tmp",
+        "temp",
     }
 
-    …
-    …
-    …
+    @staticmethod
+    def _normalize_project_path(path: str) -> Path:
+        """
+        Normalize project path to git root if available.
+
+        This ensures one index per repo regardless of invocation directory.
+        If not a git repo, returns resolved absolute path.
+        """
+        import subprocess
+
+        resolved = Path(path).resolve()
+
+        # Try to find git root
+        try:
+            result = subprocess.run(
+                ["git", "-C", str(resolved), "rev-parse", "--show-toplevel"],
+                capture_output=True,
+                text=True,
+                timeout=2,
+                check=False,
+            )
+            if result.returncode == 0:
+                git_root = Path(result.stdout.strip())
+                logger.debug(f"Normalized {resolved} → {git_root} (git root)")
+                return git_root
+        except (subprocess.TimeoutExpired, FileNotFoundError):
+            pass
+
+        # Not a git repo or git not available, use resolved path
+        return resolved
+
+    def __init__(
+        self,
+        project_path: str,
+        provider: EmbeddingProvider = "ollama",
+        base_path: Path | None = None,
+    ):
+        self.project_path = self._normalize_project_path(project_path)
+        self.repo_name = self.project_path.name
 
         # Initialize embedding provider
         self.provider_name = provider
         self.provider = get_embedding_provider(provider)
 
-        # Store in user's home directory
-        …
+        # Store in provided base_path or user's home directory
+        # Separate by provider to avoid dimension mismatch
+        if base_path:
+            self.db_path = base_path / f"{self.repo_name}_{provider}"
+        else:
+            self.db_path = Path.home() / ".stravinsky" / "vectordb" / f"{self.repo_name}_{provider}"
+
         self.db_path.mkdir(parents=True, exist_ok=True)
 
         # File lock for single-process access to ChromaDB (prevents corruption)
```
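The practical effect of `_normalize_project_path` plus the provider-suffixed directory name is one index per repo per provider, wherever the tool is invoked from. Illustration only; the paths are hypothetical:

```python
# With the normalization above, invoking from a subdirectory and from the
# repo root resolve to the same store directory.
from pathlib import Path

repo_root = Path("/home/dev/myrepo")  # hypothetical repo
# _normalize_project_path("/home/dev/myrepo/src/utils") -> /home/dev/myrepo,
# so both invocations share one index per provider:
db_path = Path.home() / ".stravinsky" / "vectordb" / f"{repo_root.name}_ollama"
print(db_path)  # e.g. ~/.stravinsky/vectordb/myrepo_ollama
```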
```diff
@@ -1073,7 +1163,7 @@ class CodebaseVectorStore:
         sorted_results = sorted(results, key=lambda x: x[0])
         return [emb for _, emb in sorted_results]
 
-    def _chunk_file(self, file_path: Path) -> list[dict]:
+    async def _chunk_file(self, file_path: Path) -> list[dict]:
         """Split a file into chunks with metadata.
 
         Uses AST-aware chunking for Python files to respect function/class
```
```diff
@@ -1091,7 +1181,42 @@ class CodebaseVectorStore:
         rel_path = str(file_path.resolve().relative_to(self.project_path.resolve()))
         language = file_path.suffix.lstrip(".")
 
-        # …
+        # Try native AST-aware chunking first
+        native_results = await native_chunk_code(content, language)
+        if native_results:
+            chunks = []
+            for nc in native_results:
+                start_line = nc["start_line"]
+                end_line = nc["end_line"]
+                chunk_text = nc["content"]
+                content_hash = hashlib.md5(chunk_text.encode("utf-8")).hexdigest()[:12]
+
+                node_type = nc.get("node_type", "unknown")
+                name = nc.get("name")
+
+                if name:
+                    header = f"File: {rel_path}\n{node_type.capitalize()}: {name}\nLines: {start_line}-{end_line}"
+                else:
+                    header = f"File: {rel_path}\nLines: {start_line}-{end_line}"
+
+                document = f"{header}\n\n{chunk_text}"
+
+                chunks.append({
+                    "id": f"{rel_path}:{start_line}-{end_line}:{content_hash}",
+                    "document": document,
+                    "metadata": {
+                        "file_path": rel_path,
+                        "start_line": start_line,
+                        "end_line": end_line,
+                        "language": language,
+                        "node_type": node_type,
+                        "name": name or "",
+                    }
+                })
+            if chunks:
+                return chunks
+
+        # Use AST-aware chunking for Python files (fallback)
         if language == "py":
             chunks = self._chunk_python_ast(content, rel_path, language)
             if chunks:  # If AST parsing succeeded
```
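From the keys read above, each `native_chunk_code` result is presumably a dict with `start_line`, `end_line`, `content`, and optional `node_type`/`name`; the actual contract lives in `mcp_bridge/native_search.py`, which this page does not show. A sketch of how one such chunk becomes a stable document id (all values made up):

```python
# Assumed result shape, inferred from the keys the hunk reads.
import hashlib

nc = {
    "start_line": 10,
    "end_line": 24,
    "content": "def login(user):\n    ...",
    "node_type": "function",
    "name": "login",
}
content_hash = hashlib.md5(nc["content"].encode("utf-8")).hexdigest()[:12]
chunk_id = f"auth/views.py:{nc['start_line']}-{nc['end_line']}:{content_hash}"
# The id embeds path, line span, and a 12-char content hash, so an unchanged
# chunk keeps its id across reindexes (the manifest logic below relies on this).
print(chunk_id)
```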
```diff
@@ -1307,8 +1432,80 @@ class CodebaseVectorStore:
 
         return chunks
 
+    def _load_whitelist(self) -> set[Path] | None:
+        """Load whitelist from .stravinskyadd file if present.
+
+        File format:
+        - One path per line (relative to project root)
+        - Lines starting with # are comments
+        - Empty lines are ignored
+        - Glob patterns are supported (e.g., src/**/*.py)
+        - Directories implicitly include all files within (src/ includes src/**/*.*)
+
+        Returns:
+            Set of resolved file paths to include, or None if no whitelist file exists.
+        """
+        whitelist_file = self.project_path / ".stravinskyadd"
+        if not whitelist_file.exists():
+            return None
+
+        whitelist_paths: set[Path] = set()
+        try:
+            content = whitelist_file.read_text(encoding="utf-8")
+            for line in content.splitlines():
+                line = line.strip()
+                # Skip empty lines and comments
+                if not line or line.startswith("#"):
+                    continue
+
+                # Handle glob patterns
+                if "*" in line or "?" in line:
+                    for matched_path in self.project_path.glob(line):
+                        if (
+                            matched_path.is_file()
+                            and matched_path.suffix.lower() in self.CODE_EXTENSIONS
+                        ):
+                            whitelist_paths.add(matched_path.resolve())
+                else:
+                    target = self.project_path / line
+                    if target.exists():
+                        if target.is_file():
+                            # Direct file reference
+                            if target.suffix.lower() in self.CODE_EXTENSIONS:
+                                whitelist_paths.add(target.resolve())
+                        elif target.is_dir():
+                            # Directory: include all code files recursively
+                            for file_path in target.rglob("*"):
+                                if (
+                                    file_path.is_file()
+                                    and file_path.suffix.lower() in self.CODE_EXTENSIONS
+                                ):
+                                    # Apply SKIP_DUW even within whitelisted directories
+                                    if not any(
+                                        skip_dir in file_path.parts for skip_dir in self.SKIP_DUW
+                                    ):
+                                        whitelist_paths.add(file_path.resolve())
+
+            logger.info(f"Loaded whitelist from .stravinskyadd: {len(whitelist_paths)} files")
+            return whitelist_paths
+
+        except Exception as e:
+            logger.warning(f"Failed to parse .stravinskyadd: {e}")
+            return None
+
     def _get_files_to_index(self) -> list[Path]:
-        """Get all indexable files in the project.
+        """Get all indexable files in the project.
+
+        If a .stravinskyadd whitelist file exists, ONLY those paths are indexed.
+        Otherwise, all code files are indexed (excluding SKIP_DUW).
+        """
+        # Check for whitelist mode
+        whitelist = self._load_whitelist()
+        if whitelist is not None:
+            logger.info(f"Whitelist mode: indexing {len(whitelist)} files from .stravinskyadd")
+            return sorted(whitelist)  # Return sorted for deterministic order
+
+        # Standard mode: crawl entire project
         files = []
         for file_path in self.project_path.rglob("*"):
             if file_path.is_file():
```
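Based on the docstring above, a `.stravinskyadd` might look like the sketch below (paths are examples). Note the parser only skips lines that *start* with `#`, so comments must be on their own lines, not appended to a path:

```
# Index only these paths (relative to the project root)
src/
lib/parser.py
tests/**/*.py
```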
```diff
@@ -1345,7 +1542,7 @@ class CodebaseVectorStore:
                     continue
 
                 # Skip excluded directories
-                if any(skip_dir in file_path.parts for skip_dir in self.…
+                if any(skip_dir in file_path.parts for skip_dir in self.SKIP_DUW):
                     continue
 
                 # Only include code files
```
```diff
@@ -1374,6 +1571,38 @@ class CodebaseVectorStore:
         with self._cancel_lock:
             return self._cancel_indexing
 
+    def _get_manifest_path(self) -> Path:
+        """Get the path to the incremental indexing manifest."""
+        return self.db_path / "manifest.json"
+
+    def _load_manifest(self) -> dict:
+        """Load the indexing manifest."""
+        manifest_path = self._get_manifest_path()
+        if not manifest_path.exists():
+            return {}
+        try:
+            import json
+
+            with open(manifest_path, "r", encoding="utf-8") as f:
+                return json.load(f)
+        except Exception as e:
+            logger.warning(f"Failed to load manifest: {e}")
+            return {}
+
+    def _save_manifest(self, manifest: dict) -> None:
+        """Save the indexing manifest."""
+        manifest_path = self._get_manifest_path()
+        try:
+            import json
+
+            # Atomic write
+            temp_path = manifest_path.with_suffix(".tmp")
+            with open(temp_path, "w", encoding="utf-8") as f:
+                json.dump(manifest, f, indent=2)
+            temp_path.replace(manifest_path)
+        except Exception as e:
+            logger.warning(f"Failed to save manifest: {e}")
+
     async def index_codebase(self, force: bool = False) -> dict:
         """
         Index the entire codebase into the vector store.
```
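The manifest's shape follows from the indexing loop further below, which writes `{"mtime": ..., "size": ..., "chunk_ids": [...]}` per file. A sketch of one `manifest.json` entry (the path and ids are hypothetical):

```python
# Shape of manifest.json as written by new_manifest[str_path] below.
manifest = {
    "/home/dev/myrepo/src/app.py": {        # key: absolute file path
        "mtime": 1717171717.0,              # stat().st_mtime at index time
        "size": 2048,                       # stat().st_size at index time
        "chunk_ids": [
            "src/app.py:1-40:aaaaaaaaaaaa",  # illustrative chunk ids
            "src/app.py:42-80:bbbbbbbbbbbb",
        ],
    },
}
```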
```diff
@@ -1398,7 +1627,7 @@ class CodebaseVectorStore:
         print(f"🔍 SEMANTIC-INDEX: {self.project_path}", file=sys.stderr)
 
         # Notify reindex start (non-blocking)
-        notifier = None
+        notifier = None
         try:
             from mcp_bridge.notifications import get_notification_manager
 
```
```diff
@@ -1410,7 +1639,6 @@ class CodebaseVectorStore:
         try:
             if not await self.check_embedding_service():
                 error_msg = "Embedding service not available"
-                # Notify error
                 try:
                     if notifier:
                         await notifier.notify_reindex_error(error_msg)
```
```diff
@@ -1427,25 +1655,74 @@ class CodebaseVectorStore:
             except Exception:
                 pass
 
+        manifest = {}
         if force:
-            # Clear existing collection
+            # Clear existing collection and manifest
             try:
                 self.client.delete_collection("codebase")
                 self._collection = None
                 existing_ids = set()
             except Exception:
                 pass
+        else:
+            manifest = self._load_manifest()
 
         files = self._get_files_to_index()
         all_chunks = []
         current_chunk_ids = set()
 
+        # Track manifest updates
+        new_manifest = {}
+
+        # Stats
+        reused_files = 0
+
         # Mark: Generate all chunks for current codebase
         for file_path in files:
-            …
+            str_path = str(file_path.resolve())
+
+            # Get file stats
+            try:
+                stat = file_path.stat()
+                mtime = stat.st_mtime
+                size = stat.st_size
+            except OSError:
+                continue  # File might have been deleted during iteration
+
+            # Check manifest
+            manifest_entry = manifest.get(str_path)
+
+            # Reuse chunks if file hasn't changed AND chunks exist in DB
+            if (
+                not force
+                and manifest_entry
+                and manifest_entry.get("mtime") == mtime
+                and manifest_entry.get("size") == size
+            ):
+                chunk_ids = manifest_entry.get("chunk_ids", [])
+
+                # Verify all chunks actually exist in DB (integrity check)
+                if chunk_ids and all(cid in existing_ids for cid in chunk_ids):
+                    current_chunk_ids.update(chunk_ids)
+                    new_manifest[str_path] = manifest_entry
+                    reused_files += 1
+                    continue
+
+            # If we get here: file changed, new, or chunks missing from DB
+            chunks = await self._chunk_file(file_path)
             all_chunks.extend(chunks)
+
+            new_chunk_ids = []
             for c in chunks:
-                …
+                cid = c["id"]
+                current_chunk_ids.add(cid)
+                new_chunk_ids.append(cid)
+
+            # Update manifest
+            new_manifest[str_path] = {"mtime": mtime, "size": size, "chunk_ids": new_chunk_ids}
+
+        # Save updated manifest
+        self._save_manifest(new_manifest)
 
         # Sweep: Identify stale chunks to remove
         to_delete = existing_ids - current_chunk_ids
```
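The reuse check above condenses to a single predicate: a file's chunks are carried over only if its mtime and size are unchanged and every recorded chunk id is still present in the vector DB. A minimal sketch of that decision, factored out for clarity (this helper does not exist in the package):

```python
# Standalone restatement of the reuse decision in the hunk above.
def can_reuse(entry: dict | None, mtime: float, size: int,
              existing_ids: set[str], force: bool) -> bool:
    if force or not entry:
        return False  # forced rebuild, or file not seen before
    if entry.get("mtime") != mtime or entry.get("size") != size:
        return False  # file changed on disk
    chunk_ids = entry.get("chunk_ids", [])
    # Integrity check: every recorded chunk must still be in the DB
    return bool(chunk_ids) and all(cid in existing_ids for cid in chunk_ids)

print(can_reuse(None, 1.0, 10, set(), force=False))  # False: unknown file
print(can_reuse({"mtime": 1.0, "size": 10, "chunk_ids": ["a"]},
                1.0, 10, {"a"}, force=False))        # True: unchanged, intact
```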
```diff
@@ -1464,10 +1741,10 @@ class CodebaseVectorStore:
                 "indexed": 0,
                 "pruned": len(to_delete),
                 "total_files": len(files),
-                "…
+                "reused_files": reused_files,
+                "message": f"No new chunks to index (reused {reused_files} files)",
                 "time_taken": round(time.time() - start_time, 1),
             }
-            # Notify completion
             try:
                 if notifier:
                     await notifier.notify_reindex_complete(stats)
```
```diff
@@ -1492,7 +1769,6 @@ class CodebaseVectorStore:
                 "cancelled": True,
                 "message": f"Cancelled after {total_indexed}/{len(chunks_to_add)} chunks",
             }
-            # Notify cancellation
             try:
                 if notifier:
                     await notifier.notify_reindex_error(
```
```diff
@@ -1520,11 +1796,11 @@ class CodebaseVectorStore:
             "indexed": total_indexed,
             "pruned": len(to_delete),
             "total_files": len(files),
+            "reused_files": reused_files,
             "db_path": str(self.db_path),
             "time_taken": round(time.time() - start_time, 1),
         }
 
-        # Notify completion
         try:
             if notifier:
                 await notifier.notify_reindex_complete(stats)
```
```diff
@@ -1536,14 +1812,11 @@ class CodebaseVectorStore:
         except Exception as e:
             error_msg = str(e)
             logger.error(f"Reindexing failed: {error_msg}")
-
-            # Notify error
             try:
                 if notifier:
                     await notifier.notify_reindex_error(error_msg)
             except Exception as notify_error:
                 logger.warning(f"Failed to send reindex error notification: {notify_error}")
-
             raise
 
     async def search(
```
```diff
@@ -1749,6 +2022,87 @@ def _cleanup_watchers():
 atexit.register(_cleanup_watchers)
 
 
+def _check_index_exists(store: "CodebaseVectorStore") -> bool:
+    """Check if semantic index exists for this project."""
+    try:
+        doc_count = store.collection.count()
+        return doc_count > 0
+    except Exception as e:
+        logger.warning(f"Could not check index status: {e}")
+        return False
+
+
+def _prompt_with_timeout(prompt_text: str, timeout: int = 30) -> str:
+    """
+    Prompt user with timeout. Returns 'n' if timeout or non-interactive.
+
+    Args:
+        prompt_text: The prompt to display
+        timeout: Timeout in seconds (default: 30)
+
+    Returns:
+        User response or 'n' if timeout/non-interactive
+    """
+    # Check if stdin is interactive
+    if not sys.stdin.isatty():
+        return "n"  # Non-interactive, skip prompt
+
+    # Windows doesn't support SIGALRM, so we need a different approach
+    if sys.platform == "win32":
+        try:
+            import msvcrt
+            import time
+
+            print(prompt_text, end="", flush=True, file=sys.stderr)
+            start_time = time.time()
+            response = []
+
+            while time.time() - start_time < timeout:
+                if msvcrt.kbhit():
+                    char = msvcrt.getwche()
+                    if char in ("\r", "\n"):
+                        print(file=sys.stderr)  # Newline after input
+                        return "".join(response)
+                    response.append(char)
+                time.sleep(0.1)
+
+            print("\n⏱️ Timeout - skipping index creation", file=sys.stderr)
+            return "n"
+        except (ImportError, Exception):
+            # Fallback: just use input() without timeout on Windows
+            try:
+                return input(prompt_text)
+            except EOFError:
+                return "n"
+
+    # Unix-like systems (Linux, macOS)
+    def timeout_handler(signum, frame):
+        raise TimeoutError()
+
+    try:
+        # Save old handler
+        old_handler = signal.signal(signal.SIGALRM, timeout_handler)
+        signal.alarm(timeout)
+        response = input(prompt_text)
+        signal.alarm(0)  # Cancel alarm
+        # Restore old handler
+        signal.signal(signal.SIGALRM, old_handler)
+        return response
+    except (TimeoutError, EOFError):
+        signal.alarm(0)  # Cancel alarm
+        # Restore old handler
+        try:
+            signal.signal(signal.SIGALRM, old_handler)
+        except Exception:
+            pass
+        print("\n⏱️ Timeout - skipping index creation", file=sys.stderr)
+        return "n"
+    except Exception as e:
+        signal.alarm(0)  # Cancel alarm
+        logger.warning(f"Error during prompt: {e}")
+        return "n"
+
+
 def get_store(project_path: str, provider: EmbeddingProvider = "ollama") -> CodebaseVectorStore:
     """Get or create a vector store for a project.
 
```
```diff
@@ -1794,6 +2148,58 @@ async def semantic_search(
         Formatted search results with file paths and code snippets.
     """
     store = get_store(project_path, provider)
+
+    # Check if index exists before searching
+    if not _check_index_exists(store):
+        print("\n⚠️ No semantic index found for this project.", file=sys.stderr)
+        print(f"📁 Project: {project_path}", file=sys.stderr)
+        print(f"🔍 Provider: {provider}", file=sys.stderr)
+
+        # Interactive prompt with timeout
+        response = _prompt_with_timeout("\n🤔 Create semantic index now? [Y/n] (30s timeout): ")
+
+        if response.lower() in ["", "y", "yes"]:
+            print("\n📋 Creating semantic index...", file=sys.stderr)
+            try:
+                # Call index_codebase function
+                index_result = await index_codebase(project_path, provider=provider, force=False)
+                print(f"✅ {index_result}", file=sys.stderr)
+
+                # Auto-start file watcher
+                print("🔄 Starting file watcher for auto-updates...", file=sys.stderr)
+                await start_file_watcher(project_path, provider)
+                print("✅ File watcher started - index will auto-update on changes", file=sys.stderr)
+
+            except Exception as e:
+                logger.error(f"Failed to create index: {e}")
+                return (
+                    f"❌ Failed to create index: {e}\n\n"
+                    "**Manual fix:**\n"
+                    "```python\n"
+                    f'index_codebase(project_path="{project_path}", provider="{provider}")\n'
+                    "```"
+                )
+        else:
+            return (
+                "❌ Index required for semantic search.\n\n"
+                "**To create index manually:**\n"
+                "```python\n"
+                f'index_codebase(project_path="{project_path}", provider="{provider}")\n'
+                "```\n\n"
+                "This indexes your codebase for natural language search. "
+                "Run it once per project (takes 30s-2min depending on size)."
+            )
+    else:
+        # Index exists, ensure watcher is running
+        # We don't await this to avoid blocking search if it takes a moment
+        # But for tests we might need to await it or mock it properly
+        # The test expects it to be called.
+        # Let's just call it. start_file_watcher is async.
+        try:
+            await start_file_watcher(project_path, provider)
+        except Exception as e:
+            logger.warning(f"Failed to auto-start watcher: {e}")
+
     results = await store.search(
         query,
         n_results,
```
```diff
@@ -1810,6 +2216,17 @@ async def semantic_search(
     if "error" in results[0]:
         return f"Error: {results[0]['error']}\nHint: {results[0].get('hint', 'Check Ollama is running')}"
 
+    # Auto-start file watcher if not already running (index exists and search succeeded)
+    try:
+        active_watcher = get_file_watcher(project_path)
+        if active_watcher is None:
+            # Index exists but no watcher - start it silently in background
+            logger.info(f"Auto-starting file watcher for {project_path}")
+            await start_file_watcher(project_path, provider, debounce_seconds=2.0)
+    except Exception as e:
+        # Don't fail the search if watcher fails to start
+        logger.warning(f"Could not auto-start file watcher: {e}")
+
     lines = [f"Found {len(results)} results for: '{query}'\n"]
     for i, r in enumerate(results, 1):
         lines.append(f"{i}. {r['file']}:{r['lines']} (relevance: {r['relevance']})")
```
```diff
@@ -2050,16 +2467,16 @@ def delete_index(
     except Exception as e:
         return f"❌ Error deleting all indexes: {e}"
 
-    # Generate …
+    # Generate repo name
     project_path_resolved = Path(project_path).resolve()
-    …
+    repo_name = project_path_resolved.name
 
     deleted = []
     errors = []
 
     if provider:
         # Delete specific provider index for this project
-        index_path = vectordb_base / f"{…
+        index_path = vectordb_base / f"{repo_name}_{provider}"
         if index_path.exists():
             try:
                 shutil.rmtree(index_path)
```
```diff
@@ -2072,7 +2489,7 @@ def delete_index(
         # Delete all provider indexes for this project
         providers: list[EmbeddingProvider] = ["ollama", "mxbai", "gemini", "openai", "huggingface"]
         for prov in providers:
-            index_path = vectordb_base / f"{…
+            index_path = vectordb_base / f"{repo_name}_{prov}"
             if index_path.exists():
                 try:
                     shutil.rmtree(index_path)
```
```diff
@@ -2081,7 +2498,7 @@ def delete_index(
                     errors.append(f"{prov}: {e}")
 
     if not deleted and not errors:
-        return f"⚠️ No indexes found for project: {project_path_resolved}\…
+        return f"⚠️ No indexes found for project: {project_path_resolved}\nRepo name: {repo_name}"
 
     result = []
     if deleted:
```
```diff
@@ -2144,45 +2561,44 @@ async def start_file_watcher(
     Returns:
         The started CodebaseFileWatcher instance
     """
-    …
+    normalized_path = CodebaseVectorStore._normalize_project_path(project_path)
+    path_key = str(normalized_path)
+
     with _watchers_lock:
-        if …
+        if path_key not in _watchers:
             store = get_store(project_path, provider)
 
-            # Check if index exists - …
+            # Check if index exists - create if missing, update if stale
             try:
                 stats = store.get_stats()
                 chunks_indexed = stats.get("chunks_indexed", 0)
-                if chunks_indexed == 0:
-                    raise ValueError(
-                        f"No semantic index found for '{path}'. "
-                        f"Run semantic_index(project_path='{path}', provider='{provider}') "
-                        f"before starting the file watcher."
-                    )
 
-                …
-                …
-                …
-                …
+                if chunks_indexed == 0:
+                    # No index exists - create initial index
+                    print("📋 No index found, creating initial index...", file=sys.stderr)
+                    await store.index_codebase(force=False)
+                    print("✅ Initial index created, starting file watcher", file=sys.stderr)
+                else:
+                    # Index exists - catch up on any missed changes since watcher was off
+                    print("📋 Catching up on changes since last index...", file=sys.stderr)
+                    await store.index_codebase(force=False)
+                    print("✅ Index updated, starting file watcher", file=sys.stderr)
 
-            except ValueError:
-                # Re-raise ValueError (our intentional error)
-                raise
             except Exception as e:
-                # …
-                …
-                …
-                …
-                …
-                )
+                # Failed to index - log and create watcher anyway (it will index on file changes)
+                logger.warning(f"Failed to index before starting watcher: {e}")
+                print(f"⚠️ Warning: Could not index project: {e}", file=sys.stderr)
+                print(
+                    "🔄 Starting watcher anyway - will index on first file change", file=sys.stderr
+                )
 
             watcher = store.start_watching(debounce_seconds=debounce_seconds)
-            _watchers[…
+            _watchers[path_key] = watcher
         else:
-            watcher = _watchers[…
+            watcher = _watchers[path_key]
             if not watcher.is_running():
                 watcher.start()
-    return _watchers[…
+    return _watchers[path_key]
 
 
 def stop_file_watcher(project_path: str) -> bool:
```
```diff
@@ -2194,12 +2610,14 @@ def stop_file_watcher(project_path: str) -> bool:
     Returns:
         True if watcher was stopped, False if no watcher was active
     """
-    …
+    normalized_path = CodebaseVectorStore._normalize_project_path(project_path)
+    path_key = str(normalized_path)
+
     with _watchers_lock:
-        if …
-            watcher = _watchers[…
+        if path_key in _watchers:
+            watcher = _watchers[path_key]
             watcher.stop()
-            del _watchers[…
+            del _watchers[path_key]
             return True
         return False
```
```diff
@@ -2213,9 +2631,11 @@ def get_file_watcher(project_path: str) -> "CodebaseFileWatcher | None":
     Returns:
         The CodebaseFileWatcher if active, None otherwise
     """
-    …
+    normalized_path = CodebaseVectorStore._normalize_project_path(project_path)
+    path_key = str(normalized_path)
+
     with _watchers_lock:
-        watcher = _watchers.get(…
+        watcher = _watchers.get(path_key)
         if watcher is not None and watcher.is_running():
             return watcher
         return None
```
```diff
@@ -2281,7 +2701,7 @@ db session factory pattern"""
     result = await invoke_gemini(
         token_store=TokenStore(),
         prompt=prompt,
-        model="gemini-…
+        model="gemini-3-flash",
         temperature=0.7,
         max_tokens=200,
     )
```
```diff
@@ -2335,7 +2755,7 @@ authentication logic"""
     result = await invoke_gemini(
         token_store=TokenStore(),
         prompt=prompt,
-        model="gemini-…
+        model="gemini-3-flash",
        temperature=0.3,  # Lower temperature for more consistent decomposition
         max_tokens=150,
     )
```
```diff
@@ -2674,6 +3094,145 @@ async def enhanced_search(
 # ========================
 
 
+class DedicatedIndexingWorker:
+    """Single-threaded worker for all indexing operations.
+
+    Prevents concurrent indexing by serializing all operations through a queue.
+    Uses asyncio.run() for each operation to avoid event loop reuse issues.
+    """
+
+    def __init__(self, store: "CodebaseVectorStore"):
+        """Initialize the indexing worker.
+
+        Args:
+            store: CodebaseVectorStore instance for reindexing
+        """
+        import queue
+
+        self.store = store
+        self._queue: queue.Queue = queue.Queue(maxsize=1)  # Max 1 pending request (debouncing)
+        self._thread: threading.Thread | None = None
+        self._shutdown = threading.Event()
+        self._log_file = Path.home() / ".stravinsky" / "logs" / "file_watcher.log"
+        self._log_file.parent.mkdir(parents=True, exist_ok=True)
+
+    def start(self) -> None:
+        """Start the worker thread."""
+        if self._thread is not None and self._thread.is_alive():
+            logger.warning("Indexing worker already running")
+            return
+
+        self._shutdown.clear()
+        self._thread = threading.Thread(
+            target=self._run_worker, daemon=False, name="IndexingWorker"
+        )
+        self._thread.start()
+        logger.info(f"Started indexing worker for {self.store.project_path}")
+
+    def _log_error(self, msg: str, exc: Exception | None = None):
+        """Write error to log file with timestamp and full traceback."""
+        import traceback
+        from datetime import datetime
+
+        timestamp = datetime.now().isoformat()
+        try:
+            with open(self._log_file, "a") as f:
+                f.write(f"\n{'=' * 80}\n")
+                f.write(f"[{timestamp}] {msg}\n")
+                if exc:
+                    f.write(f"Exception: {type(exc).__name__}: {exc}\n")
+                    f.write(traceback.format_exc())
+                f.write(f"{'=' * 80}\n")
+        except Exception as log_exc:
+            logger.error(f"Failed to write to log file: {log_exc}")
+        logger.error(f"{msg} (logged to {self._log_file})")
+
+    def _run_worker(self) -> None:
+        """Worker thread entry point - processes queue with asyncio.run() per operation."""
+        import queue
+
+        self._log_error(f"🟢 File watcher started for {self.store.project_path}")
+
+        try:
+            while not self._shutdown.is_set():
+                try:
+                    # Wait for reindex request (blocking with timeout)
+                    self._queue.get(timeout=0.5)
+                    self._queue.task_done()
+
+                    # Use asyncio.run() for each operation (creates fresh loop)
+                    # This avoids "event loop already running" errors
+                    try:
+                        asyncio.run(self._do_reindex())
+                        self._log_error(f"✅ Reindex completed for {self.store.project_path}")
+                    except Exception as e:
+                        self._log_error(f"⚠️ Reindex failed for {self.store.project_path}", e)
+
+                except queue.Empty:
+                    continue  # No work, check shutdown flag
+                except Exception as e:
+                    self._log_error(f"⚠️ Queue processing error for {self.store.project_path}", e)
+
+        except Exception as e:
+            self._log_error(f"⚠️ Worker thread crashed for {self.store.project_path}", e)
+        finally:
+            self._log_error(f"🔴 File watcher stopped for {self.store.project_path}")
+
+    async def _do_reindex(self) -> None:
+        """Execute reindex with retry logic for ALL error types."""
+        import sqlite3
+
+        from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
+
+        @retry(
+            stop=stop_after_attempt(3),
+            wait=wait_exponential(multiplier=1, min=2, max=10),
+            retry=retry_if_exception_type(
+                (
+                    httpx.HTTPError,
+                    ConnectionError,
+                    TimeoutError,
+                    sqlite3.OperationalError,  # Database locked
+                    OSError,  # File system errors
+                )
+            ),
+            reraise=True,
+        )
+        async def _indexed():
+            await self.store.index_codebase(force=False)
+
+        await _indexed()
+
+    def request_reindex(self, files: list[Path]) -> None:
+        """Request reindex from any thread (thread-safe).
+
+        Args:
+            files: List of files that changed (for logging only)
+        """
+        import queue
+
+        try:
+            # Non-blocking put - drops if queue full (natural debouncing)
+            self._queue.put_nowait("reindex")
+            logger.debug(f"📥 Queued reindex for {len(files)} files: {[f.name for f in files[:5]]}")
+        except queue.Full:
+            # Already have pending reindex - this is fine (debouncing)
+            logger.debug(f"Reindex already queued, skipping {len(files)} files")
+
+    def shutdown(self) -> None:
+        """Graceful shutdown of worker thread."""
+        if self._shutdown.is_set():
+            return  # Already shutting down
+
+        self._shutdown.set()
+        if self._thread is not None and self._thread.is_alive():
+            self._thread.join(timeout=10)  # Wait up to 10 seconds
+            if self._thread.is_alive():
+                self._log_error("⚠️ Worker thread failed to stop within timeout")
+            self._thread = None
+        logger.info("Indexing worker shut down")
+
+
 class CodebaseFileWatcher:
     """Watch a project directory for file changes and trigger reindexing.
 
```
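The `queue.Queue(maxsize=1)` in the worker above gives coalescing for free: at most one reindex request can ever be pending, and further requests are dropped rather than accumulated. A minimal standalone sketch of that pattern (not code from the package):

```python
# Coalescing pattern used by DedicatedIndexingWorker.request_reindex().
import queue

q: queue.Queue = queue.Queue(maxsize=1)

def request(reason: str) -> bool:
    try:
        q.put_nowait(reason)  # succeeds only if nothing is already queued
        return True
    except queue.Full:
        return False          # a reindex is already pending; coalesce

print(request("edit 1"))  # True  - queued
print(request("edit 2"))  # False - coalesced into the pending run
```

Since the worker always reindexes the whole store incrementally (the changed-file list is used only for logging), dropping duplicate requests loses nothing.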
```diff
@@ -2684,6 +3243,7 @@ class CodebaseFileWatcher:
     - Debounces rapid changes to batch them into a single reindex
     - Thread-safe with daemon threads for clean shutdown
     - Integrates with CodebaseVectorStore for incremental indexing
+    - Uses dedicated worker thread to prevent concurrent indexing
     """
 
     # Default debounce time in seconds
```
```diff
@@ -2709,6 +3269,9 @@ class CodebaseFileWatcher:
         # Observer and handler for watchdog
         self._observer = None
         self._event_handler = None
+
+        # Native watcher
+        self._native_watcher: NativeFileWatcher | None = None
 
         # Thread safety
         self._lock = threading.Lock()
```
```diff
@@ -2719,10 +3282,14 @@ class CodebaseFileWatcher:
         self._pending_files: set[Path] = set()
         self._pending_lock = threading.Lock()
 
+        # Dedicated indexing worker (prevents concurrent access)
+        self._indexing_worker = DedicatedIndexingWorker(store)
+
     def start(self) -> None:
         """Start watching the project directory.
 
         Creates and starts a watchdog observer in a daemon thread.
+        Also starts the dedicated indexing worker thread.
         """
         with self._lock:
             if self._running:
```
```diff
@@ -2730,6 +3297,23 @@ class CodebaseFileWatcher:
                 return
 
             try:
+                # Start indexing worker first (must be running before file events arrive)
+                self._indexing_worker.start()
+
+                # Try native watcher first
+                try:
+                    self._native_watcher = NativeFileWatcher(
+                        str(self.project_path),
+                        on_change=lambda type, path: self._on_file_changed(Path(path))
+                    )
+                    self._native_watcher.start()
+                    self._running = True
+                    logger.info(f"Native file watcher started for {self.project_path}")
+                    return
+                except (FileNotFoundError, Exception) as e:
+                    logger.info(f"Native watcher not available, falling back to watchdog: {e}")
+                    self._native_watcher = None
+
                 watchdog = get_watchdog()
                 Observer = watchdog["Observer"]
 
```
```diff
@@ -2755,25 +3339,37 @@ class CodebaseFileWatcher:
             except Exception as e:
                 logger.error(f"Failed to start file watcher: {e}")
                 self._running = False
+                # Clean up worker if observer failed
+                self._indexing_worker.shutdown()
                 raise
 
     def stop(self) -> None:
         """Stop watching the project directory.
 
-        Cancels any pending reindex timers and …
+        Cancels any pending reindex timers, stops the observer, and shuts down the indexing worker.
         """
         with self._lock:
-            # Cancel pending reindex
-            …
-            self._pending_reindex_timer…
-            …
+            # Cancel pending reindex timer
+            with self._pending_lock:
+                if self._pending_reindex_timer:
+                    self._pending_reindex_timer.cancel()
+                    self._pending_reindex_timer = None
+                self._pending_files.clear()
+
+            # Stop native watcher
+            if self._native_watcher:
+                self._native_watcher.stop()
+                self._native_watcher = None
 
             # Stop observer
-            if self._observer…
+            if self._observer:
                 self._observer.stop()
                 self._observer.join(timeout=5)  # Wait up to 5 seconds for shutdown
                 self._observer = None
 
+            # Shutdown indexing worker
+            self._indexing_worker.shutdown()
+
             self._event_handler = None
             self._running = False
             logger.info(f"File watcher stopped for {self.project_path}")
```
```diff
@@ -2820,11 +3416,9 @@ class CodebaseFileWatcher:
     def _trigger_reindex(self) -> None:
         """Trigger reindexing of accumulated changed files.
 
-        This is called after the debounce period expires.
-
+        This is called after the debounce period expires. Delegates to the
+        dedicated indexing worker to prevent concurrent access.
         """
-        import asyncio
-
         with self._pending_lock:
             if not self._pending_files:
                 self._pending_reindex_timer = None
```
```diff
@@ -2834,17 +3428,8 @@ class CodebaseFileWatcher:
             self._pending_files.clear()
             self._pending_reindex_timer = None
 
-        # …
-        …
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
-            try:
-                loop.run_until_complete(self.store.index_codebase(force=False))
-                logger.debug(f"Reindexed {len(files_to_index)} changed files")
-            finally:
-                loop.close()
-        except Exception as e:
-            logger.error(f"Error during file watcher reindex: {e}")
+        # Delegate to dedicated worker (prevents concurrent indexing)
+        self._indexing_worker.request_reindex(files_to_index)
 
 
 def _create_file_change_handler_class():
```
|