stravinsky 0.2.67__py3-none-any.whl → 0.4.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of stravinsky might be problematic. Click here for more details.

Files changed (190)
  1. mcp_bridge/__init__.py +1 -1
  2. mcp_bridge/auth/__init__.py +16 -6
  3. mcp_bridge/auth/cli.py +202 -11
  4. mcp_bridge/auth/oauth.py +1 -2
  5. mcp_bridge/auth/openai_oauth.py +4 -7
  6. mcp_bridge/auth/token_store.py +112 -11
  7. mcp_bridge/cli/__init__.py +1 -1
  8. mcp_bridge/cli/install_hooks.py +503 -107
  9. mcp_bridge/cli/session_report.py +0 -3
  10. mcp_bridge/config/MANIFEST_SCHEMA.md +305 -0
  11. mcp_bridge/config/README.md +276 -0
  12. mcp_bridge/config/__init__.py +2 -2
  13. mcp_bridge/config/hook_config.py +247 -0
  14. mcp_bridge/config/hooks_manifest.json +138 -0
  15. mcp_bridge/config/rate_limits.py +317 -0
  16. mcp_bridge/config/skills_manifest.json +128 -0
  17. mcp_bridge/hooks/HOOKS_SETTINGS.json +17 -4
  18. mcp_bridge/hooks/__init__.py +19 -4
  19. mcp_bridge/hooks/agent_reminder.py +4 -4
  20. mcp_bridge/hooks/auto_slash_command.py +5 -5
  21. mcp_bridge/hooks/budget_optimizer.py +2 -2
  22. mcp_bridge/hooks/claude_limits_hook.py +114 -0
  23. mcp_bridge/hooks/comment_checker.py +3 -4
  24. mcp_bridge/hooks/compaction.py +2 -2
  25. mcp_bridge/hooks/context.py +2 -1
  26. mcp_bridge/hooks/context_monitor.py +2 -2
  27. mcp_bridge/hooks/delegation_policy.py +85 -0
  28. mcp_bridge/hooks/directory_context.py +3 -3
  29. mcp_bridge/hooks/edit_recovery.py +3 -2
  30. mcp_bridge/hooks/edit_recovery_policy.py +49 -0
  31. mcp_bridge/hooks/empty_message_sanitizer.py +2 -2
  32. mcp_bridge/hooks/events.py +160 -0
  33. mcp_bridge/hooks/git_noninteractive.py +4 -4
  34. mcp_bridge/hooks/keyword_detector.py +8 -10
  35. mcp_bridge/hooks/manager.py +43 -22
  36. mcp_bridge/hooks/notification_hook.py +13 -6
  37. mcp_bridge/hooks/parallel_enforcement_policy.py +67 -0
  38. mcp_bridge/hooks/parallel_enforcer.py +5 -5
  39. mcp_bridge/hooks/parallel_execution.py +22 -10
  40. mcp_bridge/hooks/post_tool/parallel_validation.py +103 -0
  41. mcp_bridge/hooks/pre_compact.py +8 -9
  42. mcp_bridge/hooks/pre_tool/agent_spawn_validator.py +115 -0
  43. mcp_bridge/hooks/preemptive_compaction.py +2 -3
  44. mcp_bridge/hooks/routing_notifications.py +80 -0
  45. mcp_bridge/hooks/rules_injector.py +11 -19
  46. mcp_bridge/hooks/session_idle.py +4 -4
  47. mcp_bridge/hooks/session_notifier.py +4 -4
  48. mcp_bridge/hooks/session_recovery.py +4 -5
  49. mcp_bridge/hooks/stravinsky_mode.py +1 -1
  50. mcp_bridge/hooks/subagent_stop.py +1 -3
  51. mcp_bridge/hooks/task_validator.py +2 -2
  52. mcp_bridge/hooks/tmux_manager.py +7 -8
  53. mcp_bridge/hooks/todo_delegation.py +4 -1
  54. mcp_bridge/hooks/todo_enforcer.py +180 -10
  55. mcp_bridge/hooks/tool_messaging.py +113 -10
  56. mcp_bridge/hooks/truncation_policy.py +37 -0
  57. mcp_bridge/hooks/truncator.py +1 -2
  58. mcp_bridge/metrics/cost_tracker.py +115 -0
  59. mcp_bridge/native_search.py +93 -0
  60. mcp_bridge/native_watcher.py +118 -0
  61. mcp_bridge/notifications.py +150 -0
  62. mcp_bridge/orchestrator/enums.py +11 -0
  63. mcp_bridge/orchestrator/router.py +165 -0
  64. mcp_bridge/orchestrator/state.py +32 -0
  65. mcp_bridge/orchestrator/visualization.py +14 -0
  66. mcp_bridge/orchestrator/wisdom.py +34 -0
  67. mcp_bridge/prompts/__init__.py +1 -8
  68. mcp_bridge/prompts/dewey.py +1 -1
  69. mcp_bridge/prompts/planner.py +2 -4
  70. mcp_bridge/prompts/stravinsky.py +53 -31
  71. mcp_bridge/proxy/__init__.py +0 -0
  72. mcp_bridge/proxy/client.py +70 -0
  73. mcp_bridge/proxy/model_server.py +157 -0
  74. mcp_bridge/routing/__init__.py +43 -0
  75. mcp_bridge/routing/config.py +250 -0
  76. mcp_bridge/routing/model_tiers.py +135 -0
  77. mcp_bridge/routing/provider_state.py +261 -0
  78. mcp_bridge/routing/task_classifier.py +190 -0
  79. mcp_bridge/server.py +542 -59
  80. mcp_bridge/server_tools.py +738 -6
  81. mcp_bridge/tools/__init__.py +40 -25
  82. mcp_bridge/tools/agent_manager.py +616 -697
  83. mcp_bridge/tools/background_tasks.py +13 -17
  84. mcp_bridge/tools/code_search.py +70 -53
  85. mcp_bridge/tools/continuous_loop.py +0 -1
  86. mcp_bridge/tools/dashboard.py +19 -0
  87. mcp_bridge/tools/find_code.py +296 -0
  88. mcp_bridge/tools/init.py +1 -0
  89. mcp_bridge/tools/list_directory.py +42 -0
  90. mcp_bridge/tools/lsp/__init__.py +12 -5
  91. mcp_bridge/tools/lsp/manager.py +471 -0
  92. mcp_bridge/tools/lsp/tools.py +723 -207
  93. mcp_bridge/tools/model_invoke.py +1195 -273
  94. mcp_bridge/tools/mux_client.py +75 -0
  95. mcp_bridge/tools/project_context.py +1 -2
  96. mcp_bridge/tools/query_classifier.py +406 -0
  97. mcp_bridge/tools/read_file.py +84 -0
  98. mcp_bridge/tools/replace.py +45 -0
  99. mcp_bridge/tools/run_shell_command.py +38 -0
  100. mcp_bridge/tools/search_enhancements.py +347 -0
  101. mcp_bridge/tools/semantic_search.py +3627 -0
  102. mcp_bridge/tools/session_manager.py +0 -2
  103. mcp_bridge/tools/skill_loader.py +0 -1
  104. mcp_bridge/tools/task_runner.py +5 -7
  105. mcp_bridge/tools/templates.py +3 -3
  106. mcp_bridge/tools/tool_search.py +331 -0
  107. mcp_bridge/tools/write_file.py +29 -0
  108. mcp_bridge/update_manager.py +585 -0
  109. mcp_bridge/update_manager_pypi.py +297 -0
  110. mcp_bridge/utils/cache.py +82 -0
  111. mcp_bridge/utils/process.py +71 -0
  112. mcp_bridge/utils/session_state.py +51 -0
  113. mcp_bridge/utils/truncation.py +76 -0
  114. stravinsky-0.4.66.dist-info/METADATA +517 -0
  115. stravinsky-0.4.66.dist-info/RECORD +198 -0
  116. {stravinsky-0.2.67.dist-info → stravinsky-0.4.66.dist-info}/entry_points.txt +1 -0
  117. stravinsky_claude_assets/HOOKS_INTEGRATION.md +316 -0
  118. stravinsky_claude_assets/agents/HOOKS.md +437 -0
  119. stravinsky_claude_assets/agents/code-reviewer.md +210 -0
  120. stravinsky_claude_assets/agents/comment_checker.md +580 -0
  121. stravinsky_claude_assets/agents/debugger.md +254 -0
  122. stravinsky_claude_assets/agents/delphi.md +495 -0
  123. stravinsky_claude_assets/agents/dewey.md +248 -0
  124. stravinsky_claude_assets/agents/explore.md +1198 -0
  125. stravinsky_claude_assets/agents/frontend.md +472 -0
  126. stravinsky_claude_assets/agents/implementation-lead.md +164 -0
  127. stravinsky_claude_assets/agents/momus.md +464 -0
  128. stravinsky_claude_assets/agents/research-lead.md +141 -0
  129. stravinsky_claude_assets/agents/stravinsky.md +730 -0
  130. stravinsky_claude_assets/commands/delphi.md +9 -0
  131. stravinsky_claude_assets/commands/dewey.md +54 -0
  132. stravinsky_claude_assets/commands/git-master.md +112 -0
  133. stravinsky_claude_assets/commands/index.md +49 -0
  134. stravinsky_claude_assets/commands/publish.md +86 -0
  135. stravinsky_claude_assets/commands/review.md +73 -0
  136. stravinsky_claude_assets/commands/str/agent_cancel.md +70 -0
  137. stravinsky_claude_assets/commands/str/agent_list.md +56 -0
  138. stravinsky_claude_assets/commands/str/agent_output.md +92 -0
  139. stravinsky_claude_assets/commands/str/agent_progress.md +74 -0
  140. stravinsky_claude_assets/commands/str/agent_retry.md +94 -0
  141. stravinsky_claude_assets/commands/str/cancel.md +51 -0
  142. stravinsky_claude_assets/commands/str/clean.md +97 -0
  143. stravinsky_claude_assets/commands/str/continue.md +38 -0
  144. stravinsky_claude_assets/commands/str/index.md +199 -0
  145. stravinsky_claude_assets/commands/str/list_watchers.md +96 -0
  146. stravinsky_claude_assets/commands/str/search.md +205 -0
  147. stravinsky_claude_assets/commands/str/start_filewatch.md +136 -0
  148. stravinsky_claude_assets/commands/str/stats.md +71 -0
  149. stravinsky_claude_assets/commands/str/stop_filewatch.md +89 -0
  150. stravinsky_claude_assets/commands/str/unwatch.md +42 -0
  151. stravinsky_claude_assets/commands/str/watch.md +45 -0
  152. stravinsky_claude_assets/commands/strav.md +53 -0
  153. stravinsky_claude_assets/commands/stravinsky.md +292 -0
  154. stravinsky_claude_assets/commands/verify.md +60 -0
  155. stravinsky_claude_assets/commands/version.md +5 -0
  156. stravinsky_claude_assets/hooks/README.md +248 -0
  157. stravinsky_claude_assets/hooks/comment_checker.py +193 -0
  158. stravinsky_claude_assets/hooks/context.py +38 -0
  159. stravinsky_claude_assets/hooks/context_monitor.py +153 -0
  160. stravinsky_claude_assets/hooks/dependency_tracker.py +73 -0
  161. stravinsky_claude_assets/hooks/edit_recovery.py +46 -0
  162. stravinsky_claude_assets/hooks/execution_state_tracker.py +68 -0
  163. stravinsky_claude_assets/hooks/notification_hook.py +103 -0
  164. stravinsky_claude_assets/hooks/notification_hook_v2.py +96 -0
  165. stravinsky_claude_assets/hooks/parallel_execution.py +241 -0
  166. stravinsky_claude_assets/hooks/parallel_reinforcement.py +106 -0
  167. stravinsky_claude_assets/hooks/parallel_reinforcement_v2.py +112 -0
  168. stravinsky_claude_assets/hooks/pre_compact.py +123 -0
  169. stravinsky_claude_assets/hooks/ralph_loop.py +173 -0
  170. stravinsky_claude_assets/hooks/session_recovery.py +263 -0
  171. stravinsky_claude_assets/hooks/stop_hook.py +89 -0
  172. stravinsky_claude_assets/hooks/stravinsky_metrics.py +164 -0
  173. stravinsky_claude_assets/hooks/stravinsky_mode.py +146 -0
  174. stravinsky_claude_assets/hooks/subagent_stop.py +98 -0
  175. stravinsky_claude_assets/hooks/todo_continuation.py +111 -0
  176. stravinsky_claude_assets/hooks/todo_delegation.py +96 -0
  177. stravinsky_claude_assets/hooks/tool_messaging.py +281 -0
  178. stravinsky_claude_assets/hooks/truncator.py +23 -0
  179. stravinsky_claude_assets/rules/deployment_safety.md +51 -0
  180. stravinsky_claude_assets/rules/integration_wiring.md +89 -0
  181. stravinsky_claude_assets/rules/pypi_deployment.md +220 -0
  182. stravinsky_claude_assets/rules/stravinsky_orchestrator.md +32 -0
  183. stravinsky_claude_assets/settings.json +152 -0
  184. stravinsky_claude_assets/skills/chrome-devtools/SKILL.md +81 -0
  185. stravinsky_claude_assets/skills/sqlite/SKILL.md +77 -0
  186. stravinsky_claude_assets/skills/supabase/SKILL.md +74 -0
  187. stravinsky_claude_assets/task_dependencies.json +34 -0
  188. stravinsky-0.2.67.dist-info/METADATA +0 -284
  189. stravinsky-0.2.67.dist-info/RECORD +0 -76
  190. {stravinsky-0.2.67.dist-info → stravinsky-0.4.66.dist-info}/WHEEL +0 -0
@@ -0,0 +1,75 @@
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import os
5
+ import socket
6
+ import time
7
+ from dataclasses import asdict, dataclass
8
+ from datetime import datetime
9
+ from typing import Any
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ SOCKET_PATH = "/tmp/stravinsky.sock"
14
+
15
@dataclass
class LogMessage:
    """A single log record exchanged with the mux.

    ``MuxClient.log`` builds one of these per call and serializes it with
    ``dataclasses.asdict`` before sending it as a JSON line.
    """

    # Identifier of the agent that produced the record.
    agent_id: str
    # Record kind: stdout, stderr, event, lifecycle
    type: str
    # Text payload of the record.
    content: str
    # Timestamp string; MuxClient.log fills it with datetime.now().isoformat().
    timestamp: str
21
+
22
class MuxClient:
    """Best-effort client that streams log records to the mux Unix socket.

    Nothing here is allowed to disturb the caller: if the socket file at
    ``SOCKET_PATH`` is missing or the connection fails, records are dropped
    and the next ``log()`` call simply tries to connect again.
    """

    def __init__(self, agent_id: str):
        """Remember ``agent_id``; no connection is attempted until needed."""
        self.agent_id = agent_id
        self._socket: socket.socket | None = None
        self._connected = False

    def connect(self):
        """Try to (re)connect to the mux; failures are logged at debug level.

        Returns without doing anything when the socket file does not exist.
        Any previously held socket is closed before a new one is opened, and a
        socket whose connect/setblocking step fails is closed immediately so
        no file descriptor is leaked.
        """
        if not os.path.exists(SOCKET_PATH):
            return

        # Release whatever we held before so a reconnect never leaks a socket.
        self.close()

        sock = None
        try:
            sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
            sock.connect(SOCKET_PATH)
            # Non-blocking so that logging can never stall the caller.
            sock.setblocking(False)
        except Exception as e:
            logger.debug("Failed to connect to mux: %s", e)
            if sock is not None:
                sock.close()
            return

        self._socket = sock
        self._connected = True

    def log(self, content: str, stream: str = "stdout"):
        """Send ``content`` to the mux as one JSON line tagged with ``stream``.

        Connects lazily on first use. The record is silently dropped when no
        connection is available; a send failure tears the connection down so
        that the next call reconnects.
        """
        if not self._connected:
            self.connect()

        if not self._connected or self._socket is None:
            return

        msg = LogMessage(
            agent_id=self.agent_id,
            type=stream,
            content=content,
            timestamp=datetime.now().isoformat(),
        )

        try:
            data = json.dumps(asdict(msg)) + "\n"
            self._socket.sendall(data.encode('utf-8'))
        except OSError:
            # BrokenPipeError is an OSError subclass. NOTE(review): because the
            # socket is non-blocking, a send that cannot complete immediately
            # presumably also lands here and is treated as a disconnect.
            self.close()

    def close(self):
        """Close the socket (if any) and reset the client to disconnected."""
        sock, self._socket = self._socket, None
        self._connected = False
        if sock is not None:
            sock.close()
67
+
68
+ # Global instance for the main process
69
+ _global_mux: MuxClient | None = None
70
+
71
def get_mux(agent_id: str = "main") -> MuxClient:
    """Return the shared module-level MuxClient, creating it on first use.

    ``agent_id`` only matters for the call that creates the instance; every
    later call returns that same instance regardless of the argument.
    """
    global _global_mux
    if _global_mux is not None:
        return _global_mux
    _global_mux = MuxClient(agent_id)
    return _global_mux
@@ -10,12 +10,11 @@ import shutil
10
10
  import subprocess
11
11
  import sys
12
12
  from pathlib import Path
13
- from typing import Any, Dict, List, Optional
14
13
 
15
14
  from ..auth.token_store import TokenStore
16
15
 
17
16
 
18
- async def get_project_context(project_path: Optional[str] = None) -> str:
17
+ async def get_project_context(project_path: str | None = None) -> str:
19
18
  """
20
19
  Summarize project environment: Git status, local rules, and pending todos.
21
20
 
@@ -0,0 +1,406 @@
1
+ """Query classifier for intelligent search routing.
2
+
3
+ This module provides a fast, regex-based system that categorizes search queries
4
+ into four types: PATTERN (exact text matching), STRUCTURAL (AST-aware code structure),
5
+ SEMANTIC (conceptual/behavioral), and HYBRID (multi-modal).
6
+
7
+ It enables intelligent routing to the optimal search tool without LLM overhead.
8
+
9
+ Design Goals:
10
+ - Fast: <10ms classification per query
11
+ - No LLM calls: Pure regex-based detection (no API overhead)
12
+ - Confidence scoring: Return probability (0.0-1.0) for each category
13
+ - Fallback safe: Default to HYBRID when ambiguous
14
+ - Extensible: Easy to add new patterns/indicators
15
+ """
16
+
17
+ import logging
18
+ import re
19
+ from dataclasses import dataclass
20
+ from enum import Enum
21
+ from typing import Literal
22
+
23
+ # Module-level logger
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
class QueryCategory(Enum):
    """The four categories a search query can be classified into."""

    # Conceptual, "what it does" queries
    SEMANTIC = "semantic"
    # Exact text/regex matching
    PATTERN = "pattern"
    # AST-aware code structure queries
    STRUCTURAL = "structural"
    # Multi-modal search recommended
    HYBRID = "hybrid"
34
+
35
+
36
@dataclass
class QueryClassification:
    """Outcome of classifying one search query.

    Attributes:
        category: Winning category (SEMANTIC, PATTERN, STRUCTURAL or HYBRID).
        confidence: Score between 0.0 (low) and 1.0 (high).
        indicators: Names of the matched patterns / reasons behind the result.
        suggested_tool: Search tool recommended for the category:
            "grep_search" (PATTERN), "ast_grep_search" (STRUCTURAL),
            "semantic_search" (SEMANTIC) or "enhanced_search" (HYBRID).
        reasoning: Human-readable explanation of the classification.
    """

    category: QueryCategory
    # 0.0-1.0
    confidence: float
    # Matched patterns/reasons
    indicators: list[str]
    suggested_tool: Literal["semantic_search", "grep_search", "ast_grep_search", "enhanced_search"]
    # Human-readable explanation
    reasoning: str
59
+
60
+
61
# Every table below is a list of (regex_pattern, indicator_name) pairs that
# classify_query() runs with re.search().

# Phase 1: Exact Pattern Detection (High Confidence)
# Triggered when query contains quoted strings, exact identifiers with code syntax,
# file paths, regular expressions, or known constant patterns.
# classify_query() matches 'explicit_grep' against the lowercased query and all
# other entries against the original-case query.
PATTERN_INDICATORS = [
    # Explicit "grep" in query
    (r'\bgrep\b', 'explicit_grep'),
    # Quoted identifiers like "authenticate()" or 'API_KEY'
    (r'["\'][\w_()\.]+["\']', 'quoted_identifier'),
    # Function calls with () like authenticate()
    (r'\b\w+\(\)', 'function_call'),
    # Dot notation (Class.method) like database.query()
    (r'[\w_]+\.[\w_]+', 'dot_notation'),
    # File paths with extension
    (r'[\w/]+\.\w{2,4}$', 'file_path'),
    # Regex patterns
    (r'/.*?/', 'regex_pattern'),
    # CONSTANT_NAMES (4+ uppercase chars)
    (r'\b[A-Z_]{4,}\b', 'constant_name'),
]

# Phase 2: Structural Detection (High Confidence)
# Triggered when query contains AST keywords, structural relationships,
# or code structure terms. Matched against the lowercased query.
STRUCTURAL_INDICATORS = [
    # AST keywords
    (r'\b(class|function|method|async|interface)\b', 'ast_keyword'),
    # Inheritance
    (r'\b(inherits?|inheriting)\b', 'inheritance'),
    # Extension
    (r'\b(extends?|extending)\b', 'extends'),
    # Implementation
    (r'\b(implements?|implementing)\b', 'implements'),
    # Override
    (r'\b(overrides?|overriding)\b', 'override'),
    # Decorator patterns
    (r'\b(decorated?)\s+(with|by)\b', 'decorator_pattern'),
    # Decorator syntax
    (r'\@\w+', 'decorator_syntax'),
    # Code structure terms
    (r'\b(definition|declaration|signature)\b', 'code_structure'),
]

# Phase 3: Conceptual Detection (Medium-High Confidence)
# Triggered when query contains intent verbs, how/why/where questions,
# design patterns, conceptual nouns, or cross-cutting concerns.
# When a pattern has a capturing group, classify_query() reports the text of
# group 1 as the indicator instead of the indicator name.
SEMANTIC_INDICATORS = [
    # How questions (non-capturing group)
    (r'\bhow\s+(?:does|is|are)', 'how'),
    # Why questions (non-capturing group)
    (r'\bwhy\s+(?:does|is|are)', 'why'),
    # Where questions (non-capturing group)
    (r'\bwhere\s+(?:does|is|are)', 'where'),
    # Intent verbs. "process(?:es)?" (rather than "processes?") so that the
    # bare verb "process" is recognised as well; group 1 is still the whole word.
    (r'\b(handles?|manages?|process(?:es)?|validates?|validated?|transforms?)\b', 'intent'),
    # Conceptual nouns
    (r'\b(logic|mechanism|strategy|approach|workflow|implementation)\b', 'conceptual'),
    # Design patterns
    (r'\b(patterns?|anti-patterns?)\b', 'design_pattern'),
    # Cross-cutting concerns
    (r'\b(authentication|authorization|caching|logging|error handling|middleware)\b', 'cross_cutting'),
    # Find code pattern
    (r'\bfind\s+(all\s+)?(code|places|instances|implementations)\s+that\b', 'find_pattern'),
]

# Phase 4: Hybrid Detection (Medium Confidence)
# Triggered when query contains multiple concepts, both exact + conceptual,
# broad scopes, or vague qualifiers. As in phase 3, the text captured by
# group 1 is what classify_query() records as the indicator.
HYBRID_INDICATORS = [
    # Conjunctions
    (r'\s+(and|then|also|plus|with)\s+', 'conjunction'),
    # Broad scopes
    (r'\b(across|throughout|in all|system-wide)\b', 'broad_scope'),
    # Vague qualifiers
    (r'\b(similar|related|like|kind of|type of)\b', 'vague_qualifier'),
    # Broad quantifiers
    (r'\b(all|every|any)\s+\w+\s+(that|which|where)\b', 'broad_quantifier'),
]
115
+
116
# Search tool recommended for each query category; classify_query() looks the
# winning category up here to fill QueryClassification.suggested_tool.
TOOL_ROUTING = {
    QueryCategory.PATTERN: "grep_search",
    QueryCategory.STRUCTURAL: "ast_grep_search",
    QueryCategory.SEMANTIC: "semantic_search",
    QueryCategory.HYBRID: "enhanced_search",
}
123
+
124
+
125
# Explanation attached to the result for each winning category.
_CATEGORY_REASONING = {
    QueryCategory.PATTERN: "Query contains exact identifiers or code syntax",
    QueryCategory.STRUCTURAL: "Query requires AST-level understanding of code structure",
    QueryCategory.SEMANTIC: "Query asks about conceptual logic or behavior",
    QueryCategory.HYBRID: "Query combines multiple search approaches or is ambiguous",
}


def _default_classification(indicators: list[str], reasoning: str) -> QueryClassification:
    """Build the safe HYBRID / enhanced_search result used by every fallback path."""
    return QueryClassification(
        category=QueryCategory.HYBRID,
        confidence=0.5,
        indicators=indicators,
        suggested_tool="enhanced_search",
        reasoning=reasoning,
    )


def _log_classification(query_text: str, result: QueryClassification) -> None:
    """Emit the debug line describing a finished classification."""
    logger.debug(
        "QUERY-CLASSIFY: query='%s...' category=%s confidence=%.2f tool=%s",
        query_text[:50],
        result.category.value,
        result.confidence,
        result.suggested_tool,
    )


def _contains_high_value_token(query_text: str, query_lower: str) -> bool:
    """Return True for a CONSTANT (4+ uppercase), a quoted identifier or an explicit grep."""
    return bool(
        re.search(r'\b[A-Z_]{4,}\b', query_text)
        or re.search(r'["\'][\w_()\.]+["\']', query_text)
        or re.search(r'\bgrep\b', query_lower)
    )


def _matched_labels(
    indicators: list[tuple[str, str]], text: str, *, use_captured_word: bool
) -> list[str]:
    """Return one label per indicator regex that matches ``text``, in table order.

    With ``use_captured_word`` the label is the text of capture group 1 when
    the regex has groups and that group is non-empty; otherwise the label is
    the indicator name.
    """
    labels = []
    for regex, name in indicators:
        found = re.search(regex, text)
        if not found:
            continue
        captured = found.group(1) if use_captured_word and found.groups() else None
        labels.append(captured or name)
    return labels


def classify_query(query: str) -> QueryClassification:
    """Classify a search query into one of four categories.

    This function analyzes a search query using regex-based pattern matching
    to determine its type (PATTERN, STRUCTURAL, SEMANTIC, or HYBRID) and
    recommends the most appropriate search tool.

    The classification runs four detection phases followed by scoring:
    1. Pattern Detection: Looks for exact identifiers, quoted strings, file paths
    2. Structural Detection: Looks for AST keywords (class, function, etc.)
    3. Conceptual Detection: Looks for intent verbs and semantic concepts
    4. Hybrid Detection: Looks for conjunctions and broad scopes
    Fallback: Defaults to HYBRID with 0.5 confidence if nothing matches, if the
    query is empty / not a string / shorter than 3 characters after stripping,
    or if any exception is raised while classifying.

    Args:
        query: Natural language search query (e.g., "Find authenticate()" or
            "Where is authentication handled?")

    Returns:
        QueryClassification object containing:
        - category: One of SEMANTIC, PATTERN, STRUCTURAL, HYBRID
        - confidence: Score from 0.0 to 1.0 (capped at 0.95, never 1.0)
        - indicators: List of matched indicator names (or, for semantic and
          hybrid indicators with a capture group, the matched word)
        - suggested_tool: Recommended tool (grep_search, ast_grep_search,
          semantic_search, or enhanced_search)
        - reasoning: Human-readable explanation

    Examples:
        >>> result = classify_query("Find all calls to authenticate()")
        >>> result.category
        <QueryCategory.PATTERN: 'pattern'>
        >>> result.confidence
        0.5
        >>> result.suggested_tool
        'grep_search'

        >>> result = classify_query("Where is authentication handled?")
        >>> result.category
        <QueryCategory.SEMANTIC: 'semantic'>
        >>> result.confidence
        0.95
        >>> result.suggested_tool
        'semantic_search'

        >>> result = classify_query("Find class definitions inheriting from Base")
        >>> result.category
        <QueryCategory.STRUCTURAL: 'structural'>
        >>> result.confidence
        0.95
        >>> result.suggested_tool
        'ast_grep_search'

    Performance:
        - Target: <10ms per classification
        - Uses only pure Python stdlib (re module)
        - No external dependencies or API calls
    """
    try:
        # Input validation
        if not query or not isinstance(query, str):
            return _default_classification(
                ["invalid_input"], "Invalid or empty query, using safe default"
            )

        # Normalize query
        query_normalized = query.strip()
        if len(query_normalized) < 3:
            return _default_classification(
                ["too_short"], "Query too short for accurate classification"
            )

        query_lower = query_normalized.lower()

        # Phase 1: Pattern Detection. 'explicit_grep' is matched
        # case-insensitively; everything else keeps the original case so that
        # case-sensitive patterns (CONSTANTS, etc.) still work.
        pattern_indicators = [
            name
            for regex, name in PATTERN_INDICATORS
            if re.search(regex, query_lower if name == 'explicit_grep' else query_normalized)
        ]

        # Phases 2-4 all run against the lowercased query.
        structural_indicators = _matched_labels(
            STRUCTURAL_INDICATORS, query_lower, use_captured_word=False
        )
        semantic_indicators = _matched_labels(
            SEMANTIC_INDICATORS, query_lower, use_captured_word=True
        )
        hybrid_indicators = _matched_labels(
            HYBRID_INDICATORS, query_lower, use_captured_word=True
        )

        # Confidence Scoring
        # Score contributed per match:
        # - PATTERN: 0.50, plus a single 0.45 bonus for high-value patterns
        # - STRUCTURAL: 0.95 (single AST keyword should be high confidence)
        # - SEMANTIC: 0.95 (single intent/concept should be high confidence)
        # - HYBRID: 0.40 (multi-modal indicators)
        # Note: the reported confidence is capped at 0.95.

        # High-value patterns: CONSTANTS, quoted identifiers, explicit grep.
        # Computed once; used for the bonus, the HYBRID boost and tie-breaking.
        has_high_value_pattern = bool(pattern_indicators) and _contains_high_value_token(
            query_normalized, query_lower
        )

        pattern_score = len(pattern_indicators) * 0.50
        if has_high_value_pattern:
            pattern_score += 0.45  # Bonus to reach 0.95

        scores = {
            QueryCategory.PATTERN: pattern_score,
            QueryCategory.STRUCTURAL: len(structural_indicators) * 0.95,
            QueryCategory.SEMANTIC: len(semantic_indicators) * 0.95,
            QueryCategory.HYBRID: len(hybrid_indicators) * 0.40,
        }

        # Count how many non-HYBRID categories have matches
        categories_with_matches = sum(
            1
            for labels in (pattern_indicators, structural_indicators, semantic_indicators)
            if labels
        )

        # Boost HYBRID score based on type of HYBRID indicator and what categories match
        # Exception: Don't boost if PATTERN has high-value matches (they take precedence)
        if hybrid_indicators and not has_high_value_pattern:
            # Look for the actual captured words, not indicator names
            broad_scope_words = ['across', 'throughout', 'in all', 'system-wide']
            conjunction_words = ['and', 'then', 'also', 'plus', 'with']
            # Strong vague qualifiers (but not "similar" with design patterns)
            vague_words = ['related', 'like']
            # Broad scope is a substring test on the stringified list; the
            # other two are exact list-membership tests.
            has_broad_scope = any(
                word in str(hybrid_indicators).lower() for word in broad_scope_words
            )
            has_conjunction = any(word in hybrid_indicators for word in conjunction_words)
            has_vague = any(word in hybrid_indicators for word in vague_words)

            # Boost to 0.95 if:
            # 1. Multiple categories match (PATTERN+SEMANTIC, STRUCTURAL+SEMANTIC, etc.), OR
            # 2. Broad scope, conjunction, or vague qualifiers (strong HYBRID signals)
            if categories_with_matches >= 2 or has_broad_scope or has_conjunction or has_vague:
                scores[QueryCategory.HYBRID] = 0.95
            # Or if PATTERN or STRUCTURAL matches (even with just 1), boost slightly
            elif pattern_indicators or structural_indicators:
                scores[QueryCategory.HYBRID] = 0.90
            # For SEMANTIC + "similar" only: no boost here, handled by tie-breaking

        # Find maximum score
        max_score = max(scores.values())

        # Fallback to HYBRID if no matches
        if max_score == 0:
            result = _default_classification(
                [], "No clear indicators found, using multi-modal search"
            )
            _log_classification(query_normalized, result)
            return result

        # Find all categories with maximum score (potential ties)
        winners = [cat for cat, score in scores.items() if score == max_score]
        confidence = min(max_score, 0.95)

        if len(winners) == 1:
            category = winners[0]
        # Tie-breaking: prefer PATTERN if it has high-value matches
        # (CONSTANTS, quoted strings, explicit grep)
        elif QueryCategory.PATTERN in winners and has_high_value_pattern:
            category = QueryCategory.PATTERN
        # Prefer SEMANTIC if it has design pattern indicators
        # (semantic concept wins over vague "similar")
        elif QueryCategory.SEMANTIC in winners and any(
            'pattern' in str(ind).lower() for ind in semantic_indicators
        ):
            category = QueryCategory.SEMANTIC
        else:
            # Otherwise use HYBRID for mixed queries
            category = QueryCategory.HYBRID

        # Gather all indicators for reporting (use specific names)
        all_indicators = [
            *pattern_indicators,
            *structural_indicators,
            *semantic_indicators,
            *hybrid_indicators,
        ]

        result = QueryClassification(
            category=category,
            confidence=confidence,
            indicators=all_indicators,
            suggested_tool=TOOL_ROUTING[category],
            reasoning=_CATEGORY_REASONING[category],
        )

        # Log classification for analytics
        _log_classification(query_normalized, result)

        return result

    except Exception as e:
        # Safe fallback on any error
        logger.exception("Error classifying query: %s", e)
        return _default_classification(
            ["error"], f"Classification error: {str(e)}, using safe default"
        )
@@ -0,0 +1,84 @@
1
+ import os
2
+ from pathlib import Path
3
+ from typing import Optional
4
+ from mcp_bridge.utils.truncation import truncate_output, TruncationStrategy
5
+
6
+ from mcp_bridge.utils.cache import IOCache
7
+
8
async def read_file(
    path: str,
    offset: int = 0,
    limit: Optional[int] = None,
    max_chars: int = 20000
) -> str:
    """
    Return a slice of a text file as a string, capped at ``max_chars`` characters.

    Args:
        path: File to read (decoded as UTF-8, undecodable bytes replaced).
        offset: Index of the first line to return.
        limit: Maximum number of lines to return; ``None`` means "to the end".
        max_chars: Character cap handed to ``truncate_output``.

    Returns:
        The selected text, or a string starting with "Error" when the path is
        missing, is not a regular file, or reading fails. Results are stored
        in the shared IOCache under a key built from the resolved path and all
        three slicing arguments; a truthy cached value is returned directly.

    Files ending in .log/.out/.err with more than 100 lines default to their
    last 100 lines when neither ``offset`` nor ``limit`` was given, and a note
    saying so is appended to the result.
    """
    # USER-VISIBLE NOTIFICATION
    import sys
    print(f"📖 READ: {path} (offset={offset}, limit={limit})", file=sys.stderr)

    io_cache = IOCache.get_instance()
    key = f"read_file:{os.path.realpath(path)}:{offset}:{limit}:{max_chars}"

    hit = io_cache.get(key)
    if hit:
        return hit

    target = Path(path)
    if not target.exists():
        return f"Error: File not found: {path}"
    if not target.is_file():
        return f"Error: Path is not a file: {path}"

    try:
        looks_like_log = target.suffix.lower() in (".log", ".out", ".err")

        with open(target, "r", encoding="utf-8", errors="replace") as handle:
            all_lines = handle.readlines()
        line_count = len(all_lines)

        # Large log with no explicit window requested: show the tail instead.
        tail_by_default = (
            looks_like_log and limit is None and offset == 0 and line_count > 100
        )
        if tail_by_default:
            offset = max(0, line_count - 100)
            limit = 100
            strategy = TruncationStrategy.TAIL
            guidance = "Log file detected. Reading last 100 lines by default."
        else:
            strategy = TruncationStrategy.MIDDLE
            guidance = None

        # Line window first, then the universal character cap.
        stop = line_count if limit is None else offset + limit
        content = "".join(all_lines[offset:stop])

        result = truncate_output(
            content,
            limit=max_chars,
            strategy=strategy,
            custom_guidance=guidance
        )

        # truncate_output may leave the guidance out (the original comment
        # says this happens when content < max_chars); append it ourselves.
        if guidance and guidance not in result:
            result = f"{result}\n\n[{guidance}]"

        # Entry lifetime is decided by IOCache (original note: 5 seconds).
        io_cache.set(key, result)
        return result

    except Exception as e:
        return f"Error reading file {path}: {str(e)}"
@@ -0,0 +1,45 @@
1
+ import os
2
+ from pathlib import Path
3
+ from mcp_bridge.utils.cache import IOCache
4
+
5
async def replace(
    path: str,
    old_string: str,
    new_string: str,
    instruction: str,
    expected_replacements: int = 1
) -> str:
    """
    Replace text in a file and invalidate cache.

    Args:
        path: File to modify (read and written as UTF-8).
        old_string: Exact text to look for; must not be empty.
        new_string: Text that replaces every occurrence of ``old_string``.
        instruction: Free-text description, only echoed in the stderr notice.
        expected_replacements: Number of occurrences that must be present;
            the file is left untouched when the actual count differs.

    Returns:
        A success message with the replacement count, or a string starting
        with "Error" describing why nothing was changed.
    """
    # USER-VISIBLE NOTIFICATION
    import sys
    print(f"🔄 REPLACE: {path} (instruction: {instruction})", file=sys.stderr)

    file_path = Path(path)
    if not file_path.exists():
        return f"Error: File not found: {path}"

    # str.count("") is len(text) + 1 and str.replace("", x) inserts x between
    # every character, so an empty needle could silently corrupt the file.
    if not old_string:
        return f"Error: old_string must not be empty (file unchanged: {path})"

    try:
        content = file_path.read_text(encoding="utf-8")

        # Check occurrence count
        count = content.count(old_string)
        if count == 0:
            return f"Error: Could not find exact match for old_string in {path}"

        if count != expected_replacements:
            return f"Error: Found {count} occurrences of old_string, but expected {expected_replacements} in {path}"

        # Perform replacement
        new_content = content.replace(old_string, new_string)
        file_path.write_text(new_content, encoding="utf-8")

        # Invalidate cache. read_file keys its entries by os.path.realpath(path),
        # so invalidate the resolved path as well as the path as given
        # (dict.fromkeys drops the duplicate when both are the same string).
        cache = IOCache.get_instance()
        for cached_path in dict.fromkeys((str(file_path), os.path.realpath(path))):
            cache.invalidate_path(cached_path)

        return f"Successfully modified file: {path} ({count} replacements)."

    except Exception as e:
        return f"Error modifying file {path}: {str(e)}"
@@ -0,0 +1,38 @@
1
+ import os
2
+ from mcp_bridge.utils.cache import IOCache
3
+ from mcp_bridge.utils.process import async_execute
4
+
5
async def run_shell_command(command: str, description: str, dir_path: str = ".") -> str:
    """
    Run ``command`` in ``dir_path`` and report its stdout, stderr and exit code.

    After the command finishes, the cache for ``dir_path`` is invalidated when
    the command text contains one of a few write-like substrings (a simplistic
    heuristic). ``description`` is only echoed in the stderr notice. Any
    exception is returned as an "Error executing command: ..." string.
    """
    # USER-VISIBLE NOTIFICATION
    import sys
    print(f"🐚 BASH: {command} ({description})", file=sys.stderr)

    try:
        # Run command asynchronously
        outcome = await async_execute(command, cwd=dir_path, timeout=300)

        # Substrings that suggest the command wrote to disk.
        write_markers = (
            "git commit", "git push", "rm ", "mv ", "cp ", "touch ",
            "> ", ">> ", "sed ", "chmod ",
        )
        for marker in write_markers:
            if marker in command:
                # Broad invalidation: drop everything cached under the working dir.
                IOCache.get_instance().invalidate_path(os.path.abspath(dir_path))
                break

        # Format output
        return "\n".join([
            f"Command: {command}",
            f"Directory: {dir_path}",
            f"Stdout: {outcome.stdout}",
            f"Stderr: {outcome.stderr}",
            f"Exit Code: {outcome.returncode}",
        ])

    except Exception as e:
        return f"Error executing command: {str(e)}"