stravinsky 0.2.67__py3-none-any.whl → 0.4.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of stravinsky might be problematic. Click here for more details.
- mcp_bridge/__init__.py +1 -1
- mcp_bridge/auth/token_store.py +113 -11
- mcp_bridge/config/MANIFEST_SCHEMA.md +305 -0
- mcp_bridge/config/README.md +276 -0
- mcp_bridge/config/hook_config.py +249 -0
- mcp_bridge/config/hooks_manifest.json +138 -0
- mcp_bridge/config/rate_limits.py +222 -0
- mcp_bridge/config/skills_manifest.json +128 -0
- mcp_bridge/hooks/__init__.py +8 -3
- mcp_bridge/hooks/manager.py +8 -0
- mcp_bridge/hooks/tool_messaging.py +113 -10
- mcp_bridge/notifications.py +151 -0
- mcp_bridge/server.py +202 -48
- mcp_bridge/server_tools.py +440 -0
- mcp_bridge/tools/__init__.py +22 -18
- mcp_bridge/tools/agent_manager.py +197 -28
- mcp_bridge/tools/code_search.py +16 -2
- mcp_bridge/tools/lsp/__init__.py +7 -0
- mcp_bridge/tools/lsp/manager.py +448 -0
- mcp_bridge/tools/lsp/tools.py +634 -151
- mcp_bridge/tools/model_invoke.py +186 -159
- mcp_bridge/tools/query_classifier.py +323 -0
- mcp_bridge/tools/semantic_search.py +3042 -0
- mcp_bridge/update_manager.py +589 -0
- mcp_bridge/update_manager_pypi.py +299 -0
- {stravinsky-0.2.67.dist-info → stravinsky-0.4.18.dist-info}/METADATA +209 -25
- {stravinsky-0.2.67.dist-info → stravinsky-0.4.18.dist-info}/RECORD +29 -17
- {stravinsky-0.2.67.dist-info → stravinsky-0.4.18.dist-info}/WHEEL +0 -0
- {stravinsky-0.2.67.dist-info → stravinsky-0.4.18.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
"""Query classifier for intelligent search routing.
|
|
2
|
+
|
|
3
|
+
This module provides a fast, regex-based system that categorizes search queries
|
|
4
|
+
into four types: PATTERN (exact text matching), STRUCTURAL (AST-aware code structure),
|
|
5
|
+
SEMANTIC (conceptual/behavioral), and HYBRID (multi-modal).
|
|
6
|
+
|
|
7
|
+
It enables intelligent routing to the optimal search tool without LLM overhead.
|
|
8
|
+
|
|
9
|
+
Design Goals:
|
|
10
|
+
- Fast: <10ms classification per query
|
|
11
|
+
- No LLM calls: Pure regex-based detection (no API overhead)
|
|
12
|
+
- Confidence scoring: Return probability (0.0-1.0) for each category
|
|
13
|
+
- Fallback safe: Default to HYBRID when ambiguous
|
|
14
|
+
- Extensible: Easy to add new patterns/indicators
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
import re
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from enum import Enum
|
|
21
|
+
from typing import Literal
|
|
22
|
+
|
|
23
|
+
# Module-level logger
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class QueryCategory(Enum):
    """Categories a search query can be classified into.

    Each member's value is the lowercase string form of the category name.
    """

    # Conceptual, "what it does" queries.
    SEMANTIC = "semantic"
    # Exact text/regex matching.
    PATTERN = "pattern"
    # AST-aware code structure queries.
    STRUCTURAL = "structural"
    # Multi-modal search recommended (also used as the ambiguous/default case).
    HYBRID = "hybrid"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class QueryClassification:
    """Result of classifying a search query.

    Attributes:
        category: The classified query category (SEMANTIC, PATTERN,
            STRUCTURAL, or HYBRID).
        confidence: Confidence score from 0.0 (low) to 1.0 (high).
        indicators: Labels describing which indicator groups matched or why a
            default was used (e.g. "pattern_match", "invalid_input",
            "too_short", "error").
        suggested_tool: The recommended search tool to use:
            - "grep_search" for PATTERN queries
            - "ast_grep_search" for STRUCTURAL queries
            - "semantic_search" for SEMANTIC queries
            - "enhanced_search" for HYBRID queries
        reasoning: Human-readable explanation of the classification.
    """

    category: QueryCategory
    confidence: float  # 0.0-1.0
    indicators: list[str]  # Matched indicator-group labels / fallback reasons
    suggested_tool: Literal[
        "semantic_search", "grep_search", "ast_grep_search", "enhanced_search"
    ]
    reasoning: str  # Human-readable explanation
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Phase 1: Exact Pattern Detection (High Confidence)
# Triggered when query contains quoted strings, exact identifiers with code syntax,
# file paths, regular expressions, or known constant patterns.
PATTERN_INDICATORS = [
    r'["\'][\w_]+["\']',  # Quoted bare identifiers like "authenticate" or 'API_KEY'
    r'\b\w+\(\)',  # Function calls with () like authenticate()
    r'[\w_]+\.[\w_]+',  # Dot notation (Class.method) like database.query
    r'[\w/]+\.\w{2,4}$',  # File path with a 2-4 char extension at end of query
    r'/.*?/',  # Regex patterns (any text between two slashes)
    # CONSTANT_NAMES (4+ uppercase/underscore chars). Case-sensitive: it only
    # matches when applied to text whose original casing has been preserved.
    r'\b[A-Z_]{4,}\b',
]

# Phase 2: Structural Detection (High Confidence)
# Triggered when query contains AST keywords, structural relationships,
# or code structure terms.
STRUCTURAL_INDICATORS = [
    r'\b(class|function|method|async|interface)\b',  # AST keywords
    r'\b(inherits?|extends?|implements?|overrides?)\b',  # Structural relationships
    r'\b(decorated?)\s+(with|by)\b',  # Decorator patterns
    r'\@\w+',  # Decorator syntax
    r'\b(definition|declaration|signature)\b',  # Code structure terms
]

# Phase 3: Conceptual Detection (Medium-High Confidence)
# Triggered when query contains intent verbs, how/why/where questions,
# design patterns, conceptual nouns, or cross-cutting concerns.
SEMANTIC_INDICATORS = [
    r'\b(how|why|where)\s+(does|is|are)',  # How/why/where questions
    r'\b(handles?|manages?|processes?|validates?|transforms?)\b',  # Intent verbs
    r'\b(logic|mechanism|strategy|approach|workflow|implementation)\b',  # Conceptual nouns
    r'\b(pattern|anti-pattern)\b',  # Design patterns
    r'\b(authentication|authorization|caching|logging|error handling)\b',  # Cross-cutting
    r'\bfind\s+(all\s+)?(code|places|instances|implementations)\s+that\b',  # Find code pattern
]

# Phase 4: Hybrid Detection (Medium Confidence)
# Triggered when query contains multiple concepts, both exact + conceptual,
# broad scopes, or vague qualifiers.
HYBRID_INDICATORS = [
    r'\s+(and|then|also|plus|with)\s+',  # Conjunctions
    r'\b(across|throughout|in all|system-wide)\b',  # Broad scopes
    r'\b(similar|related|like|kind of|type of)\b',  # Vague qualifiers
    r'\b(all|every|any)\s+\w+\s+(that|which|where)\b',  # Broad quantifiers
]
|
|
105
|
+
|
|
106
|
+
# Tool routing based on category: maps each QueryCategory to the name of the
# search tool recommended for it.
TOOL_ROUTING = {
    QueryCategory.PATTERN: "grep_search",
    QueryCategory.STRUCTURAL: "ast_grep_search",
    QueryCategory.SEMANTIC: "semantic_search",
    QueryCategory.HYBRID: "enhanced_search",
}
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# Score contributed by each matching indicator, per category, in hundredths
# (15 == 0.15). Integer scores make tie detection exact; with float weights
# 3 * 0.20 and 4 * 0.15 compare unequal even though both mean 0.60.
_MATCH_WEIGHTS = {
    QueryCategory.PATTERN: 15,
    QueryCategory.STRUCTURAL: 20,
    QueryCategory.SEMANTIC: 15,
    QueryCategory.HYBRID: 10,
}

# Label reported in ``indicators`` when a category has at least one match.
_INDICATOR_LABELS = {
    QueryCategory.PATTERN: "pattern_match",
    QueryCategory.STRUCTURAL: "structural_match",
    QueryCategory.SEMANTIC: "semantic_match",
    QueryCategory.HYBRID: "hybrid_match",
}

# Human-readable explanation attached to each winning category.
_CATEGORY_REASONING = {
    QueryCategory.PATTERN: "Query contains exact identifiers or code syntax",
    QueryCategory.STRUCTURAL: "Query requires AST-level understanding of code structure",
    QueryCategory.SEMANTIC: "Query asks about conceptual logic or behavior",
    QueryCategory.HYBRID: "Query combines multiple search approaches or is ambiguous",
}

_MAX_CONFIDENCE = 0.95  # Scored results are capped here, never reaching 1.0
_FALLBACK_CONFIDENCE = 0.5  # Confidence reported with every safe default


def _fallback_classification(indicators: list[str], reasoning: str) -> QueryClassification:
    """Build the safe default result: HYBRID routed to enhanced_search."""
    return QueryClassification(
        category=QueryCategory.HYBRID,
        confidence=_FALLBACK_CONFIDENCE,
        indicators=indicators,
        suggested_tool="enhanced_search",
        reasoning=reasoning,
    )


def _count_matches(patterns: list[str], text: str) -> int:
    """Return how many of the regex ``patterns`` are found anywhere in ``text``."""
    return sum(1 for pattern in patterns if re.search(pattern, text))


def _log_classification(query_text: str, result: QueryClassification) -> None:
    """Emit the debug analytics line for a classification result."""
    logger.debug(
        "QUERY-CLASSIFY: query='%s...' category=%s confidence=%.2f tool=%s",
        query_text[:50],
        result.category.value,
        result.confidence,
        result.suggested_tool,
    )


def classify_query(query: str) -> QueryClassification:
    """Classify a search query into one of four categories.

    The query is matched against four groups of regex indicators and the
    category with the highest weighted match count wins. No LLM or external
    call is involved; only the ``re`` module is used.

    Steps:
        1. Pattern detection: exact identifiers, quoted strings, file paths,
           CONSTANT_NAMES. Matched against the case-preserved query so the
           uppercase-constant indicator can fire.
        2. Structural detection: AST keywords (class, function, ...).
        3. Semantic detection: intent verbs and conceptual terms.
        4. Hybrid detection: conjunctions, broad scopes, vague qualifiers.
        5. Scoring: each match adds 0.15 (pattern), 0.20 (structural),
           0.15 (semantic) or 0.10 (hybrid) to its category. The top category
           wins; a tie for the top score yields HYBRID. If nothing matches,
           the result is HYBRID with 0.5 confidence.

    Steps 2-4 are matched against the lowercased query.

    Args:
        query: Natural language search query (e.g., "Find authenticate()" or
            "Where is authentication handled?").

    Returns:
        QueryClassification containing:
            - category: One of SEMANTIC, PATTERN, STRUCTURAL, HYBRID
            - confidence: The winning score, capped at 0.95; 0.5 for every
              default/fallback result
            - indicators: Labels of the indicator groups that matched
              ("pattern_match", "structural_match", "semantic_match",
              "hybrid_match"), or ["invalid_input"], ["too_short"],
              ["error"], or [] for the fallbacks
            - suggested_tool: grep_search, ast_grep_search, semantic_search,
              or enhanced_search
            - reasoning: Human-readable explanation

        Non-string or empty input, queries shorter than 3 characters after
        stripping, and any unexpected exception all return the HYBRID
        default instead of raising.

    Examples:
        >>> result = classify_query("Find all calls to authenticate()")
        >>> result.category
        <QueryCategory.PATTERN: 'pattern'>
        >>> result.confidence
        0.15
        >>> result.suggested_tool
        'grep_search'

        >>> result = classify_query("Where is authentication handled?")
        >>> result.category
        <QueryCategory.SEMANTIC: 'semantic'>
        >>> result.confidence
        0.3
        >>> result.suggested_tool
        'semantic_search'

        >>> result = classify_query("Find class definitions inheriting from Base")
        >>> result.category
        <QueryCategory.STRUCTURAL: 'structural'>
        >>> result.confidence
        0.2
        >>> result.suggested_tool
        'ast_grep_search'
    """
    try:
        # Check the type first so truthiness is only evaluated on real strings.
        if not isinstance(query, str) or not query:
            return _fallback_classification(
                ["invalid_input"], "Invalid or empty query, using safe default"
            )

        query_normalized = query.strip()
        if len(query_normalized) < 3:
            return _fallback_classification(
                ["too_short"], "Query too short for accurate classification"
            )

        query_lower = query_normalized.lower()

        # Pattern indicators include a case-sensitive CONSTANT_NAME regex, so
        # they must see the original casing; the other groups are written in
        # lowercase and are matched against the lowercased query.
        match_counts = {
            QueryCategory.PATTERN: _count_matches(PATTERN_INDICATORS, query_normalized),
            QueryCategory.STRUCTURAL: _count_matches(STRUCTURAL_INDICATORS, query_lower),
            QueryCategory.SEMANTIC: _count_matches(SEMANTIC_INDICATORS, query_lower),
            QueryCategory.HYBRID: _count_matches(HYBRID_INDICATORS, query_lower),
        }
        scores = {
            category: count * _MATCH_WEIGHTS[category]
            for category, count in match_counts.items()
        }
        top_score = max(scores.values())

        # Nothing matched at all: fall back to multi-modal search.
        if top_score == 0:
            result = _fallback_classification(
                [], "No clear indicators found, using multi-modal search"
            )
            _log_classification(query_normalized, result)
            return result

        # A tie for the top score means the query is ambiguous -> HYBRID.
        winners = [category for category, score in scores.items() if score == top_score]
        category = winners[0] if len(winners) == 1 else QueryCategory.HYBRID
        confidence = min(top_score / 100, _MAX_CONFIDENCE)

        result = QueryClassification(
            category=category,
            confidence=confidence,
            indicators=[
                _INDICATOR_LABELS[matched_category]
                for matched_category, count in match_counts.items()
                if count
            ],
            suggested_tool=TOOL_ROUTING[category],
            reasoning=_CATEGORY_REASONING[category],
        )

        # Log classification for analytics
        _log_classification(query_normalized, result)
        return result

    except Exception as e:
        # Deliberate catch-all boundary: classification must never break the
        # caller's search, so log the traceback and return the safe default.
        logger.exception("Error classifying query: %s", e)
        return _fallback_classification(
            ["error"], f"Classification error: {str(e)}, using safe default"
        )
|