tarang 4.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tarang/__init__.py +23 -0
- tarang/cli.py +1168 -0
- tarang/client/__init__.py +19 -0
- tarang/client/api_client.py +701 -0
- tarang/client/auth.py +178 -0
- tarang/context/__init__.py +41 -0
- tarang/context/bm25.py +218 -0
- tarang/context/chunker.py +984 -0
- tarang/context/graph.py +464 -0
- tarang/context/indexer.py +514 -0
- tarang/context/retriever.py +270 -0
- tarang/context/skeleton.py +282 -0
- tarang/context_collector.py +449 -0
- tarang/executor/__init__.py +6 -0
- tarang/executor/diff_apply.py +246 -0
- tarang/executor/linter.py +184 -0
- tarang/stream.py +1346 -0
- tarang/ui/__init__.py +7 -0
- tarang/ui/console.py +407 -0
- tarang/ui/diff_viewer.py +146 -0
- tarang/ui/formatter.py +1151 -0
- tarang/ui/keyboard.py +197 -0
- tarang/ws/__init__.py +14 -0
- tarang/ws/client.py +464 -0
- tarang/ws/executor.py +638 -0
- tarang/ws/handlers.py +590 -0
- tarang-4.4.0.dist-info/METADATA +102 -0
- tarang-4.4.0.dist-info/RECORD +31 -0
- tarang-4.4.0.dist-info/WHEEL +5 -0
- tarang-4.4.0.dist-info/entry_points.txt +2 -0
- tarang-4.4.0.dist-info/top_level.txt +1 -0
tarang/client/auth.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tarang Authentication - CLI login and token management.
|
|
3
|
+
|
|
4
|
+
Handles OAuth flow via browser and secure token storage.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import json
|
|
10
|
+
import webbrowser
|
|
11
|
+
from http.server import BaseHTTPRequestHandler, HTTPServer
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Optional
|
|
14
|
+
from urllib.parse import parse_qs, urlparse
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
CONFIG_DIR = Path.home() / ".tarang"
|
|
18
|
+
CONFIG_FILE = CONFIG_DIR / "config.json"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TarangAuth:
    """
    Handles CLI authentication via a browser-based login flow.

    Credentials are stored as a JSON object in ~/.tarang/config.json.
    The file is created with owner-only (0600) permissions.
    """

    def __init__(self, web_url: str = "https://devtarang.ai"):
        """
        Args:
            web_url: Base URL of the web app that hosts the ``/auth/cli`` page.
        """
        self.web_url = web_url
        # Token cached in memory by save_token(); not populated from disk here.
        self.token: Optional[str] = None

    def load_credentials(self) -> Optional[dict]:
        """
        Load saved credentials from the config file.

        Returns:
            The stored JSON object, or None when the file is missing,
            unreadable, not valid JSON, or not a JSON object.
        """
        try:
            data = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # OSError: missing/unreadable file.
            # ValueError: JSONDecodeError and UnicodeDecodeError.
            return None
        # Callers use dict.get() on the result, so reject lists/strings/etc.
        return data if isinstance(data, dict) else None

    def save_credentials(self, **kwargs) -> None:
        """
        Merge ``kwargs`` into the stored credentials and write them to disk.

        Existing keys not named in ``kwargs`` are preserved.
        """
        import os  # local import: this block must not rely on file-level edits

        CONFIG_DIR.mkdir(parents=True, exist_ok=True)
        config = self.load_credentials() or {}
        config.update(kwargs)
        payload = json.dumps(config, indent=2)

        # Create the file with 0600 from the start, so the secret is never
        # on disk with default (umask-derived) permissions.
        fd = os.open(
            str(CONFIG_FILE),
            os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
            0o600,
        )
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            handle.write(payload)
        # The mode passed to os.open() only applies to newly created files;
        # tighten a pre-existing file as well.
        CONFIG_FILE.chmod(0o600)

    def get_token(self) -> Optional[str]:
        """Return the saved auth token, or None if there is none."""
        creds = self.load_credentials()
        return creds.get("token") if creds else None

    def get_openrouter_key(self) -> Optional[str]:
        """Return the saved OpenRouter API key, or None if there is none."""
        creds = self.load_credentials()
        return creds.get("openrouter_key") if creds else None

    def save_token(self, token: str) -> None:
        """Persist the auth token and cache it on this instance."""
        self.save_credentials(token=token)
        self.token = token

    def save_openrouter_key(self, key: str) -> None:
        """Persist the OpenRouter API key."""
        self.save_credentials(openrouter_key=key)

    def clear_credentials(self) -> None:
        """Delete the config file (if any) and drop the cached token."""
        try:
            CONFIG_FILE.unlink()
        except FileNotFoundError:
            # Already gone; nothing to clear on disk.
            pass
        self.token = None

    async def login(self, callback_port: int = 54321) -> str:
        """
        Run the browser login flow and store the resulting token.

        Steps:
        1. Start a local server to receive the callback
        2. Open the browser to the auth page
        3. Wait (up to 5 minutes) for the callback, then store the token

        Args:
            callback_port: Local port the callback server listens on.

        Returns:
            The auth token

        Raises:
            TimeoutError: If no token arrives within 5 minutes.
            OSError: If the callback port cannot be bound.
        """
        # NOTE(review): the callback carries no state/nonce, so any request
        # reaching this port with a ``token`` parameter is accepted.
        # Consider adding one if the web side can echo it back.
        server = _CallbackServer(("localhost", callback_port))
        try:
            auth_url = f"{self.web_url}/auth/cli?callback=http://localhost:{callback_port}"
            print("Opening browser for authentication...")
            print(f"If browser doesn't open, visit: {auth_url}")
            webbrowser.open(auth_url)

            print("\nWaiting for authentication...")
            print("Please log in with GitHub in your browser.")

            # Wait for callback (timeout 5 min)
            try:
                token = await asyncio.wait_for(
                    server.wait_for_token(),
                    timeout=300,
                )
            except asyncio.TimeoutError as exc:
                raise TimeoutError(
                    "Authentication timed out. Please try again."
                ) from exc
        finally:
            # Always release the listening socket so the port can be reused
            # by a later login attempt.
            server.server.server_close()

        self.save_token(token)
        return token

    def is_authenticated(self) -> bool:
        """Return True when a non-empty token is saved on disk."""
        return bool(self.get_token())

    def has_openrouter_key(self) -> bool:
        """Return True when a non-empty OpenRouter key is saved on disk."""
        return bool(self.get_openrouter_key())
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class _CallbackServer:
|
|
120
|
+
"""Local HTTP server to receive OAuth callback."""
|
|
121
|
+
|
|
122
|
+
def __init__(self, address):
|
|
123
|
+
self.token: Optional[str] = None
|
|
124
|
+
self._received = asyncio.Event()
|
|
125
|
+
self.server = HTTPServer(address, self._make_handler())
|
|
126
|
+
self.server.timeout = 1 # Allow checking for cancellation
|
|
127
|
+
|
|
128
|
+
def _make_handler(self):
|
|
129
|
+
parent = self
|
|
130
|
+
|
|
131
|
+
class Handler(BaseHTTPRequestHandler):
|
|
132
|
+
def do_GET(self):
|
|
133
|
+
query = parse_qs(urlparse(self.path).query)
|
|
134
|
+
parent.token = query.get("token", [None])[0]
|
|
135
|
+
|
|
136
|
+
self.send_response(200)
|
|
137
|
+
self.send_header("Content-type", "text/html")
|
|
138
|
+
self.end_headers()
|
|
139
|
+
|
|
140
|
+
html = """
|
|
141
|
+
<!DOCTYPE html>
|
|
142
|
+
<html>
|
|
143
|
+
<head>
|
|
144
|
+
<title>Tarang - Authentication Successful</title>
|
|
145
|
+
<style>
|
|
146
|
+
body { font-family: -apple-system, sans-serif; text-align: center; padding-top: 50px; }
|
|
147
|
+
h1 { color: #10B981; }
|
|
148
|
+
</style>
|
|
149
|
+
</head>
|
|
150
|
+
<body>
|
|
151
|
+
<h1>Authentication Successful!</h1>
|
|
152
|
+
<p>You can close this window and return to the terminal.</p>
|
|
153
|
+
</body>
|
|
154
|
+
</html>
|
|
155
|
+
"""
|
|
156
|
+
self.wfile.write(html.encode())
|
|
157
|
+
|
|
158
|
+
if parent.token:
|
|
159
|
+
print("\nReceived CLI callback, completing login...")
|
|
160
|
+
parent._received.set()
|
|
161
|
+
else:
|
|
162
|
+
print("\nReceived CLI callback without token. Please retry login.")
|
|
163
|
+
|
|
164
|
+
def log_message(self, *args):
|
|
165
|
+
pass # Suppress HTTP logs
|
|
166
|
+
|
|
167
|
+
return Handler
|
|
168
|
+
|
|
169
|
+
async def wait_for_token(self) -> str:
|
|
170
|
+
"""Wait for token from callback."""
|
|
171
|
+
loop = asyncio.get_event_loop()
|
|
172
|
+
|
|
173
|
+
while not self._received.is_set():
|
|
174
|
+
# Handle one request (non-blocking)
|
|
175
|
+
await loop.run_in_executor(None, self.server.handle_request)
|
|
176
|
+
await asyncio.sleep(0.1)
|
|
177
|
+
|
|
178
|
+
return self.token
|
|
tarang/context/__init__.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tarang Context - Code indexing and retrieval system.
|
|
3
|
+
|
|
4
|
+
Provides:
|
|
5
|
+
- AST-based code chunking (tree-sitter)
|
|
6
|
+
- BM25 keyword search
|
|
7
|
+
- Symbol Graph (Code Knowledge Graph)
|
|
8
|
+
- Graph-augmented retrieval
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from tarang.context.skeleton import SkeletonGenerator, ProjectSkeleton
|
|
12
|
+
from tarang.context.chunker import Chunk, Chunker, SymbolInfo
|
|
13
|
+
from tarang.context.bm25 import BM25Index, SearchResult
|
|
14
|
+
from tarang.context.graph import SymbolGraph, SymbolNode
|
|
15
|
+
from tarang.context.retriever import ContextRetriever, RetrievalResult, create_retriever
|
|
16
|
+
from tarang.context.indexer import ProjectIndexer, IndexStats, index_project, get_retriever
|
|
17
|
+
|
|
18
|
+
# Public API of the package: every name below is imported above from the
# corresponding submodule and re-exported here.
__all__ = [
    # Skeleton (existing) - from tarang.context.skeleton
    "SkeletonGenerator",
    "ProjectSkeleton",
    # Chunker - from tarang.context.chunker
    "Chunk",
    "Chunker",
    "SymbolInfo",
    # BM25 - from tarang.context.bm25
    "BM25Index",
    "SearchResult",
    # Graph - from tarang.context.graph
    "SymbolGraph",
    "SymbolNode",
    # Retriever - from tarang.context.retriever
    "ContextRetriever",
    "RetrievalResult",
    "create_retriever",
    # Indexer - from tarang.context.indexer
    "ProjectIndexer",
    "IndexStats",
    "index_project",
    "get_retriever",
]
|
tarang/context/bm25.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""
|
|
2
|
+
BM25 Index - Fast keyword-based search over code chunks.
|
|
3
|
+
|
|
4
|
+
Uses the Okapi BM25 algorithm for ranking code chunks by relevance.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import pickle
|
|
9
|
+
import re
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Dict, List, Optional
|
|
13
|
+
|
|
14
|
+
from .chunker import Chunk
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class SearchResult:
    """One search hit: a code chunk paired with its relevance score."""

    chunk: Chunk  # the matched chunk
    score: float  # relevance score assigned by the index

    def __repr__(self) -> str:
        """Render as ``SearchResult(<chunk id>, score=<3 decimals>)``."""
        rendered = f"SearchResult({self.chunk.id}, score={self.score:.3f})"
        return rendered
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class BM25Index:
    """
    BM25 index for code chunk search.

    Uses the rank-bm25 library (``BM25Okapi``) for ranking. The library is
    imported lazily, only when a non-empty index is built.
    """

    # Compiled once; the patterns are reused on every query.
    _WORD_RE = re.compile(r'\b\w+\b')
    _CAMEL_RE = re.compile(r'[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|$)')

    # Words dropped from queries: common English words plus generic
    # task verbs ("add", "fix", ...).
    _STOP_WORDS = frozenset({
        "the", "a", "an", "is", "are", "was", "were", "be", "been",
        "have", "has", "had", "do", "does", "did", "will", "would",
        "could", "should", "may", "might", "must", "can", "need",
        "to", "of", "in", "for", "on", "with", "at", "by", "from",
        "as", "into", "through", "during", "before", "after",
        "and", "but", "if", "or", "because", "until", "while",
        "this", "that", "these", "those", "i", "me", "my", "we",
        "you", "your", "it", "its", "they", "them", "their",
        "what", "which", "who", "how", "where", "when", "why",
        "add", "create", "make", "build", "implement", "write",
        "fix", "update", "change", "modify", "remove", "delete",
    })

    def __init__(self):
        self._bm25 = None  # BM25Okapi instance, or None while the index is empty
        self._chunks: List[Chunk] = []  # position i matches BM25 document i
        self._chunk_map: Dict[str, Chunk] = {}  # id -> chunk

    @property
    def is_empty(self) -> bool:
        """Check if index is empty."""
        return not self._chunks

    def build(self, chunks: List[Chunk]) -> None:
        """
        Build BM25 index from chunks, replacing any previous contents.

        Args:
            chunks: List of code chunks with tokens. An empty list leaves
                the index empty.
        """
        # Copy so later mutation of the caller's list cannot desynchronise
        # chunk positions from the BM25 document order.
        self._chunks = list(chunks)
        self._chunk_map = {c.id: c for c in self._chunks}

        if not self._chunks:
            # BM25Okapi cannot be built from an empty corpus (the average
            # document length is undefined), so represent "empty" as None.
            self._bm25 = None
            return

        from rank_bm25 import BM25Okapi

        self._bm25 = BM25Okapi([c.tokens for c in self._chunks])

    def add_chunks(self, new_chunks: List[Chunk]) -> None:
        """
        Add chunks to existing index (requires rebuild).

        Chunks whose ID already exists replace the stored chunk. For now
        this rebuilds the entire index.
        Future optimization: incremental BM25.
        """
        for chunk in new_chunks:
            self._chunk_map[chunk.id] = chunk

        self.build(list(self._chunk_map.values()))

    def remove_chunks(self, chunk_ids: List[str]) -> None:
        """
        Remove chunks from index (requires rebuild).

        Unknown IDs are ignored.
        """
        for chunk_id in chunk_ids:
            self._chunk_map.pop(chunk_id, None)

        self.build(list(self._chunk_map.values()))

    def search(self, query: str, k: int = 10) -> List[SearchResult]:
        """
        Search for chunks matching query.

        Args:
            query: Search query (natural language)
            k: Maximum results to return; values <= 0 yield no results

        Returns:
            List of SearchResult sorted by score (descending). Chunks with
            a score of zero or less are omitted.
        """
        if k <= 0 or self._bm25 is None or not self._chunks:
            return []

        query_tokens = self._tokenize_query(query)
        if not query_tokens:
            return []

        scores = self._bm25.get_scores(query_tokens)

        results = [
            SearchResult(chunk=self._chunks[i], score=score)
            for i, score in enumerate(scores)
            if score > 0
        ]

        results.sort(key=lambda r: r.score, reverse=True)
        return results[:k]

    def get_chunk(self, chunk_id: str) -> Optional[Chunk]:
        """Get a chunk by ID, or None if it is not indexed."""
        return self._chunk_map.get(chunk_id)

    def get_chunks_for_file(self, file_path: str) -> List[Chunk]:
        """Get all chunks whose ``file`` equals ``file_path``."""
        return [c for c in self._chunks if c.file == file_path]

    def _tokenize_query(self, query: str) -> List[str]:
        """
        Tokenize a search query into lowercase terms.

        snake_case words are split on underscores. camelCase/PascalCase
        words contribute both the whole lowercased word and its parts.
        Tokens of two characters or fewer and stop words are dropped.
        """
        tokens: List[str] = []
        # Match on the original casing: lowercasing first would make the
        # camelCase check below unreachable.
        for word in self._WORD_RE.findall(query):
            lowered = word.lower()
            if "_" in word:
                # Split snake_case
                tokens.extend(lowered.split("_"))
            elif any(c.isupper() for c in word[1:]):
                # Split camelCase. The whole word is kept too, so queries
                # still match an index that stores identifiers unsplit.
                # (assumption: chunk tokenization is not visible here)
                parts = [p.lower() for p in self._CAMEL_RE.findall(word)]
                tokens.append(lowered)
                if len(parts) > 1:
                    tokens.extend(parts)
            else:
                tokens.append(lowered)

        return [t for t in tokens if len(t) > 2 and t not in self._STOP_WORDS]

    def save(self, path: Path) -> None:
        """
        Save index to disk.

        Args:
            path: Path to save file (pickle format)
        """
        data = {
            "chunks": [c.to_dict() for c in self._chunks],
            "bm25": self._bm25,
        }
        with open(Path(path), "wb") as f:
            pickle.dump(data, f)

    def load(self, path: Path) -> bool:
        """
        Load index from disk.

        The current index is left untouched when loading fails.

        Args:
            path: Path to saved file

        Returns:
            True if loaded successfully
        """
        path = Path(path)
        if not path.exists():
            return False

        # SECURITY: pickle.load() can execute arbitrary code. Only load
        # index files this tool wrote itself, never files from an
        # untrusted source.
        try:
            with open(path, "rb") as f:
                data = pickle.load(f)

            chunks = [Chunk.from_dict(d) for d in data["chunks"]]
            bm25 = data["bm25"]
        except Exception:
            # Deliberately broad: a corrupt or incompatible file can raise
            # almost anything, and the caller only needs "not loaded".
            return False

        # Commit only after everything parsed, so a bad file cannot leave
        # chunks and BM25 state out of sync.
        self._chunks = chunks
        self._chunk_map = {c.id: c for c in chunks}
        self._bm25 = bm25
        return True

    def stats(self) -> Dict:
        """
        Get index statistics.

        Returns:
            Dict with ``total_chunks``, ``total_files`` (distinct ``file``
            values) and ``chunk_types`` (count per chunk ``type``).
        """
        if not self._chunks:
            return {
                "total_chunks": 0,
                "total_files": 0,
                "chunk_types": {},
            }

        files = {c.file for c in self._chunks}
        types: Dict = {}
        for c in self._chunks:
            types[c.type] = types.get(c.type, 0) + 1

        return {
            "total_chunks": len(self._chunks),
            "total_files": len(files),
            "chunk_types": types,
        }
|