tarang 4.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tarang/client/auth.py ADDED
@@ -0,0 +1,178 @@
1
+ """
2
+ Tarang Authentication - CLI login and token management.
3
+
4
+ Handles OAuth flow via browser and secure token storage.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import asyncio
9
+ import json
10
+ import webbrowser
11
+ from http.server import BaseHTTPRequestHandler, HTTPServer
12
+ from pathlib import Path
13
+ from typing import Optional
14
+ from urllib.parse import parse_qs, urlparse
15
+
16
+
17
# Per-user configuration directory, and the JSON file inside it that holds
# the saved credentials.
CONFIG_DIR = Path.home().joinpath(".tarang")
CONFIG_FILE = CONFIG_DIR.joinpath("config.json")
19
+
20
+
21
class TarangAuth:
    """
    Handles CLI authentication via a browser-based OAuth flow.

    Credentials are persisted as a JSON object in ``CONFIG_FILE``
    (``~/.tarang/config.json``), which is kept at mode ``0o600``.
    """

    def __init__(self, web_url: str = "https://devtarang.ai"):
        """
        Args:
            web_url: Base URL of the web app that hosts the CLI auth page.
        """
        self.web_url = web_url
        self.token: Optional[str] = None

    def load_credentials(self) -> Optional[dict]:
        """
        Load saved credentials from the config file.

        Returns:
            The stored JSON object, or ``None`` when the file is missing,
            unreadable, not valid JSON, or does not contain a JSON object.
        """
        # Read directly instead of checking exists() first: avoids the
        # check-then-read race. JSONDecodeError and UnicodeDecodeError are
        # both ValueError subclasses.
        try:
            data = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            return None
        # A config holding e.g. a list or a string would break callers that
        # use .get()/.update(); treat it as "no credentials".
        return data if isinstance(data, dict) else None

    def save_credentials(self, **kwargs) -> None:
        """
        Merge ``kwargs`` into the saved credentials and write them to disk.

        Existing keys not named in ``kwargs`` are preserved.
        """
        CONFIG_DIR.mkdir(parents=True, exist_ok=True)
        config = self.load_credentials() or {}
        config.update(kwargs)
        # Restrict permissions BEFORE the secret is written: touch() creates
        # a missing file with mode 0o600, and chmod() tightens a file that
        # already existed with looser permissions.
        CONFIG_FILE.touch(mode=0o600, exist_ok=True)
        CONFIG_FILE.chmod(0o600)
        CONFIG_FILE.write_text(json.dumps(config, indent=2), encoding="utf-8")

    def get_token(self) -> Optional[str]:
        """Return the saved auth token, or ``None`` if there is none."""
        creds = self.load_credentials()
        return creds.get("token") if creds else None

    def get_openrouter_key(self) -> Optional[str]:
        """Return the saved OpenRouter API key, or ``None`` if there is none."""
        creds = self.load_credentials()
        return creds.get("openrouter_key") if creds else None

    def save_token(self, token: str) -> None:
        """Persist the auth token and cache it on ``self.token``."""
        self.save_credentials(token=token)
        self.token = token

    def save_openrouter_key(self, key: str) -> None:
        """Persist the OpenRouter API key."""
        self.save_credentials(openrouter_key=key)

    def clear_credentials(self) -> None:
        """Delete the config file; a missing file is not an error."""
        try:
            CONFIG_FILE.unlink()
        except FileNotFoundError:
            pass

    async def login(self, callback_port: int = 54321) -> str:
        """
        Run the OAuth flow via the browser.

        Steps:
            1. Start a local server to receive the callback
            2. Open the browser to the auth page
            3. Wait for the token and store it

        Args:
            callback_port: Local port the callback server listens on.

        Returns:
            The auth token.

        Raises:
            TimeoutError: If no token arrives within 5 minutes.
            OSError: If the callback port cannot be bound.
        """
        server = _CallbackServer(("localhost", callback_port))
        try:
            # Use the port that was actually bound so callback_port=0
            # (OS-assigned port) produces a usable callback URL.
            bound_port = server.server.server_address[1]
            auth_url = f"{self.web_url}/auth/cli?callback=http://localhost:{bound_port}"
            print("Opening browser for authentication...")
            print(f"If browser doesn't open, visit: {auth_url}")
            webbrowser.open(auth_url)

            print("\nWaiting for authentication...")
            print("Please log in with GitHub in your browser.")

            # Wait for callback (timeout 5 min)
            try:
                token = await asyncio.wait_for(
                    server.wait_for_token(),
                    timeout=300,
                )
            except asyncio.TimeoutError as exc:
                raise TimeoutError(
                    "Authentication timed out. Please try again."
                ) from exc
        finally:
            # Release the listening socket on every exit path so the port
            # is free for the next login attempt.
            server.server.server_close()

        self.save_token(token)
        return token

    def is_authenticated(self) -> bool:
        """Return True when a non-empty auth token is saved."""
        return bool(self.get_token())

    def has_openrouter_key(self) -> bool:
        """Return True when a non-empty OpenRouter key is saved."""
        return bool(self.get_openrouter_key())
117
+
118
+
119
class _CallbackServer:
    """Local HTTP server that receives the OAuth callback carrying the token."""

    # Page served when the callback carried a token.
    _SUCCESS_PAGE = """
<!DOCTYPE html>
<html>
<head>
<title>Tarang - Authentication Successful</title>
<style>
body { font-family: -apple-system, sans-serif; text-align: center; padding-top: 50px; }
h1 { color: #10B981; }
</style>
</head>
<body>
<h1>Authentication Successful!</h1>
<p>You can close this window and return to the terminal.</p>
</body>
</html>
"""

    # Page served for any request that did not carry a token.
    _FAILURE_PAGE = """
<!DOCTYPE html>
<html>
<head>
<title>Tarang - Authentication Failed</title>
<style>
body { font-family: -apple-system, sans-serif; text-align: center; padding-top: 50px; }
h1 { color: #EF4444; }
</style>
</head>
<body>
<h1>Authentication Failed</h1>
<p>No token was received. Please return to the terminal and retry login.</p>
</body>
</html>
"""

    def __init__(self, address):
        """
        Bind the callback server.

        Args:
            address: ``(host, port)`` tuple passed to ``HTTPServer``.
        """
        self.token: Optional[str] = None
        # Set by the request handler once a token has arrived. It is only
        # polled via is_set() in wait_for_token(), never awaited.
        self._received = asyncio.Event()
        self.server = HTTPServer(address, self._make_handler())
        self.server.timeout = 1  # Bound each handle_request() call to ~1s

    def _make_handler(self):
        """Build a request-handler class bound to this server instance."""
        parent = self

        class Handler(BaseHTTPRequestHandler):
            def do_GET(self):
                query = parse_qs(urlparse(self.path).query)
                token = query.get("token", [None])[0]

                if token:
                    parent.token = token
                    status, page = 200, parent._SUCCESS_PAGE
                else:
                    # Do not claim success (or clobber a stored token) for
                    # requests without a token, e.g. a favicon fetch.
                    status, page = 400, parent._FAILURE_PAGE

                body = page.encode("utf-8")
                self.send_response(status)
                self.send_header("Content-type", "text/html; charset=utf-8")
                self.send_header("Content-Length", str(len(body)))
                self.end_headers()
                self.wfile.write(body)

                if token:
                    print("\nReceived CLI callback, completing login...")
                    parent._received.set()
                else:
                    print("\nReceived CLI callback without token. Please retry login.")

            def log_message(self, *args):
                pass  # Suppress HTTP logs

        return Handler

    async def wait_for_token(self) -> str:
        """
        Serve requests until one of them delivers a token.

        Returns:
            The token taken from the callback's ``token`` query parameter.
        """
        loop = asyncio.get_running_loop()

        while not self._received.is_set():
            # handle_request() blocks for up to server.timeout seconds, so
            # run it in the default executor to keep the event loop (and
            # any enclosing wait_for timeout) responsive.
            await loop.run_in_executor(None, self.server.handle_request)
            await asyncio.sleep(0.1)

        return self.token
@@ -0,0 +1,41 @@
1
+ """
2
+ Tarang Context - Code indexing and retrieval system.
3
+
4
+ Provides:
5
+ - AST-based code chunking (tree-sitter)
6
+ - BM25 keyword search
7
+ - Symbol Graph (Code Knowledge Graph)
8
+ - Graph-augmented retrieval
9
+ """
10
+
11
+ from tarang.context.skeleton import SkeletonGenerator, ProjectSkeleton
12
+ from tarang.context.chunker import Chunk, Chunker, SymbolInfo
13
+ from tarang.context.bm25 import BM25Index, SearchResult
14
+ from tarang.context.graph import SymbolGraph, SymbolNode
15
+ from tarang.context.retriever import ContextRetriever, RetrievalResult, create_retriever
16
+ from tarang.context.indexer import ProjectIndexer, IndexStats, index_project, get_retriever
17
+
18
# Public API of the context package, grouped by the submodule each name is
# imported from.
__all__ = [
    # skeleton
    "SkeletonGenerator", "ProjectSkeleton",
    # chunker
    "Chunk", "Chunker", "SymbolInfo",
    # bm25
    "BM25Index", "SearchResult",
    # graph
    "SymbolGraph", "SymbolNode",
    # retriever
    "ContextRetriever", "RetrievalResult", "create_retriever",
    # indexer
    "ProjectIndexer", "IndexStats", "index_project", "get_retriever",
]
tarang/context/bm25.py ADDED
@@ -0,0 +1,218 @@
1
+ """
2
+ BM25 Index - Fast keyword-based search over code chunks.
3
+
4
+ Uses the Okapi BM25 algorithm for ranking code chunks by relevance.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import pickle
9
+ import re
10
+ from dataclasses import dataclass
11
+ from pathlib import Path
12
+ from typing import Dict, List, Optional
13
+
14
+ from .chunker import Chunk
15
+
16
+
17
@dataclass
class SearchResult:
    """Pairs a matched chunk with the relevance score it received."""

    chunk: Chunk
    score: float

    def __repr__(self) -> str:
        # Compact form: chunk id plus the score rounded to three decimals.
        return "SearchResult({}, score={:.3f})".format(self.chunk.id, self.score)
25
+
26
+
27
class BM25Index:
    """
    BM25 index for code chunk search.

    Ranking is delegated to ``BM25Okapi`` from the rank-bm25 library, which
    is imported lazily the first time a non-empty index is built.
    """

    # Splits a query into runs of word characters.
    _WORD_RE = re.compile(r"\b\w+\b")
    # Splits a camelCase / PascalCase identifier into its parts: acronym runs
    # ("HTTP" in "HTTPServer"), capitalised or lowercase words, digit runs.
    _CAMEL_RE = re.compile(r"[A-Z]+(?![a-z])|[A-Z]?[a-z]+|\d+")
    # Query words dropped before scoring; built once instead of per call.
    _STOP_WORDS = frozenset({
        "the", "a", "an", "is", "are", "was", "were", "be", "been",
        "have", "has", "had", "do", "does", "did", "will", "would",
        "could", "should", "may", "might", "must", "can", "need",
        "to", "of", "in", "for", "on", "with", "at", "by", "from",
        "as", "into", "through", "during", "before", "after",
        "and", "but", "if", "or", "because", "until", "while",
        "this", "that", "these", "those", "i", "me", "my", "we",
        "you", "your", "it", "its", "they", "them", "their",
        "what", "which", "who", "how", "where", "when", "why",
        "add", "create", "make", "build", "implement", "write",
        "fix", "update", "change", "modify", "remove", "delete",
    })

    def __init__(self):
        self._bm25 = None
        self._chunks: List[Chunk] = []
        self._chunk_map: Dict[str, Chunk] = {}  # id -> chunk

    @property
    def is_empty(self) -> bool:
        """Check if index is empty."""
        return not self._chunks

    def build(self, chunks: List[Chunk]) -> None:
        """
        Build BM25 index from chunks, replacing any previous contents.

        Args:
            chunks: List of code chunks with tokens. An empty list resets
                the index.
        """
        # Copy so later mutation of the caller's list cannot desynchronise
        # the chunk list from the BM25 score positions.
        self._chunks = list(chunks)
        self._chunk_map = {c.id: c for c in self._chunks}

        if not self._chunks:
            # BM25Okapi divides by the corpus size, so an empty corpus
            # must not reach it.
            self._bm25 = None
            return

        from rank_bm25 import BM25Okapi

        tokenized_corpus = [c.tokens for c in self._chunks]
        self._bm25 = BM25Okapi(tokenized_corpus)

    def add_chunks(self, new_chunks: List[Chunk]) -> None:
        """
        Add chunks to existing index (requires rebuild).

        Chunks whose ID is already indexed replace the existing entry.
        For now, this rebuilds the entire index.
        Future optimization: incremental BM25.
        """
        for chunk in new_chunks:
            self._chunk_map[chunk.id] = chunk

        self.build(list(self._chunk_map.values()))

    def remove_chunks(self, chunk_ids: List[str]) -> None:
        """
        Remove chunks from index (requires rebuild).

        Unknown IDs are ignored.
        """
        for chunk_id in chunk_ids:
            self._chunk_map.pop(chunk_id, None)

        self.build(list(self._chunk_map.values()))

    def search(self, query: str, k: int = 10) -> List[SearchResult]:
        """
        Search for chunks matching query.

        Args:
            query: Search query (natural language)
            k: Maximum results to return; values <= 0 yield no results

        Returns:
            List of SearchResult sorted by score (descending). Only chunks
            with a positive score are included.
        """
        if k <= 0 or self._bm25 is None or not self._chunks:
            return []

        query_tokens = self._tokenize_query(query)
        if not query_tokens:
            return []

        scores = self._bm25.get_scores(query_tokens)

        # float() keeps library-specific scalar types out of the results.
        results = [
            SearchResult(chunk=chunk, score=float(score))
            for chunk, score in zip(self._chunks, scores)
            if score > 0
        ]

        results.sort(key=lambda r: r.score, reverse=True)
        return results[:k]

    def get_chunk(self, chunk_id: str) -> Optional[Chunk]:
        """Get a chunk by ID, or ``None`` if it is not indexed."""
        return self._chunk_map.get(chunk_id)

    def get_chunks_for_file(self, file_path: str) -> List[Chunk]:
        """Get all chunks whose ``file`` equals ``file_path``."""
        return [c for c in self._chunks if c.file == file_path]

    def _tokenize_query(self, query: str) -> List[str]:
        """
        Tokenize a search query.

        Words are split on underscores and on camelCase boundaries, then
        lowercased; stop words and tokens of two characters or fewer are
        dropped.
        """
        tokens: List[str] = []
        # Case must be preserved until after camelCase splitting; lowercasing
        # first would erase the boundaries the split relies on.
        for word in self._WORD_RE.findall(query):
            for piece in word.split("_"):
                if any(ch.isupper() for ch in piece[1:]):
                    parts = self._CAMEL_RE.findall(piece)
                    # Only trust the split when it accounts for every
                    # character (the pattern is ASCII-only).
                    if sum(map(len, parts)) == len(piece):
                        tokens.extend(part.lower() for part in parts)
                        continue
                tokens.append(piece.lower())

        return [
            t for t in tokens
            if len(t) > 2 and t not in self._STOP_WORDS
        ]

    def save(self, path: Path) -> None:
        """
        Save index to disk.

        Args:
            path: Path to save file (pickle format)
        """
        data = {
            "chunks": [c.to_dict() for c in self._chunks],
            "bm25": self._bm25,
        }
        with open(path, "wb") as f:
            pickle.dump(data, f)

    def load(self, path: Path) -> bool:
        """
        Load index from disk.

        Args:
            path: Path to saved file

        Returns:
            True if loaded successfully. On failure the index is left
            unchanged.
        """
        if not path.exists():
            return False

        # SECURITY NOTE: pickle.load executes code embedded in the file.
        # Only load index files this tool wrote itself; never point this at
        # a file from an untrusted source.
        try:
            with open(path, "rb") as f:
                data = pickle.load(f)
            chunks = [Chunk.from_dict(d) for d in data["chunks"]]
            bm25 = data["bm25"]
        except Exception:
            # Deliberately best-effort: unpickling a stale or corrupt file
            # can raise almost anything, and callers only need "not loaded".
            return False

        # Commit only after everything parsed, so a malformed file cannot
        # leave the index half-updated.
        self._chunks = chunks
        self._chunk_map = {c.id: c for c in chunks}
        self._bm25 = bm25
        return True

    def stats(self) -> Dict:
        """Get index statistics: chunk count, file count, count per chunk type."""
        if not self._chunks:
            return {
                "total_chunks": 0,
                "total_files": 0,
                "chunk_types": {},
            }

        files = {c.file for c in self._chunks}
        types: Dict[str, int] = {}
        for c in self._chunks:
            types[c.type] = types.get(c.type, 0) + 1

        return {
            "total_chunks": len(self._chunks),
            "total_files": len(files),
            "chunk_types": types,
        }