sourcefire 0.2.1__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sourcefire-0.2.1/sourcefire.egg-info → sourcefire-0.3.0}/PKG-INFO +3 -1
- {sourcefire-0.2.1 → sourcefire-0.3.0}/pyproject.toml +2 -1
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/api/models.py +1 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/api/routes.py +3 -2
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/chain/rag_chain.py +14 -14
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/cli.py +101 -13
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/config.py +13 -2
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/db.py +36 -22
- sourcefire-0.3.0/sourcefire/global_config.py +87 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/indexer/pipeline.py +5 -3
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/static/app.js +3 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/watcher.py +2 -2
- {sourcefire-0.2.1 → sourcefire-0.3.0/sourcefire.egg-info}/PKG-INFO +3 -1
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire.egg-info/SOURCES.txt +1 -1
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire.egg-info/requires.txt +3 -0
- sourcefire-0.2.1/sourcefire/static/.DS_Store +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/LICENSE +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/MANIFEST.in +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/README.md +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/setup.cfg +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/__init__.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/api/__init__.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/chain/__init__.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/chain/prompts.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/indexer/__init__.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/indexer/embeddings.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/indexer/language_profiles.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/indexer/metadata.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/init.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/prompts/system.md +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/retriever/__init__.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/retriever/graph.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/retriever/search.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/static/index.html +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/static/styles.css +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire.egg-info/dependency_links.txt +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire.egg-info/entry_points.txt +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire.egg-info/top_level.txt +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/tests/test_config.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/tests/test_graph.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/tests/test_metadata.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/tests/test_prompts.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/tests/test_routes.py +0 -0
- {sourcefire-0.2.1 → sourcefire-0.3.0}/tests/test_search.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcefire
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Get instant context on any codebase. One command to index, ask questions in plain English, get answers grounded in actual code.
|
|
5
5
|
Author-email: Athar Wani <athar@cravv.com>
|
|
6
6
|
License: MIT
|
|
@@ -35,6 +35,8 @@ Requires-Dist: tree-sitter
|
|
|
35
35
|
Requires-Dist: python-dotenv
|
|
36
36
|
Requires-Dist: watchfiles
|
|
37
37
|
Requires-Dist: tomli-w
|
|
38
|
+
Provides-Extra: ast
|
|
39
|
+
Requires-Dist: tree-sitter-languages; extra == "ast"
|
|
38
40
|
Provides-Extra: dev
|
|
39
41
|
Requires-Dist: pytest; extra == "dev"
|
|
40
42
|
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "sourcefire"
|
|
3
|
-
version = "0.2.1"
|
|
3
|
+
version = "0.3.0"
|
|
4
4
|
description = "Get instant context on any codebase. One command to index, ask questions in plain English, get answers grounded in actual code."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = {text = "MIT"}
|
|
@@ -39,6 +39,7 @@ dependencies = [
|
|
|
39
39
|
]
|
|
40
40
|
|
|
41
41
|
[project.optional-dependencies]
|
|
42
|
+
ast = ["tree-sitter-languages"]
|
|
42
43
|
dev = ["pytest", "pytest-asyncio", "httpx"]
|
|
43
44
|
|
|
44
45
|
[project.urls]
|
|
@@ -27,6 +27,7 @@ _index_status: dict[str, Any] = {
|
|
|
27
27
|
"last_indexed": "never",
|
|
28
28
|
"index_status": "not_ready",
|
|
29
29
|
"language": "generic",
|
|
30
|
+
"project_name": "Sourcefire",
|
|
30
31
|
}
|
|
31
32
|
|
|
32
33
|
|
|
@@ -135,10 +136,9 @@ async def sources(path: str = Query(..., description="Relative path within the c
|
|
|
135
136
|
if _project_dir is None:
|
|
136
137
|
raise HTTPException(status_code=503, detail="Project directory not initialized.")
|
|
137
138
|
|
|
138
|
-
codebase_resolved = _project_dir.resolve()
|
|
139
139
|
full_path = (_project_dir / path).resolve()
|
|
140
140
|
|
|
141
|
-
if not
|
|
141
|
+
if not full_path.is_relative_to(_project_dir.resolve()):
|
|
142
142
|
raise HTTPException(status_code=400, detail="Path traversal detected.")
|
|
143
143
|
|
|
144
144
|
if not full_path.is_file():
|
|
@@ -163,4 +163,5 @@ async def status() -> StatusResponse:
|
|
|
163
163
|
last_indexed=str(_index_status.get("last_indexed", "never")),
|
|
164
164
|
index_status=str(_index_status.get("index_status", "not_ready")),
|
|
165
165
|
language=str(_index_status.get("language", "generic")),
|
|
166
|
+
project_name=str(_index_status.get("project_name", "Sourcefire")),
|
|
166
167
|
)
|
|
@@ -181,7 +181,7 @@ async def retrieve_for_mode(
|
|
|
181
181
|
profile: LanguageProfile | None = None,
|
|
182
182
|
) -> list[dict[str, Any]]:
|
|
183
183
|
"""Embed *query* and dispatch to the mode-specific retriever."""
|
|
184
|
-
loop = asyncio.
|
|
184
|
+
loop = asyncio.get_running_loop()
|
|
185
185
|
query_vector: list[float] = await loop.run_in_executor(None, embed_text, query)
|
|
186
186
|
|
|
187
187
|
if mode == "debug":
|
|
@@ -213,7 +213,7 @@ def _get_tools(
|
|
|
213
213
|
Provide the relative filepath (e.g. 'src/main.py').
|
|
214
214
|
"""
|
|
215
215
|
full_path = (_project_dir / filepath).resolve()
|
|
216
|
-
if not
|
|
216
|
+
if not full_path.is_relative_to(_project_dir.resolve()):
|
|
217
217
|
return "Error: Path traversal not allowed."
|
|
218
218
|
if not full_path.is_file():
|
|
219
219
|
return f"Error: File '{filepath}' not found in the codebase."
|
|
@@ -236,7 +236,7 @@ def _get_tools(
|
|
|
236
236
|
else:
|
|
237
237
|
full_path = (_project_dir / dir_path).resolve()
|
|
238
238
|
|
|
239
|
-
if not
|
|
239
|
+
if not full_path.is_relative_to(_project_dir.resolve()):
|
|
240
240
|
return "Error: Path traversal not allowed."
|
|
241
241
|
if not full_path.is_dir():
|
|
242
242
|
return f"Error: Directory '{dir_path}' not found."
|
|
@@ -270,7 +270,7 @@ def _get_tools(
|
|
|
270
270
|
full_path = _project_dir if dir_path == "." else (_project_dir / dir_path)
|
|
271
271
|
full_path = full_path.resolve()
|
|
272
272
|
|
|
273
|
-
if not
|
|
273
|
+
if not full_path.is_relative_to(_project_dir.resolve()):
|
|
274
274
|
return "Error: Path traversal not allowed."
|
|
275
275
|
|
|
276
276
|
if not full_path.is_dir() and not full_path.is_file():
|
|
@@ -362,7 +362,7 @@ def _get_tools(
|
|
|
362
362
|
else:
|
|
363
363
|
full_path = (_project_dir / dir_path).resolve()
|
|
364
364
|
|
|
365
|
-
if not
|
|
365
|
+
if not full_path.is_relative_to(_project_dir.resolve()):
|
|
366
366
|
return "Error: Path traversal not allowed."
|
|
367
367
|
if not full_path.is_dir():
|
|
368
368
|
return f"Error: Directory '{dir_path}' not found."
|
|
@@ -401,7 +401,7 @@ def _get_tools(
|
|
|
401
401
|
Provide relative filepath.
|
|
402
402
|
"""
|
|
403
403
|
full_path = (_project_dir / filepath).resolve()
|
|
404
|
-
if not
|
|
404
|
+
if not full_path.is_relative_to(_project_dir.resolve()):
|
|
405
405
|
return "Error: Path traversal not allowed."
|
|
406
406
|
try:
|
|
407
407
|
result = subprocess.run(
|
|
@@ -423,7 +423,7 @@ def _get_tools(
|
|
|
423
423
|
Use when you need to know who last changed specific lines and why.
|
|
424
424
|
"""
|
|
425
425
|
full_path = (_project_dir / filepath).resolve()
|
|
426
|
-
if not
|
|
426
|
+
if not full_path.is_relative_to(_project_dir.resolve()):
|
|
427
427
|
return "Error: Path traversal not allowed."
|
|
428
428
|
try:
|
|
429
429
|
result = subprocess.run(
|
|
@@ -444,7 +444,7 @@ def _get_tools(
|
|
|
444
444
|
Line numbers are 1-based. Returns lines with line numbers prefixed.
|
|
445
445
|
"""
|
|
446
446
|
full_path = (_project_dir / filepath).resolve()
|
|
447
|
-
if not
|
|
447
|
+
if not full_path.is_relative_to(_project_dir.resolve()):
|
|
448
448
|
return "Error: Path traversal not allowed."
|
|
449
449
|
if not full_path.is_file():
|
|
450
450
|
return f"Error: File '{filepath}' not found."
|
|
@@ -468,7 +468,7 @@ def _get_tools(
|
|
|
468
468
|
import re as re_mod
|
|
469
469
|
full_path = _project_dir if dir_path == "." else (_project_dir / dir_path)
|
|
470
470
|
full_path = full_path.resolve()
|
|
471
|
-
if not
|
|
471
|
+
if not full_path.is_relative_to(_project_dir.resolve()):
|
|
472
472
|
return "Error: Path traversal not allowed."
|
|
473
473
|
|
|
474
474
|
try:
|
|
@@ -507,7 +507,7 @@ def _get_tools(
|
|
|
507
507
|
"""
|
|
508
508
|
full_path = _project_dir if dir_path == "." else (_project_dir / dir_path)
|
|
509
509
|
full_path = full_path.resolve()
|
|
510
|
-
if not
|
|
510
|
+
if not full_path.is_relative_to(_project_dir.resolve()):
|
|
511
511
|
return "Error: Path traversal not allowed."
|
|
512
512
|
|
|
513
513
|
results = []
|
|
@@ -542,7 +542,7 @@ def _get_tools(
|
|
|
542
542
|
cmd = ["git", "diff", "--stat", "-p", ref]
|
|
543
543
|
if filepath:
|
|
544
544
|
full_path = (_project_dir / filepath).resolve()
|
|
545
|
-
if not
|
|
545
|
+
if not full_path.is_relative_to(_project_dir.resolve()):
|
|
546
546
|
return "Error: Path traversal not allowed."
|
|
547
547
|
cmd.extend(["--", filepath])
|
|
548
548
|
try:
|
|
@@ -600,7 +600,7 @@ def _get_tools(
|
|
|
600
600
|
Use to quickly assess file complexity and recency.
|
|
601
601
|
"""
|
|
602
602
|
full_path = (_project_dir / filepath).resolve()
|
|
603
|
-
if not
|
|
603
|
+
if not full_path.is_relative_to(_project_dir.resolve()):
|
|
604
604
|
return "Error: Path traversal not allowed."
|
|
605
605
|
if not full_path.is_file():
|
|
606
606
|
return f"Error: File '{filepath}' not found."
|
|
@@ -667,7 +667,7 @@ def _get_tools(
|
|
|
667
667
|
(functions/methods invoked inside the given function's body).
|
|
668
668
|
"""
|
|
669
669
|
full_path = (_project_dir / filepath).resolve()
|
|
670
|
-
if not
|
|
670
|
+
if not full_path.is_relative_to(_project_dir.resolve()):
|
|
671
671
|
return "Error: Path traversal not allowed."
|
|
672
672
|
if not full_path.is_file():
|
|
673
673
|
return f"Error: File '{filepath}' not found."
|
|
@@ -771,7 +771,7 @@ def _get_tools(
|
|
|
771
771
|
return "Error: Vector database not available."
|
|
772
772
|
try:
|
|
773
773
|
full_path = (_project_dir / filepath).resolve()
|
|
774
|
-
if not
|
|
774
|
+
if not full_path.is_relative_to(_project_dir.resolve()):
|
|
775
775
|
return "Error: Path traversal not allowed."
|
|
776
776
|
if not full_path.is_file():
|
|
777
777
|
return f"Error: File '{filepath}' not found."
|
|
@@ -4,14 +4,19 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import argparse
|
|
6
6
|
import asyncio
|
|
7
|
-
import fcntl
|
|
8
7
|
import os
|
|
8
|
+
import socket
|
|
9
9
|
import sys
|
|
10
10
|
import webbrowser
|
|
11
11
|
from contextlib import asynccontextmanager
|
|
12
12
|
from datetime import datetime, timezone
|
|
13
13
|
from pathlib import Path
|
|
14
14
|
|
|
15
|
+
if sys.platform == "win32":
|
|
16
|
+
import msvcrt
|
|
17
|
+
else:
|
|
18
|
+
import fcntl
|
|
19
|
+
|
|
15
20
|
import uvicorn
|
|
16
21
|
from dotenv import load_dotenv
|
|
17
22
|
from fastapi import FastAPI
|
|
@@ -30,6 +35,8 @@ def parse_args() -> argparse.Namespace:
|
|
|
30
35
|
parser.add_argument("--no-open", action="store_true", help="Don't auto-open browser")
|
|
31
36
|
parser.add_argument("--reinit", action="store_true", help="Regenerate .sourcefire/config.toml via LLM")
|
|
32
37
|
parser.add_argument("--verbose", action="store_true", help="Verbose logging")
|
|
38
|
+
parser.add_argument("--uninstall", action="store_true", help="Remove global ~/.sourcefire/ config directory")
|
|
39
|
+
parser.add_argument("--version", action="store_true", help="Show version and exit")
|
|
33
40
|
return parser.parse_args()
|
|
34
41
|
|
|
35
42
|
|
|
@@ -58,7 +65,10 @@ def acquire_lock(lock_path: Path) -> int | None:
|
|
|
58
65
|
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
|
59
66
|
try:
|
|
60
67
|
fd = os.open(str(lock_path), os.O_CREAT | os.O_RDWR)
|
|
61
|
-
|
|
68
|
+
if sys.platform == "win32":
|
|
69
|
+
msvcrt.locking(fd, msvcrt.LK_NBLCK, 1)
|
|
70
|
+
else:
|
|
71
|
+
fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
|
62
72
|
return fd
|
|
63
73
|
except (OSError, BlockingIOError):
|
|
64
74
|
return None
|
|
@@ -67,12 +77,34 @@ def acquire_lock(lock_path: Path) -> int | None:
|
|
|
67
77
|
def release_lock(fd: int, lock_path: Path) -> None:
|
|
68
78
|
"""Release the file lock."""
|
|
69
79
|
try:
|
|
70
|
-
|
|
80
|
+
if sys.platform == "win32":
|
|
81
|
+
msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
|
|
82
|
+
else:
|
|
83
|
+
fcntl.flock(fd, fcntl.LOCK_UN)
|
|
71
84
|
os.close(fd)
|
|
72
85
|
except OSError:
|
|
73
86
|
pass
|
|
74
87
|
|
|
75
88
|
|
|
89
|
+
def _port_available(host: str, port: int) -> bool:
|
|
90
|
+
"""Check if a port is available."""
|
|
91
|
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
|
92
|
+
try:
|
|
93
|
+
s.bind((host, port))
|
|
94
|
+
return True
|
|
95
|
+
except OSError:
|
|
96
|
+
return False
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def find_available_port(host: str, preferred: int, max_attempts: int = 20) -> int:
|
|
100
|
+
"""Find an available port, starting from preferred and incrementing."""
|
|
101
|
+
for offset in range(max_attempts):
|
|
102
|
+
port = preferred + offset
|
|
103
|
+
if _port_available(host, port):
|
|
104
|
+
return port
|
|
105
|
+
raise RuntimeError(f"No available port found in range {preferred}-{preferred + max_attempts - 1}")
|
|
106
|
+
|
|
107
|
+
|
|
76
108
|
# ---------------------------------------------------------------------------
|
|
77
109
|
# App state (shared between main() and lifespan)
|
|
78
110
|
# ---------------------------------------------------------------------------
|
|
@@ -148,6 +180,7 @@ async def lifespan(app: FastAPI):
|
|
|
148
180
|
"last_indexed": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
|
|
149
181
|
"index_status": "ready",
|
|
150
182
|
"language": lang_name,
|
|
183
|
+
"project_name": config.project_name or project_dir.name,
|
|
151
184
|
}
|
|
152
185
|
|
|
153
186
|
# Inject dependencies into routes
|
|
@@ -183,14 +216,20 @@ async def lifespan(app: FastAPI):
|
|
|
183
216
|
# App
|
|
184
217
|
# ---------------------------------------------------------------------------
|
|
185
218
|
|
|
219
|
+
from importlib.metadata import version as _pkg_version
|
|
186
220
|
from importlib.resources import files as _resource_files
|
|
187
221
|
|
|
188
222
|
_static_dir = str(Path(_resource_files("sourcefire")) / "static")
|
|
189
223
|
|
|
224
|
+
try:
|
|
225
|
+
_version = _pkg_version("sourcefire")
|
|
226
|
+
except Exception:
|
|
227
|
+
_version = "0.0.0"
|
|
228
|
+
|
|
190
229
|
app = FastAPI(
|
|
191
230
|
title="Sourcefire",
|
|
192
231
|
description="AI-powered codebase RAG. Created by Athar Wani.",
|
|
193
|
-
version=
|
|
232
|
+
version=_version,
|
|
194
233
|
lifespan=lifespan,
|
|
195
234
|
)
|
|
196
235
|
|
|
@@ -214,18 +253,59 @@ def main() -> None:
|
|
|
214
253
|
"""Sourcefire CLI entry point."""
|
|
215
254
|
args = parse_args()
|
|
216
255
|
|
|
256
|
+
# Handle --version
|
|
257
|
+
if args.version:
|
|
258
|
+
from importlib.metadata import version as _get_ver
|
|
259
|
+
try:
|
|
260
|
+
print(f"sourcefire {_get_ver('sourcefire')}")
|
|
261
|
+
except Exception:
|
|
262
|
+
print("sourcefire (version unknown)")
|
|
263
|
+
return
|
|
264
|
+
|
|
265
|
+
# Handle --uninstall
|
|
266
|
+
if args.uninstall:
|
|
267
|
+
from sourcefire.global_config import uninstall
|
|
268
|
+
uninstall()
|
|
269
|
+
return
|
|
270
|
+
|
|
217
271
|
project_dir, sourcefire_dir = discover_project()
|
|
218
272
|
|
|
273
|
+
# Safety check: warn if running in a broad directory (home, /, etc.)
|
|
274
|
+
needs_init = not sourcefire_dir.exists() or not (sourcefire_dir / "config.toml").exists()
|
|
275
|
+
if needs_init:
|
|
276
|
+
dangerous_dirs = {
|
|
277
|
+
Path.home().resolve(),
|
|
278
|
+
Path("/").resolve(),
|
|
279
|
+
}
|
|
280
|
+
# Also flag common broad directories
|
|
281
|
+
for name in ("Documents", "Downloads", "Desktop"):
|
|
282
|
+
dangerous_dirs.add((Path.home() / name).resolve())
|
|
283
|
+
|
|
284
|
+
if project_dir.resolve() in dangerous_dirs:
|
|
285
|
+
print(f"\n WARNING: You are about to index: {project_dir.resolve()}")
|
|
286
|
+
print(" This is a broad directory and may index thousands of files.\n")
|
|
287
|
+
try:
|
|
288
|
+
confirm = input(" Do you trust this folder? (yes/no): ").strip().lower()
|
|
289
|
+
except (EOFError, KeyboardInterrupt):
|
|
290
|
+
print("\nAborted.")
|
|
291
|
+
sys.exit(1)
|
|
292
|
+
if confirm not in ("yes", "y"):
|
|
293
|
+
print("Aborted. Run sourcefire from a project directory instead.")
|
|
294
|
+
sys.exit(0)
|
|
295
|
+
|
|
219
296
|
# Acquire lock
|
|
220
297
|
lock_fd = acquire_lock(sourcefire_dir / ".lock")
|
|
221
298
|
if lock_fd is None:
|
|
222
299
|
print("Error: Another sourcefire instance is already running for this project.")
|
|
223
300
|
sys.exit(1)
|
|
224
301
|
|
|
225
|
-
# Check for API key
|
|
226
|
-
|
|
302
|
+
# Check for API key: env var -> ~/.sourcefire/config.toml -> prompt
|
|
303
|
+
from sourcefire.global_config import get_api_key, save_api_key, get_global_dir
|
|
304
|
+
|
|
305
|
+
api_key = get_api_key()
|
|
227
306
|
if not api_key:
|
|
228
|
-
print("No
|
|
307
|
+
print("No Gemini API key found.")
|
|
308
|
+
print(f"It will be saved to {get_global_dir() / 'config.toml'} (global, works across all projects).\n")
|
|
229
309
|
try:
|
|
230
310
|
api_key = input("Enter your Gemini API key: ").strip()
|
|
231
311
|
except (EOFError, KeyboardInterrupt):
|
|
@@ -238,12 +318,8 @@ def main() -> None:
|
|
|
238
318
|
release_lock(lock_fd, sourcefire_dir / ".lock")
|
|
239
319
|
sys.exit(1)
|
|
240
320
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
with open(env_path, "a") as f:
|
|
244
|
-
f.write(f"\nGEMINI_API_KEY={api_key}\n")
|
|
245
|
-
os.environ["GEMINI_API_KEY"] = api_key
|
|
246
|
-
print(f"API key saved to {env_path}")
|
|
321
|
+
save_api_key(api_key)
|
|
322
|
+
print(f"API key saved to {get_global_dir() / 'config.toml'}\n")
|
|
247
323
|
|
|
248
324
|
# Auto-init or reinit
|
|
249
325
|
needs_init = not sourcefire_dir.exists() or not (sourcefire_dir / "config.toml").exists()
|
|
@@ -268,6 +344,18 @@ def main() -> None:
|
|
|
268
344
|
if args.port:
|
|
269
345
|
config.port = args.port
|
|
270
346
|
|
|
347
|
+
# Find available port (auto-increment if taken)
|
|
348
|
+
try:
|
|
349
|
+
actual_port = find_available_port(config.host, config.port)
|
|
350
|
+
except RuntimeError:
|
|
351
|
+
print(f"Error: No available port found starting from {config.port}.")
|
|
352
|
+
release_lock(lock_fd, sourcefire_dir / ".lock")
|
|
353
|
+
sys.exit(1)
|
|
354
|
+
|
|
355
|
+
if actual_port != config.port:
|
|
356
|
+
print(f"Port {config.port} is in use, using {actual_port} instead.")
|
|
357
|
+
config.port = actual_port
|
|
358
|
+
|
|
271
359
|
# Store state for lifespan access
|
|
272
360
|
_app_state["config"] = config
|
|
273
361
|
_app_state["project_dir"] = project_dir
|
|
@@ -87,8 +87,19 @@ def default_config(project_dir: Path) -> SourcefireConfig:
|
|
|
87
87
|
def load_config(project_dir: Path, sourcefire_dir: Path) -> SourcefireConfig:
|
|
88
88
|
"""Load config from .sourcefire/config.toml."""
|
|
89
89
|
config_path = sourcefire_dir / "config.toml"
|
|
90
|
-
|
|
91
|
-
|
|
90
|
+
try:
|
|
91
|
+
raw = config_path.read_text(encoding="utf-8")
|
|
92
|
+
except FileNotFoundError:
|
|
93
|
+
print(f"Error: Config file not found at {config_path}")
|
|
94
|
+
print("Run `sourcefire --reinit` to regenerate it.")
|
|
95
|
+
raise SystemExit(1)
|
|
96
|
+
|
|
97
|
+
try:
|
|
98
|
+
data = tomllib.loads(raw)
|
|
99
|
+
except tomllib.TOMLDecodeError as exc:
|
|
100
|
+
print(f"Error: Invalid TOML in {config_path}: {exc}")
|
|
101
|
+
print("Fix the syntax or run `sourcefire --reinit` to regenerate.")
|
|
102
|
+
raise SystemExit(1)
|
|
92
103
|
|
|
93
104
|
project = data.get("project", {})
|
|
94
105
|
indexer = data.get("indexer", {})
|
|
@@ -131,29 +131,43 @@ def get_chunks_by_files(
|
|
|
131
131
|
return rows
|
|
132
132
|
|
|
133
133
|
|
|
134
|
-
def
|
|
135
|
-
"""Return set of
|
|
136
|
-
|
|
134
|
+
def get_indexed_files_and_mtimes(collection: chromadb.Collection) -> tuple[set[str], dict[str, float]]:
|
|
135
|
+
"""Return (set of filenames, {filename: mtime}) for all indexed chunks.
|
|
136
|
+
|
|
137
|
+
Uses pagination to avoid loading the entire collection into memory at once.
|
|
138
|
+
"""
|
|
137
139
|
files: set[str] = set()
|
|
138
|
-
|
|
140
|
+
mtimes: dict[str, float] = {}
|
|
141
|
+
batch_size = 10000
|
|
142
|
+
offset = 0
|
|
143
|
+
|
|
144
|
+
total = collection.count()
|
|
145
|
+
if total == 0:
|
|
146
|
+
return files, mtimes
|
|
147
|
+
|
|
148
|
+
while offset < total:
|
|
149
|
+
results = collection.get(
|
|
150
|
+
include=["metadatas"],
|
|
151
|
+
limit=batch_size,
|
|
152
|
+
offset=offset,
|
|
153
|
+
)
|
|
154
|
+
if not results["metadatas"]:
|
|
155
|
+
break
|
|
139
156
|
for meta in results["metadatas"]:
|
|
140
157
|
if meta and "filename" in meta:
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
158
|
+
fname = meta["filename"]
|
|
159
|
+
files.add(fname)
|
|
160
|
+
if "mtime" in meta:
|
|
161
|
+
try:
|
|
162
|
+
stored = float(meta["mtime"])
|
|
163
|
+
# Keep the max mtime per file (chunks share the same mtime)
|
|
164
|
+
if fname not in mtimes or stored > mtimes[fname]:
|
|
165
|
+
mtimes[fname] = stored
|
|
166
|
+
except (ValueError, TypeError):
|
|
167
|
+
pass
|
|
168
|
+
offset += batch_size
|
|
144
169
|
|
|
145
|
-
|
|
146
|
-
"""Get stored mtimes for all indexed files from ChromaDB metadata."""
|
|
147
|
-
results = collection.get(include=["metadatas"])
|
|
148
|
-
mtimes: dict[str, float] = {}
|
|
149
|
-
if results["metadatas"]:
|
|
150
|
-
for meta in results["metadatas"]:
|
|
151
|
-
if meta and "filename" in meta and "mtime" in meta:
|
|
152
|
-
try:
|
|
153
|
-
mtimes[meta["filename"]] = float(meta["mtime"])
|
|
154
|
-
except (ValueError, TypeError):
|
|
155
|
-
pass
|
|
156
|
-
return mtimes
|
|
170
|
+
return files, mtimes
|
|
157
171
|
|
|
158
172
|
|
|
159
173
|
# ---------------------------------------------------------------------------
|
|
@@ -168,7 +182,7 @@ async def async_query_similar(
|
|
|
168
182
|
where: dict | None = None,
|
|
169
183
|
) -> list[dict[str, Any]]:
|
|
170
184
|
"""Async wrapper for query_similar."""
|
|
171
|
-
loop = asyncio.
|
|
185
|
+
loop = asyncio.get_running_loop()
|
|
172
186
|
return await loop.run_in_executor(
|
|
173
187
|
None, partial(query_similar, collection, query_embedding, n_results, where)
|
|
174
188
|
)
|
|
@@ -179,7 +193,7 @@ async def async_get_chunks_by_files(
|
|
|
179
193
|
filenames: list[str],
|
|
180
194
|
) -> list[dict[str, Any]]:
|
|
181
195
|
"""Async wrapper for get_chunks_by_files."""
|
|
182
|
-
loop = asyncio.
|
|
196
|
+
loop = asyncio.get_running_loop()
|
|
183
197
|
return await loop.run_in_executor(
|
|
184
198
|
None, partial(get_chunks_by_files, collection, filenames)
|
|
185
199
|
)
|
|
@@ -190,7 +204,7 @@ async def async_delete_file_chunks(
|
|
|
190
204
|
filename: str,
|
|
191
205
|
) -> None:
|
|
192
206
|
"""Async wrapper for delete_file_chunks."""
|
|
193
|
-
loop = asyncio.
|
|
207
|
+
loop = asyncio.get_running_loop()
|
|
194
208
|
await loop.run_in_executor(
|
|
195
209
|
None, partial(delete_file_chunks, collection, filename)
|
|
196
210
|
)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Global Sourcefire configuration — stored in ~/.sourcefire/
|
|
2
|
+
|
|
3
|
+
This directory holds user-level settings (API keys, preferences) that
|
|
4
|
+
apply across all projects. Separate from the per-project .sourcefire/
|
|
5
|
+
directory which holds index data and project config.
|
|
6
|
+
|
|
7
|
+
On uninstall, `sourcefire --uninstall` removes this directory.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
import platform
|
|
14
|
+
import tomllib
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
import tomli_w
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_global_dir() -> Path:
|
|
21
|
+
"""Return the global Sourcefire config directory.
|
|
22
|
+
|
|
23
|
+
- macOS/Linux: ~/.sourcefire/
|
|
24
|
+
- Windows: %APPDATA%/sourcefire/
|
|
25
|
+
"""
|
|
26
|
+
if platform.system() == "Windows":
|
|
27
|
+
base = Path(os.environ.get("APPDATA", Path.home() / "AppData" / "Roaming"))
|
|
28
|
+
return base / "sourcefire"
|
|
29
|
+
return Path.home() / ".sourcefire"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_global_config_path() -> Path:
|
|
33
|
+
return get_global_dir() / "config.toml"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def load_global_config() -> dict:
|
|
37
|
+
"""Load global config. Returns empty dict if not found."""
|
|
38
|
+
path = get_global_config_path()
|
|
39
|
+
if not path.is_file():
|
|
40
|
+
return {}
|
|
41
|
+
try:
|
|
42
|
+
return tomllib.loads(path.read_text(encoding="utf-8"))
|
|
43
|
+
except Exception:
|
|
44
|
+
return {}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def save_global_config(data: dict) -> None:
|
|
48
|
+
"""Save global config."""
|
|
49
|
+
path = get_global_config_path()
|
|
50
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
51
|
+
path.write_text(tomli_w.dumps(data), encoding="utf-8")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def get_api_key() -> str:
|
|
55
|
+
"""Get the Gemini API key. Checks in order:
|
|
56
|
+
|
|
57
|
+
1. GEMINI_API_KEY environment variable
|
|
58
|
+
2. Global config (~/.sourcefire/config.toml)
|
|
59
|
+
"""
|
|
60
|
+
# 1. Environment variable
|
|
61
|
+
key = os.getenv("GEMINI_API_KEY", "")
|
|
62
|
+
if key:
|
|
63
|
+
return key
|
|
64
|
+
|
|
65
|
+
# 2. Global config
|
|
66
|
+
config = load_global_config()
|
|
67
|
+
return config.get("gemini_api_key", "")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def save_api_key(key: str) -> None:
|
|
71
|
+
"""Save API key to global config."""
|
|
72
|
+
config = load_global_config()
|
|
73
|
+
config["gemini_api_key"] = key
|
|
74
|
+
save_global_config(config)
|
|
75
|
+
os.environ["GEMINI_API_KEY"] = key
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def uninstall() -> None:
|
|
79
|
+
"""Remove the global ~/.sourcefire/ directory."""
|
|
80
|
+
import shutil
|
|
81
|
+
|
|
82
|
+
global_dir = get_global_dir()
|
|
83
|
+
if global_dir.is_dir():
|
|
84
|
+
shutil.rmtree(global_dir)
|
|
85
|
+
print(f"Removed {global_dir}")
|
|
86
|
+
else:
|
|
87
|
+
print(f"Nothing to remove — {global_dir} does not exist.")
|
|
@@ -14,7 +14,7 @@ from typing import Any
|
|
|
14
14
|
import chromadb
|
|
15
15
|
|
|
16
16
|
from sourcefire.config import SourcefireConfig
|
|
17
|
-
from sourcefire.db import add_chunks, reset_collection, delete_file_chunks,
|
|
17
|
+
from sourcefire.db import add_chunks, reset_collection, delete_file_chunks, get_indexed_files_and_mtimes
|
|
18
18
|
from sourcefire.indexer.embeddings import embed_batch
|
|
19
19
|
from sourcefire.indexer.language_profiles import LanguageProfile, get_profile, get_profile_for_extension
|
|
20
20
|
from sourcefire.indexer.metadata import chunk_source_file, extract_metadata
|
|
@@ -74,6 +74,9 @@ def _chunk_plain_text(
|
|
|
74
74
|
if len(text) <= chunk_size:
|
|
75
75
|
return [text]
|
|
76
76
|
|
|
77
|
+
# Guard against infinite loop if overlap >= size
|
|
78
|
+
chunk_overlap = min(chunk_overlap, chunk_size - 1)
|
|
79
|
+
|
|
77
80
|
chunks: list[str] = []
|
|
78
81
|
start = 0
|
|
79
82
|
while start < len(text):
|
|
@@ -233,8 +236,7 @@ def run_indexing(
|
|
|
233
236
|
files_to_index = all_disk_files
|
|
234
237
|
elif not full:
|
|
235
238
|
# Incremental: compare mtimes
|
|
236
|
-
indexed_files =
|
|
237
|
-
stored_mtimes = get_stored_mtimes(collection)
|
|
239
|
+
indexed_files, stored_mtimes = get_indexed_files_and_mtimes(collection)
|
|
238
240
|
|
|
239
241
|
current_files: dict[str, Path] = {}
|
|
240
242
|
for f in all_disk_files:
|
|
@@ -401,6 +401,9 @@ async function pollStatus() {
|
|
|
401
401
|
if (data.language && data.language !== 'generic') {
|
|
402
402
|
langBadge.textContent = data.language;
|
|
403
403
|
}
|
|
404
|
+
if (data.project_name) {
|
|
405
|
+
document.title = data.project_name + ' — Sourcefire';
|
|
406
|
+
}
|
|
404
407
|
} catch(e) {
|
|
405
408
|
indexStatus.textContent = 'offline';
|
|
406
409
|
indexStatus.classList.add('is-error');
|
|
@@ -75,7 +75,7 @@ async def watch_and_reindex(
|
|
|
75
75
|
# Handle deletions
|
|
76
76
|
for rel in deleted_files:
|
|
77
77
|
try:
|
|
78
|
-
loop = asyncio.
|
|
78
|
+
loop = asyncio.get_running_loop()
|
|
79
79
|
await loop.run_in_executor(None, delete_file_chunks, collection, rel)
|
|
80
80
|
graph.remove_file(rel)
|
|
81
81
|
print(f"[watcher] Removed: {rel}")
|
|
@@ -85,7 +85,7 @@ async def watch_and_reindex(
|
|
|
85
85
|
# Handle additions/modifications
|
|
86
86
|
if changed_files:
|
|
87
87
|
try:
|
|
88
|
-
loop = asyncio.
|
|
88
|
+
loop = asyncio.get_running_loop()
|
|
89
89
|
file_imports = await loop.run_in_executor(
|
|
90
90
|
None, index_files, collection, changed_files, config, profile
|
|
91
91
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcefire
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Get instant context on any codebase. One command to index, ask questions in plain English, get answers grounded in actual code.
|
|
5
5
|
Author-email: Athar Wani <athar@cravv.com>
|
|
6
6
|
License: MIT
|
|
@@ -35,6 +35,8 @@ Requires-Dist: tree-sitter
|
|
|
35
35
|
Requires-Dist: python-dotenv
|
|
36
36
|
Requires-Dist: watchfiles
|
|
37
37
|
Requires-Dist: tomli-w
|
|
38
|
+
Provides-Extra: ast
|
|
39
|
+
Requires-Dist: tree-sitter-languages; extra == "ast"
|
|
38
40
|
Provides-Extra: dev
|
|
39
41
|
Requires-Dist: pytest; extra == "dev"
|
|
40
42
|
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
@@ -6,6 +6,7 @@ sourcefire/__init__.py
|
|
|
6
6
|
sourcefire/cli.py
|
|
7
7
|
sourcefire/config.py
|
|
8
8
|
sourcefire/db.py
|
|
9
|
+
sourcefire/global_config.py
|
|
9
10
|
sourcefire/init.py
|
|
10
11
|
sourcefire/watcher.py
|
|
11
12
|
sourcefire.egg-info/PKG-INFO
|
|
@@ -29,7 +30,6 @@ sourcefire/prompts/system.md
|
|
|
29
30
|
sourcefire/retriever/__init__.py
|
|
30
31
|
sourcefire/retriever/graph.py
|
|
31
32
|
sourcefire/retriever/search.py
|
|
32
|
-
sourcefire/static/.DS_Store
|
|
33
33
|
sourcefire/static/app.js
|
|
34
34
|
sourcefire/static/index.html
|
|
35
35
|
sourcefire/static/styles.css
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|