sourcefire 0.2.1__tar.gz → 0.3.0__tar.gz

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (44)
  1. {sourcefire-0.2.1/sourcefire.egg-info → sourcefire-0.3.0}/PKG-INFO +3 -1
  2. {sourcefire-0.2.1 → sourcefire-0.3.0}/pyproject.toml +2 -1
  3. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/api/models.py +1 -0
  4. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/api/routes.py +3 -2
  5. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/chain/rag_chain.py +14 -14
  6. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/cli.py +101 -13
  7. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/config.py +13 -2
  8. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/db.py +36 -22
  9. sourcefire-0.3.0/sourcefire/global_config.py +87 -0
  10. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/indexer/pipeline.py +5 -3
  11. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/static/app.js +3 -0
  12. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/watcher.py +2 -2
  13. {sourcefire-0.2.1 → sourcefire-0.3.0/sourcefire.egg-info}/PKG-INFO +3 -1
  14. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire.egg-info/SOURCES.txt +1 -1
  15. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire.egg-info/requires.txt +3 -0
  16. sourcefire-0.2.1/sourcefire/static/.DS_Store +0 -0
  17. {sourcefire-0.2.1 → sourcefire-0.3.0}/LICENSE +0 -0
  18. {sourcefire-0.2.1 → sourcefire-0.3.0}/MANIFEST.in +0 -0
  19. {sourcefire-0.2.1 → sourcefire-0.3.0}/README.md +0 -0
  20. {sourcefire-0.2.1 → sourcefire-0.3.0}/setup.cfg +0 -0
  21. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/__init__.py +0 -0
  22. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/api/__init__.py +0 -0
  23. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/chain/__init__.py +0 -0
  24. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/chain/prompts.py +0 -0
  25. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/indexer/__init__.py +0 -0
  26. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/indexer/embeddings.py +0 -0
  27. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/indexer/language_profiles.py +0 -0
  28. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/indexer/metadata.py +0 -0
  29. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/init.py +0 -0
  30. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/prompts/system.md +0 -0
  31. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/retriever/__init__.py +0 -0
  32. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/retriever/graph.py +0 -0
  33. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/retriever/search.py +0 -0
  34. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/static/index.html +0 -0
  35. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire/static/styles.css +0 -0
  36. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire.egg-info/dependency_links.txt +0 -0
  37. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire.egg-info/entry_points.txt +0 -0
  38. {sourcefire-0.2.1 → sourcefire-0.3.0}/sourcefire.egg-info/top_level.txt +0 -0
  39. {sourcefire-0.2.1 → sourcefire-0.3.0}/tests/test_config.py +0 -0
  40. {sourcefire-0.2.1 → sourcefire-0.3.0}/tests/test_graph.py +0 -0
  41. {sourcefire-0.2.1 → sourcefire-0.3.0}/tests/test_metadata.py +0 -0
  42. {sourcefire-0.2.1 → sourcefire-0.3.0}/tests/test_prompts.py +0 -0
  43. {sourcefire-0.2.1 → sourcefire-0.3.0}/tests/test_routes.py +0 -0
  44. {sourcefire-0.2.1 → sourcefire-0.3.0}/tests/test_search.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcefire
3
- Version: 0.2.1
3
+ Version: 0.3.0
4
4
  Summary: Get instant context on any codebase. One command to index, ask questions in plain English, get answers grounded in actual code.
5
5
  Author-email: Athar Wani <athar@cravv.com>
6
6
  License: MIT
@@ -35,6 +35,8 @@ Requires-Dist: tree-sitter
35
35
  Requires-Dist: python-dotenv
36
36
  Requires-Dist: watchfiles
37
37
  Requires-Dist: tomli-w
38
+ Provides-Extra: ast
39
+ Requires-Dist: tree-sitter-languages; extra == "ast"
38
40
  Provides-Extra: dev
39
41
  Requires-Dist: pytest; extra == "dev"
40
42
  Requires-Dist: pytest-asyncio; extra == "dev"
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sourcefire"
3
- version = "0.2.1"
3
+ version = "0.3.0"
4
4
  description = "Get instant context on any codebase. One command to index, ask questions in plain English, get answers grounded in actual code."
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -39,6 +39,7 @@ dependencies = [
39
39
  ]
40
40
 
41
41
  [project.optional-dependencies]
42
+ ast = ["tree-sitter-languages"]
42
43
  dev = ["pytest", "pytest-asyncio", "httpx"]
43
44
 
44
45
  [project.urls]
@@ -17,6 +17,7 @@ class StatusResponse(BaseModel):
17
17
  last_indexed: str
18
18
  index_status: str
19
19
  language: str = "generic"
20
+ project_name: str = "Sourcefire"
20
21
 
21
22
 
22
23
  class SourceResponse(BaseModel):
@@ -27,6 +27,7 @@ _index_status: dict[str, Any] = {
27
27
  "last_indexed": "never",
28
28
  "index_status": "not_ready",
29
29
  "language": "generic",
30
+ "project_name": "Sourcefire",
30
31
  }
31
32
 
32
33
 
@@ -135,10 +136,9 @@ async def sources(path: str = Query(..., description="Relative path within the c
135
136
  if _project_dir is None:
136
137
  raise HTTPException(status_code=503, detail="Project directory not initialized.")
137
138
 
138
- codebase_resolved = _project_dir.resolve()
139
139
  full_path = (_project_dir / path).resolve()
140
140
 
141
- if not str(full_path).startswith(str(codebase_resolved)):
141
+ if not full_path.is_relative_to(_project_dir.resolve()):
142
142
  raise HTTPException(status_code=400, detail="Path traversal detected.")
143
143
 
144
144
  if not full_path.is_file():
@@ -163,4 +163,5 @@ async def status() -> StatusResponse:
163
163
  last_indexed=str(_index_status.get("last_indexed", "never")),
164
164
  index_status=str(_index_status.get("index_status", "not_ready")),
165
165
  language=str(_index_status.get("language", "generic")),
166
+ project_name=str(_index_status.get("project_name", "Sourcefire")),
166
167
  )
@@ -181,7 +181,7 @@ async def retrieve_for_mode(
181
181
  profile: LanguageProfile | None = None,
182
182
  ) -> list[dict[str, Any]]:
183
183
  """Embed *query* and dispatch to the mode-specific retriever."""
184
- loop = asyncio.get_event_loop()
184
+ loop = asyncio.get_running_loop()
185
185
  query_vector: list[float] = await loop.run_in_executor(None, embed_text, query)
186
186
 
187
187
  if mode == "debug":
@@ -213,7 +213,7 @@ def _get_tools(
213
213
  Provide the relative filepath (e.g. 'src/main.py').
214
214
  """
215
215
  full_path = (_project_dir / filepath).resolve()
216
- if not str(full_path).startswith(str(_project_dir.resolve())):
216
+ if not full_path.is_relative_to(_project_dir.resolve()):
217
217
  return "Error: Path traversal not allowed."
218
218
  if not full_path.is_file():
219
219
  return f"Error: File '{filepath}' not found in the codebase."
@@ -236,7 +236,7 @@ def _get_tools(
236
236
  else:
237
237
  full_path = (_project_dir / dir_path).resolve()
238
238
 
239
- if not str(full_path).startswith(str(_project_dir.resolve())):
239
+ if not full_path.is_relative_to(_project_dir.resolve()):
240
240
  return "Error: Path traversal not allowed."
241
241
  if not full_path.is_dir():
242
242
  return f"Error: Directory '{dir_path}' not found."
@@ -270,7 +270,7 @@ def _get_tools(
270
270
  full_path = _project_dir if dir_path == "." else (_project_dir / dir_path)
271
271
  full_path = full_path.resolve()
272
272
 
273
- if not str(full_path).startswith(str(_project_dir.resolve())):
273
+ if not full_path.is_relative_to(_project_dir.resolve()):
274
274
  return "Error: Path traversal not allowed."
275
275
 
276
276
  if not full_path.is_dir() and not full_path.is_file():
@@ -362,7 +362,7 @@ def _get_tools(
362
362
  else:
363
363
  full_path = (_project_dir / dir_path).resolve()
364
364
 
365
- if not str(full_path).startswith(str(_project_dir.resolve())):
365
+ if not full_path.is_relative_to(_project_dir.resolve()):
366
366
  return "Error: Path traversal not allowed."
367
367
  if not full_path.is_dir():
368
368
  return f"Error: Directory '{dir_path}' not found."
@@ -401,7 +401,7 @@ def _get_tools(
401
401
  Provide relative filepath.
402
402
  """
403
403
  full_path = (_project_dir / filepath).resolve()
404
- if not str(full_path).startswith(str(_project_dir.resolve())):
404
+ if not full_path.is_relative_to(_project_dir.resolve()):
405
405
  return "Error: Path traversal not allowed."
406
406
  try:
407
407
  result = subprocess.run(
@@ -423,7 +423,7 @@ def _get_tools(
423
423
  Use when you need to know who last changed specific lines and why.
424
424
  """
425
425
  full_path = (_project_dir / filepath).resolve()
426
- if not str(full_path).startswith(str(_project_dir.resolve())):
426
+ if not full_path.is_relative_to(_project_dir.resolve()):
427
427
  return "Error: Path traversal not allowed."
428
428
  try:
429
429
  result = subprocess.run(
@@ -444,7 +444,7 @@ def _get_tools(
444
444
  Line numbers are 1-based. Returns lines with line numbers prefixed.
445
445
  """
446
446
  full_path = (_project_dir / filepath).resolve()
447
- if not str(full_path).startswith(str(_project_dir.resolve())):
447
+ if not full_path.is_relative_to(_project_dir.resolve()):
448
448
  return "Error: Path traversal not allowed."
449
449
  if not full_path.is_file():
450
450
  return f"Error: File '{filepath}' not found."
@@ -468,7 +468,7 @@ def _get_tools(
468
468
  import re as re_mod
469
469
  full_path = _project_dir if dir_path == "." else (_project_dir / dir_path)
470
470
  full_path = full_path.resolve()
471
- if not str(full_path).startswith(str(_project_dir.resolve())):
471
+ if not full_path.is_relative_to(_project_dir.resolve()):
472
472
  return "Error: Path traversal not allowed."
473
473
 
474
474
  try:
@@ -507,7 +507,7 @@ def _get_tools(
507
507
  """
508
508
  full_path = _project_dir if dir_path == "." else (_project_dir / dir_path)
509
509
  full_path = full_path.resolve()
510
- if not str(full_path).startswith(str(_project_dir.resolve())):
510
+ if not full_path.is_relative_to(_project_dir.resolve()):
511
511
  return "Error: Path traversal not allowed."
512
512
 
513
513
  results = []
@@ -542,7 +542,7 @@ def _get_tools(
542
542
  cmd = ["git", "diff", "--stat", "-p", ref]
543
543
  if filepath:
544
544
  full_path = (_project_dir / filepath).resolve()
545
- if not str(full_path).startswith(str(_project_dir.resolve())):
545
+ if not full_path.is_relative_to(_project_dir.resolve()):
546
546
  return "Error: Path traversal not allowed."
547
547
  cmd.extend(["--", filepath])
548
548
  try:
@@ -600,7 +600,7 @@ def _get_tools(
600
600
  Use to quickly assess file complexity and recency.
601
601
  """
602
602
  full_path = (_project_dir / filepath).resolve()
603
- if not str(full_path).startswith(str(_project_dir.resolve())):
603
+ if not full_path.is_relative_to(_project_dir.resolve()):
604
604
  return "Error: Path traversal not allowed."
605
605
  if not full_path.is_file():
606
606
  return f"Error: File '{filepath}' not found."
@@ -667,7 +667,7 @@ def _get_tools(
667
667
  (functions/methods invoked inside the given function's body).
668
668
  """
669
669
  full_path = (_project_dir / filepath).resolve()
670
- if not str(full_path).startswith(str(_project_dir.resolve())):
670
+ if not full_path.is_relative_to(_project_dir.resolve()):
671
671
  return "Error: Path traversal not allowed."
672
672
  if not full_path.is_file():
673
673
  return f"Error: File '{filepath}' not found."
@@ -771,7 +771,7 @@ def _get_tools(
771
771
  return "Error: Vector database not available."
772
772
  try:
773
773
  full_path = (_project_dir / filepath).resolve()
774
- if not str(full_path).startswith(str(_project_dir.resolve())):
774
+ if not full_path.is_relative_to(_project_dir.resolve()):
775
775
  return "Error: Path traversal not allowed."
776
776
  if not full_path.is_file():
777
777
  return f"Error: File '{filepath}' not found."
@@ -4,14 +4,19 @@ from __future__ import annotations
4
4
 
5
5
  import argparse
6
6
  import asyncio
7
- import fcntl
8
7
  import os
8
+ import socket
9
9
  import sys
10
10
  import webbrowser
11
11
  from contextlib import asynccontextmanager
12
12
  from datetime import datetime, timezone
13
13
  from pathlib import Path
14
14
 
15
+ if sys.platform == "win32":
16
+ import msvcrt
17
+ else:
18
+ import fcntl
19
+
15
20
  import uvicorn
16
21
  from dotenv import load_dotenv
17
22
  from fastapi import FastAPI
@@ -30,6 +35,8 @@ def parse_args() -> argparse.Namespace:
30
35
  parser.add_argument("--no-open", action="store_true", help="Don't auto-open browser")
31
36
  parser.add_argument("--reinit", action="store_true", help="Regenerate .sourcefire/config.toml via LLM")
32
37
  parser.add_argument("--verbose", action="store_true", help="Verbose logging")
38
+ parser.add_argument("--uninstall", action="store_true", help="Remove global ~/.sourcefire/ config directory")
39
+ parser.add_argument("--version", action="store_true", help="Show version and exit")
33
40
  return parser.parse_args()
34
41
 
35
42
 
@@ -58,7 +65,10 @@ def acquire_lock(lock_path: Path) -> int | None:
58
65
  lock_path.parent.mkdir(parents=True, exist_ok=True)
59
66
  try:
60
67
  fd = os.open(str(lock_path), os.O_CREAT | os.O_RDWR)
61
- fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
68
+ if sys.platform == "win32":
69
+ msvcrt.locking(fd, msvcrt.LK_NBLCK, 1)
70
+ else:
71
+ fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
62
72
  return fd
63
73
  except (OSError, BlockingIOError):
64
74
  return None
@@ -67,12 +77,34 @@ def acquire_lock(lock_path: Path) -> int | None:
67
77
  def release_lock(fd: int, lock_path: Path) -> None:
68
78
  """Release the file lock."""
69
79
  try:
70
- fcntl.flock(fd, fcntl.LOCK_UN)
80
+ if sys.platform == "win32":
81
+ msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
82
+ else:
83
+ fcntl.flock(fd, fcntl.LOCK_UN)
71
84
  os.close(fd)
72
85
  except OSError:
73
86
  pass
74
87
 
75
88
 
89
+ def _port_available(host: str, port: int) -> bool:
90
+ """Check if a port is available."""
91
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
92
+ try:
93
+ s.bind((host, port))
94
+ return True
95
+ except OSError:
96
+ return False
97
+
98
+
99
+ def find_available_port(host: str, preferred: int, max_attempts: int = 20) -> int:
100
+ """Find an available port, starting from preferred and incrementing."""
101
+ for offset in range(max_attempts):
102
+ port = preferred + offset
103
+ if _port_available(host, port):
104
+ return port
105
+ raise RuntimeError(f"No available port found in range {preferred}-{preferred + max_attempts - 1}")
106
+
107
+
76
108
  # ---------------------------------------------------------------------------
77
109
  # App state (shared between main() and lifespan)
78
110
  # ---------------------------------------------------------------------------
@@ -148,6 +180,7 @@ async def lifespan(app: FastAPI):
148
180
  "last_indexed": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
149
181
  "index_status": "ready",
150
182
  "language": lang_name,
183
+ "project_name": config.project_name or project_dir.name,
151
184
  }
152
185
 
153
186
  # Inject dependencies into routes
@@ -183,14 +216,20 @@ async def lifespan(app: FastAPI):
183
216
  # App
184
217
  # ---------------------------------------------------------------------------
185
218
 
219
+ from importlib.metadata import version as _pkg_version
186
220
  from importlib.resources import files as _resource_files
187
221
 
188
222
  _static_dir = str(Path(_resource_files("sourcefire")) / "static")
189
223
 
224
+ try:
225
+ _version = _pkg_version("sourcefire")
226
+ except Exception:
227
+ _version = "0.0.0"
228
+
190
229
  app = FastAPI(
191
230
  title="Sourcefire",
192
231
  description="AI-powered codebase RAG. Created by Athar Wani.",
193
- version="0.2.0",
232
+ version=_version,
194
233
  lifespan=lifespan,
195
234
  )
196
235
 
@@ -214,18 +253,59 @@ def main() -> None:
214
253
  """Sourcefire CLI entry point."""
215
254
  args = parse_args()
216
255
 
256
+ # Handle --version
257
+ if args.version:
258
+ from importlib.metadata import version as _get_ver
259
+ try:
260
+ print(f"sourcefire {_get_ver('sourcefire')}")
261
+ except Exception:
262
+ print("sourcefire (version unknown)")
263
+ return
264
+
265
+ # Handle --uninstall
266
+ if args.uninstall:
267
+ from sourcefire.global_config import uninstall
268
+ uninstall()
269
+ return
270
+
217
271
  project_dir, sourcefire_dir = discover_project()
218
272
 
273
+ # Safety check: warn if running in a broad directory (home, /, etc.)
274
+ needs_init = not sourcefire_dir.exists() or not (sourcefire_dir / "config.toml").exists()
275
+ if needs_init:
276
+ dangerous_dirs = {
277
+ Path.home().resolve(),
278
+ Path("/").resolve(),
279
+ }
280
+ # Also flag common broad directories
281
+ for name in ("Documents", "Downloads", "Desktop"):
282
+ dangerous_dirs.add((Path.home() / name).resolve())
283
+
284
+ if project_dir.resolve() in dangerous_dirs:
285
+ print(f"\n WARNING: You are about to index: {project_dir.resolve()}")
286
+ print(" This is a broad directory and may index thousands of files.\n")
287
+ try:
288
+ confirm = input(" Do you trust this folder? (yes/no): ").strip().lower()
289
+ except (EOFError, KeyboardInterrupt):
290
+ print("\nAborted.")
291
+ sys.exit(1)
292
+ if confirm not in ("yes", "y"):
293
+ print("Aborted. Run sourcefire from a project directory instead.")
294
+ sys.exit(0)
295
+
219
296
  # Acquire lock
220
297
  lock_fd = acquire_lock(sourcefire_dir / ".lock")
221
298
  if lock_fd is None:
222
299
  print("Error: Another sourcefire instance is already running for this project.")
223
300
  sys.exit(1)
224
301
 
225
- # Check for API key prompt interactively if missing
226
- api_key = os.getenv("GEMINI_API_KEY", "")
302
+ # Check for API key: env var -> ~/.sourcefire/config.toml -> prompt
303
+ from sourcefire.global_config import get_api_key, save_api_key, get_global_dir
304
+
305
+ api_key = get_api_key()
227
306
  if not api_key:
228
- print("No GEMINI_API_KEY found in environment.")
307
+ print("No Gemini API key found.")
308
+ print(f"It will be saved to {get_global_dir() / 'config.toml'} (global, works across all projects).\n")
229
309
  try:
230
310
  api_key = input("Enter your Gemini API key: ").strip()
231
311
  except (EOFError, KeyboardInterrupt):
@@ -238,12 +318,8 @@ def main() -> None:
238
318
  release_lock(lock_fd, sourcefire_dir / ".lock")
239
319
  sys.exit(1)
240
320
 
241
- # Persist to .env in project root
242
- env_path = project_dir / ".env"
243
- with open(env_path, "a") as f:
244
- f.write(f"\nGEMINI_API_KEY={api_key}\n")
245
- os.environ["GEMINI_API_KEY"] = api_key
246
- print(f"API key saved to {env_path}")
321
+ save_api_key(api_key)
322
+ print(f"API key saved to {get_global_dir() / 'config.toml'}\n")
247
323
 
248
324
  # Auto-init or reinit
249
325
  needs_init = not sourcefire_dir.exists() or not (sourcefire_dir / "config.toml").exists()
@@ -268,6 +344,18 @@ def main() -> None:
268
344
  if args.port:
269
345
  config.port = args.port
270
346
 
347
+ # Find available port (auto-increment if taken)
348
+ try:
349
+ actual_port = find_available_port(config.host, config.port)
350
+ except RuntimeError:
351
+ print(f"Error: No available port found starting from {config.port}.")
352
+ release_lock(lock_fd, sourcefire_dir / ".lock")
353
+ sys.exit(1)
354
+
355
+ if actual_port != config.port:
356
+ print(f"Port {config.port} is in use, using {actual_port} instead.")
357
+ config.port = actual_port
358
+
271
359
  # Store state for lifespan access
272
360
  _app_state["config"] = config
273
361
  _app_state["project_dir"] = project_dir
@@ -87,8 +87,19 @@ def default_config(project_dir: Path) -> SourcefireConfig:
87
87
  def load_config(project_dir: Path, sourcefire_dir: Path) -> SourcefireConfig:
88
88
  """Load config from .sourcefire/config.toml."""
89
89
  config_path = sourcefire_dir / "config.toml"
90
- raw = config_path.read_text(encoding="utf-8")
91
- data = tomllib.loads(raw)
90
+ try:
91
+ raw = config_path.read_text(encoding="utf-8")
92
+ except FileNotFoundError:
93
+ print(f"Error: Config file not found at {config_path}")
94
+ print("Run `sourcefire --reinit` to regenerate it.")
95
+ raise SystemExit(1)
96
+
97
+ try:
98
+ data = tomllib.loads(raw)
99
+ except tomllib.TOMLDecodeError as exc:
100
+ print(f"Error: Invalid TOML in {config_path}: {exc}")
101
+ print("Fix the syntax or run `sourcefire --reinit` to regenerate.")
102
+ raise SystemExit(1)
92
103
 
93
104
  project = data.get("project", {})
94
105
  indexer = data.get("indexer", {})
@@ -131,29 +131,43 @@ def get_chunks_by_files(
131
131
  return rows
132
132
 
133
133
 
134
- def get_indexed_files(collection: chromadb.Collection) -> set[str]:
135
- """Return set of all filenames currently in the collection."""
136
- results = collection.get(include=["metadatas"])
134
+ def get_indexed_files_and_mtimes(collection: chromadb.Collection) -> tuple[set[str], dict[str, float]]:
135
+ """Return (set of filenames, {filename: mtime}) for all indexed chunks.
136
+
137
+ Uses pagination to avoid loading the entire collection into memory at once.
138
+ """
137
139
  files: set[str] = set()
138
- if results["metadatas"]:
140
+ mtimes: dict[str, float] = {}
141
+ batch_size = 10000
142
+ offset = 0
143
+
144
+ total = collection.count()
145
+ if total == 0:
146
+ return files, mtimes
147
+
148
+ while offset < total:
149
+ results = collection.get(
150
+ include=["metadatas"],
151
+ limit=batch_size,
152
+ offset=offset,
153
+ )
154
+ if not results["metadatas"]:
155
+ break
139
156
  for meta in results["metadatas"]:
140
157
  if meta and "filename" in meta:
141
- files.add(meta["filename"])
142
- return files
143
-
158
+ fname = meta["filename"]
159
+ files.add(fname)
160
+ if "mtime" in meta:
161
+ try:
162
+ stored = float(meta["mtime"])
163
+ # Keep the max mtime per file (chunks share the same mtime)
164
+ if fname not in mtimes or stored > mtimes[fname]:
165
+ mtimes[fname] = stored
166
+ except (ValueError, TypeError):
167
+ pass
168
+ offset += batch_size
144
169
 
145
- def get_stored_mtimes(collection: chromadb.Collection) -> dict[str, float]:
146
- """Get stored mtimes for all indexed files from ChromaDB metadata."""
147
- results = collection.get(include=["metadatas"])
148
- mtimes: dict[str, float] = {}
149
- if results["metadatas"]:
150
- for meta in results["metadatas"]:
151
- if meta and "filename" in meta and "mtime" in meta:
152
- try:
153
- mtimes[meta["filename"]] = float(meta["mtime"])
154
- except (ValueError, TypeError):
155
- pass
156
- return mtimes
170
+ return files, mtimes
157
171
 
158
172
 
159
173
  # ---------------------------------------------------------------------------
@@ -168,7 +182,7 @@ async def async_query_similar(
168
182
  where: dict | None = None,
169
183
  ) -> list[dict[str, Any]]:
170
184
  """Async wrapper for query_similar."""
171
- loop = asyncio.get_event_loop()
185
+ loop = asyncio.get_running_loop()
172
186
  return await loop.run_in_executor(
173
187
  None, partial(query_similar, collection, query_embedding, n_results, where)
174
188
  )
@@ -179,7 +193,7 @@ async def async_get_chunks_by_files(
179
193
  filenames: list[str],
180
194
  ) -> list[dict[str, Any]]:
181
195
  """Async wrapper for get_chunks_by_files."""
182
- loop = asyncio.get_event_loop()
196
+ loop = asyncio.get_running_loop()
183
197
  return await loop.run_in_executor(
184
198
  None, partial(get_chunks_by_files, collection, filenames)
185
199
  )
@@ -190,7 +204,7 @@ async def async_delete_file_chunks(
190
204
  filename: str,
191
205
  ) -> None:
192
206
  """Async wrapper for delete_file_chunks."""
193
- loop = asyncio.get_event_loop()
207
+ loop = asyncio.get_running_loop()
194
208
  await loop.run_in_executor(
195
209
  None, partial(delete_file_chunks, collection, filename)
196
210
  )
@@ -0,0 +1,87 @@
1
+ """Global Sourcefire configuration — stored in ~/.sourcefire/
2
+
3
+ This directory holds user-level settings (API keys, preferences) that
4
+ apply across all projects. Separate from the per-project .sourcefire/
5
+ directory which holds index data and project config.
6
+
7
+ On uninstall, `sourcefire --uninstall` removes this directory.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import os
13
+ import platform
14
+ import tomllib
15
+ from pathlib import Path
16
+
17
+ import tomli_w
18
+
19
+
20
+ def get_global_dir() -> Path:
21
+ """Return the global Sourcefire config directory.
22
+
23
+ - macOS/Linux: ~/.sourcefire/
24
+ - Windows: %APPDATA%/sourcefire/
25
+ """
26
+ if platform.system() == "Windows":
27
+ base = Path(os.environ.get("APPDATA", Path.home() / "AppData" / "Roaming"))
28
+ return base / "sourcefire"
29
+ return Path.home() / ".sourcefire"
30
+
31
+
32
+ def get_global_config_path() -> Path:
33
+ return get_global_dir() / "config.toml"
34
+
35
+
36
+ def load_global_config() -> dict:
37
+ """Load global config. Returns empty dict if not found."""
38
+ path = get_global_config_path()
39
+ if not path.is_file():
40
+ return {}
41
+ try:
42
+ return tomllib.loads(path.read_text(encoding="utf-8"))
43
+ except Exception:
44
+ return {}
45
+
46
+
47
+ def save_global_config(data: dict) -> None:
48
+ """Save global config."""
49
+ path = get_global_config_path()
50
+ path.parent.mkdir(parents=True, exist_ok=True)
51
+ path.write_text(tomli_w.dumps(data), encoding="utf-8")
52
+
53
+
54
+ def get_api_key() -> str:
55
+ """Get the Gemini API key. Checks in order:
56
+
57
+ 1. GEMINI_API_KEY environment variable
58
+ 2. Global config (~/.sourcefire/config.toml)
59
+ """
60
+ # 1. Environment variable
61
+ key = os.getenv("GEMINI_API_KEY", "")
62
+ if key:
63
+ return key
64
+
65
+ # 2. Global config
66
+ config = load_global_config()
67
+ return config.get("gemini_api_key", "")
68
+
69
+
70
+ def save_api_key(key: str) -> None:
71
+ """Save API key to global config."""
72
+ config = load_global_config()
73
+ config["gemini_api_key"] = key
74
+ save_global_config(config)
75
+ os.environ["GEMINI_API_KEY"] = key
76
+
77
+
78
+ def uninstall() -> None:
79
+ """Remove the global ~/.sourcefire/ directory."""
80
+ import shutil
81
+
82
+ global_dir = get_global_dir()
83
+ if global_dir.is_dir():
84
+ shutil.rmtree(global_dir)
85
+ print(f"Removed {global_dir}")
86
+ else:
87
+ print(f"Nothing to remove — {global_dir} does not exist.")
@@ -14,7 +14,7 @@ from typing import Any
14
14
  import chromadb
15
15
 
16
16
  from sourcefire.config import SourcefireConfig
17
- from sourcefire.db import add_chunks, reset_collection, delete_file_chunks, get_indexed_files, get_stored_mtimes
17
+ from sourcefire.db import add_chunks, reset_collection, delete_file_chunks, get_indexed_files_and_mtimes
18
18
  from sourcefire.indexer.embeddings import embed_batch
19
19
  from sourcefire.indexer.language_profiles import LanguageProfile, get_profile, get_profile_for_extension
20
20
  from sourcefire.indexer.metadata import chunk_source_file, extract_metadata
@@ -74,6 +74,9 @@ def _chunk_plain_text(
74
74
  if len(text) <= chunk_size:
75
75
  return [text]
76
76
 
77
+ # Guard against infinite loop if overlap >= size
78
+ chunk_overlap = min(chunk_overlap, chunk_size - 1)
79
+
77
80
  chunks: list[str] = []
78
81
  start = 0
79
82
  while start < len(text):
@@ -233,8 +236,7 @@ def run_indexing(
233
236
  files_to_index = all_disk_files
234
237
  elif not full:
235
238
  # Incremental: compare mtimes
236
- indexed_files = get_indexed_files(collection)
237
- stored_mtimes = get_stored_mtimes(collection)
239
+ indexed_files, stored_mtimes = get_indexed_files_and_mtimes(collection)
238
240
 
239
241
  current_files: dict[str, Path] = {}
240
242
  for f in all_disk_files:
@@ -401,6 +401,9 @@ async function pollStatus() {
401
401
  if (data.language && data.language !== 'generic') {
402
402
  langBadge.textContent = data.language;
403
403
  }
404
+ if (data.project_name) {
405
+ document.title = data.project_name + ' — Sourcefire';
406
+ }
404
407
  } catch(e) {
405
408
  indexStatus.textContent = 'offline';
406
409
  indexStatus.classList.add('is-error');
@@ -75,7 +75,7 @@ async def watch_and_reindex(
75
75
  # Handle deletions
76
76
  for rel in deleted_files:
77
77
  try:
78
- loop = asyncio.get_event_loop()
78
+ loop = asyncio.get_running_loop()
79
79
  await loop.run_in_executor(None, delete_file_chunks, collection, rel)
80
80
  graph.remove_file(rel)
81
81
  print(f"[watcher] Removed: {rel}")
@@ -85,7 +85,7 @@ async def watch_and_reindex(
85
85
  # Handle additions/modifications
86
86
  if changed_files:
87
87
  try:
88
- loop = asyncio.get_event_loop()
88
+ loop = asyncio.get_running_loop()
89
89
  file_imports = await loop.run_in_executor(
90
90
  None, index_files, collection, changed_files, config, profile
91
91
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcefire
3
- Version: 0.2.1
3
+ Version: 0.3.0
4
4
  Summary: Get instant context on any codebase. One command to index, ask questions in plain English, get answers grounded in actual code.
5
5
  Author-email: Athar Wani <athar@cravv.com>
6
6
  License: MIT
@@ -35,6 +35,8 @@ Requires-Dist: tree-sitter
35
35
  Requires-Dist: python-dotenv
36
36
  Requires-Dist: watchfiles
37
37
  Requires-Dist: tomli-w
38
+ Provides-Extra: ast
39
+ Requires-Dist: tree-sitter-languages; extra == "ast"
38
40
  Provides-Extra: dev
39
41
  Requires-Dist: pytest; extra == "dev"
40
42
  Requires-Dist: pytest-asyncio; extra == "dev"
@@ -6,6 +6,7 @@ sourcefire/__init__.py
6
6
  sourcefire/cli.py
7
7
  sourcefire/config.py
8
8
  sourcefire/db.py
9
+ sourcefire/global_config.py
9
10
  sourcefire/init.py
10
11
  sourcefire/watcher.py
11
12
  sourcefire.egg-info/PKG-INFO
@@ -29,7 +30,6 @@ sourcefire/prompts/system.md
29
30
  sourcefire/retriever/__init__.py
30
31
  sourcefire/retriever/graph.py
31
32
  sourcefire/retriever/search.py
32
- sourcefire/static/.DS_Store
33
33
  sourcefire/static/app.js
34
34
  sourcefire/static/index.html
35
35
  sourcefire/static/styles.css
@@ -10,6 +10,9 @@ python-dotenv
10
10
  watchfiles
11
11
  tomli-w
12
12
 
13
+ [ast]
14
+ tree-sitter-languages
15
+
13
16
  [dev]
14
17
  pytest
15
18
  pytest-asyncio
File without changes
File without changes
File without changes
File without changes