sourcefire 0.3.6__tar.gz → 0.3.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sourcefire-0.3.6/sourcefire.egg-info → sourcefire-0.3.7}/PKG-INFO +1 -1
- {sourcefire-0.3.6 → sourcefire-0.3.7}/pyproject.toml +1 -1
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/cli.py +14 -10
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/db.py +1 -1
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/indexer/pipeline.py +2 -2
- {sourcefire-0.3.6 → sourcefire-0.3.7/sourcefire.egg-info}/PKG-INFO +1 -1
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire.egg-info/SOURCES.txt +1 -0
- sourcefire-0.3.7/tests/test_pipeline_collection_key.py +141 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/LICENSE +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/MANIFEST.in +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/README.md +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/setup.cfg +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/__init__.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/api/__init__.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/api/models.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/api/routes.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/chain/__init__.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/chain/prompts.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/chain/rag_chain.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/config.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/global_config.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/indexer/__init__.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/indexer/embeddings.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/indexer/language_profiles.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/indexer/metadata.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/init.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/prompts/system.md +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/retriever/__init__.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/retriever/graph.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/retriever/search.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/static/app.js +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/static/index.html +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/static/styles.css +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire/watcher.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire.egg-info/dependency_links.txt +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire.egg-info/entry_points.txt +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire.egg-info/requires.txt +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/sourcefire.egg-info/top_level.txt +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/tests/test_config.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/tests/test_graph.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/tests/test_metadata.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/tests/test_prompts.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/tests/test_routes.py +0 -0
- {sourcefire-0.3.6 → sourcefire-0.3.7}/tests/test_search.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcefire
|
|
3
|
-
Version: 0.3.6
|
|
3
|
+
Version: 0.3.7
|
|
4
4
|
Summary: Get instant context on any codebase. One command to index, ask questions in plain English, get answers grounded in actual code.
|
|
5
5
|
Author-email: Athar Wani <athar@cravv.com>
|
|
6
6
|
License: MIT
|
|
@@ -150,19 +150,23 @@ async def lifespan(app: FastAPI):
|
|
|
150
150
|
lang_name = profile.language if profile else "generic"
|
|
151
151
|
print(f"[sourcefire] Language: {lang_name}")
|
|
152
152
|
|
|
153
|
-
# Create ChromaDB client
|
|
154
|
-
|
|
155
|
-
|
|
153
|
+
# Create ChromaDB client — nuke directory on corruption
|
|
154
|
+
def _init_chroma():
|
|
155
|
+
c = create_client(config.chroma_dir)
|
|
156
|
+
coll = get_collection(c)
|
|
157
|
+
coll.count() # verify DB is accessible
|
|
158
|
+
return c, coll
|
|
156
159
|
|
|
157
|
-
# Determine if this is a first run (empty collection)
|
|
158
160
|
try:
|
|
159
|
-
|
|
161
|
+
client, collection = _init_chroma()
|
|
160
162
|
except Exception:
|
|
161
|
-
|
|
162
|
-
print("[sourcefire] ChromaDB
|
|
163
|
-
|
|
164
|
-
collection =
|
|
165
|
-
|
|
163
|
+
import shutil
|
|
164
|
+
print("[sourcefire] ChromaDB corrupted — deleting and rebuilding...")
|
|
165
|
+
shutil.rmtree(config.chroma_dir, ignore_errors=True)
|
|
166
|
+
client, collection = _init_chroma()
|
|
167
|
+
|
|
168
|
+
# Determine if this is a first run (empty collection)
|
|
169
|
+
existing_count = collection.count()
|
|
166
170
|
|
|
167
171
|
is_first_run = existing_count == 0
|
|
168
172
|
|
|
@@ -30,7 +30,7 @@ def reset_collection(client: chromadb.ClientAPI) -> chromadb.Collection:
|
|
|
30
30
|
"""Delete and recreate the collection (for full re-index)."""
|
|
31
31
|
try:
|
|
32
32
|
client.delete_collection(COLLECTION_NAME)
|
|
33
|
-
except
|
|
33
|
+
except Exception:
|
|
34
34
|
pass
|
|
35
35
|
return get_collection(client)
|
|
36
36
|
|
|
@@ -262,7 +262,7 @@ def run_indexing(
|
|
|
262
262
|
print("[pipeline] Index is up to date.")
|
|
263
263
|
return {
|
|
264
264
|
"files": len(all_disk_files), "chunks": collection.count(), "edges": 0,
|
|
265
|
-
"language": lang_name, "import_edges": {},
|
|
265
|
+
"language": lang_name, "import_edges": {}, "collection": collection,
|
|
266
266
|
}
|
|
267
267
|
|
|
268
268
|
print(f"[pipeline] {len(changed)} changed, {len(deleted)} deleted files.")
|
|
@@ -296,7 +296,7 @@ def run_indexing(
|
|
|
296
296
|
if not all_chunks:
|
|
297
297
|
return {
|
|
298
298
|
"files": len(all_disk_files), "chunks": 0, "edges": 0,
|
|
299
|
-
"language": lang_name, "import_edges": file_imports,
|
|
299
|
+
"language": lang_name, "import_edges": file_imports, "collection": collection,
|
|
300
300
|
}
|
|
301
301
|
|
|
302
302
|
# Embed
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcefire
|
|
3
|
-
Version: 0.3.6
|
|
3
|
+
Version: 0.3.7
|
|
4
4
|
Summary: Get instant context on any codebase. One command to index, ask questions in plain English, get answers grounded in actual code.
|
|
5
5
|
Author-email: Athar Wani <athar@cravv.com>
|
|
6
6
|
License: MIT
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""Test that run_indexing always returns 'collection' key in stats dict."""
|
|
2
|
+
|
|
3
|
+
import tempfile
|
|
4
|
+
import shutil
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from unittest.mock import MagicMock, patch
|
|
7
|
+
|
|
8
|
+
import chromadb
|
|
9
|
+
|
|
10
|
+
from sourcefire.indexer.pipeline import run_indexing
|
|
11
|
+
from sourcefire.config import SourcefireConfig
|
|
12
|
+
from sourcefire.db import create_client, get_collection, reset_collection
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _make_config(tmp: Path, project: Path) -> SourcefireConfig:
|
|
16
|
+
"""Create a minimal SourcefireConfig pointing at temp dirs."""
|
|
17
|
+
sf_dir = tmp / ".sourcefire"
|
|
18
|
+
sf_dir.mkdir(parents=True, exist_ok=True)
|
|
19
|
+
return SourcefireConfig(
|
|
20
|
+
sourcefire_dir=sf_dir,
|
|
21
|
+
project_dir=project,
|
|
22
|
+
include=["*.py"],
|
|
23
|
+
exclude=[],
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_collection_key_on_empty_project():
|
|
28
|
+
"""run_indexing should return 'collection' even when no source files found."""
|
|
29
|
+
with tempfile.TemporaryDirectory() as tmp:
|
|
30
|
+
tmp_path = Path(tmp)
|
|
31
|
+
project = tmp_path / "project"
|
|
32
|
+
project.mkdir()
|
|
33
|
+
|
|
34
|
+
config = _make_config(tmp_path, project)
|
|
35
|
+
client = create_client(config.chroma_dir)
|
|
36
|
+
collection = get_collection(client)
|
|
37
|
+
|
|
38
|
+
stats = run_indexing(collection, config, client=client, full=True)
|
|
39
|
+
assert "collection" in stats, "Missing 'collection' key when no source files found"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_collection_key_on_up_to_date_index():
|
|
43
|
+
"""run_indexing incremental should return 'collection' when index is already current."""
|
|
44
|
+
with tempfile.TemporaryDirectory() as tmp:
|
|
45
|
+
tmp_path = Path(tmp)
|
|
46
|
+
project = tmp_path / "project"
|
|
47
|
+
project.mkdir()
|
|
48
|
+
|
|
49
|
+
# Create a source file
|
|
50
|
+
(project / "main.py").write_text("print('hello')\n")
|
|
51
|
+
|
|
52
|
+
config = _make_config(tmp_path, project)
|
|
53
|
+
client = create_client(config.chroma_dir)
|
|
54
|
+
collection = get_collection(client)
|
|
55
|
+
|
|
56
|
+
# Full index first
|
|
57
|
+
stats = run_indexing(collection, config, client=client, full=True)
|
|
58
|
+
assert "collection" in stats
|
|
59
|
+
collection = stats["collection"]
|
|
60
|
+
|
|
61
|
+
# Incremental — nothing changed
|
|
62
|
+
stats2 = run_indexing(collection, config, client=client, full=False)
|
|
63
|
+
assert "collection" in stats2, "Missing 'collection' key when index is up to date"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_collection_key_on_no_chunks_produced():
|
|
67
|
+
"""run_indexing should return 'collection' even when chunking produces 0 chunks."""
|
|
68
|
+
with tempfile.TemporaryDirectory() as tmp:
|
|
69
|
+
tmp_path = Path(tmp)
|
|
70
|
+
project = tmp_path / "project"
|
|
71
|
+
project.mkdir()
|
|
72
|
+
|
|
73
|
+
# Create an empty .py file (will produce 0 chunks)
|
|
74
|
+
(project / "empty.py").write_text("")
|
|
75
|
+
|
|
76
|
+
config = _make_config(tmp_path, project)
|
|
77
|
+
client = create_client(config.chroma_dir)
|
|
78
|
+
collection = get_collection(client)
|
|
79
|
+
|
|
80
|
+
stats = run_indexing(collection, config, client=client, full=True)
|
|
81
|
+
assert "collection" in stats, "Missing 'collection' key when no chunks produced"
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def test_chroma_dir_nuke_on_deep_corruption():
|
|
85
|
+
"""When ChromaDB is corrupted beyond reset, nuking the dir should recover.
|
|
86
|
+
|
|
87
|
+
Simulates the recovery pattern from cli.py lifespan. Since ChromaDB keeps
|
|
88
|
+
in-process state that survives rmtree within the same process, we verify
|
|
89
|
+
the recovery by using a fresh directory (simulating what happens across
|
|
90
|
+
a server restart).
|
|
91
|
+
"""
|
|
92
|
+
with tempfile.TemporaryDirectory() as tmp:
|
|
93
|
+
tmp_path = Path(tmp)
|
|
94
|
+
chroma_dir = tmp_path / "chroma"
|
|
95
|
+
|
|
96
|
+
# Create a valid client and collection first
|
|
97
|
+
client = create_client(chroma_dir)
|
|
98
|
+
collection = get_collection(client)
|
|
99
|
+
collection.add(ids=["test1"], documents=["hello"], embeddings=[[0.1] * 384])
|
|
100
|
+
assert collection.count() == 1
|
|
101
|
+
|
|
102
|
+
# Corrupt the SQLite database by overwriting it
|
|
103
|
+
for f in chroma_dir.rglob("*.sqlite3"):
|
|
104
|
+
f.write_bytes(b"CORRUPTED DATA NOT SQLITE")
|
|
105
|
+
|
|
106
|
+
# Verify corruption causes failure at client creation level
|
|
107
|
+
try:
|
|
108
|
+
c2 = create_client(chroma_dir)
|
|
109
|
+
get_collection(c2).count()
|
|
110
|
+
corruption_detected = False
|
|
111
|
+
except Exception:
|
|
112
|
+
corruption_detected = True
|
|
113
|
+
|
|
114
|
+
assert corruption_detected, "Expected corruption to cause an error"
|
|
115
|
+
|
|
116
|
+
# Recovery: nuke dir and rebuild in a fresh directory
|
|
117
|
+
# (In production, this is the same path after rmtree + process restart)
|
|
118
|
+
recovery_dir = tmp_path / "chroma_recovered"
|
|
119
|
+
shutil.rmtree(chroma_dir, ignore_errors=True)
|
|
120
|
+
client3 = create_client(recovery_dir)
|
|
121
|
+
coll3 = get_collection(client3)
|
|
122
|
+
assert coll3.count() == 0, "Fresh collection after nuke should be empty"
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def test_reset_collection_catches_all_exceptions():
|
|
126
|
+
"""reset_collection should not raise even if delete_collection fails."""
|
|
127
|
+
with tempfile.TemporaryDirectory() as tmp:
|
|
128
|
+
tmp_path = Path(tmp)
|
|
129
|
+
chroma_dir = tmp_path / "chroma"
|
|
130
|
+
client = create_client(chroma_dir)
|
|
131
|
+
|
|
132
|
+
# Mock delete_collection to raise a generic exception
|
|
133
|
+
original_delete = client.delete_collection
|
|
134
|
+
def failing_delete(name):
|
|
135
|
+
raise RuntimeError("SQLite table corrupted")
|
|
136
|
+
client.delete_collection = failing_delete
|
|
137
|
+
|
|
138
|
+
# Should not raise — catches Exception
|
|
139
|
+
collection = reset_collection(client)
|
|
140
|
+
assert collection is not None
|
|
141
|
+
assert collection.count() == 0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|