sourcefire 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. sourcefire-0.2.0/LICENSE +21 -0
  2. sourcefire-0.2.0/MANIFEST.in +4 -0
  3. sourcefire-0.2.0/PKG-INFO +145 -0
  4. sourcefire-0.2.0/README.md +102 -0
  5. sourcefire-0.2.0/pyproject.toml +60 -0
  6. sourcefire-0.2.0/setup.cfg +4 -0
  7. sourcefire-0.2.0/sourcefire/__init__.py +0 -0
  8. sourcefire-0.2.0/sourcefire/api/__init__.py +0 -0
  9. sourcefire-0.2.0/sourcefire/api/models.py +24 -0
  10. sourcefire-0.2.0/sourcefire/api/routes.py +166 -0
  11. sourcefire-0.2.0/sourcefire/chain/__init__.py +0 -0
  12. sourcefire-0.2.0/sourcefire/chain/prompts.py +195 -0
  13. sourcefire-0.2.0/sourcefire/chain/rag_chain.py +967 -0
  14. sourcefire-0.2.0/sourcefire/cli.py +293 -0
  15. sourcefire-0.2.0/sourcefire/config.py +148 -0
  16. sourcefire-0.2.0/sourcefire/db.py +196 -0
  17. sourcefire-0.2.0/sourcefire/indexer/__init__.py +0 -0
  18. sourcefire-0.2.0/sourcefire/indexer/embeddings.py +27 -0
  19. sourcefire-0.2.0/sourcefire/indexer/language_profiles.py +448 -0
  20. sourcefire-0.2.0/sourcefire/indexer/metadata.py +289 -0
  21. sourcefire-0.2.0/sourcefire/indexer/pipeline.py +406 -0
  22. sourcefire-0.2.0/sourcefire/init.py +189 -0
  23. sourcefire-0.2.0/sourcefire/prompts/system.md +28 -0
  24. sourcefire-0.2.0/sourcefire/retriever/__init__.py +0 -0
  25. sourcefire-0.2.0/sourcefire/retriever/graph.py +162 -0
  26. sourcefire-0.2.0/sourcefire/retriever/search.py +86 -0
  27. sourcefire-0.2.0/sourcefire/static/.DS_Store +0 -0
  28. sourcefire-0.2.0/sourcefire/static/app.js +414 -0
  29. sourcefire-0.2.0/sourcefire/static/index.html +102 -0
  30. sourcefire-0.2.0/sourcefire/static/styles.css +607 -0
  31. sourcefire-0.2.0/sourcefire/watcher.py +105 -0
  32. sourcefire-0.2.0/sourcefire.egg-info/PKG-INFO +145 -0
  33. sourcefire-0.2.0/sourcefire.egg-info/SOURCES.txt +41 -0
  34. sourcefire-0.2.0/sourcefire.egg-info/dependency_links.txt +1 -0
  35. sourcefire-0.2.0/sourcefire.egg-info/entry_points.txt +2 -0
  36. sourcefire-0.2.0/sourcefire.egg-info/requires.txt +16 -0
  37. sourcefire-0.2.0/sourcefire.egg-info/top_level.txt +1 -0
  38. sourcefire-0.2.0/tests/test_config.py +24 -0
  39. sourcefire-0.2.0/tests/test_graph.py +42 -0
  40. sourcefire-0.2.0/tests/test_metadata.py +71 -0
  41. sourcefire-0.2.0/tests/test_prompts.py +45 -0
  42. sourcefire-0.2.0/tests/test_routes.py +26 -0
  43. sourcefire-0.2.0/tests/test_search.py +32 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Athar Wani / Cravv HQ
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,4 @@
1
+ recursive-include sourcefire/static *
2
+ recursive-include sourcefire/prompts *
3
+ include LICENSE
4
+ include README.md
@@ -0,0 +1,145 @@
1
+ Metadata-Version: 2.4
2
+ Name: sourcefire
3
+ Version: 0.2.0
4
+ Summary: AI-powered codebase RAG from your terminal — index any project, ask questions, get answers with full source context.
5
+ Author-email: Athar Wani <athar@cravv.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/waniathar2/sourcefire
8
+ Project-URL: Repository, https://github.com/waniathar2/sourcefire
9
+ Project-URL: Issues, https://github.com/waniathar2/sourcefire/issues
10
+ Keywords: rag,codebase,ai,code-search,embeddings,chromadb,gemini
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Environment :: Web Environment
14
+ Classifier: Framework :: FastAPI
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Software Development :: Documentation
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Classifier: Topic :: Text Processing :: Indexing
24
+ Requires-Python: >=3.11
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: chromadb
28
+ Requires-Dist: langchain
29
+ Requires-Dist: langchain-google-genai
30
+ Requires-Dist: fastapi
31
+ Requires-Dist: uvicorn[standard]
32
+ Requires-Dist: sse-starlette
33
+ Requires-Dist: sentence-transformers
34
+ Requires-Dist: tree-sitter
35
+ Requires-Dist: python-dotenv
36
+ Requires-Dist: watchfiles
37
+ Requires-Dist: tomli-w
38
+ Provides-Extra: dev
39
+ Requires-Dist: pytest; extra == "dev"
40
+ Requires-Dist: pytest-asyncio; extra == "dev"
41
+ Requires-Dist: httpx; extra == "dev"
42
+ Dynamic: license-file
43
+
44
+ # Sourcefire
45
+
46
+ AI-powered codebase RAG from your terminal. Index any project, ask questions, get answers with full source context.
47
+
48
+ ## Install
49
+
50
+ ```bash
51
+ pip install sourcefire
52
+ ```
53
+
54
+ ## Quick Start
55
+
56
+ ```bash
57
+ cd your-project
58
+ sourcefire
59
+ ```
60
+
61
+ That's it. Sourcefire will:
62
+
63
+ 1. **Auto-detect** your project structure and language
64
+ 2. **Generate** smart include/exclude patterns via LLM
65
+ 3. **Index** your codebase into a local ChromaDB vector database
66
+ 4. **Serve** a web UI where you can ask questions about your code
67
+ 5. **Watch** for file changes and re-index automatically
68
+
69
+ ## Requirements
70
+
71
+ - Python 3.11+
72
+ - A [Gemini API key](https://ai.google.dev/) (prompted on first run, saved to `.env`)
73
+
74
+ ## How It Works
75
+
76
+ Sourcefire creates a `.sourcefire/` directory in your project root:
77
+
78
+ ```
79
+ .sourcefire/
80
+ ├── config.toml # Project config (auto-generated, editable)
81
+ ├── chroma/ # Vector database (local, no server needed)
82
+ └── graph.json # Import graph for code navigation
83
+ ```
84
+
85
+ - **No PostgreSQL** — uses ChromaDB (SQLite-backed, embedded)
86
+ - **No extra infrastructure** — the vector database runs locally; only LLM calls go out to the Gemini API
87
+ - **Per-project isolation** — each project gets its own database
88
+
89
+ ## Features
90
+
91
+ - **Zero config** — first run auto-generates everything via LLM analysis
92
+ - **8 language profiles** — Python, JavaScript/TypeScript, Go, Rust, Java, Dart, C, C++
93
+ - **AST-aware chunking** — splits code at function/class boundaries using tree-sitter
94
+ - **Live re-indexing** — file watcher detects changes and re-indexes automatically
95
+ - **3 query modes** — Debug (stack traces), Feature (architecture), Explain (walkthroughs)
96
+ - **18 code exploration tools** — the LLM can read files, search code, trace call chains, git blame, and more
97
+ - **Incremental indexing** — only re-indexes files that changed since last run
98
+
99
+ ## CLI Options
100
+
101
+ ```
102
+ sourcefire [--port PORT] [--no-open] [--reinit] [--verbose]
103
+ ```
104
+
105
+ | Flag | Description |
106
+ |------|-------------|
107
+ | `--port PORT` | Server port (default: 8000) |
108
+ | `--no-open` | Don't auto-open browser |
109
+ | `--reinit` | Regenerate include/exclude patterns via LLM |
110
+ | `--verbose` | Verbose logging |
111
+
112
+ ## Configuration
113
+
114
+ Edit `.sourcefire/config.toml` to customize:
115
+
116
+ ```toml
117
+ [project]
118
+ name = "my-project"
119
+ language = "auto" # or "python", "go", "rust", etc.
120
+
121
+ [indexer]
122
+ include = ["src/**/*.py"] # glob patterns to index
123
+ exclude = ["__pycache__/**", ".venv/**"]
124
+ chunk_size = 1000
125
+ chunk_overlap = 300
126
+
127
+ [llm]
128
+ model = "gemini-2.5-flash"
129
+ api_key_env = "GEMINI_API_KEY"
130
+
131
+ [server]
132
+ port = 8000
133
+
134
+ [retrieval]
135
+ top_k = 8
136
+ relevance_threshold = 0.3
137
+ ```
138
+
139
+ ## Subdirectory Support
140
+
141
+ Run `sourcefire` from any subdirectory — it walks up the tree to find `.sourcefire/`, just like `git` finds `.git/`.
142
+
143
+ ## License
144
+
145
+ MIT — Created by [Athar Wani](https://github.com/waniathar2) / Cravv HQ
@@ -0,0 +1,102 @@
1
+ # Sourcefire
2
+
3
+ AI-powered codebase RAG from your terminal. Index any project, ask questions, get answers with full source context.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install sourcefire
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```bash
14
+ cd your-project
15
+ sourcefire
16
+ ```
17
+
18
+ That's it. Sourcefire will:
19
+
20
+ 1. **Auto-detect** your project structure and language
21
+ 2. **Generate** smart include/exclude patterns via LLM
22
+ 3. **Index** your codebase into a local ChromaDB vector database
23
+ 4. **Serve** a web UI where you can ask questions about your code
24
+ 5. **Watch** for file changes and re-index automatically
25
+
26
+ ## Requirements
27
+
28
+ - Python 3.11+
29
+ - A [Gemini API key](https://ai.google.dev/) (prompted on first run, saved to `.env`)
30
+
31
+ ## How It Works
32
+
33
+ Sourcefire creates a `.sourcefire/` directory in your project root:
34
+
35
+ ```
36
+ .sourcefire/
37
+ ├── config.toml # Project config (auto-generated, editable)
38
+ ├── chroma/ # Vector database (local, no server needed)
39
+ └── graph.json # Import graph for code navigation
40
+ ```
41
+
42
+ - **No PostgreSQL** — uses ChromaDB (SQLite-backed, embedded)
43
+ - **No extra infrastructure** — the vector database runs locally; only LLM calls go out to the Gemini API
44
+ - **Per-project isolation** — each project gets its own database
45
+
46
+ ## Features
47
+
48
+ - **Zero config** — first run auto-generates everything via LLM analysis
49
+ - **8 language profiles** — Python, JavaScript/TypeScript, Go, Rust, Java, Dart, C, C++
50
+ - **AST-aware chunking** — splits code at function/class boundaries using tree-sitter
51
+ - **Live re-indexing** — file watcher detects changes and re-indexes automatically
52
+ - **3 query modes** — Debug (stack traces), Feature (architecture), Explain (walkthroughs)
53
+ - **18 code exploration tools** — the LLM can read files, search code, trace call chains, git blame, and more
54
+ - **Incremental indexing** — only re-indexes files that changed since last run
55
+
56
+ ## CLI Options
57
+
58
+ ```
59
+ sourcefire [--port PORT] [--no-open] [--reinit] [--verbose]
60
+ ```
61
+
62
+ | Flag | Description |
63
+ |------|-------------|
64
+ | `--port PORT` | Server port (default: 8000) |
65
+ | `--no-open` | Don't auto-open browser |
66
+ | `--reinit` | Regenerate include/exclude patterns via LLM |
67
+ | `--verbose` | Verbose logging |
68
+
69
+ ## Configuration
70
+
71
+ Edit `.sourcefire/config.toml` to customize:
72
+
73
+ ```toml
74
+ [project]
75
+ name = "my-project"
76
+ language = "auto" # or "python", "go", "rust", etc.
77
+
78
+ [indexer]
79
+ include = ["src/**/*.py"] # glob patterns to index
80
+ exclude = ["__pycache__/**", ".venv/**"]
81
+ chunk_size = 1000
82
+ chunk_overlap = 300
83
+
84
+ [llm]
85
+ model = "gemini-2.5-flash"
86
+ api_key_env = "GEMINI_API_KEY"
87
+
88
+ [server]
89
+ port = 8000
90
+
91
+ [retrieval]
92
+ top_k = 8
93
+ relevance_threshold = 0.3
94
+ ```
95
+
96
+ ## Subdirectory Support
97
+
98
+ Run `sourcefire` from any subdirectory — it walks up the tree to find `.sourcefire/`, just like `git` finds `.git/`.
99
+
100
+ ## License
101
+
102
+ MIT — Created by [Athar Wani](https://github.com/waniathar2) / Cravv HQ
@@ -0,0 +1,60 @@
1
+ [project]
2
+ name = "sourcefire"
3
+ version = "0.2.0"
4
+ description = "AI-powered codebase RAG from your terminal — index any project, ask questions, get answers with full source context."
5
+ readme = "README.md"
6
+ license = {text = "MIT"}
7
+ requires-python = ">=3.11"
8
+ authors = [
9
+ {name = "Athar Wani", email = "athar@cravv.com"},
10
+ ]
11
+ keywords = ["rag", "codebase", "ai", "code-search", "embeddings", "chromadb", "gemini"]
12
+ classifiers = [
13
+ "Development Status :: 4 - Beta",
14
+ "Environment :: Console",
15
+ "Environment :: Web Environment",
16
+ "Framework :: FastAPI",
17
+ "Intended Audience :: Developers",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ "Topic :: Software Development :: Documentation",
24
+ "Topic :: Software Development :: Libraries :: Python Modules",
25
+ "Topic :: Text Processing :: Indexing",
26
+ ]
27
+ dependencies = [
28
+ "chromadb",
29
+ "langchain",
30
+ "langchain-google-genai",
31
+ "fastapi",
32
+ "uvicorn[standard]",
33
+ "sse-starlette",
34
+ "sentence-transformers",
35
+ "tree-sitter",
36
+ "python-dotenv",
37
+ "watchfiles",
38
+ "tomli-w",
39
+ ]
40
+
41
+ [project.optional-dependencies]
42
+ dev = ["pytest", "pytest-asyncio", "httpx"]
43
+
44
+ [project.urls]
45
+ Homepage = "https://github.com/waniathar2/sourcefire"
46
+ Repository = "https://github.com/waniathar2/sourcefire"
47
+ Issues = "https://github.com/waniathar2/sourcefire/issues"
48
+
49
+ [project.scripts]
50
+ sourcefire = "sourcefire.cli:main"
51
+
52
+ [tool.setuptools.package-data]
53
+ sourcefire = ["static/**/*", "prompts/**/*"]
54
+
55
+ [tool.setuptools.packages.find]
56
+ include = ["sourcefire*"]
57
+
58
+ [build-system]
59
+ requires = ["setuptools>=68.0"]
60
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
File without changes
File without changes
@@ -0,0 +1,24 @@
1
+ """Pydantic request/response models for the Sourcefire API."""
2
+
3
+ from typing import Literal
4
+
5
+ from pydantic import BaseModel
6
+
7
+
8
class QueryRequest(BaseModel):
    """Request body for a query against the indexed codebase."""

    # The query text itself; the only required field.
    query: str
    # One of the three accepted mode names; "debug" when omitted.
    mode: Literal["debug", "feature", "explain"] = "debug"
    # One of the two accepted model identifiers; the flash model when omitted.
    model: Literal["gemini-2.5-flash", "gemini-2.5-pro"] = "gemini-2.5-flash"
    # Presumably earlier conversation turns; the dict schema is not fixed here.
    history: list[dict] = []
13
+
14
+
15
class StatusResponse(BaseModel):
    """Response model describing the current state of the index."""

    files_indexed: int
    last_indexed: str
    index_status: str
    # Falls back to "generic" when no language is supplied.
    language: str = "generic"
20
+
21
+
22
class SourceResponse(BaseModel):
    """Response model pairing a file's text with a language label."""

    content: str
    language: str
@@ -0,0 +1,166 @@
1
+ """FastAPI router for the Sourcefire API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any, AsyncGenerator
8
+
9
+ from fastapi import APIRouter, HTTPException, Query
10
+ from sse_starlette.sse import EventSourceResponse
11
+
12
+ from sourcefire.api.models import QueryRequest, SourceResponse, StatusResponse
13
+
14
+ router = APIRouter(prefix="/api")
15
+
16
+ # ---------------------------------------------------------------------------
17
+ # Module-level dependency state — set once at startup via init_dependencies()
18
+ # ---------------------------------------------------------------------------
19
+
20
+ _collection: Any = None
21
+ _graph: Any = None
22
+ _profile: Any = None
23
+ _project_dir: Path | None = None
24
+ _gemini_api_key: str = ""
25
+ _index_status: dict[str, Any] = {
26
+ "files_indexed": 0,
27
+ "last_indexed": "never",
28
+ "index_status": "not_ready",
29
+ "language": "generic",
30
+ }
31
+
32
+
33
+ def init_dependencies(
34
+ collection: Any,
35
+ graph: Any,
36
+ index_status: dict[str, Any],
37
+ profile: Any = None,
38
+ project_dir: Path | None = None,
39
+ gemini_api_key: str = "",
40
+ ) -> None:
41
+ """Inject shared dependencies from the application lifespan."""
42
+ global _collection, _graph, _index_status, _profile, _project_dir, _gemini_api_key
43
+ _collection = collection
44
+ _graph = graph
45
+ _index_status = index_status
46
+ _profile = profile
47
+ _project_dir = project_dir
48
+ _gemini_api_key = gemini_api_key
49
+
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # Language detection helper
53
+ # ---------------------------------------------------------------------------
54
+
55
# Keys are lower-case file suffixes (leading dot included); values are the
# language labels returned to API clients.
_EXTENSION_TO_LANGUAGE: dict[str, str] = {
    ".dart": "dart", ".py": "python", ".md": "markdown",
    ".yaml": "yaml", ".yml": "yaml", ".json": "json",
    ".ts": "typescript", ".tsx": "typescript",
    ".js": "javascript", ".jsx": "javascript",
    ".html": "html", ".css": "css", ".sh": "bash",
    ".go": "go", ".rs": "rust", ".java": "java", ".kt": "kotlin",
    ".swift": "swift", ".rb": "ruby", ".php": "php",
    ".c": "c", ".cpp": "cpp", ".h": "c", ".hpp": "cpp",
    ".toml": "toml", ".xml": "xml", ".sql": "sql",
    ".graphql": "graphql", ".proto": "protobuf", ".tf": "hcl",
    ".dockerfile": "dockerfile",
}
88
+
89
+
90
def _detect_language(file_path: Path) -> str:
    """Return a language label for *file_path*.

    The bare file names ``dockerfile`` and ``makefile`` (compared
    case-insensitively) are recognised first. Any other file is looked up by
    its lower-cased suffix, with ``"plaintext"`` as the fallback.
    """
    special_names = {"dockerfile": "dockerfile", "makefile": "makefile"}
    basename = file_path.name.lower()
    if basename in special_names:
        return special_names[basename]
    return _EXTENSION_TO_LANGUAGE.get(file_path.suffix.lower(), "plaintext")
97
+
98
+
99
+ # ---------------------------------------------------------------------------
100
+ # Routes
101
+ # ---------------------------------------------------------------------------
102
+
103
+
104
@router.post("/query")
async def query(request: QueryRequest) -> EventSourceResponse:
    """Answer *request* as a Server-Sent Events stream of RAG output.

    Raises:
        HTTPException: 503 when no Gemini API key has been configured.
    """
    if not _gemini_api_key:
        raise HTTPException(
            status_code=503,
            detail="GEMINI_API_KEY is not configured.",
        )

    # Imported lazily, and only after the API-key check has passed.
    from sourcefire.chain.rag_chain import stream_rag_response

    async def _sse_events() -> AsyncGenerator[dict[str, str], None]:
        # Each chunk produced by the chain becomes one SSE message whose
        # "data" field is the JSON-encoded chunk.
        rag_stream = stream_rag_response(
            collection=_collection,
            graph=_graph,
            query=request.query,
            mode=request.mode,
            model=request.model,
            history=request.history,
            profile=_profile,
            project_dir=_project_dir,
            gemini_api_key=_gemini_api_key,
        )
        async for piece in rag_stream:
            yield {"data": json.dumps(piece)}

    return EventSourceResponse(_sse_events())
130
+
131
+
132
@router.get("/sources", response_model=SourceResponse)
async def sources(path: str = Query(..., description="Relative path within the codebase")) -> SourceResponse:
    """Return the content and detected language of a source file.

    Args:
        path: File location relative to the configured project directory.

    Raises:
        HTTPException: 503 if the project directory has not been set, 400 if
            the resolved path lies outside the project directory, 404 if it
            is not a regular file, and 500 if the file cannot be read.
    """
    if _project_dir is None:
        raise HTTPException(status_code=503, detail="Project directory not initialized.")

    codebase_resolved = _project_dir.resolve()
    full_path = (_project_dir / path).resolve()

    # Compare whole path components rather than string prefixes: a plain
    # startswith() test would also accept a sibling directory whose name merely
    # begins with the project directory's name (e.g. "/srv/proj-secrets" when
    # the project lives in "/srv/proj").
    if not full_path.is_relative_to(codebase_resolved):
        raise HTTPException(status_code=400, detail="Path traversal detected.")

    if not full_path.is_file():
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    try:
        # Undecodable bytes are replaced instead of failing the request.
        content = full_path.read_text(encoding="utf-8", errors="replace")
    except OSError as exc:
        raise HTTPException(status_code=500, detail=f"Could not read file: {exc}") from exc

    return SourceResponse(
        content=content,
        language=_detect_language(full_path),
    )
156
+
157
+
158
@router.get("/status", response_model=StatusResponse)
async def status() -> StatusResponse:
    """Report the index state held in the module-level status mapping.

    A key missing from the mapping is reported with its placeholder value
    (0, "never", "not_ready", "generic").
    """
    read = _index_status.get
    indexed_count = read("files_indexed", 0)
    last_run = str(read("last_indexed", "never"))
    state = str(read("index_status", "not_ready"))
    lang = str(read("language", "generic"))
    return StatusResponse(
        files_indexed=indexed_count,
        last_indexed=last_run,
        index_status=state,
        language=lang,
    )
File without changes