spectrida 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spectrida-0.1.0/.github/workflows/ci.yml +26 -0
- spectrida-0.1.0/.gitignore +27 -0
- spectrida-0.1.0/CHANGELOG.md +11 -0
- spectrida-0.1.0/LICENSE +21 -0
- spectrida-0.1.0/PKG-INFO +294 -0
- spectrida-0.1.0/README.md +259 -0
- spectrida-0.1.0/pyproject.toml +61 -0
- spectrida-0.1.0/scripts/upload_gguf.py +34 -0
- spectrida-0.1.0/spectrida/__init__.py +2 -0
- spectrida-0.1.0/spectrida/analysis/__init__.py +0 -0
- spectrida-0.1.0/spectrida/analysis/ida_gpu_accel/__init__.py +27 -0
- spectrida-0.1.0/spectrida/analysis/ida_gpu_accel/arm64_scanner.py +278 -0
- spectrida-0.1.0/spectrida/analysis/ida_gpu_accel/capstone_scanner.py +322 -0
- spectrida-0.1.0/spectrida/analysis/ida_gpu_accel/config.py +42 -0
- spectrida-0.1.0/spectrida/analysis/ida_gpu_accel/preseeder.py +144 -0
- spectrida-0.1.0/spectrida/analysis/ida_gpu_accel/x86_64_scanner.py +163 -0
- spectrida-0.1.0/spectrida/analysis/parallel_analyze.py +560 -0
- spectrida-0.1.0/spectrida/analysis/shard_worker.py +139 -0
- spectrida-0.1.0/spectrida/api.py +386 -0
- spectrida-0.1.0/spectrida/cli.py +146 -0
- spectrida-0.1.0/spectrida/config.py +146 -0
- spectrida-0.1.0/spectrida/core/__init__.py +0 -0
- spectrida-0.1.0/spectrida/core/backend.py +97 -0
- spectrida-0.1.0/spectrida/core/demo.py +111 -0
- spectrida-0.1.0/spectrida/core/ida.py +199 -0
- spectrida-0.1.0/spectrida/core/ollama.py +79 -0
- spectrida-0.1.0/spectrida/core/pipeline.py +92 -0
- spectrida-0.1.0/spectrida/core/services.py +90 -0
- spectrida-0.1.0/spectrida/onboard.py +116 -0
- spectrida-0.1.0/spectrida/tui/__init__.py +0 -0
- spectrida-0.1.0/spectrida/tui/app.py +71 -0
- spectrida-0.1.0/spectrida/tui/screens/__init__.py +0 -0
- spectrida-0.1.0/spectrida/tui/screens/analyze.py +110 -0
- spectrida-0.1.0/spectrida/tui/screens/browser.py +285 -0
- spectrida-0.1.0/spectrida/tui/screens/dialogs.py +74 -0
- spectrida-0.1.0/spectrida/tui/styles.tcss +70 -0
- spectrida-0.1.0/spectrida/tui/widgets/__init__.py +0 -0
- spectrida-0.1.0/spectrida/tui/widgets/disasm.py +85 -0
- spectrida-0.1.0/spectrida/tui/widgets/funclist.py +105 -0
- spectrida-0.1.0/spectrida/tui/widgets/statusbar.py +35 -0
- spectrida-0.1.0/spectrida/voice.py +238 -0
- spectrida-0.1.0/tests/test_config.py +25 -0
- spectrida-0.1.0/tests/test_core.py +30 -0
- spectrida-0.1.0/tests/test_voice.py +18 -0
- spectrida-0.1.0/textual_debug.log +406 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
name: ci
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [master, main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
strategy:
|
|
12
|
+
matrix:
|
|
13
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
- uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: ${{ matrix.python-version }}
|
|
19
|
+
- name: Install
|
|
20
|
+
run: |
|
|
21
|
+
python -m pip install --upgrade pip
|
|
22
|
+
pip install -e ".[dev]"
|
|
23
|
+
- name: Lint
|
|
24
|
+
run: ruff check spectrida tests
|
|
25
|
+
- name: Test
|
|
26
|
+
run: pytest -q
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.pyo
|
|
5
|
+
.eggs/
|
|
6
|
+
*.egg-info/
|
|
7
|
+
dist/
|
|
8
|
+
build/
|
|
9
|
+
.venv/
|
|
10
|
+
venv/
|
|
11
|
+
.pytest_cache/
|
|
12
|
+
.ruff_cache/
|
|
13
|
+
|
|
14
|
+
# IDA databases & sidecars (never commit binaries/analysis)
|
|
15
|
+
*.i64
|
|
16
|
+
*.idb
|
|
17
|
+
*.id0
|
|
18
|
+
*.id1
|
|
19
|
+
*.id2
|
|
20
|
+
*.nam
|
|
21
|
+
*.til
|
|
22
|
+
|
|
23
|
+
# spectrIDA runtime output
|
|
24
|
+
output/
|
|
25
|
+
|
|
26
|
+
# env / secrets
|
|
27
|
+
.env
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.1.0 — first ghost
|
|
4
|
+
|
|
5
|
+
- Parallel sharded IDA analysis (Capstone recursive descent + idalib merge).
|
|
6
|
+
- AI function naming via a local Ollama model, streamed token-by-token.
|
|
7
|
+
- Terminal UI: virtualized function browser, syntax-highlighted disasm, decompiler view,
|
|
8
|
+
call-chain explorer, inline rename, command palette.
|
|
9
|
+
- First-run onboarding wizard (humorous, skippable) that helps set up Ollama + the model.
|
|
10
|
+
- Demo mode (`spectrida --demo`) — runs the whole TUI with no IDA/Ollama.
|
|
11
|
+
- Config-driven everything (`~/.spectrida/config.toml` + env vars); no hardcoded paths.
|
spectrida-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 ggfuchsi-oss
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
spectrida-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: spectrida
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Ghost through binaries — parallel IDA analysis + AI function naming in your terminal
|
|
5
|
+
Project-URL: Homepage, https://github.com/ggfuchsi-oss/spectrIDA
|
|
6
|
+
Project-URL: Repository, https://github.com/ggfuchsi-oss/spectrIDA
|
|
7
|
+
Project-URL: Issues, https://github.com/ggfuchsi-oss/spectrIDA/issues
|
|
8
|
+
Author: ggfuchsi-oss
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: ai,binary-analysis,decompiler,ida,ida-pro,llm,reverse-engineering,tui
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Security
|
|
21
|
+
Classifier: Topic :: Software Development :: Disassemblers
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Requires-Dist: httpx>=0.27
|
|
24
|
+
Requires-Dist: rich>=13
|
|
25
|
+
Requires-Dist: textual>=0.80
|
|
26
|
+
Requires-Dist: tomli>=2.0; python_version < '3.11'
|
|
27
|
+
Requires-Dist: typer>=0.12
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
30
|
+
Requires-Dist: ruff>=0.6; extra == 'dev'
|
|
31
|
+
Provides-Extra: gpu
|
|
32
|
+
Requires-Dist: numpy>=1.24; extra == 'gpu'
|
|
33
|
+
Requires-Dist: torch>=2.0; extra == 'gpu'
|
|
34
|
+
Description-Content-Type: text/markdown
|
|
35
|
+
|
|
36
|
+
<div align="center">
|
|
37
|
+
|
|
38
|
+
# 👻 spectrIDA
|
|
39
|
+
|
|
40
|
+
**Ghost through binaries.**
|
|
41
|
+
|
|
42
|
+
Parallel IDA Pro analysis + AI function naming + a terminal that doesn't suck.
|
|
43
|
+
|
|
44
|
+
</div>
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
spectrida analyze GameAssembly.dll --workers 16
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
◈ spectrIDA ▸ GameAssembly.dll
|
|
52
|
+
|
|
53
|
+
✓ 00 ✓ 01 ✓ 02 ✓ 03 ▸ 04 · 05 · 06 · 07
|
|
54
|
+
✓ 08 ✓ 09 ✓ 10 ✓ 11 ✓ 12 ✓ 13 ▸ 14 · 15
|
|
55
|
+
|
|
56
|
+
14/16 shards │ 141,203 functions found
|
|
57
|
+
████████████████████████████░░░░ 89% ~4s remaining
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## What it is
|
|
63
|
+
|
|
64
|
+
IDA Pro's auto-analysis is single-threaded. On a 34 MB il2cpp DLL that's *minutes*. spectrIDA splits
|
|
65
|
+
the binary into N shards, runs them in parallel via idalib, merges into one `.i64`, then lets a
|
|
66
|
+
fine-tuned 8B model **name every function** — all from one terminal UI with a cyberpunk theme and
|
|
67
|
+
exactly the right amount of sarcasm.
|
|
68
|
+
|
|
69
|
+
It is not Ghidra. It does one annoying thing (slow analysis + naming) fast, and it's genuinely fun
|
|
70
|
+
to use. **199 downloads speak for themselves.**
|
|
71
|
+
|
|
72
|
+
**No cloud. No telemetry. Runs entirely on your machine.**
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## Numbers
|
|
77
|
+
|
|
78
|
+
| task | time |
|
|
79
|
+
|------|------|
|
|
80
|
+
| Among Us DLL — single-threaded IDA | ~4 hours |
|
|
81
|
+
| Among Us DLL — spectrIDA (16 workers) | **67 seconds** |
|
|
82
|
+
| 153,649 function binary — full naming pass | overnight |
|
|
83
|
+
| Binary overview (what does this thing do?) | ~30 seconds |
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## Features
|
|
88
|
+
|
|
89
|
+
- **Parallel sharded analysis** — splits into address-space shards, runs N idalib instances,
|
|
90
|
+
merges into one `.i64`. Workers configurable via flag, config, or env var.
|
|
91
|
+
- **AI function naming** — fine-tuned Qwen3-8B runs locally via Ollama, streams names
|
|
92
|
+
token-by-token. Press `N`. Watch it think. Name appears.
|
|
93
|
+
- **Batch naming** — `B` to name every `sub_*` function in the list. Walk away. Come back.
|
|
94
|
+
- **Binary overview** — press `O` or run `spectrida overview file.i64`. The model reads 120
|
|
95
|
+
sampled function names and tells you what the binary does, what its subsystems are, and
|
|
96
|
+
anything security-relevant. Correctly identified a 153k-function IL2CPP runtime in 30 seconds.
|
|
97
|
+
- **Call chain explorer** — `C` shows callers and callees. The model uses these as context
|
|
98
|
+
when naming — a function called by `Player$$TakeDamage` gets named better than one in isolation.
|
|
99
|
+
- **Decompiler view** — `D` toggles Hex-Rays pseudocode.
|
|
100
|
+
- **Export** — dump everything to JSON, CSV, IDA `.idc` script, or a symbols file.
|
|
101
|
+
The `.idc` applies all AI-generated names back into any IDA install in one click.
|
|
102
|
+
- **Programmatic API** — `from spectrida.api import open_i64`. Drive everything from scripts,
|
|
103
|
+
notebooks, or Claude Code without touching the TUI.
|
|
104
|
+
- **Demo mode** (`spectrida --demo`) — try the whole thing with **zero setup**. No IDA, no Ollama.
|
|
105
|
+
- **A first-run wizard** — helps you install Ollama + the model, detects your IDA install
|
|
106
|
+
automatically, then never asks again.
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
## Install
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
pip install spectrida
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Requirements: **IDA Pro 9.x** with idalib · **Python 3.10+** · **Ollama**
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
# install Ollama (Windows)
|
|
120
|
+
winget install Ollama.Ollama
|
|
121
|
+
|
|
122
|
+
# pull the model (8.7 GB — go get coffee)
|
|
123
|
+
ollama pull hf.co/gdfhhjk/spectrida-re-gguf
|
|
124
|
+
|
|
125
|
+
# first run — detects your IDA install and sets everything up
|
|
126
|
+
spectrida onboard
|
|
127
|
+
|
|
128
|
+
# or just try the demo right now
|
|
129
|
+
spectrida --demo
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## Commands
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
# analyze a binary from scratch
|
|
138
|
+
spectrida analyze GameAssembly.dll
|
|
139
|
+
spectrida analyze GameAssembly.dll --workers 8 # custom worker count
|
|
140
|
+
|
|
141
|
+
# open an existing .i64 in the browser
|
|
142
|
+
spectrida open file.i64
|
|
143
|
+
|
|
144
|
+
# ask the AI what this binary is
|
|
145
|
+
spectrida overview file.i64
|
|
146
|
+
spectrida overview file.i64 --addr 0x10001000 --addr 0x10353fd0 # include specific functions
|
|
147
|
+
|
|
148
|
+
# export function names
|
|
149
|
+
spectrida export file.i64 -f idc # IDA script — apply names to any install
|
|
150
|
+
spectrida export file.i64 -f json # full dump with addresses + sizes
|
|
151
|
+
spectrida export file.i64 -f csv # spreadsheet
|
|
152
|
+
spectrida export file.i64 -f symbols # addr name pairs
|
|
153
|
+
spectrida export file.i64 --named-only # skip sub_* functions
|
|
154
|
+
|
|
155
|
+
# check Ollama + model status
|
|
156
|
+
spectrida serve
|
|
157
|
+
|
|
158
|
+
# re-run the setup wizard
|
|
159
|
+
spectrida onboard
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## TUI keys
|
|
165
|
+
|
|
166
|
+
| Key | Action |
|
|
167
|
+
|-----|--------|
|
|
168
|
+
| `N` | Name selected function — AI streams the result live |
|
|
169
|
+
| `R` | Rename — pre-filled with the AI suggestion |
|
|
170
|
+
| `D` | Toggle decompiled pseudocode (Hex-Rays) |
|
|
171
|
+
| `C` | Call chain — callers and callees |
|
|
172
|
+
| `B` | Batch-name all `sub_*` functions in the current list |
|
|
173
|
+
| `O` | Overview — AI summary of the whole binary |
|
|
174
|
+
| `/` | Fuzzy search |
|
|
175
|
+
| `?` | Help |
|
|
176
|
+
| `Q` | Quit |
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## Programmatic API
|
|
181
|
+
|
|
182
|
+
No TUI needed — drive spectrIDA from scripts, Claude Code, notebooks, whatever:
|
|
183
|
+
|
|
184
|
+
```python
|
|
185
|
+
import asyncio
|
|
186
|
+
from spectrida.api import open_i64
|
|
187
|
+
|
|
188
|
+
async def main():
|
|
189
|
+
async with open_i64("GameAssembly.i64") as db:
|
|
190
|
+
|
|
191
|
+
# list all 153k functions
|
|
192
|
+
funcs = await db.list_functions()
|
|
193
|
+
|
|
194
|
+
# name one function — returns name + reasoning + confidence
|
|
195
|
+
result = await db.name_function(0x10001000)
|
|
196
|
+
print(result["new_name"]) # init_atexit_handler
|
|
197
|
+
print(result["reasoning"]) # allocates array of 3 fn ptrs, calls _atexit...
|
|
198
|
+
|
|
199
|
+
# batch name everything (with live progress)
|
|
200
|
+
async def on_progress(done, total, r):
|
|
201
|
+
print(f" {done}/{total} {r['old_name']} -> {r['new_name']}")
|
|
202
|
+
|
|
203
|
+
await db.batch_name(limit=500, rename=True, progress_cb=on_progress)
|
|
204
|
+
|
|
205
|
+
# ask what the binary does
|
|
206
|
+
overview = await db.overview()
|
|
207
|
+
print(overview)
|
|
208
|
+
|
|
209
|
+
# export to IDA script
|
|
210
|
+
await db.export("names.idc", fmt="idc", named_only=True)
|
|
211
|
+
|
|
212
|
+
asyncio.run(main())
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
---
|
|
216
|
+
|
|
217
|
+
## The model
|
|
218
|
+
|
|
219
|
+
[`hf.co/gdfhhjk/spectrida-re-gguf`](https://huggingface.co/gdfhhjk/spectrida-re-gguf) — Qwen3-8B
|
|
220
|
+
fine-tuned for reverse engineering.
|
|
221
|
+
|
|
222
|
+
**Trained on:**
|
|
223
|
+
- x86/x64 assembly → function name pairs with call-chain context
|
|
224
|
+
- Tool call traces from [`jtsylve/ida-mcp`](https://github.com/jtsylve/ida-mcp) — headless IDA with idalib
|
|
225
|
+
- Extended context reasoning traces from a codebase context server
|
|
226
|
+
|
|
227
|
+
**Training approach:** neuron-targeted SFT + GRPO. Only the RE-relevant neurons are tuned —
|
|
228
|
+
base Qwen3 knowledge stays intact; the fine-tune just adds a very specific skill on top.
|
|
229
|
+
|
|
230
|
+
Runs locally via Ollama. GGUF — works on CPU, GPU, or both.
|
|
231
|
+
|
|
232
|
+
---
|
|
233
|
+
|
|
234
|
+
## Who is this for
|
|
235
|
+
|
|
236
|
+
You're reversing something. You have a binary with 150,000 functions. Maybe 2,000 have names from
|
|
237
|
+
metadata. The other 148,000 are `sub_XXXXXXXX`. You want to find the network code.
|
|
238
|
+
You can't grep for it because nothing has a name yet.
|
|
239
|
+
|
|
240
|
+
A human reverse engineer can name ~50-100 functions per hour if they're fast. At that rate, 150k functions = **3 years**.
|
|
241
|
+
|
|
242
|
+
spectrIDA names them overnight. Not perfectly — maybe 70% accuracy on generic functions,
|
|
243
|
+
much higher on patterns the model recognizes. But now instead of 148k `sub_` functions you have
|
|
244
|
+
`network_send_packet`, `serialize_player_state`, `validate_checksum` — and you know where to look.
|
|
245
|
+
|
|
246
|
+
It doesn't replace a skilled reverse engineer. It does the boring 80% so you can focus on the
|
|
247
|
+
interesting 20%. It's the orientation layer.
|
|
248
|
+
|
|
249
|
+
**Real use cases:**
|
|
250
|
+
- Game modding — find the physics system in a 150k-function binary in minutes, not days
|
|
251
|
+
- Security research — malware triage, understand a binary's architecture quickly
|
|
252
|
+
- CTF — time pressure, need to know what you're looking at immediately
|
|
253
|
+
- Anyone who has stared at `sub_140001234` for 20 minutes thinking *there has to be a better way*
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
## Configuration
|
|
258
|
+
|
|
259
|
+
`~/.spectrida/config.toml`:
|
|
260
|
+
|
|
261
|
+
```toml
|
|
262
|
+
[ida]
|
|
263
|
+
idalib = "C:/Program Files/IDA Professional 9.1"
|
|
264
|
+
output_dir = "~/.spectrida/output"
|
|
265
|
+
|
|
266
|
+
[ollama]
|
|
267
|
+
base_url = "http://localhost:11434"
|
|
268
|
+
model = "spectrida-re" # any ollama model name works
|
|
269
|
+
|
|
270
|
+
[pipeline]
|
|
271
|
+
workers = 16
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
Env var overrides: `SPECTRIDA_IDALIB` · `SPECTRIDA_MODEL` · `SPECTRIDA_WORKERS` · `SPECTRIDA_OLLAMA_URL`
|
|
275
|
+
|
|
276
|
+
---
|
|
277
|
+
|
|
278
|
+
## What's coming (chapter 2)
|
|
279
|
+
|
|
280
|
+
- **Deep context naming** — follow call trees N levels deep, feed the full chain to the model.
|
|
281
|
+
A function 3 hops from `encrypt_block` should know it's in the crypto path.
|
|
282
|
+
- **Deobfuscation** — TigressVM pattern detection and handler tracing
|
|
283
|
+
- **MCP server** — expose spectrIDA as an MCP tool so Claude Code can call it natively
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
287
|
+
## License
|
|
288
|
+
|
|
289
|
+
MIT. Do whatever you want with it. If it works, cool.
|
|
290
|
+
If it doesn't, blame the GGUF quantization.
|
|
291
|
+
|
|
292
|
+
Built with spite, coffee, and an RTX 4070.
|
|
293
|
+
The model has 199 downloads with zero marketing. Each one adds 0.01% to development speed.
|
|
294
|
+
(This is not true. But it's close.) 👻
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
# 👻 spectrIDA
|
|
4
|
+
|
|
5
|
+
**Ghost through binaries.**
|
|
6
|
+
|
|
7
|
+
Parallel IDA Pro analysis + AI function naming + a terminal that doesn't suck.
|
|
8
|
+
|
|
9
|
+
</div>
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
spectrida analyze GameAssembly.dll --workers 16
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
◈ spectrIDA ▸ GameAssembly.dll
|
|
17
|
+
|
|
18
|
+
✓ 00 ✓ 01 ✓ 02 ✓ 03 ▸ 04 · 05 · 06 · 07
|
|
19
|
+
✓ 08 ✓ 09 ✓ 10 ✓ 11 ✓ 12 ✓ 13 ▸ 14 · 15
|
|
20
|
+
|
|
21
|
+
14/16 shards │ 141,203 functions found
|
|
22
|
+
████████████████████████████░░░░ 89% ~4s remaining
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## What it is
|
|
28
|
+
|
|
29
|
+
IDA Pro's auto-analysis is single-threaded. On a 34 MB il2cpp DLL that's *minutes*. spectrIDA splits
|
|
30
|
+
the binary into N shards, runs them in parallel via idalib, merges into one `.i64`, then lets a
|
|
31
|
+
fine-tuned 8B model **name every function** — all from one terminal UI with a cyberpunk theme and
|
|
32
|
+
exactly the right amount of sarcasm.
|
|
33
|
+
|
|
34
|
+
It is not Ghidra. It does one annoying thing (slow analysis + naming) fast, and it's genuinely fun
|
|
35
|
+
to use. **199 downloads speak for themselves.**
|
|
36
|
+
|
|
37
|
+
**No cloud. No telemetry. Runs entirely on your machine.**
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## Numbers
|
|
42
|
+
|
|
43
|
+
| task | time |
|
|
44
|
+
|------|------|
|
|
45
|
+
| Among Us DLL — single-threaded IDA | ~4 hours |
|
|
46
|
+
| Among Us DLL — spectrIDA (16 workers) | **67 seconds** |
|
|
47
|
+
| 153,649 function binary — full naming pass | overnight |
|
|
48
|
+
| Binary overview (what does this thing do?) | ~30 seconds |
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## Features
|
|
53
|
+
|
|
54
|
+
- **Parallel sharded analysis** — splits into address-space shards, runs N idalib instances,
|
|
55
|
+
merges into one `.i64`. Workers configurable via flag, config, or env var.
|
|
56
|
+
- **AI function naming** — fine-tuned Qwen3-8B runs locally via Ollama, streams names
|
|
57
|
+
token-by-token. Press `N`. Watch it think. Name appears.
|
|
58
|
+
- **Batch naming** — `B` to name every `sub_*` function in the list. Walk away. Come back.
|
|
59
|
+
- **Binary overview** — press `O` or run `spectrida overview file.i64`. The model reads 120
|
|
60
|
+
sampled function names and tells you what the binary does, what its subsystems are, and
|
|
61
|
+
anything security-relevant. Correctly identified a 153k-function IL2CPP runtime in 30 seconds.
|
|
62
|
+
- **Call chain explorer** — `C` shows callers and callees. The model uses these as context
|
|
63
|
+
when naming — a function called by `Player$$TakeDamage` gets named better than one in isolation.
|
|
64
|
+
- **Decompiler view** — `D` toggles Hex-Rays pseudocode.
|
|
65
|
+
- **Export** — dump everything to JSON, CSV, IDA `.idc` script, or a symbols file.
|
|
66
|
+
The `.idc` applies all AI-generated names back into any IDA install in one click.
|
|
67
|
+
- **Programmatic API** — `from spectrida.api import open_i64`. Drive everything from scripts,
|
|
68
|
+
notebooks, or Claude Code without touching the TUI.
|
|
69
|
+
- **Demo mode** (`spectrida --demo`) — try the whole thing with **zero setup**. No IDA, no Ollama.
|
|
70
|
+
- **A first-run wizard** — helps you install Ollama + the model, detects your IDA install
|
|
71
|
+
automatically, then never asks again.
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## Install
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
pip install spectrida
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Requirements: **IDA Pro 9.x** with idalib · **Python 3.10+** · **Ollama**
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
# install Ollama (Windows)
|
|
85
|
+
winget install Ollama.Ollama
|
|
86
|
+
|
|
87
|
+
# pull the model (8.7 GB — go get coffee)
|
|
88
|
+
ollama pull hf.co/gdfhhjk/spectrida-re-gguf
|
|
89
|
+
|
|
90
|
+
# first run — detects your IDA install and sets everything up
|
|
91
|
+
spectrida onboard
|
|
92
|
+
|
|
93
|
+
# or just try the demo right now
|
|
94
|
+
spectrida --demo
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
## Commands
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
# analyze a binary from scratch
|
|
103
|
+
spectrida analyze GameAssembly.dll
|
|
104
|
+
spectrida analyze GameAssembly.dll --workers 8 # custom worker count
|
|
105
|
+
|
|
106
|
+
# open an existing .i64 in the browser
|
|
107
|
+
spectrida open file.i64
|
|
108
|
+
|
|
109
|
+
# ask the AI what this binary is
|
|
110
|
+
spectrida overview file.i64
|
|
111
|
+
spectrida overview file.i64 --addr 0x10001000 --addr 0x10353fd0 # include specific functions
|
|
112
|
+
|
|
113
|
+
# export function names
|
|
114
|
+
spectrida export file.i64 -f idc # IDA script — apply names to any install
|
|
115
|
+
spectrida export file.i64 -f json # full dump with addresses + sizes
|
|
116
|
+
spectrida export file.i64 -f csv # spreadsheet
|
|
117
|
+
spectrida export file.i64 -f symbols # addr name pairs
|
|
118
|
+
spectrida export file.i64 --named-only # skip sub_* functions
|
|
119
|
+
|
|
120
|
+
# check Ollama + model status
|
|
121
|
+
spectrida serve
|
|
122
|
+
|
|
123
|
+
# re-run the setup wizard
|
|
124
|
+
spectrida onboard
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## TUI keys
|
|
130
|
+
|
|
131
|
+
| Key | Action |
|
|
132
|
+
|-----|--------|
|
|
133
|
+
| `N` | Name selected function — AI streams the result live |
|
|
134
|
+
| `R` | Rename — pre-filled with the AI suggestion |
|
|
135
|
+
| `D` | Toggle decompiled pseudocode (Hex-Rays) |
|
|
136
|
+
| `C` | Call chain — callers and callees |
|
|
137
|
+
| `B` | Batch-name all `sub_*` functions in the current list |
|
|
138
|
+
| `O` | Overview — AI summary of the whole binary |
|
|
139
|
+
| `/` | Fuzzy search |
|
|
140
|
+
| `?` | Help |
|
|
141
|
+
| `Q` | Quit |
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## Programmatic API
|
|
146
|
+
|
|
147
|
+
No TUI needed — drive spectrIDA from scripts, Claude Code, notebooks, whatever:
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
import asyncio
|
|
151
|
+
from spectrida.api import open_i64
|
|
152
|
+
|
|
153
|
+
async def main():
|
|
154
|
+
async with open_i64("GameAssembly.i64") as db:
|
|
155
|
+
|
|
156
|
+
# list all 153k functions
|
|
157
|
+
funcs = await db.list_functions()
|
|
158
|
+
|
|
159
|
+
# name one function — returns name + reasoning + confidence
|
|
160
|
+
result = await db.name_function(0x10001000)
|
|
161
|
+
print(result["new_name"]) # init_atexit_handler
|
|
162
|
+
print(result["reasoning"]) # allocates array of 3 fn ptrs, calls _atexit...
|
|
163
|
+
|
|
164
|
+
# batch name everything (with live progress)
|
|
165
|
+
async def on_progress(done, total, r):
|
|
166
|
+
print(f" {done}/{total} {r['old_name']} -> {r['new_name']}")
|
|
167
|
+
|
|
168
|
+
await db.batch_name(limit=500, rename=True, progress_cb=on_progress)
|
|
169
|
+
|
|
170
|
+
# ask what the binary does
|
|
171
|
+
overview = await db.overview()
|
|
172
|
+
print(overview)
|
|
173
|
+
|
|
174
|
+
# export to IDA script
|
|
175
|
+
await db.export("names.idc", fmt="idc", named_only=True)
|
|
176
|
+
|
|
177
|
+
asyncio.run(main())
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
## The model
|
|
183
|
+
|
|
184
|
+
[`hf.co/gdfhhjk/spectrida-re-gguf`](https://huggingface.co/gdfhhjk/spectrida-re-gguf) — Qwen3-8B
|
|
185
|
+
fine-tuned for reverse engineering.
|
|
186
|
+
|
|
187
|
+
**Trained on:**
|
|
188
|
+
- x86/x64 assembly → function name pairs with call-chain context
|
|
189
|
+
- Tool call traces from [`jtsylve/ida-mcp`](https://github.com/jtsylve/ida-mcp) — headless IDA with idalib
|
|
190
|
+
- Extended context reasoning traces from a codebase context server
|
|
191
|
+
|
|
192
|
+
**Training approach:** neuron-targeted SFT + GRPO. Only the RE-relevant neurons are tuned —
|
|
193
|
+
base Qwen3 knowledge stays intact; the fine-tune just adds a very specific skill on top.
|
|
194
|
+
|
|
195
|
+
Runs locally via Ollama. GGUF — works on CPU, GPU, or both.
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Who is this for
|
|
200
|
+
|
|
201
|
+
You're reversing something. You have a binary with 150,000 functions. Maybe 2,000 have names from
|
|
202
|
+
metadata. The other 148,000 are `sub_XXXXXXXX`. You want to find the network code.
|
|
203
|
+
You can't grep for it because nothing has a name yet.
|
|
204
|
+
|
|
205
|
+
A human reverse engineer can name ~50-100 functions per hour if they're fast. At that rate, 150k functions = **3 years**.
|
|
206
|
+
|
|
207
|
+
spectrIDA names them overnight. Not perfectly — maybe 70% accuracy on generic functions,
|
|
208
|
+
much higher on patterns the model recognizes. But now instead of 148k `sub_` functions you have
|
|
209
|
+
`network_send_packet`, `serialize_player_state`, `validate_checksum` — and you know where to look.
|
|
210
|
+
|
|
211
|
+
It doesn't replace a skilled reverse engineer. It does the boring 80% so you can focus on the
|
|
212
|
+
interesting 20%. It's the orientation layer.
|
|
213
|
+
|
|
214
|
+
**Real use cases:**
|
|
215
|
+
- Game modding — find the physics system in a 150k-function binary in minutes, not days
|
|
216
|
+
- Security research — malware triage, understand a binary's architecture quickly
|
|
217
|
+
- CTF — time pressure, need to know what you're looking at immediately
|
|
218
|
+
- Anyone who has stared at `sub_140001234` for 20 minutes thinking *there has to be a better way*
|
|
219
|
+
|
|
220
|
+
---
|
|
221
|
+
|
|
222
|
+
## Configuration
|
|
223
|
+
|
|
224
|
+
`~/.spectrida/config.toml`:
|
|
225
|
+
|
|
226
|
+
```toml
|
|
227
|
+
[ida]
|
|
228
|
+
idalib = "C:/Program Files/IDA Professional 9.1"
|
|
229
|
+
output_dir = "~/.spectrida/output"
|
|
230
|
+
|
|
231
|
+
[ollama]
|
|
232
|
+
base_url = "http://localhost:11434"
|
|
233
|
+
model = "spectrida-re" # any ollama model name works
|
|
234
|
+
|
|
235
|
+
[pipeline]
|
|
236
|
+
workers = 16
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
Env var overrides: `SPECTRIDA_IDALIB` · `SPECTRIDA_MODEL` · `SPECTRIDA_WORKERS` · `SPECTRIDA_OLLAMA_URL`
|
|
240
|
+
|
|
241
|
+
---
|
|
242
|
+
|
|
243
|
+
## What's coming (chapter 2)
|
|
244
|
+
|
|
245
|
+
- **Deep context naming** — follow call trees N levels deep, feed the full chain to the model.
|
|
246
|
+
A function 3 hops from `encrypt_block` should know it's in the crypto path.
|
|
247
|
+
- **Deobfuscation** — TigressVM pattern detection and handler tracing
|
|
248
|
+
- **MCP server** — expose spectrIDA as an MCP tool so Claude Code can call it natively
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## License
|
|
253
|
+
|
|
254
|
+
MIT. Do whatever you want with it. If it works, cool.
|
|
255
|
+
If it doesn't, blame the GGUF quantization.
|
|
256
|
+
|
|
257
|
+
Built with spite, coffee, and an RTX 4070.
|
|
258
|
+
The model has 199 downloads with zero marketing. Each one adds 0.01% to development speed.
|
|
259
|
+
(This is not true. But it's close.) 👻
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "spectrida"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Ghost through binaries — parallel IDA analysis + AI function naming in your terminal"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "ggfuchsi-oss" }]
|
|
13
|
+
keywords = ["ida", "ida-pro", "reverse-engineering", "binary-analysis", "llm", "ai", "tui", "decompiler"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Environment :: Console",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
"Topic :: Security",
|
|
21
|
+
"Topic :: Software Development :: Disassemblers",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"typer>=0.12",
|
|
28
|
+
"textual>=0.80",
|
|
29
|
+
"httpx>=0.27",
|
|
30
|
+
"rich>=13",
|
|
31
|
+
"tomli>=2.0; python_version < '3.11'",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[project.optional-dependencies]
|
|
35
|
+
gpu = ["torch>=2.0", "numpy>=1.24"]
|
|
36
|
+
dev = ["pytest>=8", "ruff>=0.6"]
|
|
37
|
+
|
|
38
|
+
[project.scripts]
|
|
39
|
+
spectrida = "spectrida.cli:main"
|
|
40
|
+
|
|
41
|
+
[project.urls]
|
|
42
|
+
Homepage = "https://github.com/ggfuchsi-oss/spectrIDA"
|
|
43
|
+
Repository = "https://github.com/ggfuchsi-oss/spectrIDA"
|
|
44
|
+
Issues = "https://github.com/ggfuchsi-oss/spectrIDA/issues"
|
|
45
|
+
|
|
46
|
+
[tool.hatch.build.targets.wheel]
|
|
47
|
+
packages = ["spectrida"]
|
|
48
|
+
|
|
49
|
+
[tool.ruff]
|
|
50
|
+
line-length = 100
|
|
51
|
+
target-version = "py310"
|
|
52
|
+
# the bundled analysis pipeline is vendored subprocess code (idapro must import
|
|
53
|
+
# after sys.path setup, etc.) — not linted.
|
|
54
|
+
extend-exclude = ["spectrida/analysis"]
|
|
55
|
+
|
|
56
|
+
[tool.ruff.lint]
|
|
57
|
+
select = ["E", "F", "W", "I", "UP", "B"]
|
|
58
|
+
ignore = ["E501", "E701", "E702"]
|
|
59
|
+
|
|
60
|
+
[tool.pytest.ini_options]
|
|
61
|
+
testpaths = ["tests"]
|