tabber 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tabber-1.0.0/LICENSE.txt +36 -0
- tabber-1.0.0/PKG-INFO +289 -0
- tabber-1.0.0/README.md +256 -0
- tabber-1.0.0/pyproject.toml +44 -0
- tabber-1.0.0/setup.cfg +4 -0
- tabber-1.0.0/src/tabber/api.py +141 -0
- tabber-1.0.0/src/tabber/caching.py +73 -0
- tabber-1.0.0/src/tabber/config.py +63 -0
- tabber-1.0.0/src/tabber/gatherers/__init__.py +0 -0
- tabber-1.0.0/src/tabber/gatherers/base.py +16 -0
- tabber-1.0.0/src/tabber/gatherers/events.py +53 -0
- tabber-1.0.0/src/tabber/gatherers/instagram.py +61 -0
- tabber-1.0.0/src/tabber/gatherers/news.py +37 -0
- tabber-1.0.0/src/tabber/gatherers/reddit.py +52 -0
- tabber-1.0.0/src/tabber/gatherers/twitter.py +42 -0
- tabber-1.0.0/src/tabber/gatherers/wikipedia.py +96 -0
- tabber-1.0.0/src/tabber/llm.py +97 -0
- tabber-1.0.0/src/tabber/models.py +49 -0
- tabber-1.0.0/src/tabber/modules/__init__.py +0 -0
- tabber-1.0.0/src/tabber/modules/identification.py +321 -0
- tabber-1.0.0/src/tabber/modules/information_gathering.py +98 -0
- tabber-1.0.0/src/tabber/modules/location_analysis.py +62 -0
- tabber-1.0.0/src/tabber/sqlite.py +107 -0
- tabber-1.0.0/src/tabber/tabber.py +168 -0
- tabber-1.0.0/src/tabber.egg-info/PKG-INFO +289 -0
- tabber-1.0.0/src/tabber.egg-info/SOURCES.txt +37 -0
- tabber-1.0.0/src/tabber.egg-info/dependency_links.txt +1 -0
- tabber-1.0.0/src/tabber.egg-info/requires.txt +18 -0
- tabber-1.0.0/src/tabber.egg-info/top_level.txt +1 -0
- tabber-1.0.0/tests/test_api.py +241 -0
- tabber-1.0.0/tests/test_caching.py +144 -0
- tabber-1.0.0/tests/test_config.py +95 -0
- tabber-1.0.0/tests/test_gatherers.py +113 -0
- tabber-1.0.0/tests/test_identification.py +235 -0
- tabber-1.0.0/tests/test_information_gathering.py +115 -0
- tabber-1.0.0/tests/test_llm.py +198 -0
- tabber-1.0.0/tests/test_location_analysis.py +73 -0
- tabber-1.0.0/tests/test_models.py +123 -0
- tabber-1.0.0/tests/test_sqlite.py +206 -0
tabber-1.0.0/LICENSE.txt
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Samuel Roux
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
PRIVACY AND LEGAL DISCLAIMER
|
|
16
|
+
|
|
17
|
+
Systematically tracking or locating individuals without their explicit consent
|
|
18
|
+
can violate privacy laws (e.g., GDPR, CCPA, U.S. state wiretap/stalking
|
|
19
|
+
statutes, platform Terms of Service) and may constitute a criminal offence in
|
|
20
|
+
many jurisdictions. This Software is intended solely for lawful research on
|
|
21
|
+
publicly available information about public figures. Users are solely
|
|
22
|
+
responsible for ensuring that their use of this Software complies with all
|
|
23
|
+
applicable laws and regulations. By using this Software, you represent and
|
|
24
|
+
warrant that you will not use it to track, locate, surveil, or harass any
|
|
25
|
+
individual in violation of applicable law.
|
|
26
|
+
|
|
27
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
28
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
29
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
30
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
31
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
32
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
33
|
+
SOFTWARE. THE AUTHORS EXPRESSLY DISCLAIM ALL LIABILITY FOR ANY UNLAWFUL,
|
|
34
|
+
HARMFUL, OR UNETHICAL USE OF THIS SOFTWARE BY ANY THIRD PARTY, AND SHALL NOT
|
|
35
|
+
BE HELD RESPONSIBLE OR SUBJECT TO LEGAL ACTION FOR THE CONDUCT OR ACTIONS OF
|
|
36
|
+
ANY USER OR DOWNSTREAM RECIPIENT OF THIS SOFTWARE.
|
tabber-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tabber
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: An OSINT CLI person locator tool
|
|
5
|
+
Author-email: Sam <sam@gatewaycorporate.org>
|
|
6
|
+
Classifier: Development Status :: 3 - Alpha
|
|
7
|
+
Classifier: Intended Audience :: Developers
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Requires-Python: >=3.8
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE.txt
|
|
16
|
+
Requires-Dist: click>=8.1
|
|
17
|
+
Requires-Dist: pydantic>=2.0
|
|
18
|
+
Requires-Dist: openai>=1.0
|
|
19
|
+
Requires-Dist: ddgs>=6.0
|
|
20
|
+
Requires-Dist: requests>=2.31
|
|
21
|
+
Requires-Dist: tweepy>=4.14
|
|
22
|
+
Requires-Dist: praw>=7.7
|
|
23
|
+
Requires-Dist: rich>=13.0
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
26
|
+
Requires-Dist: black; extra == "dev"
|
|
27
|
+
Requires-Dist: flake8; extra == "dev"
|
|
28
|
+
Requires-Dist: httpx>=0.27; extra == "dev"
|
|
29
|
+
Provides-Extra: server
|
|
30
|
+
Requires-Dist: fastapi>=0.111; extra == "server"
|
|
31
|
+
Requires-Dist: uvicorn[standard]>=0.30; extra == "server"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# Tabber
|
|
35
|
+
|
|
36
|
+
An OSINT CLI tool that determines the most likely current or recent physical location of a public figure by aggregating data from multiple sources and reasoning over them with an LLM.
|
|
37
|
+
|
|
38
|
+
## How It Works
|
|
39
|
+
|
|
40
|
+
1. **Disambiguation** — The input name is resolved to a structured person profile via LLM.
|
|
41
|
+
2. **Feedback loop** (up to N iterations):
|
|
42
|
+
- The LLM generates targeted search hints based on the profile and any prior data.
|
|
43
|
+
- All configured gatherers run in parallel to collect raw data.
|
|
44
|
+
- The LLM evaluates whether there is sufficient location signal; exits early if so, or refines and repeats.
|
|
45
|
+
3. **Location analysis** — All gathered data is synthesised by a final LLM call into a location result with confidence and reasoning.
|
|
46
|
+
4. Results are displayed in a Rich terminal panel with colour-coded confidence and **automatically cached** to SQLite for instant recall on repeat lookups.
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
git clone https://github.com/itsnotqwerty/tabber.git
|
|
52
|
+
cd tabber
|
|
53
|
+
python -m venv .venv
|
|
54
|
+
source .venv/bin/activate
|
|
55
|
+
pip install -e .
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
To also enable the REST API server:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install -e ".[server]"
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Configuration
|
|
65
|
+
|
|
66
|
+
Configuration is stored in `~/.tabber/config.json` and managed via the `config` subcommand.
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
tabber config set <key> <value>
|
|
70
|
+
tabber config show
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Configuration Keys
|
|
74
|
+
|
|
75
|
+
| Key | Default | Description |
|
|
76
|
+
| ------------------------ | ----------------------- | -------------------------------------------- |
|
|
77
|
+
| `max_iterations` | `3` | Max feedback loop iterations |
|
|
78
|
+
| `llm_provider` | `openai` | LLM backend to use (`openai` or `anthropic`) |
|
|
79
|
+
| `openai_api_key` | — | Required when `llm_provider` is `openai` |
|
|
80
|
+
| `anthropic_api_key` | — | Required when `llm_provider` is `anthropic` |
|
|
81
|
+
| `twitter_bearer_token` | — | Enables the Twitter gatherer |
|
|
82
|
+
| `instagram_access_token` | — | Enables the Instagram gatherer |
|
|
83
|
+
| `reddit_client_id` | — | Required (with secret) for Reddit gatherer |
|
|
84
|
+
| `reddit_client_secret` | — | Required (with ID) for Reddit gatherer |
|
|
85
|
+
| `cache_ttl_hours` | `24` | How long a cached result stays valid (hours) |
|
|
86
|
+
| `db_path` | `~/.tabber/results.db` | SQLite database file location |
|
|
87
|
+
| `server_host` | `127.0.0.1` | Default bind host for `tabber server` |
|
|
88
|
+
| `server_port` | `8000` | Default bind port for `tabber server` |
|
|
89
|
+
|
|
90
|
+
At minimum, set your API key for the chosen provider:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
# OpenAI (default)
|
|
94
|
+
tabber config set openai_api_key sk-...
|
|
95
|
+
|
|
96
|
+
# Anthropic
|
|
97
|
+
tabber config set llm_provider anthropic
|
|
98
|
+
tabber config set anthropic_api_key sk-ant-...
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Usage
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
tabber lookup "Elon Musk"
|
|
105
|
+
tabber "Elon Musk" # shorthand
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Options
|
|
109
|
+
|
|
110
|
+
| Flag | Default | Description |
|
|
111
|
+
| ----------------------- | ----------- | ----------------------------------------------------- |
|
|
112
|
+
| `--verbose` / `-v` | off | Show per-iteration details (hints, source counts) |
|
|
113
|
+
| `--max-iter N` / `-n N` | from config | Override the max number of iterations |
|
|
114
|
+
| `--no-cache` | off | Skip the cache and always run a fresh lookup |
|
|
115
|
+
|
|
116
|
+
The output panel shows the inferred **location**, **confidence** (green ≥70%, yellow ≥40%, red <40%), **reasoning**, and **sources**. Results marked `(cached)` were served from the local database without making any LLM or network calls.
|
|
117
|
+
|
|
118
|
+
## Caching
|
|
119
|
+
|
|
120
|
+
Every completed lookup is stored in a local SQLite database (`~/.tabber/results.db`). On subsequent lookups for the same name, the cached result is returned immediately if it was created within the last `cache_ttl_hours` hours (default: 24).
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
~/.tabber/
|
|
124
|
+
├── config.json # configuration
|
|
125
|
+
└── results.db # SQLite result cache
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Cache behaviour
|
|
129
|
+
|
|
130
|
+
- **CLI** — cache is checked automatically before running the pipeline. Use `--no-cache` to force a fresh run (the new result is still stored).
|
|
131
|
+
- **API** — same logic: set `"no_cache": true` in the request body to bypass the cache.
|
|
132
|
+
- **TTL** — configure how long results stay valid: `tabber config set cache_ttl_hours 48`
|
|
133
|
+
- **Invalidation** — delete cached results for a name via the API (`DELETE /results/{name}`) or by removing the database file.
|
|
134
|
+
|
|
135
|
+
## REST API Server
|
|
136
|
+
|
|
137
|
+
Start the server with:
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
tabber server
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### Server options
|
|
144
|
+
|
|
145
|
+
| Flag | Default | Description |
|
|
146
|
+
| ---------- | ----------- | -------------------------------------- |
|
|
147
|
+
| `--host` | `127.0.0.1` | Bind address |
|
|
148
|
+
| `--port` | `8000` | Bind port |
|
|
149
|
+
| `--reload` | off | Auto-reload on code changes (dev mode) |
|
|
150
|
+
|
|
151
|
+
The server uses FastAPI and requires the `[server]` extra (`pip install -e ".[server]"`). Interactive API docs are available at `http://localhost:8000/docs` once the server is running.
|
|
152
|
+
|
|
153
|
+
### Endpoints
|
|
154
|
+
|
|
155
|
+
| Method | Path | Description |
|
|
156
|
+
| -------- | ----------------- | -------------------------------------------------------------------- |
|
|
157
|
+
| `GET` | `/health` | Health check — returns `{"status": "ok"}` |
|
|
158
|
+
| `POST` | `/lookup` | Run or recall a lookup. Request: `{"name": str, "no_cache": bool}` |
|
|
159
|
+
| `GET` | `/results` | List all stored results, newest first. Supports `?limit=N` (max 500) |
|
|
160
|
+
| `GET` | `/results/{name}` | Most recent stored result for a name |
|
|
161
|
+
| `DELETE` | `/results/{name}` | Invalidate all cached results for a name |
|
|
162
|
+
|
|
163
|
+
### Request / response examples
|
|
164
|
+
|
|
165
|
+
**POST /lookup**
|
|
166
|
+
|
|
167
|
+
```json
|
|
168
|
+
// request
|
|
169
|
+
{ "name": "Taylor Swift" }
|
|
170
|
+
|
|
171
|
+
// response
|
|
172
|
+
{
|
|
173
|
+
"query_name": "Taylor Swift",
|
|
174
|
+
"canon_name": "Taylor Swift",
|
|
175
|
+
"cached": false,
|
|
176
|
+
"timestamp": "2026-03-22T14:00:00+00:00",
|
|
177
|
+
"result": {
|
|
178
|
+
"location": "Nashville, Tennessee, USA",
|
|
179
|
+
"confidence": 0.82,
|
|
180
|
+
"reasoning": "Multiple recent news sources confirm a studio session in Nashville.",
|
|
181
|
+
"sources": ["news", "wikipedia"]
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
**GET /results?limit=5**
|
|
187
|
+
|
|
188
|
+
```json
|
|
189
|
+
[
|
|
190
|
+
{
|
|
191
|
+
"id": 3,
|
|
192
|
+
"query_name": "Taylor Swift",
|
|
193
|
+
"canon_name": "Taylor Swift",
|
|
194
|
+
"location": "Nashville, Tennessee, USA",
|
|
195
|
+
"confidence": 0.82,
|
|
196
|
+
"reasoning": "...",
|
|
197
|
+
"sources": ["news", "wikipedia"],
|
|
198
|
+
"timestamp": "2026-03-22T14:00:00+00:00"
|
|
199
|
+
}
|
|
200
|
+
]
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
**DELETE /results/Taylor%20Swift**
|
|
204
|
+
|
|
205
|
+
```json
|
|
206
|
+
{ "deleted": 1, "name": "Taylor Swift" }
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### Error responses
|
|
210
|
+
|
|
211
|
+
| Status | Condition |
|
|
212
|
+
| ------ | ------------------------------------------ |
|
|
213
|
+
| `422` | Validation error (e.g. empty `name` field) |
|
|
214
|
+
| `502` | Upstream error from LLM or gatherers |
|
|
215
|
+
| `404` | No stored result found for the given name |
|
|
216
|
+
|
|
217
|
+
## Data Sources (Gatherers)
|
|
218
|
+
|
|
219
|
+
| Gatherer | Service | Auth Required |
|
|
220
|
+
| --------- | ------------------------- | ------------------------------------------- |
|
|
221
|
+
| News | DuckDuckGo News | No |
|
|
222
|
+
| Wikipedia | Wikipedia & Wikidata APIs | No |
|
|
223
|
+
| Events | DuckDuckGo Web Search | No |
|
|
224
|
+
| Twitter | Twitter API v2 | `twitter_bearer_token` |
|
|
225
|
+
| Reddit | Reddit API | `reddit_client_id` + `reddit_client_secret` |
|
|
226
|
+
| Instagram | Instagram Graph API | `instagram_access_token` |
|
|
227
|
+
|
|
228
|
+
Gatherers that lack the required credentials are skipped automatically. The tool works with only the unauthenticated gatherers (News, Wikipedia, Events), but more sources improve accuracy.
|
|
229
|
+
|
|
230
|
+
## LLM
|
|
231
|
+
|
|
232
|
+
Tabber supports two providers, configured via `llm_provider`:
|
|
233
|
+
|
|
234
|
+
| Provider | Model | Key |
|
|
235
|
+
| ------------------ | ----------------- | ------------------- |
|
|
236
|
+
| `openai` (default) | `gpt-4o` | `openai_api_key` |
|
|
237
|
+
| `anthropic` | `claude-opus-4-6` | `anthropic_api_key` |
|
|
238
|
+
|
|
239
|
+
Both providers are accessed via the OpenAI-compatible SDK.
|
|
240
|
+
|
|
241
|
+
All LLM calls use **Pydantic structured outputs** via the `response_format` parameter so responses are parsed and validated automatically:
|
|
242
|
+
|
|
243
|
+
- **OpenAI** — uses `client.beta.chat.completions.parse(response_format=Model)`, which returns a validated Pydantic instance directly.
|
|
244
|
+
- **Anthropic** — passes the model's JSON schema via `response_format={"type": "json_schema", ...}` and validates the response with `Model.model_validate_json()`.
|
|
245
|
+
|
|
246
|
+
### Data Models (`models.py`)
|
|
247
|
+
|
|
248
|
+
| Model | Purpose |
|
|
249
|
+
| ------------------ | ---------------------------------------------------------- |
|
|
250
|
+
| `PersonProfile` | Disambiguated identity — name, aliases, roles |
|
|
251
|
+
| `HintsList` | Wrapper for the list of LLM-generated search hints |
|
|
252
|
+
| `GathererResult` | Raw output from one data source |
|
|
253
|
+
| `OSINTBundle` | Aggregated results across all gatherers for one iteration |
|
|
254
|
+
| `SignalEvaluation` | LLM confidence score + reasoning for location sufficiency |
|
|
255
|
+
| `LocationResult` | Final inferred location with confidence and evidence trail |
|
|
256
|
+
| `LookupResponse` | API response envelope — wraps `LocationResult` with metadata (query name, canonical name, cache flag, timestamp) |
|
|
257
|
+
|
|
258
|
+
## Testing
|
|
259
|
+
|
|
260
|
+
The test suite lives in `tests/` and uses [pytest](https://pytest.org). All LLM and external HTTP calls are mocked so tests run offline without API keys.
|
|
261
|
+
|
|
262
|
+
```bash
|
|
263
|
+
# Install dev dependencies (pytest + httpx for API tests)
|
|
264
|
+
pip install -e ".[dev]"
|
|
265
|
+
|
|
266
|
+
# Run the full suite
|
|
267
|
+
pytest
|
|
268
|
+
|
|
269
|
+
# Run with verbose output
|
|
270
|
+
pytest -v
|
|
271
|
+
|
|
272
|
+
# Run a specific file
|
|
273
|
+
pytest tests/test_llm.py
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
### Test coverage by file
|
|
277
|
+
|
|
278
|
+
| Test file | What it covers |
|
|
279
|
+
| -------------------------------- | ------------------------------------------------------------------------------------ |
|
|
280
|
+
| `test_config.py` | `config.load`, `set_key`, `masked` — I/O redirected to `tmp_path` |
|
|
281
|
+
| `test_models.py` | Pydantic validation for every model, including `LookupResponse` |
|
|
282
|
+
| `test_llm.py` | `complete()` routing, system messages, `response_format` for both providers, missing-key errors |
|
|
283
|
+
| `test_identification.py` | Each private function (`_disambiguate`, `_generate_hints`, etc.) and the full `run()` loop |
|
|
284
|
+
| `test_location_analysis.py` | `analyse()` prompt construction and `response_format` pass-through |
|
|
285
|
+
| `test_information_gathering.py` | Gatherer enable/disable logic and `gather()` bundle assembly |
|
|
286
|
+
| `test_gatherers.py` | `is_configured` for every gatherer class, base class interface |
|
|
287
|
+
| `test_sqlite.py` | `init_db`, `save_result`, `get_latest`, `list_all`, `delete_by_name` — in-process SQLite |
|
|
288
|
+
| `test_caching.py` | TTL expiry, `get_cached`, `store`, `invalidate` — DB redirected to `tmp_path` |
|
|
289
|
+
| `test_api.py` | All five REST endpoints via `TestClient` — cached and fresh paths, error cases |
|
tabber-1.0.0/README.md
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
# Tabber
|
|
2
|
+
|
|
3
|
+
An OSINT CLI tool that determines the most likely current or recent physical location of a public figure by aggregating data from multiple sources and reasoning over them with an LLM.
|
|
4
|
+
|
|
5
|
+
## How It Works
|
|
6
|
+
|
|
7
|
+
1. **Disambiguation** — The input name is resolved to a structured person profile via LLM.
|
|
8
|
+
2. **Feedback loop** (up to N iterations):
|
|
9
|
+
- The LLM generates targeted search hints based on the profile and any prior data.
|
|
10
|
+
- All configured gatherers run in parallel to collect raw data.
|
|
11
|
+
- The LLM evaluates whether there is sufficient location signal; exits early if so, or refines and repeats.
|
|
12
|
+
3. **Location analysis** — All gathered data is synthesised by a final LLM call into a location result with confidence and reasoning.
|
|
13
|
+
4. Results are displayed in a Rich terminal panel with colour-coded confidence and **automatically cached** to SQLite for instant recall on repeat lookups.
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
git clone https://github.com/itsnotqwerty/tabber.git
|
|
19
|
+
cd tabber
|
|
20
|
+
python -m venv .venv
|
|
21
|
+
source .venv/bin/activate
|
|
22
|
+
pip install -e .
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
To also enable the REST API server:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install -e ".[server]"
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Configuration
|
|
32
|
+
|
|
33
|
+
Configuration is stored in `~/.tabber/config.json` and managed via the `config` subcommand.
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
tabber config set <key> <value>
|
|
37
|
+
tabber config show
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Configuration Keys
|
|
41
|
+
|
|
42
|
+
| Key | Default | Description |
|
|
43
|
+
| ------------------------ | ----------------------- | -------------------------------------------- |
|
|
44
|
+
| `max_iterations` | `3` | Max feedback loop iterations |
|
|
45
|
+
| `llm_provider` | `openai` | LLM backend to use (`openai` or `anthropic`) |
|
|
46
|
+
| `openai_api_key` | — | Required when `llm_provider` is `openai` |
|
|
47
|
+
| `anthropic_api_key` | — | Required when `llm_provider` is `anthropic` |
|
|
48
|
+
| `twitter_bearer_token` | — | Enables the Twitter gatherer |
|
|
49
|
+
| `instagram_access_token` | — | Enables the Instagram gatherer |
|
|
50
|
+
| `reddit_client_id` | — | Required (with secret) for Reddit gatherer |
|
|
51
|
+
| `reddit_client_secret` | — | Required (with ID) for Reddit gatherer |
|
|
52
|
+
| `cache_ttl_hours` | `24` | How long a cached result stays valid (hours) |
|
|
53
|
+
| `db_path` | `~/.tabber/results.db` | SQLite database file location |
|
|
54
|
+
| `server_host` | `127.0.0.1` | Default bind host for `tabber server` |
|
|
55
|
+
| `server_port` | `8000` | Default bind port for `tabber server` |
|
|
56
|
+
|
|
57
|
+
At minimum, set your API key for the chosen provider:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# OpenAI (default)
|
|
61
|
+
tabber config set openai_api_key sk-...
|
|
62
|
+
|
|
63
|
+
# Anthropic
|
|
64
|
+
tabber config set llm_provider anthropic
|
|
65
|
+
tabber config set anthropic_api_key sk-ant-...
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Usage
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
tabber lookup "Elon Musk"
|
|
72
|
+
tabber "Elon Musk" # shorthand
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Options
|
|
76
|
+
|
|
77
|
+
| Flag | Default | Description |
|
|
78
|
+
| ----------------------- | ----------- | ----------------------------------------------------- |
|
|
79
|
+
| `--verbose` / `-v` | off | Show per-iteration details (hints, source counts) |
|
|
80
|
+
| `--max-iter N` / `-n N` | from config | Override the max number of iterations |
|
|
81
|
+
| `--no-cache` | off | Skip the cache and always run a fresh lookup |
|
|
82
|
+
|
|
83
|
+
The output panel shows the inferred **location**, **confidence** (green ≥70%, yellow ≥40%, red <40%), **reasoning**, and **sources**. Results marked `(cached)` were served from the local database without making any LLM or network calls.
|
|
84
|
+
|
|
85
|
+
## Caching
|
|
86
|
+
|
|
87
|
+
Every completed lookup is stored in a local SQLite database (`~/.tabber/results.db`). On subsequent lookups for the same name, the cached result is returned immediately if it was created within the last `cache_ttl_hours` hours (default: 24).
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
~/.tabber/
|
|
91
|
+
├── config.json # configuration
|
|
92
|
+
└── results.db # SQLite result cache
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Cache behaviour
|
|
96
|
+
|
|
97
|
+
- **CLI** — cache is checked automatically before running the pipeline. Use `--no-cache` to force a fresh run (the new result is still stored).
|
|
98
|
+
- **API** — same logic: set `"no_cache": true` in the request body to bypass the cache.
|
|
99
|
+
- **TTL** — configure how long results stay valid: `tabber config set cache_ttl_hours 48`
|
|
100
|
+
- **Invalidation** — delete cached results for a name via the API (`DELETE /results/{name}`) or by removing the database file.
|
|
101
|
+
|
|
102
|
+
## REST API Server
|
|
103
|
+
|
|
104
|
+
Start the server with:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
tabber server
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Server options
|
|
111
|
+
|
|
112
|
+
| Flag | Default | Description |
|
|
113
|
+
| ---------- | ----------- | -------------------------------------- |
|
|
114
|
+
| `--host` | `127.0.0.1` | Bind address |
|
|
115
|
+
| `--port` | `8000` | Bind port |
|
|
116
|
+
| `--reload` | off | Auto-reload on code changes (dev mode) |
|
|
117
|
+
|
|
118
|
+
The server uses FastAPI and requires the `[server]` extra (`pip install -e ".[server]"`). Interactive API docs are available at `http://localhost:8000/docs` once the server is running.
|
|
119
|
+
|
|
120
|
+
### Endpoints
|
|
121
|
+
|
|
122
|
+
| Method | Path | Description |
|
|
123
|
+
| -------- | ----------------- | -------------------------------------------------------------------- |
|
|
124
|
+
| `GET` | `/health` | Health check — returns `{"status": "ok"}` |
|
|
125
|
+
| `POST` | `/lookup` | Run or recall a lookup. Request: `{"name": str, "no_cache": bool}` |
|
|
126
|
+
| `GET` | `/results` | List all stored results, newest first. Supports `?limit=N` (max 500) |
|
|
127
|
+
| `GET` | `/results/{name}` | Most recent stored result for a name |
|
|
128
|
+
| `DELETE` | `/results/{name}` | Invalidate all cached results for a name |
|
|
129
|
+
|
|
130
|
+
### Request / response examples
|
|
131
|
+
|
|
132
|
+
**POST /lookup**
|
|
133
|
+
|
|
134
|
+
```json
|
|
135
|
+
// request
|
|
136
|
+
{ "name": "Taylor Swift" }
|
|
137
|
+
|
|
138
|
+
// response
|
|
139
|
+
{
|
|
140
|
+
"query_name": "Taylor Swift",
|
|
141
|
+
"canon_name": "Taylor Swift",
|
|
142
|
+
"cached": false,
|
|
143
|
+
"timestamp": "2026-03-22T14:00:00+00:00",
|
|
144
|
+
"result": {
|
|
145
|
+
"location": "Nashville, Tennessee, USA",
|
|
146
|
+
"confidence": 0.82,
|
|
147
|
+
"reasoning": "Multiple recent news sources confirm a studio session in Nashville.",
|
|
148
|
+
"sources": ["news", "wikipedia"]
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
**GET /results?limit=5**
|
|
154
|
+
|
|
155
|
+
```json
|
|
156
|
+
[
|
|
157
|
+
{
|
|
158
|
+
"id": 3,
|
|
159
|
+
"query_name": "Taylor Swift",
|
|
160
|
+
"canon_name": "Taylor Swift",
|
|
161
|
+
"location": "Nashville, Tennessee, USA",
|
|
162
|
+
"confidence": 0.82,
|
|
163
|
+
"reasoning": "...",
|
|
164
|
+
"sources": ["news", "wikipedia"],
|
|
165
|
+
"timestamp": "2026-03-22T14:00:00+00:00"
|
|
166
|
+
}
|
|
167
|
+
]
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
**DELETE /results/Taylor%20Swift**
|
|
171
|
+
|
|
172
|
+
```json
|
|
173
|
+
{ "deleted": 1, "name": "Taylor Swift" }
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Error responses
|
|
177
|
+
|
|
178
|
+
| Status | Condition |
|
|
179
|
+
| ------ | ------------------------------------------ |
|
|
180
|
+
| `422` | Validation error (e.g. empty `name` field) |
|
|
181
|
+
| `502` | Upstream error from LLM or gatherers |
|
|
182
|
+
| `404` | No stored result found for the given name |
|
|
183
|
+
|
|
184
|
+
## Data Sources (Gatherers)
|
|
185
|
+
|
|
186
|
+
| Gatherer | Service | Auth Required |
|
|
187
|
+
| --------- | ------------------------- | ------------------------------------------- |
|
|
188
|
+
| News | DuckDuckGo News | No |
|
|
189
|
+
| Wikipedia | Wikipedia & Wikidata APIs | No |
|
|
190
|
+
| Events | DuckDuckGo Web Search | No |
|
|
191
|
+
| Twitter | Twitter API v2 | `twitter_bearer_token` |
|
|
192
|
+
| Reddit | Reddit API | `reddit_client_id` + `reddit_client_secret` |
|
|
193
|
+
| Instagram | Instagram Graph API | `instagram_access_token` |
|
|
194
|
+
|
|
195
|
+
Gatherers that lack the required credentials are skipped automatically. The tool works with only the unauthenticated gatherers (News, Wikipedia, Events), but more sources improve accuracy.
|
|
196
|
+
|
|
197
|
+
## LLM
|
|
198
|
+
|
|
199
|
+
Tabber supports two providers, configured via `llm_provider`:
|
|
200
|
+
|
|
201
|
+
| Provider | Model | Key |
|
|
202
|
+
| ------------------ | ----------------- | ------------------- |
|
|
203
|
+
| `openai` (default) | `gpt-4o` | `openai_api_key` |
|
|
204
|
+
| `anthropic` | `claude-opus-4-6` | `anthropic_api_key` |
|
|
205
|
+
|
|
206
|
+
Both providers are accessed via the OpenAI-compatible SDK.
|
|
207
|
+
|
|
208
|
+
All LLM calls use **Pydantic structured outputs** via the `response_format` parameter so responses are parsed and validated automatically:
|
|
209
|
+
|
|
210
|
+
- **OpenAI** — uses `client.beta.chat.completions.parse(response_format=Model)`, which returns a validated Pydantic instance directly.
|
|
211
|
+
- **Anthropic** — passes the model's JSON schema via `response_format={"type": "json_schema", ...}` and validates the response with `Model.model_validate_json()`.
|
|
212
|
+
|
|
213
|
+
### Data Models (`models.py`)
|
|
214
|
+
|
|
215
|
+
| Model | Purpose |
|
|
216
|
+
| ------------------ | ---------------------------------------------------------- |
|
|
217
|
+
| `PersonProfile` | Disambiguated identity — name, aliases, roles |
|
|
218
|
+
| `HintsList` | Wrapper for the list of LLM-generated search hints |
|
|
219
|
+
| `GathererResult` | Raw output from one data source |
|
|
220
|
+
| `OSINTBundle` | Aggregated results across all gatherers for one iteration |
|
|
221
|
+
| `SignalEvaluation` | LLM confidence score + reasoning for location sufficiency |
|
|
222
|
+
| `LocationResult` | Final inferred location with confidence and evidence trail |
|
|
223
|
+
| `LookupResponse` | API response envelope — wraps `LocationResult` with metadata (query name, canonical name, cache flag, timestamp) |
|
|
224
|
+
|
|
225
|
+
## Testing
|
|
226
|
+
|
|
227
|
+
The test suite lives in `tests/` and uses [pytest](https://pytest.org). All LLM and external HTTP calls are mocked so tests run offline without API keys.
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
# Install dev dependencies (pytest + httpx for API tests)
|
|
231
|
+
pip install -e ".[dev]"
|
|
232
|
+
|
|
233
|
+
# Run the full suite
|
|
234
|
+
pytest
|
|
235
|
+
|
|
236
|
+
# Run with verbose output
|
|
237
|
+
pytest -v
|
|
238
|
+
|
|
239
|
+
# Run a specific file
|
|
240
|
+
pytest tests/test_llm.py
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
### Test coverage by file
|
|
244
|
+
|
|
245
|
+
| Test file | What it covers |
|
|
246
|
+
| -------------------------------- | ------------------------------------------------------------------------------------ |
|
|
247
|
+
| `test_config.py` | `config.load`, `set_key`, `masked` — I/O redirected to `tmp_path` |
|
|
248
|
+
| `test_models.py` | Pydantic validation for every model, including `LookupResponse` |
|
|
249
|
+
| `test_llm.py` | `complete()` routing, system messages, `response_format` for both providers, missing-key errors |
|
|
250
|
+
| `test_identification.py` | Each private function (`_disambiguate`, `_generate_hints`, etc.) and the full `run()` loop |
|
|
251
|
+
| `test_location_analysis.py` | `analyse()` prompt construction and `response_format` pass-through |
|
|
252
|
+
| `test_information_gathering.py` | Gatherer enable/disable logic and `gather()` bundle assembly |
|
|
253
|
+
| `test_gatherers.py` | `is_configured` for every gatherer class, base class interface |
|
|
254
|
+
| `test_sqlite.py` | `init_db`, `save_result`, `get_latest`, `list_all`, `delete_by_name` — in-process SQLite |
|
|
255
|
+
| `test_caching.py` | TTL expiry, `get_cached`, `store`, `invalidate` — DB redirected to `tmp_path` |
|
|
256
|
+
| `test_api.py` | All five REST endpoints via `TestClient` — cached and fresh paths, error cases |
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=45", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tabber"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "An OSINT CLI person locator tool"
|
|
9
|
+
authors = [
|
|
10
|
+
{name = "Sam", email = "sam@gatewaycorporate.org"}
|
|
11
|
+
]
|
|
12
|
+
dependencies = [
|
|
13
|
+
"click>=8.1",
|
|
14
|
+
"pydantic>=2.0",
|
|
15
|
+
"openai>=1.0",
|
|
16
|
+
"ddgs>=6.0",
|
|
17
|
+
"requests>=2.31",
|
|
18
|
+
"tweepy>=4.14",
|
|
19
|
+
"praw>=7.7",
|
|
20
|
+
"rich>=13.0"
|
|
21
|
+
]
|
|
22
|
+
readme = "README.md"
|
|
23
|
+
requires-python = ">=3.8"
|
|
24
|
+
classifiers = [
|
|
25
|
+
"Development Status :: 3 - Alpha",
|
|
26
|
+
"Intended Audience :: Developers",
|
|
27
|
+
"Programming Language :: Python :: 3",
|
|
28
|
+
"Programming Language :: Python :: 3.8",
|
|
29
|
+
"Programming Language :: Python :: 3.9",
|
|
30
|
+
"Programming Language :: Python :: 3.10",
|
|
31
|
+
"Programming Language :: Python :: 3.11",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[project.optional-dependencies]
|
|
35
|
+
dev = ["pytest>=7.0", "black", "flake8", "httpx>=0.27"]
|
|
36
|
+
server = ["fastapi>=0.111", "uvicorn[standard]>=0.30"]
|
|
37
|
+
|
|
38
|
+
[tool.setuptools.packages.find]
|
|
39
|
+
where = ["src"]
|
|
40
|
+
|
|
41
|
+
[tool.pytest.ini_options]
|
|
42
|
+
pythonpath = ["src/tabber"]
|
|
43
|
+
testpaths = ["tests"]
|
|
44
|
+
# NOTE(review): "extrapaths" is not a recognized pytest ini option — pytest uses
# "pythonpath" (already set above), so this line is ignored; confirm intent and
# remove or fold into "pythonpath".
extrapaths = ["src/tabber"]
|
tabber-1.0.0/setup.cfg
ADDED