tabber 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. tabber-1.0.0/LICENSE.txt +36 -0
  2. tabber-1.0.0/PKG-INFO +289 -0
  3. tabber-1.0.0/README.md +256 -0
  4. tabber-1.0.0/pyproject.toml +44 -0
  5. tabber-1.0.0/setup.cfg +4 -0
  6. tabber-1.0.0/src/tabber/api.py +141 -0
  7. tabber-1.0.0/src/tabber/caching.py +73 -0
  8. tabber-1.0.0/src/tabber/config.py +63 -0
  9. tabber-1.0.0/src/tabber/gatherers/__init__.py +0 -0
  10. tabber-1.0.0/src/tabber/gatherers/base.py +16 -0
  11. tabber-1.0.0/src/tabber/gatherers/events.py +53 -0
  12. tabber-1.0.0/src/tabber/gatherers/instagram.py +61 -0
  13. tabber-1.0.0/src/tabber/gatherers/news.py +37 -0
  14. tabber-1.0.0/src/tabber/gatherers/reddit.py +52 -0
  15. tabber-1.0.0/src/tabber/gatherers/twitter.py +42 -0
  16. tabber-1.0.0/src/tabber/gatherers/wikipedia.py +96 -0
  17. tabber-1.0.0/src/tabber/llm.py +97 -0
  18. tabber-1.0.0/src/tabber/models.py +49 -0
  19. tabber-1.0.0/src/tabber/modules/__init__.py +0 -0
  20. tabber-1.0.0/src/tabber/modules/identification.py +321 -0
  21. tabber-1.0.0/src/tabber/modules/information_gathering.py +98 -0
  22. tabber-1.0.0/src/tabber/modules/location_analysis.py +62 -0
  23. tabber-1.0.0/src/tabber/sqlite.py +107 -0
  24. tabber-1.0.0/src/tabber/tabber.py +168 -0
  25. tabber-1.0.0/src/tabber.egg-info/PKG-INFO +289 -0
  26. tabber-1.0.0/src/tabber.egg-info/SOURCES.txt +37 -0
  27. tabber-1.0.0/src/tabber.egg-info/dependency_links.txt +1 -0
  28. tabber-1.0.0/src/tabber.egg-info/requires.txt +18 -0
  29. tabber-1.0.0/src/tabber.egg-info/top_level.txt +1 -0
  30. tabber-1.0.0/tests/test_api.py +241 -0
  31. tabber-1.0.0/tests/test_caching.py +144 -0
  32. tabber-1.0.0/tests/test_config.py +95 -0
  33. tabber-1.0.0/tests/test_gatherers.py +113 -0
  34. tabber-1.0.0/tests/test_identification.py +235 -0
  35. tabber-1.0.0/tests/test_information_gathering.py +115 -0
  36. tabber-1.0.0/tests/test_llm.py +198 -0
  37. tabber-1.0.0/tests/test_location_analysis.py +73 -0
  38. tabber-1.0.0/tests/test_models.py +123 -0
  39. tabber-1.0.0/tests/test_sqlite.py +206 -0
@@ -0,0 +1,36 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Samuel Roux
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ PRIVACY AND LEGAL DISCLAIMER
16
+
17
+ Systematically tracking or locating individuals without their explicit consent
18
+ can violate privacy laws (e.g., GDPR, CCPA, U.S. state wiretap/stalking
19
+ statutes, platform Terms of Service) and may constitute a criminal offence in
20
+ many jurisdictions. This Software is intended solely for lawful research on
21
+ publicly available information about public figures. Users are solely
22
+ responsible for ensuring that their use of this Software complies with all
23
+ applicable laws and regulations. By using this Software, you represent and
24
+ warrant that you will not use it to track, locate, surveil, or harass any
25
+ individual in violation of applicable law.
26
+
27
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
31
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
32
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33
+ SOFTWARE. THE AUTHORS EXPRESSLY DISCLAIM ALL LIABILITY FOR ANY UNLAWFUL,
34
+ HARMFUL, OR UNETHICAL USE OF THIS SOFTWARE BY ANY THIRD PARTY, AND SHALL NOT
35
+ BE HELD RESPONSIBLE OR SUBJECT TO LEGAL ACTION FOR THE CONDUCT OR ACTIONS OF
36
+ ANY USER OR DOWNSTREAM RECIPIENT OF THIS SOFTWARE.
tabber-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,289 @@
1
+ Metadata-Version: 2.4
2
+ Name: tabber
3
+ Version: 1.0.0
4
+ Summary: An OSINT CLI person locator tool
5
+ Author-email: Sam <sam@gatewaycorporate.org>
6
+ Classifier: Development Status :: 3 - Alpha
7
+ Classifier: Intended Audience :: Developers
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.8
10
+ Classifier: Programming Language :: Python :: 3.9
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Requires-Python: >=3.8
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE.txt
16
+ Requires-Dist: click>=8.1
17
+ Requires-Dist: pydantic>=2.0
18
+ Requires-Dist: openai>=1.0
19
+ Requires-Dist: ddgs>=6.0
20
+ Requires-Dist: requests>=2.31
21
+ Requires-Dist: tweepy>=4.14
22
+ Requires-Dist: praw>=7.7
23
+ Requires-Dist: rich>=13.0
24
+ Provides-Extra: dev
25
+ Requires-Dist: pytest>=7.0; extra == "dev"
26
+ Requires-Dist: black; extra == "dev"
27
+ Requires-Dist: flake8; extra == "dev"
28
+ Requires-Dist: httpx>=0.27; extra == "dev"
29
+ Provides-Extra: server
30
+ Requires-Dist: fastapi>=0.111; extra == "server"
31
+ Requires-Dist: uvicorn[standard]>=0.30; extra == "server"
32
+ Dynamic: license-file
33
+
34
+ # Tabber
35
+
36
+ An OSINT CLI tool that determines the most likely current or recent physical location of a public figure by aggregating data from multiple sources and reasoning over them with an LLM.
37
+
38
+ ## How It Works
39
+
40
+ 1. **Disambiguation** — The input name is resolved to a structured person profile via LLM.
41
+ 2. **Feedback loop** (up to N iterations):
42
+ - The LLM generates targeted search hints based on the profile and any prior data.
43
+ - All configured gatherers run in parallel to collect raw data.
44
+ - The LLM evaluates whether there is sufficient location signal; exits early if so, or refines and repeats.
45
+ 3. **Location analysis** — All gathered data is synthesised by a final LLM call into a location result with confidence and reasoning.
46
+ 4. Results are displayed in a Rich terminal panel with colour-coded confidence and **automatically cached** to SQLite for instant recall on repeat lookups.
47
+
48
+ ## Installation
49
+
50
+ ```bash
51
+ git clone https://github.com/itsnotqwerty/tabber.git
52
+ cd tabber
53
+ python -m venv .venv
54
+ source .venv/bin/activate
55
+ pip install -e .
56
+ ```
57
+
58
+ To also enable the REST API server:
59
+
60
+ ```bash
61
+ pip install -e ".[server]"
62
+ ```
63
+
64
+ ## Configuration
65
+
66
+ Configuration is stored in `~/.tabber/config.json` and managed via the `config` subcommand.
67
+
68
+ ```bash
69
+ tabber config set <key> <value>
70
+ tabber config show
71
+ ```
72
+
73
+ ### Configuration Keys
74
+
75
+ | Key | Default | Description |
76
+ | ------------------------ | ----------------------- | -------------------------------------------- |
77
+ | `max_iterations` | `3` | Max feedback loop iterations |
78
+ | `llm_provider` | `openai` | LLM backend to use (`openai` or `anthropic`) |
79
+ | `openai_api_key` | — | Required when `llm_provider` is `openai` |
80
+ | `anthropic_api_key` | — | Required when `llm_provider` is `anthropic` |
81
+ | `twitter_bearer_token` | — | Enables the Twitter gatherer |
82
+ | `instagram_access_token` | — | Enables the Instagram gatherer |
83
+ | `reddit_client_id` | — | Required (with secret) for Reddit gatherer |
84
+ | `reddit_client_secret` | — | Required (with ID) for Reddit gatherer |
85
+ | `cache_ttl_hours` | `24` | How long a cached result stays valid (hours) |
86
+ | `db_path` | `~/.tabber/results.db` | SQLite database file location |
87
+ | `server_host` | `127.0.0.1` | Default bind host for `tabber server` |
88
+ | `server_port` | `8000` | Default bind port for `tabber server` |
89
+
90
+ At minimum, set your API key for the chosen provider:
91
+
92
+ ```bash
93
+ # OpenAI (default)
94
+ tabber config set openai_api_key sk-...
95
+
96
+ # Anthropic
97
+ tabber config set llm_provider anthropic
98
+ tabber config set anthropic_api_key sk-ant-...
99
+ ```
100
+
101
+ ## Usage
102
+
103
+ ```bash
104
+ tabber lookup "Elon Musk"
105
+ tabber "Elon Musk" # shorthand
106
+ ```
107
+
108
+ ### Options
109
+
110
+ | Flag | Default | Description |
111
+ | ----------------------- | ----------- | ----------------------------------------------------- |
112
+ | `--verbose` / `-v` | off | Show per-iteration details (hints, source counts) |
113
+ | `--max-iter N` / `-n N` | from config | Override the max number of iterations |
114
+ | `--no-cache` | off | Skip the cache and always run a fresh lookup |
115
+
116
+ The output panel shows the inferred **location**, **confidence** (green ≥70%, yellow ≥40%, red <40%), **reasoning**, and **sources**. Results marked `(cached)` were served from the local database without making any LLM or network calls.
117
+
118
+ ## Caching
119
+
120
+ Every completed lookup is stored in a local SQLite database (`~/.tabber/results.db`). On subsequent lookups for the same name, the cached result is returned immediately if it was created within the last `cache_ttl_hours` hours (default: 24).
121
+
122
+ ```
123
+ ~/.tabber/
124
+ ├── config.json # configuration
125
+ └── results.db # SQLite result cache
126
+ ```
127
+
128
+ ### Cache behaviour
129
+
130
+ - **CLI** — cache is checked automatically before running the pipeline. Use `--no-cache` to force a fresh run (the new result is still stored).
131
+ - **API** — same logic: set `"no_cache": true` in the request body to bypass the cache.
132
+ - **TTL** — configure how long results stay valid: `tabber config set cache_ttl_hours 48`
133
+ - **Invalidation** — delete cached results for a name via the API (`DELETE /results/{name}`) or by removing the database file.
134
+
135
+ ## REST API Server
136
+
137
+ Start the server with:
138
+
139
+ ```bash
140
+ tabber server
141
+ ```
142
+
143
+ ### Server options
144
+
145
+ | Flag | Default | Description |
146
+ | ---------- | ----------- | -------------------------------------- |
147
+ | `--host` | `127.0.0.1` | Bind address |
148
+ | `--port` | `8000` | Bind port |
149
+ | `--reload` | off | Auto-reload on code changes (dev mode) |
150
+
151
+ The server uses FastAPI and requires the `[server]` extra (`pip install -e ".[server]"`). Interactive API docs are available at `http://localhost:8000/docs` once the server is running.
152
+
153
+ ### Endpoints
154
+
155
+ | Method | Path | Description |
156
+ | -------- | ----------------- | -------------------------------------------------------------------- |
157
+ | `GET` | `/health` | Health check — returns `{"status": "ok"}` |
158
+ | `POST` | `/lookup` | Run or recall a lookup. Request: `{"name": str, "no_cache": bool}` |
159
+ | `GET` | `/results` | List all stored results, newest first. Supports `?limit=N` (max 500) |
160
+ | `GET` | `/results/{name}` | Most recent stored result for a name |
161
+ | `DELETE` | `/results/{name}` | Invalidate all cached results for a name |
162
+
163
+ ### Request / response examples
164
+
165
+ **POST /lookup**
166
+
167
+ ```json
168
+ // request
169
+ { "name": "Taylor Swift" }
170
+
171
+ // response
172
+ {
173
+ "query_name": "Taylor Swift",
174
+ "canon_name": "Taylor Swift",
175
+ "cached": false,
176
+ "timestamp": "2026-03-22T14:00:00+00:00",
177
+ "result": {
178
+ "location": "Nashville, Tennessee, USA",
179
+ "confidence": 0.82,
180
+ "reasoning": "Multiple recent news sources confirm a studio session in Nashville.",
181
+ "sources": ["news", "wikipedia"]
182
+ }
183
+ }
184
+ ```
185
+
186
+ **GET /results?limit=5**
187
+
188
+ ```json
189
+ [
190
+ {
191
+ "id": 3,
192
+ "query_name": "Taylor Swift",
193
+ "canon_name": "Taylor Swift",
194
+ "location": "Nashville, Tennessee, USA",
195
+ "confidence": 0.82,
196
+ "reasoning": "...",
197
+ "sources": ["news", "wikipedia"],
198
+ "timestamp": "2026-03-22T14:00:00+00:00"
199
+ }
200
+ ]
201
+ ```
202
+
203
+ **DELETE /results/Taylor%20Swift**
204
+
205
+ ```json
206
+ { "deleted": 1, "name": "Taylor Swift" }
207
+ ```
208
+
209
+ ### Error responses
210
+
211
+ | Status | Condition |
212
+ | ------ | ------------------------------------------ |
213
+ | `422` | Validation error (e.g. empty `name` field) |
214
+ | `502` | Upstream error from LLM or gatherers |
215
+ | `404` | No stored result found for the given name |
216
+
217
+ ## Data Sources (Gatherers)
218
+
219
+ | Gatherer | Service | Auth Required |
220
+ | --------- | ------------------------- | ------------------------------------------- |
221
+ | News | DuckDuckGo News | No |
222
+ | Wikipedia | Wikipedia & Wikidata APIs | No |
223
+ | Events | DuckDuckGo Web Search | No |
224
+ | Twitter | Twitter API v2 | `twitter_bearer_token` |
225
+ | Reddit | Reddit API | `reddit_client_id` + `reddit_client_secret` |
226
+ | Instagram | Instagram Graph API | `instagram_access_token` |
227
+
228
+ Gatherers that lack the required credentials are skipped automatically. The tool works with only the unauthenticated gatherers (News, Wikipedia, Events), but more sources improve accuracy.
229
+
230
+ ## LLM
231
+
232
+ Tabber supports two providers, configured via `llm_provider`:
233
+
234
+ | Provider | Model | Key |
235
+ | ------------------ | ----------------- | ------------------- |
236
+ | `openai` (default) | `gpt-4o` | `openai_api_key` |
237
+ | `anthropic` | `claude-opus-4-6` | `anthropic_api_key` |
238
+
239
+ Both providers are accessed via the OpenAI-compatible SDK.
240
+
241
+ All LLM calls use **Pydantic structured outputs** via the `response_format` parameter so responses are parsed and validated automatically:
242
+
243
+ - **OpenAI** — uses `client.beta.chat.completions.parse(response_format=Model)`, which returns a validated Pydantic instance directly.
244
+ - **Anthropic** — passes the model's JSON schema via `response_format={"type": "json_schema", ...}` and validates the response with `Model.model_validate_json()`.
245
+
246
+ ### Data Models (`models.py`)
247
+
248
+ | Model | Purpose |
249
+ | ------------------ | ---------------------------------------------------------- |
250
+ | `PersonProfile` | Disambiguated identity — name, aliases, roles |
251
+ | `HintsList` | Wrapper for the list of LLM-generated search hints |
252
+ | `GathererResult` | Raw output from one data source |
253
+ | `OSINTBundle` | Aggregated results across all gatherers for one iteration |
254
+ | `SignalEvaluation` | LLM confidence score + reasoning for location sufficiency |
255
+ | `LocationResult` | Final inferred location with confidence and evidence trail |
256
+ | `LookupResponse` | API response envelope — wraps `LocationResult` with metadata (query name, canonical name, cache flag, timestamp) |
257
+
258
+ ## Testing
259
+
260
+ The test suite lives in `tests/` and uses [pytest](https://pytest.org). All LLM and external HTTP calls are mocked so tests run offline without API keys.
261
+
262
+ ```bash
263
+ # Install dev dependencies (pytest + httpx for API tests)
264
+ pip install -e ".[dev]"
265
+
266
+ # Run the full suite
267
+ pytest
268
+
269
+ # Run with verbose output
270
+ pytest -v
271
+
272
+ # Run a specific file
273
+ pytest tests/test_llm.py
274
+ ```
275
+
276
+ ### Test coverage by file
277
+
278
+ | Test file | What it covers |
279
+ | -------------------------------- | ------------------------------------------------------------------------------------ |
280
+ | `test_config.py` | `config.load`, `set_key`, `masked` — I/O redirected to `tmp_path` |
281
+ | `test_models.py` | Pydantic validation for every model, including `LookupResponse` |
282
+ | `test_llm.py` | `complete()` routing, system messages, `response_format` for both providers, missing-key errors |
283
+ | `test_identification.py` | Each private function (`_disambiguate`, `_generate_hints`, etc.) and the full `run()` loop |
284
+ | `test_location_analysis.py` | `analyse()` prompt construction and `response_format` pass-through |
285
+ | `test_information_gathering.py` | Gatherer enable/disable logic and `gather()` bundle assembly |
286
+ | `test_gatherers.py` | `is_configured` for every gatherer class, base class interface |
287
+ | `test_sqlite.py` | `init_db`, `save_result`, `get_latest`, `list_all`, `delete_by_name` — in-process SQLite |
288
+ | `test_caching.py` | TTL expiry, `get_cached`, `store`, `invalidate` — DB redirected to `tmp_path` |
289
+ | `test_api.py` | All five REST endpoints via `TestClient` — cached and fresh paths, error cases |
tabber-1.0.0/README.md ADDED
@@ -0,0 +1,256 @@
1
+ # Tabber
2
+
3
+ An OSINT CLI tool that determines the most likely current or recent physical location of a public figure by aggregating data from multiple sources and reasoning over them with an LLM.
4
+
5
+ ## How It Works
6
+
7
+ 1. **Disambiguation** — The input name is resolved to a structured person profile via LLM.
8
+ 2. **Feedback loop** (up to N iterations):
9
+ - The LLM generates targeted search hints based on the profile and any prior data.
10
+ - All configured gatherers run in parallel to collect raw data.
11
+ - The LLM evaluates whether there is sufficient location signal; exits early if so, or refines and repeats.
12
+ 3. **Location analysis** — All gathered data is synthesised by a final LLM call into a location result with confidence and reasoning.
13
+ 4. Results are displayed in a Rich terminal panel with colour-coded confidence and **automatically cached** to SQLite for instant recall on repeat lookups.
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ git clone https://github.com/itsnotqwerty/tabber.git
19
+ cd tabber
20
+ python -m venv .venv
21
+ source .venv/bin/activate
22
+ pip install -e .
23
+ ```
24
+
25
+ To also enable the REST API server:
26
+
27
+ ```bash
28
+ pip install -e ".[server]"
29
+ ```
30
+
31
+ ## Configuration
32
+
33
+ Configuration is stored in `~/.tabber/config.json` and managed via the `config` subcommand.
34
+
35
+ ```bash
36
+ tabber config set <key> <value>
37
+ tabber config show
38
+ ```
39
+
40
+ ### Configuration Keys
41
+
42
+ | Key | Default | Description |
43
+ | ------------------------ | ----------------------- | -------------------------------------------- |
44
+ | `max_iterations` | `3` | Max feedback loop iterations |
45
+ | `llm_provider` | `openai` | LLM backend to use (`openai` or `anthropic`) |
46
+ | `openai_api_key` | — | Required when `llm_provider` is `openai` |
47
+ | `anthropic_api_key` | — | Required when `llm_provider` is `anthropic` |
48
+ | `twitter_bearer_token` | — | Enables the Twitter gatherer |
49
+ | `instagram_access_token` | — | Enables the Instagram gatherer |
50
+ | `reddit_client_id` | — | Required (with secret) for Reddit gatherer |
51
+ | `reddit_client_secret` | — | Required (with ID) for Reddit gatherer |
52
+ | `cache_ttl_hours` | `24` | How long a cached result stays valid (hours) |
53
+ | `db_path` | `~/.tabber/results.db` | SQLite database file location |
54
+ | `server_host` | `127.0.0.1` | Default bind host for `tabber server` |
55
+ | `server_port` | `8000` | Default bind port for `tabber server` |
56
+
57
+ At minimum, set your API key for the chosen provider:
58
+
59
+ ```bash
60
+ # OpenAI (default)
61
+ tabber config set openai_api_key sk-...
62
+
63
+ # Anthropic
64
+ tabber config set llm_provider anthropic
65
+ tabber config set anthropic_api_key sk-ant-...
66
+ ```
67
+
68
+ ## Usage
69
+
70
+ ```bash
71
+ tabber lookup "Elon Musk"
72
+ tabber "Elon Musk" # shorthand
73
+ ```
74
+
75
+ ### Options
76
+
77
+ | Flag | Default | Description |
78
+ | ----------------------- | ----------- | ----------------------------------------------------- |
79
+ | `--verbose` / `-v` | off | Show per-iteration details (hints, source counts) |
80
+ | `--max-iter N` / `-n N` | from config | Override the max number of iterations |
81
+ | `--no-cache` | off | Skip the cache and always run a fresh lookup |
82
+
83
+ The output panel shows the inferred **location**, **confidence** (green ≥70%, yellow ≥40%, red <40%), **reasoning**, and **sources**. Results marked `(cached)` were served from the local database without making any LLM or network calls.
84
+
85
+ ## Caching
86
+
87
+ Every completed lookup is stored in a local SQLite database (`~/.tabber/results.db`). On subsequent lookups for the same name, the cached result is returned immediately if it was created within the last `cache_ttl_hours` hours (default: 24).
88
+
89
+ ```
90
+ ~/.tabber/
91
+ ├── config.json # configuration
92
+ └── results.db # SQLite result cache
93
+ ```
94
+
95
+ ### Cache behaviour
96
+
97
+ - **CLI** — cache is checked automatically before running the pipeline. Use `--no-cache` to force a fresh run (the new result is still stored).
98
+ - **API** — same logic: set `"no_cache": true` in the request body to bypass the cache.
99
+ - **TTL** — configure how long results stay valid: `tabber config set cache_ttl_hours 48`
100
+ - **Invalidation** — delete cached results for a name via the API (`DELETE /results/{name}`) or by removing the database file.
101
+
102
+ ## REST API Server
103
+
104
+ Start the server with:
105
+
106
+ ```bash
107
+ tabber server
108
+ ```
109
+
110
+ ### Server options
111
+
112
+ | Flag | Default | Description |
113
+ | ---------- | ----------- | -------------------------------------- |
114
+ | `--host` | `127.0.0.1` | Bind address |
115
+ | `--port` | `8000` | Bind port |
116
+ | `--reload` | off | Auto-reload on code changes (dev mode) |
117
+
118
+ The server uses FastAPI and requires the `[server]` extra (`pip install -e ".[server]"`). Interactive API docs are available at `http://localhost:8000/docs` once the server is running.
119
+
120
+ ### Endpoints
121
+
122
+ | Method | Path | Description |
123
+ | -------- | ----------------- | -------------------------------------------------------------------- |
124
+ | `GET` | `/health` | Health check — returns `{"status": "ok"}` |
125
+ | `POST` | `/lookup` | Run or recall a lookup. Request: `{"name": str, "no_cache": bool}` |
126
+ | `GET` | `/results` | List all stored results, newest first. Supports `?limit=N` (max 500) |
127
+ | `GET` | `/results/{name}` | Most recent stored result for a name |
128
+ | `DELETE` | `/results/{name}` | Invalidate all cached results for a name |
129
+
130
+ ### Request / response examples
131
+
132
+ **POST /lookup**
133
+
134
+ ```json
135
+ // request
136
+ { "name": "Taylor Swift" }
137
+
138
+ // response
139
+ {
140
+ "query_name": "Taylor Swift",
141
+ "canon_name": "Taylor Swift",
142
+ "cached": false,
143
+ "timestamp": "2026-03-22T14:00:00+00:00",
144
+ "result": {
145
+ "location": "Nashville, Tennessee, USA",
146
+ "confidence": 0.82,
147
+ "reasoning": "Multiple recent news sources confirm a studio session in Nashville.",
148
+ "sources": ["news", "wikipedia"]
149
+ }
150
+ }
151
+ ```
152
+
153
+ **GET /results?limit=5**
154
+
155
+ ```json
156
+ [
157
+ {
158
+ "id": 3,
159
+ "query_name": "Taylor Swift",
160
+ "canon_name": "Taylor Swift",
161
+ "location": "Nashville, Tennessee, USA",
162
+ "confidence": 0.82,
163
+ "reasoning": "...",
164
+ "sources": ["news", "wikipedia"],
165
+ "timestamp": "2026-03-22T14:00:00+00:00"
166
+ }
167
+ ]
168
+ ```
169
+
170
+ **DELETE /results/Taylor%20Swift**
171
+
172
+ ```json
173
+ { "deleted": 1, "name": "Taylor Swift" }
174
+ ```
175
+
176
+ ### Error responses
177
+
178
+ | Status | Condition |
179
+ | ------ | ------------------------------------------ |
180
+ | `422` | Validation error (e.g. empty `name` field) |
181
+ | `502` | Upstream error from LLM or gatherers |
182
+ | `404` | No stored result found for the given name |
183
+
184
+ ## Data Sources (Gatherers)
185
+
186
+ | Gatherer | Service | Auth Required |
187
+ | --------- | ------------------------- | ------------------------------------------- |
188
+ | News | DuckDuckGo News | No |
189
+ | Wikipedia | Wikipedia & Wikidata APIs | No |
190
+ | Events | DuckDuckGo Web Search | No |
191
+ | Twitter | Twitter API v2 | `twitter_bearer_token` |
192
+ | Reddit | Reddit API | `reddit_client_id` + `reddit_client_secret` |
193
+ | Instagram | Instagram Graph API | `instagram_access_token` |
194
+
195
+ Gatherers that lack the required credentials are skipped automatically. The tool works with only the unauthenticated gatherers (News, Wikipedia, Events), but more sources improve accuracy.
196
+
197
+ ## LLM
198
+
199
+ Tabber supports two providers, configured via `llm_provider`:
200
+
201
+ | Provider | Model | Key |
202
+ | ------------------ | ----------------- | ------------------- |
203
+ | `openai` (default) | `gpt-4o` | `openai_api_key` |
204
+ | `anthropic` | `claude-opus-4-6` | `anthropic_api_key` |
205
+
206
+ Both providers are accessed via the OpenAI-compatible SDK.
207
+
208
+ All LLM calls use **Pydantic structured outputs** via the `response_format` parameter so responses are parsed and validated automatically:
209
+
210
+ - **OpenAI** — uses `client.beta.chat.completions.parse(response_format=Model)`, which returns a validated Pydantic instance directly.
211
+ - **Anthropic** — passes the model's JSON schema via `response_format={"type": "json_schema", ...}` and validates the response with `Model.model_validate_json()`.
212
+
213
+ ### Data Models (`models.py`)
214
+
215
+ | Model | Purpose |
216
+ | ------------------ | ---------------------------------------------------------- |
217
+ | `PersonProfile` | Disambiguated identity — name, aliases, roles |
218
+ | `HintsList` | Wrapper for the list of LLM-generated search hints |
219
+ | `GathererResult` | Raw output from one data source |
220
+ | `OSINTBundle` | Aggregated results across all gatherers for one iteration |
221
+ | `SignalEvaluation` | LLM confidence score + reasoning for location sufficiency |
222
+ | `LocationResult` | Final inferred location with confidence and evidence trail |
223
+ | `LookupResponse` | API response envelope — wraps `LocationResult` with metadata (query name, canonical name, cache flag, timestamp) |
224
+
225
+ ## Testing
226
+
227
+ The test suite lives in `tests/` and uses [pytest](https://pytest.org). All LLM and external HTTP calls are mocked so tests run offline without API keys.
228
+
229
+ ```bash
230
+ # Install dev dependencies (pytest + httpx for API tests)
231
+ pip install -e ".[dev]"
232
+
233
+ # Run the full suite
234
+ pytest
235
+
236
+ # Run with verbose output
237
+ pytest -v
238
+
239
+ # Run a specific file
240
+ pytest tests/test_llm.py
241
+ ```
242
+
243
+ ### Test coverage by file
244
+
245
+ | Test file | What it covers |
246
+ | -------------------------------- | ------------------------------------------------------------------------------------ |
247
+ | `test_config.py` | `config.load`, `set_key`, `masked` — I/O redirected to `tmp_path` |
248
+ | `test_models.py` | Pydantic validation for every model, including `LookupResponse` |
249
+ | `test_llm.py` | `complete()` routing, system messages, `response_format` for both providers, missing-key errors |
250
+ | `test_identification.py` | Each private function (`_disambiguate`, `_generate_hints`, etc.) and the full `run()` loop |
251
+ | `test_location_analysis.py` | `analyse()` prompt construction and `response_format` pass-through |
252
+ | `test_information_gathering.py` | Gatherer enable/disable logic and `gather()` bundle assembly |
253
+ | `test_gatherers.py` | `is_configured` for every gatherer class, base class interface |
254
+ | `test_sqlite.py` | `init_db`, `save_result`, `get_latest`, `list_all`, `delete_by_name` — in-process SQLite |
255
+ | `test_caching.py` | TTL expiry, `get_cached`, `store`, `invalidate` — DB redirected to `tmp_path` |
256
+ | `test_api.py` | All five REST endpoints via `TestClient` — cached and fresh paths, error cases |
@@ -0,0 +1,44 @@
1
+ [build-system]
2
+ requires = ["setuptools>=45", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "tabber"
7
+ version = "1.0.0"
8
+ description = "An OSINT CLI person locator tool"
9
+ authors = [
10
+ {name = "Sam", email = "sam@gatewaycorporate.org"}
11
+ ]
12
+ dependencies = [
13
+ "click>=8.1",
14
+ "pydantic>=2.0",
15
+ "openai>=1.0",
16
+ "ddgs>=6.0",
17
+ "requests>=2.31",
18
+ "tweepy>=4.14",
19
+ "praw>=7.7",
20
+ "rich>=13.0"
21
+ ]
22
+ readme = "README.md"
23
+ requires-python = ">=3.8"
24
+ classifiers = [
25
+ "Development Status :: 3 - Alpha",
26
+ "Intended Audience :: Developers",
27
+ "Programming Language :: Python :: 3",
28
+ "Programming Language :: Python :: 3.8",
29
+ "Programming Language :: Python :: 3.9",
30
+ "Programming Language :: Python :: 3.10",
31
+ "Programming Language :: Python :: 3.11",
32
+ ]
33
+
34
+ [project.optional-dependencies]
35
+ dev = ["pytest>=7.0", "black", "flake8", "httpx>=0.27"]
36
+ server = ["fastapi>=0.111", "uvicorn[standard]>=0.30"]
37
+
38
+ [tool.setuptools.packages.find]
39
+ where = ["src"]
40
+
41
+ [tool.pytest.ini_options]
42
+ pythonpath = ["src/tabber"]
43
+ testpaths = ["tests"]
44
+ # NOTE: removed invalid "extrapaths" key — it is not a recognized pytest ini option
tabber-1.0.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+