surfacemap 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. surfacemap-2.0.0/.gitignore +49 -0
  2. surfacemap-2.0.0/CLAUDE.md +82 -0
  3. surfacemap-2.0.0/LICENSE +21 -0
  4. surfacemap-2.0.0/PKG-INFO +250 -0
  5. surfacemap-2.0.0/README.md +214 -0
  6. surfacemap-2.0.0/pyproject.toml +53 -0
  7. surfacemap-2.0.0/src/surfacemap/__init__.py +10 -0
  8. surfacemap-2.0.0/src/surfacemap/analysis/__init__.py +0 -0
  9. surfacemap-2.0.0/src/surfacemap/analysis/narrative.py +143 -0
  10. surfacemap-2.0.0/src/surfacemap/analysis/risk.py +209 -0
  11. surfacemap-2.0.0/src/surfacemap/api/__init__.py +0 -0
  12. surfacemap-2.0.0/src/surfacemap/api/server.py +165 -0
  13. surfacemap-2.0.0/src/surfacemap/cli/__init__.py +0 -0
  14. surfacemap-2.0.0/src/surfacemap/cli/main.py +622 -0
  15. surfacemap-2.0.0/src/surfacemap/core/__init__.py +0 -0
  16. surfacemap-2.0.0/src/surfacemap/core/config.py +199 -0
  17. surfacemap-2.0.0/src/surfacemap/core/llm.py +590 -0
  18. surfacemap-2.0.0/src/surfacemap/core/models.py +221 -0
  19. surfacemap-2.0.0/src/surfacemap/discovery/__init__.py +0 -0
  20. surfacemap-2.0.0/src/surfacemap/discovery/active.py +770 -0
  21. surfacemap-2.0.0/src/surfacemap/discovery/base.py +63 -0
  22. surfacemap-2.0.0/src/surfacemap/discovery/dns.py +857 -0
  23. surfacemap-2.0.0/src/surfacemap/discovery/engine.py +668 -0
  24. surfacemap-2.0.0/src/surfacemap/discovery/enrichment.py +507 -0
  25. surfacemap-2.0.0/src/surfacemap/discovery/http.py +381 -0
  26. surfacemap-2.0.0/src/surfacemap/discovery/osint.py +937 -0
  27. surfacemap-2.0.0/src/surfacemap/discovery/web.py +1036 -0
  28. surfacemap-2.0.0/src/surfacemap/notifications/__init__.py +0 -0
  29. surfacemap-2.0.0/src/surfacemap/notifications/slack.py +233 -0
  30. surfacemap-2.0.0/src/surfacemap/storage/__init__.py +0 -0
  31. surfacemap-2.0.0/src/surfacemap/storage/db.py +290 -0
  32. surfacemap-2.0.0/test-output/39bfd4d8e277.json +17411 -0
  33. surfacemap-2.0.0/tests/__init__.py +0 -0
@@ -0,0 +1,49 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ *.egg-info/
7
+ *.egg
8
+ dist/
9
+ build/
10
+ .eggs/
11
+
12
+ # Virtual environments
13
+ .venv/
14
+ venv/
15
+ env/
16
+ ENV/
17
+
18
+ # Environment variables
19
+ .env
20
+ .env.*
21
+
22
+ # Database
23
+ *.db
24
+ *.sqlite
25
+ *.sqlite3
26
+
27
+ # Output (root-anchored so the src/surfacemap/output/ package is not ignored)
28
+ /output/
29
+
30
+ # IDE
31
+ .idea/
32
+ .vscode/
33
+ *.swp
34
+ *.swo
35
+ *~
36
+
37
+ # OS
38
+ .DS_Store
39
+ Thumbs.db
40
+
41
+ # Testing
42
+ .pytest_cache/
43
+ .coverage
44
+ htmlcov/
45
+ .mypy_cache/
46
+ .ruff_cache/
47
+
48
+ # Jupyter
49
+ .ipynb_checkpoints/
@@ -0,0 +1,82 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ SurfaceMap is an LLM-driven attack surface discovery tool. Given a company name or domain, it runs 30+ discovery modules concurrently to enumerate subdomains, IPs, ports, cloud buckets, certificates, and more. Results are deduplicated, risk-scored, and output as terminal trees, JSON, CSV, HTML mindmaps, or Mermaid diagrams.
8
+
9
+ ## Build & Run Commands
10
+
11
+ ```bash
12
+ # Install (editable, all extras)
13
+ pip install -e ".[all]"
14
+
15
+ # Install core only (no API/LLM/Slack)
16
+ pip install -e .
17
+
18
+ # Run CLI
19
+ surfacemap discover "Target Corp" --domain target.com --tree --json
20
+
21
+ # Run API server
22
+ uvicorn surfacemap.api.server:app --host 0.0.0.0 --port 8000
23
+
24
+ # Run config viewer
25
+ surfacemap config
26
+ ```
27
+
28
+ There is no test suite, linter, or formatter configured yet. The `tests/` directory exists but contains only an empty `__init__.py`.
29
+
30
+ ## Architecture
31
+
32
+ ### Execution Flow
33
+
34
+ CLI (`cli/main.py`) or API (`api/server.py`) → `DiscoveryEngine` → 4-phase pipeline:
35
+ 1. **Phase 0**: LLM Brainstorm (optional) — identifies subsidiaries, infrastructure hints
36
+ 2. **Phase 1**: Passive Recon — 15 modules run concurrently via `asyncio.gather()`
37
+ 3. **Phase 2**: Active Probing — 14 modules (CORS checks, sensitive paths, JS analysis)
38
+ 4. **Phase 3**: LLM Analysis — risk scoring, attack path generation
39
+
40
+ ### Module System
41
+
42
+ All discovery modules extend `DiscoveryModule` ABC in `discovery/base.py`:
43
+ - Must implement `name`, `description` properties and `discover(target, result)` async method
44
+ - `safe_discover()` wraps execution with timeout (120s default) and error handling
45
+ - Module failures are isolated — they don't crash the pipeline
46
+
47
+ Modules are organized by category across files in `discovery/`:
48
+ - `dns.py` — DNS records, subdomains, zone transfers, cloud detection, subdomain takeover
49
+ - `http.py` — HTTP probing, port scanning (nmap)
50
+ - `web.py` — Wayback, cert transparency, URL scanning, web tech detection
51
+ - `osint.py` — WHOIS, ASN, reverse DNS, SSL analysis, email security
52
+ - `active.py` — Sensitive paths, JS analysis, CORS, cookie security
53
+ - `enrichment.py` — VirusTotal, Shodan, GitHub dorks, email harvesting
54
+
55
+ ### Data Model
56
+
57
+ `core/models.py` defines the asset-centric model:
58
+ - **Asset**: type (16 `AssetType` enums), value, status, severity, metadata dict
59
+ - **ScanResult**: container with fingerprint-based deduplication (SHA256 of type:value)
60
+ - Assets are added via `ScanResult.add_asset()` which handles dedup automatically
61
+
62
+ ### LLM Integration
63
+
64
+ `core/llm.py` — `LLMBrain` class with provider fallback chain: Gemini → Anthropic → OpenAI. Used for brainstorming targets, risk scoring, and false-positive filtering. Entirely optional — tool works without any LLM key.
65
+
66
+ ### Configuration
67
+
68
+ `core/config.py` — `SurfaceMapConfig` dataclass, singleton via `get_config()`. All settings come from environment variables (auto-loads `.env` file). Key prefixes: `SURFACEMAP_*` for tool settings, plus `GEMINI_API_KEY`, `VIRUSTOTAL_API_KEY`, `SHODAN_API_KEY`, `GITHUB_TOKEN`, `HUNTER_API_KEY` for external services.
69
+
70
+ ### Storage & Output
71
+
72
+ - `storage/db.py` — async SQLite via aiosqlite, stores scans and assets
73
+ - `output/mindmap.py` — generates standalone HTML with D3.js force-directed graph
74
+ - `output/formatters.py` — JSON, CSV, Rich tree, Mermaid export
75
+
76
+ ## Key Conventions
77
+
78
+ - **Async-first**: All discovery, HTTP, DNS, and DB operations are async. Use `asyncio` patterns.
79
+ - **Fault tolerance**: Every module runs inside `safe_discover()` with per-module timeouts. Never let one module failure affect others.
80
+ - **No secrets in code**: All API keys via env vars. The `.env` file is gitignored.
81
+ - **External tools are optional**: `dig`, `nmap`, `subfinder` enhance results but the tool must work without them (check availability before calling).
82
+ - Python 3.11+ required. Build system is Hatch/Hatchling.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Yash Korat
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,250 @@
1
+ Metadata-Version: 2.4
2
+ Name: surfacemap
3
+ Version: 2.0.0
4
+ Summary: LLM-driven attack surface discovery — find every asset from just a company name
5
+ Author: Yash Korat
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Keywords: attack-surface,discovery,osint,recon,security
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Information Technology
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Security
16
+ Requires-Python: >=3.11
17
+ Requires-Dist: aiosqlite>=0.20.0
18
+ Requires-Dist: httpx>=0.27.0
19
+ Requires-Dist: rich>=13.7.0
20
+ Requires-Dist: typer[all]>=0.9.0
21
+ Provides-Extra: all
22
+ Requires-Dist: anthropic>=0.40.0; extra == 'all'
23
+ Requires-Dist: fastapi>=0.115.0; extra == 'all'
24
+ Requires-Dist: google-genai>=1.0.0; extra == 'all'
25
+ Requires-Dist: slack-sdk>=3.30.0; extra == 'all'
26
+ Requires-Dist: uvicorn>=0.30.0; extra == 'all'
27
+ Provides-Extra: api
28
+ Requires-Dist: fastapi>=0.115.0; extra == 'api'
29
+ Requires-Dist: uvicorn>=0.30.0; extra == 'api'
30
+ Provides-Extra: llm
31
+ Requires-Dist: anthropic>=0.40.0; extra == 'llm'
32
+ Requires-Dist: google-genai>=1.0.0; extra == 'llm'
33
+ Provides-Extra: notifications
34
+ Requires-Dist: slack-sdk>=3.30.0; extra == 'notifications'
35
+ Description-Content-Type: text/markdown
36
+
37
+ # SurfaceMap
38
+
39
+ **LLM-driven attack surface discovery. Find every external asset from just a company name.**
40
+
41
+ SurfaceMap combines passive OSINT techniques, DNS enumeration, HTTP probing, port scanning, cloud bucket enumeration, and LLM intelligence to build a complete map of an organization's attack surface.
42
+
43
+ ---
44
+
45
+ ## Quick Start
46
+
47
+ ```bash
48
+ # Install
49
+ pip install -e ".[all]"
50
+
51
+ # Set your LLM API key
52
+ export GEMINI_API_KEY="your-key-here"
53
+
54
+ # Discover everything about a company
55
+ surfacemap discover "Acme Corp" --domain acme.com --tree --json
56
+
57
+ # Or just scan a domain
58
+ surfacemap discover example.com --mindmap
59
+ ```
60
+
61
+ ## Installation
62
+
63
+ ```bash
64
+ # Core (CLI + discovery)
65
+ pip install -e .
66
+
67
+ # With API server
68
+ pip install -e ".[api]"
69
+
70
+ # With LLM intelligence
71
+ pip install -e ".[llm]"
72
+
73
+ # With Slack notifications
74
+ pip install -e ".[notifications]"
75
+
76
+ # Everything
77
+ pip install -e ".[all]"
78
+ ```
79
+
80
+ ### External Tools (Optional)
81
+
82
+ SurfaceMap works without these, but they enhance discovery:
83
+
84
+ | Tool | Purpose | Install |
85
+ |------|---------|---------|
86
+ | `dig` | DNS record enumeration | Included with most OS |
87
+ | `nmap` | Port scanning | `brew install nmap` / `apt install nmap` |
88
+ | `subfinder` | Passive subdomain enum | `go install github.com/projectdiscovery/subfinder/v2/cmd/subfinder@latest` |
89
+
90
+ ## CLI Usage
91
+
92
+ ```bash
93
+ # Full discovery with tree output
94
+ surfacemap discover "Google" --domain google.com --tree
95
+
96
+ # Export to JSON and CSV
97
+ surfacemap discover example.com --json --csv --output ./results
98
+
99
+ # Generate interactive HTML mindmap
100
+ surfacemap discover "Acme Corp" -d acme.com --mindmap
101
+
102
+ # Check version
103
+ surfacemap version
104
+ ```
105
+
106
+ ### Options
107
+
108
+ | Flag | Short | Description |
109
+ |------|-------|-------------|
110
+ | `--domain` | `-d` | Primary domain (if target is a company name) |
111
+ | `--output` | `-o` | Output directory for results |
112
+ | `--tree` | `-t` | Display results as a rich tree in terminal |
113
+ | `--mindmap` | `-m` | Generate interactive D3.js HTML mindmap |
114
+ | `--json` | `-j` | Export results to JSON |
115
+ | `--csv` | | Export results to CSV |
116
+
117
+ ## API Server
118
+
119
+ ```bash
120
+ # Start the API server
121
+ uvicorn surfacemap.api.server:app --host 0.0.0.0 --port 8000
122
+
123
+ # Start a scan
124
+ curl -X POST "http://localhost:8000/discover?target=example.com"
125
+
126
+ # Get scan results
127
+ curl "http://localhost:8000/scans/{scan_id}"
128
+
129
+ # Health check
130
+ curl "http://localhost:8000/health"
131
+ ```
132
+
133
+ ### API Endpoints
134
+
135
+ | Method | Endpoint | Description |
136
+ |--------|----------|-------------|
137
+ | `POST` | `/discover` | Start a new discovery scan |
138
+ | `GET` | `/scans/{id}` | Get scan results by ID |
139
+ | `GET` | `/scans` | List recent scans |
140
+ | `GET` | `/health` | Health check |
141
+
142
+ ## Discovery Modules
143
+
144
+ SurfaceMap runs discovery in 6 phases:
145
+
146
+ | # | Phase | Module | Description |
147
+ |---|-------|--------|-------------|
148
+ | 1 | Company Intel | LLM Brain | Discover domains, subsidiaries, and related entities via LLM |
149
+ | 1 | Company Intel | Subsidiary Discovery | Identify acquisitions, brands, and child companies |
150
+ | 2 | DNS | DNS Records | Enumerate A, AAAA, MX, NS, TXT, CNAME, SOA records |
151
+ | 2 | DNS | Subdomain - subfinder | Passive subdomain enumeration via subfinder |
152
+ | 2 | DNS | Subdomain - crt.sh | Certificate transparency log mining |
153
+ | 2 | DNS | Subdomain - Brute Force | DNS brute force with 100+ common prefixes |
154
+ | 2 | DNS | Subdomain - LLM | AI-suggested subdomain candidates |
155
+ | 3 | HTTP | HTTP Probe | Probe all hosts for HTTP/HTTPS services |
156
+ | 3 | HTTP | Technology Detection | Identify web servers, frameworks, CMS from headers |
157
+ | 3 | HTTP | Security Headers | Check for missing HSTS, CSP, X-Frame-Options, etc. |
158
+ | 3 | HTTP | CDN Detection | Identify Cloudflare, CloudFront, Fastly, Akamai, etc. |
159
+ | 3 | HTTP | WAF Detection | Detect web application firewalls |
160
+ | 4 | Ports | Port Scan | nmap service version detection on discovered IPs |
161
+ | 5 | Cloud | S3 Bucket Enum | Check for public/existing AWS S3 buckets |
162
+ | 5 | Cloud | Azure Blob Enum | Check for Azure Blob Storage containers |
163
+ | 5 | Cloud | GCS Bucket Enum | Check for Google Cloud Storage buckets |
164
+ | 5 | Takeover | Subdomain Takeover | Detect dangling CNAMEs across 17 providers |
165
+ | 6 | Dorks | Google Dorks | LLM-generated targeted search queries |
166
+
167
+ ### Asset Types
168
+
169
+ | Type | Description |
170
+ |------|-------------|
171
+ | `domain` | Root domains |
172
+ | `subdomain` | Discovered subdomains |
173
+ | `ip` | IP addresses |
174
+ | `port` | Open ports |
175
+ | `service` | Running services with version info |
176
+ | `cloud_bucket` | S3, Azure Blob, GCS buckets |
177
+ | `email_server` | MX record mail servers |
178
+ | `nameserver` | NS record nameservers |
179
+ | `cdn` | Content delivery networks |
180
+ | `waf` | Web application firewalls |
181
+ | `certificate` | TLS/SSL certificates |
182
+ | `github_repo` | GitHub repositories |
183
+ | `social_media` | Social media profiles |
184
+ | `url` | Discovered URLs |
185
+ | `technology` | Detected technologies |
186
+ | `subsidiary` | Subsidiaries and acquisitions |
187
+
188
+ ## Configuration
189
+
190
+ All settings are configured via environment variables:
191
+
192
+ | Variable | Default | Description |
193
+ |----------|---------|-------------|
194
+ | `GEMINI_API_KEY` | | Google Gemini API key |
195
+ | `ANTHROPIC_API_KEY` | | Anthropic Claude API key |
196
+ | `SURFACEMAP_LLM_PROVIDER` | `gemini` | LLM provider (`gemini` or `anthropic`) |
197
+ | `SURFACEMAP_LLM_MODEL` | `gemini-2.5-flash` | LLM model name |
198
+ | `SURFACEMAP_HTTP_TIMEOUT` | `15` | HTTP probe timeout (seconds) |
199
+ | `SURFACEMAP_DNS_TIMEOUT` | `10` | DNS lookup timeout (seconds) |
200
+ | `SURFACEMAP_SCAN_TIMEOUT` | `300` | nmap scan timeout (seconds) |
201
+ | `SURFACEMAP_OUTPUT_DIR` | `./output` | Default output directory |
202
+ | `SURFACEMAP_DB_PATH` | `./surfacemap.db` | SQLite database path |
203
+ | `SURFACEMAP_SLACK_WEBHOOK` | | Slack webhook URL for notifications |
204
+ | `SURFACEMAP_SLACK_TOKEN` | | Slack Bot Token for notifications |
205
+ | `SURFACEMAP_SLACK_CHANNEL` | `#security` | Slack channel for notifications |
206
+ | `SURFACEMAP_MAX_SUBDOMAINS` | `500` | Maximum subdomains to enumerate |
207
+ | `SURFACEMAP_MAX_PROBES` | `20` | Concurrent HTTP probes |
208
+ | `SURFACEMAP_MAX_DNS` | `50` | Concurrent DNS lookups |
209
+ | `SURFACEMAP_NMAP_ARGS` | `-sV -T4 --top-ports 100` | nmap arguments |
210
+
211
+ ## Output Formats
212
+
213
+ - **Terminal Tree** — Rich tree display with color-coded statuses
214
+ - **JSON** — Full scan data with metadata
215
+ - **CSV** — Flat export for spreadsheet analysis
216
+ - **HTML Mindmap** — Interactive D3.js force-directed graph with dark theme, zoom, drag, and tooltips
217
+ - **Mermaid** — Mermaid.js mindmap diagram for embedding in docs
218
+
219
+ ## Architecture
220
+
221
+ ```
222
+ surfacemap/
223
+ core/
224
+ config.py — Environment-based configuration
225
+ models.py — Asset, ScanResult, enums
226
+ llm.py — LLM integration (Gemini/Claude)
227
+ discovery/
228
+ base.py — DiscoveryModule ABC
229
+ dns.py — DNS, subdomain, takeover, cloud modules
230
+ http.py — HTTP probe, port scan modules
231
+ engine.py — 6-phase orchestration engine
232
+ cli/
233
+ main.py — Typer CLI application
234
+ output/
235
+ mindmap.py — D3.js HTML and Mermaid export
236
+ api/
237
+ server.py — FastAPI REST API
238
+ notifications/
239
+ slack.py — Slack Block Kit notifications
240
+ storage/
241
+ db.py — SQLite persistence with aiosqlite
242
+ ```
243
+
244
+ ## License
245
+
246
+ MIT License. Copyright (c) 2026 Yash Korat.
247
+
248
+ ---
249
+
250
+ Built by [BreachLine Labs](https://breachline.io)
@@ -0,0 +1,214 @@
1
+ # SurfaceMap
2
+
3
+ **LLM-driven attack surface discovery. Find every external asset from just a company name.**
4
+
5
+ SurfaceMap combines passive OSINT techniques, DNS enumeration, HTTP probing, port scanning, cloud bucket enumeration, and LLM intelligence to build a complete map of an organization's attack surface.
6
+
7
+ ---
8
+
9
+ ## Quick Start
10
+
11
+ ```bash
12
+ # Install from a source checkout (for the published package: pip install "surfacemap[all]")
13
+ pip install -e ".[all]"
14
+
15
+ # Set your LLM API key
16
+ export GEMINI_API_KEY="your-key-here"
17
+
18
+ # Discover everything about a company
19
+ surfacemap discover "Acme Corp" --domain acme.com --tree --json
20
+
21
+ # Or just scan a domain
22
+ surfacemap discover example.com --mindmap
23
+ ```
24
+
25
+ ## Installation
26
+
27
+ ```bash
28
+ # Core (CLI + discovery)
29
+ pip install -e .
30
+
31
+ # With API server
32
+ pip install -e ".[api]"
33
+
34
+ # With LLM intelligence
35
+ pip install -e ".[llm]"
36
+
37
+ # With Slack notifications
38
+ pip install -e ".[notifications]"
39
+
40
+ # Everything
41
+ pip install -e ".[all]"
42
+ ```
43
+
44
+ ### External Tools (Optional)
45
+
46
+ SurfaceMap works without these, but they enhance discovery:
47
+
48
+ | Tool | Purpose | Install |
49
+ |------|---------|---------|
50
+ | `dig` | DNS record enumeration | Included with most operating systems |
51
+ | `nmap` | Port scanning | `brew install nmap` / `apt install nmap` |
52
+ | `subfinder` | Passive subdomain enum | `go install github.com/projectdiscovery/subfinder/v2/cmd/subfinder@latest` |
53
+
54
+ ## CLI Usage
55
+
56
+ ```bash
57
+ # Full discovery with tree output
58
+ surfacemap discover "Google" --domain google.com --tree
59
+
60
+ # Export to JSON and CSV
61
+ surfacemap discover example.com --json --csv --output ./results
62
+
63
+ # Generate interactive HTML mindmap
64
+ surfacemap discover "Acme Corp" -d acme.com --mindmap
65
+
66
+ # Check version
67
+ surfacemap version
68
+ ```
69
+
70
+ ### Options
71
+
72
+ | Flag | Short | Description |
73
+ |------|-------|-------------|
74
+ | `--domain` | `-d` | Primary domain (if target is a company name) |
75
+ | `--output` | `-o` | Output directory for results |
76
+ | `--tree` | `-t` | Display results as a rich tree in terminal |
77
+ | `--mindmap` | `-m` | Generate interactive D3.js HTML mindmap |
78
+ | `--json` | `-j` | Export results to JSON |
79
+ | `--csv` | | Export results to CSV |
80
+
81
+ ## API Server
82
+
83
+ ```bash
84
+ # Start the API server
85
+ uvicorn surfacemap.api.server:app --host 0.0.0.0 --port 8000
86
+
87
+ # Start a scan
88
+ curl -X POST "http://localhost:8000/discover?target=example.com"
89
+
90
+ # Get scan results
91
+ curl "http://localhost:8000/scans/{scan_id}"
92
+
93
+ # Health check
94
+ curl "http://localhost:8000/health"
95
+ ```
96
+
97
+ ### API Endpoints
98
+
99
+ | Method | Endpoint | Description |
100
+ |--------|----------|-------------|
101
+ | `POST` | `/discover` | Start a new discovery scan |
102
+ | `GET` | `/scans/{id}` | Get scan results by ID |
103
+ | `GET` | `/scans` | List recent scans |
104
+ | `GET` | `/health` | Health check |
105
+
106
+ ## Discovery Modules
107
+
108
+ SurfaceMap runs discovery in 6 phases:
109
+
110
+ | # | Phase | Module | Description |
111
+ |---|-------|--------|-------------|
112
+ | 1 | Company Intel | LLM Brain | Discover domains, subsidiaries, and related entities via LLM |
113
+ | 1 | Company Intel | Subsidiary Discovery | Identify acquisitions, brands, and child companies |
114
+ | 2 | DNS | DNS Records | Enumerate A, AAAA, MX, NS, TXT, CNAME, SOA records |
115
+ | 2 | DNS | Subdomain - subfinder | Passive subdomain enumeration via subfinder |
116
+ | 2 | DNS | Subdomain - crt.sh | Certificate transparency log mining |
117
+ | 2 | DNS | Subdomain - Brute Force | DNS brute force with 100+ common prefixes |
118
+ | 2 | DNS | Subdomain - LLM | AI-suggested subdomain candidates |
119
+ | 3 | HTTP | HTTP Probe | Probe all hosts for HTTP/HTTPS services |
120
+ | 3 | HTTP | Technology Detection | Identify web servers, frameworks, CMS from headers |
121
+ | 3 | HTTP | Security Headers | Check for missing HSTS, CSP, X-Frame-Options, etc. |
122
+ | 3 | HTTP | CDN Detection | Identify Cloudflare, CloudFront, Fastly, Akamai, etc. |
123
+ | 3 | HTTP | WAF Detection | Detect web application firewalls |
124
+ | 4 | Ports | Port Scan | nmap service version detection on discovered IPs |
125
+ | 5 | Cloud | S3 Bucket Enum | Check for public/existing AWS S3 buckets |
126
+ | 5 | Cloud | Azure Blob Enum | Check for Azure Blob Storage containers |
127
+ | 5 | Cloud | GCS Bucket Enum | Check for Google Cloud Storage buckets |
128
+ | 5 | Takeover | Subdomain Takeover | Detect dangling CNAMEs across 17 providers |
129
+ | 6 | Dorks | Google Dorks | LLM-generated targeted search queries |
130
+
131
+ ### Asset Types
132
+
133
+ | Type | Description |
134
+ |------|-------------|
135
+ | `domain` | Root domains |
136
+ | `subdomain` | Discovered subdomains |
137
+ | `ip` | IP addresses |
138
+ | `port` | Open ports |
139
+ | `service` | Running services with version info |
140
+ | `cloud_bucket` | S3, Azure Blob, GCS buckets |
141
+ | `email_server` | MX record mail servers |
142
+ | `nameserver` | NS record nameservers |
143
+ | `cdn` | Content delivery networks |
144
+ | `waf` | Web application firewalls |
145
+ | `certificate` | TLS/SSL certificates |
146
+ | `github_repo` | GitHub repositories |
147
+ | `social_media` | Social media profiles |
148
+ | `url` | Discovered URLs |
149
+ | `technology` | Detected technologies |
150
+ | `subsidiary` | Subsidiaries and acquisitions |
151
+
152
+ ## Configuration
153
+
154
+ All settings are configured via environment variables:
155
+
156
+ | Variable | Default | Description |
157
+ |----------|---------|-------------|
158
+ | `GEMINI_API_KEY` | | Google Gemini API key |
159
+ | `ANTHROPIC_API_KEY` | | Anthropic Claude API key |
160
+ | `SURFACEMAP_LLM_PROVIDER` | `gemini` | LLM provider (`gemini` or `anthropic`) |
161
+ | `SURFACEMAP_LLM_MODEL` | `gemini-2.5-flash` | LLM model name |
162
+ | `SURFACEMAP_HTTP_TIMEOUT` | `15` | HTTP probe timeout (seconds) |
163
+ | `SURFACEMAP_DNS_TIMEOUT` | `10` | DNS lookup timeout (seconds) |
164
+ | `SURFACEMAP_SCAN_TIMEOUT` | `300` | nmap scan timeout (seconds) |
165
+ | `SURFACEMAP_OUTPUT_DIR` | `./output` | Default output directory |
166
+ | `SURFACEMAP_DB_PATH` | `./surfacemap.db` | SQLite database path |
167
+ | `SURFACEMAP_SLACK_WEBHOOK` | | Slack webhook URL for notifications |
168
+ | `SURFACEMAP_SLACK_TOKEN` | | Slack Bot Token for notifications |
169
+ | `SURFACEMAP_SLACK_CHANNEL` | `#security` | Slack channel for notifications |
170
+ | `SURFACEMAP_MAX_SUBDOMAINS` | `500` | Maximum subdomains to enumerate |
171
+ | `SURFACEMAP_MAX_PROBES` | `20` | Concurrent HTTP probes |
172
+ | `SURFACEMAP_MAX_DNS` | `50` | Concurrent DNS lookups |
173
+ | `SURFACEMAP_NMAP_ARGS` | `-sV -T4 --top-ports 100` | nmap arguments |
174
+
175
+ ## Output Formats
176
+
177
+ - **Terminal Tree** — Rich tree display with color-coded statuses
178
+ - **JSON** — Full scan data with metadata
179
+ - **CSV** — Flat export for spreadsheet analysis
180
+ - **HTML Mindmap** — Interactive D3.js force-directed graph with dark theme, zoom, drag, and tooltips
181
+ - **Mermaid** — Mermaid.js mindmap diagram for embedding in docs
182
+
183
+ ## Architecture
184
+
185
+ ```
186
+ surfacemap/
187
+   core/
188
+     config.py — Environment-based configuration
189
+     models.py — Asset, ScanResult, enums
190
+     llm.py — LLM integration (Gemini/Claude)
191
+   discovery/
192
+     base.py — DiscoveryModule ABC
193
+     dns.py — DNS, subdomain, takeover, cloud modules
194
+     http.py — HTTP probe, port scan modules
195
+     engine.py — 6-phase orchestration engine
196
+   cli/
197
+     main.py — Typer CLI application
198
+   output/
199
+     mindmap.py — D3.js HTML and Mermaid export
200
+   api/
201
+     server.py — FastAPI REST API
202
+   notifications/
203
+     slack.py — Slack Block Kit notifications
204
+   storage/
205
+     db.py — SQLite persistence with aiosqlite
206
+ ```
207
+
208
+ ## License
209
+
210
+ MIT License. Copyright (c) 2026 Yash Korat.
211
+
212
+ ---
213
+
214
+ Built by [BreachLine Labs](https://breachline.io)
@@ -0,0 +1,53 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "surfacemap"
7
+ version = "2.0.0"
8
+ description = "LLM-driven attack surface discovery — find every asset from just a company name"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.11"
12
+ authors = [
13
+ { name = "Yash Korat" },
14
+ ]
15
+ keywords = ["security", "recon", "attack-surface", "osint", "discovery"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Information Technology",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Topic :: Security",
24
+ ]
25
+
26
+ dependencies = [
27
+ "typer[all]>=0.9.0",
28
+ "httpx>=0.27.0",
29
+ "rich>=13.7.0",
30
+ "aiosqlite>=0.20.0",
31
+ ]
32
+
33
+ [project.optional-dependencies]
34
+ api = [
35
+ "fastapi>=0.115.0",
36
+ "uvicorn>=0.30.0",
37
+ ]
38
+ llm = [
39
+ "google-genai>=1.0.0",
40
+ "anthropic>=0.40.0",
41
+ ]
42
+ notifications = [
43
+ "slack-sdk>=3.30.0",
44
+ ]
45
+ all = [
46
+ "surfacemap[api,llm,notifications]",
47
+ ]
48
+
49
+ [project.scripts]
50
+ surfacemap = "surfacemap.cli.main:app"
51
+
52
+ [tool.hatch.build.targets.wheel]
53
+ packages = ["src/surfacemap"]
@@ -0,0 +1,10 @@
1
+ """SurfaceMap — LLM-driven attack surface discovery.
2
+
3
+ Discover every external asset of a company from just its name.
4
+ Combines passive OSINT, DNS enumeration, HTTP probing, port scanning,
5
+ cloud storage enumeration, and LLM intelligence to build a complete
6
+ attack surface map.
7
+ """
8
+
9
+ __version__ = "2.0.0"
10
+ __author__ = "Yash Korat"
File without changes