surfacemap 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- surfacemap-2.0.0/.gitignore +49 -0
- surfacemap-2.0.0/CLAUDE.md +82 -0
- surfacemap-2.0.0/LICENSE +21 -0
- surfacemap-2.0.0/PKG-INFO +250 -0
- surfacemap-2.0.0/README.md +214 -0
- surfacemap-2.0.0/pyproject.toml +53 -0
- surfacemap-2.0.0/src/surfacemap/__init__.py +10 -0
- surfacemap-2.0.0/src/surfacemap/analysis/__init__.py +0 -0
- surfacemap-2.0.0/src/surfacemap/analysis/narrative.py +143 -0
- surfacemap-2.0.0/src/surfacemap/analysis/risk.py +209 -0
- surfacemap-2.0.0/src/surfacemap/api/__init__.py +0 -0
- surfacemap-2.0.0/src/surfacemap/api/server.py +165 -0
- surfacemap-2.0.0/src/surfacemap/cli/__init__.py +0 -0
- surfacemap-2.0.0/src/surfacemap/cli/main.py +622 -0
- surfacemap-2.0.0/src/surfacemap/core/__init__.py +0 -0
- surfacemap-2.0.0/src/surfacemap/core/config.py +199 -0
- surfacemap-2.0.0/src/surfacemap/core/llm.py +590 -0
- surfacemap-2.0.0/src/surfacemap/core/models.py +221 -0
- surfacemap-2.0.0/src/surfacemap/discovery/__init__.py +0 -0
- surfacemap-2.0.0/src/surfacemap/discovery/active.py +770 -0
- surfacemap-2.0.0/src/surfacemap/discovery/base.py +63 -0
- surfacemap-2.0.0/src/surfacemap/discovery/dns.py +857 -0
- surfacemap-2.0.0/src/surfacemap/discovery/engine.py +668 -0
- surfacemap-2.0.0/src/surfacemap/discovery/enrichment.py +507 -0
- surfacemap-2.0.0/src/surfacemap/discovery/http.py +381 -0
- surfacemap-2.0.0/src/surfacemap/discovery/osint.py +937 -0
- surfacemap-2.0.0/src/surfacemap/discovery/web.py +1036 -0
- surfacemap-2.0.0/src/surfacemap/notifications/__init__.py +0 -0
- surfacemap-2.0.0/src/surfacemap/notifications/slack.py +233 -0
- surfacemap-2.0.0/src/surfacemap/storage/__init__.py +0 -0
- surfacemap-2.0.0/src/surfacemap/storage/db.py +290 -0
- surfacemap-2.0.0/test-output/39bfd4d8e277.json +17411 -0
- surfacemap-2.0.0/tests/__init__.py +0 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
*.egg-info/
|
|
7
|
+
*.egg
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
.eggs/
|
|
11
|
+
|
|
12
|
+
# Virtual environments
|
|
13
|
+
.venv/
|
|
14
|
+
venv/
|
|
15
|
+
env/
|
|
16
|
+
ENV/
|
|
17
|
+
|
|
18
|
+
# Environment variables
|
|
19
|
+
.env
|
|
20
|
+
.env.*
|
|
21
|
+
|
|
22
|
+
# Database
|
|
23
|
+
*.db
|
|
24
|
+
*.sqlite
|
|
25
|
+
*.sqlite3
|
|
26
|
+
|
|
27
|
+
# Output
|
|
28
|
+
output/
|
|
29
|
+
|
|
30
|
+
# IDE
|
|
31
|
+
.idea/
|
|
32
|
+
.vscode/
|
|
33
|
+
*.swp
|
|
34
|
+
*.swo
|
|
35
|
+
*~
|
|
36
|
+
|
|
37
|
+
# OS
|
|
38
|
+
.DS_Store
|
|
39
|
+
Thumbs.db
|
|
40
|
+
|
|
41
|
+
# Testing
|
|
42
|
+
.pytest_cache/
|
|
43
|
+
.coverage
|
|
44
|
+
htmlcov/
|
|
45
|
+
.mypy_cache/
|
|
46
|
+
.ruff_cache/
|
|
47
|
+
|
|
48
|
+
# Jupyter
|
|
49
|
+
.ipynb_checkpoints/
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
SurfaceMap is an LLM-driven attack surface discovery tool. Given a company name or domain, it runs 30+ discovery modules concurrently to enumerate subdomains, IPs, ports, cloud buckets, certificates, and more. Results are deduplicated, risk-scored, and output as terminal trees, JSON, CSV, HTML mindmaps, or Mermaid diagrams.
|
|
8
|
+
|
|
9
|
+
## Build & Run Commands
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# Install (editable, all extras)
|
|
13
|
+
pip install -e ".[all]"
|
|
14
|
+
|
|
15
|
+
# Install core only (no API/LLM/Slack)
|
|
16
|
+
pip install -e .
|
|
17
|
+
|
|
18
|
+
# Run CLI
|
|
19
|
+
surfacemap discover "Target Corp" --domain target.com --tree --json
|
|
20
|
+
|
|
21
|
+
# Run API server
|
|
22
|
+
uvicorn surfacemap.api.server:app --host 0.0.0.0 --port 8000
|
|
23
|
+
|
|
24
|
+
# Run config viewer
|
|
25
|
+
surfacemap config
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
There is no test suite, linter, or formatter configured yet. The `tests/` directory exists but is empty.
|
|
29
|
+
|
|
30
|
+
## Architecture
|
|
31
|
+
|
|
32
|
+
### Execution Flow
|
|
33
|
+
|
|
34
|
+
CLI (`cli/main.py`) or API (`api/server.py`) → `DiscoveryEngine` → 4-phase pipeline:
|
|
35
|
+
1. **Phase 0**: LLM Brainstorm (optional) — identifies subsidiaries, infrastructure hints
|
|
36
|
+
2. **Phase 1**: Passive Recon — 15 modules run concurrently via `asyncio.gather()`
|
|
37
|
+
3. **Phase 2**: Active Probing — 14 modules (CORS checks, sensitive paths, JS analysis)
|
|
38
|
+
4. **Phase 3**: LLM Analysis — risk scoring, attack path generation
|
|
39
|
+
|
|
40
|
+
### Module System
|
|
41
|
+
|
|
42
|
+
All discovery modules extend `DiscoveryModule` ABC in `discovery/base.py`:
|
|
43
|
+
- Must implement the `name` and `description` properties and the async `discover(target, result)` method
|
|
44
|
+
- `safe_discover()` wraps execution with timeout (120s default) and error handling
|
|
45
|
+
- Module failures are isolated — they don't crash the pipeline
|
|
46
|
+
|
|
47
|
+
Modules are organized by category across files in `discovery/`:
|
|
48
|
+
- `dns.py` — DNS records, subdomains, zone transfers, cloud detection, subdomain takeover
|
|
49
|
+
- `http.py` — HTTP probing, port scanning (nmap)
|
|
50
|
+
- `web.py` — Wayback, cert transparency, URL scanning, web tech detection
|
|
51
|
+
- `osint.py` — WHOIS, ASN, reverse DNS, SSL analysis, email security
|
|
52
|
+
- `active.py` — Sensitive paths, JS analysis, CORS, cookie security
|
|
53
|
+
- `enrichment.py` — VirusTotal, Shodan, GitHub dorks, email harvesting
|
|
54
|
+
|
|
55
|
+
### Data Model
|
|
56
|
+
|
|
57
|
+
`core/models.py` defines the asset-centric model:
|
|
58
|
+
- **Asset**: type (16 `AssetType` enums), value, status, severity, metadata dict
|
|
59
|
+
- **ScanResult**: container with fingerprint-based deduplication (SHA256 of type:value)
|
|
60
|
+
- Assets are added via `ScanResult.add_asset()` which handles dedup automatically
|
|
61
|
+
|
|
62
|
+
### LLM Integration
|
|
63
|
+
|
|
64
|
+
`core/llm.py` — `LLMBrain` class with provider fallback chain: Gemini → Anthropic → OpenAI. Used for brainstorming targets, risk scoring, and false-positive filtering. Entirely optional — tool works without any LLM key.
|
|
65
|
+
|
|
66
|
+
### Configuration
|
|
67
|
+
|
|
68
|
+
`core/config.py` — `SurfaceMapConfig` dataclass, singleton via `get_config()`. All settings come from environment variables (a `.env` file is loaded automatically). Key prefixes: `SURFACEMAP_*` for tool settings, plus `GEMINI_API_KEY`, `VIRUSTOTAL_API_KEY`, `SHODAN_API_KEY`, `GITHUB_TOKEN`, `HUNTER_API_KEY` for external services.
|
|
69
|
+
|
|
70
|
+
### Storage & Output
|
|
71
|
+
|
|
72
|
+
- `storage/db.py` — async SQLite via aiosqlite, stores scans and assets
|
|
73
|
+
- `output/mindmap.py` — generates standalone HTML with D3.js force-directed graph
|
|
74
|
+
- `output/formatters.py` — JSON, CSV, Rich tree, Mermaid export
|
|
75
|
+
|
|
76
|
+
## Key Conventions
|
|
77
|
+
|
|
78
|
+
- **Async-first**: All discovery, HTTP, DNS, and DB operations are async. Use `asyncio` patterns.
|
|
79
|
+
- **Fault tolerance**: Every module runs inside `safe_discover()` with per-module timeouts. Never let one module failure affect others.
|
|
80
|
+
- **No secrets in code**: All API keys via env vars. The `.env` file is gitignored.
|
|
81
|
+
- **External tools are optional**: `dig`, `nmap`, and `subfinder` enhance results, but the tool must work without them (check availability before calling).
|
|
82
|
+
- Python 3.11+ required. Build system is Hatch/Hatchling.
|
surfacemap-2.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Yash Korat
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: surfacemap
|
|
3
|
+
Version: 2.0.0
|
|
4
|
+
Summary: LLM-driven attack surface discovery — find every asset from just a company name
|
|
5
|
+
Author: Yash Korat
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: attack-surface,discovery,osint,recon,security
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Information Technology
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Security
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Requires-Dist: aiosqlite>=0.20.0
|
|
18
|
+
Requires-Dist: httpx>=0.27.0
|
|
19
|
+
Requires-Dist: rich>=13.7.0
|
|
20
|
+
Requires-Dist: typer[all]>=0.9.0
|
|
21
|
+
Provides-Extra: all
|
|
22
|
+
Requires-Dist: anthropic>=0.40.0; extra == 'all'
|
|
23
|
+
Requires-Dist: fastapi>=0.115.0; extra == 'all'
|
|
24
|
+
Requires-Dist: google-genai>=1.0.0; extra == 'all'
|
|
25
|
+
Requires-Dist: slack-sdk>=3.30.0; extra == 'all'
|
|
26
|
+
Requires-Dist: uvicorn>=0.30.0; extra == 'all'
|
|
27
|
+
Provides-Extra: api
|
|
28
|
+
Requires-Dist: fastapi>=0.115.0; extra == 'api'
|
|
29
|
+
Requires-Dist: uvicorn>=0.30.0; extra == 'api'
|
|
30
|
+
Provides-Extra: llm
|
|
31
|
+
Requires-Dist: anthropic>=0.40.0; extra == 'llm'
|
|
32
|
+
Requires-Dist: google-genai>=1.0.0; extra == 'llm'
|
|
33
|
+
Provides-Extra: notifications
|
|
34
|
+
Requires-Dist: slack-sdk>=3.30.0; extra == 'notifications'
|
|
35
|
+
Description-Content-Type: text/markdown
|
|
36
|
+
|
|
37
|
+
# SurfaceMap
|
|
38
|
+
|
|
39
|
+
**LLM-driven attack surface discovery. Find every external asset from just a company name.**
|
|
40
|
+
|
|
41
|
+
SurfaceMap combines passive OSINT techniques, DNS enumeration, HTTP probing, port scanning, cloud bucket enumeration, and LLM intelligence to build a complete map of an organization's attack surface.
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Quick Start
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
# Install
|
|
49
|
+
pip install -e ".[all]"
|
|
50
|
+
|
|
51
|
+
# Set your LLM API key
|
|
52
|
+
export GEMINI_API_KEY="your-key-here"
|
|
53
|
+
|
|
54
|
+
# Discover everything about a company
|
|
55
|
+
surfacemap discover "Acme Corp" --domain acme.com --tree --json
|
|
56
|
+
|
|
57
|
+
# Or just scan a domain
|
|
58
|
+
surfacemap discover example.com --mindmap
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Installation
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# Core (CLI + discovery)
|
|
65
|
+
pip install -e .
|
|
66
|
+
|
|
67
|
+
# With API server
|
|
68
|
+
pip install -e ".[api]"
|
|
69
|
+
|
|
70
|
+
# With LLM intelligence
|
|
71
|
+
pip install -e ".[llm]"
|
|
72
|
+
|
|
73
|
+
# With Slack notifications
|
|
74
|
+
pip install -e ".[notifications]"
|
|
75
|
+
|
|
76
|
+
# Everything
|
|
77
|
+
pip install -e ".[all]"
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### External Tools (Optional)
|
|
81
|
+
|
|
82
|
+
SurfaceMap works without these, but they enhance discovery:
|
|
83
|
+
|
|
84
|
+
| Tool | Purpose | Install |
|
|
85
|
+
|------|---------|---------|
|
|
86
|
+
| `dig` | DNS record enumeration | Included with most operating systems |
|
|
87
|
+
| `nmap` | Port scanning | `brew install nmap` / `apt install nmap` |
|
|
88
|
+
| `subfinder` | Passive subdomain enum | `go install github.com/projectdiscovery/subfinder/v2/cmd/subfinder@latest` |
|
|
89
|
+
|
|
90
|
+
## CLI Usage
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
# Full discovery with tree output
|
|
94
|
+
surfacemap discover "Google" --domain google.com --tree
|
|
95
|
+
|
|
96
|
+
# Export to JSON and CSV
|
|
97
|
+
surfacemap discover example.com --json --csv --output ./results
|
|
98
|
+
|
|
99
|
+
# Generate interactive HTML mindmap
|
|
100
|
+
surfacemap discover "Acme Corp" -d acme.com --mindmap
|
|
101
|
+
|
|
102
|
+
# Check version
|
|
103
|
+
surfacemap version
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Options
|
|
107
|
+
|
|
108
|
+
| Flag | Short | Description |
|
|
109
|
+
|------|-------|-------------|
|
|
110
|
+
| `--domain` | `-d` | Primary domain (if target is a company name) |
|
|
111
|
+
| `--output` | `-o` | Output directory for results |
|
|
112
|
+
| `--tree` | `-t` | Display results as a rich tree in terminal |
|
|
113
|
+
| `--mindmap` | `-m` | Generate interactive D3.js HTML mindmap |
|
|
114
|
+
| `--json` | `-j` | Export results to JSON |
|
|
115
|
+
| `--csv` | | Export results to CSV |
|
|
116
|
+
|
|
117
|
+
## API Server
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
# Start the API server
|
|
121
|
+
uvicorn surfacemap.api.server:app --host 0.0.0.0 --port 8000
|
|
122
|
+
|
|
123
|
+
# Start a scan
|
|
124
|
+
curl -X POST "http://localhost:8000/discover?target=example.com"
|
|
125
|
+
|
|
126
|
+
# Get scan results
|
|
127
|
+
curl "http://localhost:8000/scans/{scan_id}"
|
|
128
|
+
|
|
129
|
+
# Health check
|
|
130
|
+
curl "http://localhost:8000/health"
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### API Endpoints
|
|
134
|
+
|
|
135
|
+
| Method | Endpoint | Description |
|
|
136
|
+
|--------|----------|-------------|
|
|
137
|
+
| `POST` | `/discover` | Start a new discovery scan |
|
|
138
|
+
| `GET` | `/scans/{id}` | Get scan results by ID |
|
|
139
|
+
| `GET` | `/scans` | List recent scans |
|
|
140
|
+
| `GET` | `/health` | Health check |
|
|
141
|
+
|
|
142
|
+
## Discovery Modules
|
|
143
|
+
|
|
144
|
+
SurfaceMap runs discovery in 6 phases:
|
|
145
|
+
|
|
146
|
+
| # | Phase | Module | Description |
|
|
147
|
+
|---|-------|--------|-------------|
|
|
148
|
+
| 1 | Company Intel | LLM Brain | Discover domains, subsidiaries, and related entities via LLM |
|
|
149
|
+
| 1 | Company Intel | Subsidiary Discovery | Identify acquisitions, brands, and child companies |
|
|
150
|
+
| 2 | DNS | DNS Records | Enumerate A, AAAA, MX, NS, TXT, CNAME, SOA records |
|
|
151
|
+
| 2 | DNS | Subdomain - subfinder | Passive subdomain enumeration via subfinder |
|
|
152
|
+
| 2 | DNS | Subdomain - crt.sh | Certificate transparency log mining |
|
|
153
|
+
| 2 | DNS | Subdomain - Brute Force | DNS brute force with 100+ common prefixes |
|
|
154
|
+
| 2 | DNS | Subdomain - LLM | AI-suggested subdomain candidates |
|
|
155
|
+
| 3 | HTTP | HTTP Probe | Probe all hosts for HTTP/HTTPS services |
|
|
156
|
+
| 3 | HTTP | Technology Detection | Identify web servers, frameworks, CMS from headers |
|
|
157
|
+
| 3 | HTTP | Security Headers | Check for missing HSTS, CSP, X-Frame-Options, etc. |
|
|
158
|
+
| 3 | HTTP | CDN Detection | Identify Cloudflare, CloudFront, Fastly, Akamai, etc. |
|
|
159
|
+
| 3 | HTTP | WAF Detection | Detect web application firewalls |
|
|
160
|
+
| 4 | Ports | Port Scan | nmap service version detection on discovered IPs |
|
|
161
|
+
| 5 | Cloud | S3 Bucket Enum | Check for public/existing AWS S3 buckets |
|
|
162
|
+
| 5 | Cloud | Azure Blob Enum | Check for Azure Blob Storage containers |
|
|
163
|
+
| 5 | Cloud | GCS Bucket Enum | Check for Google Cloud Storage buckets |
|
|
164
|
+
| 5 | Takeover | Subdomain Takeover | Detect dangling CNAMEs across 17 providers |
|
|
165
|
+
| 6 | Dorks | Google Dorks | LLM-generated targeted search queries |
|
|
166
|
+
|
|
167
|
+
### Asset Types
|
|
168
|
+
|
|
169
|
+
| Type | Description |
|
|
170
|
+
|------|-------------|
|
|
171
|
+
| `domain` | Root domains |
|
|
172
|
+
| `subdomain` | Discovered subdomains |
|
|
173
|
+
| `ip` | IP addresses |
|
|
174
|
+
| `port` | Open ports |
|
|
175
|
+
| `service` | Running services with version info |
|
|
176
|
+
| `cloud_bucket` | S3, Azure Blob, GCS buckets |
|
|
177
|
+
| `email_server` | MX record mail servers |
|
|
178
|
+
| `nameserver` | NS record nameservers |
|
|
179
|
+
| `cdn` | Content delivery networks |
|
|
180
|
+
| `waf` | Web application firewalls |
|
|
181
|
+
| `certificate` | TLS/SSL certificates |
|
|
182
|
+
| `github_repo` | GitHub repositories |
|
|
183
|
+
| `social_media` | Social media profiles |
|
|
184
|
+
| `url` | Discovered URLs |
|
|
185
|
+
| `technology` | Detected technologies |
|
|
186
|
+
| `subsidiary` | Subsidiaries and acquisitions |
|
|
187
|
+
|
|
188
|
+
## Configuration
|
|
189
|
+
|
|
190
|
+
All settings are configured via environment variables:
|
|
191
|
+
|
|
192
|
+
| Variable | Default | Description |
|
|
193
|
+
|----------|---------|-------------|
|
|
194
|
+
| `GEMINI_API_KEY` | | Google Gemini API key |
|
|
195
|
+
| `ANTHROPIC_API_KEY` | | Anthropic Claude API key |
|
|
196
|
+
| `SURFACEMAP_LLM_PROVIDER` | `gemini` | LLM provider (`gemini` or `anthropic`) |
|
|
197
|
+
| `SURFACEMAP_LLM_MODEL` | `gemini-2.5-flash` | LLM model name |
|
|
198
|
+
| `SURFACEMAP_HTTP_TIMEOUT` | `15` | HTTP probe timeout (seconds) |
|
|
199
|
+
| `SURFACEMAP_DNS_TIMEOUT` | `10` | DNS lookup timeout (seconds) |
|
|
200
|
+
| `SURFACEMAP_SCAN_TIMEOUT` | `300` | nmap scan timeout (seconds) |
|
|
201
|
+
| `SURFACEMAP_OUTPUT_DIR` | `./output` | Default output directory |
|
|
202
|
+
| `SURFACEMAP_DB_PATH` | `./surfacemap.db` | SQLite database path |
|
|
203
|
+
| `SURFACEMAP_SLACK_WEBHOOK` | | Slack webhook URL for notifications |
|
|
204
|
+
| `SURFACEMAP_SLACK_TOKEN` | | Slack Bot Token for notifications |
|
|
205
|
+
| `SURFACEMAP_SLACK_CHANNEL` | `#security` | Slack channel for notifications |
|
|
206
|
+
| `SURFACEMAP_MAX_SUBDOMAINS` | `500` | Maximum subdomains to enumerate |
|
|
207
|
+
| `SURFACEMAP_MAX_PROBES` | `20` | Concurrent HTTP probes |
|
|
208
|
+
| `SURFACEMAP_MAX_DNS` | `50` | Concurrent DNS lookups |
|
|
209
|
+
| `SURFACEMAP_NMAP_ARGS` | `-sV -T4 --top-ports 100` | nmap arguments |
|
|
210
|
+
|
|
211
|
+
## Output Formats
|
|
212
|
+
|
|
213
|
+
- **Terminal Tree** — Rich tree display with color-coded statuses
|
|
214
|
+
- **JSON** — Full scan data with metadata
|
|
215
|
+
- **CSV** — Flat export for spreadsheet analysis
|
|
216
|
+
- **HTML Mindmap** — Interactive D3.js force-directed graph with dark theme, zoom, drag, and tooltips
|
|
217
|
+
- **Mermaid** — Mermaid.js mindmap diagram for embedding in docs
|
|
218
|
+
|
|
219
|
+
## Architecture
|
|
220
|
+
|
|
221
|
+
```
|
|
222
|
+
surfacemap/
|
|
223
|
+
core/
|
|
224
|
+
config.py — Environment-based configuration
|
|
225
|
+
models.py — Asset, ScanResult, enums
|
|
226
|
+
llm.py — LLM integration (Gemini/Claude)
|
|
227
|
+
discovery/
|
|
228
|
+
base.py — DiscoveryModule ABC
|
|
229
|
+
dns.py — DNS, subdomain, takeover, cloud modules
|
|
230
|
+
http.py — HTTP probe, port scan modules
|
|
231
|
+
engine.py — 6-phase orchestration engine
|
|
232
|
+
cli/
|
|
233
|
+
main.py — Typer CLI application
|
|
234
|
+
output/
|
|
235
|
+
mindmap.py — D3.js HTML and Mermaid export
|
|
236
|
+
api/
|
|
237
|
+
server.py — FastAPI REST API
|
|
238
|
+
notifications/
|
|
239
|
+
slack.py — Slack Block Kit notifications
|
|
240
|
+
storage/
|
|
241
|
+
db.py — SQLite persistence with aiosqlite
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
## License
|
|
245
|
+
|
|
246
|
+
MIT License. Copyright (c) 2026 Yash Korat.
|
|
247
|
+
|
|
248
|
+
---
|
|
249
|
+
|
|
250
|
+
Built by [BreachLine Labs](https://breachline.io)
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# SurfaceMap
|
|
2
|
+
|
|
3
|
+
**LLM-driven attack surface discovery. Find every external asset from just a company name.**
|
|
4
|
+
|
|
5
|
+
SurfaceMap combines passive OSINT techniques, DNS enumeration, HTTP probing, port scanning, cloud bucket enumeration, and LLM intelligence to build a complete map of an organization's attack surface.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Quick Start
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# Install
|
|
13
|
+
pip install -e ".[all]"
|
|
14
|
+
|
|
15
|
+
# Set your LLM API key
|
|
16
|
+
export GEMINI_API_KEY="your-key-here"
|
|
17
|
+
|
|
18
|
+
# Discover everything about a company
|
|
19
|
+
surfacemap discover "Acme Corp" --domain acme.com --tree --json
|
|
20
|
+
|
|
21
|
+
# Or just scan a domain
|
|
22
|
+
surfacemap discover example.com --mindmap
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Installation
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
# Core (CLI + discovery)
|
|
29
|
+
pip install -e .
|
|
30
|
+
|
|
31
|
+
# With API server
|
|
32
|
+
pip install -e ".[api]"
|
|
33
|
+
|
|
34
|
+
# With LLM intelligence
|
|
35
|
+
pip install -e ".[llm]"
|
|
36
|
+
|
|
37
|
+
# With Slack notifications
|
|
38
|
+
pip install -e ".[notifications]"
|
|
39
|
+
|
|
40
|
+
# Everything
|
|
41
|
+
pip install -e ".[all]"
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### External Tools (Optional)
|
|
45
|
+
|
|
46
|
+
SurfaceMap works without these, but they enhance discovery:
|
|
47
|
+
|
|
48
|
+
| Tool | Purpose | Install |
|
|
49
|
+
|------|---------|---------|
|
|
50
|
+
| `dig` | DNS record enumeration | Included with most operating systems |
|
|
51
|
+
| `nmap` | Port scanning | `brew install nmap` / `apt install nmap` |
|
|
52
|
+
| `subfinder` | Passive subdomain enum | `go install github.com/projectdiscovery/subfinder/v2/cmd/subfinder@latest` |
|
|
53
|
+
|
|
54
|
+
## CLI Usage
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# Full discovery with tree output
|
|
58
|
+
surfacemap discover "Google" --domain google.com --tree
|
|
59
|
+
|
|
60
|
+
# Export to JSON and CSV
|
|
61
|
+
surfacemap discover example.com --json --csv --output ./results
|
|
62
|
+
|
|
63
|
+
# Generate interactive HTML mindmap
|
|
64
|
+
surfacemap discover "Acme Corp" -d acme.com --mindmap
|
|
65
|
+
|
|
66
|
+
# Check version
|
|
67
|
+
surfacemap version
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Options
|
|
71
|
+
|
|
72
|
+
| Flag | Short | Description |
|
|
73
|
+
|------|-------|-------------|
|
|
74
|
+
| `--domain` | `-d` | Primary domain (if target is a company name) |
|
|
75
|
+
| `--output` | `-o` | Output directory for results |
|
|
76
|
+
| `--tree` | `-t` | Display results as a rich tree in terminal |
|
|
77
|
+
| `--mindmap` | `-m` | Generate interactive D3.js HTML mindmap |
|
|
78
|
+
| `--json` | `-j` | Export results to JSON |
|
|
79
|
+
| `--csv` | | Export results to CSV |
|
|
80
|
+
|
|
81
|
+
## API Server
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
# Start the API server
|
|
85
|
+
uvicorn surfacemap.api.server:app --host 0.0.0.0 --port 8000
|
|
86
|
+
|
|
87
|
+
# Start a scan
|
|
88
|
+
curl -X POST "http://localhost:8000/discover?target=example.com"
|
|
89
|
+
|
|
90
|
+
# Get scan results
|
|
91
|
+
curl "http://localhost:8000/scans/{scan_id}"
|
|
92
|
+
|
|
93
|
+
# Health check
|
|
94
|
+
curl "http://localhost:8000/health"
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### API Endpoints
|
|
98
|
+
|
|
99
|
+
| Method | Endpoint | Description |
|
|
100
|
+
|--------|----------|-------------|
|
|
101
|
+
| `POST` | `/discover` | Start a new discovery scan |
|
|
102
|
+
| `GET` | `/scans/{id}` | Get scan results by ID |
|
|
103
|
+
| `GET` | `/scans` | List recent scans |
|
|
104
|
+
| `GET` | `/health` | Health check |
|
|
105
|
+
|
|
106
|
+
## Discovery Modules
|
|
107
|
+
|
|
108
|
+
SurfaceMap runs discovery in 6 phases:
|
|
109
|
+
|
|
110
|
+
| # | Phase | Module | Description |
|
|
111
|
+
|---|-------|--------|-------------|
|
|
112
|
+
| 1 | Company Intel | LLM Brain | Discover domains, subsidiaries, and related entities via LLM |
|
|
113
|
+
| 1 | Company Intel | Subsidiary Discovery | Identify acquisitions, brands, and child companies |
|
|
114
|
+
| 2 | DNS | DNS Records | Enumerate A, AAAA, MX, NS, TXT, CNAME, SOA records |
|
|
115
|
+
| 2 | DNS | Subdomain - subfinder | Passive subdomain enumeration via subfinder |
|
|
116
|
+
| 2 | DNS | Subdomain - crt.sh | Certificate transparency log mining |
|
|
117
|
+
| 2 | DNS | Subdomain - Brute Force | DNS brute force with 100+ common prefixes |
|
|
118
|
+
| 2 | DNS | Subdomain - LLM | AI-suggested subdomain candidates |
|
|
119
|
+
| 3 | HTTP | HTTP Probe | Probe all hosts for HTTP/HTTPS services |
|
|
120
|
+
| 3 | HTTP | Technology Detection | Identify web servers, frameworks, CMS from headers |
|
|
121
|
+
| 3 | HTTP | Security Headers | Check for missing HSTS, CSP, X-Frame-Options, etc. |
|
|
122
|
+
| 3 | HTTP | CDN Detection | Identify Cloudflare, CloudFront, Fastly, Akamai, etc. |
|
|
123
|
+
| 3 | HTTP | WAF Detection | Detect web application firewalls |
|
|
124
|
+
| 4 | Ports | Port Scan | nmap service version detection on discovered IPs |
|
|
125
|
+
| 5 | Cloud | S3 Bucket Enum | Check for public/existing AWS S3 buckets |
|
|
126
|
+
| 5 | Cloud | Azure Blob Enum | Check for Azure Blob Storage containers |
|
|
127
|
+
| 5 | Cloud | GCS Bucket Enum | Check for Google Cloud Storage buckets |
|
|
128
|
+
| 5 | Takeover | Subdomain Takeover | Detect dangling CNAMEs across 17 providers |
|
|
129
|
+
| 6 | Dorks | Google Dorks | LLM-generated targeted search queries |
|
|
130
|
+
|
|
131
|
+
### Asset Types
|
|
132
|
+
|
|
133
|
+
| Type | Description |
|
|
134
|
+
|------|-------------|
|
|
135
|
+
| `domain` | Root domains |
|
|
136
|
+
| `subdomain` | Discovered subdomains |
|
|
137
|
+
| `ip` | IP addresses |
|
|
138
|
+
| `port` | Open ports |
|
|
139
|
+
| `service` | Running services with version info |
|
|
140
|
+
| `cloud_bucket` | S3, Azure Blob, GCS buckets |
|
|
141
|
+
| `email_server` | MX record mail servers |
|
|
142
|
+
| `nameserver` | NS record nameservers |
|
|
143
|
+
| `cdn` | Content delivery networks |
|
|
144
|
+
| `waf` | Web application firewalls |
|
|
145
|
+
| `certificate` | TLS/SSL certificates |
|
|
146
|
+
| `github_repo` | GitHub repositories |
|
|
147
|
+
| `social_media` | Social media profiles |
|
|
148
|
+
| `url` | Discovered URLs |
|
|
149
|
+
| `technology` | Detected technologies |
|
|
150
|
+
| `subsidiary` | Subsidiaries and acquisitions |
|
|
151
|
+
|
|
152
|
+
## Configuration
|
|
153
|
+
|
|
154
|
+
All settings are configured via environment variables:
|
|
155
|
+
|
|
156
|
+
| Variable | Default | Description |
|
|
157
|
+
|----------|---------|-------------|
|
|
158
|
+
| `GEMINI_API_KEY` | | Google Gemini API key |
|
|
159
|
+
| `ANTHROPIC_API_KEY` | | Anthropic Claude API key |
|
|
160
|
+
| `SURFACEMAP_LLM_PROVIDER` | `gemini` | LLM provider (`gemini` or `anthropic`) |
|
|
161
|
+
| `SURFACEMAP_LLM_MODEL` | `gemini-2.5-flash` | LLM model name |
|
|
162
|
+
| `SURFACEMAP_HTTP_TIMEOUT` | `15` | HTTP probe timeout (seconds) |
|
|
163
|
+
| `SURFACEMAP_DNS_TIMEOUT` | `10` | DNS lookup timeout (seconds) |
|
|
164
|
+
| `SURFACEMAP_SCAN_TIMEOUT` | `300` | nmap scan timeout (seconds) |
|
|
165
|
+
| `SURFACEMAP_OUTPUT_DIR` | `./output` | Default output directory |
|
|
166
|
+
| `SURFACEMAP_DB_PATH` | `./surfacemap.db` | SQLite database path |
|
|
167
|
+
| `SURFACEMAP_SLACK_WEBHOOK` | | Slack webhook URL for notifications |
|
|
168
|
+
| `SURFACEMAP_SLACK_TOKEN` | | Slack Bot Token for notifications |
|
|
169
|
+
| `SURFACEMAP_SLACK_CHANNEL` | `#security` | Slack channel for notifications |
|
|
170
|
+
| `SURFACEMAP_MAX_SUBDOMAINS` | `500` | Maximum subdomains to enumerate |
|
|
171
|
+
| `SURFACEMAP_MAX_PROBES` | `20` | Concurrent HTTP probes |
|
|
172
|
+
| `SURFACEMAP_MAX_DNS` | `50` | Concurrent DNS lookups |
|
|
173
|
+
| `SURFACEMAP_NMAP_ARGS` | `-sV -T4 --top-ports 100` | nmap arguments |
|
|
174
|
+
|
|
175
|
+
## Output Formats
|
|
176
|
+
|
|
177
|
+
- **Terminal Tree** — Rich tree display with color-coded statuses
|
|
178
|
+
- **JSON** — Full scan data with metadata
|
|
179
|
+
- **CSV** — Flat export for spreadsheet analysis
|
|
180
|
+
- **HTML Mindmap** — Interactive D3.js force-directed graph with dark theme, zoom, drag, and tooltips
|
|
181
|
+
- **Mermaid** — Mermaid.js mindmap diagram for embedding in docs
|
|
182
|
+
|
|
183
|
+
## Architecture
|
|
184
|
+
|
|
185
|
+
```
|
|
186
|
+
surfacemap/
|
|
187
|
+
core/
|
|
188
|
+
config.py — Environment-based configuration
|
|
189
|
+
models.py — Asset, ScanResult, enums
|
|
190
|
+
llm.py — LLM integration (Gemini/Claude)
|
|
191
|
+
discovery/
|
|
192
|
+
base.py — DiscoveryModule ABC
|
|
193
|
+
dns.py — DNS, subdomain, takeover, cloud modules
|
|
194
|
+
http.py — HTTP probe, port scan modules
|
|
195
|
+
engine.py — 6-phase orchestration engine
|
|
196
|
+
cli/
|
|
197
|
+
main.py — Typer CLI application
|
|
198
|
+
output/
|
|
199
|
+
mindmap.py — D3.js HTML and Mermaid export
|
|
200
|
+
api/
|
|
201
|
+
server.py — FastAPI REST API
|
|
202
|
+
notifications/
|
|
203
|
+
slack.py — Slack Block Kit notifications
|
|
204
|
+
storage/
|
|
205
|
+
db.py — SQLite persistence with aiosqlite
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## License
|
|
209
|
+
|
|
210
|
+
MIT License. Copyright (c) 2026 Yash Korat.
|
|
211
|
+
|
|
212
|
+
---
|
|
213
|
+
|
|
214
|
+
Built by [BreachLine Labs](https://breachline.io)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "surfacemap"
|
|
7
|
+
version = "2.0.0"
|
|
8
|
+
description = "LLM-driven attack surface discovery — find every asset from just a company name"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.11"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Yash Korat" },
|
|
14
|
+
]
|
|
15
|
+
keywords = ["security", "recon", "attack-surface", "osint", "discovery"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
|
+
"Intended Audience :: Information Technology",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Topic :: Security",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
dependencies = [
|
|
27
|
+
"typer[all]>=0.9.0",
|
|
28
|
+
"httpx>=0.27.0",
|
|
29
|
+
"rich>=13.7.0",
|
|
30
|
+
"aiosqlite>=0.20.0",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.optional-dependencies]
|
|
34
|
+
api = [
|
|
35
|
+
"fastapi>=0.115.0",
|
|
36
|
+
"uvicorn>=0.30.0",
|
|
37
|
+
]
|
|
38
|
+
llm = [
|
|
39
|
+
"google-genai>=1.0.0",
|
|
40
|
+
"anthropic>=0.40.0",
|
|
41
|
+
]
|
|
42
|
+
notifications = [
|
|
43
|
+
"slack-sdk>=3.30.0",
|
|
44
|
+
]
|
|
45
|
+
all = [
|
|
46
|
+
"surfacemap[api,llm,notifications]",
|
|
47
|
+
]
|
|
48
|
+
|
|
49
|
+
[project.scripts]
|
|
50
|
+
surfacemap = "surfacemap.cli.main:app"
|
|
51
|
+
|
|
52
|
+
[tool.hatch.build.targets.wheel]
|
|
53
|
+
packages = ["src/surfacemap"]
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""SurfaceMap — LLM-driven attack surface discovery.
|
|
2
|
+
|
|
3
|
+
Discover every external asset of a company from just its name.
|
|
4
|
+
Combines passive OSINT, DNS enumeration, HTTP probing, port scanning,
|
|
5
|
+
cloud storage enumeration, and LLM intelligence to build a complete
|
|
6
|
+
attack surface map.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__version__ = "2.0.0"
|
|
10
|
+
__author__ = "Yash Korat"
|
|
File without changes
|