webanalyzer-security 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
api.py ADDED
@@ -0,0 +1,301 @@
1
+ from fastapi import FastAPI, BackgroundTasks, HTTPException
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from pydantic import BaseModel
4
+ import os
5
+ import json
6
+ import asyncio
7
+
8
+ # WebAnalyzer dependencies
9
+ from utils.utils import save_results_to_json
10
+ from utils.module_wrapper import execute_modules_safely, ModuleExecutor
11
+
12
+ # Modules
13
+ from modules.domain_info import get_domain_info
14
+ from modules.domain_dns import DNSAnalyzer
15
+ from modules.subfinder_tool import run_subfinder
16
+ from modules.seo_analysis import analyze_advanced_seo
17
+ from modules.web_technologies import detect_web_technologies
18
+ from modules.security_analysis import analyze_security
19
+ from modules.contact_spy import GlobalDomainScraper
20
+ from modules.subdomain_takeover import SubdomainTakeover
21
+ from modules.advanced_content_scanner import AdvancedContentScanner
22
+ from modules.cloudflare_bypass import CloudflareBypass
23
+ from modules.nmap_zero_day import UltraAdvancedNetworkScanner
24
+ from modules.geo_analysis import analyze_geo
25
+
26
app = FastAPI(title="WebAnalyzer API", description="FastAPI Backend for WebAnalyzer React Panel")

# CORS so the React frontend can fetch.
#
# The FastAPI CORS documentation states that wildcard origins must not be
# combined with allow_credentials=True. Explicit origins can be supplied as a
# comma-separated list in WEBANALYZER_CORS_ORIGINS; credentials are only
# enabled in that case. Without the variable every origin is still allowed,
# but without credentials.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("WEBANALYZER_CORS_ORIGINS", "").split(",")
    if origin.strip()
]
app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins or ["*"],
    allow_credentials=bool(_cors_origins),
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global dictionary to track active scans: domain -> progress state dict
# with the keys "total", "completed", "current_module" and "results".
ACTIVE_SCANS = {}
39
+
40
class ScanRequest(BaseModel):
    """Request body accepted by POST /api/scan."""

    # Domain the scan is run against.
    domain: str
    # Display names of the modules to run (keys of the module table in start_scan).
    modules: list[str]
43
+
44
+ # Dictionary of modules manually replicated from main.py
45
def get_safe_wrapper(module_func):
    """Wrap a module function so it can always be awaited.

    Args:
        module_func: Callable taking a domain. It may be a plain function or
            a coroutine function.

    Returns:
        A coroutine function ``wrapper(domain)`` that returns the module's
        result. If the module itself returns a coroutine it is awaited, so
        the caller never receives an un-awaited coroutine object.
    """
    import functools

    @functools.wraps(module_func)
    async def wrapper(domain):
        result = module_func(domain)
        # An async module function hands back a coroutine; resolve it here.
        if asyncio.iscoroutine(result):
            result = await result
        return result

    return wrapper
49
+
50
@app.get("/api/results/{domain}")
async def get_results(domain: str):
    """Retrieve saved JSON results for a domain.

    Reads ``logs/<domain>/results.json`` and returns its parsed content.

    Raises:
        HTTPException: 404 when no results file exists for the domain, or
            when the domain would resolve to a path outside ``logs``.
    """
    logs_root = os.path.realpath("logs")
    result_path = os.path.realpath(os.path.join(logs_root, domain, "results.json"))

    # `domain` comes straight from the URL; a value such as ".." must not be
    # allowed to point the lookup outside the logs directory.
    if not result_path.startswith(logs_root + os.sep):
        raise HTTPException(status_code=404, detail="Results not found")

    # Open directly instead of checking existence first, so a file removed
    # between the check and the open still yields a clean 404.
    try:
        with open(result_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        raise HTTPException(status_code=404, detail="Results not found") from None
58
+
59
@app.get("/api/status/{domain}")
async def get_status(domain: str):
    """Retrieve the real-time progress of a scan.

    Returns the in-memory state from ``ACTIVE_SCANS`` when the domain has one.
    Otherwise, if ``logs/<domain>/results.json`` exists, a synthetic
    "Finished" state is returned.

    Raises:
        HTTPException: 404 when the domain has neither an in-memory state nor
            saved results.
    """
    state = ACTIVE_SCANS.get(domain)
    if state is not None:
        return state

    logs_root = os.path.realpath("logs")
    result_path = os.path.realpath(os.path.join(logs_root, domain, "results.json"))

    # `domain` comes straight from the URL; only probe paths that stay inside
    # the logs directory.
    inside_logs = result_path.startswith(logs_root + os.sep)
    if inside_logs and os.path.exists(result_path):
        return {"total": 1, "completed": 1, "current_module": "Finished", "results": {}}

    raise HTTPException(status_code=404, detail="Scan not found or not active")
70
+
71
@app.post("/api/scan")
async def start_scan(request: ScanRequest, background_tasks: BackgroundTasks):
    """Start a background scan and return immediately.

    Only module names present in the module table are scheduled; duplicates
    are run once. Progress is published in ``ACTIVE_SCANS[request.domain]``.

    Raises:
        HTTPException: 400 when the domain is not a plain host name or when
            none of the requested modules is known.
    """
    domain = request.domain

    # The domain is interpolated into a filesystem path (logs/<domain>) and
    # passed to every scanner module, so reject path-like or option-like values.
    if (
        not domain
        or domain.startswith(("-", "."))
        or ".." in domain
        or any(ch.isspace() or ch in "/\\" for ch in domain)
    ):
        raise HTTPException(status_code=400, detail="Invalid domain")

    # Display name -> callable taking the domain.
    module_functions = {
        "Domain Information": lambda d: get_domain_info(d, os.getenv('WHOIS_API_KEY', 'default_key')),
        "DNS Records": lambda d: DNSAnalyzer().get_dns_records(d),
        "SEO Analysis": analyze_advanced_seo,
        "Web Technologies": detect_web_technologies,
        "Security Analysis": analyze_security,
        "Advanced Content Scan": lambda d: AdvancedContentScanner(d).scan(),
        "Contact Spy": lambda d: GlobalDomainScraper(d).run(),
        "Subdomain Discovery": lambda d: run_subfinder(d, output_dir=f"logs/{d}"),
        "Subdomain Takeover": lambda d: SubdomainTakeover(d).run(),
        "CloudFlare Bypass": lambda d: CloudflareBypass(d).run(),
        "Nmap Zero Day Scan": lambda d: UltraAdvancedNetworkScanner(d).run_scan(),
        "GEO Analysis": analyze_geo,
    }

    # Keep request order, drop duplicates and unknown names so that "total"
    # matches the number of modules that will actually run.
    selected = [name for name in dict.fromkeys(request.modules) if name in module_functions]
    skipped = [name for name in request.modules if name not in module_functions]
    if not selected:
        raise HTTPException(status_code=400, detail="No valid modules selected")

    # Initialize the in-memory state used by the status endpoint.
    ACTIVE_SCANS[domain] = {
        "total": len(selected),
        "completed": 0,
        "current_module": "Initializing...",
        "results": {},
    }

    background_tasks.add_task(run_scan_background, domain, selected, module_functions)

    return {
        "status": "Scan started via Background Task",
        "domain": domain,
        "modules": request.modules,
        "skipped_modules": skipped,
    }
103
+
104
+
105
async def run_scan_background(domain: str, selected_modules: list[str], module_functions: dict):
    """Run the selected modules for a domain and persist the combined results.

    Progress is published in ``ACTIVE_SCANS[domain]`` while the scan runs.
    A module that raises is recorded as ``{"error": "<message>"}`` and does
    not stop the remaining modules.

    Args:
        domain: Target domain; also the key into ``ACTIVE_SCANS``.
        selected_modules: Module display names to run, in order.
        module_functions: Mapping of display name to a callable taking the
            domain. Coroutine functions are awaited; plain functions are run
            in a worker thread.
    """
    executor = ModuleExecutor()
    results = {}

    # Tolerate being scheduled without start_scan having created the state.
    state = ACTIVE_SCANS.setdefault(domain, {
        "total": len(selected_modules),
        "completed": 0,
        "current_module": "Initializing...",
        "results": {},
    })

    try:
        for module_name in selected_modules:
            func = module_functions.get(module_name)
            if func is not None:
                state["current_module"] = module_name
                try:
                    if asyncio.iscoroutinefunction(func):
                        res = await func(domain)
                    else:
                        # Blocking scanners must not run on the event loop,
                        # otherwise the status endpoints stall for the whole scan.
                        res = await asyncio.to_thread(func, domain)
                except Exception as e:
                    res = {"error": str(e)}
                results[module_name] = res
                state["results"][module_name] = res

            # Count every requested module so "completed" can reach "total".
            state["completed"] += 1

        state["current_module"] = "Writing to disk..."
        try:
            # Persist the collected results for the domain.
            save_results_to_json(domain, results)
        except Exception as e:
            # Surface the failure to pollers before letting it propagate.
            state["error"] = str(e)
            raise
    finally:
        # Always leave a terminal state and release executor resources,
        # even when saving failed.
        state["current_module"] = "Finished"
        executor.cleanup()
145
+
146
+
147
+ # ─── Advanced Content Scanner per-section scanning ───
148
+
149
class ACSSectionRequest(BaseModel):
    """Request body accepted by POST /api/scan/section."""

    # Domain the section scan is run against.
    domain: str
    # Section identifier; looked up in the section table of _run_acs_section.
    section: str
152
+
153
class ACSAllRequest(BaseModel):
    """Request body accepted by POST /api/scan/acs-all."""

    # Domain the full Advanced Content Scanner run targets.
    domain: str
155
+
156
# Task registry for ACS runs: "<domain>::<section>" (or "<domain>::acs_full")
# -> {"status": ..., "result": ..., "error": ...}.
ACS_SECTION_TASKS = dict()
157
+
158
@app.post("/api/scan/section")
async def scan_acs_section(request: ACSSectionRequest, background_tasks: BackgroundTasks):
    """Schedule a single ACS section for a domain and return at once."""
    domain, section = request.domain, request.section
    key = f"{domain}::{section}"

    # Publish the "running" state before the task is queued so that a status
    # poll issued right after this response already finds the entry.
    ACS_SECTION_TASKS[key] = dict(status="running", result=None, error=None)
    background_tasks.add_task(_run_acs_section, domain, section, key)

    return {"status": "started", "section": section}
165
+
166
@app.get("/api/scan/section/status")
async def acs_section_status(domain: str, section: str):
    """Return the recorded state of a single ACS section scan (404 if unknown)."""
    try:
        return ACS_SECTION_TASKS[f"{domain}::{section}"]
    except KeyError:
        raise HTTPException(status_code=404, detail="Section task not found") from None
173
+
174
@app.post("/api/scan/acs-all")
async def scan_acs_all(request: ACSAllRequest, background_tasks: BackgroundTasks):
    """Schedule the full Advanced Content Scanner for a domain and return at once."""
    domain = request.domain
    key = f"{domain}::acs_full"

    # Publish the "running" state before queuing the work.
    ACS_SECTION_TASKS[key] = dict(status="running", result=None, error=None)
    background_tasks.add_task(_run_acs_full, domain, key)

    return {"status": "started", "domain": domain}
181
+
182
@app.get("/api/scan/acs-all/status")
async def acs_all_status(domain: str):
    """Return the recorded state of a full ACS scan (404 if unknown)."""
    try:
        return ACS_SECTION_TASKS[f"{domain}::acs_full"]
    except KeyError:
        raise HTTPException(status_code=404, detail="ACS scan not found") from None
189
+
190
def _run_acs_section(domain: str, section: str, task_key: str):
    """Background task: run one ACS section and record the outcome.

    The outcome is stored in ``ACS_SECTION_TASKS[task_key]`` as a dict with
    the keys "status" ("completed" or "error"), "result" and "error".

    Args:
        domain: Domain handed to ``AdvancedContentScanner``.
        section: Section identifier; must be a key of the section table below.
        task_key: Registry key under which the outcome is stored.
    """

    def _findings_section(scanner, key, status):
        # The scanner is constructed just for this task, so crawl first and
        # then report what was collected under `key`.
        # NOTE(review): assumes crawl_website() is what fills findings[key];
        # _run_crawl reports secrets/js_vulnerabilities after a crawl, but
        # ssrf_vulnerabilities is not shown there — confirm.
        scanner.crawl_website()
        found = scanner.findings.get(key, [])
        return {key: found, "total": len(found), "status": status}

    # Map section IDs to scanner operations.
    section_map = {
        "overview": lambda s: {"version": s.VERSION, "domain": s.domain, "status": "ready"},
        "data_classes": lambda s: {"classes": ["SecretFinding", "JSVulnFinding", "SSRFVulnFinding", "ActiveVulnFinding", "SecurityHeaderFinding", "ExposedEndpoint"], "status": "inspected"},
        "pattern_registry": lambda s: {"secret_patterns": len(s.patterns.SECRETS), "js_categories": len(s.patterns.JS_SECURITY), "ssrf_params": len(s.patterns.SSRF_PARAMS), "sensitive_paths": len(s.patterns.SENSITIVE_PATHS), "status": "loaded"},
        "crawl_engine": lambda s: _run_crawl(s),
        "secret_scanner": lambda s: _findings_section(s, "secrets", "scanned"),
        "js_analysis": lambda s: _findings_section(s, "js_vulnerabilities", "analyzed"),
        "ssrf_detection": lambda s: _findings_section(s, "ssrf_vulnerabilities", "probed"),
        "active_testing": lambda s: _run_active(s),
        "headless_browser": lambda s: _run_headless(s),
        "exploit_chains": lambda s: _run_chains(s),
        "waf_detection": lambda s: _run_waf(s),
        "utilities": lambda s: {"helpers": ["_entropy", "_mask", "_shash", "_fp_value", "_fp_context", "_sev_passes", "_is_new", "_next_id", "_risk_score"], "status": "ready"},
        "main_flow": lambda s: _run_full_flow(s),
    }

    # Reject unknown sections before building a scanner for them.
    runner = section_map.get(section)
    if runner is None:
        ACS_SECTION_TASKS[task_key] = {"status": "error", "result": None, "error": f"Unknown section: {section}"}
        return

    try:
        result = runner(AdvancedContentScanner(domain))
    except Exception as e:
        # Background tasks have no caller to raise to; record the failure.
        ACS_SECTION_TASKS[task_key] = {"status": "error", "result": None, "error": str(e)}
    else:
        ACS_SECTION_TASKS[task_key] = {"status": "completed", "result": result, "error": None}
218
+
219
+
220
+ def _run_crawl(scanner):
221
+ duration = scanner.crawl_website()
222
+ return {
223
+ "urls_crawled": len(scanner.visited_urls),
224
+ "js_files": len(scanner.js_files),
225
+ "api_endpoints": len(scanner.api_endpoints),
226
+ "secrets": len(scanner.findings.get("secrets", [])),
227
+ "js_vulnerabilities": len(scanner.findings.get("js_vulnerabilities", [])),
228
+ "security_headers": len(scanner.findings.get("security_headers", [])),
229
+ "duration": duration,
230
+ "status": "crawled",
231
+ }
232
+
233
+ def _run_active(scanner):
234
+ scanner.crawl_website()
235
+ if hasattr(scanner, '_scan_sensitive_paths'):
236
+ scanner._scan_sensitive_paths()
237
+ if hasattr(scanner, '_test_cors'):
238
+ scanner._test_cors()
239
+ if hasattr(scanner, '_test_auth_bypass'):
240
+ scanner._test_auth_bypass()
241
+ return {
242
+ "active_vulnerabilities": [f.__dict__ if hasattr(f, '__dict__') else f for f in scanner.findings.get("active_vulnerabilities", [])],
243
+ "exposed_endpoints": [f.__dict__ if hasattr(f, '__dict__') else f for f in scanner.findings.get("exposed_endpoints", [])],
244
+ "total_active": len(scanner.findings.get("active_vulnerabilities", [])),
245
+ "total_exposed": len(scanner.findings.get("exposed_endpoints", [])),
246
+ "status": "tested",
247
+ }
248
+
249
+ def _run_headless(scanner):
250
+ scanner.crawl_website()
251
+ if hasattr(scanner, '_run_headless_scan'):
252
+ scanner._run_headless_scan()
253
+ return {
254
+ "dynamic_routes": list(getattr(scanner, '_dynamic_routes', set())),
255
+ "status": "scanned",
256
+ }
257
+
258
+ def _run_chains(scanner):
259
+ scanner.crawl_website()
260
+ if hasattr(scanner, '_build_exploit_chains'):
261
+ scanner._build_exploit_chains()
262
+ return {
263
+ "exploit_chains": scanner.findings.get("exploit_chains", []),
264
+ "total": len(scanner.findings.get("exploit_chains", [])),
265
+ "status": "built",
266
+ }
267
+
268
+ def _run_waf(scanner):
269
+ # Must crawl first — WAF headers are often only on internal pages, not the base URL
270
+ scanner.crawl_website()
271
+ waf = getattr(scanner, '_detected_waf', None)
272
+ blocked = getattr(scanner, '_waf_triggered_count', 0) > 0
273
+ return {
274
+ "detected_waf": waf or "Tespit edilmedi",
275
+ "is_blocked": blocked,
276
+ "rate_limit": scanner.rate_limit,
277
+ "pages_scanned": len(scanner.visited_urls),
278
+ "status": "detected",
279
+ }
280
+
281
+ def _run_full_flow(scanner):
282
+ results = scanner.run()
283
+ return results
284
+
285
def _run_acs_full(domain: str, task_key: str):
    """Background task: run the entire ACS pipeline and record the outcome.

    The outcome is stored in ``ACS_SECTION_TASKS[task_key]`` with the keys
    "status", "result" and "error".
    """
    try:
        outcome = {
            "status": "completed",
            "result": AdvancedContentScanner(domain).run(),
            "error": None,
        }
    except Exception as exc:
        # No caller to raise to from a background task; store the message.
        outcome = {"status": "error", "result": None, "error": str(exc)}
    ACS_SECTION_TASKS[task_key] = outcome
293
+
294
@app.get("/")
async def root():
    """Report that the service is online, with its name, version and docs path."""
    return dict(
        status="online",
        name="WebAnalyzer API",
        version="3.0.0",
        docs_url="/docs",
    )
bulk/loader.py ADDED
@@ -0,0 +1,146 @@
1
+ import csv
2
+ import json
3
+ from pathlib import Path
4
+ import logging
5
+ from database.db_manager import db_manager
6
+
7
class BulkDomainLoader:
    """Load domain lists from .txt, .csv or .json files into a bulk scan job.

    Entries are read from the file, normalised to bare host names,
    de-duplicated and then stored through ``db_manager``.
    """

    # Characters that are rejected anywhere in a domain.
    _INVALID_CHARS = frozenset('<>"\'|\\^`{}')
    # DNS name length limits (RFC 1035).
    _MAX_DOMAIN_LENGTH = 253
    _MAX_LABEL_LENGTH = 63

    def __init__(self):
        self.db = db_manager
        self.supported_formats = ['.txt', '.csv', '.json']

    def load_domains(self, file_path, job_name=None):
        """Load a domain list from a file and register it as a scan job.

        Args:
            file_path: Path to a .txt, .csv or .json file.
            job_name: Optional job name; defaults to
                ``"Bulk Scan - <file name>"``.

        Returns:
            Tuple ``(job_id, added)``: the created job's id and the value
            returned by ``db.add_domains_bulk``.

        Raises:
            FileNotFoundError: The file does not exist.
            ValueError: Unsupported extension, unreadable file, or no valid
                domains in the file.
            RuntimeError: The scan job could not be created.
        """
        file_path = Path(file_path)

        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        extension = file_path.suffix.lower()

        if extension not in self.supported_formats:
            raise ValueError(f"Unsupported format. Use: {self.supported_formats}")

        # Read, then validate and clean the entries.
        domains = self._validate_domains(self._read_file(file_path, extension))

        if not domains:
            raise ValueError("No valid domains found in file")

        # Store the job, then attach its domains.
        job_name = job_name or f"Bulk Scan - {file_path.name}"
        job_id = self._create_job(job_name, len(domains))
        added = self.db.add_domains_bulk(job_id, domains)

        logging.info("Job #%s created: %s/%s domains added", job_id, added, len(domains))

        return job_id, added

    def _read_file(self, file_path, extension):
        """Read the raw (unvalidated) domain entries from a file.

        Args:
            file_path: File to read.
            extension: Lower-cased suffix selecting the parser
                ('.txt', '.csv' or '.json').

        Returns:
            List of raw entries; empty for an unknown extension.

        Raises:
            ValueError: Any failure while opening or parsing the file.
        """
        try:
            # utf-8-sig also reads plain UTF-8 and drops a leading BOM, which
            # would otherwise end up glued to the first entry.
            if extension == '.txt':
                with open(file_path, 'r', encoding='utf-8-sig') as f:
                    return self._parse_txt(f)
            if extension == '.csv':
                # newline='' is what the csv module expects for its input files.
                with open(file_path, 'r', encoding='utf-8-sig', newline='') as f:
                    return self._parse_csv(f)
            if extension == '.json':
                with open(file_path, 'r', encoding='utf-8-sig') as f:
                    return self._parse_json(json.load(f))
        except Exception as e:
            # Callers get one exception type for every read/parse failure.
            raise ValueError(f"Error reading file: {e}") from e

        return []

    @staticmethod
    def _parse_txt(handle):
        """Return non-empty lines that are not '#' comments, stripped."""
        stripped = (line.strip() for line in handle)
        return [entry for entry in stripped if entry and not entry.startswith('#')]

    def _parse_csv(self, handle):
        """Return the first column of every row, skipping a header row."""
        domains = []
        for index, row in enumerate(csv.reader(handle)):
            # Blank lines come back as empty rows.
            if not row:
                continue
            cell = row[0].strip()
            if not cell:
                continue
            # A first row that does not look like a domain is a header.
            if index == 0 and not self._is_domain(cell):
                continue
            domains.append(cell)
        return domains

    @staticmethod
    def _parse_json(data):
        """Extract entries from a parsed JSON document.

        Accepts a list of strings and/or objects (keys 'domain_name',
        'domain' or 'url'), or an object holding such a list under 'domains'.
        """
        if isinstance(data, dict):
            data = data.get('domains', [])
        if isinstance(data, str):
            data = [data]
        if not isinstance(data, list):
            return []

        domains = []
        for item in data:
            if isinstance(item, dict):
                item = item.get('domain_name') or item.get('domain') or item.get('url', '')
                if item:
                    domains.append(item)
            elif isinstance(item, str):
                domains.append(item)
        return domains

    def _validate_domains(self, domains):
        """Normalise entries to bare host names and drop invalid ones.

        Scheme, path, query, fragment, port and a trailing dot are removed.
        Duplicates are dropped while keeping first-seen order.

        Returns:
            List of unique, valid, lower-cased domains.
        """
        clean_domains = {}

        for raw in domains:
            domain = str(raw).strip().lower()

            # Remove the scheme (http://, https://, ...).
            if '://' in domain:
                domain = domain.split('://', 1)[1]

            # Remove path, query string and fragment.
            for separator in ('/', '?', '#'):
                domain = domain.split(separator, 1)[0]

            # Remove the port; more than one ':' is left alone (IPv6).
            if domain.count(':') == 1:
                domain = domain.split(':')[0]

            # "example.com." is the same name as "example.com".
            domain = domain.rstrip('.')

            if self._is_domain(domain):
                # dict keys de-duplicate and keep insertion order.
                clean_domains.setdefault(domain, None)

        return list(clean_domains)

    def _is_domain(self, domain):
        """Return True when ``domain`` passes the basic host-name checks.

        Requires a string of 4 to 253 characters containing a dot, without
        whitespace or the rejected characters, whose dot-separated labels are
        non-empty, at most 63 characters long and neither start nor end
        with '-'.
        """
        if not isinstance(domain, str):
            return False

        if not 4 <= len(domain) <= self._MAX_DOMAIN_LENGTH:
            return False

        if '.' not in domain:
            return False

        if any(char.isspace() or char in self._INVALID_CHARS for char in domain):
            return False

        for part in domain.split('.'):
            if not part or len(part) > self._MAX_LABEL_LENGTH:
                return False
            if part.startswith('-') or part.endswith('-'):
                return False

        return True

    def _create_job(self, job_name, total_domains):
        """Create a new scan job and return the value from the database layer.

        Raises:
            RuntimeError: The database call failed; the original exception is
                chained as the cause.
        """
        try:
            return self.db.create_scan_job(job_name, total_domains)
        except Exception as e:
            raise RuntimeError(f"Failed to create scan job: {e}") from e