webanalyzer-security 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api.py +301 -0
- bulk/loader.py +146 -0
- bulk/processor.py +741 -0
- bulk_scan.py +353 -0
- check_progress.py +48 -0
- config.py +275 -0
- database/db_manager.py +551 -0
- database/schema.sql +102 -0
- domains-check.py +429 -0
- main.py +1088 -0
- modules/advanced_content_scanner.py +2547 -0
- modules/api_security_scanner.py +1741 -0
- modules/cloudflare_bypass.py +528 -0
- modules/contact_spy.py +351 -0
- modules/domain_dns.py +98 -0
- modules/domain_info.py +611 -0
- modules/geo_analysis.py +195 -0
- modules/nmap_zero_day.py +293 -0
- modules/security_analysis.py +759 -0
- modules/seo_analysis.py +562 -0
- modules/subdomain_takeover.py +717 -0
- modules/subfinder_tool.py +45 -0
- modules/universal_adapter.py +422 -0
- modules/web_technologies.py +1691 -0
- monitor.py +145 -0
- utils/__init__.py +0 -0
- utils/module_wrapper.py +261 -0
- utils/session_manager.py +190 -0
- utils/utils.py +294 -0
- webanalyzer_security-3.0.0.dist-info/METADATA +583 -0
- webanalyzer_security-3.0.0.dist-info/RECORD +35 -0
- webanalyzer_security-3.0.0.dist-info/WHEEL +5 -0
- webanalyzer_security-3.0.0.dist-info/entry_points.txt +2 -0
- webanalyzer_security-3.0.0.dist-info/licenses/LICENSE +21 -0
- webanalyzer_security-3.0.0.dist-info/top_level.txt +11 -0
api.py
ADDED
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
from fastapi import FastAPI, BackgroundTasks, HTTPException
|
|
2
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
import os
|
|
5
|
+
import json
|
|
6
|
+
import asyncio
|
|
7
|
+
|
|
8
|
+
# WebAnalyzer dependencies
|
|
9
|
+
from utils.utils import save_results_to_json
|
|
10
|
+
from utils.module_wrapper import execute_modules_safely, ModuleExecutor
|
|
11
|
+
|
|
12
|
+
# Modules
|
|
13
|
+
from modules.domain_info import get_domain_info
|
|
14
|
+
from modules.domain_dns import DNSAnalyzer
|
|
15
|
+
from modules.subfinder_tool import run_subfinder
|
|
16
|
+
from modules.seo_analysis import analyze_advanced_seo
|
|
17
|
+
from modules.web_technologies import detect_web_technologies
|
|
18
|
+
from modules.security_analysis import analyze_security
|
|
19
|
+
from modules.contact_spy import GlobalDomainScraper
|
|
20
|
+
from modules.subdomain_takeover import SubdomainTakeover
|
|
21
|
+
from modules.advanced_content_scanner import AdvancedContentScanner
|
|
22
|
+
from modules.cloudflare_bypass import CloudflareBypass
|
|
23
|
+
from modules.nmap_zero_day import UltraAdvancedNetworkScanner
|
|
24
|
+
from modules.geo_analysis import analyze_geo
|
|
25
|
+
|
|
26
|
+
app = FastAPI(
    title="WebAnalyzer API",
    description="FastAPI Backend for WebAnalyzer React Panel",
)

# CORS so the React frontend can fetch.
#
# The FastAPI/Starlette CORS documentation states that wildcard origins must
# not be combined with allow_credentials=True.  Allowed origins are therefore
# read from the optional WEBANALYZER_CORS_ORIGINS environment variable (comma
# separated).  When it is unset the API still accepts every origin, as before,
# but credentialed requests are only enabled for an explicit origin list.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("WEBANALYZER_CORS_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global dictionary to track active scans, keyed by the scanned domain.
# Each value holds "total", "completed", "current_module" and "results".
ACTIVE_SCANS = {}
|
|
39
|
+
|
|
40
|
+
class ScanRequest(BaseModel):
    """Request body accepted by ``POST /api/scan``."""

    # Domain the scan is run against.
    domain: str

    # Display names of the modules to execute (e.g. "DNS Records").
    modules: list[str]
|
|
43
|
+
|
|
44
|
+
# Helper that exposes a plain module function through an awaitable wrapper.
# (The module dictionary replicated from main.py is built inside start_scan.)
|
|
45
|
+
def get_safe_wrapper(module_func):
    """Return an ``async`` wrapper around *module_func*.

    The wrapper takes a single ``domain`` argument and forwards it to
    *module_func*.  If *module_func* returns an awaitable (it is an
    ``async def``, or a lambda that calls one) the awaitable is resolved, so
    callers always receive the final result rather than a pending coroutine.

    Args:
        module_func: Callable accepting the domain as its only argument.

    Returns:
        A coroutine function ``wrapper(domain)``.
    """
    import inspect

    async def wrapper(domain):
        outcome = module_func(domain)
        # Previously a coroutine was handed back un-awaited here.
        if inspect.isawaitable(outcome):
            outcome = await outcome
        return outcome

    return wrapper
|
|
49
|
+
|
|
50
|
+
@app.get("/api/results/{domain}")
async def get_results(domain: str):
    """Retrieve the saved JSON results for a domain.

    Reads ``logs/<domain>/results.json`` and returns its parsed content.

    Raises:
        HTTPException: 404 when no results file exists for the domain, or when
            the supplied value is not a single path component.
    """
    # The value comes straight from the URL and is used to build a filesystem
    # path, so anything that could step outside logs/ ("..", separators, NUL)
    # is treated exactly like a domain without results.
    is_single_component = (
        domain not in ("", ".", "..")
        and os.path.basename(domain) == domain
        and "\\" not in domain
        and "\x00" not in domain
    )
    if not is_single_component:
        raise HTTPException(status_code=404, detail="Results not found")

    result_path = os.path.join("logs", domain, "results.json")
    try:
        # Open directly instead of exists()+open() so a file removed in
        # between still yields a 404 rather than an unhandled error.
        with open(result_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        raise HTTPException(status_code=404, detail="Results not found") from None
|
|
58
|
+
|
|
59
|
+
@app.get("/api/status/{domain}")
async def get_status(domain: str):
    """Report the progress of a scan for *domain*.

    Returns the in-memory tracking entry when one exists.  Otherwise, if a
    results file is already present on disk, a synthetic "Finished" status is
    returned.

    Raises:
        HTTPException: 404 when the domain is neither tracked nor saved.
    """
    try:
        return ACTIVE_SCANS[domain]
    except KeyError:
        pass

    saved_results = os.path.join("logs", domain, "results.json")
    if not os.path.exists(saved_results):
        raise HTTPException(status_code=404, detail="Scan not found or not active")

    return {"total": 1, "completed": 1, "current_module": "Finished", "results": {}}
|
|
70
|
+
|
|
71
|
+
@app.post("/api/scan")
async def start_scan(request: ScanRequest, background_tasks: BackgroundTasks):
    """Start a background scan and return immediately.

    Registers a fresh tracking entry for the requested domain in
    ``ACTIVE_SCANS`` and schedules ``run_scan_background`` with the module
    table built below.
    """
    target = request.domain
    requested = request.modules

    # Display name -> callable taking the domain; passed to the background task.
    module_functions = {
        "Domain Information": lambda d: get_domain_info(d, os.getenv('WHOIS_API_KEY', 'default_key')),
        "DNS Records": lambda d: DNSAnalyzer().get_dns_records(d),
        "SEO Analysis": analyze_advanced_seo,
        "Web Technologies": detect_web_technologies,
        "Security Analysis": analyze_security,
        "Advanced Content Scan": lambda d: AdvancedContentScanner(d).scan(),
        "Contact Spy": lambda d: GlobalDomainScraper(d).run(),
        "Subdomain Discovery": lambda d: run_subfinder(d, output_dir=f"logs/{d}"),
        "Subdomain Takeover": lambda d: SubdomainTakeover(d).run(),
        "CloudFlare Bypass": lambda d: CloudflareBypass(d).run(),
        "Nmap Zero Day Scan": lambda d: UltraAdvancedNetworkScanner(d).run_scan(),
        "GEO Analysis": analyze_geo,
    }

    # In-memory progress record polled through /api/status/{domain}.
    progress = {
        "total": len(requested),
        "completed": 0,
        "current_module": "Initializing...",
        "results": {},
    }
    ACTIVE_SCANS[target] = progress

    # Hand the actual work to a background task.
    background_tasks.add_task(run_scan_background, target, requested, module_functions)

    response = {
        "status": "Scan started via Background Task",
        "domain": target,
        "modules": requested,
    }
    return response
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
async def run_scan_background(domain: str, selected_modules: list[str], module_functions: dict):
    """Run the selected modules for *domain* and persist the results.

    Progress is published through ``ACTIVE_SCANS[domain]`` ("current_module",
    "completed", "results").  When all modules have been processed the
    collected results are written with ``save_results_to_json``.

    Args:
        domain: Domain being scanned; also the key into ``ACTIVE_SCANS``.
        selected_modules: Module display names requested by the client.
        module_functions: Mapping of display name to a callable taking the
            domain.  Names missing from this mapping are skipped.

    Notes:
        * Synchronous module functions are executed in a worker thread.  This
          coroutine is awaited on the server's event loop, so calling a
          blocking scanner inline would stall every other request (including
          the status endpoint) until it returned.
        * A module failure is recorded as ``{"error": ...}`` and does not
          abort the remaining modules.
        * "completed" is advanced for every requested name, known or not, so
          it can reach the "total" registered by the caller.
    """
    import inspect

    executor = ModuleExecutor()
    results = {}

    # Make direct invocations (without a prior start_scan) safe.
    ACTIVE_SCANS.setdefault(domain, {
        "total": len(selected_modules),
        "completed": 0,
        "current_module": "Initializing...",
        "results": {},
    })

    try:
        for module_name in selected_modules:
            if module_name in module_functions:
                ACTIVE_SCANS[domain]["current_module"] = module_name
                func = module_functions[module_name]

                try:
                    if asyncio.iscoroutinefunction(func):
                        res = await func(domain)
                    else:
                        res = await asyncio.to_thread(func, domain)
                        # A lambda wrapping an async method returns a coroutine.
                        if inspect.isawaitable(res):
                            res = await res
                    results[module_name] = res
                    ACTIVE_SCANS[domain]["results"][module_name] = res
                except Exception as e:
                    results[module_name] = {"error": str(e)}
                    ACTIVE_SCANS[domain]["results"][module_name] = {"error": str(e)}

            ACTIVE_SCANS[domain]["completed"] += 1

        ACTIVE_SCANS[domain]["current_module"] = "Writing to disk..."
        # Save directly to logs/<domain>/results.json
        save_results_to_json(domain, results)

        ACTIVE_SCANS[domain]["current_module"] = "Finished"
    finally:
        # Release executor resources even if saving the results fails.
        executor.cleanup()
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
# ─── Advanced Content Scanner per-section scanning ───
|
|
148
|
+
|
|
149
|
+
class ACSSectionRequest(BaseModel):
    """Request body accepted by ``POST /api/scan/section``."""

    # Domain the Advanced Content Scanner is pointed at.
    domain: str

    # Identifier of the scanner section to run (e.g. "crawl_engine").
    section: str
|
|
152
|
+
|
|
153
|
+
class ACSAllRequest(BaseModel):
    """Request body accepted by ``POST /api/scan/acs-all``."""

    # Domain to run the full Advanced Content Scanner against.
    domain: str
|
|
155
|
+
|
|
156
|
+
# Task registry for Advanced Content Scanner runs.
# Keys look like "<domain>::<section>" (or "<domain>::acs_full"); values are
# dicts with "status", "result" and "error".
ACS_SECTION_TASKS = dict()
|
|
157
|
+
|
|
158
|
+
@app.post("/api/scan/section")
async def scan_acs_section(request: ACSSectionRequest, background_tasks: BackgroundTasks):
    """Schedule a single Advanced Content Scanner section for a domain.

    Marks the task as running in ``ACS_SECTION_TASKS`` and returns at once;
    the work itself happens in ``_run_acs_section``.
    """
    domain, section = request.domain, request.section
    task_key = f"{domain}::{section}"

    pending = {"status": "running", "result": None, "error": None}
    ACS_SECTION_TASKS[task_key] = pending

    background_tasks.add_task(_run_acs_section, domain, section, task_key)
    return {"status": "started", "section": section}
|
|
165
|
+
|
|
166
|
+
@app.get("/api/scan/section/status")
async def acs_section_status(domain: str, section: str):
    """Return the stored state of one ACS section scan.

    Raises:
        HTTPException: 404 when no task exists for the domain/section pair.
    """
    try:
        return ACS_SECTION_TASKS[f"{domain}::{section}"]
    except KeyError:
        pass
    raise HTTPException(status_code=404, detail="Section task not found")
|
|
173
|
+
|
|
174
|
+
@app.post("/api/scan/acs-all")
async def scan_acs_all(request: ACSAllRequest, background_tasks: BackgroundTasks):
    """Schedule a full Advanced Content Scanner run for a domain.

    Marks the task as running in ``ACS_SECTION_TASKS`` and returns at once;
    the work itself happens in ``_run_acs_full``.
    """
    domain = request.domain
    task_key = f"{domain}::acs_full"

    pending = {"status": "running", "result": None, "error": None}
    ACS_SECTION_TASKS[task_key] = pending

    background_tasks.add_task(_run_acs_full, domain, task_key)
    return {"status": "started", "domain": domain}
|
|
181
|
+
|
|
182
|
+
@app.get("/api/scan/acs-all/status")
async def acs_all_status(domain: str):
    """Return the stored state of the full ACS scan for a domain.

    Raises:
        HTTPException: 404 when no full scan was started for the domain.
    """
    try:
        return ACS_SECTION_TASKS[f"{domain}::acs_full"]
    except KeyError:
        pass
    raise HTTPException(status_code=404, detail="ACS scan not found")
|
|
189
|
+
|
|
190
|
+
def _run_acs_section(domain: str, section: str, task_key: str):
    """Background job: run one conceptual ACS section and store its outcome.

    A scanner is created for *domain*, the handler registered for *section*
    is executed, and the result is written to ``ACS_SECTION_TASKS[task_key]``
    as a ``{"status", "result", "error"}`` dict.  Unknown sections and any
    exception are recorded with status "error".
    """
    try:
        scanner = AdvancedContentScanner(domain)

        def _findings_report(category, status):
            # Snapshot of one findings category as currently held by the scanner.
            items = scanner.findings.get(category, [])
            return {category: items, "total": len(items), "status": status}

        def _pattern_counts():
            registry_sizes = {
                "secret_patterns": len(scanner.patterns.SECRETS),
                "js_categories": len(scanner.patterns.JS_SECURITY),
                "ssrf_params": len(scanner.patterns.SSRF_PARAMS),
                "sensitive_paths": len(scanner.patterns.SENSITIVE_PATHS),
            }
            registry_sizes["status"] = "loaded"
            return registry_sizes

        # Section id -> zero-argument handler bound to this scanner.
        handlers = {
            "overview": lambda: {"version": scanner.VERSION, "domain": scanner.domain, "status": "ready"},
            "data_classes": lambda: {
                "classes": [
                    "SecretFinding",
                    "JSVulnFinding",
                    "SSRFVulnFinding",
                    "ActiveVulnFinding",
                    "SecurityHeaderFinding",
                    "ExposedEndpoint",
                ],
                "status": "inspected",
            },
            "pattern_registry": _pattern_counts,
            "crawl_engine": lambda: _run_crawl(scanner),
            "secret_scanner": lambda: _findings_report("secrets", "scanned"),
            "js_analysis": lambda: _findings_report("js_vulnerabilities", "analyzed"),
            "ssrf_detection": lambda: _findings_report("ssrf_vulnerabilities", "probed"),
            "active_testing": lambda: _run_active(scanner),
            "headless_browser": lambda: _run_headless(scanner),
            "exploit_chains": lambda: _run_chains(scanner),
            "waf_detection": lambda: _run_waf(scanner),
            "utilities": lambda: {
                "helpers": [
                    "_entropy",
                    "_mask",
                    "_shash",
                    "_fp_value",
                    "_fp_context",
                    "_sev_passes",
                    "_is_new",
                    "_next_id",
                    "_risk_score",
                ],
                "status": "ready",
            },
            "main_flow": lambda: _run_full_flow(scanner),
        }

        if section not in handlers:
            outcome = {"status": "error", "result": None, "error": f"Unknown section: {section}"}
        else:
            outcome = {"status": "completed", "result": handlers[section](), "error": None}
        ACS_SECTION_TASKS[task_key] = outcome
    except Exception as exc:
        ACS_SECTION_TASKS[task_key] = {"status": "error", "result": None, "error": str(exc)}
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _run_crawl(scanner):
|
|
221
|
+
duration = scanner.crawl_website()
|
|
222
|
+
return {
|
|
223
|
+
"urls_crawled": len(scanner.visited_urls),
|
|
224
|
+
"js_files": len(scanner.js_files),
|
|
225
|
+
"api_endpoints": len(scanner.api_endpoints),
|
|
226
|
+
"secrets": len(scanner.findings.get("secrets", [])),
|
|
227
|
+
"js_vulnerabilities": len(scanner.findings.get("js_vulnerabilities", [])),
|
|
228
|
+
"security_headers": len(scanner.findings.get("security_headers", [])),
|
|
229
|
+
"duration": duration,
|
|
230
|
+
"status": "crawled",
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
def _run_active(scanner):
|
|
234
|
+
scanner.crawl_website()
|
|
235
|
+
if hasattr(scanner, '_scan_sensitive_paths'):
|
|
236
|
+
scanner._scan_sensitive_paths()
|
|
237
|
+
if hasattr(scanner, '_test_cors'):
|
|
238
|
+
scanner._test_cors()
|
|
239
|
+
if hasattr(scanner, '_test_auth_bypass'):
|
|
240
|
+
scanner._test_auth_bypass()
|
|
241
|
+
return {
|
|
242
|
+
"active_vulnerabilities": [f.__dict__ if hasattr(f, '__dict__') else f for f in scanner.findings.get("active_vulnerabilities", [])],
|
|
243
|
+
"exposed_endpoints": [f.__dict__ if hasattr(f, '__dict__') else f for f in scanner.findings.get("exposed_endpoints", [])],
|
|
244
|
+
"total_active": len(scanner.findings.get("active_vulnerabilities", [])),
|
|
245
|
+
"total_exposed": len(scanner.findings.get("exposed_endpoints", [])),
|
|
246
|
+
"status": "tested",
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
def _run_headless(scanner):
|
|
250
|
+
scanner.crawl_website()
|
|
251
|
+
if hasattr(scanner, '_run_headless_scan'):
|
|
252
|
+
scanner._run_headless_scan()
|
|
253
|
+
return {
|
|
254
|
+
"dynamic_routes": list(getattr(scanner, '_dynamic_routes', set())),
|
|
255
|
+
"status": "scanned",
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
def _run_chains(scanner):
|
|
259
|
+
scanner.crawl_website()
|
|
260
|
+
if hasattr(scanner, '_build_exploit_chains'):
|
|
261
|
+
scanner._build_exploit_chains()
|
|
262
|
+
return {
|
|
263
|
+
"exploit_chains": scanner.findings.get("exploit_chains", []),
|
|
264
|
+
"total": len(scanner.findings.get("exploit_chains", [])),
|
|
265
|
+
"status": "built",
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
def _run_waf(scanner):
|
|
269
|
+
# Must crawl first — WAF headers are often only on internal pages, not the base URL
|
|
270
|
+
scanner.crawl_website()
|
|
271
|
+
waf = getattr(scanner, '_detected_waf', None)
|
|
272
|
+
blocked = getattr(scanner, '_waf_triggered_count', 0) > 0
|
|
273
|
+
return {
|
|
274
|
+
"detected_waf": waf or "Tespit edilmedi",
|
|
275
|
+
"is_blocked": blocked,
|
|
276
|
+
"rate_limit": scanner.rate_limit,
|
|
277
|
+
"pages_scanned": len(scanner.visited_urls),
|
|
278
|
+
"status": "detected",
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
def _run_full_flow(scanner):
|
|
282
|
+
results = scanner.run()
|
|
283
|
+
return results
|
|
284
|
+
|
|
285
|
+
def _run_acs_full(domain: str, task_key: str):
    """Background job: run the entire ACS pipeline and store its outcome.

    The result of ``AdvancedContentScanner(domain).run()`` is written to
    ``ACS_SECTION_TASKS[task_key]`` with status "completed"; any exception is
    stored as its message with status "error".
    """
    try:
        outcome = {
            "status": "completed",
            "result": AdvancedContentScanner(domain).run(),
            "error": None,
        }
    except Exception as exc:
        outcome = {"status": "error", "result": None, "error": str(exc)}
    ACS_SECTION_TASKS[task_key] = outcome
|
|
293
|
+
|
|
294
|
+
@app.get("/")
async def root():
    """Return basic service information for the API root."""
    service_info = dict(
        status="online",
        name="WebAnalyzer API",
        version="3.0.0",
        docs_url="/docs",
    )
    return service_info
|
bulk/loader.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
import json
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import logging
|
|
5
|
+
from database.db_manager import db_manager
|
|
6
|
+
|
|
7
|
+
class BulkDomainLoader:
    """Load a list of domains from a file and register it as a bulk scan job.

    Supported inputs are ``.txt`` (one domain per line, ``#`` comments),
    ``.csv`` (domain in the first column, optional header row) and ``.json``
    (a list of strings/objects, or an object with a ``"domains"`` entry).
    """

    # Characters that can never appear in a domain name.
    _INVALID_CHARS = frozenset('<>"\'|\\^`{}')

    def __init__(self):
        self.db = db_manager
        self.supported_formats = ['.txt', '.csv', '.json']

    def load_domains(self, file_path, job_name=None):
        """Load the domain list from *file_path* and store it as a new job.

        Args:
            file_path: Path to a .txt, .csv or .json file.
            job_name: Optional job label; defaults to
                ``"Bulk Scan - <file name>"``.

        Returns:
            Tuple ``(job_id, added)`` where *added* is the value returned by
            the database layer's ``add_domains_bulk``.

        Raises:
            FileNotFoundError: The file does not exist.
            ValueError: Unsupported extension, unreadable file, or no valid
                domains in the file.
            RuntimeError: The scan job could not be created.
        """
        file_path = Path(file_path)

        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        extension = file_path.suffix.lower()

        if extension not in self.supported_formats:
            raise ValueError(f"Unsupported format. Use: {self.supported_formats}")

        # Read the raw entries, then validate and clean them.
        domains = self._validate_domains(self._read_file(file_path, extension))

        if not domains:
            raise ValueError("No valid domains found in file")

        # Persist the job, then attach the domains to it.
        job_name = job_name or f"Bulk Scan - {file_path.name}"
        job_id = self._create_job(job_name, len(domains))
        added = self.db.add_domains_bulk(job_id, domains)

        logging.info("Job #%s created: %s/%s domains added", job_id, added, len(domains))

        return job_id, added

    def _read_file(self, file_path, extension):
        """Read the raw (unvalidated) domain entries from *file_path*.

        Returns an empty list for an extension without a reader.

        Raises:
            ValueError: Any error while opening or parsing the file; the
                original exception is chained as the cause.
        """
        readers = {
            '.txt': self._read_txt,
            '.csv': self._read_csv,
            '.json': self._read_json,
        }
        reader = readers.get(extension)
        if reader is None:
            return []

        try:
            return reader(file_path)
        except Exception as e:
            raise ValueError(f"Error reading file: {e}") from e

    def _read_txt(self, file_path):
        """Return the non-empty, non-comment lines of a text file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            entries = (line.strip() for line in f)
            # Test the stripped text so indented "#" comments are skipped too.
            return [entry for entry in entries if entry and not entry.startswith('#')]

    def _read_csv(self, file_path):
        """Return the first column of a CSV file, without a header row."""
        with open(file_path, 'r', encoding='utf-8', newline='') as f:
            cells = [row[0].strip() for row in csv.reader(f) if row and row[0].strip()]

        # A first entry that is not a domain is taken to be a header.  Empty
        # files and blank leading rows simply produce no entries.
        if cells and not self._is_domain(cells[0]):
            cells = cells[1:]
        return cells

    def _read_json(self, file_path):
        """Return the domain entries found in a JSON file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        if isinstance(data, dict) and 'domains' in data:
            data = data['domains']
            if isinstance(data, str):
                # A single domain given as a bare string.
                return [data]
            if not isinstance(data, list):
                return data

        if not isinstance(data, list):
            return []

        domains = []
        for item in data:
            if isinstance(item, dict):
                # Accept the different key names used for the domain.
                domain = (item.get('domain_name') or
                          item.get('domain') or
                          item.get('url', ''))
                if domain:
                    domains.append(domain)
            elif isinstance(item, str):
                domains.append(item)
        return domains

    def _validate_domains(self, domains):
        """Normalise, validate and de-duplicate *domains*.

        Each entry is lower-cased and stripped of scheme, path and port.
        Invalid entries are dropped.  The first occurrence of every domain is
        kept and input order is preserved, so results are deterministic.
        """
        clean_domains = []

        for domain in domains:
            domain = str(domain).strip().lower()

            # Remove http/https.
            domain = domain.replace('http://', '').replace('https://', '')

            # Remove the path.
            domain = domain.split('/')[0]

            # Remove the port; more than one ':' is left alone (IPv6-like).
            if domain.count(':') == 1:
                domain = domain.split(':')[0]

            domain = domain.strip()

            if self._is_domain(domain):
                clean_domains.append(domain)

        # Remove duplicates while keeping first-seen order.
        return list(dict.fromkeys(clean_domains))

    def _is_domain(self, domain):
        """Return True when *domain* looks like a syntactically valid domain.

        Requires a string of at least four characters containing a dot, no
        whitespace or characters from the invalid set, and no label that is
        empty or starts/ends with a hyphen.
        """
        if not isinstance(domain, str) or len(domain) < 4:
            return False

        if '.' not in domain:
            return False

        if not self._INVALID_CHARS.isdisjoint(domain):
            return False

        if any(char.isspace() for char in domain):
            return False

        return all(
            part and not part.startswith('-') and not part.endswith('-')
            for part in domain.split('.')
        )

    def _create_job(self, job_name, total_domains):
        """Create a new scan job and return its id.

        Raises:
            RuntimeError: The database layer failed; the original exception is
                chained as the cause.
        """
        try:
            return self.db.create_scan_job(job_name, total_domains)
        except Exception as e:
            raise RuntimeError(f"Failed to create scan job: {e}") from e
|