waymore 4.9-py3-none-any.whl → 5.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- waymore/__init__.py +1 -1
- waymore/waymore.py +255 -20
- {waymore-4.9.dist-info → waymore-5.1.dist-info}/METADATA +11 -7
- waymore-5.1.dist-info/RECORD +8 -0
- {waymore-4.9.dist-info → waymore-5.1.dist-info}/WHEEL +1 -1
- waymore-4.9.dist-info/RECORD +0 -8
- {waymore-4.9.dist-info → waymore-5.1.dist-info}/LICENSE +0 -0
- {waymore-4.9.dist-info → waymore-5.1.dist-info}/entry_points.txt +0 -0
- {waymore-4.9.dist-info → waymore-5.1.dist-info}/top_level.txt +0 -0
waymore/__init__.py
CHANGED
@@ -1 +1 @@
-__version__="4.9"
+__version__="5.1"
waymore/waymore.py
CHANGED
@@ -79,6 +79,7 @@ checkCommonCrawl = 0
 checkAlienVault = 0
 checkURLScan = 0
 checkVirusTotal = 0
+checkIntelx = 0
 argsInputHostname = ''
 responseOutputDirectory = ''

@@ -88,6 +89,9 @@ CCRAWL_INDEX_URL = 'https://index.commoncrawl.org/collinfo.json'
 ALIENVAULT_URL = 'https://otx.alienvault.com/api/v1/indicators/{TYPE}/{DOMAIN}/url_list?limit=500'
 URLSCAN_URL = 'https://urlscan.io/api/v1/search/?q=domain:{DOMAIN}&size=10000'
 VIRUSTOTAL_URL = 'https://www.virustotal.com/vtapi/v2/domain/report?apikey={APIKEY}&domain={DOMAIN}'
+INTELX_SEARCH_URL = 'https://2.intelx.io/phonebook/search'
+INTELX_RESULTS_URL = 'https://2.intelx.io/phonebook/search/result?id='
+INTELX_ACCOUNT_URL = 'https://2.intelx.io/authenticate/info'

 # User Agents to use when making requests, chosen at random
 USER_AGENT = [
@@ -144,6 +148,7 @@ URLSCAN_API_KEY = ''
 CONTINUE_RESPONSES_IF_PIPED = True
 WEBHOOK_DISCORD = ''
 DEFAULT_OUTPUT_DIR = ''
+INTELX_API_KEY = ''

 API_KEY_SECRET = "aHR0cHM6Ly95b3V0dS5iZS9kUXc0dzlXZ1hjUQ=="

@@ -285,7 +290,7 @@ def showOptions():
     """
     Show the chosen options and config settings
     """
-    global inputIsDomainANDPath, argsInput, isInputFile
+    global inputIsDomainANDPath, argsInput, isInputFile, INTELX_API_KEY

     try:
         write(colored('Selected config and settings:', 'cyan'))
@@ -325,6 +330,9 @@ def showOptions():
             providers = providers + 'URLScan, '
         if not args.xvt:
             providers = providers + 'VirusTotal, '
+        # Only show Intelligence X if the API key wa provided
+        if not args.xix and INTELX_API_KEY != '':
+            providers = providers + 'Intelligence X, '
         if providers == '':
             providers = 'None'
         write(colored('Providers: ' +str(providers.strip(', ')), 'magenta')+colored(' Which providers to check for URLs.','white'))
@@ -349,6 +357,11 @@ def showOptions():
             write(colored('VirusTotal API Key:', 'magenta')+colored(' {none} - You can get a FREE or paid API Key at https://www.virustotal.com/gui/join-us which will let you get some extra URLs.','white'))
         else:
             write(colored('VirusTotal API Key: ', 'magenta')+colored(VIRUSTOTAL_API_KEY))
+
+        if INTELX_API_KEY == '':
+            write(colored('Intelligence X API Key:', 'magenta')+colored(' {none} - You require a paid API Key from https://intelx.io/product','white'))
+        else:
+            write(colored('Intelligence X API Key: ', 'magenta')+colored(INTELX_API_KEY))

         if args.mode in ['U','B']:
             if args.output_urls != '':
@@ -401,12 +414,12 @@ def showOptions():
         write(colored('Response URL exclusions: ', 'magenta')+colored(FILTER_URL))

         if args.mt:
-            write(colored('-mt: ' +str(args.mt.lower()), 'magenta')+colored(' Only retrieve URLs and Responses that match these MIME Types.','white')+colored(' NOTE: This will NOT be applied to Alien Vault OTX
+            write(colored('-mt: ' +str(args.mt.lower()), 'magenta')+colored(' Only retrieve URLs and Responses that match these MIME Types.','white')+colored(' NOTE: This will NOT be applied to Alien Vault OTX, Virus Total and Intelligence X because they don\'t have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined - these will always be included. Consider excluding sources if this matters to you','yellow'))
         else:
             if args.ft:
-                write(colored('-ft: ' +str(args.ft.lower()), 'magenta')+colored(' Don\'t retrieve URLs and Responses that match these MIME Types.','white')+colored(' NOTE: This will NOT be applied to Alien Vault OTX
+                write(colored('-ft: ' +str(args.ft.lower()), 'magenta')+colored(' Don\'t retrieve URLs and Responses that match these MIME Types.','white')+colored(' NOTE: This will NOT be applied to Alien Vault OTX, Virus Total and Intelligence X because they don\'t have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined - these will always be included. Consider excluding sources if this matters to you','yellow'))
             else:
-                write(colored('MIME Type exclusions: ', 'magenta')+colored(FILTER_MIME)+colored(' Don\'t retrieve URLs and Responses that match these MIME Types.','white')+colored(' NOTE: This will NOT be applied to Alien Vault OTX
+                write(colored('MIME Type exclusions: ', 'magenta')+colored(FILTER_MIME)+colored(' Don\'t retrieve URLs and Responses that match these MIME Types.','white')+colored(' NOTE: This will NOT be applied to Alien Vault OTX, Virus Total and Intelligence X because they don\'t have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined - these will always be included. Consider excluding sources if this matters to you','yellow'))

         if args.keywords_only and args.keywords_only == '#CONFIG':
             if FILTER_KEYWORDS == '':
@@ -444,7 +457,7 @@ def getConfig():
     """
     Try to get the values from the config file, otherwise use the defaults
     """
-    global FILTER_CODE, FILTER_MIME, FILTER_URL, FILTER_KEYWORDS, URLSCAN_API_KEY, VIRUSTOTAL_API_KEY, CONTINUE_RESPONSES_IF_PIPED, subs, path, waymorePath, inputIsDomainANDPath, HTTP_ADAPTER, HTTP_ADAPTER_CC, argsInput, terminalWidth, MATCH_CODE, WEBHOOK_DISCORD, DEFAULT_OUTPUT_DIR, MATCH_MIME
+    global FILTER_CODE, FILTER_MIME, FILTER_URL, FILTER_KEYWORDS, URLSCAN_API_KEY, VIRUSTOTAL_API_KEY, CONTINUE_RESPONSES_IF_PIPED, subs, path, waymorePath, inputIsDomainANDPath, HTTP_ADAPTER, HTTP_ADAPTER_CC, argsInput, terminalWidth, MATCH_CODE, WEBHOOK_DISCORD, DEFAULT_OUTPUT_DIR, MATCH_MIME, INTELX_API_KEY
     try:

         # Set terminal width
@@ -580,6 +593,13 @@ def getConfig():
             writerr(colored('Unable to read "VIRUSTOTAL_API_KEY" from config.yml - consider adding (you can get a FREE api key at virustotal.com)', 'red'))
             VIRUSTOTAL_API_KEY = ''

+        try:
+            INTELX_API_KEY = config.get('INTELX_API_KEY')
+            if str(INTELX_API_KEY) == 'None':
+                INTELX_API_KEY = ''
+        except Exception as e:
+            INTELX_API_KEY = ''
+
         try:
             FILTER_KEYWORDS = config.get('FILTER_KEYWORDS')
             if str(FILTER_KEYWORDS) == 'None':
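For context, the new key follows the same pattern as the other API keys: a missing entry or a blank value in config.yml both collapse to an empty string, which later causes the Intelligence X source to be skipped. A minimal standalone sketch of that behaviour (illustrative only, assuming PyYAML and a config.yml like the one described in the README):

```python
# Illustrative sketch, not waymore's code: a blank or missing INTELX_API_KEY
# entry in config.yml ends up as '' so the Intelligence X source is skipped.
import yaml

sample_config = """
URLSCAN_API_KEY: ''
INTELX_API_KEY:          # left blank - YAML loads this as None
"""

config = yaml.safe_load(sample_config)
INTELX_API_KEY = config.get('INTELX_API_KEY')
if str(INTELX_API_KEY) == 'None':   # same normalisation used in getConfig()
    INTELX_API_KEY = ''

print(repr(INTELX_API_KEY))  # ''
```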
@@ -653,6 +673,7 @@ def getConfig():
         FILTER_CODE = DEFAULT_FILTER_CODE
         URLSCAN_API_KEY = ''
         VIRUSTOTAL_API_KEY = ''
+        INTELX_API_KEY = ''
         FILTER_KEYWORDS = ''
         CONTINUE_RESPONSES_IF_PIPED = True
         WEBHOOK_DISCORD = ''
@@ -1015,12 +1036,12 @@ def processURLOutput():
     """
     Show results of the URL output, i.e. getting URLs from archive.org and commoncrawl.org and write results to file
     """
-    global linksFound, subs, path, argsInput, checkWayback, checkCommonCrawl, checkAlienVault, checkURLScan, checkVirusTotal, DEFAULT_OUTPUT_DIR
+    global linksFound, subs, path, argsInput, checkWayback, checkCommonCrawl, checkAlienVault, checkURLScan, checkVirusTotal, DEFAULT_OUTPUT_DIR, checkIntelx

     try:

         if args.check_only:
-            totalRequests = checkWayback + checkCommonCrawl + checkAlienVault + checkURLScan + checkVirusTotal
+            totalRequests = checkWayback + checkCommonCrawl + checkAlienVault + checkURLScan + checkVirusTotal + checkIntelx
             minutes = totalRequests*1 // 60
             hours = minutes // 60
             days = hours // 24
@@ -1285,16 +1306,17 @@ def validateArgProviders(x):
     - otx
     - urlscan
     - virustotal
+    - intelx
     """
     invalid = False
     x = x.lower()
     providers = x.split(',')
     for provider in providers:
-        if not re.fullmatch(r'(wayback|commoncrawl|otx|urlscan|virustotal)', provider):
+        if not re.fullmatch(r'(wayback|commoncrawl|otx|urlscan|virustotal|intelx)', provider):
            invalid = True
            break
     if invalid:
-        raise argparse.ArgumentTypeError('Pass providers separated by a comma, e.g. wayback,commoncrawl,otx,urlscan,virustotal')
+        raise argparse.ArgumentTypeError('Pass providers separated by a comma, e.g. wayback,commoncrawl,otx,urlscan,virustotal,intelx')
     return x

 def processAlienVaultPage(url):
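As a quick illustration of the updated validator: the value passed to --providers is lowercased and split on commas, and every item must fully match the extended alternation, which now accepts intelx. A small sketch of the same check, assuming only the regex shown above:

```python
# Mirrors the updated validateArgProviders() check (illustrative only).
import re

PROVIDER_RE = r'(wayback|commoncrawl|otx|urlscan|virustotal|intelx)'

def providers_are_valid(value: str) -> bool:
    return all(re.fullmatch(PROVIDER_RE, p) for p in value.lower().split(','))

print(providers_are_valid('wayback,intelx'))    # True
print(providers_are_valid('wayback,intellix'))  # False - would trigger the argparse error
```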
@@ -1616,7 +1638,10 @@ def getURLScanUrls():
     # Get the first page from urlscan.io
     try:
         # Choose a random user agent string to use for any requests
-
+        # For other sources we would use `random.choice(USER_AGENT)` to asignn a random user-agent, but it seems
+        # that there are a handful of those that ALWAYS return 429. Passing a specific one all the time seems to
+        # be successful all the time
+        userAgent = "waymore by xnl-h4ck3r"
         session = requests.Session()
         session.mount('https://', HTTP_ADAPTER)
         session.mount('http://', HTTP_ADAPTER)
@@ -1767,7 +1792,6 @@ def getURLScanUrls():
     # Get the next page from urlscan.io
     try:
         # Choose a random user agent string to use for any requests
-        userAgent = random.choice(USER_AGENT)
         session = requests.Session()
         session.mount('https://', HTTP_ADAPTER)
         session.mount('http://', HTTP_ADAPTER)
@@ -2336,7 +2360,7 @@ def getCommonCrawlUrls():

 def processVirusTotalUrl(url):
     """
-    Process a specific URL from virustotal.
+    Process a specific URL from virustotal.com to determine whether to save the link
     """
     global argsInput, argsInputHostname

@@ -2378,7 +2402,7 @@ def processVirusTotalUrl(url):

         # Add link if it passed filters
         if addLink:
-            # Just get the hostname of the
+            # Just get the hostname of the url
             tldExtract = tldextract.extract(url)
             subDomain = tldExtract.subdomain
             if subDomain != '':
@@ -2423,11 +2447,10 @@ def getVirusTotalUrls():
         session = requests.Session()
         session.mount('https://', HTTP_ADAPTER)
         session.mount('http://', HTTP_ADAPTER)
-        # Pass the API-Key header too. This can change the max endpoints per page, depending on URLScan subscription
         resp = session.get(url, headers={'User-Agent':userAgent})
         requestsMade = requestsMade + 1
     except Exception as e:
-        write(colored(getSPACER('[ ERR ] Unable to get links from virustotal.
+        write(colored(getSPACER('[ ERR ] Unable to get links from virustotal.com: ' + str(e)), 'red'))
         return

     # Deal with any errors
@@ -2494,6 +2517,204 @@ def getVirusTotalUrls():
     except Exception as e:
         writerr(colored('ERROR getVirusTotalUrls 1: ' + str(e), 'red'))

+def processIntelxUrl(url):
+    """
+    Process a specific URL from intelx.io to determine whether to save the link
+    """
+    global argsInput, argsInputHostname
+
+    addLink = True
+
+    # If the url passed doesn't have a scheme, prefix with http://
+    match = re.search(r'^[A-za-z]*\:\/\/', url, flags=re.IGNORECASE)
+    if match is None:
+        url = 'http://'+url
+
+    try:
+        # If filters are required then test them
+        if not args.filter_responses_only:
+
+            # If the user requested -n / --no-subs then we don't want to add it if it has a sub domain (www. will not be classed as a sub domain)
+            if args.no_subs:
+                match = re.search(r'^[A-za-z]*\:\/\/(www\.)?'+re.escape(argsInputHostname), url, flags=re.IGNORECASE)
+                if match is None:
+                    addLink = False
+
+            # If the user didn't requested -f / --filter-responses-only then check http code
+            # Note we can't check MIME filter because it is not returned by VirusTotal API
+            if addLink and not args.filter_responses_only:
+
+                # Check the URL exclusions
+                if addLink:
+                    match = re.search(r'('+re.escape(FILTER_URL).replace(',','|')+')', url, flags=re.IGNORECASE)
+                    if match is not None:
+                        addLink = False
+
+                # Set keywords filter if -ko argument passed
+                if addLink and args.keywords_only:
+                    if args.keywords_only == '#CONFIG':
+                        match = re.search(r'('+re.escape(FILTER_KEYWORDS).replace(',','|')+')', url, flags=re.IGNORECASE)
+                    else:
+                        match = re.search(r'('+args.keywords_only+')', url, flags=re.IGNORECASE)
+                    if match is None:
+                        addLink = False
+
+        # Add link if it passed filters
+        if addLink:
+            linksFoundAdd(url)
+
+    except Exception as e:
+        writerr(colored('ERROR processIntelxUrl 1: ' + str(e), 'red'))
+
+def processIntelxType(target, credits):
+    '''
+    target: 1 - Domains
+    target: 3 - URLs
+    '''
+    try:
+        try:
+            requestsMade = 0
+
+            # Choose a random user agent string to use for any requests
+            userAgent = random.choice(USER_AGENT)
+            session = requests.Session()
+            session.mount('https://', HTTP_ADAPTER)
+            session.mount('http://', HTTP_ADAPTER)
+            # Pass the API key in the X-Key header too.
+            resp = session.post(INTELX_SEARCH_URL, data='{"term":"'+quote(argsInputHostname)+'","target":'+str(target)+'}', headers={'User-Agent':userAgent,'X-Key':INTELX_API_KEY})
+            requestsMade = requestsMade + 1
+        except Exception as e:
+            write(colored(getSPACER('[ ERR ] Unable to get links from intelx.io: ' + str(e)), 'red'))
+            return
+
+        # Deal with any errors
+        if resp.status_code == 429:
+            writerr(colored(getSPACER('[ 429 ] IntelX rate limit reached so unable to get links.'),'red'))
+            return
+        elif resp.status_code == 401:
+            writerr(colored(getSPACER('[ 401 ] IntelX: Not authorized. The source requires a paid API key. Check your API key is correct.'),'red'))
+            return
+        elif resp.status_code == 402:
+            if credits.startswith("0/"):
+                writerr(colored(getSPACER('[ 402 ] IntelX: You have run out of daily credits on Intelx ('+credits+').'),'red'))
+            else:
+                writerr(colored(getSPACER('[ 402 ] IntelX: It appears you have run out of daily credits on Intelx.'),'red'))
+            return
+        elif resp.status_code == 403:
+            writerr(colored(getSPACER('[ 403 ] IntelX: Permission denied. Check your API key is correct.'),'red'))
+            return
+        elif resp.status_code != 200:
+            writerr(colored(getSPACER('[ ' + str(resp.status_code) + ' ] Unable to get links from intelx.io'),'red'))
+            return
+
+        # Get the JSON response
+        try:
+            jsonResp = json.loads(resp.text.strip())
+            id = jsonResp['id']
+        except:
+            writerr(colored(getSPACER('[ ERR ] There was an unexpected response from the Intelligence API'),'red'))
+            return
+
+        # Get each page of the results
+        moreResults = True
+        status = 0
+        while moreResults:
+            if stopSource:
+                break
+            try:
+                resp = session.get(INTELX_RESULTS_URL+id, headers={'User-Agent':userAgent,'X-Key':INTELX_API_KEY})
+                requestsMade = requestsMade + 1
+            except Exception as e:
+                write(colored(getSPACER('[ ERR ] Unable to get links from intelx.io: ' + str(e)), 'red'))
+                return
+
+            # Get the JSON response
+            try:
+                jsonResp = json.loads(resp.text.strip())
+                status = jsonResp['status']
+            except:
+                writerr(colored(getSPACER('[ ERR ] There was an unexpected response from the Intelligence API'),'red'))
+                moreResults = False
+
+            try:
+                selector_values = [entry['selectorvalue'] for entry in jsonResp.get('selectors', [])]
+            except Exception as e:
+                selector_values = []
+            try:
+                selector_valuesh = [entry['selectorvalueh'] for entry in jsonResp.get('selectors', [])]
+            except Exception as e:
+                selector_valuesh = []
+
+            # Work out whether to include each url
+            unique_values = list(set(selector_values + selector_valuesh))
+            for ixurl in unique_values:
+                if stopSource:
+                    break
+                processIntelxUrl(ixurl)
+
+            if status == 1 or selector_values == []:
+                moreResults = False
+
+    except Exception as e:
+        writerr(colored('ERROR processIntelxType 1: ' + str(e), 'red'))
+
+def getIntelxAccountInfo() -> str:
+    '''
+    Get the account info and return the number of Credits remainiing from the /phonebook/search
+    '''
+    try:
+        # Choose a random user agent string to use for any requests
+        userAgent = random.choice(USER_AGENT)
+        session = requests.Session()
+        session.mount('https://', HTTP_ADAPTER)
+        session.mount('http://', HTTP_ADAPTER)
+        # Pass the API key in the X-Key header too.
+        resp = session.get(INTELX_ACCOUNT_URL, headers={'User-Agent':userAgent,'X-Key':INTELX_API_KEY})
+        jsonResp = json.loads(resp.text.strip())
+        credits = str(jsonResp.get("paths", {}).get("/phonebook/search", {}).get("Credit", "Unknown"))
+        credits_max = str(jsonResp.get("paths", {}).get("/phonebook/search", {}).get("CreditMax", "Unknown"))
+        return credits+"/"+credits_max
+    except:
+        return "Unknown"
+
+def getIntelxUrls():
+    """
+    Get URLs from the Intelligence X Phonebook search
+    """
+    global INTELX_API_KEY, linksFound, waymorePath, subs, stopProgram, stopSource, argsInput, checkIntelx, argsInputHostname
+
+    # Write the file of URL's for the passed domain/URL
+    try:
+        if args.check_only:
+            write(colored('Get URLs from Intelligence X: ','cyan')+colored('minimum 4 requests','white'))
+            checkIntelx = 4
+            return
+
+        stopSource = False
+        originalLinkCount = len(linksFound)
+        credits = getIntelxAccountInfo()
+        if verbose():
+            write(colored('The Intelligence X URL requested to get links (Credits: '+credits+'): ','magenta')+colored(INTELX_SEARCH_URL+'\n','white'))
+
+        if not args.check_only:
+            write(colored('\rGetting links from intelx.io API...\r','cyan'))
+
+        # Get the domains from Intelligence X if the --no-subs wasn't passed
+        if not args.no_subs:
+            processIntelxType(1, credits)
+
+        # Get the URLs from Intelligence X
+        processIntelxType(3, credits)
+
+        linkCount = len(linksFound) - originalLinkCount
+        if args.xwm and args.xcc and args.xav and args.xus and args.xvt:
+            write(getSPACER(colored('Links found on intelx.io: ', 'cyan')+colored(str(linkCount),'white'))+'\n')
+        else:
+            write(getSPACER(colored('Extra links found on intelx.io: ', 'cyan')+colored(str(linkCount),'white'))+'\n')
+
+    except Exception as e:
+        writerr(colored('ERROR getIntelxUrls 1: ' + str(e), 'red'))
+
 def processResponses():
     """
     Get archived responses from Wayback Machine (archive.org)
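For readers who want the new source flow without the surrounding error handling, below is a condensed, standalone sketch (illustrative only, not waymore's exact code) of what getIntelxAccountInfo(), processIntelxType() and processIntelxUrl() do together: check the remaining /phonebook/search credits, start a phonebook search for the target term, then poll the result endpoint and collect selectorvalue/selectorvalueh entries until the API reports status 1 or returns no more selectors. The placeholder key and domain are assumptions, the JSON body is sent with requests' json= helper rather than a hand-built string, and waymore additionally routes every value through its URL/keyword filters.

```python
# Condensed sketch of the Intelligence X phonebook flow added in 5.1
# (placeholder key/domain; the real logic lives in processIntelxType/getIntelxUrls).
import requests

INTELX_API_KEY = 'YOUR-PAID-API-KEY'   # placeholder
DOMAIN = 'example.com'                 # placeholder target
HEADERS = {'User-Agent': 'waymore by xnl-h4ck3r', 'X-Key': INTELX_API_KEY}

# 1. Remaining credits for the /phonebook/search path (cf. getIntelxAccountInfo)
info = requests.get('https://2.intelx.io/authenticate/info', headers=HEADERS).json()
phonebook = info.get('paths', {}).get('/phonebook/search', {})
print('Credits:', phonebook.get('Credit', '?'), '/', phonebook.get('CreditMax', '?'))

# 2. Start a phonebook search - target 1 = domains, target 3 = URLs
resp = requests.post('https://2.intelx.io/phonebook/search',
                     json={'term': DOMAIN, 'target': 3}, headers=HEADERS)
resp.raise_for_status()
search_id = resp.json()['id']

# 3. Poll the result endpoint until status 1 or no selectors are returned
found = set()
while True:
    page = requests.get('https://2.intelx.io/phonebook/search/result?id=' + search_id,
                        headers=HEADERS).json()
    selectors = page.get('selectors', [])
    for s in selectors:
        for key in ('selectorvalue', 'selectorvalueh'):
            if s.get(key):
                found.add(s[key])
    if page.get('status') == 1 or not selectors:
        break

print(len(found), 'unique values returned')
```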
@@ -2993,7 +3214,7 @@ def combineInlineJS():

 # Run waymore
 def main():
-    global args, DEFAULT_TIMEOUT, inputValues, argsInput, linksFound, linkMimes, successCount, failureCount, fileCount, totalResponses, totalPages, indexFile, path, stopSource, stopProgram, VIRUSTOTAL_API_KEY, inputIsSubDomain, argsInputHostname, WEBHOOK_DISCORD, responseOutputDirectory, fileCount
+    global args, DEFAULT_TIMEOUT, inputValues, argsInput, linksFound, linkMimes, successCount, failureCount, fileCount, totalResponses, totalPages, indexFile, path, stopSource, stopProgram, VIRUSTOTAL_API_KEY, inputIsSubDomain, argsInputHostname, WEBHOOK_DISCORD, responseOutputDirectory, fileCount, INTELX_API_KEY

     # Tell Python to run the handler() function when SIGINT is received
     signal(SIGINT, handler)
@@ -3051,7 +3272,7 @@ def main():
     parser.add_argument(
         '-ft',
         action='store',
-        help='Filter MIME Types for retrieved URLs and responses. Comma separated list of MIME Types (default: the FILTER_MIME values from config.yml). Passing this argument will override the value from config.yml. NOTE: This will NOT be applied to Alien Vault OTX
+        help='Filter MIME Types for retrieved URLs and responses. Comma separated list of MIME Types (default: the FILTER_MIME values from config.yml). Passing this argument will override the value from config.yml. NOTE: This will NOT be applied to Alien Vault OTX, Virus Total and Intelligence X because they don\'t have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined - these will always be included. Consider excluding sources if this matters to you.',
         type=validateArgMimeTypes,
     )
     parser.add_argument(
@@ -3063,7 +3284,7 @@ def main():
     parser.add_argument(
         '-mt',
         action='store',
-        help='Only MIME Types for retrieved URLs and responses. Comma separated list of MIME types. Passing this argument overrides the config FILTER_MIME and -ft. NOTE: This will NOT be applied to Alien Vault OTX
+        help='Only MIME Types for retrieved URLs and responses. Comma separated list of MIME types. Passing this argument overrides the config FILTER_MIME and -ft. NOTE: This will NOT be applied to Alien Vault OTX, Virus Total and Intelligence X because they don\'t have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined - these will always be included. Consider excluding sources if this matters to you.',
         type=validateArgMimeTypes,
     )
     parser.add_argument(
@@ -3141,13 +3362,19 @@ def main():
         help='Exclude checks for links from virustotal.com',
         default=False
     )
+    parser.add_argument(
+        '-xix',
+        action='store_true',
+        help='Exclude checks for links from intelx.io',
+        default=False
+    )
     parser.add_argument(
         '--providers',
         action='store',
-        help='A comma separated list of source providers that you want to get URLs from. The values can be wayback,commoncrawl,otx,urlscan and
+        help='A comma separated list of source providers that you want to get URLs from. The values can be wayback,commoncrawl,otx,urlscan,virustotal and intelx. Passing this will override any exclude arguments (e.g. -xwm,-xcc, etc.) passed to exclude sources, and reset those based on what was passed with this argument.',
         default=[],
         type=validateArgProviders,
-        metavar='{wayback,commoncrawl,otx,urlscan,virustotal}'
+        metavar='{wayback,commoncrawl,otx,urlscan,virustotal,intelx}'
     )
     parser.add_argument(
         '-lcc',
@@ -3301,6 +3528,10 @@ def main():
             args.xvt = True
         else:
             args.xvt = False
+        if 'intelx' not in args.providers:
+            args.xix = True
+        else:
+            args.xix = False

     # If no input was given, raise an error
     if sys.stdin.isatty():
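The hunk above extends an existing pattern: when --providers is supplied, every exclude flag is recomputed from that list, so a source only runs if it is explicitly named. A hypothetical condensed version of that mapping (the flag-to-provider pairing is assumed from the flags and provider values seen elsewhere in this diff):

```python
# Hypothetical condensed form of the --providers handling shown above: each
# exclude flag becomes True unless its provider name was requested.
def exclude_flags(providers):
    mapping = {'xwm': 'wayback', 'xcc': 'commoncrawl', 'xav': 'otx',
               'xus': 'urlscan', 'xvt': 'virustotal', 'xix': 'intelx'}
    return {flag: name not in providers for flag, name in mapping.items()}

print(exclude_flags(['wayback', 'intelx']))
# {'xwm': False, 'xcc': True, 'xav': True, 'xus': True, 'xvt': True, 'xix': False}
```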
@@ -3390,6 +3621,10 @@ def main():
     # If not requested to exclude, get URLs from virustotal.com if we have an API key
     if not args.xvt and VIRUSTOTAL_API_KEY != '' and stopProgram is None:
         getVirusTotalUrls()
+
+    # If not requested to exclude, get URLs from intelx.io if we have an API key
+    if not args.xix and INTELX_API_KEY != '' and stopProgram is None:
+        getIntelxUrls()

     # Output results of all searches
     processURLOutput()
{waymore-4.9.dist-info → waymore-5.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: waymore
-Version: 4.9
+Version: 5.1
 Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan & VirusTotal!
 Home-page: https://github.com/xnl-h4ck3r/waymore
 Author: @xnl-h4ck3r
@@ -15,7 +15,7 @@ Requires-Dist: tldextract

 <center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>

-## About - v4.9
+## About - v5.1

 The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.

@@ -23,7 +23,7 @@ The idea behind **waymore** is to find even more links from the Wayback Machine
 👉 Also, other tools do not currenrtly deal with the rate limiting now in place by the sources, and will often just stop with incomplete results and not let you know they are incomplete.

 Anyone who does bug bounty will have likely used the amazing [waybackurls](https://github.com/tomnomnom/waybackurls) by @TomNomNoms. This tool gets URLs from [web.archive.org](https://web.archive.org) and additional links (if any) from one of the index collections on [index.commoncrawl.org](http://index.commoncrawl.org/).
-You would have also likely used the amazing [gau](https://github.com/lc/gau) by @hacker\_ which also finds URL's from wayback archive, Common Crawl, but also from Alien Vault and
+You would have also likely used the amazing [gau](https://github.com/lc/gau) by @hacker\_ which also finds URL's from wayback archive, Common Crawl, but also from Alien Vault, URLScan, Virus Total and Intelligence X.
 Now **waymore** gets URL's from ALL of those sources too (with ability to filter more to get what you want):

 - Wayback Machine (web.archive.org)
@@ -31,6 +31,7 @@ Now **waymore** gets URL's from ALL of those sources too (with ability to filter
 - Alien Vault OTX (otx.alienvault.com)
 - URLScan (urlscan.io)
 - Virus Total (virustotal.com)
+- Intelligence X (intelx.io) - PAID SOURCE ONLY

 👉 It's a point that many seem to miss, so I'll just add it again :) ... The biggest difference between **waymore** and other tools is that it can also **download the archived responses** for URLs on wayback machine so that you can then search these for even more links, developer comments, extra parameters, etc. etc.

@@ -83,9 +84,9 @@ pipx install git+https://github.com/xnl-h4ck3r/waymore.git
 | -n | --no-subs | Don't include subdomains of the target domain (only used if input is not a domain with a specific path). |
 | -f | --filter-responses-only | The initial links from sources will not be filtered, only the responses that are downloaded, e.g. it maybe useful to still see all available paths from the links, even if you don't want to check the content. |
 | -fc | | Filter HTTP status codes for retrieved URLs and responses. Comma separated list of codes (default: the `FILTER_CODE` values from `config.yml`). Passing this argument will override the value from `config.yml` |
-| -ft | | Filter MIME Types for retrieved URLs and responses. Comma separated list of MIME Types (default: the `FILTER_MIME` values from `config.yml`). Passing this argument will override the value from `config.yml`. **NOTE: This will NOT be applied to Alien Vault OTX
+| -ft | | Filter MIME Types for retrieved URLs and responses. Comma separated list of MIME Types (default: the `FILTER_MIME` values from `config.yml`). Passing this argument will override the value from `config.yml`. **NOTE: This will NOT be applied to Alien Vault OTX, Virus Total and Intelligence X because they don't have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined - these will always be included. Consider excluding sources if this matters to you.**. |
 | -mc | | Only Match HTTP status codes for retrieved URLs and responses. Comma separated list of codes. Passing this argument overrides the config `FILTER_CODE` and `-fc`. |
-| -mt | | Only MIME Types for retrieved URLs and responses. Comma separated list of MIME types. Passing this argument overrides the config `FILTER_MIME` and `-ft`. **NOTE: This will NOT be applied to Alien Vault OTX
+| -mt | | Only MIME Types for retrieved URLs and responses. Comma separated list of MIME types. Passing this argument overrides the config `FILTER_MIME` and `-ft`. **NOTE: This will NOT be applied to Alien Vault OTX, Virus Total and Intelligence X because they don't have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined - these will always be included. Consider excluding sources if this matters to you.**. |
 | -l | --limit | How many responses will be saved (if `-mode R` or `-mode B` is passed). A positive value will get the **first N** results, a negative value will get the **last N** results. A value of 0 will get **ALL** responses (default: 5000) |
 | -from | --from-date | What date to get responses from. If not specified it will get from the earliest possible results. A partial value can be passed, e.g. `2016`, `201805`, etc. |
 | -to | --to-date | What date to get responses to. If not specified it will get to the latest possible results. A partial value can be passed, e.g. `2021`, `202112`, etc. |
@@ -97,6 +98,7 @@ pipx install git+https://github.com/xnl-h4ck3r/waymore.git
 | -xav | | Exclude checks for links from alienvault.com |
 | -xus | | Exclude checks for links from urlscan.io |
 | -xvt | | Exclude checks for links from virustotal.com |
+| -xix | | Exclude checks for links from Intelligence X.com |
 | -lcc | | Limit the number of Common Crawl index collections searched, e.g. `-lcc 10` will just search the latest `10` collections (default: 1). As of November 2024 there are currently 106 collections. Setting to `0` will search **ALL** collections. If you don't want to search Common Crawl at all, use the `-xcc` option. |
 | -lcy | | Limit the number of Common Crawl index collections searched by the year of the index data. The earliest index has data from 2008. Setting to 0 (default) will search collections or any year (but in conjuction with `-lcc`). For example, if you are only interested in data from 2015 and after, pass `-lcy 2015`. This will override the value of `-lcc` if passed. If you don't want to search Common Crawl at all, use the `-xcc` option. |
 | -t | --timeout | This is for archived responses only! How many seconds to wait for the server to send data before giving up (default: 30) |
@@ -164,8 +166,9 @@ The `config.yml` file (typically in `~/.config/waymore/`) have values that can b
 - `CONTINUE_RESPONSES_IF_PIPED` - If retrieving archive responses doesn't complete, you will be prompted next time whether you want to continue with the previous run. However, if `stdout` is piped to another process it is assumed you don't want to have an interactive prompt. A value of `True` (default) will determine assure the previous run will be continued. if you want a fresh run every time then set to `False`.
 - `WEBHOOK_DISCORD` - If the `--notify-discord` argument is passed, `knoxnl` will send a notification to this Discord wehook when a successful XSS is found.
 - `DEFAULT_OUTPUT_DIR` - This is the default location of any output files written if the `-oU` and `-oR` arguments are not used. If the value of this key is blank, then it will default to the location of the `config.yml` file.
+- `INTELX_API_KEY` - You can sign up to [intelx.io here](https://intelx.io/product). It requires a paid API key to do the `/phonebook/search` through their API (as of 2024-09-01, the Phonebook service has been restricted to paid users due to constant abuse by spam accounts).

-**NOTE: The MIME types cannot be filtered for Alien Vault OTX
+**NOTE: The MIME types cannot be filtered for Alien Vault OTX, Virus Total and Intelligence X because they don't have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined for a URL. In these cases, URLs will be included regardless of filter or match. Bear this in mind and consider excluding certain providers if this is important.**

 ## Output

@@ -281,7 +284,7 @@ If you come across any problems at all, or have ideas for improvements, please f

 ## TODO

-- Add an `-
+- Add an `-oos` argument that accepts a file of Out Of Scope subdomains/URLs that will not be returned in the output, or have any responses downloaded

 ## References

@@ -290,6 +293,7 @@ If you come across any problems at all, or have ideas for improvements, please f
 - [Alien Vault OTX API](https://otx.alienvault.com/assets/static/external_api.html)
 - [URLScan API](https://urlscan.io/docs/api/)
 - [VirusTotal API (v2)](https://docs.virustotal.com/v2.0/reference/getting-started)
+- [Intelligence X SDK](https://github.com/IntelligenceX/SDK?tab=readme-ov-file#intelligence-x-public-sdk)

 Good luck and good hunting!
 If you really love the tool (or any others), or they helped you find an awesome bounty, consider [BUYING ME A COFFEE!](https://ko-fi.com/xnlh4ck3r) ☕ (I could use the caffeine!)
waymore-5.1.dist-info/RECORD
ADDED
@@ -0,0 +1,8 @@
+waymore/__init__.py,sha256=X4ON0rponPxoQ0b-Wv7zvwCPFlzC2oSmg_nHdJmpyis,17
+waymore/waymore.py,sha256=sG4cpeFN0cOfO06AgetlTbs20fVUbpxQr4g_RuHAlaw,188638
+waymore-5.1.dist-info/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
+waymore-5.1.dist-info/METADATA,sha256=npHpoTL5ceG210zvaBtXHVSfWqE2Gh_ekR_beYLTRx0,50674
+waymore-5.1.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+waymore-5.1.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
+waymore-5.1.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
+waymore-5.1.dist-info/RECORD,,
waymore-4.9.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
-waymore/__init__.py,sha256=3oueXc07OVc1C6HkNlbSrHDdAoM-CsAgJQ4LAyBP4LA,17
-waymore/waymore.py,sha256=BUWxqHcC_ZuIeRHqRc4lKZDluBptx6H-jWHsRoO7jq4,178141
-waymore-4.9.dist-info/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
-waymore-4.9.dist-info/METADATA,sha256=2_Ru3GCk7zMVuyqFotCBcw6nbRYMUCuWAZ-oX_YdQMY,49511
-waymore-4.9.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
-waymore-4.9.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
-waymore-4.9.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
-waymore-4.9.dist-info/RECORD,,
File without changes
|
|
File without changes
|
|
File without changes
|