waymore 4.3__py3-none-any.whl → 4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- waymore/__init__.py +1 -1
- waymore/waymore.py +112 -48
- {waymore-4.3.dist-info → waymore-4.5.dist-info}/METADATA +2 -3
- waymore-4.5.dist-info/RECORD +8 -0
- {waymore-4.3.dist-info → waymore-4.5.dist-info}/WHEEL +1 -1
- waymore-4.3.dist-info/RECORD +0 -8
- {waymore-4.3.dist-info → waymore-4.5.dist-info}/LICENSE +0 -0
- {waymore-4.3.dist-info → waymore-4.5.dist-info}/entry_points.txt +0 -0
- {waymore-4.3.dist-info → waymore-4.5.dist-info}/top_level.txt +0 -0
waymore/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__="4.
|
|
1
|
+
__version__="4.5"
|
waymore/waymore.py
CHANGED
|
@@ -706,23 +706,55 @@ def fixArchiveOrgUrl(url):
|
|
|
706
706
|
url = url[0:newline]
|
|
707
707
|
return url
|
|
708
708
|
|
|
709
|
+
# Add a link to the linksFound collection for archived responses (includes timestamp prefix)
|
|
710
|
+
def linksFoundResponseAdd(link):
|
|
711
|
+
global linksFound, argsInput, argsInputHostname
|
|
712
|
+
|
|
713
|
+
try:
|
|
714
|
+
if inputIsDomainANDPath:
|
|
715
|
+
checkInput = argsInput
|
|
716
|
+
else:
|
|
717
|
+
checkInput = argsInputHostname
|
|
718
|
+
|
|
719
|
+
# Remove the timestamp
|
|
720
|
+
linkWithoutTimestamp = link.split('/', 1)[-1]
|
|
721
|
+
|
|
722
|
+
# If the link specifies port 80 or 443, e.g. http://example.com:80, then remove the port
|
|
723
|
+
parsed = urlparse(linkWithoutTimestamp.strip())
|
|
724
|
+
if parsed.port in (80, 443):
|
|
725
|
+
new_netloc = parsed.hostname
|
|
726
|
+
parsed_url = parsed._replace(netloc=new_netloc).geturl()
|
|
727
|
+
else:
|
|
728
|
+
parsed_url = linkWithoutTimestamp
|
|
729
|
+
|
|
730
|
+
# Don't write it if the link does not contain the requested domain (this can sometimes happen)
|
|
731
|
+
if parsed_url.find(checkInput) >= 0:
|
|
732
|
+
linksFound.add(link)
|
|
733
|
+
except Exception as e:
|
|
734
|
+
linksFound.add(link)
|
|
735
|
+
|
|
709
736
|
# Add a link to the linksFound collection
|
|
710
737
|
def linksFoundAdd(link):
|
|
711
738
|
global linksFound, argsInput, argsInputHostname
|
|
712
|
-
|
|
739
|
+
|
|
713
740
|
try:
|
|
714
741
|
if inputIsDomainANDPath:
|
|
715
742
|
checkInput = argsInput
|
|
716
743
|
else:
|
|
717
744
|
checkInput = argsInputHostname
|
|
745
|
+
|
|
746
|
+
# If the link specifies port 80 or 443, e.g. http://example.com:80, then remove the port
|
|
747
|
+
parsed = urlparse(link.strip())
|
|
748
|
+
if parsed.port in (80, 443):
|
|
749
|
+
new_netloc = parsed.hostname
|
|
750
|
+
parsed_url = parsed._replace(netloc=new_netloc).geturl()
|
|
751
|
+
else:
|
|
752
|
+
parsed_url = link
|
|
753
|
+
|
|
718
754
|
# Don't write it if the link does not contain the requested domain (this can sometimes happen)
|
|
719
|
-
if
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
newNetloc = parsed.netloc.split(':')[0]
|
|
723
|
-
parsed = parsed._replace(netloc=newNetloc).geturl()
|
|
724
|
-
linksFound.add(parsed)
|
|
725
|
-
except:
|
|
755
|
+
if parsed_url.find(checkInput) >= 0:
|
|
756
|
+
linksFound.add(link)
|
|
757
|
+
except Exception as e:
|
|
726
758
|
linksFound.add(link)
|
|
727
759
|
|
|
728
760
|
def processArchiveUrl(url):
|
|
@@ -1352,11 +1384,15 @@ def getAlienVaultUrls():
|
|
|
1352
1384
|
# Carry on if something was found
|
|
1353
1385
|
if resp.text.lower().find('"error": "') < 0:
|
|
1354
1386
|
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1387
|
+
try:
|
|
1388
|
+
# Get the JSON response
|
|
1389
|
+
jsonResp = json.loads(resp.text.strip())
|
|
1390
|
+
|
|
1391
|
+
# Try to get the number of results
|
|
1392
|
+
totalUrls = int(jsonResp['full_size'])
|
|
1393
|
+
except:
|
|
1394
|
+
writerr(colored(getSPACER('[ ERR ] There was an unexpected response from the Alien Vault API'),'red'))
|
|
1395
|
+
totalUrls = 0
|
|
1360
1396
|
|
|
1361
1397
|
# If there are results, carry on
|
|
1362
1398
|
if totalUrls > 0 or args.check_only:
|
|
@@ -1556,19 +1592,28 @@ def getURLScanUrls():
|
|
|
1556
1592
|
writerr(colored(getSPACER('[ ' + str(resp.status_code) + ' ] Unable to get links from urlscan.io'),'red'))
|
|
1557
1593
|
return
|
|
1558
1594
|
|
|
1559
|
-
|
|
1560
|
-
|
|
1595
|
+
try:
|
|
1596
|
+
# Get the JSON response
|
|
1597
|
+
jsonResp = json.loads(resp.text.strip())
|
|
1561
1598
|
|
|
1562
|
-
|
|
1563
|
-
|
|
1599
|
+
# Get the number of results
|
|
1600
|
+
totalUrls = int(jsonResp['total'])
|
|
1601
|
+
except:
|
|
1602
|
+
writerr(colored(getSPACER('[ ERR ] There was an unexpected response from the URLScan API'),'red'))
|
|
1603
|
+
totalUrls = 0
|
|
1564
1604
|
|
|
1605
|
+
# Carry on if something was found
|
|
1565
1606
|
if args.check_only:
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1607
|
+
try:
|
|
1608
|
+
hasMore = jsonResp['has_more']
|
|
1609
|
+
if hasMore:
|
|
1610
|
+
write(colored('Get URLs from URLScan: ','cyan')+colored('UNKNOWN requests','white'))
|
|
1611
|
+
else:
|
|
1612
|
+
write(colored('Get URLs from URLScan: ','cyan')+colored('1 request','white'))
|
|
1613
|
+
except:
|
|
1614
|
+
pass
|
|
1571
1615
|
checkURLScan = 1
|
|
1616
|
+
|
|
1572
1617
|
else:
|
|
1573
1618
|
# Carry on if something was found
|
|
1574
1619
|
if int(totalUrls) > 0:
|
|
@@ -1714,6 +1759,7 @@ def processWayBackPage(url):
|
|
|
1714
1759
|
if not stopSource:
|
|
1715
1760
|
try:
|
|
1716
1761
|
# Choose a random user agent string to use for any requests
|
|
1762
|
+
resp = None
|
|
1717
1763
|
userAgent = random.choice(USER_AGENT)
|
|
1718
1764
|
page = url.split('page=')[1]
|
|
1719
1765
|
session = requests.Session()
|
|
@@ -1785,8 +1831,11 @@ def processWayBackPage(url):
|
|
|
1785
1831
|
results = line.decode("utf-8")
|
|
1786
1832
|
foundUrl = fixArchiveOrgUrl(str(results).split(' ')[1])
|
|
1787
1833
|
|
|
1788
|
-
#
|
|
1789
|
-
|
|
1834
|
+
# If --filter-responses-only wasn't used, then check the URL exclusions
|
|
1835
|
+
if args.filter_responses_only:
|
|
1836
|
+
match = None
|
|
1837
|
+
else:
|
|
1838
|
+
match = re.search(r'('+re.escape(FILTER_URL).replace(',','|')+')', foundUrl, flags=re.IGNORECASE)
|
|
1790
1839
|
if match is None:
|
|
1791
1840
|
# Only get MIME Types if --verbose option was selected
|
|
1792
1841
|
if verbose():
|
|
@@ -2300,29 +2349,33 @@ def getVirusTotalUrls():
|
|
|
2300
2349
|
return
|
|
2301
2350
|
|
|
2302
2351
|
# Get the JSON response
|
|
2303
|
-
|
|
2352
|
+
try:
|
|
2353
|
+
jsonResp = json.loads(resp.text.strip())
|
|
2304
2354
|
|
|
2305
|
-
|
|
2306
|
-
|
|
2307
|
-
|
|
2308
|
-
|
|
2355
|
+
# Get the different URLs
|
|
2356
|
+
if args.no_subs:
|
|
2357
|
+
subDomains = []
|
|
2358
|
+
else:
|
|
2359
|
+
try:
|
|
2360
|
+
subDomains = jsonResp['subdomains']
|
|
2361
|
+
except Exception as e:
|
|
2362
|
+
subDomains = []
|
|
2363
|
+
try:
|
|
2364
|
+
detectedUrls = [entry['url'] for entry in jsonResp.get('detected_urls', [])]
|
|
2365
|
+
except Exception as e:
|
|
2366
|
+
detectedUrls = []
|
|
2309
2367
|
try:
|
|
2310
|
-
|
|
2368
|
+
undetectedUrls = [entry[0] for entry in jsonResp.get('undetected_urls', [])]
|
|
2311
2369
|
except Exception as e:
|
|
2312
|
-
|
|
2313
|
-
|
|
2314
|
-
|
|
2315
|
-
|
|
2316
|
-
|
|
2317
|
-
|
|
2318
|
-
|
|
2319
|
-
except Exception as e:
|
|
2320
|
-
undetectedUrls = []
|
|
2321
|
-
try:
|
|
2322
|
-
totalUrls = set(subDomains + detectedUrls + undetectedUrls)
|
|
2323
|
-
except Exception as e:
|
|
2370
|
+
undetectedUrls = []
|
|
2371
|
+
try:
|
|
2372
|
+
totalUrls = set(subDomains + detectedUrls + undetectedUrls)
|
|
2373
|
+
except Exception as e:
|
|
2374
|
+
totalUrls = []
|
|
2375
|
+
except:
|
|
2376
|
+
writerr(colored(getSPACER('[ ERR ] There was an unexpected response from the VirusTotal API'),'red'))
|
|
2324
2377
|
totalUrls = []
|
|
2325
|
-
|
|
2378
|
+
|
|
2326
2379
|
if args.check_only:
|
|
2327
2380
|
write(colored('Get URLs from VirusTotal: ','cyan')+colored('1 request','white'))
|
|
2328
2381
|
checkVirusTotal = 1
|
|
@@ -2347,7 +2400,7 @@ def getVirusTotalUrls():
|
|
|
2347
2400
|
|
|
2348
2401
|
except Exception as e:
|
|
2349
2402
|
writerr(colored('ERROR getVirusTotalUrls 1: ' + str(e), 'red'))
|
|
2350
|
-
|
|
2403
|
+
|
|
2351
2404
|
def processResponses():
|
|
2352
2405
|
"""
|
|
2353
2406
|
Get archived responses from Wayback Machine (archive.org)
|
|
@@ -2513,13 +2566,14 @@ def processResponses():
|
|
|
2513
2566
|
except:
|
|
2514
2567
|
pass
|
|
2515
2568
|
|
|
2516
|
-
# Go through the response to save the links found
|
|
2569
|
+
# Go through the response to save the links found
|
|
2517
2570
|
for line in resp.iter_lines():
|
|
2518
2571
|
try:
|
|
2519
2572
|
results = line.decode("utf-8")
|
|
2520
|
-
|
|
2521
|
-
|
|
2522
|
-
|
|
2573
|
+
parts = results.split(' ', 2)
|
|
2574
|
+
timestamp = parts[0]
|
|
2575
|
+
originalUrl = parts[1]
|
|
2576
|
+
linksFoundResponseAdd(timestamp+'/'+originalUrl)
|
|
2523
2577
|
except Exception as e:
|
|
2524
2578
|
writerr(colored(getSPACER('ERROR processResponses 3: Cannot to get link from line: '+str(line)), 'red'))
|
|
2525
2579
|
|
|
@@ -2540,6 +2594,16 @@ def processResponses():
|
|
|
2540
2594
|
|
|
2541
2595
|
# Get the total number of responses we will try to get and set the current file count to the success count
|
|
2542
2596
|
totalResponses = len(linkRequests)
|
|
2597
|
+
|
|
2598
|
+
# If there are no responses to download, display an error and exit
|
|
2599
|
+
if totalResponses == 0:
|
|
2600
|
+
try:
|
|
2601
|
+
if originalUrl:
|
|
2602
|
+
writerr(colored(getSPACER('Failed to get links from Wayback Machine (archive.org) - there were results (e.g. "'+originalUrl+'") but they didn\'t match the input you gave. Check input and try again.'), 'red'))
|
|
2603
|
+
except:
|
|
2604
|
+
writerr(colored(getSPACER('Failed to get links from Wayback Machine (archive.org) - check input and try again.'), 'red'))
|
|
2605
|
+
return
|
|
2606
|
+
|
|
2543
2607
|
fileCount = successCount
|
|
2544
2608
|
|
|
2545
2609
|
if args.check_only:
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: waymore
|
|
3
|
-
Version: 4.
|
|
3
|
+
Version: 4.5
|
|
4
4
|
Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan & VirusTotal!
|
|
5
5
|
Home-page: https://github.com/xnl-h4ck3r/waymore
|
|
6
6
|
Author: @xnl-h4ck3r
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
8
|
License-File: LICENSE
|
|
9
|
-
Requires-Dist: argparse
|
|
10
9
|
Requires-Dist: requests
|
|
11
10
|
Requires-Dist: pyyaml
|
|
12
11
|
Requires-Dist: termcolor
|
|
@@ -16,7 +15,7 @@ Requires-Dist: tldextract
|
|
|
16
15
|
|
|
17
16
|
<center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
|
|
18
17
|
|
|
19
|
-
## About - v4.
|
|
18
|
+
## About - v4.5
|
|
20
19
|
|
|
21
20
|
The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
|
|
22
21
|
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
waymore/__init__.py,sha256=HpBSj4W3_snlRrPgOuCuVP107OOZenaFQECvPnsC9V4,17
|
|
2
|
+
waymore/waymore.py,sha256=kfGA3T_cDADuhZ_78Ta22fxnqlGVUm56yvIncEfnZDs,170779
|
|
3
|
+
waymore-4.5.dist-info/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
|
|
4
|
+
waymore-4.5.dist-info/METADATA,sha256=v57_NUSUTSGqA1fZQb9UQgFUoOIDcc9AQeDZbRyL7kk,47221
|
|
5
|
+
waymore-4.5.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
|
|
6
|
+
waymore-4.5.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
|
|
7
|
+
waymore-4.5.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
|
|
8
|
+
waymore-4.5.dist-info/RECORD,,
|
waymore-4.3.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
waymore/__init__.py,sha256=-TFFyw9iukscHpq2I58oEz2oLQ995GbajzvC6Iz9ddM,17
|
|
2
|
-
waymore/waymore.py,sha256=SWTqBUa-btDe6cWjRcL3w-ef1uK45LpfztCgvgtQPSM,168145
|
|
3
|
-
waymore-4.3.dist-info/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
|
|
4
|
-
waymore-4.3.dist-info/METADATA,sha256=q7_uq3p1kLMMARqUWTnA33rhxUluolsyAykMv7Ot598,47245
|
|
5
|
-
waymore-4.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
6
|
-
waymore-4.3.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
|
|
7
|
-
waymore-4.3.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
|
|
8
|
-
waymore-4.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|