waymore 4.1__py3-none-any.whl → 4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- waymore/__init__.py +1 -1
- waymore/waymore.py +66 -46
- {waymore-4.1.dist-info → waymore-4.3.dist-info}/METADATA +2 -2
- waymore-4.3.dist-info/RECORD +8 -0
- waymore-4.1.dist-info/RECORD +0 -8
- {waymore-4.1.dist-info → waymore-4.3.dist-info}/LICENSE +0 -0
- {waymore-4.1.dist-info → waymore-4.3.dist-info}/WHEEL +0 -0
- {waymore-4.1.dist-info → waymore-4.3.dist-info}/entry_points.txt +0 -0
- {waymore-4.1.dist-info → waymore-4.3.dist-info}/top_level.txt +0 -0
waymore/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__="4.1"
|
|
1
|
+
__version__="4.3"
|
waymore/waymore.py
CHANGED
|
@@ -83,7 +83,7 @@ argsInputHostname = ''
|
|
|
83
83
|
responseOutputDirectory = ''
|
|
84
84
|
|
|
85
85
|
# Source Provider URLs
|
|
86
|
-
WAYBACK_URL = 'https://web.archive.org/cdx/search/cdx?url={DOMAIN}
|
|
86
|
+
WAYBACK_URL = 'https://web.archive.org/cdx/search/cdx?url={DOMAIN}{COLLAPSE}&fl=timestamp,original,mimetype,statuscode,digest'
|
|
87
87
|
CCRAWL_INDEX_URL = 'https://index.commoncrawl.org/collinfo.json'
|
|
88
88
|
ALIENVAULT_URL = 'https://otx.alienvault.com/api/v1/indicators/{TYPE}/{DOMAIN}/url_list?limit=500'
|
|
89
89
|
URLSCAN_URL = 'https://urlscan.io/api/v1/search/?q=domain:{DOMAIN}&size=10000'
|
|
@@ -1780,23 +1780,31 @@ def processWayBackPage(url):
|
|
|
1780
1780
|
return
|
|
1781
1781
|
|
|
1782
1782
|
# Get the URLs and MIME types. Each line is a separate JSON string
|
|
1783
|
-
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
if verbose():
|
|
1787
|
-
try:
|
|
1788
|
-
linkMimes.add(str(results).split(' ')[2])
|
|
1789
|
-
except Exception as e:
|
|
1790
|
-
if verbose():
|
|
1791
|
-
writerr(colored(getSPACER('ERROR processWayBackPage 2: Cannot get MIME type from line: ' + str(line)),'red'))
|
|
1792
|
-
write(resp.text)
|
|
1793
|
-
try:
|
|
1783
|
+
try:
|
|
1784
|
+
for line in resp.iter_lines():
|
|
1785
|
+
results = line.decode("utf-8")
|
|
1794
1786
|
foundUrl = fixArchiveOrgUrl(str(results).split(' ')[1])
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1787
|
+
|
|
1788
|
+
# Check the URL exclusions
|
|
1789
|
+
match = re.search(r'('+re.escape(FILTER_URL).replace(',','|')+')', foundUrl, flags=re.IGNORECASE)
|
|
1790
|
+
if match is None:
|
|
1791
|
+
# Only get MIME Types if --verbose option was selected
|
|
1792
|
+
if verbose():
|
|
1793
|
+
try:
|
|
1794
|
+
linkMimes.add(str(results).split(' ')[2])
|
|
1795
|
+
except Exception as e:
|
|
1796
|
+
if verbose():
|
|
1797
|
+
writerr(colored(getSPACER('ERROR processWayBackPage 2: Cannot get MIME type from line: ' + str(line)),'red'))
|
|
1798
|
+
write(resp.text)
|
|
1799
|
+
try:
|
|
1800
|
+
linksFoundAdd(foundUrl)
|
|
1801
|
+
except Exception as e:
|
|
1802
|
+
if verbose():
|
|
1803
|
+
writerr(colored(getSPACER('ERROR processWayBackPage 3: Cannot get link from line: ' + str(line)),'red'))
|
|
1804
|
+
write(resp.text)
|
|
1805
|
+
except Exception as e:
|
|
1806
|
+
if verbose():
|
|
1807
|
+
writerr(colored(getSPACER('ERROR processWayBackPage 4: ' + str(line)),'red'))
|
|
1800
1808
|
else:
|
|
1801
1809
|
pass
|
|
1802
1810
|
except Exception as e:
|
|
@@ -1843,11 +1851,15 @@ def getWaybackUrls():
|
|
|
1843
1851
|
session.mount('https://', HTTP_ADAPTER)
|
|
1844
1852
|
session.mount('http://', HTTP_ADAPTER)
|
|
1845
1853
|
resp = session.get(url+'&showNumPages=True', headers={"User-Agent":userAgent})
|
|
1846
|
-
totalPages =
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
|
|
1854
|
+
# Try to get the total number of pages. If there is a problem, we'll return totalPages = 0 which means we'll get everything back in one request
|
|
1855
|
+
try:
|
|
1856
|
+
totalPages = int(resp.text.strip())
|
|
1857
|
+
|
|
1858
|
+
# If the argument to limit the requests was passed and the total pages is larger than that, set to the limit
|
|
1859
|
+
if args.limit_requests != 0 and totalPages > args.limit_requests:
|
|
1860
|
+
totalPages = args.limit_requests
|
|
1861
|
+
except:
|
|
1862
|
+
totalPages = -1
|
|
1851
1863
|
except Exception as e:
|
|
1852
1864
|
try:
|
|
1853
1865
|
# If the rate limit was reached end now
|
|
@@ -1872,31 +1884,39 @@ def getWaybackUrls():
|
|
|
1872
1884
|
else:
|
|
1873
1885
|
writerr(colored(getSPACER('[ ERR ] Unable to get links from Wayback Machine (archive.org): ' + str(e)), 'red'))
|
|
1874
1886
|
return
|
|
1875
|
-
|
|
1887
|
+
|
|
1876
1888
|
if args.check_only:
|
|
1877
|
-
|
|
1878
|
-
|
|
1889
|
+
if totalPages < 0:
|
|
1890
|
+
write(colored('Due to a change in Wayback Machine API, all URLs will be retrieved in one request and it is not possible to determine how long it will take, so please ignore this.','cyan'))
|
|
1891
|
+
else:
|
|
1892
|
+
checkWayback = totalPages
|
|
1893
|
+
write(colored('Get URLs from Wayback Machine: ','cyan')+colored(str(checkWayback)+' requests','white'))
|
|
1879
1894
|
else:
|
|
1880
1895
|
if verbose():
|
|
1881
1896
|
write(colored('The archive URL requested to get links: ','magenta')+colored(url+'\n','white'))
|
|
1882
1897
|
|
|
1883
|
-
|
|
1884
|
-
|
|
1898
|
+
if totalPages < 0:
|
|
1899
|
+
write(colored('\rGetting links from Wayback Machine (archive.org) with one request (this can take a while for some domains)...\r','cyan'))
|
|
1885
1900
|
|
|
1886
|
-
|
|
1887
|
-
pages = []
|
|
1888
|
-
if totalPages == 1:
|
|
1889
|
-
pages.append(url)
|
|
1901
|
+
processWayBackPage(url)
|
|
1890
1902
|
else:
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1903
|
+
# if the page number was found then display it, but otherwise we will just try to increment until we have everything
|
|
1904
|
+
write(colored('\rGetting links from ' + str(totalPages) + ' Wayback Machine (archive.org) API requests (this can take a while for some domains)...\r','cyan'))
|
|
1905
|
+
|
|
1906
|
+
# Get a list of all the page URLs we need to visit
|
|
1907
|
+
pages = []
|
|
1908
|
+
if totalPages == 1:
|
|
1909
|
+
pages.append(url)
|
|
1910
|
+
else:
|
|
1911
|
+
for page in range(0, totalPages):
|
|
1912
|
+
pages.append(url+str(page))
|
|
1913
|
+
|
|
1914
|
+
# Process the URLs from web archive
|
|
1915
|
+
if stopProgram is None:
|
|
1916
|
+
p = mp.Pool(args.processes)
|
|
1917
|
+
p.map(processWayBackPage, pages)
|
|
1918
|
+
p.close()
|
|
1919
|
+
p.join()
|
|
1900
1920
|
|
|
1901
1921
|
# Show the MIME types found (in case user wants to exclude more)
|
|
1902
1922
|
if verbose() and len(linkMimes) > 0 :
|
|
@@ -2422,12 +2442,12 @@ def processResponses():
|
|
|
2422
2442
|
# This is useful for filtering out captures that are 'too dense' or when looking for unique captures."
|
|
2423
2443
|
if args.capture_interval == 'none': # get all
|
|
2424
2444
|
collapse = ''
|
|
2425
|
-
elif args.capture_interval == 'h': # get at most 1 capture per hour
|
|
2426
|
-
collapse = 'timestamp:10'
|
|
2427
|
-
elif args.capture_interval == 'd': # get at most 1 capture per day
|
|
2428
|
-
collapse = 'timestamp:8'
|
|
2429
|
-
elif args.capture_interval == 'm': # get at most 1 capture per month
|
|
2430
|
-
collapse = 'timestamp:6'
|
|
2445
|
+
elif args.capture_interval == 'h': # get at most 1 capture per URL per hour
|
|
2446
|
+
collapse = '&collapse=timestamp:10'
|
|
2447
|
+
elif args.capture_interval == 'd': # get at most 1 capture per URL per day
|
|
2448
|
+
collapse = '&collapse=timestamp:8'
|
|
2449
|
+
elif args.capture_interval == 'm': # get at most 1 capture per URL per month
|
|
2450
|
+
collapse = '&collapse=timestamp:6'
|
|
2431
2451
|
|
|
2432
2452
|
url = WAYBACK_URL.replace('{DOMAIN}',subs + quote(argsInput) + path).replace('{COLLAPSE}',collapse) + filterMIME + filterCode + filterLimit + filterFrom + filterTo + filterKeywords
|
|
2433
2453
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: waymore
|
|
3
|
-
Version: 4.1
|
|
3
|
+
Version: 4.3
|
|
4
4
|
Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan & VirusTotal!
|
|
5
5
|
Home-page: https://github.com/xnl-h4ck3r/waymore
|
|
6
6
|
Author: @xnl-h4ck3r
|
|
@@ -16,7 +16,7 @@ Requires-Dist: tldextract
|
|
|
16
16
|
|
|
17
17
|
<center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
|
|
18
18
|
|
|
19
|
-
## About - v4.1
|
|
19
|
+
## About - v4.3
|
|
20
20
|
|
|
21
21
|
The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
|
|
22
22
|
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
waymore/__init__.py,sha256=-TFFyw9iukscHpq2I58oEz2oLQ995GbajzvC6Iz9ddM,17
|
|
2
|
+
waymore/waymore.py,sha256=SWTqBUa-btDe6cWjRcL3w-ef1uK45LpfztCgvgtQPSM,168145
|
|
3
|
+
waymore-4.3.dist-info/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
|
|
4
|
+
waymore-4.3.dist-info/METADATA,sha256=q7_uq3p1kLMMARqUWTnA33rhxUluolsyAykMv7Ot598,47245
|
|
5
|
+
waymore-4.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
6
|
+
waymore-4.3.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
|
|
7
|
+
waymore-4.3.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
|
|
8
|
+
waymore-4.3.dist-info/RECORD,,
|
waymore-4.1.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
waymore/__init__.py,sha256=W33igiZPTC8XKkjVNoDhbalnEpvc0URlY2v823612DE,17
|
|
2
|
-
waymore/waymore.py,sha256=0BC0QZbyQap8f8xMp9_IXYG6yC_ZC9Hgi6L7_FER4Tg,166754
|
|
3
|
-
waymore-4.1.dist-info/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
|
|
4
|
-
waymore-4.1.dist-info/METADATA,sha256=ePRZd4A8GtQQHFlsRDCcPPW6GvrRe1XcgHDoYVav-qI,47245
|
|
5
|
-
waymore-4.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
6
|
-
waymore-4.1.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
|
|
7
|
-
waymore-4.1.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
|
|
8
|
-
waymore-4.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|