waymore 4.2-py3-none-any.whl → 4.3-py3-none-any.whl
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- waymore/__init__.py +1 -1
- waymore/waymore.py +39 -27
- {waymore-4.2.dist-info → waymore-4.3.dist-info}/METADATA +2 -2
- waymore-4.3.dist-info/RECORD +8 -0
- waymore-4.2.dist-info/RECORD +0 -8
- {waymore-4.2.dist-info → waymore-4.3.dist-info}/LICENSE +0 -0
- {waymore-4.2.dist-info → waymore-4.3.dist-info}/WHEEL +0 -0
- {waymore-4.2.dist-info → waymore-4.3.dist-info}/entry_points.txt +0 -0
- {waymore-4.2.dist-info → waymore-4.3.dist-info}/top_level.txt +0 -0
waymore/__init__.py
CHANGED
@@ -1 +1 @@
-__version__="4.2"
+__version__="4.3"
waymore/waymore.py
CHANGED
@@ -83,7 +83,7 @@ argsInputHostname = ''
 responseOutputDirectory = ''
 
 # Source Provider URLs
-WAYBACK_URL = 'https://web.archive.org/cdx/search/cdx?url={DOMAIN}
+WAYBACK_URL = 'https://web.archive.org/cdx/search/cdx?url={DOMAIN}{COLLAPSE}&fl=timestamp,original,mimetype,statuscode,digest'
 CCRAWL_INDEX_URL = 'https://index.commoncrawl.org/collinfo.json'
 ALIENVAULT_URL = 'https://otx.alienvault.com/api/v1/indicators/{TYPE}/{DOMAIN}/url_list?limit=500'
 URLSCAN_URL = 'https://urlscan.io/api/v1/search/?q=domain:{DOMAIN}&size=10000'
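The rewritten `WAYBACK_URL` template now carries a `{COLLAPSE}` placeholder and pins the returned CDX fields with `fl=timestamp,original,mimetype,statuscode,digest`. The `processResponses()` hunk further down fills `{DOMAIN}` and `{COLLAPSE}` with plain `str.replace()` calls; a minimal sketch of that expansion (the `build_cdx_url` helper and the example domain are illustrative, not waymore's code):

```python
# Illustrative only: how the new template expands once {DOMAIN} and {COLLAPSE}
# are substituted. An empty collapse string leaves the query uncollapsed.
WAYBACK_URL = 'https://web.archive.org/cdx/search/cdx?url={DOMAIN}{COLLAPSE}&fl=timestamp,original,mimetype,statuscode,digest'

def build_cdx_url(domain: str, collapse: str = '') -> str:
    # Same str.replace() substitution the processResponses() hunk performs.
    return WAYBACK_URL.replace('{DOMAIN}', domain).replace('{COLLAPSE}', collapse)

print(build_cdx_url('example.com', '&collapse=timestamp:8'))
# https://web.archive.org/cdx/search/cdx?url=example.com&collapse=timestamp:8&fl=timestamp,original,mimetype,statuscode,digest
```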
@@ -1851,11 +1851,15 @@ def getWaybackUrls():
         session.mount('https://', HTTP_ADAPTER)
         session.mount('http://', HTTP_ADAPTER)
         resp = session.get(url+'&showNumPages=True', headers={"User-Agent":userAgent})
-        totalPages =
-
-
-
-
+        # Try to get the total number of pages. If there is a problem, we'll return totalPages = 0 which means we'll get everything back in one request
+        try:
+            totalPages = int(resp.text.strip())
+
+            # If the argument to limit the requests was passed and the total pages is larger than that, set to the limit
+            if args.limit_requests != 0 and totalPages > args.limit_requests:
+                totalPages = args.limit_requests
+        except:
+            totalPages = -1
     except Exception as e:
         try:
             # If the rate limit was reached end now
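The new block probes the CDX API with `&showNumPages=True`, parses the body as an integer, caps the value with `--limit-requests`, and falls back to `totalPages = -1` (everything in one request) when the reply is not a plain page count. A minimal sketch of that fallback, using a hypothetical `parse_total_pages` helper rather than waymore's inline code:

```python
# Illustrative only: the parse/cap/fallback behaviour of the new try/except.
def parse_total_pages(response_text: str, limit_requests: int = 0) -> int:
    """Return the page count (capped at limit_requests if set), or -1 when the
    response is not a plain integer, which signals the single-request fallback."""
    try:
        total_pages = int(response_text.strip())
        if limit_requests != 0 and total_pages > limit_requests:
            total_pages = limit_requests
        return total_pages
    except ValueError:
        return -1

print(parse_total_pages('12', limit_requests=5))  # 5  -> capped by the limit
print(parse_total_pages('{"error": "..."}'))      # -1 -> fetch everything at once
```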
@@ -1880,31 +1884,39 @@ def getWaybackUrls():
             else:
                 writerr(colored(getSPACER('[ ERR ] Unable to get links from Wayback Machine (archive.org): ' + str(e)), 'red'))
         return
-
+
     if args.check_only:
-
-
+        if totalPages < 0:
+            write(colored('Due to a change in Wayback Machine API, all URLs will be retrieved in one request and it is not possible to determine how long it will take, so please ignore this.','cyan'))
+        else:
+            checkWayback = totalPages
+            write(colored('Get URLs from Wayback Machine: ','cyan')+colored(str(checkWayback)+' requests','white'))
     else:
         if verbose():
             write(colored('The archive URL requested to get links: ','magenta')+colored(url+'\n','white'))
 
-
-
+        if totalPages < 0:
+            write(colored('\rGetting links from Wayback Machine (archive.org) with one request (this can take a while for some domains)...\r','cyan'))
 
-
-        pages = []
-        if totalPages == 1:
-            pages.append(url)
+            processWayBackPage(url)
         else:
-
-
-
-
-
-
-
-
-
+            # if the page number was found then display it, but otherwise we will just try to increment until we have everything
+            write(colored('\rGetting links from ' + str(totalPages) + ' Wayback Machine (archive.org) API requests (this can take a while for some domains)...\r','cyan'))
+
+            # Get a list of all the page URLs we need to visit
+            pages = []
+            if totalPages == 1:
+                pages.append(url)
+            else:
+                for page in range(0, totalPages):
+                    pages.append(url+str(page))
+
+            # Process the URLs from web archive
+            if stopProgram is None:
+                p = mp.Pool(args.processes)
+                p.map(processWayBackPage, pages)
+                p.close()
+                p.join()
 
         # Show the MIME types found (in case user wants to exclude more)
         if verbose() and len(linkMimes) > 0 :
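When `totalPages` is negative the whole result set is pulled with a single `processWayBackPage(url)` call; otherwise one URL per page is built and the pages are fanned out over `mp.Pool(args.processes)`. A rough sketch of that dispatch, with a stub worker and an assumed trailing `&page=` parameter standing in for waymore's real URL handling:

```python
# Illustrative only: single-request vs. paged multiprocessing dispatch.
import multiprocessing as mp

def process_page(page_url: str) -> None:
    # Stub standing in for processWayBackPage(); it would fetch and parse links.
    print(f'would fetch {page_url}')

def fetch_wayback(url: str, total_pages: int, processes: int = 4) -> None:
    if total_pages < 0:
        # Page count unknown: the API returns everything in one response.
        process_page(url)
        return
    # Build one URL per page, then process them in parallel.
    pages = [url] if total_pages == 1 else [url + str(p) for p in range(total_pages)]
    with mp.Pool(processes) as pool:
        pool.map(process_page, pages)

if __name__ == '__main__':
    fetch_wayback('https://web.archive.org/cdx/search/cdx?url=example.com&page=', 3)
```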
@@ -2431,11 +2443,11 @@ def processResponses():
     if args.capture_interval == 'none': # get all
         collapse = ''
     elif args.capture_interval == 'h': # get at most 1 capture per URL per hour
-        collapse = 'timestamp:10
+        collapse = '&collapse=timestamp:10'
     elif args.capture_interval == 'd': # get at most 1 capture per URL per day
-        collapse = 'timestamp:8
+        collapse = '&collapse=timestamp:8'
     elif args.capture_interval == 'm': # get at most 1 capture per URL per month
-        collapse = 'timestamp:6
+        collapse = '&collapse=timestamp:6'
 
     url = WAYBACK_URL.replace('{DOMAIN}',subs + quote(argsInput) + path).replace('{COLLAPSE}',collapse) + filterMIME + filterCode + filterLimit + filterFrom + filterTo + filterKeywords
 
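The `&collapse=` prefix has moved out of the URL template and into the per-interval values, so an empty string cleanly means no collapsing. The CDX `collapse=timestamp:N` filter keeps at most one capture per URL for each distinct value of the first N timestamp digits (`YYYYMMDDhh` for 10, `YYYYMMDD` for 8, `YYYYMM` for 6). A small sketch of that mapping (the `collapse_fragment` helper is illustrative, not waymore's code):

```python
# Illustrative only: capture-interval flag -> query-string fragment.
def collapse_fragment(capture_interval: str) -> str:
    return {
        'none': '',                      # keep every capture
        'h': '&collapse=timestamp:10',   # first 10 digits (YYYYMMDDhh) -> 1 per hour
        'd': '&collapse=timestamp:8',    # first 8 digits (YYYYMMDD)    -> 1 per day
        'm': '&collapse=timestamp:6',    # first 6 digits (YYYYMM)      -> 1 per month
    }.get(capture_interval, '')

print(collapse_fragment('d'))  # &collapse=timestamp:8
```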
{waymore-4.2.dist-info → waymore-4.3.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: waymore
-Version: 4.2
+Version: 4.3
 Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan & VirusTotal!
 Home-page: https://github.com/xnl-h4ck3r/waymore
 Author: @xnl-h4ck3r
@@ -16,7 +16,7 @@ Requires-Dist: tldextract
 
 <center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
 
-## About - v4.2
+## About - v4.3
 
 The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
 
waymore-4.3.dist-info/RECORD
ADDED

@@ -0,0 +1,8 @@
+waymore/__init__.py,sha256=-TFFyw9iukscHpq2I58oEz2oLQ995GbajzvC6Iz9ddM,17
+waymore/waymore.py,sha256=SWTqBUa-btDe6cWjRcL3w-ef1uK45LpfztCgvgtQPSM,168145
+waymore-4.3.dist-info/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
+waymore-4.3.dist-info/METADATA,sha256=q7_uq3p1kLMMARqUWTnA33rhxUluolsyAykMv7Ot598,47245
+waymore-4.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+waymore-4.3.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
+waymore-4.3.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
+waymore-4.3.dist-info/RECORD,,
waymore-4.2.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
-waymore/__init__.py,sha256=KS43n250T2U7gE8jspc0cFVyTllND-0M0RkTA_yyc88,17
-waymore/waymore.py,sha256=8HdTaKE5-SzIIWWWF5kskaBVzl0FRG0DpBChR11JWjs,167332
-waymore-4.2.dist-info/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
-waymore-4.2.dist-info/METADATA,sha256=uSCOfxcJDVsAO9z_in0-n6l8BOfD9jPNHiAsDuhuhz0,47245
-waymore-4.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-waymore-4.2.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
-waymore-4.2.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
-waymore-4.2.dist-info/RECORD,,
{waymore-4.2.dist-info → waymore-4.3.dist-info}/LICENSE: file without changes
{waymore-4.2.dist-info → waymore-4.3.dist-info}/WHEEL: file without changes
{waymore-4.2.dist-info → waymore-4.3.dist-info}/entry_points.txt: file without changes
{waymore-4.2.dist-info → waymore-4.3.dist-info}/top_level.txt: file without changes