waymore 4.3__tar.gz → 4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {waymore-4.3/waymore.egg-info → waymore-4.4}/PKG-INFO +2 -3
- {waymore-4.3 → waymore-4.4}/README.md +1 -1
- {waymore-4.3 → waymore-4.4}/setup.py +1 -1
- waymore-4.4/waymore/__init__.py +1 -0
- {waymore-4.3 → waymore-4.4}/waymore/waymore.py +56 -13
- {waymore-4.3 → waymore-4.4/waymore.egg-info}/PKG-INFO +2 -3
- {waymore-4.3 → waymore-4.4}/waymore.egg-info/requires.txt +0 -1
- waymore-4.3/waymore/__init__.py +0 -1
- {waymore-4.3 → waymore-4.4}/LICENSE +0 -0
- {waymore-4.3 → waymore-4.4}/setup.cfg +0 -0
- {waymore-4.3 → waymore-4.4}/waymore.egg-info/SOURCES.txt +0 -0
- {waymore-4.3 → waymore-4.4}/waymore.egg-info/dependency_links.txt +0 -0
- {waymore-4.3 → waymore-4.4}/waymore.egg-info/entry_points.txt +0 -0
- {waymore-4.3 → waymore-4.4}/waymore.egg-info/top_level.txt +0 -0
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: waymore
|
|
3
|
-
Version: 4.3
|
|
3
|
+
Version: 4.4
|
|
4
4
|
Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan & VirusTotal!
|
|
5
5
|
Home-page: https://github.com/xnl-h4ck3r/waymore
|
|
6
6
|
Author: @xnl-h4ck3r
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
8
|
License-File: LICENSE
|
|
9
|
-
Requires-Dist: argparse
|
|
10
9
|
Requires-Dist: requests
|
|
11
10
|
Requires-Dist: pyyaml
|
|
12
11
|
Requires-Dist: termcolor
|
|
@@ -16,7 +15,7 @@ Requires-Dist: tldextract
|
|
|
16
15
|
|
|
17
16
|
<center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
|
|
18
17
|
|
|
19
|
-
## About - v4.3
|
|
18
|
+
## About - v4.4
|
|
20
19
|
|
|
21
20
|
The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
|
|
22
21
|
|
|
@@ -34,7 +34,7 @@ setup(
|
|
|
34
34
|
author="@xnl-h4ck3r",
|
|
35
35
|
url="https://github.com/xnl-h4ck3r/waymore",
|
|
36
36
|
py_modules=["waymore"],
|
|
37
|
-
install_requires=["argparse","requests","pyyaml","termcolor","psutil","urlparse3","tldextract"],
|
|
37
|
+
install_requires=["requests","pyyaml","termcolor","psutil","urlparse3","tldextract"],
|
|
38
38
|
entry_points={
|
|
39
39
|
'console_scripts': [
|
|
40
40
|
'waymore = waymore.waymore:main',
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__="4.4"
|
|
@@ -706,23 +706,55 @@ def fixArchiveOrgUrl(url):
|
|
|
706
706
|
url = url[0:newline]
|
|
707
707
|
return url
|
|
708
708
|
|
|
709
|
+
# Add a link to the linksFound collection for archived responses (including the timestamp prefix)
|
|
710
|
+
def linksFoundResponseAdd(link):
|
|
711
|
+
global linksFound, argsInput, argsInputHostname
|
|
712
|
+
|
|
713
|
+
try:
|
|
714
|
+
if inputIsDomainANDPath:
|
|
715
|
+
checkInput = argsInput
|
|
716
|
+
else:
|
|
717
|
+
checkInput = argsInputHostname
|
|
718
|
+
|
|
719
|
+
# Remove the timestamp
|
|
720
|
+
linkWithoutTimestamp = link.split('/', 1)[-1]
|
|
721
|
+
|
|
722
|
+
# If the link specifies port 80 or 443, e.g. http://example.com:80, then remove the port
|
|
723
|
+
parsed = urlparse(linkWithoutTimestamp.strip())
|
|
724
|
+
if parsed.port in (80, 443):
|
|
725
|
+
new_netloc = parsed.hostname
|
|
726
|
+
parsed_url = parsed._replace(netloc=new_netloc).geturl()
|
|
727
|
+
else:
|
|
728
|
+
parsed_url = linkWithoutTimestamp
|
|
729
|
+
|
|
730
|
+
# Don't write it if the link does not contain the requested domain (this can sometimes happen)
|
|
731
|
+
if parsed_url.find(checkInput) >= 0:
|
|
732
|
+
linksFound.add(link)
|
|
733
|
+
except Exception as e:
|
|
734
|
+
linksFound.add(link)
|
|
735
|
+
|
|
709
736
|
# Add a link to the linksFound collection
|
|
710
737
|
def linksFoundAdd(link):
|
|
711
738
|
global linksFound, argsInput, argsInputHostname
|
|
712
|
-
|
|
739
|
+
|
|
713
740
|
try:
|
|
714
741
|
if inputIsDomainANDPath:
|
|
715
742
|
checkInput = argsInput
|
|
716
743
|
else:
|
|
717
744
|
checkInput = argsInputHostname
|
|
745
|
+
|
|
746
|
+
# If the link specifies port 80 or 443, e.g. http://example.com:80, then remove the port
|
|
747
|
+
parsed = urlparse(link.strip())
|
|
748
|
+
if parsed.port in (80, 443):
|
|
749
|
+
new_netloc = parsed.hostname
|
|
750
|
+
parsed_url = parsed._replace(netloc=new_netloc).geturl()
|
|
751
|
+
else:
|
|
752
|
+
parsed_url = link
|
|
753
|
+
|
|
718
754
|
# Don't write it if the link does not contain the requested domain (this can sometimes happen)
|
|
719
|
-
if
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
newNetloc = parsed.netloc.split(':')[0]
|
|
723
|
-
parsed = parsed._replace(netloc=newNetloc).geturl()
|
|
724
|
-
linksFound.add(parsed)
|
|
725
|
-
except:
|
|
755
|
+
if parsed_url.find(checkInput) >= 0:
|
|
756
|
+
linksFound.add(link)
|
|
757
|
+
except Exception as e:
|
|
726
758
|
linksFound.add(link)
|
|
727
759
|
|
|
728
760
|
def processArchiveUrl(url):
|
|
@@ -2347,7 +2379,7 @@ def getVirusTotalUrls():
|
|
|
2347
2379
|
|
|
2348
2380
|
except Exception as e:
|
|
2349
2381
|
writerr(colored('ERROR getVirusTotalUrls 1: ' + str(e), 'red'))
|
|
2350
|
-
|
|
2382
|
+
|
|
2351
2383
|
def processResponses():
|
|
2352
2384
|
"""
|
|
2353
2385
|
Get archived responses from Wayback Machine (archive.org)
|
|
@@ -2513,13 +2545,14 @@ def processResponses():
|
|
|
2513
2545
|
except:
|
|
2514
2546
|
pass
|
|
2515
2547
|
|
|
2516
|
-
# Go through the response to save the links found
|
|
2548
|
+
# Go through the response to save the links found
|
|
2517
2549
|
for line in resp.iter_lines():
|
|
2518
2550
|
try:
|
|
2519
2551
|
results = line.decode("utf-8")
|
|
2520
|
-
|
|
2521
|
-
|
|
2522
|
-
|
|
2552
|
+
parts = results.split(' ', 2)
|
|
2553
|
+
timestamp = parts[0]
|
|
2554
|
+
originalUrl = parts[1]
|
|
2555
|
+
linksFoundResponseAdd(timestamp+'/'+originalUrl)
|
|
2523
2556
|
except Exception as e:
|
|
2524
2557
|
writerr(colored(getSPACER('ERROR processResponses 3: Cannot to get link from line: '+str(line)), 'red'))
|
|
2525
2558
|
|
|
@@ -2540,6 +2573,16 @@ def processResponses():
|
|
|
2540
2573
|
|
|
2541
2574
|
# Get the total number of responses we will try to get and set the current file count to the success count
|
|
2542
2575
|
totalResponses = len(linkRequests)
|
|
2576
|
+
|
|
2577
|
+
# If there are no responses to download, display an error and exit
|
|
2578
|
+
if totalResponses == 0:
|
|
2579
|
+
try:
|
|
2580
|
+
if originalUrl:
|
|
2581
|
+
writerr(colored(getSPACER('Failed to get links from Wayback Machine (archive.org) - there were results (e.g. "'+originalUrl+'") but they didn\'t match the input you gave. Check input and try again.'), 'red'))
|
|
2582
|
+
except:
|
|
2583
|
+
writerr(colored(getSPACER('Failed to get links from Wayback Machine (archive.org) - check input and try again.'), 'red'))
|
|
2584
|
+
return
|
|
2585
|
+
|
|
2543
2586
|
fileCount = successCount
|
|
2544
2587
|
|
|
2545
2588
|
if args.check_only:
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: waymore
|
|
3
|
-
Version: 4.3
|
|
3
|
+
Version: 4.4
|
|
4
4
|
Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan & VirusTotal!
|
|
5
5
|
Home-page: https://github.com/xnl-h4ck3r/waymore
|
|
6
6
|
Author: @xnl-h4ck3r
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
8
|
License-File: LICENSE
|
|
9
|
-
Requires-Dist: argparse
|
|
10
9
|
Requires-Dist: requests
|
|
11
10
|
Requires-Dist: pyyaml
|
|
12
11
|
Requires-Dist: termcolor
|
|
@@ -16,7 +15,7 @@ Requires-Dist: tldextract
|
|
|
16
15
|
|
|
17
16
|
<center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
|
|
18
17
|
|
|
19
|
-
## About - v4.3
|
|
18
|
+
## About - v4.4
|
|
20
19
|
|
|
21
20
|
The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
|
|
22
21
|
|
waymore-4.3/waymore/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__="4.3"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|