waymore 4.3__tar.gz → 4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: waymore
3
- Version: 4.3
3
+ Version: 4.4
4
4
  Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan & VirusTotal!
5
5
  Home-page: https://github.com/xnl-h4ck3r/waymore
6
6
  Author: @xnl-h4ck3r
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
- Requires-Dist: argparse
10
9
  Requires-Dist: requests
11
10
  Requires-Dist: pyyaml
12
11
  Requires-Dist: termcolor
@@ -16,7 +15,7 @@ Requires-Dist: tldextract
16
15
 
17
16
  <center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
18
17
 
19
- ## About - v4.3
18
+ ## About - v4.4
20
19
 
21
20
  The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
22
21
 
@@ -1,6 +1,6 @@
1
1
  <center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
2
2
 
3
- ## About - v4.3
3
+ ## About - v4.4
4
4
 
5
5
  The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
6
6
 
@@ -34,7 +34,7 @@ setup(
34
34
  author="@xnl-h4ck3r",
35
35
  url="https://github.com/xnl-h4ck3r/waymore",
36
36
  py_modules=["waymore"],
37
- install_requires=["argparse","requests","pyyaml","termcolor","psutil","urlparse3","tldextract"],
37
+ install_requires=["requests","pyyaml","termcolor","psutil","urlparse3","tldextract"],
38
38
  entry_points={
39
39
  'console_scripts': [
40
40
  'waymore = waymore.waymore:main',
@@ -0,0 +1 @@
1
+ __version__="4.4"
@@ -706,23 +706,55 @@ def fixArchiveOrgUrl(url):
706
706
  url = url[0:newline]
707
707
  return url
708
708
 
709
+ # Add a link to the linksFound collection for archived responses (included timestamp preifx)
710
+ def linksFoundResponseAdd(link):
711
+ global linksFound, argsInput, argsInputHostname
712
+
713
+ try:
714
+ if inputIsDomainANDPath:
715
+ checkInput = argsInput
716
+ else:
717
+ checkInput = argsInputHostname
718
+
719
+ # Remove the timestamp
720
+ linkWithoutTimestamp = link.split('/', 1)[-1]
721
+
722
+ # If the link specifies port 80 or 443, e.g. http://example.com:80, then remove the port
723
+ parsed = urlparse(linkWithoutTimestamp.strip())
724
+ if parsed.port in (80, 443):
725
+ new_netloc = parsed.hostname
726
+ parsed_url = parsed._replace(netloc=new_netloc).geturl()
727
+ else:
728
+ parsed_url = linkWithoutTimestamp
729
+
730
+ # Don't write it if the link does not contain the requested domain (this can sometimes happen)
731
+ if parsed_url.find(checkInput) >= 0:
732
+ linksFound.add(link)
733
+ except Exception as e:
734
+ linksFound.add(link)
735
+
709
736
# Add a link to the linksFound collection
def linksFoundAdd(link):
    """Add a link to linksFound if it refers to the requested input.

    Any explicit default port (:80 or :443) is removed from a copy of the
    URL before the domain check. NOTE(review): the ORIGINAL link (including
    any default port) is what gets stored — the normalized form is used
    only for matching; the link is also stored unchanged if checking fails.

    Args:
        link: The URL string to consider adding.
    """
    global linksFound, argsInput, argsInputHostname

    try:
        # Compare against the full input (domain + path) or just the hostname,
        # depending on what the user supplied
        if inputIsDomainANDPath:
            checkInput = argsInput
        else:
            checkInput = argsInputHostname

        # If the link specifies port 80 or 443, e.g. http://example.com:80,
        # then remove the port for the purposes of the domain check
        parsed = urlparse(link.strip())
        if parsed.port in (80, 443):
            parsed_url = parsed._replace(netloc=parsed.hostname).geturl()
        else:
            parsed_url = link

        # Don't write it if the link does not contain the requested domain
        # (this can sometimes happen)
        if parsed_url.find(checkInput) >= 0:
            linksFound.add(link)
    except Exception:
        # Best effort: if the URL can't be parsed/checked, keep the link anyway
        linksFound.add(link)

 
728
760
  def processArchiveUrl(url):
@@ -2347,7 +2379,7 @@ def getVirusTotalUrls():
2347
2379
 
2348
2380
  except Exception as e:
2349
2381
  writerr(colored('ERROR getVirusTotalUrls 1: ' + str(e), 'red'))
2350
-
2382
+
2351
2383
  def processResponses():
2352
2384
  """
2353
2385
  Get archived responses from Wayback Machine (archive.org)
@@ -2513,13 +2545,14 @@ def processResponses():
2513
2545
  except:
2514
2546
  pass
2515
2547
 
2516
- # Go through the response to save the links found
2548
+ # Go through the response to save the links found
2517
2549
  for line in resp.iter_lines():
2518
2550
  try:
2519
2551
  results = line.decode("utf-8")
2520
- timestamp = results.split(' ')[0]
2521
- originalUrl = results.split(' ')[1]
2522
- linksFoundAdd(timestamp+'/'+originalUrl)
2552
+ parts = results.split(' ', 2)
2553
+ timestamp = parts[0]
2554
+ originalUrl = parts[1]
2555
+ linksFoundResponseAdd(timestamp+'/'+originalUrl)
2523
2556
  except Exception as e:
2524
2557
  writerr(colored(getSPACER('ERROR processResponses 3: Cannot to get link from line: '+str(line)), 'red'))
2525
2558
 
@@ -2540,6 +2573,16 @@ def processResponses():
2540
2573
 
2541
2574
  # Get the total number of responses we will try to get and set the current file count to the success count
2542
2575
  totalResponses = len(linkRequests)
2576
+
2577
+ # If there are no responses to download, display an error and exit
2578
+ if totalResponses == 0:
2579
+ try:
2580
+ if originalUrl:
2581
+ writerr(colored(getSPACER('Failed to get links from Wayback Machine (archive.org) - there were results (e.g. "'+originalUrl+'") but they didn\'t match the input you gave. Check input and try again.'), 'red'))
2582
+ except:
2583
+ writerr(colored(getSPACER('Failed to get links from Wayback Machine (archive.org) - check input and try again.'), 'red'))
2584
+ return
2585
+
2543
2586
  fileCount = successCount
2544
2587
 
2545
2588
  if args.check_only:
@@ -1,12 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: waymore
3
- Version: 4.3
3
+ Version: 4.4
4
4
  Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan & VirusTotal!
5
5
  Home-page: https://github.com/xnl-h4ck3r/waymore
6
6
  Author: @xnl-h4ck3r
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
- Requires-Dist: argparse
10
9
  Requires-Dist: requests
11
10
  Requires-Dist: pyyaml
12
11
  Requires-Dist: termcolor
@@ -16,7 +15,7 @@ Requires-Dist: tldextract
16
15
 
17
16
  <center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
18
17
 
19
- ## About - v4.3
18
+ ## About - v4.4
20
19
 
21
20
  The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
22
21
 
@@ -1,4 +1,3 @@
1
- argparse
2
1
  requests
3
2
  pyyaml
4
3
  termcolor
@@ -1 +0,0 @@
1
- __version__="4.3"
File without changes
File without changes