waymore 4.3__py3-none-any.whl → 4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
waymore/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__="4.3"
1
+ __version__="4.5"
waymore/waymore.py CHANGED
@@ -706,23 +706,55 @@ def fixArchiveOrgUrl(url):
706
706
  url = url[0:newline]
707
707
  return url
708
708
 
709
+ # Add a link to the linksFound collection for archived responses (includes timestamp prefix)
710
+ def linksFoundResponseAdd(link):
711
+ global linksFound, argsInput, argsInputHostname
712
+
713
+ try:
714
+ if inputIsDomainANDPath:
715
+ checkInput = argsInput
716
+ else:
717
+ checkInput = argsInputHostname
718
+
719
+ # Remove the timestamp
720
+ linkWithoutTimestamp = link.split('/', 1)[-1]
721
+
722
+ # If the link specifies port 80 or 443, e.g. http://example.com:80, then remove the port
723
+ parsed = urlparse(linkWithoutTimestamp.strip())
724
+ if parsed.port in (80, 443):
725
+ new_netloc = parsed.hostname
726
+ parsed_url = parsed._replace(netloc=new_netloc).geturl()
727
+ else:
728
+ parsed_url = linkWithoutTimestamp
729
+
730
+ # Don't write it if the link does not contain the requested domain (this can sometimes happen)
731
+ if parsed_url.find(checkInput) >= 0:
732
+ linksFound.add(link)
733
+ except Exception as e:
734
+ linksFound.add(link)
735
+
709
736
  # Add a link to the linksFound collection
710
737
  def linksFoundAdd(link):
711
738
  global linksFound, argsInput, argsInputHostname
712
- # If the link specifies port 80 or 443, e.g. http://example.com:80, then remove the port
739
+
713
740
  try:
714
741
  if inputIsDomainANDPath:
715
742
  checkInput = argsInput
716
743
  else:
717
744
  checkInput = argsInputHostname
745
+
746
+ # If the link specifies port 80 or 443, e.g. http://example.com:80, then remove the port
747
+ parsed = urlparse(link.strip())
748
+ if parsed.port in (80, 443):
749
+ new_netloc = parsed.hostname
750
+ parsed_url = parsed._replace(netloc=new_netloc).geturl()
751
+ else:
752
+ parsed_url = link
753
+
718
754
  # Don't write it if the link does not contain the requested domain (this can sometimes happen)
719
- if link.find(checkInput) >= 0:
720
- parsed = urlparse(link.strip())
721
- if parsed.netloc.find(':80') >= 0 or parsed.netloc.fnd(':443') >= 0:
722
- newNetloc = parsed.netloc.split(':')[0]
723
- parsed = parsed._replace(netloc=newNetloc).geturl()
724
- linksFound.add(parsed)
725
- except:
755
+ if parsed_url.find(checkInput) >= 0:
756
+ linksFound.add(link)
757
+ except Exception as e:
726
758
  linksFound.add(link)
727
759
 
728
760
  def processArchiveUrl(url):
@@ -1352,11 +1384,15 @@ def getAlienVaultUrls():
1352
1384
  # Carry on if something was found
1353
1385
  if resp.text.lower().find('"error": "') < 0:
1354
1386
 
1355
- # Get the JSON response
1356
- jsonResp = json.loads(resp.text.strip())
1357
-
1358
- # Try to get the number of results
1359
- totalUrls = jsonResp['full_size']
1387
+ try:
1388
+ # Get the JSON response
1389
+ jsonResp = json.loads(resp.text.strip())
1390
+
1391
+ # Try to get the number of results
1392
+ totalUrls = int(jsonResp['full_size'])
1393
+ except:
1394
+ writerr(colored(getSPACER('[ ERR ] There was an unexpected response from the Alien Vault API'),'red'))
1395
+ totalUrls = 0
1360
1396
 
1361
1397
  # If there are results, carry on
1362
1398
  if totalUrls > 0 or args.check_only:
@@ -1556,19 +1592,28 @@ def getURLScanUrls():
1556
1592
  writerr(colored(getSPACER('[ ' + str(resp.status_code) + ' ] Unable to get links from urlscan.io'),'red'))
1557
1593
  return
1558
1594
 
1559
- # Get the JSON response
1560
- jsonResp = json.loads(resp.text.strip())
1595
+ try:
1596
+ # Get the JSON response
1597
+ jsonResp = json.loads(resp.text.strip())
1561
1598
 
1562
- # Get the number of results
1563
- totalUrls = jsonResp['total']
1599
+ # Get the number of results
1600
+ totalUrls = int(jsonResp['total'])
1601
+ except:
1602
+ writerr(colored(getSPACER('[ ERR ] There was an unexpected response from the URLScan API'),'red'))
1603
+ totalUrls = 0
1564
1604
 
1605
+ # Carry on if something was found
1565
1606
  if args.check_only:
1566
- hasMore = jsonResp['has_more']
1567
- if hasMore:
1568
- write(colored('Get URLs from URLScan: ','cyan')+colored('UNKNOWN requests','white'))
1569
- else:
1570
- write(colored('Get URLs from URLScan: ','cyan')+colored('1 request','white'))
1607
+ try:
1608
+ hasMore = jsonResp['has_more']
1609
+ if hasMore:
1610
+ write(colored('Get URLs from URLScan: ','cyan')+colored('UNKNOWN requests','white'))
1611
+ else:
1612
+ write(colored('Get URLs from URLScan: ','cyan')+colored('1 request','white'))
1613
+ except:
1614
+ pass
1571
1615
  checkURLScan = 1
1616
+
1572
1617
  else:
1573
1618
  # Carry on if something was found
1574
1619
  if int(totalUrls) > 0:
@@ -1714,6 +1759,7 @@ def processWayBackPage(url):
1714
1759
  if not stopSource:
1715
1760
  try:
1716
1761
  # Choose a random user agent string to use for any requests
1762
+ resp = None
1717
1763
  userAgent = random.choice(USER_AGENT)
1718
1764
  page = url.split('page=')[1]
1719
1765
  session = requests.Session()
@@ -1785,8 +1831,11 @@ def processWayBackPage(url):
1785
1831
  results = line.decode("utf-8")
1786
1832
  foundUrl = fixArchiveOrgUrl(str(results).split(' ')[1])
1787
1833
 
1788
- # Check the URL exclusions
1789
- match = re.search(r'('+re.escape(FILTER_URL).replace(',','|')+')', foundUrl, flags=re.IGNORECASE)
1834
+ # If --filter-responses-only wasn't used, then check the URL exclusions
1835
+ if args.filter_responses_only:
1836
+ match = None
1837
+ else:
1838
+ match = re.search(r'('+re.escape(FILTER_URL).replace(',','|')+')', foundUrl, flags=re.IGNORECASE)
1790
1839
  if match is None:
1791
1840
  # Only get MIME Types if --verbose option was selected
1792
1841
  if verbose():
@@ -2300,29 +2349,33 @@ def getVirusTotalUrls():
2300
2349
  return
2301
2350
 
2302
2351
  # Get the JSON response
2303
- jsonResp = json.loads(resp.text.strip())
2352
+ try:
2353
+ jsonResp = json.loads(resp.text.strip())
2304
2354
 
2305
- # Get the different URLs
2306
- if args.no_subs:
2307
- subDomains = []
2308
- else:
2355
+ # Get the different URLs
2356
+ if args.no_subs:
2357
+ subDomains = []
2358
+ else:
2359
+ try:
2360
+ subDomains = jsonResp['subdomains']
2361
+ except Exception as e:
2362
+ subDomains = []
2363
+ try:
2364
+ detectedUrls = [entry['url'] for entry in jsonResp.get('detected_urls', [])]
2365
+ except Exception as e:
2366
+ detectedUrls = []
2309
2367
  try:
2310
- subDomains = jsonResp['subdomains']
2368
+ undetectedUrls = [entry[0] for entry in jsonResp.get('undetected_urls', [])]
2311
2369
  except Exception as e:
2312
- subDomains = []
2313
- try:
2314
- detectedUrls = [entry['url'] for entry in jsonResp.get('detected_urls', [])]
2315
- except Exception as e:
2316
- detectedUrls = []
2317
- try:
2318
- undetectedUrls = [entry[0] for entry in jsonResp.get('undetected_urls', [])]
2319
- except Exception as e:
2320
- undetectedUrls = []
2321
- try:
2322
- totalUrls = set(subDomains + detectedUrls + undetectedUrls)
2323
- except Exception as e:
2370
+ undetectedUrls = []
2371
+ try:
2372
+ totalUrls = set(subDomains + detectedUrls + undetectedUrls)
2373
+ except Exception as e:
2374
+ totalUrls = []
2375
+ except:
2376
+ writerr(colored(getSPACER('[ ERR ] There was an unexpected response from the VirusTotal API'),'red'))
2324
2377
  totalUrls = []
2325
-
2378
+
2326
2379
  if args.check_only:
2327
2380
  write(colored('Get URLs from VirusTotal: ','cyan')+colored('1 request','white'))
2328
2381
  checkVirusTotal = 1
@@ -2347,7 +2400,7 @@ def getVirusTotalUrls():
2347
2400
 
2348
2401
  except Exception as e:
2349
2402
  writerr(colored('ERROR getVirusTotalUrls 1: ' + str(e), 'red'))
2350
-
2403
+
2351
2404
  def processResponses():
2352
2405
  """
2353
2406
  Get archived responses from Wayback Machine (archive.org)
@@ -2513,13 +2566,14 @@ def processResponses():
2513
2566
  except:
2514
2567
  pass
2515
2568
 
2516
- # Go through the response to save the links found
2569
+ # Go through the response to save the links found
2517
2570
  for line in resp.iter_lines():
2518
2571
  try:
2519
2572
  results = line.decode("utf-8")
2520
- timestamp = results.split(' ')[0]
2521
- originalUrl = results.split(' ')[1]
2522
- linksFoundAdd(timestamp+'/'+originalUrl)
2573
+ parts = results.split(' ', 2)
2574
+ timestamp = parts[0]
2575
+ originalUrl = parts[1]
2576
+ linksFoundResponseAdd(timestamp+'/'+originalUrl)
2523
2577
  except Exception as e:
2524
2578
  writerr(colored(getSPACER('ERROR processResponses 3: Cannot to get link from line: '+str(line)), 'red'))
2525
2579
 
@@ -2540,6 +2594,16 @@ def processResponses():
2540
2594
 
2541
2595
  # Get the total number of responses we will try to get and set the current file count to the success count
2542
2596
  totalResponses = len(linkRequests)
2597
+
2598
+ # If there are no responses to download, display an error and exit
2599
+ if totalResponses == 0:
2600
+ try:
2601
+ if originalUrl:
2602
+ writerr(colored(getSPACER('Failed to get links from Wayback Machine (archive.org) - there were results (e.g. "'+originalUrl+'") but they didn\'t match the input you gave. Check input and try again.'), 'red'))
2603
+ except:
2604
+ writerr(colored(getSPACER('Failed to get links from Wayback Machine (archive.org) - check input and try again.'), 'red'))
2605
+ return
2606
+
2543
2607
  fileCount = successCount
2544
2608
 
2545
2609
  if args.check_only:
@@ -1,12 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: waymore
3
- Version: 4.3
3
+ Version: 4.5
4
4
  Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan & VirusTotal!
5
5
  Home-page: https://github.com/xnl-h4ck3r/waymore
6
6
  Author: @xnl-h4ck3r
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
- Requires-Dist: argparse
10
9
  Requires-Dist: requests
11
10
  Requires-Dist: pyyaml
12
11
  Requires-Dist: termcolor
@@ -16,7 +15,7 @@ Requires-Dist: tldextract
16
15
 
17
16
  <center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
18
17
 
19
- ## About - v4.3
18
+ ## About - v4.5
20
19
 
21
20
  The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
22
21
 
@@ -0,0 +1,8 @@
1
+ waymore/__init__.py,sha256=HpBSj4W3_snlRrPgOuCuVP107OOZenaFQECvPnsC9V4,17
2
+ waymore/waymore.py,sha256=kfGA3T_cDADuhZ_78Ta22fxnqlGVUm56yvIncEfnZDs,170779
3
+ waymore-4.5.dist-info/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
4
+ waymore-4.5.dist-info/METADATA,sha256=v57_NUSUTSGqA1fZQb9UQgFUoOIDcc9AQeDZbRyL7kk,47221
5
+ waymore-4.5.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
6
+ waymore-4.5.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
7
+ waymore-4.5.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
8
+ waymore-4.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: setuptools (70.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,8 +0,0 @@
1
- waymore/__init__.py,sha256=-TFFyw9iukscHpq2I58oEz2oLQ995GbajzvC6Iz9ddM,17
2
- waymore/waymore.py,sha256=SWTqBUa-btDe6cWjRcL3w-ef1uK45LpfztCgvgtQPSM,168145
3
- waymore-4.3.dist-info/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
4
- waymore-4.3.dist-info/METADATA,sha256=q7_uq3p1kLMMARqUWTnA33rhxUluolsyAykMv7Ot598,47245
5
- waymore-4.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
- waymore-4.3.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
7
- waymore-4.3.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
8
- waymore-4.3.dist-info/RECORD,,
File without changes