waymore 6.5-py3-none-any.whl → 7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
waymore/waymore.py CHANGED
@@ -4,28 +4,30 @@
4
4
  # Full help here: https://github.com/xnl-h4ck3r/waymore/blob/main/README.md
5
5
  # Good luck and good hunting! If you really love the tool (or any others), or they helped you find an awesome bounty, consider BUYING ME A COFFEE! (https://ko-fi.com/xnlh4ck3r) ☕ (I could use the caffeine!)
6
6
 
7
- from urllib.parse import urlparse
8
- import requests
9
- from requests.exceptions import ConnectionError
10
- from requests.utils import quote
11
- from requests.adapters import HTTPAdapter, Retry
12
7
  import argparse
13
- from signal import SIGINT, signal
8
+ import asyncio
9
+ import enum
10
+ import json
11
+ import math
14
12
  import multiprocessing.dummy as mp
15
- from termcolor import colored
16
- from datetime import datetime, timedelta
17
- from pathlib import Path
18
- import yaml
19
13
  import os
20
- import json
21
- import re
14
+ import pickle
22
15
  import random
16
+ import re
23
17
  import sys
24
- import math
25
- import enum
26
- import pickle
27
- import time
18
+ import threading
19
+ from datetime import datetime, timedelta
20
+ from pathlib import Path
21
+ from signal import SIGINT, signal
22
+ from urllib.parse import urlparse
23
+
24
+ import requests
28
25
  import tldextract
26
+ import yaml
27
+ from requests.adapters import HTTPAdapter, Retry
28
+ from requests.exceptions import ConnectionError
29
+ from requests.utils import quote
30
+ from termcolor import colored
29
31
 
30
32
  try:
31
33
  from . import __version__
@@ -59,6 +61,12 @@ argsInput = ""
59
61
  isInputFile = False
60
62
  stopProgramCount = 0
61
63
  stopSource = False
64
+ stopSourceWayback = False
65
+ stopSourceCommonCrawl = False
66
+ stopSourceAlienVault = False
67
+ stopSourceURLScan = False
68
+ stopSourceVirusTotal = False
69
+ stopSourceIntelx = False
62
70
  successCount = 0
63
71
  failureCount = 0
64
72
  fileCount = 0
@@ -79,6 +87,10 @@ currentMemUsage = 0
79
87
  maxMemoryPercent = 0
80
88
  currentMemPercent = 0
81
89
  process = None
90
+ current_response = None
91
+ current_session = None
92
+ # Event used to interrupt long sleeps (e.g., rate-limit waits) when SIGINT is received
93
+ interrupt_event = threading.Event()
82
94
  HTTP_ADAPTER = None
83
95
  HTTP_ADAPTER_CC = None
84
96
  checkWayback = 0
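Editor's note: the `interrupt_event` added in the hunk above is what lets a Ctrl-C cut a long rate-limit wait short, because `threading.Event.wait()` (unlike a plain `time.sleep()`) returns as soon as the event is set. A minimal standalone sketch of that pattern, with illustrative names rather than the tool's exact code:

```python
import threading

# Shared event; the SIGINT handler calls interrupt_event.set()
interrupt_event = threading.Event()

def wait_or_abort(seconds):
    """Wait up to `seconds`; return True immediately if the event was set (e.g. by Ctrl-C)."""
    interrupt_event.clear()
    # Event.wait() returns True if set() happened before the timeout expired, False otherwise
    return interrupt_event.wait(seconds)

# Worker usage: stop this source early if the wait was interrupted
# if wait_or_abort(60):
#     return
```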
@@ -90,20 +102,28 @@ checkIntelx = 0
90
102
  argsInputHostname = ""
91
103
  responseOutputDirectory = ""
92
104
  urlscanRequestLinks = set()
105
+ intelxAPIIssue = False
106
+ linkCountWayback = 0
107
+ linkCountCommonCrawl = 0
108
+ linkCountAlienVault = 0
109
+ linkCountURLScan = 0
110
+ linkCountVirusTotal = 0
111
+ linkCountIntelx = 0
112
+
113
+ # Thread lock for protecting shared state during concurrent operations
114
+ links_lock = threading.Lock()
115
+
116
+ # Shared state for link collection across all sources
117
+ linksFound = set()
118
+ linkMimes = set()
93
119
 
94
120
  # Source Provider URLs
95
121
  WAYBACK_URL = "https://web.archive.org/cdx/search/cdx?url={DOMAIN}{COLLAPSE}&fl=timestamp,original,mimetype,statuscode,digest"
96
122
  CCRAWL_INDEX_URL = "https://index.commoncrawl.org/collinfo.json"
97
- ALIENVAULT_URL = (
98
- "https://otx.alienvault.com/api/v1/indicators/{TYPE}/{DOMAIN}/url_list?limit=500"
99
- )
100
- URLSCAN_URL = (
101
- "https://urlscan.io/api/v1/search/?q=domain:{DOMAIN}{DATERANGE}&size=10000"
102
- )
123
+ ALIENVAULT_URL = "https://otx.alienvault.com/api/v1/indicators/{TYPE}/{DOMAIN}/url_list?limit=500"
124
+ URLSCAN_URL = "https://urlscan.io/api/v1/search/?q=domain:{DOMAIN}{DATERANGE}&size=10000"
103
125
  URLSCAN_DOM_URL = "https://urlscan.io/dom/"
104
- VIRUSTOTAL_URL = (
105
- "https://www.virustotal.com/vtapi/v2/domain/report?apikey={APIKEY}&domain={DOMAIN}"
106
- )
126
+ VIRUSTOTAL_URL = "https://www.virustotal.com/vtapi/v2/domain/report?apikey={APIKEY}&domain={DOMAIN}"
107
127
  INTELX_SEARCH_URL = "https://2.intelx.io/phonebook/search"
108
128
  INTELX_RESULTS_URL = "https://2.intelx.io/phonebook/search/result?id="
109
129
  INTELX_ACCOUNT_URL = "https://2.intelx.io/authenticate/info"
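Editor's note: `links_lock` and the shared `linksFound`/`linkMimes` sets declared above are the coordination point for the new per-source threads. A minimal sketch of the locking pattern, assuming hypothetical worker results:

```python
import threading

links_lock = threading.Lock()
linksFound = set()

def merge_results(worker_links):
    """Fold one source's links into the shared set without racing other threads."""
    with links_lock:
        linksFound.update(worker_links)

# Each provider thread (Wayback, CommonCrawl, URLScan, ...) calls merge_results(...)
# with its own set of links once they have passed the filters.
```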
@@ -237,8 +257,7 @@ def write(text="", pipe=False):
237
257
  # or if the tool has been piped and the pipe parameter is True
238
258
  # AND if --stream is NOT active OR if it is active but we are explicitly piping (e.g. for URLs)
239
259
  if (sys.stdout.isatty() or (not sys.stdout.isatty() and pipe)) and (
240
- not (args.stream and args.mode == "U")
241
- or (args.stream and args.mode == "U" and pipe)
260
+ not (args.stream and args.mode == "U") or (args.stream and args.mode == "U" and pipe)
242
261
  ):
243
262
  # If it has carriage return in the string, don't add a newline
244
263
  if text.find("\r") > 0:
@@ -274,26 +293,14 @@ def showVersion():
274
293
  timeout=3,
275
294
  )
276
295
  except Exception:
277
- write(
278
- "Current waymore version "
279
- + __version__
280
- + " (unable to check if latest)\n"
281
- )
296
+ write("Current waymore version " + __version__ + " (unable to check if latest)\n")
282
297
  if __version__ == resp.text.split("=")[1].replace('"', "").strip():
283
298
  write(
284
- "Current waymore version "
285
- + __version__
286
- + " ("
287
- + colored("latest", "green")
288
- + ")\n"
299
+ "Current waymore version " + __version__ + " (" + colored("latest", "green") + ")\n"
289
300
  )
290
301
  else:
291
302
  write(
292
- "Current waymore version "
293
- + __version__
294
- + " ("
295
- + colored("outdated", "red")
296
- + ")\n"
303
+ "Current waymore version " + __version__ + " (" + colored("outdated", "red") + ")\n"
297
304
  )
298
305
  except Exception:
299
306
  pass
@@ -307,9 +314,7 @@ def showBanner():
307
314
  write(colored("| | | / ___ | |_| ", "red") + "| | | | |_| | | | |_| |")
308
315
  write(colored(r" \___/\_____|\__ ", "red") + r"|_|_|_|\___/| | | ____/")
309
316
  write(
310
- colored(" (____/ ", "red")
311
- + colored(" by Xnl-h4ck3r ", "magenta")
312
- + r" \_____)"
317
+ colored(" (____/ ", "red") + colored(" by Xnl-h4ck3r ", "magenta") + r" \_____)"
313
318
  )
314
319
  try:
315
320
  currentDate = datetime.now().date()
@@ -322,11 +327,7 @@ def showBanner():
322
327
  )
323
328
  )
324
329
  elif currentDate.month == 10 and currentDate.day == 31:
325
- write(
326
- colored(
327
- " *** 🎃 HAPPY HALLOWEEN! 🎃 ***", "red", attrs=["blink"]
328
- )
329
- )
330
+ write(colored(" *** 🎃 HAPPY HALLOWEEN! 🎃 ***", "red", attrs=["blink"]))
330
331
  elif currentDate.month == 1 and currentDate.day in (1, 2, 3, 4, 5):
331
332
  write(
332
333
  colored(
@@ -353,16 +354,14 @@ def handler(signal_received, frame):
353
354
  This function is called if Ctrl-C is called by the user
354
355
  An attempt will be made to try and clean up properly
355
356
  """
356
- global stopSource, stopProgram, stopProgramCount
357
+ global stopSource, stopProgram, stopProgramCount, stopSourceWayback, stopSourceCommonCrawl, stopSourceAlienVault, stopSourceURLScan, stopSourceVirusTotal, stopSourceIntelx, current_response, current_session
357
358
 
358
359
  if stopProgram is not None:
359
360
  stopProgramCount = stopProgramCount + 1
360
361
  if stopProgramCount == 1:
361
362
  writerr(
362
363
  colored(
363
- getSPACER(
364
- ">>> Please be patient... Trying to save data and end gracefully!"
365
- ),
364
+ getSPACER(">>> Please be patient... Trying to save data and end gracefully!"),
366
365
  "red",
367
366
  )
368
367
  )
@@ -384,17 +383,41 @@ def handler(signal_received, frame):
384
383
  else:
385
384
  stopProgram = StopProgram.SIGINT
386
385
  stopSource = True
386
+ stopSourceWayback = True
387
+ stopSourceCommonCrawl = True
388
+ stopSourceAlienVault = True
389
+ stopSourceURLScan = True
390
+ stopSourceVirusTotal = True
391
+ stopSourceIntelx = True
392
+ # Try to close any active response or session to interrupt blocking network I/O
393
+ try:
394
+ if current_response is not None:
395
+ try:
396
+ current_response.close()
397
+ except Exception:
398
+ pass
399
+ except Exception:
400
+ pass
401
+ try:
402
+ if current_session is not None:
403
+ try:
404
+ current_session.close()
405
+ except Exception:
406
+ pass
407
+ except Exception:
408
+ pass
409
+ # Signal any waits to stop early
410
+ try:
411
+ interrupt_event.set()
412
+ except Exception:
413
+ pass
387
414
  writerr(
388
415
  colored(
389
416
  getSPACER('>>> "Oh my God, they killed Kenny... and waymore!" - Kyle'),
390
417
  "red",
391
418
  )
392
419
  )
393
- writerr(
394
- colored(
395
- getSPACER(">>> Attempting to rescue any data gathered so far..."), "red"
396
- )
397
- )
420
+ writerr(colored(getSPACER(">>> Attempting to rescue any data gathered so far..."), "red"))
398
421
 
399
422
 
400
423
  def showOptions():
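Editor's note: the reworked handler in the hunk above interrupts blocking network I/O by closing whatever response/session is currently live and then waking any waits via `interrupt_event.set()`. A condensed, standalone sketch of wiring up such a handler (hypothetical names, not the full waymore logic):

```python
import threading
from signal import SIGINT, signal

interrupt_event = threading.Event()
current_session = None  # workers assign their requests.Session here before calling .get()

def on_sigint(signum, frame):
    """Ctrl-C: close the live session to break out of a blocking request, then wake sleepers."""
    if current_session is not None:
        try:
            current_session.close()
        except Exception:
            pass
    interrupt_event.set()

# Register the handler for SIGINT
signal(SIGINT, on_sigint)
```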
@@ -479,13 +502,13 @@ def showOptions():
479
502
  )
480
503
 
481
504
  if not args.xcc:
482
- if args.lcc == 0 and args.lcy == 0:
505
+ if args.lcc == 0 and args.from_date is None and args.to_date is None:
483
506
  write(
484
507
  colored("-lcc: " + str(args.lcc), "magenta")
485
508
  + colored(" Search ALL Common Crawl index collections.", "white")
486
509
  )
487
510
  else:
488
- if args.lcy == 0:
511
+ if args.from_date is None and args.to_date is None:
489
512
  write(
490
513
  colored("-lcc: " + str(args.lcc), "magenta")
491
514
  + colored(
@@ -498,19 +521,10 @@ def showOptions():
498
521
  write(
499
522
  colored("-lcc: " + str(args.lcc), "magenta")
500
523
  + colored(
501
- " The number of latest Common Crawl index collections to be searched.",
524
+ " The number of latest Common Crawl index collections to be searched within the specified date range (-to and -from).",
502
525
  "white",
503
526
  )
504
527
  )
505
- write(
506
- colored("-lcy: " + str(args.lcy), "magenta")
507
- + colored(
508
- " Search all Common Crawl index collections with data from year "
509
- + str(args.lcy)
510
- + " and after.",
511
- "white",
512
- )
513
- )
514
528
 
515
529
  if URLSCAN_API_KEY == "":
516
530
  write(
@@ -532,9 +546,7 @@ def showOptions():
532
546
  )
533
547
  )
534
548
  else:
535
- write(
536
- colored("VirusTotal API Key: ", "magenta") + colored(VIRUSTOTAL_API_KEY)
537
- )
549
+ write(colored("VirusTotal API Key: ", "magenta") + colored(VIRUSTOTAL_API_KEY))
538
550
 
539
551
  if INTELX_API_KEY == "":
540
552
  write(
@@ -545,9 +557,7 @@ def showOptions():
545
557
  )
546
558
  )
547
559
  else:
548
- write(
549
- colored("Intelligence X API Key: ", "magenta") + colored(INTELX_API_KEY)
550
- )
560
+ write(colored("Intelligence X API Key: ", "magenta") + colored(INTELX_API_KEY))
551
561
 
552
562
  if args.mode in ["U", "B"]:
553
563
  if args.output_urls != "":
@@ -589,9 +599,7 @@ def showOptions():
589
599
  write(
590
600
  colored("-l: " + str(args.limit), "magenta")
591
601
  + colored(
592
- " Only save the FIRST "
593
- + str(args.limit)
594
- + " responses found.",
602
+ " Only save the FIRST " + str(args.limit) + " responses found.",
595
603
  "white",
596
604
  )
597
605
  )
@@ -599,24 +607,11 @@ def showOptions():
599
607
  write(
600
608
  colored("-l: " + str(args.limit), "magenta")
601
609
  + colored(
602
- " Only save the LAST "
603
- + str(abs(args.limit))
604
- + " responses found.",
610
+ " Only save the LAST " + str(abs(args.limit)) + " responses found.",
605
611
  "white",
606
612
  )
607
613
  )
608
614
 
609
- if args.from_date is not None:
610
- write(
611
- colored("-from: " + str(args.from_date), "magenta")
612
- + colored(" The date/time to get responses from.", "white")
613
- )
614
- if args.to_date is not None:
615
- write(
616
- colored("-to: " + str(args.to_date), "magenta")
617
- + colored(" The date/time to get responses up to.", "white")
618
- )
619
-
620
615
  if args.capture_interval == "h":
621
616
  write(
622
617
  colored("-ci: " + args.capture_interval, "magenta")
@@ -667,6 +662,32 @@ def showOptions():
667
662
  )
668
663
  )
669
664
 
665
+ if args.from_date is not None:
666
+ write(
667
+ colored("-from: " + str(args.from_date), "magenta")
668
+ + colored(
669
+ " The date/time to get data from.",
670
+ "white",
671
+ )
672
+ + colored(
673
+ " NOTE: All results will still be returned from Intelligence X, and all sub domains from Virus Total, because these cannot be filtered by date.",
674
+ "yellow",
675
+ )
676
+ )
677
+
678
+ if args.to_date is not None:
679
+ write(
680
+ colored("-to: " + str(args.to_date), "magenta")
681
+ + colored(
682
+ " The date/time to get data up to.",
683
+ "white",
684
+ )
685
+ + colored(
686
+ " NOTE: All results will still be returned from Intelligence X, and all sub domains from Virus Total, because these cannot be filtered by date.",
687
+ "yellow",
688
+ )
689
+ )
690
+
670
691
  write(
671
692
  colored("-f: " + str(args.filter_responses_only), "magenta")
672
693
  + colored(
@@ -705,9 +726,7 @@ def showOptions():
705
726
  )
706
727
  )
707
728
  if not args.mc and args.fc:
708
- write(
709
- colored("Response Code exclusions: ", "magenta") + colored(FILTER_CODE)
710
- )
729
+ write(colored("Response Code exclusions: ", "magenta") + colored(FILTER_CODE))
711
730
  write(colored("Response URL exclusions: ", "magenta") + colored(FILTER_URL))
712
731
 
713
732
  if args.mt:
@@ -771,14 +790,9 @@ def showOptions():
771
790
  )
772
791
  )
773
792
  else:
774
- write(
775
- colored("Discord Webhook: ", "magenta") + colored(WEBHOOK_DISCORD)
776
- )
793
+ write(colored("Discord Webhook: ", "magenta") + colored(WEBHOOK_DISCORD))
777
794
 
778
- write(
779
- colored("Default Output Directory: ", "magenta")
780
- + colored(str(DEFAULT_OUTPUT_DIR))
781
- )
795
+ write(colored("Default Output Directory: ", "magenta") + colored(str(DEFAULT_OUTPUT_DIR)))
782
796
 
783
797
  if args.regex_after is not None:
784
798
  write(
@@ -799,7 +813,7 @@ def showOptions():
799
813
  if args.mode in ["R", "B"] or (args.mode == "U" and not args.xcc):
800
814
  write(
801
815
  colored("-p: " + str(args.processes), "magenta")
802
- + colored(" The number of parallel requests made.", "white")
816
+ + colored(" The number of parallel requests made per source.", "white")
803
817
  )
804
818
  write(
805
819
  colored("-r: " + str(args.retries), "magenta")
@@ -1084,10 +1098,7 @@ def getConfig():
1084
1098
  if args.notify_discord:
1085
1099
  try:
1086
1100
  WEBHOOK_DISCORD = config.get("WEBHOOK_DISCORD")
1087
- if (
1088
- str(WEBHOOK_DISCORD) == "None"
1089
- or str(WEBHOOK_DISCORD) == "YOUR_WEBHOOK"
1090
- ):
1101
+ if str(WEBHOOK_DISCORD) == "None" or str(WEBHOOK_DISCORD) == "YOUR_WEBHOOK":
1091
1102
  writerr(
1092
1103
  colored(
1093
1104
  'No value for "WEBHOOK_DISCORD" in config.yml - default set',
@@ -1164,9 +1175,7 @@ def getConfig():
1164
1175
  else:
1165
1176
  writerr(
1166
1177
  colored(
1167
- 'WARNING: Cannot find file "'
1168
- + args.config
1169
- + '", so using default values',
1178
+ 'WARNING: Cannot find file "' + args.config + '", so using default values',
1170
1179
  "yellow",
1171
1180
  )
1172
1181
  )
@@ -1238,9 +1247,7 @@ def printProgressBar(
1238
1247
  if not (args.stream and args.mode == "U"):
1239
1248
  try:
1240
1249
  percent = (
1241
- ("{0:." + str(decimals) + "f}")
1242
- .format(100 * (iteration / float(total)))
1243
- .rjust(5)
1250
+ ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total))).rjust(5)
1244
1251
  )
1245
1252
  filledLength = int(length * iteration // total)
1246
1253
  bar = fill * filledLength + "-" * (length - filledLength)
@@ -1297,7 +1304,7 @@ def fixArchiveOrgUrl(url):
1297
1304
 
1298
1305
  # Add a link to the linksFound collection for archived responses (includes timestamp prefix)
1299
1306
  def linksFoundResponseAdd(link):
1300
- global linksFound, argsInput, argsInputHostname
1307
+ global linksFound, argsInput, argsInputHostname, links_lock
1301
1308
 
1302
1309
  try:
1303
1310
  if inputIsDomainANDPath:
@@ -1318,20 +1325,22 @@ def linksFoundResponseAdd(link):
1318
1325
 
1319
1326
  # Don't write it if the link does not contain the requested domain (this can sometimes happen)
1320
1327
  if parsed_url.lower().find(checkInput.lower()) >= 0:
1321
- linksFound.add(link)
1328
+ with links_lock:
1329
+ linksFound.add(link)
1322
1330
  # If streaming is enabled and mode is 'U', print the link to stdout
1323
1331
  if args.stream and args.mode == "U":
1324
1332
  write(link, pipe=True)
1325
1333
  except Exception:
1326
- linksFound.add(link)
1334
+ with links_lock:
1335
+ linksFound.add(link)
1327
1336
  # If streaming is enabled and mode is 'U', print the link to stdout
1328
1337
  if args.stream and args.mode == "U":
1329
1338
  write(link, pipe=True)
1330
1339
 
1331
1340
 
1332
1341
  # Add a link to the linksFound collection
1333
- def linksFoundAdd(link):
1334
- global linksFound, argsInput, argsInputHostname
1342
+ def linksFoundAdd(link, source_set=None):
1343
+ global linksFound, argsInput, argsInputHostname, links_lock
1335
1344
 
1336
1345
  try:
1337
1346
  if inputIsDomainANDPath:
@@ -1349,12 +1358,20 @@ def linksFoundAdd(link):
1349
1358
 
1350
1359
  # Don't write it if the link does not contain the requested domain (this can sometimes happen)
1351
1360
  if parsed_url.find(checkInput) >= 0:
1352
- linksFound.add(link)
1361
+ with links_lock:
1362
+ if source_set is not None:
1363
+ source_set.add(link)
1364
+ else:
1365
+ linksFound.add(link)
1353
1366
  # If streaming is enabled and mode is 'U', print the link to stdout
1354
1367
  if args.stream and args.mode == "U":
1355
1368
  write(link, pipe=True)
1356
1369
  except Exception:
1357
- linksFound.add(link)
1370
+ with links_lock:
1371
+ if source_set is not None:
1372
+ source_set.add(link)
1373
+ else:
1374
+ linksFound.add(link)
1358
1375
  # If streaming is enabled and mode is 'U', print the link to stdout
1359
1376
  if args.stream and args.mode == "U":
1360
1377
  write(link, pipe=True)
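Editor's note: the new optional `source_set` parameter on `linksFoundAdd()` lets each provider collect into its own set (for per-source counts) while the default path still feeds the global collection. A small sketch of that default-argument pattern and the final merge, using stand-in names:

```python
allLinks = set()  # stand-in for the global linksFound

def add_link(link, source_set=None):
    """Add to the per-source set when one is given, otherwise to the global set."""
    target = source_set if source_set is not None else allLinks
    target.add(link)

waybackLinks = set()
add_link("https://example.com/a", waybackLinks)  # counted per source
add_link("https://example.com/b")                # straight to the global set
allLinks.update(waybackLinks)                    # merged once the source finishes
```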
@@ -1394,9 +1411,7 @@ def processArchiveUrl(url):
1394
1411
  )
1395
1412
  archiveHtml = str(resp.text)
1396
1413
  try:
1397
- contentType = (
1398
- resp.headers.get("Content-Type").split(";")[0].lower()
1399
- )
1414
+ contentType = resp.headers.get("Content-Type").split(";")[0].lower()
1400
1415
  except Exception:
1401
1416
  contentType = ""
1402
1417
 
@@ -1407,18 +1422,13 @@ def processArchiveUrl(url):
1407
1422
  # If the FILTER_CODE includes 404, and it doesn't seem to be a custom 404 page
1408
1423
  if "404" not in FILTER_CODE or (
1409
1424
  "404" in FILTER_CODE
1410
- and not re.findall(
1411
- REGEX_404, archiveHtml, re.DOTALL | re.IGNORECASE
1412
- )
1425
+ and not re.findall(REGEX_404, archiveHtml, re.DOTALL | re.IGNORECASE)
1413
1426
  ):
1414
1427
 
1415
1428
  # Add the URL as a comment at the start of the response
1416
1429
  if args.url_filename:
1417
1430
  archiveHtml = (
1418
- "/* Original URL: "
1419
- + archiveUrl
1420
- + " */\n"
1421
- + archiveHtml
1431
+ "/* Original URL: " + archiveUrl + " */\n" + archiveHtml
1422
1432
  )
1423
1433
 
1424
1434
  # Remove all web archive references in the response
@@ -1565,9 +1575,7 @@ def processArchiveUrl(url):
1565
1575
  # Determine the extension from the content type
1566
1576
  try:
1567
1577
  if contentType != "":
1568
- extension = contentType.split("/")[
1569
- 1
1570
- ].replace("x-", "")
1578
+ extension = contentType.split("/")[1].replace("x-", "")
1571
1579
  if extension == "":
1572
1580
  extension = contentType.lower()
1573
1581
  except Exception:
@@ -1588,15 +1596,11 @@ def processArchiveUrl(url):
1588
1596
  # If extension is still blank, set to html if the content ends with HTML tag, otherwise set to unknown
1589
1597
  if extension == "":
1590
1598
  if (
1591
- archiveHtml.lower()
1592
- .strip()
1593
- .endswith("</html>")
1599
+ archiveHtml.lower().strip().endswith("</html>")
1594
1600
  or archiveHtml.lower()
1595
1601
  .strip()
1596
1602
  .startswith("<!doctype html")
1597
- or archiveHtml.lower()
1598
- .strip()
1599
- .startswith("<html")
1603
+ or archiveHtml.lower().strip().startswith("<html")
1600
1604
  ):
1601
1605
  extension = "html"
1602
1606
  else:
@@ -1626,12 +1630,10 @@ def processArchiveUrl(url):
1626
1630
  except Exception as e:
1627
1631
  writerr(
1628
1632
  colored(
1629
- getSPACER(
1630
- "[ ERR ] Failed to write file "
1631
- + filePath
1632
- + ": "
1633
- + str(e)
1634
- ),
1633
+ "Wayback - [ ERR ] Failed to write file "
1634
+ + filePath
1635
+ + ": "
1636
+ + str(e),
1635
1637
  "red",
1636
1638
  )
1637
1639
  )
@@ -1641,23 +1643,16 @@ def processArchiveUrl(url):
1641
1643
  try:
1642
1644
  timestamp = str(datetime.now())
1643
1645
  indexFile.write(
1644
- hashValue
1645
- + ","
1646
- + archiveUrl
1647
- + " ,"
1648
- + timestamp
1649
- + "\n"
1646
+ hashValue + "," + archiveUrl + " ," + timestamp + "\n"
1650
1647
  )
1651
1648
  indexFile.flush()
1652
1649
  except Exception as e:
1653
1650
  writerr(
1654
1651
  colored(
1655
- getSPACER(
1656
- '[ ERR ] Failed to write to waymore_index.txt for "'
1657
- + archiveUrl
1658
- + '": '
1659
- + str(e)
1660
- ),
1652
+ 'Wayback - [ ERR ] Failed to write to waymore_index.txt for "'
1653
+ + archiveUrl
1654
+ + '": '
1655
+ + str(e),
1661
1656
  "red",
1662
1657
  )
1663
1658
  )
@@ -1668,9 +1663,7 @@ def processArchiveUrl(url):
1668
1663
  debugText = ""
1669
1664
  if archiveHtml.lower().find("archive.org") > 0:
1670
1665
  debugText = "ARCHIVE.ORG"
1671
- elif (
1672
- archiveHtml.lower().find("internet archive") > 0
1673
- ):
1666
+ elif archiveHtml.lower().find("internet archive") > 0:
1674
1667
  debugText = "INTERNET ARCHIVE"
1675
1668
  elif archiveHtml.lower().find("wombat") > 0:
1676
1669
  debugText = "WOMBAT (JS)"
@@ -1697,11 +1690,7 @@ def processArchiveUrl(url):
1697
1690
  if verbose():
1698
1691
  writerr(
1699
1692
  colored(
1700
- getSPACER(
1701
- '[ ERR ] Wayback Machine (archive.org) returned a problem for "'
1702
- + archiveUrl
1703
- + '"'
1704
- ),
1693
+ 'Wayback - [ ERR ] returned a problem for "' + archiveUrl + '"',
1705
1694
  "red",
1706
1695
  )
1707
1696
  )
@@ -1710,11 +1699,7 @@ def processArchiveUrl(url):
1710
1699
  if verbose():
1711
1700
  writerr(
1712
1701
  colored(
1713
- getSPACER(
1714
- '[ ERR ] Wayback Machine (archive.org) connection error for "'
1715
- + archiveUrl
1716
- + '"'
1717
- ),
1702
+ 'Wayback - [ ERR ] connection error for "' + archiveUrl + '"',
1718
1703
  "red",
1719
1704
  )
1720
1705
  )
@@ -1724,25 +1709,21 @@ def processArchiveUrl(url):
1724
1709
  try:
1725
1710
  writerr(
1726
1711
  colored(
1727
- getSPACER(
1728
- "[ "
1729
- + str(resp.status_code)
1730
- + ' ] Failed to get response for "'
1731
- + archiveUrl
1732
- + '"'
1733
- ),
1712
+ "Wayback - [ "
1713
+ + str(resp.status_code)
1714
+ + ' ] Failed to get response for "'
1715
+ + archiveUrl
1716
+ + '"',
1734
1717
  "red",
1735
1718
  )
1736
1719
  )
1737
1720
  except Exception:
1738
1721
  writerr(
1739
1722
  colored(
1740
- getSPACER(
1741
- '[ ERR ] Failed to get response for "'
1742
- + archiveUrl
1743
- + '": '
1744
- + str(e)
1745
- ),
1723
+ 'Wayback - [ ERR ] Failed to get response for "'
1724
+ + archiveUrl
1725
+ + '": '
1726
+ + str(e),
1746
1727
  "red",
1747
1728
  )
1748
1729
  )
@@ -1769,9 +1750,7 @@ def processArchiveUrl(url):
1769
1750
  )
1770
1751
  except Exception:
1771
1752
  if verbose():
1772
- suffix = (
1773
- 'Complete (To show mem use, run "pip install psutil")'
1774
- )
1753
+ suffix = 'Complete (To show mem use, run "pip install psutil")'
1775
1754
  printProgressBar(
1776
1755
  successCount + failureCount,
1777
1756
  totalResponses,
@@ -1796,9 +1775,7 @@ def processArchiveUrl(url):
1796
1775
 
1797
1776
  except Exception as e:
1798
1777
  if verbose():
1799
- writerr(
1800
- colored(getSPACER('Error for "' + url + '": ' + str(e)), "red")
1801
- )
1778
+ writerr(colored('Wayback - [ ERR ] Error for "' + url + '": ' + str(e), "red"))
1802
1779
 
1803
1780
  except Exception as e:
1804
1781
  writerr(colored("ERROR processArchiveUrl 1: " + str(e), "red"))
@@ -1883,7 +1860,7 @@ def processURLOutput():
1883
1860
  linkCount = len(linksFound)
1884
1861
  write(
1885
1862
  getSPACER(
1886
- colored("Links found for " + subs + argsInput + ": ", "cyan")
1863
+ colored("\nTotal unique links found for " + subs + argsInput + ": ", "cyan")
1887
1864
  + colored(str(linkCount) + " 🤘", "white")
1888
1865
  )
1889
1866
  + "\n"
@@ -1926,7 +1903,7 @@ def processURLOutput():
1926
1903
  appendedUrls = False
1927
1904
  if not args.output_overwrite:
1928
1905
  try:
1929
- with open(filename, "r") as existingLinks:
1906
+ with open(filename) as existingLinks:
1930
1907
  for link in existingLinks.readlines():
1931
1908
  linksFound.add(link.strip())
1932
1909
  appendedUrls = True
@@ -1968,16 +1945,10 @@ def processURLOutput():
1968
1945
  writerr(colored("ERROR processURLOutput 3: " + str(e), "red"))
1969
1946
 
1970
1947
  # If there are less links output because of filters, show the new total
1971
- if (
1972
- args.regex_after is not None
1973
- and linkCount > 0
1974
- and outputCount < linkCount
1975
- ):
1948
+ if args.regex_after is not None and linkCount > 0 and outputCount < linkCount:
1976
1949
  write(
1977
1950
  colored(
1978
- 'Links found after applying filter "'
1979
- + args.regex_after
1980
- + '": ',
1951
+ 'Links found after applying filter "' + args.regex_after + '": ',
1981
1952
  "cyan",
1982
1953
  )
1983
1954
  + colored(str(outputCount) + " 🤘\n", "white")
@@ -1992,11 +1963,7 @@ def processURLOutput():
1992
1963
 
1993
1964
  if verbose():
1994
1965
  if outputCount == 0:
1995
- write(
1996
- colored(
1997
- "No links were found so nothing written to file.", "cyan"
1998
- )
1999
- )
1966
+ write(colored("No links were found so nothing written to file.", "cyan"))
2000
1967
  else:
2001
1968
  if appendedUrls:
2002
1969
  write(
@@ -2018,11 +1985,11 @@ def processURLOutput():
2018
1985
  if os.path.exists(filenameOld) and os.path.exists(filename):
2019
1986
 
2020
1987
  # Get all the old links
2021
- with open(filenameOld, "r") as oldFile:
1988
+ with open(filenameOld) as oldFile:
2022
1989
  oldLinks = set(oldFile.readlines())
2023
1990
 
2024
1991
  # Get all the new links
2025
- with open(filename, "r") as newFile:
1992
+ with open(filename) as newFile:
2026
1993
  newLinks = set(newFile.readlines())
2027
1994
 
2028
1995
  # Create a file with most recent new links
@@ -2061,7 +2028,7 @@ def stripUnwanted(url):
2061
2028
  """
2062
2029
  parsed = urlparse(url)
2063
2030
  # Strip scheme
2064
- scheme = "%s://" % parsed.scheme
2031
+ scheme = f"{parsed.scheme}://"
2065
2032
  strippedUrl = parsed.geturl().replace(scheme, "", 1)
2066
2033
  # Strip query string and fragment
2067
2034
  strippedUrl = strippedUrl.split("#")[0].split("?")[0]
@@ -2092,7 +2059,7 @@ def validateArgInput(x):
2092
2059
  if os.path.isfile(x):
2093
2060
  isInputFile = True
2094
2061
  # Open file and put all values in input list
2095
- with open(x, "r") as inputFile:
2062
+ with open(x) as inputFile:
2096
2063
  lines = inputFile.readlines()
2097
2064
  # Check if any lines start with a *. and replace without the *.
2098
2065
  for line in lines:
@@ -2189,9 +2156,7 @@ def validateArgProviders(x):
2189
2156
  x = x.lower()
2190
2157
  providers = x.split(",")
2191
2158
  for provider in providers:
2192
- if not re.fullmatch(
2193
- r"(wayback|commoncrawl|otx|urlscan|virustotal|intelx)", provider
2194
- ):
2159
+ if not re.fullmatch(r"(wayback|commoncrawl|otx|urlscan|virustotal|intelx)", provider):
2195
2160
  invalid = True
2196
2161
  break
2197
2162
  if invalid:
@@ -2201,16 +2166,32 @@ def validateArgProviders(x):
2201
2166
  return x
2202
2167
 
2203
2168
 
2169
+ def parseDateArg(dateArg):
2170
+ """
2171
+ Parse a date argument from the command line into a datetime object
2172
+ """
2173
+ formats = {
2174
+ 4: "%Y",
2175
+ 6: "%Y%m",
2176
+ 8: "%Y%m%d",
2177
+ 10: "%Y%m%d%H",
2178
+ 12: "%Y%m%d%H%M",
2179
+ 14: "%Y%m%d%H%M%S",
2180
+ }
2181
+ fmt = formats.get(len(dateArg))
2182
+ return datetime.strptime(dateArg, fmt)
2183
+
2184
+
2204
2185
  def processAlienVaultPage(url):
2205
2186
  """
2206
2187
  Get URLs from a specific page of otx.alienvault.org API for the input domain
2207
2188
  """
2208
- global totalPages, linkMimes, linksFound, stopSource, argsInput
2189
+ global totalPages, linkMimes, linksFound, stopSourceAlienVault, argsInput, linkCountAlienVault
2209
2190
  try:
2210
2191
  # Get memory in case it exceeds threshold
2211
2192
  getMemory()
2212
2193
 
2213
- if not stopSource:
2194
+ if not stopSourceAlienVault:
2214
2195
  try:
2215
2196
  # Choose a random user agent string to use for any requests
2216
2197
  userAgent = random.choice(USER_AGENT)
@@ -2222,9 +2203,7 @@ def processAlienVaultPage(url):
2222
2203
  except ConnectionError:
2223
2204
  writerr(
2224
2205
  colored(
2225
- getSPACER(
2226
- "[ ERR ] alienvault.org connection error for page " + page
2227
- ),
2206
+ getSPACER("AlienVault - [ ERR ] Connection error for page " + page),
2228
2207
  "red",
2229
2208
  )
2230
2209
  )
@@ -2233,12 +2212,10 @@ def processAlienVaultPage(url):
2233
2212
  except Exception as e:
2234
2213
  writerr(
2235
2214
  colored(
2236
- getSPACER(
2237
- "[ ERR ] Error getting response for page "
2238
- + page
2239
- + " - "
2240
- + str(e)
2241
- ),
2215
+ "AlienVault -[ ERR ] Error getting response for page "
2216
+ + page
2217
+ + " - "
2218
+ + str(e),
2242
2219
  "red",
2243
2220
  )
2244
2221
  )
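Editor's note: the `parseDateArg()` helper introduced in this hunk keys the `strptime` format off the argument's length, so `-from`/`-to` accept anything from a bare year down to a full timestamp (in the helper itself, an unsupported length makes `formats.get()` return `None`, which `strptime` then rejects). A usage sketch of the same idea:

```python
from datetime import datetime

formats = {
    4: "%Y", 6: "%Y%m", 8: "%Y%m%d",
    10: "%Y%m%d%H", 12: "%Y%m%d%H%M", 14: "%Y%m%d%H%M%S",
}

def parse_date_arg(date_arg):
    """Pick the strptime format from the argument length."""
    return datetime.strptime(date_arg, formats[len(date_arg)])

print(parse_date_arg("2024"))          # 2024-01-01 00:00:00
print(parse_date_arg("20240630"))      # 2024-06-30 00:00:00
print(parse_date_arg("202406301530"))  # 2024-06-30 15:30:00
```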
@@ -2249,26 +2226,21 @@ def processAlienVaultPage(url):
2249
2226
  if resp is not None:
2250
2227
  # If a status of 429 is returned, stop processing Alien Vault
2251
2228
  if resp.status_code == 429:
2252
- writerr(
2253
- colored(
2254
- getSPACER(
2255
- "[ 429 ] Alien Vault rate limit reached, so stopping. Links that have already been retrieved will be saved."
2256
- ),
2257
- "red",
2229
+ if not stopSourceAlienVault: # Only print message once
2230
+ writerr(
2231
+ colored(
2232
+ "AlienVault - [ 429 ] Rate limit reached, so stopping. Links that have already been retrieved will be saved.",
2233
+ "red",
2234
+ )
2258
2235
  )
2259
- )
2260
- stopSource = True
2236
+ stopSourceAlienVault = True
2261
2237
  return
2262
2238
  # If the response from alienvault.com is empty then skip
2263
2239
  if resp.text == "" and totalPages == 0:
2264
2240
  if verbose():
2265
2241
  writerr(
2266
2242
  colored(
2267
- getSPACER(
2268
- "[ ERR ] "
2269
- + url
2270
- + " gave an empty response."
2271
- ),
2243
+ "AlienVault - [ ERR ] " + url + " gave an empty response.",
2272
2244
  "red",
2273
2245
  )
2274
2246
  )
@@ -2278,12 +2250,10 @@ def processAlienVaultPage(url):
2278
2250
  if verbose():
2279
2251
  writerr(
2280
2252
  colored(
2281
- getSPACER(
2282
- "[ "
2283
- + str(resp.status_code)
2284
- + " ] Error for "
2285
- + url
2286
- ),
2253
+ "AlienVauilt - [ "
2254
+ + str(resp.status_code)
2255
+ + " ] Error for "
2256
+ + url,
2287
2257
  "red",
2288
2258
  )
2289
2259
  )
@@ -2306,6 +2276,7 @@ def processAlienVaultPage(url):
2306
2276
  if foundUrl != "":
2307
2277
  # If filters are not required and subs are wanted then just add the URL to the list
2308
2278
  if args.filter_responses_only and not args.no_subs:
2279
+ linkCountAlienVault = linkCountAlienVault + 1
2309
2280
  linksFoundAdd(foundUrl)
2310
2281
  else:
2311
2282
  addLink = True
@@ -2332,9 +2303,7 @@ def processAlienVaultPage(url):
2332
2303
  # Compare the HTTP code gainst the Code exclusions and matches
2333
2304
  if MATCH_CODE != "":
2334
2305
  match = re.search(
2335
- r"("
2336
- + re.escape(MATCH_CODE).replace(",", "|")
2337
- + ")",
2306
+ r"(" + re.escape(MATCH_CODE).replace(",", "|") + ")",
2338
2307
  httpCode,
2339
2308
  flags=re.IGNORECASE,
2340
2309
  )
@@ -2342,9 +2311,7 @@ def processAlienVaultPage(url):
2342
2311
  addLink = False
2343
2312
  else:
2344
2313
  match = re.search(
2345
- r"("
2346
- + re.escape(FILTER_CODE).replace(",", "|")
2347
- + ")",
2314
+ r"(" + re.escape(FILTER_CODE).replace(",", "|") + ")",
2348
2315
  httpCode,
2349
2316
  flags=re.IGNORECASE,
2350
2317
  )
@@ -2354,9 +2321,7 @@ def processAlienVaultPage(url):
2354
2321
  # Check the URL exclusions
2355
2322
  if addLink:
2356
2323
  match = re.search(
2357
- r"("
2358
- + re.escape(FILTER_URL).replace(",", "|")
2359
- + ")",
2324
+ r"(" + re.escape(FILTER_URL).replace(",", "|") + ")",
2360
2325
  foundUrl,
2361
2326
  flags=re.IGNORECASE,
2362
2327
  )
@@ -2367,9 +2332,7 @@ def processAlienVaultPage(url):
2367
2332
  if addLink and args.keywords_only:
2368
2333
  if args.keywords_only == "#CONFIG":
2369
2334
  match = re.search(
2370
- r"("
2371
- + re.escape(FILTER_KEYWORDS).replace(",", "|")
2372
- + ")",
2335
+ r"(" + re.escape(FILTER_KEYWORDS).replace(",", "|") + ")",
2373
2336
  foundUrl,
2374
2337
  flags=re.IGNORECASE,
2375
2338
  )
@@ -2382,9 +2345,39 @@ def processAlienVaultPage(url):
2382
2345
  if match is None:
2383
2346
  addLink = False
2384
2347
 
2348
+ # Check date is in range if required
2349
+ if args.from_date is not None or args.to_date is not None:
2350
+ try:
2351
+ urlDateStr = urlSection["date"]
2352
+
2353
+ # Remove fractional seconds if present
2354
+ urlDateStr = urlDateStr.split(".")[0]
2355
+
2356
+ urlDate = datetime.strptime(urlDateStr, "%Y-%m-%dT%H:%M:%S")
2357
+
2358
+ # If from date passed, check
2359
+ if args.from_date is not None:
2360
+ fromDate = parseDateArg(args.from_date)
2361
+ if urlDate < fromDate:
2362
+ addLink = False
2363
+ # If to date passed, check
2364
+ if args.to_date is not None:
2365
+ toDate = parseDateArg(args.to_date)
2366
+ if urlDate >= toDate:
2367
+ addLink = False
2368
+ except Exception as e:
2369
+ if verbose():
2370
+ writerr(
2371
+ colored(
2372
+ "ERROR processLAlienVaultPage date check: "
2373
+ + str(e),
2374
+ "red",
2375
+ )
2376
+ )
2377
+
2385
2378
  # Add link if it passed filters
2386
2379
  if addLink:
2387
- linksFoundAdd(foundUrl)
2380
+ linksFoundAdd(foundUrl, linksFoundAlienVault)
2388
2381
  else:
2389
2382
  pass
2390
2383
  except Exception as e:
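Editor's note: the date-range check added above parses AlienVault's ISO-8601 `date` field (dropping any fractional seconds) and skips links outside the `-from`/`-to` window. A minimal standalone version of that comparison:

```python
from datetime import datetime

def in_range(url_date_str, from_dt=None, to_dt=None):
    """True if an AlienVault-style timestamp falls inside the requested window."""
    url_date = datetime.strptime(url_date_str.split(".")[0], "%Y-%m-%dT%H:%M:%S")
    if from_dt is not None and url_date < from_dt:
        return False
    if to_dt is not None and url_date >= to_dt:
        return False
    return True

print(in_range("2024-03-01T10:15:00.123456", from_dt=datetime(2024, 1, 1)))  # True
print(in_range("2023-12-31T23:59:59", from_dt=datetime(2024, 1, 1)))         # False
```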
@@ -2396,12 +2389,12 @@ def getAlienVaultUrls():
2396
2389
  """
2397
2390
  Get URLs from the Alien Vault OTX, otx.alienvault.com
2398
2391
  """
2399
- global linksFound, waymorePath, subs, path, stopProgram, totalPages, stopSource, argsInput, checkAlienVault, inputIsSubDomain, argsInputHostname
2392
+ global linksFound, waymorePath, subs, path, stopProgram, totalPages, stopSourceAlienVault, argsInput, checkAlienVault, inputIsSubDomain, argsInputHostname, linkCountAlienVault, linksFoundAlienVault
2400
2393
 
2401
2394
  # Write the file of URL's for the passed domain/URL
2402
2395
  try:
2403
- stopSource = False
2404
- originalLinkCount = len(linksFound)
2396
+ stopSourceAlienVault = False
2397
+ linksFoundAlienVault = set()
2405
2398
 
2406
2399
  # Set the Alien Vault API indicator types of domain or hostname (has subdomain)
2407
2400
  if inputIsSubDomain:
@@ -2418,11 +2411,12 @@ def getAlienVaultUrls():
2418
2411
 
2419
2412
  # Get the number of pages (i.e. separate requests) that are going to be made to alienvault.com
2420
2413
  totalPages = 0
2414
+ resp = None
2421
2415
  try:
2422
2416
  if not args.check_only:
2423
2417
  write(
2424
2418
  colored(
2425
- "\rGetting the number of alienvault.com pages to search...\r",
2419
+ "AlienVault - [ INFO ] Getting the number of alienvault.com pages to search...",
2426
2420
  "cyan",
2427
2421
  )
2428
2422
  )
@@ -2431,43 +2425,39 @@ def getAlienVaultUrls():
2431
2425
  session = requests.Session()
2432
2426
  session.mount("https://", HTTP_ADAPTER)
2433
2427
  session.mount("http://", HTTP_ADAPTER)
2434
- resp = session.get(
2435
- url + "&showNumPages=True", headers={"User-Agent": userAgent}
2436
- )
2428
+ resp = session.get(url + "&showNumPages=True", headers={"User-Agent": userAgent})
2437
2429
  except Exception as e:
2438
2430
  writerr(
2439
2431
  colored(
2440
- getSPACER(
2441
- "[ ERR ] Unable to get links from alienvault.com: " + str(e)
2442
- ),
2432
+ "AlienVault - [ ERR ] Unable to get links from alienvault.com: " + str(e),
2443
2433
  "red",
2444
2434
  )
2445
2435
  )
2446
- return
2436
+ # Don't return - continue to show link count at the end
2447
2437
 
2448
2438
  # If the rate limit was reached end now
2449
- if resp.status_code == 429:
2439
+ if resp is not None and resp.status_code == 429:
2450
2440
  writerr(
2451
2441
  colored(
2452
- getSPACER(
2453
- "[ 429 ] Alien Vault rate limit reached so unable to get links."
2454
- ),
2442
+ "AlienVault - [ 429 ] Rate limit reached so unable to get links.",
2455
2443
  "red",
2456
2444
  )
2457
2445
  )
2458
- return
2446
+ # Don't return - continue to show link count at the end
2459
2447
 
2460
- if verbose():
2448
+ if resp is not None and verbose():
2461
2449
  write(
2462
- getSPACER(
2463
- colored("The Alien Vault URL requested to get links: ", "magenta")
2464
- + colored(url, "white")
2465
- )
2450
+ colored("AlienVault - [ INFO ] The URL requested to get links: ", "magenta")
2451
+ + colored(url, "white")
2466
2452
  + "\n"
2467
2453
  )
2468
2454
 
2469
2455
  # Carry on if something was found
2470
- if resp.text.lower().find('"error": "') < 0:
2456
+ if (
2457
+ resp is not None
2458
+ and resp.status_code != 429
2459
+ and resp.text.lower().find('"error": "') < 0
2460
+ ):
2471
2461
 
2472
2462
  try:
2473
2463
  # Get the JSON response
@@ -2478,9 +2468,7 @@ def getAlienVaultUrls():
2478
2468
  except Exception:
2479
2469
  writerr(
2480
2470
  colored(
2481
- getSPACER(
2482
- "[ ERR ] There was an unexpected response from the Alien Vault API"
2483
- ),
2471
+ "AlienVault - [ ERR ] There was an unexpected response from the API",
2484
2472
  "red",
2485
2473
  )
2486
2474
  )
@@ -2502,16 +2490,16 @@ def getAlienVaultUrls():
2502
2490
  else:
2503
2491
  checkAlienVault = totalPages
2504
2492
  write(
2505
- colored("Get URLs from Alien Vault: ", "cyan")
2493
+ colored("AlienVault - [ INFO ] Getting URLs from Alien Vault: ", "cyan")
2506
2494
  + colored(str(checkAlienVault) + " requests", "white")
2507
2495
  )
2508
2496
  else:
2509
2497
  # if the page number was found then display it, but otherwise we will just try to increment until we have everything
2510
2498
  write(
2511
2499
  colored(
2512
- "\rGetting links from "
2500
+ "AlienVault - [ INFO ] Getting links from "
2513
2501
  + str(totalPages)
2514
- + " alienvault.com API requests (this can take a while for some domains)...\r",
2502
+ + " alienvault.com API requests (this can take a while for some domains)...",
2515
2503
  "cyan",
2516
2504
  )
2517
2505
  )
@@ -2531,32 +2519,19 @@ def getAlienVaultUrls():
2531
2519
  if verbose():
2532
2520
  writerr(
2533
2521
  colored(
2534
- getSPACER(
2535
- "[ ERR ] An error was returned in the alienvault.com response."
2536
- )
2537
- + "\n",
2522
+ "AlienVault - [ ERR ] An error was returned in the response." + "\n",
2538
2523
  "red",
2539
2524
  )
2540
2525
  )
2541
2526
 
2542
2527
  if not args.check_only:
2543
- linkCount = len(linksFound) - originalLinkCount
2544
- if args.xwm and args.xcc:
2545
- write(
2546
- getSPACER(
2547
- colored("Links found on alienvault.com: ", "cyan")
2548
- + colored(str(linkCount), "white")
2549
- )
2550
- + "\n"
2551
- )
2552
- else:
2553
- write(
2554
- getSPACER(
2555
- colored("Extra links found on alienvault.com: ", "cyan")
2556
- + colored(str(linkCount), "white")
2557
- )
2558
- + "\n"
2559
- )
2528
+ linkCountAlienVault = len(linksFoundAlienVault)
2529
+ write(
2530
+ colored("AlienVault - [ INFO ] Links found on alienvault.com: ", "cyan")
2531
+ + colored(str(linkCountAlienVault), "white")
2532
+ )
2533
+ linksFound.update(linksFoundAlienVault)
2534
+ linksFoundAlienVault.clear()
2560
2535
 
2561
2536
  except Exception as e:
2562
2537
  writerr(colored("ERROR getAlienVaultUrls 1: " + str(e), "red"))
@@ -2566,7 +2541,7 @@ def processURLScanUrl(url, httpCode, mimeType, urlscanID=""):
2566
2541
  """
2567
2542
  Process a specific URL from urlscan.io to determine whether to save the link
2568
2543
  """
2569
- global argsInput, argsInputHostname, urlscanRequestLinks
2544
+ global argsInput, argsInputHostname, urlscanRequestLinks, links_lock, linkCountURLScan, linksFoundURLScan
2570
2545
 
2571
2546
  addLink = True
2572
2547
 
@@ -2629,9 +2604,7 @@ def processURLScanUrl(url, httpCode, mimeType, urlscanID=""):
2629
2604
  flags=re.IGNORECASE,
2630
2605
  )
2631
2606
  else:
2632
- match = re.search(
2633
- r"(" + args.keywords_only + ")", url, flags=re.IGNORECASE
2634
- )
2607
+ match = re.search(r"(" + args.keywords_only + ")", url, flags=re.IGNORECASE)
2635
2608
  if match is None:
2636
2609
  addLink = False
2637
2610
 
@@ -2657,7 +2630,8 @@ def processURLScanUrl(url, httpCode, mimeType, urlscanID=""):
2657
2630
  # Add MIME Types if --verbose option was selected
2658
2631
  if verbose():
2659
2632
  if mimeType.strip() != "":
2660
- linkMimes.add(mimeType)
2633
+ with links_lock:
2634
+ linkMimes.add(mimeType)
2661
2635
 
2662
2636
  # Add link if it passed filters
2663
2637
  if addLink:
@@ -2677,11 +2651,12 @@ def processURLScanUrl(url, httpCode, mimeType, urlscanID=""):
2677
2651
  )
2678
2652
  if match is not None:
2679
2653
  if args.mode in ("U", "B"):
2680
- linksFoundAdd(url)
2654
+ linksFoundAdd(url, linksFoundURLScan)
2681
2655
  # If Response mode is requested then add the DOM ID to try later, for the number of responses wanted
2682
2656
  if urlscanID != "" and args.mode in ("R", "B"):
2683
2657
  if args.limit == 0 or len(urlscanRequestLinks) < args.limit:
2684
- urlscanRequestLinks.add((url, URLSCAN_DOM_URL + urlscanID))
2658
+ with links_lock:
2659
+ urlscanRequestLinks.add((url, URLSCAN_DOM_URL + urlscanID))
2685
2660
 
2686
2661
  except Exception as e:
2687
2662
  writerr(colored("ERROR processURLScanUrl 1: " + str(e), "red"))
@@ -2726,12 +2701,7 @@ def getURLScanDOM(originalUrl, domUrl):
2726
2701
 
2727
2702
  # Add the URL as a comment at the start of the response
2728
2703
  if args.url_filename:
2729
- archiveHtml = (
2730
- "/* Original URL: "
2731
- + originalUrl
2732
- + " */\n"
2733
- + archiveHtml
2734
- )
2704
+ archiveHtml = "/* Original URL: " + originalUrl + " */\n" + archiveHtml
2735
2705
 
2736
2706
  # Create file name based on url or hash value of the response, depending on selection. Ensure the file name isn't over 255 characters
2737
2707
  if args.url_filename:
@@ -2760,9 +2730,7 @@ def getURLScanDOM(originalUrl, domUrl):
2760
2730
  if (
2761
2731
  archiveHtml.lower().strip().endswith("</html>")
2762
2732
  or archiveHtml.lower().strip().endswith("</body>")
2763
- or archiveHtml.lower()
2764
- .strip()
2765
- .startswith("<!doctype html")
2733
+ or archiveHtml.lower().strip().startswith("<!doctype html")
2766
2734
  or archiveHtml.lower().strip().startswith("<html")
2767
2735
  or archiveHtml.lower().strip().startswith("<head")
2768
2736
  ):
@@ -2794,12 +2762,10 @@ def getURLScanDOM(originalUrl, domUrl):
2794
2762
  except Exception as e:
2795
2763
  writerr(
2796
2764
  colored(
2797
- getSPACER(
2798
- "[ ERR ] Failed to write file "
2799
- + filePath
2800
- + ": "
2801
- + str(e)
2802
- ),
2765
+ "URLScan - [ ERR ] Failed to write file "
2766
+ + filePath
2767
+ + ": "
2768
+ + str(e),
2803
2769
  "red",
2804
2770
  )
2805
2771
  )
@@ -2822,12 +2788,10 @@ def getURLScanDOM(originalUrl, domUrl):
2822
2788
  except Exception as e:
2823
2789
  writerr(
2824
2790
  colored(
2825
- getSPACER(
2826
- '[ ERR ] Failed to write to waymore_index.txt for "'
2827
- + domUrl
2828
- + '": '
2829
- + str(e)
2830
- ),
2791
+ 'URLScan - [ ERR ] Failed to write to waymore_index.txt for "'
2792
+ + domUrl
2793
+ + '": '
2794
+ + str(e),
2831
2795
  "red",
2832
2796
  )
2833
2797
  )
@@ -2843,25 +2807,21 @@ def getURLScanDOM(originalUrl, domUrl):
2843
2807
  try:
2844
2808
  writerr(
2845
2809
  colored(
2846
- getSPACER(
2847
- "[ "
2848
- + str(resp.status_code)
2849
- + ' ] Failed to get response for "'
2850
- + domUrl
2851
- + '"'
2852
- ),
2810
+ "URLScan - [ "
2811
+ + str(resp.status_code)
2812
+ + ' ] Failed to get response for "'
2813
+ + domUrl
2814
+ + '"',
2853
2815
  "red",
2854
2816
  )
2855
2817
  )
2856
2818
  except Exception:
2857
2819
  writerr(
2858
2820
  colored(
2859
- getSPACER(
2860
- '[ ERR ] Failed to get response for "'
2861
- + domUrl
2862
- + '": '
2863
- + str(e)
2864
- ),
2821
+ 'URLScan - [ ERR ] Failed to get response for "'
2822
+ + domUrl
2823
+ + '": '
2824
+ + str(e),
2865
2825
  "red",
2866
2826
  )
2867
2827
  )
@@ -2888,9 +2848,7 @@ def getURLScanDOM(originalUrl, domUrl):
2888
2848
  )
2889
2849
  except Exception:
2890
2850
  if verbose():
2891
- suffix = (
2892
- 'Complete (To show mem use, run "pip install psutil")'
2893
- )
2851
+ suffix = 'Complete (To show mem use, run "pip install psutil")'
2894
2852
  printProgressBar(
2895
2853
  successCount + failureCount,
2896
2854
  totalResponses,
@@ -2903,23 +2861,15 @@ def getURLScanDOM(originalUrl, domUrl):
2903
2861
  # Write the total count to the continueResp.URLScan.tmp file
2904
2862
  try:
2905
2863
  continueRespFileURLScan.seek(0)
2906
- continueRespFileURLScan.write(
2907
- str(successCount + failureCount) + "\n"
2908
- )
2864
+ continueRespFileURLScan.write(str(successCount + failureCount) + "\n")
2909
2865
  except Exception as e:
2910
2866
  if verbose():
2911
- writerr(
2912
- colored(
2913
- getSPACER("ERROR getURLScanDOM 2: " + str(e)), "red"
2914
- )
2915
- )
2867
+ writerr(colored(getSPACER("ERROR getURLScanDOM 2: " + str(e)), "red"))
2916
2868
 
2917
2869
  except Exception as e:
2918
2870
  if verbose():
2919
2871
  writerr(
2920
- colored(
2921
- getSPACER('Error for "' + domUrl + '": ' + str(e)), "red"
2922
- )
2872
+ colored('URLScan - [ ERR ] Error for "' + domUrl + '": ' + str(e), "red")
2923
2873
  )
2924
2874
 
2925
2875
  except Exception as e:
@@ -2945,14 +2895,15 @@ def getURLScanUrls():
2945
2895
  """
2946
2896
  Get URLs from the URLScan API, urlscan.io
2947
2897
  """
2948
- global URLSCAN_API_KEY, linksFound, linkMimes, waymorePath, subs, stopProgram, stopSource, argsInput, checkURLScan, argsInputHostname
2898
+ global URLSCAN_API_KEY, linksFound, linkMimes, waymorePath, subs, stopProgram, stopSourceURLScan, argsInput, checkURLScan, argsInputHostname, linkCountURLScan, linksFoundURLScan
2949
2899
 
2950
2900
  # Write the file of URL's for the passed domain/URL
2951
2901
  try:
2952
2902
  requestsMade = 0
2953
- stopSource = False
2954
- linkMimes = set()
2955
- originalLinkCount = len(linksFound)
2903
+ stopSourceURLScan = False
2904
+ linksFoundURLScan = set()
2905
+ totalUrls = 0
2906
+ checkResponse = True
2956
2907
 
2957
2908
  # Set the URL to just the hostname
2958
2909
  url = URLSCAN_URL.replace("{DOMAIN}", quote(argsInputHostname))
@@ -2975,21 +2926,23 @@ def getURLScanUrls():
2975
2926
  if args.mode == "R":
2976
2927
  write(
2977
2928
  colored(
2978
- "The URLScan URL requested to get links for responses: ",
2929
+ "URLScan - [ INFO ] The URLScan URL requested to get links for responses: ",
2979
2930
  "magenta",
2980
2931
  )
2981
2932
  + colored(url + "\n", "white")
2982
2933
  )
2983
2934
  else:
2984
2935
  write(
2985
- colored("The URLScan URL requested to get links: ", "magenta")
2936
+ colored(
2937
+ "URLScan - [ INFO ] The URLScan URL requested to get links: ", "magenta"
2938
+ )
2986
2939
  + colored(url + "\n", "white")
2987
2940
  )
2988
2941
 
2989
- if not args.check_only:
2942
+ if args.mode in ("U", "B") and not args.check_only:
2990
2943
  write(
2991
2944
  colored(
2992
- "\rGetting links from urlscan.io API (this can take a while for some domains)...\r",
2945
+ "URLScan - [ INFO ] Getting links from urlscan.io API (this can take a while for some domains)...",
2993
2946
  "cyan",
2994
2947
  )
2995
2948
  )
@@ -3005,14 +2958,12 @@ def getURLScanUrls():
3005
2958
  session.mount("https://", HTTP_ADAPTER)
3006
2959
  session.mount("http://", HTTP_ADAPTER)
3007
2960
  # Pass the API-Key header too. This can change the max endpoints per page, depending on URLScan subscription
3008
- resp = session.get(
3009
- url, headers={"User-Agent": userAgent, "API-Key": URLSCAN_API_KEY}
3010
- )
2961
+ resp = session.get(url, headers={"User-Agent": userAgent, "API-Key": URLSCAN_API_KEY})
3011
2962
  requestsMade = requestsMade + 1
3012
2963
  except Exception as e:
3013
2964
  write(
3014
2965
  colored(
3015
- getSPACER("[ ERR ] Unable to get links from urlscan.io: " + str(e)),
2966
+ "URLScan - [ ERR ] Unable to get links from urlscan.io: " + str(e),
3016
2967
  "red",
3017
2968
  )
3018
2969
  )
@@ -3027,15 +2978,17 @@ def getURLScanUrls():
3027
2978
  if seconds <= args.urlscan_rate_limit_retry * 60:
3028
2979
  writerr(
3029
2980
  colored(
3030
- getSPACER(
3031
- "[ 429 ] URLScan rate limit reached, so waiting for another "
3032
- + str(seconds)
3033
- + " seconds before continuing..."
3034
- ),
2981
+ "URLScan - [ 429 ] Rate limit reached, so waiting for another "
2982
+ + str(seconds)
2983
+ + " seconds before continuing...",
3035
2984
  "yellow",
3036
2985
  )
3037
2986
  )
3038
- time.sleep(seconds + 1)
2987
+ # Wait can be interrupted by SIGINT via interrupt_event
2988
+ interrupt_event.clear()
2989
+ if interrupt_event.wait(seconds + 1):
2990
+ # Interrupted by SIGINT
2991
+ return
3039
2992
  try:
3040
2993
  resp = session.get(
3041
2994
  url,
@@ -3048,10 +3001,7 @@ def getURLScanUrls():
3048
3001
  except Exception as e:
3049
3002
  write(
3050
3003
  colored(
3051
- getSPACER(
3052
- "[ ERR ] Unable to get links from urlscan.io: "
3053
- + str(e)
3054
- ),
3004
+ "URLScan - [ ERR ] Unable to get links from urlscan.io: " + str(e),
3055
3005
  "red",
3056
3006
  )
3057
3007
  )
@@ -3064,18 +3014,14 @@ def getURLScanUrls():
3064
3014
  if resp.status_code == 429:
3065
3015
  writerr(
3066
3016
  colored(
3067
- getSPACER(
3068
- "[ 429 ] URLScan rate limit reached so trying without API Key..."
3069
- ),
3017
+ "URLScan - [ 429 ] Rate limit reached so trying without API Key...",
3070
3018
  "red",
3071
3019
  )
3072
3020
  )
3073
3021
  else:
3074
3022
  writerr(
3075
3023
  colored(
3076
- getSPACER(
3077
- "The URLScan API Key is invalid so trying without API Key..."
3078
- ),
3024
+ "URLScan - [ INF ] The API Key is invalid so trying without API Key...",
3079
3025
  "red",
3080
3026
  )
3081
3027
  )
@@ -3085,64 +3031,54 @@ def getURLScanUrls():
3085
3031
  except Exception as e:
3086
3032
  writerr(
3087
3033
  colored(
3088
- getSPACER(
3089
- "[ ERR ] Unable to get links from urlscan.io: " + str(e)
3090
- ),
3034
+ "URLScan - [ ERR ] Unable to get links from urlscan.io: " + str(e),
3091
3035
  "red",
3092
3036
  )
3093
3037
  )
3094
- return
3038
+ checkResponse = False
3095
3039
 
3096
3040
  # If the rate limit was reached end now
3097
3041
  if resp.status_code == 429:
3098
3042
  writerr(
3099
3043
  colored(
3100
- getSPACER(
3101
- "[ 429 ] URLScan rate limit reached without API Key so unable to get links."
3102
- ),
3044
+ "URLScan - [ 429 ] Rate limit reached without API Key so unable to get links.",
3103
3045
  "red",
3104
3046
  )
3105
3047
  )
3106
- return
3048
+ checkResponse = False
3107
3049
  else:
3108
3050
  writerr(
3109
3051
  colored(
3110
- getSPACER(
3111
- "[ 429 ] URLScan rate limit reached so unable to get links."
3112
- ),
3052
+ "URLScan - [ 429 ] Rate limit reached so unable to get links.",
3113
3053
  "red",
3114
3054
  )
3115
3055
  )
3116
- return
3056
+ checkResponse = False
3117
3057
  elif resp.status_code != 200:
3118
3058
  writerr(
3119
3059
  colored(
3120
- getSPACER(
3121
- "[ "
3122
- + str(resp.status_code)
3123
- + " ] Unable to get links from urlscan.io"
3124
- ),
3060
+ "URLScan - [ "
3061
+ + str(resp.status_code)
3062
+ + " ] Unable to get links from urlscan.io",
3125
3063
  "red",
3126
3064
  )
3127
3065
  )
3128
- return
3066
+ checkResponse = False
3129
3067
 
3130
3068
  try:
3131
- # Get the JSON response
3132
- jsonResp = json.loads(resp.text.strip())
3069
+ if checkResponse:
3070
+ # Get the JSON response
3071
+ jsonResp = json.loads(resp.text.strip())
3133
3072
 
3134
- # Get the number of results
3135
- totalUrls = int(jsonResp["total"])
3073
+ # Get the number of results
3074
+ totalUrls = int(jsonResp["total"])
3136
3075
  except Exception:
3137
3076
  writerr(
3138
3077
  colored(
3139
- getSPACER(
3140
- "[ ERR ] There was an unexpected response from the URLScan API"
3141
- ),
3078
+ "URLScan - [ ERR ] There was an unexpected response from the API",
3142
3079
  "red",
3143
3080
  )
3144
3081
  )
3145
- totalUrls = 0
3146
3082
 
3147
3083
  # Carry on if something was found
3148
3084
  if args.check_only and args.mode != "R":
@@ -3150,12 +3086,12 @@ def getURLScanUrls():
3150
3086
  hasMore = jsonResp["has_more"]
3151
3087
  if hasMore:
3152
3088
  write(
3153
- colored("Get URLs from URLScan: ", "cyan")
3089
+ colored("URLScan - [ INFO ] Get URLs from URLScan: ", "cyan")
3154
3090
  + colored("UNKNOWN requests", "white")
3155
3091
  )
3156
3092
  else:
3157
3093
  write(
3158
- colored("Get URLs from URLScan: ", "cyan")
3094
+ colored("URLScan - [ INFO ] Get URLs from URLScan: ", "cyan")
3159
3095
  + colored("1 request", "white")
3160
3096
  )
3161
3097
  except Exception:
@@ -3166,7 +3102,7 @@ def getURLScanUrls():
3166
3102
  # Carry on if something was found
3167
3103
  if int(totalUrls) > 0:
3168
3104
 
3169
- while not stopSource:
3105
+ while not stopSourceURLScan:
3170
3106
 
3171
3107
  searchAfter = ""
3172
3108
 
@@ -3203,9 +3139,7 @@ def getURLScanUrls():
3203
3139
  sort = urlSection["sort"]
3204
3140
  except Exception:
3205
3141
  sort = ""
3206
- searchAfter = (
3207
- "&search_after=" + str(sort[0]) + "," + str(sort[1])
3208
- )
3142
+ searchAfter = "&search_after=" + str(sort[0]) + "," + str(sort[1])
3209
3143
 
3210
3144
  # Get the HTTP code
3211
3145
  try:
@@ -3243,7 +3177,7 @@ def getURLScanUrls():
3243
3177
  if searchAfter != "":
3244
3178
 
3245
3179
  keepTrying = True
3246
- while not stopSource and keepTrying:
3180
+ while not stopSourceURLScan and keepTrying:
3247
3181
  keepTrying = False
3248
3182
  # Get the next page from urlscan.io
3249
3183
  try:
@@ -3263,10 +3197,8 @@ def getURLScanUrls():
3263
3197
  except Exception as e:
3264
3198
  writerr(
3265
3199
  colored(
3266
- getSPACER(
3267
- "[ ERR ] Unable to get links from urlscan.io: "
3268
- + str(e)
3269
- ),
3200
+ "URLScan - [ ERR ] Unable to get links from urlscan.io: "
3201
+ + str(e),
3270
3202
  "red",
3271
3203
  )
3272
3204
  )
@@ -3285,56 +3217,53 @@ def getURLScanUrls():
3285
3217
  if seconds <= args.urlscan_rate_limit_retry * 60:
3286
3218
  writerr(
3287
3219
  colored(
3288
- getSPACER(
3289
- "[ 429 ] URLScan rate limit reached, so waiting for another "
3290
- + str(seconds)
3291
- + " seconds before continuing..."
3292
- ),
3220
+ "URLScan - [ 429 ] Rate limit reached, so waiting for another "
3221
+ + str(seconds)
3222
+ + " seconds before continuing...",
3293
3223
  "yellow",
3294
3224
  )
3295
3225
  )
3296
- time.sleep(seconds + 1)
3226
+ # Wait can be interrupted by SIGINT via interrupt_event
3227
+ interrupt_event.clear()
3228
+ if interrupt_event.wait(seconds + 1):
3229
+ # Interrupted by SIGINT
3230
+ keepTrying = False
3231
+ break
3297
3232
  keepTrying = True
3298
3233
  continue
3299
3234
  else:
3300
3235
  writerr(
3301
3236
  colored(
3302
- getSPACER(
3303
- "[ 429 ] URLScan rate limit reached (waiting time of "
3304
- + str(seconds)
3305
- + "), so stopping. Links that have already been retrieved will be saved."
3306
- ),
3237
+ "URLScan - [ 429 ] Rate limit reached (waiting time of "
3238
+ + str(seconds)
3239
+ + "), so stopping. Links that have already been retrieved will be saved.",
3307
3240
  "red",
3308
3241
  )
3309
3242
  )
3310
- stopSource = True
3243
+ stopSourceURLScan = True
3311
3244
  pass
3312
3245
  else:
3313
3246
  writerr(
3314
3247
  colored(
3315
- getSPACER(
3316
- "[ 429 ] URLScan rate limit reached, so stopping. Links that have already been retrieved will be saved."
3317
- ),
3248
+ "URLScan - [ 429 ] Rate limit reached, so stopping. Links that have already been retrieved will be saved.",
3318
3249
  "red",
3319
3250
  )
3320
3251
  )
3321
- stopSource = True
3252
+ stopSourceURLScan = True
3322
3253
  pass
3323
3254
  elif resp.status_code != 200:
3324
3255
  writerr(
3325
3256
  colored(
3326
- getSPACER(
3327
- "[ "
3328
- + str(resp.status_code)
3329
- + " ] Unable to get links from urlscan.io"
3330
- ),
3257
+ "URLScan - [ "
3258
+ + str(resp.status_code)
3259
+ + " ] Unable to get links from urlscan.io",
3331
3260
  "red",
3332
3261
  )
3333
3262
  )
3334
- stopSource = True
3263
+ stopSourceURLScan = True
3335
3264
  pass
3336
3265
 
3337
- if not stopSource:
3266
+ if not stopSourceURLScan:
3338
3267
  # Get the JSON response
3339
3268
  jsonResp = json.loads(resp.text.strip())
3340
3269
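
Note on the interruptible wait above: instead of time.sleep(), the retry now blocks on the module-level interrupt_event (a threading.Event that the SIGINT handler sets), so Ctrl+C no longer has to wait out the whole rate-limit period. A minimal standalone sketch of the pattern, not waymore's actual handler:

import signal
import threading

interrupt_event = threading.Event()

def on_sigint(signum, frame):
    # Wake any thread currently blocked in interrupt_event.wait()
    interrupt_event.set()

signal.signal(signal.SIGINT, on_sigint)

def wait_or_interrupt(seconds):
    interrupt_event.clear()
    # Event.wait() returns True if set() was called before the timeout
    # (i.e. the wait was interrupted), False if the timeout simply elapsed.
    return interrupt_event.wait(seconds)
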
 
@@ -3342,47 +3271,32 @@ def getURLScanUrls():
3342
3271
  if (
3343
3272
  jsonResp["results"] is None
3344
3273
  or len(jsonResp["results"]) == 0
3345
- or (
3346
- args.limit_requests != 0
3347
- and requestsMade > args.limit_requests
3348
- )
3274
+ or (args.limit_requests != 0 and requestsMade > args.limit_requests)
3349
3275
  or (
3350
3276
  args.mode == "R"
3351
3277
  and args.limit != 0
3352
3278
  and requestsMade > args.limit
3353
3279
  )
3354
3280
  ):
3355
- stopSource = True
3281
+ stopSourceURLScan = True
3356
3282
 
3357
3283
  # Show the MIME types found (in case user wants to exclude more)
3358
3284
  if verbose() and len(linkMimes) > 0 and args.mode != "R":
3359
3285
  linkMimes.discard("warc/revisit")
3360
3286
  write(
3361
- getSPACER(
3362
- colored("MIME types found: ", "magenta")
3363
- + colored(str(linkMimes), "white")
3364
- )
3287
+ colored("URLScan - [ INFO ] MIME types found: ", "magenta")
3288
+ + colored(str(linkMimes), "white")
3365
3289
  + "\n"
3366
3290
  )
3367
3291
 
3368
- linkCount = len(linksFound) - originalLinkCount
3369
3292
  if args.mode != "R":
3370
- if args.xwm and args.xcc and args.xav:
3371
- write(
3372
- getSPACER(
3373
- colored("Links found on urlscan.io: ", "cyan")
3374
- + colored(str(linkCount), "white")
3375
- )
3376
- + "\n"
3377
- )
3378
- else:
3379
- write(
3380
- getSPACER(
3381
- colored("Extra links found on urlscan.io: ", "cyan")
3382
- + colored(str(linkCount), "white")
3383
- )
3384
- + "\n"
3385
- )
3293
+ linkCountURLScan = len(linksFoundURLScan)
3294
+ write(
3295
+ colored("URLScan - [ INFO ] Links found on urlscan.io: ", "cyan")
3296
+ + colored(str(linkCountURLScan), "white")
3297
+ )
3298
+ linksFound.update(linksFoundURLScan)
3299
+ linksFoundURLScan.clear()
3386
3300
 
3387
3301
  except Exception as e:
3388
3302
  writerr(colored("ERROR getURLScanUrls 1: " + str(e), "red"))
@@ -3392,12 +3306,11 @@ def processWayBackPage(url):
3392
3306
  """
3393
3307
  Get URLs from a specific page of archive.org CDX API for the input domain
3394
3308
  """
3395
- global totalPages, linkMimes, linksFound, stopSource
3309
+ global totalPages, linkMimes, linksFound, stopSourceWayback, linkCountWayback, linksFoundWayback, current_response, current_session
3396
3310
  try:
3397
3311
  # Get memory in case it exceeds threshold
3398
3312
  getMemory()
3399
-
3400
- if not stopSource:
3313
+ if not stopSourceWayback:
3401
3314
  try:
3402
3315
  # Choose a random user agent string to use for any requests
3403
3316
  resp = None
@@ -3406,229 +3319,231 @@ def processWayBackPage(url):
3406
3319
  session = requests.Session()
3407
3320
  session.mount("https://", HTTP_ADAPTER)
3408
3321
  session.mount("http://", HTTP_ADAPTER)
3409
- resp = session.get(url, headers={"User-Agent": userAgent})
3410
- except ConnectionError:
3411
- writerr(
3412
- colored(
3413
- getSPACER(
3414
- "[ ERR ] Wayback Machine (archive.org) connection error for page "
3415
- + page
3416
- ),
3417
- "red",
3418
- )
3419
- )
3420
- resp = None
3421
- return
3422
- except Exception as e:
3423
- writerr(
3424
- colored(
3425
- getSPACER(
3426
- "[ ERR ] Error getting response for page "
3427
- + page
3428
- + " - "
3429
- + str(e)
3430
- ),
3431
- "red",
3432
- )
3322
+ # expose session so SIGINT handler can close it to interrupt blocking network I/O
3323
+ try:
3324
+ current_session = session
3325
+ except Exception:
3326
+ pass
3327
+
3328
+ resp = session.get(
3329
+ url, headers={"User-Agent": userAgent}, stream=True, timeout=args.timeout
3433
3330
  )
3434
- resp = None
3435
- return
3436
- finally:
3331
+ # expose live response so SIGINT handler can close it to interrupt blocking I/O
3437
3332
  try:
3438
- if resp is not None:
3439
- # If a status other of 429, then stop processing Wayback Machine
3440
- if resp.status_code == 429:
3441
- if args.wayback_rate_limit_retry > 0:
3442
- seconds = args.wayback_rate_limit_retry * 60
3443
- if args.processes == 1:
3444
- writerr(
3445
- colored(
3446
- "\r[ 429 ] Wayback Machine (archive.org) rate limit reached on page "
3447
- + str(page)
3448
- + " of "
3449
- + str(totalPages)
3450
- + ", so waiting for "
3451
- + str(seconds)
3452
- + " seconds before continuing...\r",
3453
- "yellow",
3454
- )
3455
- )
3456
- else:
3457
- writerr(
3458
- colored(
3459
- "\r[ 429 ] Wayback Machine (archive.org) rate limit reached, so waiting for "
3460
- + str(seconds)
3461
- + " seconds before continuing...\r",
3462
- "yellow",
3463
- )
3464
- )
3465
- time.sleep(seconds)
3466
- try:
3467
- resp = session.get(
3468
- url, headers={"User-Agent": userAgent}
3469
- )
3470
- except ConnectionError:
3471
- writerr(
3472
- colored(
3473
- getSPACER(
3474
- "[ ERR ] Wayback Machine (archive.org) connection error for page "
3475
- + page
3476
- ),
3477
- "red",
3478
- )
3333
+ current_response = resp
3334
+ except Exception:
3335
+ pass
3336
+ # Check the response status
3337
+ if resp is not None:
3338
+ # If a status other of 429, then stop processing Wayback Machine
3339
+ if resp.status_code == 429:
3340
+ if args.wayback_rate_limit_retry > 0:
3341
+ seconds = args.wayback_rate_limit_retry * 60
3342
+ if args.processes == 1:
3343
+ writerr(
3344
+ colored(
3345
+ "Wayback - [ 429 ] Rate limit reached on page "
3346
+ + str(page)
3347
+ + " of "
3348
+ + str(totalPages)
3349
+ + ", so waiting for "
3350
+ + str(seconds)
3351
+ + " seconds before continuing...",
3352
+ "yellow",
3479
3353
  )
3480
- resp = None
3481
- return
3482
- except Exception as e:
3483
- writerr(
3484
- colored(
3485
- getSPACER(
3486
- "[ ERR ] Error getting response for page "
3487
- + page
3488
- + " - "
3489
- + str(e)
3490
- ),
3491
- "red",
3492
- )
3354
+ )
3355
+ else:
3356
+ writerr(
3357
+ colored(
3358
+ "Wayback - [ 429 ] Rate limit reached, so waiting for "
3359
+ + str(seconds)
3360
+ + " seconds before continuing...",
3361
+ "yellow",
3493
3362
  )
3494
- resp = None
3495
- return
3496
-
3497
- if resp.status_code == 429:
3498
- writerr(
3499
- colored(
3500
- getSPACER(
3501
- "[ 429 ] Wayback Machine (archive.org) rate limit reached, so stopping. Links that have already been retrieved will be saved."
3502
- ),
3503
- "red",
3504
3363
  )
3505
- )
3506
- stopSource = True
3507
- return
3508
- # If a status other of 503, then the site is unavailable
3509
- if resp.status_code == 503:
3510
- writerr(
3511
- colored(
3512
- getSPACER(
3513
- "[ 503 ] Wayback Machine (archive.org) is currently unavailable. It may be down for maintenance. You can check https://web.archive.org/cdx/ to verify."
3514
- ),
3515
- "red",
3364
+ # Wait can be interrupted by SIGINT via interrupt_event
3365
+ interrupt_event.clear()
3366
+ if interrupt_event.wait(seconds):
3367
+ return
3368
+ try:
3369
+ resp = session.get(
3370
+ url,
3371
+ headers={"User-Agent": userAgent},
3372
+ stream=True,
3373
+ timeout=args.timeout,
3516
3374
  )
3517
- )
3518
- stopSource = True
3519
- return
3520
- # If the response from archive.org is empty then skip
3521
- if resp.text == "" and totalPages == 0:
3522
- if verbose():
3375
+ try:
3376
+ current_response = resp
3377
+ except Exception:
3378
+ pass
3379
+ except ConnectionError:
3523
3380
  writerr(
3524
3381
  colored(
3525
- getSPACER(
3526
- "[ ERR ] "
3527
- + url
3528
- + " gave an empty response."
3529
- ),
3382
+ "Wayback - [ ERR ] Connection error for page " + page,
3530
3383
  "red",
3531
3384
  )
3532
3385
  )
3533
- return
3534
- # If a status other than 200, then stop
3535
- if resp.status_code != 200:
3536
- if verbose():
3386
+ resp = None
3387
+ return
3388
+ except Exception as e:
3537
3389
  writerr(
3538
3390
  colored(
3539
- getSPACER(
3540
- "[ "
3541
- + str(resp.status_code)
3542
- + " ] Error for "
3543
- + url
3544
- ),
3391
+ "Wayback - [ ERR ] Error getting response for page "
3392
+ + page
3393
+ + " - "
3394
+ + str(e),
3545
3395
  "red",
3546
3396
  )
3547
3397
  )
3548
- return
3549
- except ConnectionError:
3550
- writerr(
3551
- colored(
3552
- getSPACER(
3553
- "[ ERR ] Wayback Machine (archive.org) connection error for page "
3554
- + page
3555
- ),
3556
- "red",
3398
+ resp = None
3399
+ return
3400
+
3401
+ if resp.status_code == 429:
3402
+ writerr(
3403
+ colored(
3404
+ "Wayback - [ 429 ] Rate limit reached, so stopping. Links that have already been retrieved will be saved.",
3405
+ "red",
3406
+ )
3557
3407
  )
3558
- )
3559
- resp = None
3560
- return
3561
- except Exception as e:
3562
- writerr(
3563
- colored(
3564
- getSPACER(
3565
- "[ ERR ] Error getting response for page "
3566
- + page
3567
- + " - "
3568
- + str(e)
3569
- ),
3570
- "red",
3408
+ stopSourceWayback = True
3409
+ return
3410
+ # If a status code of 503, then the site is unavailable
3411
+ if resp.status_code == 503:
3412
+ writerr(
3413
+ colored(
3414
+ "Wayback - [ 503 ] The Wayback Machine (archive.org) is currently unavailable. It may be down for maintenance. You can check https://web.archive.org/cdx/ to verify.",
3415
+ "red",
3416
+ )
3571
3417
  )
3572
- )
3573
- resp = None
3574
- return
3418
+ stopSourceWayback = True
3419
+ return
3420
+ # If a status other than 200, then stop
3421
+ if resp.status_code != 200:
3422
+ if verbose():
3423
+ writerr(
3424
+ colored(
3425
+ "Wayback - [ " + str(resp.status_code) + " ] Error for " + url,
3426
+ "red",
3427
+ )
3428
+ )
3429
+ try:
3430
+ current_response = None
3431
+ except Exception:
3432
+ pass
3433
+ try:
3434
+ current_session = None
3435
+ except Exception:
3436
+ pass
3437
+ return
3575
3438
 
3576
- # Get the URLs and MIME types. Each line is a separate JSON string
3577
- try:
3439
+ # Get the URLs and MIME types. Each line is a separate JSON string
3440
+ # Process lines as they arrive - if connection drops, we keep what we've already processed
3578
3441
  for line in resp.iter_lines():
3579
- results = line.decode("utf-8")
3580
- foundUrl = fixArchiveOrgUrl(str(results).split(" ")[1])
3442
+ try:
3443
+ results = line.decode("utf-8")
3444
+ foundUrl = fixArchiveOrgUrl(str(results).split(" ")[1])
3581
3445
 
3582
- # If --filter-responses-only wasn't used, then check the URL exclusions
3583
- if args.filter_responses_only:
3584
- match = None
3585
- else:
3586
- match = re.search(
3587
- r"(" + re.escape(FILTER_URL).replace(",", "|") + ")",
3588
- foundUrl,
3589
- flags=re.IGNORECASE,
3590
- )
3591
- if match is None:
3592
- # Only get MIME Types if --verbose option was selected
3593
- if verbose():
3446
+ # If --filter-responses-only wasn't used, then check the URL exclusions
3447
+ if args.filter_responses_only:
3448
+ match = None
3449
+ else:
3450
+ match = re.search(
3451
+ r"(" + re.escape(FILTER_URL).replace(",", "|") + ")",
3452
+ foundUrl,
3453
+ flags=re.IGNORECASE,
3454
+ )
3455
+ if match is None:
3456
+ # Only get MIME Types if --verbose option was selected
3457
+ if verbose():
3458
+ try:
3459
+ mimeType = str(results).split(" ")[2]
3460
+ if mimeType != "":
3461
+ linkMimes.add(mimeType)
3462
+ except Exception:
3463
+ if verbose():
3464
+ writerr(
3465
+ colored(
3466
+ getSPACER(
3467
+ "ERROR processWayBackPage 2: Cannot get MIME type from line: "
3468
+ + str(line)
3469
+ ),
3470
+ "red",
3471
+ )
3472
+ )
3594
3473
  try:
3595
- mimeType = str(results).split(" ")[2]
3596
- if mimeType != "":
3597
- linkMimes.add(mimeType)
3474
+ linksFoundAdd(foundUrl, linksFoundWayback)
3475
+
3598
3476
  except Exception:
3599
3477
  if verbose():
3600
3478
  writerr(
3601
3479
  colored(
3602
3480
  getSPACER(
3603
- "ERROR processWayBackPage 2: Cannot get MIME type from line: "
3481
+ "ERROR processWayBackPage 3: Cannot get link from line: "
3604
3482
  + str(line)
3605
3483
  ),
3606
3484
  "red",
3607
3485
  )
3608
3486
  )
3609
- write(resp.text)
3610
- try:
3611
- linksFoundAdd(foundUrl)
3612
- except Exception:
3613
- if verbose():
3614
- writerr(
3615
- colored(
3616
- getSPACER(
3617
- "ERROR processWayBackPage 3: Cannot get link from line: "
3618
- + str(line)
3619
- ),
3620
- "red",
3621
- )
3487
+ except Exception:
3488
+ if verbose():
3489
+ writerr(
3490
+ colored(
3491
+ getSPACER("ERROR processWayBackPage 4: " + str(line)), "red"
3622
3492
  )
3623
- write(resp.text)
3624
- except Exception:
3625
- if verbose():
3493
+ )
3494
+
3495
+ except ConnectionError:
3496
+ writerr(
3497
+ colored(
3498
+ "Wayback - [ ERR ] Connection error for page "
3499
+ + page
3500
+ + (
3501
+ f" (saved {len(linksFoundWayback)} URLs before error)"
3502
+ if len(linksFoundWayback) > 0
3503
+ else ""
3504
+ ),
3505
+ "red",
3506
+ )
3507
+ )
3508
+ try:
3509
+ current_response = None
3510
+ except Exception:
3511
+ pass
3512
+ try:
3513
+ current_session = None
3514
+ except Exception:
3515
+ pass
3516
+ return
3517
+ except Exception as e:
3518
+ # Even if connection drops, we've already saved the URLs processed so far
3519
+ if len(linksFoundWayback) > 0:
3520
+ writerr(
3521
+ colored(
3522
+ f"Wayback - [ WARN ] Error getting response for page {page} - {str(e)} (saved {len(linksFoundWayback)} URLs before error)",
3523
+ "yellow",
3524
+ )
3525
+ )
3526
+ else:
3626
3527
  writerr(
3627
3528
  colored(
3628
- getSPACER("ERROR processWayBackPage 4: " + str(line)), "red"
3529
+ "Wayback - [ ERR ] Error getting response for page "
3530
+ + page
3531
+ + " - "
3532
+ + str(e),
3533
+ "red",
3629
3534
  )
3630
3535
  )
3536
+ try:
3537
+ current_response = None
3538
+ except Exception:
3539
+ pass
3540
+ try:
3541
+ current_session = None
3542
+ except Exception:
3543
+ pass
3544
+ return
3631
3545
  else:
3546
3632
3547
  pass
3633
3548
  except Exception as e:
3634
3549
  if verbose():
@@ -3639,40 +3554,47 @@ def getWaybackUrls():
3639
3554
  """
3640
3555
  Get URLs from the Wayback Machine, archive.org
3641
3556
  """
3642
- global linksFound, linkMimes, waymorePath, subs, path, stopProgram, totalPages, stopSource, argsInput, checkWayback
3557
+ global linksFound, linkMimes, waymorePath, subs, path, stopProgram, totalPages, stopSourceWayback, argsInput, checkWayback, linkCountWayback, linksFoundWayback
3643
3558
 
3644
3559
  # Write the file of URL's for the passed domain/URL
3645
3560
  try:
3646
- stopSource = False
3561
+ stopSourceWayback = False
3562
+ linksFoundWayback = set()
3647
3563
 
3648
3564
  if MATCH_MIME != "":
3649
3565
  filterMIME = "&filter=mimetype:" + re.escape(MATCH_MIME).replace(",", "|")
3650
3566
  else:
3651
- filterMIME = "&filter=!mimetype:warc/revisit|" + re.escape(
3652
- FILTER_MIME
3653
- ).replace(",", "|")
3567
+ filterMIME = "&filter=!mimetype:warc/revisit|" + re.escape(FILTER_MIME).replace(
3568
+ ",", "|"
3569
+ )
3654
3570
  # If there are any \+ in the MIME types, e.g. image/svg\+xml (the backslash is because it was previously escaped), then replace the \+ with a . otherwise the wayback API does not recognise it
3655
3571
  filterMIME = filterMIME.replace("\+", ".")
3656
3572
 
3657
3573
  if MATCH_CODE != "":
3658
3574
  filterCode = "&filter=statuscode:" + re.escape(MATCH_CODE).replace(",", "|")
3659
3575
  else:
3660
- filterCode = "&filter=!statuscode:" + re.escape(FILTER_CODE).replace(
3661
- ",", "|"
3662
- )
3576
+ filterCode = "&filter=!statuscode:" + re.escape(FILTER_CODE).replace(",", "|")
3663
3577
 
3664
3578
  # Set keywords filter if -ko argument passed
3665
3579
  filterKeywords = ""
3666
3580
  if args.keywords_only:
3667
3581
  if args.keywords_only == "#CONFIG":
3668
3582
  filterKeywords = (
3669
- "&filter=original:.*("
3670
- + re.escape(FILTER_KEYWORDS).replace(",", "|")
3671
- + ").*"
3583
+ "&filter=original:.*(" + re.escape(FILTER_KEYWORDS).replace(",", "|") + ").*"
3672
3584
  )
3673
3585
  else:
3674
3586
  filterKeywords = "&filter=original:.*(" + args.keywords_only + ").*"
3675
3587
 
3588
+ # Add the date filters if they were passed
3589
+ if args.from_date is None:
3590
+ filterFrom = ""
3591
+ else:
3592
+ filterFrom = "&from=" + str(args.from_date)
3593
+ if args.to_date is None:
3594
+ filterTo = ""
3595
+ else:
3596
+ filterTo = "&to=" + str(args.to_date)
3597
+
3676
3598
  if args.filter_responses_only:
3677
3599
  url = (
3678
3600
  WAYBACK_URL.replace("{DOMAIN}", subs + quote(argsInput) + path).replace(
@@ -3688,6 +3610,8 @@ def getWaybackUrls():
3688
3610
  + filterMIME
3689
3611
  + filterCode
3690
3612
  + filterKeywords
3613
+ + filterFrom
3614
+ + filterTo
3691
3615
  + "&page="
3692
3616
  )
3693
3617
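
For reference, the -from/-to arguments handled earlier in this function are passed straight through to the CDX API as its from= and to= query parameters, appended to the query above before "&page=". A minimal sketch of that filter string (the example values are hypothetical):

def build_date_filters(from_date=None, to_date=None):
    # Mirrors the filterFrom / filterTo logic added in getWaybackUrls
    filterFrom = "" if from_date is None else "&from=" + str(from_date)
    filterTo = "" if to_date is None else "&to=" + str(to_date)
    return filterFrom + filterTo

# e.g. restrict results to 2023 through June 2024
print(build_date_filters("2023", "202406"))  # -> "&from=2023&to=202406"
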
 
@@ -3697,7 +3621,7 @@ def getWaybackUrls():
3697
3621
  if not args.check_only:
3698
3622
  write(
3699
3623
  colored(
3700
- "\rGetting the number of Wayback Machine (archive.org) pages to search...\r",
3624
+ "Wayback - [ INFO ] Getting the number of pages to search...",
3701
3625
  "cyan",
3702
3626
  )
3703
3627
  )
@@ -3706,9 +3630,7 @@ def getWaybackUrls():
3706
3630
  session = requests.Session()
3707
3631
  session.mount("https://", HTTP_ADAPTER)
3708
3632
  session.mount("http://", HTTP_ADAPTER)
3709
- resp = session.get(
3710
- url + "&showNumPages=True", headers={"User-Agent": userAgent}
3711
- )
3633
+ resp = session.get(url + "&showNumPages=True", headers={"User-Agent": userAgent})
3712
3634
  # Try to get the total number of pages. If there is a problem, we'll return totalPages = 0 which means we'll get everything back in one request
3713
3635
  try:
3714
3636
  totalPages = int(resp.text.strip())
@@ -3724,9 +3646,7 @@ def getWaybackUrls():
3724
3646
  if resp.status_code == 429:
3725
3647
  writerr(
3726
3648
  colored(
3727
- getSPACER(
3728
- "[ 429 ] Wayback Machine (Archive.org) rate limit reached so unable to get links."
3729
- ),
3649
+ "Wayback - [ 429 ] Rate limit reached so unable to get links.",
3730
3650
  "red",
3731
3651
  )
3732
3652
  )
@@ -3736,9 +3656,7 @@ def getWaybackUrls():
3736
3656
  if resp.status_code == 503:
3737
3657
  writerr(
3738
3658
  colored(
3739
- getSPACER(
3740
- "[ 503 ] Wayback Machine (Archive.org) is currently unavailable. It may be down for maintenance. You can check https://web.archive.org/cdx/ to verify."
3741
- ),
3659
+ "Wayback - [ 503 ] The Wayback Machine (Archive.org) is currently unavailable. It may be down for maintenance. You can check https://web.archive.org/cdx/ to verify.",
3742
3660
  "red",
3743
3661
  )
3744
3662
  )
@@ -3747,19 +3665,15 @@ def getWaybackUrls():
3747
3665
  if resp.text.lower().find("blocked site error") > 0:
3748
3666
  writerr(
3749
3667
  colored(
3750
- getSPACER(
3751
- "[ ERR ] Unable to get links from Wayback Machine (archive.org): Blocked Site Error (they block the target site)"
3752
- ),
3668
+ "Wayback - [ ERR ] Unable to get links from Wayback Machine (archive.org): Blocked Site Error (they block the target site)",
3753
3669
  "red",
3754
3670
  )
3755
3671
  )
3756
3672
  else:
3757
3673
  writerr(
3758
3674
  colored(
3759
- getSPACER(
3760
- "[ ERR ] Unable to get links from Wayback Machine (archive.org): "
3761
- + str(resp.text.strip())
3762
- ),
3675
+ "Wayback - [ ERR ] Unable to get links from Wayback Machine (archive.org): "
3676
+ + str(resp.text.strip()),
3763
3677
  "red",
3764
3678
  )
3765
3679
  )
@@ -3767,28 +3681,22 @@ def getWaybackUrls():
3767
3681
  if str(e).lower().find("alert access denied"):
3768
3682
  writerr(
3769
3683
  colored(
3770
- getSPACER(
3771
- "[ ERR ] Unable to get links from Wayback Machine (archive.org): Access Denied. Are you able to manually visit https://web.archive.org/? Your ISP may be blocking you, e.g. your adult content filter is on (why it triggers that filter I don't know, but it has happened!)"
3772
- ),
3684
+ "Wayback - [ ERR ] Unable to get links from Wayback Machine (archive.org): Access Denied. Are you able to manually visit https://web.archive.org/? Your ISP may be blocking you, e.g. your adult content filter is on (why it triggers that filter I don't know, but it has happened!)",
3773
3685
  "red",
3774
3686
  )
3775
3687
  )
3776
3688
  elif str(e).lower().find("connection refused"):
3777
3689
  writerr(
3778
3690
  colored(
3779
- getSPACER(
3780
- "[ ERR ] Unable to get links from Wayback Machine (archive.org): Connection Refused. Are you able to manually visit https://web.archive.org/? Your ISP may be blocking your IP)"
3781
- ),
3691
+ "Wayback - [ ERR ] Unable to get links from Wayback Machine (archive.org): Connection Refused. Are you able to manually visit https://web.archive.org/? Your ISP may be blocking your IP)",
3782
3692
  "red",
3783
3693
  )
3784
3694
  )
3785
3695
  else:
3786
3696
  writerr(
3787
3697
  colored(
3788
- getSPACER(
3789
- "[ ERR ] Unable to get links from Wayback Machine (archive.org): "
3790
- + str(e)
3791
- ),
3698
+ "Wayback - [ ERR ] Unable to get links from Wayback Machine (archive.org): "
3699
+ + str(e),
3792
3700
  "red",
3793
3701
  )
3794
3702
  )
@@ -3798,27 +3706,29 @@ def getWaybackUrls():
3798
3706
  if totalPages < 0:
3799
3707
  write(
3800
3708
  colored(
3801
- "Due to a change in Wayback Machine API, all URLs will be retrieved in one request and it is not possible to determine how long it will take, so please ignore this.",
3709
+ "Wayback - [ INFO ] Due to a change in Wayback Machine API, all URLs will be retrieved in one request and it is not possible to determine how long it will take, so please ignore this.",
3802
3710
  "cyan",
3803
3711
  )
3804
3712
  )
3805
3713
  else:
3806
3714
  checkWayback = totalPages
3807
3715
  write(
3808
- colored("Get URLs from Wayback Machine: ", "cyan")
3716
+ colored("Wayback - [ INFO ] Get URLs from Wayback Machine: ", "cyan")
3809
3717
  + colored(str(checkWayback) + " requests", "white")
3810
3718
  )
3811
3719
  else:
3812
3720
  if verbose():
3813
3721
  write(
3814
- colored("The archive URL requested to get links: ", "magenta")
3722
+ colored(
3723
+ "Wayback - [ INFO ] The archive URL requested to get links: ", "magenta"
3724
+ )
3815
3725
  + colored(url + "\n", "white")
3816
3726
  )
3817
3727
 
3818
3728
  if totalPages < 0:
3819
3729
  write(
3820
3730
  colored(
3821
- "\rGetting links from Wayback Machine (archive.org) with one request (this can take a while for some domains)...\r",
3731
+ "Wayback - [ INFO ] Getting links from Wayback Machine (archive.org) with one request (this can take a while for some domains)...",
3822
3732
  "cyan",
3823
3733
  )
3824
3734
  )
@@ -3828,9 +3738,9 @@ def getWaybackUrls():
3828
3738
  # if the page number was found then display it, but otherwise we will just try to increment until we have everything
3829
3739
  write(
3830
3740
  colored(
3831
- "\rGetting links from "
3741
+ "Wayback - [ INFO ] Getting links from "
3832
3742
  + str(totalPages)
3833
- + " Wayback Machine (archive.org) API requests (this can take a while for some domains)...\r",
3743
+ + " Wayback Machine (archive.org) API requests (this can take a while for some domains)...",
3834
3744
  "cyan",
3835
3745
  )
3836
3746
  )
@@ -3854,25 +3764,22 @@ def getWaybackUrls():
3854
3764
  if verbose() and len(linkMimes) > 0:
3855
3765
  linkMimes.discard("warc/revisit")
3856
3766
  write(
3857
- getSPACER(
3858
- colored("MIME types found: ", "magenta")
3859
- + colored(str(linkMimes), "white")
3860
- )
3767
+ colored("Wayback - [ INFO ] MIME types found: ", "magenta")
3768
+ + colored(str(linkMimes), "white")
3861
3769
  + "\n"
3862
3770
  )
3863
3771
  linkMimes = None
3864
3772
 
3865
3773
  if not args.xwm:
3866
- linkCount = len(linksFound)
3774
+ linkCountWayback = len(linksFoundWayback)
3867
3775
  write(
3868
- getSPACER(
3869
- colored(
3870
- "Links found on Wayback Machine (archive.org): ", "cyan"
3871
- )
3872
- + colored(str(linkCount), "white")
3776
+ colored(
3777
+ "Wayback - [ INFO ] Links found on Wayback Machine (archive.org): ", "cyan"
3873
3778
  )
3874
- + "\n"
3779
+ + colored(str(linkCountWayback), "white")
3875
3780
  )
3781
+ linksFound.update(linksFoundWayback)
3782
+ linksFoundWayback.clear()
3876
3783
 
3877
3784
  except Exception as e:
3878
3785
  writerr(colored("ERROR getWaybackUrls 1: " + str(e), "red"))
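
The counting change above reflects a pattern applied to every provider in this version: each source gathers links into its own set (linksFoundWayback here, linksFoundCommonCrawl, linksFoundURLScan, and so on), reports its own total, and only then merges into the shared linksFound set. A simplified sketch of that flow; fetch_one_source and the sample URLs are made-up placeholders:

linksFound = set()  # shared result set, as in waymore

def fetch_one_source(urls):
    # Stand-in for getWaybackUrls / getCommonCrawlUrls / getURLScanUrls ...
    linksFoundSource = set()
    for u in urls:
        linksFoundSource.add(u)
    print(f"Source - [ INFO ] Links found: {len(linksFoundSource)}")
    linksFound.update(linksFoundSource)  # merge once, at the end
    linksFoundSource.clear()

fetch_one_source(["https://example.com/a", "https://example.com/b"])
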
@@ -3882,13 +3789,13 @@ def processCommonCrawlCollection(cdxApiUrl):
3882
3789
  """
3883
3790
  Get URLs from a given Common Crawl index collection
3884
3791
  """
3885
- global subs, path, linksFound, linkMimes, stopSource, argsInput
3792
+ global subs, path, linksFound, linkMimes, stopSourceCommonCrawl, argsInput, linkCountCommonCrawl, linksFoundCommonCrawl, current_response, current_session
3886
3793
 
3887
3794
  try:
3888
3795
  # Get memory in case it exceeds threshold
3889
3796
  getMemory()
3890
3797
 
3891
- if not stopSource:
3798
+ if not stopSourceCommonCrawl:
3892
3799
  # Set mime content type filter
3893
3800
  if MATCH_MIME.strip() != "":
3894
3801
  filterMIME = "&filter=~mime:("
@@ -3902,31 +3809,21 @@ def processCommonCrawlCollection(cdxApiUrl):
3902
3809
  # Set status code filter
3903
3810
  filterCode = ""
3904
3811
  if MATCH_CODE.strip() != "":
3905
- filterCode = (
3906
- "&filter=~status:(" + re.escape(MATCH_CODE).replace(",", "|") + ")"
3907
- )
3812
+ filterCode = "&filter=~status:(" + re.escape(MATCH_CODE).replace(",", "|") + ")"
3908
3813
  else:
3909
- filterCode = (
3910
- "&filter=!~status:("
3911
- + re.escape(FILTER_CODE).replace(",", "|")
3912
- + ")"
3913
- )
3814
+ filterCode = "&filter=!~status:(" + re.escape(FILTER_CODE).replace(",", "|") + ")"
3914
3815
 
3915
3816
  # Set keywords filter if -ko argument passed
3916
3817
  filterKeywords = ""
3917
3818
  if args.keywords_only:
3918
3819
  if args.keywords_only == "#CONFIG":
3919
3820
  filterKeywords = (
3920
- "&filter=~url:.*("
3921
- + re.escape(FILTER_KEYWORDS).replace(",", "|")
3922
- + ").*"
3821
+ "&filter=~url:.*(" + re.escape(FILTER_KEYWORDS).replace(",", "|") + ").*"
3923
3822
  )
3924
3823
  else:
3925
3824
  filterKeywords = "&filter=~url:.*(" + args.keywords_only + ").*"
3926
3825
 
3927
- commonCrawlUrl = (
3928
- cdxApiUrl + "?output=json&fl=timestamp,url,mime,status,digest&url="
3929
- )
3826
+ commonCrawlUrl = cdxApiUrl + "?output=json&fl=timestamp,url,mime,status,digest&url="
3930
3827
 
3931
3828
  if args.filter_responses_only:
3932
3829
  url = commonCrawlUrl + subs + quote(argsInput) + path
@@ -3947,25 +3844,26 @@ def processCommonCrawlCollection(cdxApiUrl):
3947
3844
  session = requests.Session()
3948
3845
  session.mount("https://", HTTP_ADAPTER_CC)
3949
3846
  session.mount("http://", HTTP_ADAPTER_CC)
3847
+ try:
3848
+ current_session = session
3849
+ except Exception:
3850
+ pass
3950
3851
  resp = session.get(url, stream=True, headers={"User-Agent": userAgent})
3852
+ try:
3853
+ current_response = resp
3854
+ except Exception:
3855
+ pass
3951
3856
  except ConnectionError:
3952
3857
  writerr(
3953
3858
  colored(
3954
- getSPACER(
3955
- "[ ERR ] Common Crawl connection error for index "
3956
- + cdxApiUrl
3957
- ),
3859
+ "CommonCrawl - [ ERR ] Connection error for index " + cdxApiUrl,
3958
3860
  "red",
3959
3861
  )
3960
3862
  )
3961
3863
  resp = None
3962
3864
  return
3963
3865
  except Exception as e:
3964
- writerr(
3965
- colored(
3966
- getSPACER("[ ERR ] Error getting response - " + str(e)), "red"
3967
- )
3968
- )
3866
+ writerr(colored("CommonCrawl - [ ERR ] Error getting response - " + str(e), "red"))
3969
3867
  resp = None
3970
3868
  return
3971
3869
  finally:
@@ -3975,13 +3873,11 @@ def processCommonCrawlCollection(cdxApiUrl):
3975
3873
  if resp.status_code == 429:
3976
3874
  writerr(
3977
3875
  colored(
3978
- getSPACER(
3979
- "[ 429 ] Common Crawl rate limit reached, so stopping. Links that have already been retrieved will be saved."
3980
- ),
3876
+ "CommonCrawl - [ 429 ] Rate limit reached, so stopping. Links that have already been retrieved will be saved.",
3981
3877
  "red",
3982
3878
  )
3983
3879
  )
3984
- stopSource = True
3880
+ stopSourceCommonCrawl = True
3985
3881
  return
3986
3882
  # If the response from commoncrawl.org says nothing was found...
3987
3883
  if resp.text.lower().find("no captures found") > 0:
@@ -3992,11 +3888,7 @@ def processCommonCrawlCollection(cdxApiUrl):
3992
3888
  if verbose():
3993
3889
  writerr(
3994
3890
  colored(
3995
- getSPACER(
3996
- "[ ERR ] "
3997
- + url
3998
- + " gave an empty response."
3999
- ),
3891
+ "CommonCrawl - [ ERR ] " + url + " gave an empty response.",
4000
3892
  "red",
4001
3893
  )
4002
3894
  )
@@ -4006,12 +3898,10 @@ def processCommonCrawlCollection(cdxApiUrl):
4006
3898
  if verbose():
4007
3899
  writerr(
4008
3900
  colored(
4009
- getSPACER(
4010
- "[ "
4011
- + str(resp.status_code)
4012
- + " ] Error for "
4013
- + cdxApiUrl
4014
- ),
3901
+ "CommonCrawl - [ "
3902
+ + str(resp.status_code)
3903
+ + " ] Error for "
3904
+ + cdxApiUrl,
4015
3905
  "red",
4016
3906
  )
4017
3907
  )
@@ -4020,27 +3910,71 @@ def processCommonCrawlCollection(cdxApiUrl):
4020
3910
  pass
4021
3911
 
4022
3912
  # Get the URLs and MIME types
4023
- for line in resp.iter_lines():
4024
- results = line.decode("utf-8")
4025
- try:
4026
- data = json.loads(results)
4027
- # Get MIME Types if --verbose option was seletced
4028
- if verbose():
4029
- try:
4030
- if data["mime"] != "":
4031
- linkMimes.add(data["mime"])
4032
- except Exception:
4033
- pass
4034
- linksFoundAdd(data["url"])
4035
- except Exception:
4036
- if verbose():
4037
- writerr(
4038
- colored(
4039
- "ERROR processCommonCrawlCollection 2: Cannot get URL and MIME type from line: "
4040
- + str(line),
4041
- "red",
3913
+ try:
3914
+ for line in resp.iter_lines():
3915
+ results = line.decode("utf-8")
3916
+ try:
3917
+ data = json.loads(results)
3918
+ # Get MIME Types if --verbose option was selected
3919
+ if verbose():
3920
+ try:
3921
+ if data["mime"] != "":
3922
+ linkMimes.add(data["mime"])
3923
+ except Exception:
3924
+ pass
3925
+ # If -from or -to were passed, check the timestamp of the URL.
3926
+ # Only continue if the URL falls within the date range specified
3927
+ if args.from_date is not None or args.to_date is not None:
3928
+ try:
3929
+ ts = data["timestamp"]
3930
+
3931
+ # Normalize helper: pad/truncate date string to 14 digits (YYYYMMDDhhmmss)
3932
+ def normalize_date(d, is_from):
3933
+ if d is None:
3934
+ return None
3935
+ d = d.strip()
3936
+ # Pad to 14 digits: from_date pads with 0s, to_date with 9s
3937
+ if is_from:
3938
+ return (d + "0" * (14 - len(d)))[:14]
3939
+ else:
3940
+ return (d + "9" * (14 - len(d)))[:14]
3941
+
3942
+ from_ts = normalize_date(args.from_date, True)
3943
+ to_ts = normalize_date(args.to_date, False)
3944
+
3945
+ # Compare numerically
3946
+ if from_ts and ts < from_ts:
3947
+ continue
3948
+ if to_ts and ts > to_ts:
3949
+ continue
3950
+
3951
+ except Exception as e:
3952
+ writerr(
3953
+ colored(
3954
+ f"ERROR processCommonCrawlCollection 3: Cannot get timestamp from line {line}: {str(e)}",
3955
+ "red",
3956
+ )
3957
+ )
3958
+
3959
+ linksFoundAdd(data["url"], linksFoundCommonCrawl)
3960
+ except Exception:
3961
+ if verbose():
3962
+ writerr(
3963
+ colored(
3964
+ "ERROR processCommonCrawlCollection 2: Cannot get URL and MIME type from line: "
3965
+ + str(line),
3966
+ "red",
3967
+ )
4042
3968
  )
4043
- )
3969
+ finally:
3970
+ try:
3971
+ current_response = None
3972
+ except Exception:
3973
+ pass
3974
+ try:
3975
+ current_session = None
3976
+ except Exception:
3977
+ pass
4044
3978
  else:
4045
3979
  pass
4046
3980
  except Exception as e:
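
A note on the timestamp filtering added above: Common Crawl CDX timestamps are 14-digit YYYYMMDDhhmmss strings, so a partial -from/-to value is padded out (from-dates with zeros, to-dates with nines) and the range check becomes a plain string comparison. A self-contained restatement of the helper used in the loop:

def normalize_date(d, is_from):
    # Pad (or truncate) a partial date such as "2023" or "202306" to 14 digits.
    # From-dates pad with "0" and to-dates with "9", so "2023" spans the whole year.
    d = d.strip()
    pad = "0" if is_from else "9"
    return (d + pad * (14 - len(d)))[:14]

assert normalize_date("2023", True) == "20230000000000"
assert normalize_date("2023", False) == "20239999999999"
assert normalize_date("20230615", False) == "20230615999999"
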
@@ -4067,10 +4001,8 @@ def getCommonCrawlIndexes():
4067
4001
  except Exception as e:
4068
4002
  writerr(
4069
4003
  colored(
4070
- getSPACER(
4071
- "[ ERR ] Couldn't delete local version of Common Crawl index file: "
4072
- + str(e)
4073
- ),
4004
+ "CommonCrawl - [ ERR ] Couldn't delete local version of Common Crawl index file: "
4005
+ + str(e),
4074
4006
  "red",
4075
4007
  )
4076
4008
  )
@@ -4081,17 +4013,15 @@ def getCommonCrawlIndexes():
4081
4013
  if not createFile:
4082
4014
  # Read the indexes from the local file
4083
4015
  try:
4084
- with open(collinfoPath, "r") as file:
4016
+ with open(collinfoPath) as file:
4085
4017
  jsonResp = file.read()
4086
4018
  file.close()
4087
4019
  except Exception as e:
4088
4020
  createFile = True
4089
4021
  writerr(
4090
4022
  colored(
4091
- getSPACER(
4092
- "[ ERR ] Couldn't read local version of Common Crawl index file: "
4093
- + str(e)
4094
- ),
4023
+ "CommonCrawl - [ ERR ] Couldn't read local version of Common Crawl index file: "
4024
+ + str(e),
4095
4025
  "red",
4096
4026
  )
4097
4027
  )
@@ -4104,15 +4034,11 @@ def getCommonCrawlIndexes():
4104
4034
  session = requests.Session()
4105
4035
  session.mount("https://", HTTP_ADAPTER_CC)
4106
4036
  session.mount("http://", HTTP_ADAPTER_CC)
4107
- indexes = session.get(
4108
- CCRAWL_INDEX_URL, headers={"User-Agent": userAgent}
4109
- )
4037
+ indexes = session.get(CCRAWL_INDEX_URL, headers={"User-Agent": userAgent})
4110
4038
  except ConnectionError:
4111
4039
  writerr(
4112
4040
  colored(
4113
- getSPACER(
4114
- "[ ERR ] Common Crawl connection error getting Index file"
4115
- ),
4041
+ "CommonCrawl - [ ERR ] Connection error getting Index file",
4116
4042
  "red",
4117
4043
  )
4118
4044
  )
@@ -4120,10 +4046,8 @@ def getCommonCrawlIndexes():
4120
4046
  except Exception as e:
4121
4047
  writerr(
4122
4048
  colored(
4123
- getSPACER(
4124
- "[ ERR ] Error getting Common Crawl index collection - "
4125
- + str(e)
4126
- ),
4049
+ "CommonCrawl - [ ERR ] Error getting Common Crawl index collection - "
4050
+ + str(e),
4127
4051
  "red",
4128
4052
  )
4129
4053
  )
@@ -4133,9 +4057,7 @@ def getCommonCrawlIndexes():
4133
4057
  if indexes.status_code == 429:
4134
4058
  writerr(
4135
4059
  colored(
4136
- getSPACER(
4137
- "[ 429 ] Common Crawl rate limit reached so unable to get links."
4138
- ),
4060
+ "CommonCrawl - [ 429 ] Rate limit reached so unable to get links.",
4139
4061
  "red",
4140
4062
  )
4141
4063
  )
@@ -4144,7 +4066,7 @@ def getCommonCrawlIndexes():
4144
4066
  elif indexes.status_code == 503:
4145
4067
  writerr(
4146
4068
  colored(
4147
- getSPACER("[ 503 ] Common Crawl seems to be unavailable."),
4069
+ "CommonCrawl - [ 503 ] Common Crawl seems to be unavailable.",
4148
4070
  "red",
4149
4071
  )
4150
4072
  )
@@ -4152,11 +4074,9 @@ def getCommonCrawlIndexes():
4152
4074
  elif indexes.status_code != 200:
4153
4075
  writerr(
4154
4076
  colored(
4155
- getSPACER(
4156
- "[ "
4157
- + str(indexes.status_code)
4158
- + " ] Common Crawl did not retrun the indexes file."
4159
- ),
4077
+ "CommonCrawl - [ "
4078
+ + str(indexes.status_code)
4079
+ + " ] Common Crawl did not return the indexes file.",
4160
4080
  "red",
4161
4081
  )
4162
4082
  )
@@ -4173,10 +4093,8 @@ def getCommonCrawlIndexes():
4173
4093
  except Exception as e:
4174
4094
  writerr(
4175
4095
  colored(
4176
- getSPACER(
4177
- "[ ERR ] Couldn't create local version of Common Crawl index file: "
4178
- + str(e)
4179
- ),
4096
+ "CommonCrawl - [ ERR ] Couldn't create local version of Common Crawl index file: "
4097
+ + str(e),
4180
4098
  "red",
4181
4099
  )
4182
4100
  )
@@ -4187,26 +4105,40 @@ def getCommonCrawlIndexes():
4187
4105
  for values in json.loads(jsonResp):
4188
4106
  for key in values:
4189
4107
  if key == "cdx-api":
4190
- if args.lcy != 0:
4108
+ if args.from_date is not None or args.to_date is not None:
4191
4109
  try:
4192
4110
  indexYear = values[key].split("CC-MAIN-")[1][:4]
4193
- if int(indexYear) >= args.lcy:
4194
- cdxApiUrls.add(values[key])
4111
+
4112
+ # Only get the indexes that fall within the date range specified
4113
+ if args.from_date is not None:
4114
+ fromYear = int(args.from_date[:4])
4115
+ # There are a few exceptions with the filename format at the start of Common Crawl indexes where it contains 2 years, so deal with those (e.g. CC-MAIN-2009-2010-index and CC-MAIN-2008-2009-index)
4116
+ if fromYear in (2009, 2010):
4117
+ fromYear = fromYear - 1
4118
+ if int(indexYear) < fromYear:
4119
+ continue
4120
+ if args.to_date is not None:
4121
+ toYear = int(args.to_date[:4])
4122
+ if int(indexYear) > toYear:
4123
+ continue
4124
+ # If it passed the date range checks then add the index URL
4125
+ cdxApiUrls.add(values[key])
4126
+ collection = collection + 1
4195
4127
  except Exception as e:
4196
4128
  writerr(
4197
4129
  colored(
4198
- getSPACER(
4199
- "[ ERR ] Failed to get the year from index name "
4200
- + values[key]
4201
- + " - "
4202
- + str(e)
4203
- ),
4130
+ "CommonCrawl - [ ERR ] Failed to get the year from index name "
4131
+ + values[key]
4132
+ + " - "
4133
+ + str(e),
4204
4134
  "red",
4205
4135
  )
4206
4136
  )
4207
4137
  else:
4208
4138
  cdxApiUrls.add(values[key])
4209
- collection = collection + 1
4139
+ collection = collection + 1
4140
+
4141
+ # Only get the most recent number of indexes specified by -lcc argument
4210
4142
  if collection == args.lcc:
4211
4143
  break
4212
4144
 
@@ -4220,12 +4152,11 @@ def getCommonCrawlUrls():
4220
4152
  """
4221
4153
  Get all Common Crawl index collections to get all URLs from each one
4222
4154
  """
4223
- global linksFound, linkMimes, waymorePath, subs, path, stopSource, argsInput, checkCommonCrawl
4155
+ global linksFound, linkMimes, waymorePath, subs, path, stopSourceCommonCrawl, argsInput, checkCommonCrawl, linkCountCommonCrawl, linksFoundCommonCrawl
4224
4156
 
4225
4157
  try:
4226
- stopSource = False
4227
- linkMimes = set()
4228
- originalLinkCount = len(linksFound)
4158
+ stopSourceCommonCrawl = False
4159
+ linksFoundCommonCrawl = set()
4229
4160
 
4230
4161
  # Set mime content type filter
4231
4162
  if MATCH_MIME.strip() != "":
@@ -4240,13 +4171,9 @@ def getCommonCrawlUrls():
4240
4171
  # Set status code filter
4241
4172
  filterCode = ""
4242
4173
  if MATCH_CODE.strip() != "":
4243
- filterCode = (
4244
- "&filter=~status:(" + re.escape(MATCH_CODE).replace(",", "|") + ")"
4245
- )
4174
+ filterCode = "&filter=~status:(" + re.escape(MATCH_CODE).replace(",", "|") + ")"
4246
4175
  else:
4247
- filterCode = (
4248
- "&filter=!~status:(" + re.escape(FILTER_CODE).replace(",", "|") + ")"
4249
- )
4176
+ filterCode = "&filter=!~status:(" + re.escape(FILTER_CODE).replace(",", "|") + ")"
4250
4177
 
4251
4178
  if verbose():
4252
4179
  if args.filter_responses_only:
@@ -4267,7 +4194,7 @@ def getCommonCrawlUrls():
4267
4194
  )
4268
4195
  write(
4269
4196
  colored(
4270
- "The commoncrawl index URL requested to get links (where {CDX-API-URL} is from "
4197
+ "CommonCrawl - [ INFO ] The index URL requested to get links (where {CDX-API-URL} is from "
4271
4198
  + CCRAWL_INDEX_URL
4272
4199
  + "): ",
4273
4200
  "magenta",
@@ -4276,9 +4203,7 @@ def getCommonCrawlUrls():
4276
4203
  )
4277
4204
 
4278
4205
  if not args.check_only:
4279
- write(
4280
- colored("\rGetting commoncrawl.org index collections list...\r", "cyan")
4281
- )
4206
+ write(colored("CommonCrawl - [ INFO ] Getting index collections list...", "cyan"))
4282
4207
 
4283
4208
  # Get the Common Crawl index collections
4284
4209
  cdxApiUrls = getCommonCrawlIndexes()
@@ -4291,15 +4216,15 @@ def getCommonCrawlUrls():
4291
4216
  else:
4292
4217
  checkCommonCrawl = len(cdxApiUrls) + 1
4293
4218
  write(
4294
- colored("Get URLs from Common Crawl: ", "cyan")
4219
+ colored("CommonCrawl - [ INFO ] Get URLs from Common Crawl: ", "cyan")
4295
4220
  + colored(str(checkCommonCrawl) + " requests", "white")
4296
4221
  )
4297
4222
  else:
4298
4223
  write(
4299
4224
  colored(
4300
- "\rGetting links from the latest "
4225
+ "CommonCrawl - [ INFO ] Getting links from the latest "
4301
4226
  + str(len(cdxApiUrls))
4302
- + " commoncrawl.org index collections (this can take a while for some domains)...\r",
4227
+ + " commoncrawl.org index collections (this can take a while for some domains)...",
4303
4228
  "cyan",
4304
4229
  )
4305
4230
  )
@@ -4315,30 +4240,18 @@ def getCommonCrawlUrls():
4315
4240
  if verbose() and len(linkMimes) > 0:
4316
4241
  linkMimes.discard("warc/revisit")
4317
4242
  write(
4318
- getSPACER(
4319
- colored("MIME types found: ", "magenta")
4320
- + colored(str(linkMimes), "white")
4321
- )
4243
+ colored("CommonCrawl - [ INFO ] MIME types found: ", "magenta")
4244
+ + colored(str(linkMimes), "white")
4322
4245
  + "\n"
4323
4246
  )
4324
4247
 
4325
- linkCount = len(linksFound) - originalLinkCount
4326
- if args.xwm:
4327
- write(
4328
- getSPACER(
4329
- colored("Links found on commoncrawl.org: ", "cyan")
4330
- + colored(str(linkCount), "white")
4331
- )
4332
- + "\n"
4333
- )
4334
- else:
4335
- write(
4336
- getSPACER(
4337
- colored("Extra links found on commoncrawl.org: ", "cyan")
4338
- + colored(str(linkCount), "white")
4339
- )
4340
- + "\n"
4341
- )
4248
+ linkCountCommonCrawl = len(linksFoundCommonCrawl)
4249
+ write(
4250
+ colored("CommonCrawl - [ INFO ] Links found on commoncrawl.org: ", "cyan")
4251
+ + colored(str(linkCountCommonCrawl), "white")
4252
+ )
4253
+ linksFound.update(linksFoundCommonCrawl)
4254
+ linksFoundCommonCrawl.clear()
4342
4255
 
4343
4256
  except Exception as e:
4344
4257
  writerr(colored("ERROR getCommonCrawlUrls 1: " + str(e), "red"))
@@ -4348,7 +4261,7 @@ def processVirusTotalUrl(url):
4348
4261
  """
4349
4262
  Process a specific URL from virustotal.com to determine whether to save the link
4350
4263
  """
4351
- global argsInput, argsInputHostname
4264
+ global argsInput, argsInputHostname, linkCountVirusTotal, linksFoundVirusTotal
4352
4265
 
4353
4266
  addLink = True
4354
4267
 
@@ -4394,9 +4307,7 @@ def processVirusTotalUrl(url):
4394
4307
  flags=re.IGNORECASE,
4395
4308
  )
4396
4309
  else:
4397
- match = re.search(
4398
- r"(" + args.keywords_only + ")", url, flags=re.IGNORECASE
4399
- )
4310
+ match = re.search(r"(" + args.keywords_only + ")", url, flags=re.IGNORECASE)
4400
4311
  if match is None:
4401
4312
  addLink = False
4402
4313
 
@@ -4417,7 +4328,7 @@ def processVirusTotalUrl(url):
4417
4328
  flags=re.IGNORECASE,
4418
4329
  )
4419
4330
  if match is not None:
4420
- linksFoundAdd(url)
4331
+ linksFoundAdd(url, linksFoundVirusTotal)
4421
4332
 
4422
4333
  except Exception as e:
4423
4334
  writerr(colored("ERROR processVirusTotalUrl 1: " + str(e), "red"))
@@ -4425,58 +4336,50 @@ def processVirusTotalUrl(url):
4425
4336
 
4426
4337
  def getVirusTotalUrls():
4427
4338
  """
4428
- Get URLs from the VirusTotal API v2
4339
+ Get URLs from the VirusTotal API v2 and process them.
4340
+ Each URL is normalized as a (url, scan_date) tuple. Dates are filtered according to args.from_date / args.to_date.
4429
4341
  """
4430
- global VIRUSTOTAL_API_KEY, linksFound, linkMimes, waymorePath, subs, stopProgram, stopSource, argsInput, checkVirusTotal, argsInputHostname
4342
+ global VIRUSTOTAL_API_KEY, linksFound, linkMimes, waymorePath, subs, stopProgram, stopSourceVirusTotal, argsInput, checkVirusTotal, argsInputHostname, linkCountVirusTotal, linksFoundVirusTotal
4431
4343
 
4432
- # Write the file of URL's for the passed domain/URL
4433
4344
  try:
4434
- requestsMade = 0
4435
- stopSource = False
4436
- linkMimes = set()
4437
- originalLinkCount = len(linksFound)
4345
+ stopSourceVirusTotal = False
4346
+ linksFoundVirusTotal = set()
4438
4347
 
4439
- # Just pass the hostname in the URL
4348
+ # Build the VirusTotal API URL
4440
4349
  url = VIRUSTOTAL_URL.replace("{DOMAIN}", quote(argsInputHostname)).replace(
4441
4350
  "{APIKEY}", VIRUSTOTAL_API_KEY
4442
4351
  )
4443
4352
 
4444
4353
  if verbose():
4445
4354
  write(
4446
- colored("The VirusTotal URL requested to get links: ", "magenta")
4355
+ colored("VirusTotal - [ INFO ] The URL requested to get links: ", "magenta")
4447
4356
  + colored(url + "\n", "white")
4448
4357
  )
4449
4358
 
4450
4359
  if not args.check_only:
4451
- write(colored("\rGetting links from virustotal.com API...\r", "cyan"))
4360
+ write(colored("VirusTotal - [ INFO ] Getting links from virustotal.com API...", "cyan"))
4452
4361
 
4453
- # Get the domain report from virustotal
4362
+ # Make request
4454
4363
  try:
4455
- # Choose a random user agent string to use for any requests
4456
4364
  userAgent = random.choice(USER_AGENT)
4457
4365
  session = requests.Session()
4458
4366
  session.mount("https://", HTTP_ADAPTER)
4459
4367
  session.mount("http://", HTTP_ADAPTER)
4460
4368
  resp = session.get(url, headers={"User-Agent": userAgent})
4461
- requestsMade = requestsMade + 1
4462
4369
  except Exception as e:
4463
- write(
4370
+ writerr(
4464
4371
  colored(
4465
- getSPACER(
4466
- "[ ERR ] Unable to get links from virustotal.com: " + str(e)
4467
- ),
4372
+ "VirusTotal - [ ERR ] Unable to get links from virustotal.com: " + str(e),
4468
4373
  "red",
4469
4374
  )
4470
4375
  )
4471
4376
  return
4472
4377
 
4473
- # Deal with any errors
4378
+ # Handle HTTP errors
4474
4379
  if resp.status_code == 429:
4475
4380
  writerr(
4476
4381
  colored(
4477
- getSPACER(
4478
- "[ 429 ] VirusTotal rate limit reached so unable to get links."
4479
- ),
4382
+ "VirusTotal - [ 429 ] Rate limit reached so unable to get links.",
4480
4383
  "red",
4481
4384
  )
4482
4385
  )
@@ -4484,9 +4387,7 @@ def getVirusTotalUrls():
4484
4387
  elif resp.status_code == 403:
4485
4388
  writerr(
4486
4389
  colored(
4487
- getSPACER(
4488
- "[ 403 ] VirusTotal: Permission denied. Check your API key is correct."
4489
- ),
4390
+ "VirusTotal - [ 403 ] Permission denied. Check your API key is correct.",
4490
4391
  "red",
4491
4392
  )
4492
4393
  )
@@ -4494,101 +4395,94 @@ def getVirusTotalUrls():
4494
4395
  elif resp.status_code != 200:
4495
4396
  writerr(
4496
4397
  colored(
4497
- getSPACER(
4498
- "[ "
4499
- + str(resp.status_code)
4500
- + " ] Unable to get links from virustotal.com"
4501
- ),
4398
+ "VirusTotal - [ ERR ] [ "
4399
+ + str(resp.status_code)
4400
+ + " ] Unable to get links from virustotal.com",
4502
4401
  "red",
4503
4402
  )
4504
4403
  )
4505
4404
  return
4506
4405
 
4507
- # Get the JSON response
4406
+ # Parse JSON
4508
4407
  try:
4509
4408
  jsonResp = json.loads(resp.text.strip())
4510
4409
 
4511
- # Get the different URLs
4410
+ # Normalize arrays as (url, scan_date) tuples
4512
4411
  if args.no_subs:
4513
- subDomains = []
4412
+ subdomains = []
4514
4413
  else:
4515
- try:
4516
- subDomains = jsonResp["subdomains"]
4517
- except Exception:
4518
- subDomains = []
4519
- try:
4520
- detectedUrls = [
4521
- entry["url"] for entry in jsonResp.get("detected_urls", [])
4522
- ]
4523
- except Exception:
4524
- detectedUrls = []
4525
- try:
4526
- undetectedUrls = [
4527
- entry[0] for entry in jsonResp.get("undetected_urls", [])
4528
- ]
4529
- except Exception:
4530
- undetectedUrls = []
4531
- try:
4532
- totalUrls = set(subDomains + detectedUrls + undetectedUrls)
4533
- except Exception:
4534
- totalUrls = []
4535
- except Exception:
4414
+ subdomains = [(sd, None) for sd in jsonResp.get("subdomains", [])]
4415
+
4416
+ detected_urls = [
4417
+ (entry.get("url"), entry.get("scan_date"))
4418
+ for entry in jsonResp.get("detected_urls", [])
4419
+ ]
4420
+
4421
+ undetected_urls = [
4422
+ (entry[0], entry[4]) for entry in jsonResp.get("undetected_urls", [])
4423
+ ]
4424
+
4425
+ # Combine all
4426
+ all_urls = subdomains + detected_urls + undetected_urls
4427
+
4428
+ except Exception as e:
4536
4429
  writerr(
4537
4430
  colored(
4538
- getSPACER(
4539
- "[ ERR ] There was an unexpected response from the VirusTotal API"
4540
- ),
4431
+ "VirusTotal - [ ERR ] Unexpected response from the VirusTotal API: " + str(e),
4541
4432
  "red",
4542
4433
  )
4543
4434
  )
4544
- totalUrls = []
4435
+ all_urls = []
4545
4436
 
4437
+ # Check only mode
4546
4438
  if args.check_only:
4547
4439
  write(
4548
- colored("Get URLs from VirusTotal: ", "cyan")
4440
+ colored("VirusTotal - [ INFO ] Get URLs from VirusTotal: ", "cyan")
4549
4441
  + colored("1 request", "white")
4550
4442
  )
4551
4443
  checkVirusTotal = 1
4552
4444
  else:
4553
- # Carry on if something was found
4554
- for vturl in totalUrls:
4555
-
4556
- if stopSource:
4445
+ # Process each URL tuple
4446
+ for url, scan_date in all_urls:
4447
+ if stopSourceVirusTotal:
4557
4448
  break
4558
-
4559
- # Get memory in case it exceeds threshold
4560
4449
  getMemory()
4561
4450
 
4562
- # Work out whether to include it
4563
- processVirusTotalUrl(vturl)
4564
-
4565
- linkCount = len(linksFound) - originalLinkCount
4566
- if args.xwm and args.xcc and args.xav and args.xus:
4567
- write(
4568
- getSPACER(
4569
- colored("Links found on virustotal.com: ", "cyan")
4570
- + colored(str(linkCount), "white")
4571
- )
4572
- + "\n"
4573
- )
4574
- else:
4575
- write(
4576
- getSPACER(
4577
- colored("Extra links found on virustotal.com: ", "cyan")
4578
- + colored(str(linkCount), "white")
4579
- )
4580
- + "\n"
4581
- )
4451
+ # Filter by date if -from or -to was passed and we have a date for the url
4452
+ if scan_date and (args.from_date is not None or args.to_date is not None):
4453
+ urlDate = datetime.strptime(scan_date, "%Y-%m-%d %H:%M:%S")
4454
+ # If from date passed, check
4455
+ if args.from_date is not None:
4456
+ fromDate = parseDateArg(args.from_date)
4457
+ if urlDate < fromDate:
4458
+ continue
4459
+ # If to date passed, check
4460
+ if args.to_date is not None:
4461
+ toDate = parseDateArg(args.to_date)
4462
+ if urlDate >= toDate:
4463
+ continue
4464
+
4465
+ # Process URL
4466
+ processVirusTotalUrl(url)
4467
+
4468
+ # Show links found
4469
+ linkCountVirusTotal = len(linksFoundVirusTotal)
4470
+ write(
4471
+ colored("VirusTotal - [ INFO ] Links found on virustotal.com: ", "cyan")
4472
+ + colored(str(linkCountVirusTotal), "white")
4473
+ )
4474
+ linksFound.update(linksFoundVirusTotal)
4475
+ linksFoundVirusTotal.clear()
4582
4476
 
4583
4477
  except Exception as e:
4584
- writerr(colored("ERROR getVirusTotalUrls 1: " + str(e), "red"))
4478
+ writerr(colored(f"ERROR getVirusTotalUrls: {e}", "red"))
4585
4479
 
4586
4480
 
4587
4481
  def processIntelxUrl(url):
4588
4482
  """
4589
4483
  Process a specific URL from intelx.io to determine whether to save the link
4590
4484
  """
4591
- global argsInput, argsInputHostname
4485
+ global argsInput, argsInputHostname, linkCountIntelx, linksFoundIntelx
4592
4486
 
4593
4487
  addLink = True
4594
4488
 
@@ -4634,15 +4528,13 @@ def processIntelxUrl(url):
4634
4528
  flags=re.IGNORECASE,
4635
4529
  )
4636
4530
  else:
4637
- match = re.search(
4638
- r"(" + args.keywords_only + ")", url, flags=re.IGNORECASE
4639
- )
4531
+ match = re.search(r"(" + args.keywords_only + ")", url, flags=re.IGNORECASE)
4640
4532
  if match is None:
4641
4533
  addLink = False
4642
4534
 
4643
4535
  # Add link if it passed filters
4644
4536
  if addLink:
4645
- linksFoundAdd(url)
4537
+ linksFoundAdd(url, linksFoundIntelx)
4646
4538
 
4647
4539
  except Exception as e:
4648
4540
  writerr(colored("ERROR processIntelxUrl 1: " + str(e), "red"))
@@ -4653,6 +4545,7 @@ def processIntelxType(target, credits):
4653
4545
  target: 1 - Domains
4654
4546
  target: 3 - URLs
4655
4547
  """
4548
+ global intelxAPIIssue
4656
4549
  try:
4657
4550
  try:
4658
4551
  requestsMade = 0
@@ -4665,18 +4558,14 @@ def processIntelxType(target, credits):
4665
4558
  # Pass the API key in the X-Key header too.
4666
4559
  resp = session.post(
4667
4560
  INTELX_SEARCH_URL,
4668
- data='{"term":"'
4669
- + quote(argsInputHostname)
4670
- + '","target":'
4671
- + str(target)
4672
- + "}",
4561
+ data='{"term":"' + quote(argsInputHostname) + '","target":' + str(target) + "}",
4673
4562
  headers={"User-Agent": userAgent, "X-Key": INTELX_API_KEY},
4674
4563
  )
4675
4564
  requestsMade = requestsMade + 1
4676
4565
  except Exception as e:
4677
4566
  write(
4678
4567
  colored(
4679
- getSPACER("[ ERR ] Unable to get links from intelx.io: " + str(e)),
4568
+ "IntelX - [ ERR ] Unable to get links from intelx.io: " + str(e),
4680
4569
  "red",
4681
4570
  )
4682
4571
  )
@@ -4684,53 +4573,47 @@ def processIntelxType(target, credits):
4684
4573
 
4685
4574
  # Deal with any errors
4686
4575
  if resp.status_code == 429:
4576
+ intelxAPIIssue = True
4687
4577
  writerr(
4688
4578
  colored(
4689
- getSPACER(
4690
- "[ 429 ] IntelX rate limit reached so unable to get links."
4691
- ),
4579
+ "IntelX - [ 429 ] Rate limit reached so unable to get links.",
4692
4580
  "red",
4693
4581
  )
4694
4582
  )
4695
4583
  return
4696
4584
  elif resp.status_code == 401:
4585
+ intelxAPIIssue = True
4697
4586
  writerr(
4698
4587
  colored(
4699
- getSPACER(
4700
- "[ 401 ] IntelX: Not authorized. The source requires a paid API key. Check your API key is correct."
4701
- ),
4588
+ "IntelX - [ 401 ] Not authorized. The source requires a paid API key. Check your API key is correct.",
4702
4589
  "red",
4703
4590
  )
4704
4591
  )
4705
4592
  return
4706
4593
  elif resp.status_code == 402:
4594
+ intelxAPIIssue = True
4707
4595
  if credits.startswith("0/"):
4708
4596
  writerr(
4709
4597
  colored(
4710
- getSPACER(
4711
- "[ 402 ] IntelX: You have run out of daily credits on Intelx ("
4712
- + credits
4713
- + ")."
4714
- ),
4598
+ "IntelX - [ 402 ] You have run out of daily credits on Intelx ("
4599
+ + credits
4600
+ + ").",
4715
4601
  "red",
4716
4602
  )
4717
4603
  )
4718
4604
  else:
4719
4605
  writerr(
4720
4606
  colored(
4721
- getSPACER(
4722
- "[ 402 ] IntelX: It appears you have run out of daily credits on Intelx."
4723
- ),
4607
+ "IntelX - [ 402 ] It appears you have run out of daily credits on Intelx.",
4724
4608
  "red",
4725
4609
  )
4726
4610
  )
4727
4611
  return
4728
4612
  elif resp.status_code == 403:
4613
+ intelxAPIIssue = True
4729
4614
  writerr(
4730
4615
  colored(
4731
- getSPACER(
4732
- "[ 403 ] IntelX: Permission denied. Check your API key is correct."
4733
- ),
4616
+ "IntelX - [ 403 ] Permission denied. Check your API key is correct.",
4734
4617
  "red",
4735
4618
  )
4736
4619
  )
@@ -4738,11 +4621,7 @@ def processIntelxType(target, credits):
4738
4621
  elif resp.status_code != 200:
4739
4622
  writerr(
4740
4623
  colored(
4741
- getSPACER(
4742
- "[ "
4743
- + str(resp.status_code)
4744
- + " ] Unable to get links from intelx.io"
4745
- ),
4624
+ "IntelX - [ " + str(resp.status_code) + " ] Unable to get links from intelx.io",
4746
4625
  "red",
4747
4626
  )
4748
4627
  )
@@ -4755,9 +4634,7 @@ def processIntelxType(target, credits):
4755
4634
  except Exception:
4756
4635
  writerr(
4757
4636
  colored(
4758
- getSPACER(
4759
- "[ ERR ] There was an unexpected response from the Intelligence API"
4760
- ),
4637
+ "IntelX - [ ERR ] There was an unexpected response from the Intelligence API",
4761
4638
  "red",
4762
4639
  )
4763
4640
  )
@@ -4767,7 +4644,7 @@ def processIntelxType(target, credits):
4767
4644
  moreResults = True
4768
4645
  status = 0
4769
4646
  while moreResults:
4770
- if stopSource:
4647
+ if stopSourceIntelx:
4771
4648
  break
4772
4649
  try:
4773
4650
  resp = session.get(
@@ -4778,9 +4655,7 @@ def processIntelxType(target, credits):
4778
4655
  except Exception as e:
4779
4656
  write(
4780
4657
  colored(
4781
- getSPACER(
4782
- "[ ERR ] Unable to get links from intelx.io: " + str(e)
4783
- ),
4658
+ "IntelX - [ ERR ] Unable to get links from intelx.io: " + str(e),
4784
4659
  "red",
4785
4660
  )
4786
4661
  )
@@ -4793,9 +4668,7 @@ def processIntelxType(target, credits):
4793
4668
  except Exception:
4794
4669
  writerr(
4795
4670
  colored(
4796
- getSPACER(
4797
- "[ ERR ] There was an unexpected response from the Intelligence API"
4798
- ),
4671
+ "IntelX - [ ERR ] There was an unexpected response from the Intelligence API",
4799
4672
  "red",
4800
4673
  )
4801
4674
  )
@@ -4817,7 +4690,7 @@ def processIntelxType(target, credits):
4817
4690
  # Work out whether to include each url
4818
4691
  unique_values = list(set(selector_values + selector_valuesh))
4819
4692
  for ixurl in unique_values:
4820
- if stopSource:
4693
+ if stopSourceIntelx:
4821
4694
  break
4822
4695
  processIntelxUrl(ixurl)
4823
4696
 
@@ -4845,14 +4718,10 @@ def getIntelxAccountInfo() -> str:
4845
4718
  )
4846
4719
  jsonResp = json.loads(resp.text.strip())
4847
4720
  credits = str(
4848
- jsonResp.get("paths", {})
4849
- .get("/phonebook/search", {})
4850
- .get("Credit", "Unknown")
4721
+ jsonResp.get("paths", {}).get("/phonebook/search", {}).get("Credit", "Unknown")
4851
4722
  )
4852
4723
  credits_max = str(
4853
- jsonResp.get("paths", {})
4854
- .get("/phonebook/search", {})
4855
- .get("CreditMax", "Unknown")
4724
+ jsonResp.get("paths", {}).get("/phonebook/search", {}).get("CreditMax", "Unknown")
4856
4725
  )
4857
4726
  return credits + "/" + credits_max
4858
4727
  except Exception:
@@ -4863,25 +4732,26 @@ def getIntelxUrls():
4863
4732
  """
4864
4733
  Get URLs from the Intelligence X Phonebook search
4865
4734
  """
4866
- global INTELX_API_KEY, linksFound, waymorePath, subs, stopProgram, stopSource, argsInput, checkIntelx, argsInputHostname
4735
+ global INTELX_API_KEY, linksFound, waymorePath, subs, stopProgram, stopSourceIntelx, argsInput, checkIntelx, argsInputHostname, intelxAPIIssue, linkCountIntelx
4867
4736
 
4868
4737
  # Write the file of URL's for the passed domain/URL
4869
4738
  try:
4870
4739
  if args.check_only:
4871
4740
  write(
4872
- colored("Get URLs from Intelligence X: ", "cyan")
4741
+ colored("IntelX - [ INFO ] Get URLs from Intelligence X: ", "cyan")
4873
4742
  + colored("minimum 4 requests", "white")
4874
4743
  )
4875
4744
  checkIntelx = 4
4876
4745
  return
4877
4746
 
4878
- stopSource = False
4879
- originalLinkCount = len(linksFound)
4747
+ stopSourceIntelx = False
4748
+ linksFoundIntelx = set()
4749
+
4880
4750
  credits = getIntelxAccountInfo()
4881
4751
  if verbose():
4882
4752
  write(
4883
4753
  colored(
4884
- "The Intelligence X URL requested to get links (Credits: "
4754
+ "IntelX - [ INFO ] The Intelligence X URL requested to get links (Credits: "
4885
4755
  + credits
4886
4756
  + "): ",
4887
4757
  "magenta",
@@ -4890,32 +4760,23 @@ def getIntelxUrls():
4890
4760
  )
4891
4761
 
4892
4762
  if not args.check_only:
4893
- write(colored("\rGetting links from intelx.io API...\r", "cyan"))
4763
+ write(colored("IntelX - [ INFO ] Getting links from intelx.io API...", "cyan"))
4894
4764
 
4895
4765
  # Get the domains from Intelligence X if the --no-subs wasn't passed
4896
4766
  if not args.no_subs:
4897
4767
  processIntelxType(1, credits)
4898
4768
 
4899
4769
  # Get the URLs from Intelligence X
4900
- processIntelxType(3, credits)
4770
+ if not intelxAPIIssue:
4771
+ processIntelxType(3, credits)
4901
4772
 
4902
- linkCount = len(linksFound) - originalLinkCount
4903
- if args.xwm and args.xcc and args.xav and args.xus and args.xvt:
4904
- write(
4905
- getSPACER(
4906
- colored("Links found on intelx.io: ", "cyan")
4907
- + colored(str(linkCount), "white")
4908
- )
4909
- + "\n"
4910
- )
4911
- else:
4912
- write(
4913
- getSPACER(
4914
- colored("Extra links found on intelx.io: ", "cyan")
4915
- + colored(str(linkCount), "white")
4916
- )
4917
- + "\n"
4918
- )
4773
+ linkCountIntelx = len(linksFoundIntelx)
4774
+ write(
4775
+ colored("IntelX - [ INFO ] Links found on intelx.io: ", "cyan")
4776
+ + colored(str(linkCountIntelx), "white")
4777
+ )
4778
+ linksFound.update(linksFoundIntelx)
4779
+ linksFoundIntelx.clear()
4919
4780
 
4920
4781
  except Exception as e:
4921
4782
  writerr(colored("ERROR getIntelxUrls 1: " + str(e), "red"))
@@ -4968,27 +4829,23 @@ def processResponsesURLScan():
4968
4829
  indexPath = responseOutputDirectory + "waymore_index.txt"
4969
4830
  except Exception as e:
4970
4831
  if verbose():
4971
- writerr(
4972
- colored("ERROR processResponsesURLScan 4: " + str(e), "red")
4973
- )
4832
+ writerr(colored("ERROR processResponsesURLScan 4: " + str(e), "red"))
4974
4833
 
4975
4834
  # Get URLs from URLScan.io if the DOM ID's haven't been retrieved yet
4976
- if args.mode == "R" and stopProgram is None and not args.check_only:
4977
- write(
4978
- colored(
4979
- "\rGetting list of response links (this can take a while for some domains)...\r",
4980
- "cyan",
4835
+ if stopProgram is None and not args.check_only:
4836
+ if args.mode in ("R", "B"):
4837
+ write(
4838
+ colored(
4839
+ "URLScan - [ INFO ] Getting list of response links (this can take a while for some domains)...",
4840
+ "cyan",
4841
+ )
4981
4842
  )
4982
- )
4983
- getURLScanUrls()
4843
+ if args.mode == "R":
4844
+ getURLScanUrls()
4984
4845
 
4985
4846
  # Check if a continueResp.URLScan.tmp and responses.URLScan.tmp files exists
4986
4847
  runPrevious = "n"
4987
- if (
4988
- not args.check_only
4989
- and os.path.exists(continuePath)
4990
- and os.path.exists(responsesPath)
4991
- ):
4848
+ if not args.check_only and os.path.exists(continuePath) and os.path.exists(responsesPath):
4992
4849
 
4993
4850
  # Load the links into the set
4994
4851
  with open(responsesPath, "rb") as fl:
@@ -4997,7 +4854,7 @@ def processResponsesURLScan():
4997
4854
 
4998
4855
  # Get the previous end position to start again at this point
4999
4856
  try:
5000
- with open(continuePath, "r") as fc:
4857
+ with open(continuePath) as fc:
5001
4858
  successCount = int(fc.readline().strip())
5002
4859
  except Exception:
5003
4860
  successCount = 0
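For context, the continueResp.URLScan.tmp and responses.URLScan.tmp files handled above act as a simple resume mechanism: a serialized set of response links (assumed to be pickled, as it is opened in binary mode) plus a one-line counter of how many responses were already downloaded. A rough sketch of that save/restore pattern, with hypothetical file paths and helper names:

    import pickle

    def save_progress(link_requests, success_count, responses_path, continue_path):
        # Persist the full link set once, and the current position after each batch
        with open(responses_path, "wb") as fl:
            pickle.dump(link_requests, fl)
        with open(continue_path, "w") as fc:
            fc.write(str(success_count) + "\n")

    def load_progress(responses_path, continue_path):
        with open(responses_path, "rb") as fl:
            link_requests = pickle.load(fl)
        try:
            with open(continue_path) as fc:
                success_count = int(fc.readline().strip())
        except Exception:
            success_count = 0          # no usable counter: start from the beginning
        return link_requests, success_count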
@@ -5082,25 +4939,6 @@ def processResponsesURLScan():
5082
4939
  "green",
5083
4940
  )
5084
4941
  )
5085
- # if args.limit == 5000 and totalResponses == 5000:
5086
- # writerr(colored('Downloading archived responses: ','cyan')+colored(str(totalResponses+1)+' requests (the --limit argument defaults to '+str(DEFAULT_LIMIT)+')','cyan'))
5087
- # else:
5088
- # writerr(colored('Downloading archived responses: ','cyan')+colored(str(totalResponses+1)+' requests','white'))
5089
- # minutes = round(totalResponses*2.5 // 60)
5090
- # hours = minutes // 60
5091
- # days = hours // 24
5092
- # if minutes < 5:
5093
- # write(colored('\n-> Downloading the responses (depending on their size) should be quite quick!','green'))
5094
- # elif hours < 2:
5095
- # write(colored('\n-> Downloading the responses (depending on their size) could take more than '+str(minutes)+' minutes.','green'))
5096
- # elif hours < 6:
5097
- # write(colored('\n-> Downloading the responses (depending on their size) could take more than '+str(hours)+' hours.','green'))
5098
- # elif hours < 24:
5099
- # write(colored('\n-> Downloading the responses (depending on their size) could take more than '+str(hours)+' hours.','yellow'))
5100
- # elif days < 7:
5101
- # write(colored('\n-> Downloading the responses (depending on their size) could take more than '+str(days)+' days. Consider using arguments -ko, -l, -ci, -from and -to wisely! ','red'))
5102
- # else:
5103
- # write(colored('\n-> Downloading the responses (depending on their size) could take more than '+str(days)+' days!!! Consider using arguments -ko, -l, -ci, -from and -to wisely!','red'))
5104
4942
  write("")
5105
4943
  else:
5106
4944
  # If the limit has been set over the default, give a warning that this could take a long time!
@@ -5162,7 +5000,7 @@ def processResponsesURLScan():
5162
5000
  if failureCount > 0:
5163
5001
  if verbose():
5164
5002
  write(
5165
- colored("\nURLScan responses saved to ", "cyan")
5003
+ colored("URLScan - [ INFO ] Responses saved to ", "cyan")
5166
5004
  + colored(responseOutputDirectory, "white")
5167
5005
  + colored(" for " + subs + argsInput + ": ", "cyan")
5168
5006
  + colored(
@@ -5177,10 +5015,7 @@ def processResponsesURLScan():
5177
5015
  else:
5178
5016
  write(
5179
5017
  colored(
5180
- "\nURLScan responses saved for "
5181
- + subs
5182
- + argsInput
5183
- + ": ",
5018
+ "URLScan - [ INFO ] Responses saved for " + subs + argsInput + ": ",
5184
5019
  "cyan",
5185
5020
  )
5186
5021
  + colored(
@@ -5195,7 +5030,10 @@ def processResponsesURLScan():
5195
5030
  else:
5196
5031
  if verbose():
5197
5032
  write(
5198
- colored("\nURLScan responses saved to ", "cyan")
5033
+ colored(
5034
+ "URLScan - [ INFO ] Responses saved for " + subs + argsInput + ": ",
5035
+ "cyan",
5036
+ )
5199
5037
  + colored(responseOutputDirectory, "white")
5200
5038
  + colored(" for " + subs + argsInput + ": ", "cyan")
5201
5039
  + colored(
@@ -5209,10 +5047,7 @@ def processResponsesURLScan():
5209
5047
  else:
5210
5048
  write(
5211
5049
  colored(
5212
- "\nURLScan responses saved for "
5213
- + subs
5214
- + argsInput
5215
- + ": ",
5050
+ "URLScan - [ INFO ] Responses saved for " + subs + argsInput + ": ",
5216
5051
  "cyan",
5217
5052
  )
5218
5053
  + colored(
@@ -5225,9 +5060,7 @@ def processResponsesURLScan():
5225
5060
  )
5226
5061
  except Exception as e:
5227
5062
  if verbose():
5228
- writerr(
5229
- colored("ERROR processResponsesURLScan 5: " + str(e), "red")
5230
- )
5063
+ writerr(colored("ERROR processResponsesURLScan 5: " + str(e), "red"))
5231
5064
 
5232
5065
  totalFileCount = totalFileCount + fileCount
5233
5066
  except Exception as e:
@@ -5240,7 +5073,7 @@ def processResponsesWayback():
5240
5073
  """
5241
5074
  Get archived responses from Wayback Machine (archive.org)
5242
5075
  """
5243
- global linksFound, subs, path, indexFile, totalResponses, stopProgram, argsInput, continueRespFile, successCount, fileCount, DEFAULT_OUTPUT_DIR, responseOutputDirectory, failureCount, totalFileCount
5076
+ global linksFound, subs, path, indexFile, totalResponses, stopProgram, argsInput, continueRespFile, successCount, fileCount, DEFAULT_OUTPUT_DIR, responseOutputDirectory, failureCount, totalFileCount, current_response, current_session
5244
5077
  try:
5245
5078
  fileCount = 0
5246
5079
  failureCount = 0
@@ -5255,17 +5088,11 @@ def processResponsesWayback():
5255
5088
  indexPath = responseOutputDirectory + "waymore_index.txt"
5256
5089
  except Exception as e:
5257
5090
  if verbose():
5258
- writerr(
5259
- colored("ERROR processResponsesWayback 4: " + str(e), "red")
5260
- )
5091
+ writerr(colored("ERROR processResponsesWayback 4: " + str(e), "red"))
5261
5092
 
5262
5093
  # Check if a continueResp.tmp and responses.tmp files exists
5263
5094
  runPrevious = "n"
5264
- if (
5265
- not args.check_only
5266
- and os.path.exists(continuePath)
5267
- and os.path.exists(responsesPath)
5268
- ):
5095
+ if not args.check_only and os.path.exists(continuePath) and os.path.exists(responsesPath):
5269
5096
 
5270
5097
  # Load the links into the set
5271
5098
  with open(responsesPath, "rb") as fl:
@@ -5274,7 +5101,7 @@ def processResponsesWayback():
5274
5101
 
5275
5102
  # Get the previous end position to start again at this point
5276
5103
  try:
5277
- with open(continuePath, "r") as fc:
5104
+ with open(continuePath) as fc:
5278
5105
  successCount = int(fc.readline().strip())
5279
5106
  except Exception:
5280
5107
  successCount = 0
@@ -5349,9 +5176,7 @@ def processResponsesWayback():
5349
5176
  # Set mime content type filter
5350
5177
  filterMIME = ""
5351
5178
  if MATCH_MIME.strip() != "":
5352
- filterMIME = "&filter=mimetype:" + re.escape(MATCH_MIME).replace(
5353
- ",", "|"
5354
- )
5179
+ filterMIME = "&filter=mimetype:" + re.escape(MATCH_MIME).replace(",", "|")
5355
5180
  else:
5356
5181
  filterMIME = "&filter=!mimetype:warc/revisit"
5357
5182
  filterMIME = filterMIME + "|" + re.escape(FILTER_MIME).replace(",", "|")
@@ -5359,13 +5184,9 @@ def processResponsesWayback():
5359
5184
  # Set status code filter
5360
5185
  filterCode = ""
5361
5186
  if MATCH_CODE.strip() != "":
5362
- filterCode = "&filter=statuscode:" + re.escape(MATCH_CODE).replace(
5363
- ",", "|"
5364
- )
5187
+ filterCode = "&filter=statuscode:" + re.escape(MATCH_CODE).replace(",", "|")
5365
5188
  else:
5366
- filterCode = "&filter=!statuscode:" + re.escape(FILTER_CODE).replace(
5367
- ",", "|"
5368
- )
5189
+ filterCode = "&filter=!statuscode:" + re.escape(FILTER_CODE).replace(",", "|")
5369
5190
 
5370
5191
  # Set the collapse parameter value in the archive.org URL. From the Wayback API docs:
5371
5192
  # "A new form of filtering is the option to 'collapse' results based on a field, or a substring of a field.
@@ -5377,9 +5198,7 @@ def processResponsesWayback():
5377
5198
  collapse = "&collapse=timestamp:10"
5378
5199
  elif args.capture_interval == "d": # get at most 1 capture per URL per day
5379
5200
  collapse = "&collapse=timestamp:8"
5380
- elif (
5381
- args.capture_interval == "m"
5382
- ): # get at most 1 capture per URL per month
5201
+ elif args.capture_interval == "m": # get at most 1 capture per URL per month
5383
5202
  collapse = "&collapse=timestamp:6"
5384
5203
 
5385
5204
  url = (
@@ -5397,18 +5216,18 @@ def processResponsesWayback():
5397
5216
  if verbose():
5398
5217
  write(
5399
5218
  colored(
5400
- "The Wayback Machine URL requested to get responses: ",
5219
+ "Wayback - [ INFO ] The URL requested to get responses: ",
5401
5220
  "magenta",
5402
5221
  )
5403
5222
  + colored(url + "\n", "white")
5404
5223
  )
5405
5224
 
5406
5225
  if args.check_only:
5407
- write(colored("\rChecking archived response requests...\r", "cyan"))
5226
+ write(colored("Wayback - [ INFO ] Checking archived response requests...", "cyan"))
5408
5227
  else:
5409
5228
  write(
5410
5229
  colored(
5411
- "\rGetting list of response links (this can take a while for some domains)...\r",
5230
+ "Wayback - [ INFO ] Getting list of response links (this can take a while for some domains)...",
5412
5231
  "cyan",
5413
5232
  )
5414
5233
  )
@@ -5421,18 +5240,24 @@ def processResponsesWayback():
5421
5240
  session = requests.Session()
5422
5241
  session.mount("https://", HTTP_ADAPTER)
5423
5242
  session.mount("http://", HTTP_ADAPTER)
5243
+ try:
5244
+ current_session = session
5245
+ except Exception:
5246
+ pass
5424
5247
  resp = session.get(
5425
5248
  url,
5426
5249
  stream=True,
5427
5250
  headers={"User-Agent": userAgent},
5428
5251
  timeout=args.timeout,
5429
5252
  )
5253
+ try:
5254
+ current_response = resp
5255
+ except Exception:
5256
+ pass
5430
5257
  except ConnectionError:
5431
5258
  writerr(
5432
5259
  colored(
5433
- getSPACER(
5434
- "[ ERR ] Wayback Machine (archive.org) connection error"
5435
- ),
5260
+ getSPACER("Wayback - [ ERR ] Connection error"),
5436
5261
  "red",
5437
5262
  )
5438
5263
  )
@@ -5442,7 +5267,7 @@ def processResponsesWayback():
5442
5267
  except Exception as e:
5443
5268
  writerr(
5444
5269
  colored(
5445
- getSPACER("[ ERR ] Couldn't get list of responses: " + str(e)),
5270
+ getSPACER("Wayback - [ ERR ] Couldn't get list of responses: " + str(e)),
5446
5271
  "red",
5447
5272
  )
5448
5273
  )
@@ -5457,7 +5282,7 @@ def processResponsesWayback():
5457
5282
  writerr(
5458
5283
  colored(
5459
5284
  getSPACER(
5460
- "No archived responses were found on Wayback Machine (archive.org) for the given search parameters."
5285
+ "Wayback - [ ERR ] No archived responses were found on Wayback Machine (archive.org) for the given search parameters."
5461
5286
  ),
5462
5287
  "red",
5463
5288
  )
@@ -5468,7 +5293,7 @@ def processResponsesWayback():
5468
5293
  writerr(
5469
5294
  colored(
5470
5295
  getSPACER(
5471
- "[ 429 ] Wayback Machine (archive.org) rate limit reached, so stopping. Links that have already been retrieved will be saved."
5296
+ "Wayback - [ 429 ] Wayback Machine (archive.org) rate limit reached, so stopping. Links that have already been retrieved will be saved."
5472
5297
  ),
5473
5298
  "red",
5474
5299
  )
@@ -5479,7 +5304,7 @@ def processResponsesWayback():
5479
5304
  writerr(
5480
5305
  colored(
5481
5306
  getSPACER(
5482
- "[ 503 ] Wayback Machine (archive.org) is currently unavailable. It may be down for maintenance. You can check https://web.archive.org/cdx/ to verify."
5307
+ "Wayback - [ 503 ] Wayback Machine (archive.org) is currently unavailable. It may be down for maintenance. You can check https://web.archive.org/cdx/ to verify."
5483
5308
  ),
5484
5309
  "red",
5485
5310
  )
@@ -5491,7 +5316,7 @@ def processResponsesWayback():
5491
5316
  writerr(
5492
5317
  colored(
5493
5318
  getSPACER(
5494
- "[ "
5319
+ "Wayback - [ "
5495
5320
  + str(resp.status_code)
5496
5321
  + " ] Error for "
5497
5322
  + url
@@ -5506,7 +5331,7 @@ def processResponsesWayback():
5506
5331
  writerr(
5507
5332
  colored(
5508
5333
  getSPACER(
5509
- "Failed to get links from Wayback Machine (archive.org) - consider removing -ko / --keywords-only argument, or changing FILTER_KEYWORDS in config.yml"
5334
+ "Wayback - [ ERR ] Failed to get links from Wayback Machine (archive.org) - consider removing -ko / --keywords-only argument, or changing FILTER_KEYWORDS in config.yml"
5510
5335
  ),
5511
5336
  "red",
5512
5337
  )
@@ -5515,7 +5340,7 @@ def processResponsesWayback():
5515
5340
  writerr(
5516
5341
  colored(
5517
5342
  getSPACER(
5518
- "Failed to get links from Wayback Machine (archive.org) - consider removing -ko / --keywords-only argument, or changing the Regex value you passed"
5343
+ "Wayback - [ ERR ] Failed to get links from Wayback Machine (archive.org) - consider removing -ko / --keywords-only argument, or changing the Regex value you passed"
5519
5344
  ),
5520
5345
  "red",
5521
5346
  )
@@ -5525,7 +5350,7 @@ def processResponsesWayback():
5525
5350
  writerr(
5526
5351
  colored(
5527
5352
  getSPACER(
5528
- "Failed to get links from Wayback Machine (archive.org) - Blocked Site Error (they block the target site)"
5353
+ "Wayback - [ ERR ] Failed to get links from Wayback Machine (archive.org) - Blocked Site Error (they block the target site)"
5529
5354
  ),
5530
5355
  "red",
5531
5356
  )
@@ -5534,7 +5359,7 @@ def processResponsesWayback():
5534
5359
  writerr(
5535
5360
  colored(
5536
5361
  getSPACER(
5537
- "Failed to get links from Wayback Machine (archive.org) - check input domain and try again."
5362
+ "Wayback - [ ERR ] Failed to get links from Wayback Machine (archive.org) - check input domain and try again."
5538
5363
  ),
5539
5364
  "red",
5540
5365
  )
@@ -5544,23 +5369,43 @@ def processResponsesWayback():
5544
5369
  pass
5545
5370
 
5546
5371
  # Go through the response to save the links found
5547
- for line in resp.iter_lines():
5372
+ try:
5373
+ for line in resp.iter_lines():
5374
+ try:
5375
+ results = line.decode("utf-8")
5376
+ parts = results.split(" ", 2)
5377
+ timestamp = parts[0]
5378
+ originalUrl = parts[1]
5379
+ linksFoundResponseAdd(timestamp + "/" + originalUrl)
5380
+ except Exception:
5381
+ writerr(
5382
+ colored(
5383
+ getSPACER(
5384
+ "ERROR processResponsesWayback 3: Cannot to get link from line: "
5385
+ + str(line)
5386
+ ),
5387
+ "red",
5388
+ )
5389
+ )
5390
+ finally:
5548
5391
  try:
5549
- results = line.decode("utf-8")
5550
- parts = results.split(" ", 2)
5551
- timestamp = parts[0]
5552
- originalUrl = parts[1]
5553
- linksFoundResponseAdd(timestamp + "/" + originalUrl)
5392
+ current_response = None
5554
5393
  except Exception:
5555
- writerr(
5556
- colored(
5557
- getSPACER(
5558
- "ERROR processResponsesWayback 3: Cannot to get link from line: "
5559
- + str(line)
5560
- ),
5561
- "red",
5562
- )
5563
- )
5394
+ pass
5395
+ try:
5396
+ current_session = None
5397
+ except Exception:
5398
+ pass
5399
+
5400
 + # Cleanup shared response/session references now that the response has been processed
5401
+ try:
5402
+ current_response = None
5403
+ except Exception:
5404
+ pass
5405
+ try:
5406
+ current_session = None
5407
+ except Exception:
5408
+ pass
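The try/finally and the follow-up cleanup above keep the module-level current_response / current_session references populated only while the streamed CDX response is actually being consumed; presumably this lets the SIGINT handler registered in main() close an in-flight request, though that purpose is an inference here. A minimal, self-contained sketch of the idea, with a hypothetical handler body:

    import signal

    import requests

    current_response = None            # module-level handle to the in-flight response

    def handler(signum, frame):
        # Assumption: closing the streamed response makes iter_lines() stop early
        if current_response is not None:
            try:
                current_response.close()
            except Exception:
                pass

    signal.signal(signal.SIGINT, handler)

    def stream_lines(url):
        global current_response
        resp = requests.get(url, stream=True, timeout=30)
        current_response = resp        # expose the handle while the body is streamed
        try:
            for line in resp.iter_lines():
                yield line
        finally:
            current_response = None    # clear the handle once processing finishes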
5564
5409
 
5565
5410
  # Remove any links that have URL exclusions
5566
5411
  linkRequests = []
@@ -5574,8 +5419,7 @@ def processResponsesWayback():
5574
5419
  # b) it does not match the URL exclusions
5575
5420
  if (
5576
5421
  args.regex_after is None
5577
- or re.search(args.regex_after, link, flags=re.IGNORECASE)
5578
- is not None
5422
+ or re.search(args.regex_after, link, flags=re.IGNORECASE) is not None
5579
5423
  ) and exclusionRegex.search(link) is None:
5580
5424
  linkRequests.append(link)
5581
5425
 
@@ -5594,7 +5438,7 @@ def processResponsesWayback():
5594
5438
  writerr(
5595
5439
  colored(
5596
5440
  getSPACER(
5597
- 'Failed to get links from Wayback Machine (archive.org) - there were results (e.g. "'
5441
+ 'Wayback - [ ERR ] Failed to get links from Wayback Machine (archive.org) - there were results (e.g. "'
5598
5442
  + originalUrl
5599
5443
  + "\") but they didn't match the input you gave. Check input and try again."
5600
5444
  ),
@@ -5605,7 +5449,7 @@ def processResponsesWayback():
5605
5449
  writerr(
5606
5450
  colored(
5607
5451
  getSPACER(
5608
- "Failed to get links from Wayback Machine (archive.org) - check input and try again."
5452
+ "Wayback - [ ERR ] Failed to get links from Wayback Machine (archive.org) - check input and try again."
5609
5453
  ),
5610
5454
  "red",
5611
5455
  )
@@ -5748,7 +5592,7 @@ def processResponsesWayback():
5748
5592
  if failureCount > 0:
5749
5593
  if verbose():
5750
5594
  write(
5751
- colored("\nWayback responses saved to ", "cyan")
5595
+ colored("Wayback - [ INFO ] Responses saved to ", "cyan")
5752
5596
  + colored(responseOutputDirectory, "white")
5753
5597
  + colored(" for " + subs + argsInput + ": ", "cyan")
5754
5598
  + colored(
@@ -5763,10 +5607,7 @@ def processResponsesWayback():
5763
5607
  else:
5764
5608
  write(
5765
5609
  colored(
5766
- "\nWayback responses saved for "
5767
- + subs
5768
- + argsInput
5769
- + ": ",
5610
+ "Wayback - [ INFO ] Responses saved for " + subs + argsInput + ": ",
5770
5611
  "cyan",
5771
5612
  )
5772
5613
  + colored(
@@ -5781,7 +5622,7 @@ def processResponsesWayback():
5781
5622
  else:
5782
5623
  if verbose():
5783
5624
  write(
5784
- colored("\nWayback responses saved to ", "cyan")
5625
+ colored("Wayback - [ INFO ] Responses saved to ", "cyan")
5785
5626
  + colored(responseOutputDirectory, "white")
5786
5627
  + colored(" for " + subs + argsInput + ": ", "cyan")
5787
5628
  + colored(
@@ -5795,10 +5636,7 @@ def processResponsesWayback():
5795
5636
  else:
5796
5637
  write(
5797
5638
  colored(
5798
- "\nWayback responses saved for "
5799
- + subs
5800
- + argsInput
5801
- + ": ",
5639
+ "Wayback - [ INFO ] Responses saved for " + subs + argsInput + ": ",
5802
5640
  "cyan",
5803
5641
  )
5804
5642
  + colored(
@@ -5811,9 +5649,7 @@ def processResponsesWayback():
5811
5649
  )
5812
5650
  except Exception as e:
5813
5651
  if verbose():
5814
- writerr(
5815
- colored("ERROR processResponsesWayback 5: " + str(e), "red")
5816
- )
5652
+ writerr(colored("ERROR processResponsesWayback 5: " + str(e), "red"))
5817
5653
 
5818
5654
  totalFileCount = totalFileCount + fileCount
5819
5655
  except Exception as e:
@@ -5911,8 +5747,7 @@ def notifyDiscord():
5911
5747
  writerr(
5912
5748
  colored(
5913
5749
  getSPACER(
5914
- "WARNING: Failed to send notification to Discord - "
5915
- + result.json()
5750
+ "WARNING: Failed to send notification to Discord - " + result.json()
5916
5751
  ),
5917
5752
  "yellow",
5918
5753
  )
@@ -5920,9 +5755,7 @@ def notifyDiscord():
5920
5755
  except Exception as e:
5921
5756
  writerr(
5922
5757
  colored(
5923
- getSPACER(
5924
- "WARNING: Failed to send notification to Discord - " + str(e)
5925
- ),
5758
+ getSPACER("WARNING: Failed to send notification to Discord - " + str(e)),
5926
5759
  "yellow",
5927
5760
  )
5928
5761
  )
@@ -6037,9 +5870,7 @@ def combineInlineJS():
6037
5870
 
6038
5871
  totalSections = len(uniqueScripts)
6039
5872
  sectionCounter = 0 # Counter for inline JS sections
6040
- currentOutputFile = os.path.join(
6041
- responseOutputDirectory, outputFileTemplate.format(1)
6042
- )
5873
+ currentOutputFile = os.path.join(responseOutputDirectory, outputFileTemplate.format(1))
6043
5874
  currentSectionsWritten = 0 # Counter for sections written in current file
6044
5875
 
6045
5876
  if totalSections > 0:
@@ -6075,9 +5906,7 @@ def combineInlineJS():
6075
5906
  currentSectionsWritten = 1
6076
5907
 
6077
5908
  # Insert comment line for the beginning of the section
6078
- inlineJSFile.write(
6079
- f"//****** INLINE JS SECTION {sectionCounter} ******//\n\n"
6080
- )
5909
+ inlineJSFile.write(f"//****** INLINE JS SECTION {sectionCounter} ******//\n\n")
6081
5910
 
6082
5911
  # Write comments indicating the files the script was found in
6083
5912
  files = ""
@@ -6111,10 +5940,7 @@ def combineInlineJS():
6111
5940
  write(
6112
5941
  colored("Created files ", "cyan")
6113
5942
  + colored(
6114
- responseOutputDirectory
6115
- + "combinedInline{1-"
6116
- + str(fileNumber)
6117
- + "}.js",
5943
+ responseOutputDirectory + "combinedInline{1-" + str(fileNumber) + "}.js",
6118
5944
  "white",
6119
5945
  )
6120
5946
  + colored(" (contents of inline JS)\n", "cyan")
@@ -6124,9 +5950,91 @@ def combineInlineJS():
6124
5950
  writerr(colored("ERROR combineInlineJS 1: " + str(e), "red"))
6125
5951
 
6126
5952
 
5953
+ # Async wrapper functions for concurrent source fetching
5954
+ async def fetch_wayback_async():
5955
+ """Async wrapper for getWaybackUrls - runs in thread pool"""
5956
+ loop = asyncio.get_event_loop()
5957
+ await loop.run_in_executor(None, getWaybackUrls)
5958
+
5959
+
5960
+ async def fetch_commoncrawl_async():
5961
+ """Async wrapper for getCommonCrawlUrls - runs in thread pool"""
5962
+ loop = asyncio.get_event_loop()
5963
+ await loop.run_in_executor(None, getCommonCrawlUrls)
5964
+
5965
+
5966
+ async def fetch_alienvault_async():
5967
+ """Async wrapper for getAlienVaultUrls - runs in thread pool"""
5968
+ loop = asyncio.get_event_loop()
5969
+ await loop.run_in_executor(None, getAlienVaultUrls)
5970
+
5971
+
5972
+ async def fetch_urlscan_async():
5973
+ """Async wrapper for getURLScanUrls - runs in thread pool"""
5974
+ loop = asyncio.get_event_loop()
5975
+ await loop.run_in_executor(None, getURLScanUrls)
5976
+
5977
+
5978
+ async def fetch_virustotal_async():
5979
+ """Async wrapper for getVirusTotalUrls - runs in thread pool"""
5980
+ loop = asyncio.get_event_loop()
5981
+ await loop.run_in_executor(None, getVirusTotalUrls)
5982
+
5983
+
5984
+ async def fetch_intelx_async():
5985
+ """Async wrapper for getIntelxUrls - runs in thread pool"""
5986
+ loop = asyncio.get_event_loop()
5987
+ await loop.run_in_executor(None, getIntelxUrls)
5988
+
5989
+
5990
+ async def fetch_all_sources_async():
5991
+ """
5992
+ Orchestrator function to fetch from all enabled sources concurrently.
5993
 + Each source runs in the default thread pool executor while orchestration happens asynchronously.
5994
+ """
5995
+ global args, stopProgram, VIRUSTOTAL_API_KEY, INTELX_API_KEY, argsInput
5996
+
5997
+ tasks = []
5998
+
5999
+ # Build list of tasks for enabled sources
6000
+ if not args.xwm and stopProgram is None:
6001
+ tasks.append(("Wayback Machine", fetch_wayback_async()))
6002
+ if not args.xcc and stopProgram is None:
6003
+ tasks.append(("Common Crawl", fetch_commoncrawl_async()))
6004
+ if not args.xav and stopProgram is None and not argsInput.startswith("."):
6005
+ tasks.append(("AlienVault OTX", fetch_alienvault_async()))
6006
+ if not args.xus and stopProgram is None:
6007
+ tasks.append(("URLScan", fetch_urlscan_async()))
6008
+ if not args.xvt and VIRUSTOTAL_API_KEY != "" and stopProgram is None:
6009
+ tasks.append(("VirusTotal", fetch_virustotal_async()))
6010
+ if not args.xix and INTELX_API_KEY != "" and stopProgram is None:
6011
+ tasks.append(("Intelligence X", fetch_intelx_async()))
6012
+
6013
+ if not tasks:
6014
+ return
6015
+
6016
+ # Extract just the coroutines for gather
6017
+ task_coros = [task[1] for task in tasks]
6018
+
6019
+ # Fetch all concurrently, capturing exceptions so one failure doesn't stop others
6020
+ results = await asyncio.gather(*task_coros, return_exceptions=True)
6021
+
6022
+ # Check for any exceptions that occurred
6023
+ for i, result in enumerate(results):
6024
+ if isinstance(result, Exception):
6025
+ source_name = tasks[i][0]
6026
+ if verbose():
6027
+ writerr(
6028
+ colored(
6029
+ getSPACER(f"ERROR in {source_name} during concurrent fetch: {str(result)}"),
6030
+ "red",
6031
+ )
6032
+ )
6033
+
6034
+
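The orchestration above hands each blocking, requests-based fetcher to the event loop's default thread pool and awaits them together, so one failing provider does not cancel the rest. A self-contained sketch of the same pattern with stand-in fetchers (none of these names are waymore functions):

    import asyncio
    import time

    def fetch_slow():
        # Stand-in for a blocking provider query
        time.sleep(1)
        return "slow provider done"

    def fetch_broken():
        # Stand-in for a provider that errors out
        raise RuntimeError("provider unavailable")

    async def run_all():
        loop = asyncio.get_event_loop()
        futures = [
            loop.run_in_executor(None, fetch_slow),    # None = default ThreadPoolExecutor
            loop.run_in_executor(None, fetch_broken),
        ]
        # return_exceptions=True: failures come back as Exception objects instead of raising
        results = await asyncio.gather(*futures, return_exceptions=True)
        for result in results:
            if isinstance(result, Exception):
                print("error:", result)
            else:
                print("ok:", result)

    asyncio.run(run_all())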
6127
6035
  # Run waymore
6128
6036
  def main():
6129
- global args, DEFAULT_TIMEOUT, inputValues, argsInput, linksFound, linkMimes, successCount, failureCount, fileCount, totalResponses, totalPages, indexFile, path, stopSource, stopProgram, VIRUSTOTAL_API_KEY, inputIsSubDomain, argsInputHostname, WEBHOOK_DISCORD, responseOutputDirectory, fileCount, INTELX_API_KEY
6037
+ global args, DEFAULT_TIMEOUT, inputValues, argsInput, linksFound, linkMimes, successCount, failureCount, fileCount, totalResponses, totalPages, indexFile, path, stopSource, stopProgram, VIRUSTOTAL_API_KEY, inputIsSubDomain, argsInputHostname, WEBHOOK_DISCORD, responseOutputDirectory, fileCount, INTELX_API_KEY, stopSourceAlienVault, stopSourceCommonCrawl, stopSourceWayback, stopSourceURLScan, stopSourceVirusTotal, stopSourceIntelx
6130
6038
 
6131
6039
  # Tell Python to run the handler() function when SIGINT is received
6132
6040
  signal(SIGINT, handler)
@@ -6295,13 +6203,7 @@ def main():
6295
6203
  action="store",
6296
6204
  type=int,
6297
6205
  help="Limit the number of Common Crawl index collections searched, e.g. '-lcc 10' will just search the latest 10 collections (default: 1). As of November 2024 there are currently 106 collections. Setting to 0 (default) will search ALL collections. If you don't want to search Common Crawl at all, use the -xcc option.",
6298
- )
6299
- parser.add_argument(
6300
- "-lcy",
6301
- action="store",
6302
- type=int,
6303
- help="Limit the number of Common Crawl index collections searched by the year of the index data. The earliest index has data from 2008. Setting to 0 (default) will search collections or any year (but in conjuction with -lcc). For example, if you are only interested in data from 2015 and after, pass -lcy 2015. If you don't want to search Common Crawl at all, use the -xcc option.",
6304
- default=0,
6206
+ default=1,
6305
6207
  )
6306
6208
  parser.add_argument(
6307
6209
  "-t",
@@ -6316,10 +6218,10 @@ def main():
6316
6218
  parser.add_argument(
6317
6219
  "-p",
6318
6220
  "--processes",
6319
- help="Basic multithreading is done when getting requests for a file of URLs. This argument determines the number of processes (threads) used (default: 1)",
6221
+ help="Basic multithreading is done when getting requests for a file of URLs. This argument determines the number of processes (threads) used (default: 2)",
6320
6222
  action="store",
6321
6223
  type=validateArgProcesses,
6322
- default=1,
6224
+ default=2,
6323
6225
  metavar="<integer>",
6324
6226
  )
6325
6227
  parser.add_argument(
@@ -6420,13 +6322,6 @@ def main():
6420
6322
  showVersion()
6421
6323
  sys.exit()
6422
6324
 
6423
- # If -lcc wasn't passed then set to the default of 1 if -lcy is 0. This will make them work together
6424
- if args.lcc is None:
6425
- if args.lcy == 0:
6426
- args.lcc = 1
6427
- else:
6428
- args.lcc = 0
6429
-
6430
6325
  # If --providers was passed, then manually set the exclude arguments;
6431
6326
  if args.providers:
6432
6327
  if "wayback" not in args.providers:
@@ -6531,6 +6426,12 @@ def main():
6531
6426
  indexFile = None
6532
6427
  path = ""
6533
6428
  stopSource = False
6429
+ stopSourceWayback = False
6430
+ stopSourceCommonCrawl = False
6431
+ stopSourceAlienVault = False
6432
+ stopSourceURLScan = False
6433
+ stopSourceVirusTotal = False
6434
+ stopSourceIntelx = False
6534
6435
 
6535
6436
  # Get the config settings from the config.yml file
6536
6437
  getConfig()
@@ -6548,29 +6449,17 @@ def main():
6548
6449
  # If the mode is U (URLs retrieved) or B (URLs retrieved AND Responses downloaded)
6549
6450
  if args.mode in ["U", "B"]:
6550
6451
 
6551
- # If not requested to exclude, get URLs from the Wayback Machine (archive.org)
6552
- if not args.xwm and stopProgram is None:
6553
- getWaybackUrls()
6554
-
6555
- # If not requested to exclude, get URLs from commoncrawl.org
6556
- if not args.xcc and stopProgram is None:
6557
- getCommonCrawlUrls()
6558
-
6559
- # If not requested to exclude and a TLD wasn't passed, get URLs from alienvault.com
6560
- if not args.xav and stopProgram is None and not inpt.startswith("."):
6561
- getAlienVaultUrls()
6562
-
6563
- # If not requested to exclude, get URLs from urlscan.io
6564
- if not args.xus and stopProgram is None:
6565
- getURLScanUrls()
6566
-
6567
- # If not requested to exclude, get URLs from virustotal.com if we have an API key
6568
- if not args.xvt and VIRUSTOTAL_API_KEY != "" and stopProgram is None:
6569
- getVirusTotalUrls()
6570
-
6571
- # If not requested to exclude, get URLs from intelx.io if we have an API key
6572
- if not args.xix and INTELX_API_KEY != "" and stopProgram is None:
6573
- getIntelxUrls()
6452
+ # Fetch from all sources concurrently using async/await
6453
+ try:
6454
+ asyncio.run(fetch_all_sources_async())
6455
+ except Exception as e:
6456
+ if verbose():
6457
+ writerr(
6458
+ colored(
6459
+ getSPACER(f"ERROR during concurrent source fetching: {str(e)}"),
6460
+ "red",
6461
+ )
6462
+ )
6574
6463
 
6575
6464
  # Output results of all searches
6576
6465
  processURLOutput()