waymore 4.9-py3-none-any.whl → 5.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- waymore/__init__.py +1 -1
- waymore/waymore.py +255 -20
- {waymore-4.9.dist-info → waymore-5.1.dist-info}/METADATA +11 -7
- waymore-5.1.dist-info/RECORD +8 -0
- {waymore-4.9.dist-info → waymore-5.1.dist-info}/WHEEL +1 -1
- waymore-4.9.dist-info/RECORD +0 -8
- {waymore-4.9.dist-info → waymore-5.1.dist-info}/LICENSE +0 -0
- {waymore-4.9.dist-info → waymore-5.1.dist-info}/entry_points.txt +0 -0
- {waymore-4.9.dist-info → waymore-5.1.dist-info}/top_level.txt +0 -0
waymore/__init__.py
CHANGED
@@ -1 +1 @@
-__version__="4.9"
+__version__="5.1"
waymore/waymore.py
CHANGED
@@ -79,6 +79,7 @@ checkCommonCrawl = 0
 checkAlienVault = 0
 checkURLScan = 0
 checkVirusTotal = 0
+checkIntelx = 0
 argsInputHostname = ''
 responseOutputDirectory = ''

@@ -88,6 +89,9 @@ CCRAWL_INDEX_URL = 'https://index.commoncrawl.org/collinfo.json'
 ALIENVAULT_URL = 'https://otx.alienvault.com/api/v1/indicators/{TYPE}/{DOMAIN}/url_list?limit=500'
 URLSCAN_URL = 'https://urlscan.io/api/v1/search/?q=domain:{DOMAIN}&size=10000'
 VIRUSTOTAL_URL = 'https://www.virustotal.com/vtapi/v2/domain/report?apikey={APIKEY}&domain={DOMAIN}'
+INTELX_SEARCH_URL = 'https://2.intelx.io/phonebook/search'
+INTELX_RESULTS_URL = 'https://2.intelx.io/phonebook/search/result?id='
+INTELX_ACCOUNT_URL = 'https://2.intelx.io/authenticate/info'

 # User Agents to use when making requests, chosen at random
 USER_AGENT = [
@@ -144,6 +148,7 @@ URLSCAN_API_KEY = ''
 CONTINUE_RESPONSES_IF_PIPED = True
 WEBHOOK_DISCORD = ''
 DEFAULT_OUTPUT_DIR = ''
+INTELX_API_KEY = ''

 API_KEY_SECRET = "aHR0cHM6Ly95b3V0dS5iZS9kUXc0dzlXZ1hjUQ=="

@@ -285,7 +290,7 @@ def showOptions():
     """
     Show the chosen options and config settings
     """
-    global inputIsDomainANDPath, argsInput, isInputFile
+    global inputIsDomainANDPath, argsInput, isInputFile, INTELX_API_KEY

     try:
         write(colored('Selected config and settings:', 'cyan'))
@@ -325,6 +330,9 @@ def showOptions():
             providers = providers + 'URLScan, '
         if not args.xvt:
             providers = providers + 'VirusTotal, '
+        # Only show Intelligence X if the API key wa provided
+        if not args.xix and INTELX_API_KEY != '':
+            providers = providers + 'Intelligence X, '
         if providers == '':
             providers = 'None'
         write(colored('Providers: ' +str(providers.strip(', ')), 'magenta')+colored(' Which providers to check for URLs.','white'))
@@ -349,6 +357,11 @@ def showOptions():
             write(colored('VirusTotal API Key:', 'magenta')+colored(' {none} - You can get a FREE or paid API Key at https://www.virustotal.com/gui/join-us which will let you get some extra URLs.','white'))
         else:
             write(colored('VirusTotal API Key: ', 'magenta')+colored(VIRUSTOTAL_API_KEY))
+
+        if INTELX_API_KEY == '':
+            write(colored('Intelligence X API Key:', 'magenta')+colored(' {none} - You require a paid API Key from https://intelx.io/product','white'))
+        else:
+            write(colored('Intelligence X API Key: ', 'magenta')+colored(INTELX_API_KEY))

         if args.mode in ['U','B']:
             if args.output_urls != '':
@@ -401,12 +414,12 @@ def showOptions():
         write(colored('Response URL exclusions: ', 'magenta')+colored(FILTER_URL))

         if args.mt:
-            write(colored('-mt: ' +str(args.mt.lower()), 'magenta')+colored(' Only retrieve URLs and Responses that match these MIME Types.','white')+colored(' NOTE: This will NOT be applied to Alien Vault OTX
+            write(colored('-mt: ' +str(args.mt.lower()), 'magenta')+colored(' Only retrieve URLs and Responses that match these MIME Types.','white')+colored(' NOTE: This will NOT be applied to Alien Vault OTX, Virus Total and Intelligence X because they don\'t have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined - these will always be included. Consider excluding sources if this matters to you','yellow'))
         else:
             if args.ft:
-                write(colored('-ft: ' +str(args.ft.lower()), 'magenta')+colored(' Don\'t retrieve URLs and Responses that match these MIME Types.','white')+colored(' NOTE: This will NOT be applied to Alien Vault OTX
+                write(colored('-ft: ' +str(args.ft.lower()), 'magenta')+colored(' Don\'t retrieve URLs and Responses that match these MIME Types.','white')+colored(' NOTE: This will NOT be applied to Alien Vault OTX, Virus Total and Intelligence X because they don\'t have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined - these will always be included. Consider excluding sources if this matters to you','yellow'))
             else:
-                write(colored('MIME Type exclusions: ', 'magenta')+colored(FILTER_MIME)+colored(' Don\'t retrieve URLs and Responses that match these MIME Types.','white')+colored(' NOTE: This will NOT be applied to Alien Vault OTX
+                write(colored('MIME Type exclusions: ', 'magenta')+colored(FILTER_MIME)+colored(' Don\'t retrieve URLs and Responses that match these MIME Types.','white')+colored(' NOTE: This will NOT be applied to Alien Vault OTX, Virus Total and Intelligence X because they don\'t have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined - these will always be included. Consider excluding sources if this matters to you','yellow'))

         if args.keywords_only and args.keywords_only == '#CONFIG':
             if FILTER_KEYWORDS == '':
@@ -444,7 +457,7 @@ def getConfig():
     """
     Try to get the values from the config file, otherwise use the defaults
     """
-    global FILTER_CODE, FILTER_MIME, FILTER_URL, FILTER_KEYWORDS, URLSCAN_API_KEY, VIRUSTOTAL_API_KEY, CONTINUE_RESPONSES_IF_PIPED, subs, path, waymorePath, inputIsDomainANDPath, HTTP_ADAPTER, HTTP_ADAPTER_CC, argsInput, terminalWidth, MATCH_CODE, WEBHOOK_DISCORD, DEFAULT_OUTPUT_DIR, MATCH_MIME
+    global FILTER_CODE, FILTER_MIME, FILTER_URL, FILTER_KEYWORDS, URLSCAN_API_KEY, VIRUSTOTAL_API_KEY, CONTINUE_RESPONSES_IF_PIPED, subs, path, waymorePath, inputIsDomainANDPath, HTTP_ADAPTER, HTTP_ADAPTER_CC, argsInput, terminalWidth, MATCH_CODE, WEBHOOK_DISCORD, DEFAULT_OUTPUT_DIR, MATCH_MIME, INTELX_API_KEY
     try:

         # Set terminal width
@@ -580,6 +593,13 @@ def getConfig():
             writerr(colored('Unable to read "VIRUSTOTAL_API_KEY" from config.yml - consider adding (you can get a FREE api key at virustotal.com)', 'red'))
             VIRUSTOTAL_API_KEY = ''

+        try:
+            INTELX_API_KEY = config.get('INTELX_API_KEY')
+            if str(INTELX_API_KEY) == 'None':
+                INTELX_API_KEY = ''
+        except Exception as e:
+            INTELX_API_KEY = ''
+
         try:
             FILTER_KEYWORDS = config.get('FILTER_KEYWORDS')
             if str(FILTER_KEYWORDS) == 'None':
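For context, the new key follows the same pattern as the other API keys: a missing entry or a blank value in config.yml both collapse to an empty string, which later causes the Intelligence X source to be skipped. A minimal standalone sketch of that behaviour (illustrative only, assuming PyYAML and a config.yml like the one described in the README):

```python
# Illustrative sketch, not waymore's code: a blank or missing INTELX_API_KEY
# entry in config.yml ends up as '' so the Intelligence X source is skipped.
import yaml

sample_config = """
URLSCAN_API_KEY: ''
INTELX_API_KEY:          # left blank - YAML loads this as None
"""

config = yaml.safe_load(sample_config)
INTELX_API_KEY = config.get('INTELX_API_KEY')
if str(INTELX_API_KEY) == 'None':   # same normalisation used in getConfig()
    INTELX_API_KEY = ''

print(repr(INTELX_API_KEY))  # ''
```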
@@ -653,6 +673,7 @@ def getConfig():
         FILTER_CODE = DEFAULT_FILTER_CODE
         URLSCAN_API_KEY = ''
         VIRUSTOTAL_API_KEY = ''
+        INTELX_API_KEY = ''
         FILTER_KEYWORDS = ''
         CONTINUE_RESPONSES_IF_PIPED = True
         WEBHOOK_DISCORD = ''
@@ -1015,12 +1036,12 @@ def processURLOutput():
     """
     Show results of the URL output, i.e. getting URLs from archive.org and commoncrawl.org and write results to file
     """
-    global linksFound, subs, path, argsInput, checkWayback, checkCommonCrawl, checkAlienVault, checkURLScan, checkVirusTotal, DEFAULT_OUTPUT_DIR
+    global linksFound, subs, path, argsInput, checkWayback, checkCommonCrawl, checkAlienVault, checkURLScan, checkVirusTotal, DEFAULT_OUTPUT_DIR, checkIntelx

     try:

         if args.check_only:
-            totalRequests = checkWayback + checkCommonCrawl + checkAlienVault + checkURLScan + checkVirusTotal
+            totalRequests = checkWayback + checkCommonCrawl + checkAlienVault + checkURLScan + checkVirusTotal + checkIntelx
             minutes = totalRequests*1 // 60
             hours = minutes // 60
             days = hours // 24
@@ -1285,16 +1306,17 @@ def validateArgProviders(x):
     - otx
     - urlscan
     - virustotal
+    - intelx
     """
     invalid = False
     x = x.lower()
     providers = x.split(',')
     for provider in providers:
-        if not re.fullmatch(r'(wayback|commoncrawl|otx|urlscan|virustotal)', provider):
+        if not re.fullmatch(r'(wayback|commoncrawl|otx|urlscan|virustotal|intelx)', provider):
            invalid = True
            break
     if invalid:
-        raise argparse.ArgumentTypeError('Pass providers separated by a comma, e.g. wayback,commoncrawl,otx,urlscan,virustotal')
+        raise argparse.ArgumentTypeError('Pass providers separated by a comma, e.g. wayback,commoncrawl,otx,urlscan,virustotal,intelx')
     return x

 def processAlienVaultPage(url):
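As a quick illustration of the updated validator: the value passed to --providers is lowercased and split on commas, and every item must fully match the extended alternation, which now accepts intelx. A small sketch of the same check, assuming only the regex shown above:

```python
# Mirrors the updated validateArgProviders() check (illustrative only).
import re

PROVIDER_RE = r'(wayback|commoncrawl|otx|urlscan|virustotal|intelx)'

def providers_are_valid(value: str) -> bool:
    return all(re.fullmatch(PROVIDER_RE, p) for p in value.lower().split(','))

print(providers_are_valid('wayback,intelx'))    # True
print(providers_are_valid('wayback,intellix'))  # False - would trigger the argparse error
```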
@@ -1616,7 +1638,10 @@ def getURLScanUrls():
     # Get the first page from urlscan.io
     try:
         # Choose a random user agent string to use for any requests
-
+        # For other sources we would use `random.choice(USER_AGENT)` to asignn a random user-agent, but it seems
+        # that there are a handful of those that ALWAYS return 429. Passing a specific one all the time seems to
+        # be successful all the time
+        userAgent = "waymore by xnl-h4ck3r"
         session = requests.Session()
         session.mount('https://', HTTP_ADAPTER)
         session.mount('http://', HTTP_ADAPTER)
@@ -1767,7 +1792,6 @@ def getURLScanUrls():
     # Get the next page from urlscan.io
     try:
         # Choose a random user agent string to use for any requests
-        userAgent = random.choice(USER_AGENT)
         session = requests.Session()
         session.mount('https://', HTTP_ADAPTER)
         session.mount('http://', HTTP_ADAPTER)
@@ -2336,7 +2360,7 @@ def getCommonCrawlUrls():

 def processVirusTotalUrl(url):
     """
-    Process a specific URL from virustotal.
+    Process a specific URL from virustotal.com to determine whether to save the link
     """
     global argsInput, argsInputHostname

@@ -2378,7 +2402,7 @@ def processVirusTotalUrl(url):

         # Add link if it passed filters
         if addLink:
-            # Just get the hostname of the
+            # Just get the hostname of the url
             tldExtract = tldextract.extract(url)
             subDomain = tldExtract.subdomain
             if subDomain != '':
@@ -2423,11 +2447,10 @@ def getVirusTotalUrls():
         session = requests.Session()
         session.mount('https://', HTTP_ADAPTER)
         session.mount('http://', HTTP_ADAPTER)
-        # Pass the API-Key header too. This can change the max endpoints per page, depending on URLScan subscription
         resp = session.get(url, headers={'User-Agent':userAgent})
         requestsMade = requestsMade + 1
     except Exception as e:
-        write(colored(getSPACER('[ ERR ] Unable to get links from virustotal.
+        write(colored(getSPACER('[ ERR ] Unable to get links from virustotal.com: ' + str(e)), 'red'))
         return

     # Deal with any errors
@@ -2494,6 +2517,204 @@ def getVirusTotalUrls():
     except Exception as e:
         writerr(colored('ERROR getVirusTotalUrls 1: ' + str(e), 'red'))

+def processIntelxUrl(url):
+    """
+    Process a specific URL from intelx.io to determine whether to save the link
+    """
+    global argsInput, argsInputHostname
+
+    addLink = True
+
+    # If the url passed doesn't have a scheme, prefix with http://
+    match = re.search(r'^[A-za-z]*\:\/\/', url, flags=re.IGNORECASE)
+    if match is None:
+        url = 'http://'+url
+
+    try:
+        # If filters are required then test them
+        if not args.filter_responses_only:
+
+            # If the user requested -n / --no-subs then we don't want to add it if it has a sub domain (www. will not be classed as a sub domain)
+            if args.no_subs:
+                match = re.search(r'^[A-za-z]*\:\/\/(www\.)?'+re.escape(argsInputHostname), url, flags=re.IGNORECASE)
+                if match is None:
+                    addLink = False
+
+            # If the user didn't requested -f / --filter-responses-only then check http code
+            # Note we can't check MIME filter because it is not returned by VirusTotal API
+            if addLink and not args.filter_responses_only:
+
+                # Check the URL exclusions
+                if addLink:
+                    match = re.search(r'('+re.escape(FILTER_URL).replace(',','|')+')', url, flags=re.IGNORECASE)
+                    if match is not None:
+                        addLink = False
+
+                # Set keywords filter if -ko argument passed
+                if addLink and args.keywords_only:
+                    if args.keywords_only == '#CONFIG':
+                        match = re.search(r'('+re.escape(FILTER_KEYWORDS).replace(',','|')+')', url, flags=re.IGNORECASE)
+                    else:
+                        match = re.search(r'('+args.keywords_only+')', url, flags=re.IGNORECASE)
+                    if match is None:
+                        addLink = False
+
+        # Add link if it passed filters
+        if addLink:
+            linksFoundAdd(url)
+
+    except Exception as e:
+        writerr(colored('ERROR processIntelxUrl 1: ' + str(e), 'red'))
+
+def processIntelxType(target, credits):
+    '''
+    target: 1 - Domains
+    target: 3 - URLs
+    '''
+    try:
+        try:
+            requestsMade = 0
+
+            # Choose a random user agent string to use for any requests
+            userAgent = random.choice(USER_AGENT)
+            session = requests.Session()
+            session.mount('https://', HTTP_ADAPTER)
+            session.mount('http://', HTTP_ADAPTER)
+            # Pass the API key in the X-Key header too.
+            resp = session.post(INTELX_SEARCH_URL, data='{"term":"'+quote(argsInputHostname)+'","target":'+str(target)+'}', headers={'User-Agent':userAgent,'X-Key':INTELX_API_KEY})
+            requestsMade = requestsMade + 1
+        except Exception as e:
+            write(colored(getSPACER('[ ERR ] Unable to get links from intelx.io: ' + str(e)), 'red'))
+            return
+
+        # Deal with any errors
+        if resp.status_code == 429:
+            writerr(colored(getSPACER('[ 429 ] IntelX rate limit reached so unable to get links.'),'red'))
+            return
+        elif resp.status_code == 401:
+            writerr(colored(getSPACER('[ 401 ] IntelX: Not authorized. The source requires a paid API key. Check your API key is correct.'),'red'))
+            return
+        elif resp.status_code == 402:
+            if credits.startswith("0/"):
+                writerr(colored(getSPACER('[ 402 ] IntelX: You have run out of daily credits on Intelx ('+credits+').'),'red'))
+            else:
+                writerr(colored(getSPACER('[ 402 ] IntelX: It appears you have run out of daily credits on Intelx.'),'red'))
+            return
+        elif resp.status_code == 403:
+            writerr(colored(getSPACER('[ 403 ] IntelX: Permission denied. Check your API key is correct.'),'red'))
+            return
+        elif resp.status_code != 200:
+            writerr(colored(getSPACER('[ ' + str(resp.status_code) + ' ] Unable to get links from intelx.io'),'red'))
+            return
+
+        # Get the JSON response
+        try:
+            jsonResp = json.loads(resp.text.strip())
+            id = jsonResp['id']
+        except:
+            writerr(colored(getSPACER('[ ERR ] There was an unexpected response from the Intelligence API'),'red'))
+            return
+
+        # Get each page of the results
+        moreResults = True
+        status = 0
+        while moreResults:
+            if stopSource:
+                break
+            try:
+                resp = session.get(INTELX_RESULTS_URL+id, headers={'User-Agent':userAgent,'X-Key':INTELX_API_KEY})
+                requestsMade = requestsMade + 1
+            except Exception as e:
+                write(colored(getSPACER('[ ERR ] Unable to get links from intelx.io: ' + str(e)), 'red'))
+                return
+
+            # Get the JSON response
+            try:
+                jsonResp = json.loads(resp.text.strip())
+                status = jsonResp['status']
+            except:
+                writerr(colored(getSPACER('[ ERR ] There was an unexpected response from the Intelligence API'),'red'))
+                moreResults = False
+
+            try:
+                selector_values = [entry['selectorvalue'] for entry in jsonResp.get('selectors', [])]
+            except Exception as e:
+                selector_values = []
+            try:
+                selector_valuesh = [entry['selectorvalueh'] for entry in jsonResp.get('selectors', [])]
+            except Exception as e:
+                selector_valuesh = []
+
+            # Work out whether to include each url
+            unique_values = list(set(selector_values + selector_valuesh))
+            for ixurl in unique_values:
+                if stopSource:
+                    break
+                processIntelxUrl(ixurl)
+
+            if status == 1 or selector_values == []:
+                moreResults = False
+
+    except Exception as e:
+        writerr(colored('ERROR processIntelxType 1: ' + str(e), 'red'))
+
+def getIntelxAccountInfo() -> str:
+    '''
+    Get the account info and return the number of Credits remainiing from the /phonebook/search
+    '''
+    try:
+        # Choose a random user agent string to use for any requests
+        userAgent = random.choice(USER_AGENT)
+        session = requests.Session()
+        session.mount('https://', HTTP_ADAPTER)
+        session.mount('http://', HTTP_ADAPTER)
+        # Pass the API key in the X-Key header too.
+        resp = session.get(INTELX_ACCOUNT_URL, headers={'User-Agent':userAgent,'X-Key':INTELX_API_KEY})
+        jsonResp = json.loads(resp.text.strip())
+        credits = str(jsonResp.get("paths", {}).get("/phonebook/search", {}).get("Credit", "Unknown"))
+        credits_max = str(jsonResp.get("paths", {}).get("/phonebook/search", {}).get("CreditMax", "Unknown"))
+        return credits+"/"+credits_max
+    except:
+        return "Unknown"
+
+def getIntelxUrls():
+    """
+    Get URLs from the Intelligence X Phonebook search
+    """
+    global INTELX_API_KEY, linksFound, waymorePath, subs, stopProgram, stopSource, argsInput, checkIntelx, argsInputHostname
+
+    # Write the file of URL's for the passed domain/URL
+    try:
+        if args.check_only:
+            write(colored('Get URLs from Intelligence X: ','cyan')+colored('minimum 4 requests','white'))
+            checkIntelx = 4
+            return
+
+        stopSource = False
+        originalLinkCount = len(linksFound)
+        credits = getIntelxAccountInfo()
+        if verbose():
+            write(colored('The Intelligence X URL requested to get links (Credits: '+credits+'): ','magenta')+colored(INTELX_SEARCH_URL+'\n','white'))
+
+        if not args.check_only:
+            write(colored('\rGetting links from intelx.io API...\r','cyan'))
+
+        # Get the domains from Intelligence X if the --no-subs wasn't passed
+        if not args.no_subs:
+            processIntelxType(1, credits)
+
+        # Get the URLs from Intelligence X
+        processIntelxType(3, credits)
+
+        linkCount = len(linksFound) - originalLinkCount
+        if args.xwm and args.xcc and args.xav and args.xus and args.xvt:
+            write(getSPACER(colored('Links found on intelx.io: ', 'cyan')+colored(str(linkCount),'white'))+'\n')
+        else:
+            write(getSPACER(colored('Extra links found on intelx.io: ', 'cyan')+colored(str(linkCount),'white'))+'\n')
+
+    except Exception as e:
+        writerr(colored('ERROR getIntelxUrls 1: ' + str(e), 'red'))
+
 def processResponses():
     """
     Get archived responses from Wayback Machine (archive.org)
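For readers who want the new source flow without the surrounding error handling, below is a condensed, standalone sketch (illustrative only, not waymore's exact code) of what getIntelxAccountInfo(), processIntelxType() and processIntelxUrl() do together: check the remaining /phonebook/search credits, start a phonebook search for the target term, then poll the result endpoint and collect selectorvalue/selectorvalueh entries until the API reports status 1 or returns no more selectors. The placeholder key and domain are assumptions, the JSON body is sent with requests' json= helper rather than a hand-built string, and waymore additionally routes every value through its URL/keyword filters.

```python
# Condensed sketch of the Intelligence X phonebook flow added in 5.1
# (placeholder key/domain; the real logic lives in processIntelxType/getIntelxUrls).
import requests

INTELX_API_KEY = 'YOUR-PAID-API-KEY'   # placeholder
DOMAIN = 'example.com'                 # placeholder target
HEADERS = {'User-Agent': 'waymore by xnl-h4ck3r', 'X-Key': INTELX_API_KEY}

# 1. Remaining credits for the /phonebook/search path (cf. getIntelxAccountInfo)
info = requests.get('https://2.intelx.io/authenticate/info', headers=HEADERS).json()
phonebook = info.get('paths', {}).get('/phonebook/search', {})
print('Credits:', phonebook.get('Credit', '?'), '/', phonebook.get('CreditMax', '?'))

# 2. Start a phonebook search - target 1 = domains, target 3 = URLs
resp = requests.post('https://2.intelx.io/phonebook/search',
                     json={'term': DOMAIN, 'target': 3}, headers=HEADERS)
resp.raise_for_status()
search_id = resp.json()['id']

# 3. Poll the result endpoint until status 1 or no selectors are returned
found = set()
while True:
    page = requests.get('https://2.intelx.io/phonebook/search/result?id=' + search_id,
                        headers=HEADERS).json()
    selectors = page.get('selectors', [])
    for s in selectors:
        for key in ('selectorvalue', 'selectorvalueh'):
            if s.get(key):
                found.add(s[key])
    if page.get('status') == 1 or not selectors:
        break

print(len(found), 'unique values returned')
```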
@@ -2993,7 +3214,7 @@ def combineInlineJS():

 # Run waymore
 def main():
-    global args, DEFAULT_TIMEOUT, inputValues, argsInput, linksFound, linkMimes, successCount, failureCount, fileCount, totalResponses, totalPages, indexFile, path, stopSource, stopProgram, VIRUSTOTAL_API_KEY, inputIsSubDomain, argsInputHostname, WEBHOOK_DISCORD, responseOutputDirectory, fileCount
+    global args, DEFAULT_TIMEOUT, inputValues, argsInput, linksFound, linkMimes, successCount, failureCount, fileCount, totalResponses, totalPages, indexFile, path, stopSource, stopProgram, VIRUSTOTAL_API_KEY, inputIsSubDomain, argsInputHostname, WEBHOOK_DISCORD, responseOutputDirectory, fileCount, INTELX_API_KEY

     # Tell Python to run the handler() function when SIGINT is received
     signal(SIGINT, handler)
@@ -3051,7 +3272,7 @@ def main():
     parser.add_argument(
         '-ft',
         action='store',
-        help='Filter MIME Types for retrieved URLs and responses. Comma separated list of MIME Types (default: the FILTER_MIME values from config.yml). Passing this argument will override the value from config.yml. NOTE: This will NOT be applied to Alien Vault OTX
+        help='Filter MIME Types for retrieved URLs and responses. Comma separated list of MIME Types (default: the FILTER_MIME values from config.yml). Passing this argument will override the value from config.yml. NOTE: This will NOT be applied to Alien Vault OTX, Virus Total and Intelligence X because they don\'t have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined - these will always be included. Consider excluding sources if this matters to you.',
         type=validateArgMimeTypes,
     )
     parser.add_argument(
@@ -3063,7 +3284,7 @@ def main():
     parser.add_argument(
         '-mt',
         action='store',
-        help='Only MIME Types for retrieved URLs and responses. Comma separated list of MIME types. Passing this argument overrides the config FILTER_MIME and -ft. NOTE: This will NOT be applied to Alien Vault OTX
+        help='Only MIME Types for retrieved URLs and responses. Comma separated list of MIME types. Passing this argument overrides the config FILTER_MIME and -ft. NOTE: This will NOT be applied to Alien Vault OTX, Virus Total and Intelligence X because they don\'t have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined - these will always be included. Consider excluding sources if this matters to you.',
         type=validateArgMimeTypes,
     )
     parser.add_argument(
@@ -3141,13 +3362,19 @@ def main():
         help='Exclude checks for links from virustotal.com',
         default=False
     )
+    parser.add_argument(
+        '-xix',
+        action='store_true',
+        help='Exclude checks for links from intelx.io',
+        default=False
+    )
     parser.add_argument(
         '--providers',
         action='store',
-        help='A comma separated list of source providers that you want to get URLs from. The values can be wayback,commoncrawl,otx,urlscan and
+        help='A comma separated list of source providers that you want to get URLs from. The values can be wayback,commoncrawl,otx,urlscan,virustotal and intelx. Passing this will override any exclude arguments (e.g. -xwm,-xcc, etc.) passed to exclude sources, and reset those based on what was passed with this argument.',
         default=[],
         type=validateArgProviders,
-        metavar='{wayback,commoncrawl,otx,urlscan,virustotal}'
+        metavar='{wayback,commoncrawl,otx,urlscan,virustotal,intelx}'
     )
     parser.add_argument(
         '-lcc',
@@ -3301,6 +3528,10 @@ def main():
             args.xvt = True
         else:
             args.xvt = False
+        if 'intelx' not in args.providers:
+            args.xix = True
+        else:
+            args.xix = False

     # If no input was given, raise an error
     if sys.stdin.isatty():
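The hunk above extends an existing pattern: when --providers is supplied, every exclude flag is recomputed from that list, so a source only runs if it is explicitly named. A hypothetical condensed version of that mapping (the flag-to-provider pairing is assumed from the flags and provider values seen elsewhere in this diff):

```python
# Hypothetical condensed form of the --providers handling shown above: each
# exclude flag becomes True unless its provider name was requested.
def exclude_flags(providers):
    mapping = {'xwm': 'wayback', 'xcc': 'commoncrawl', 'xav': 'otx',
               'xus': 'urlscan', 'xvt': 'virustotal', 'xix': 'intelx'}
    return {flag: name not in providers for flag, name in mapping.items()}

print(exclude_flags(['wayback', 'intelx']))
# {'xwm': False, 'xcc': True, 'xav': True, 'xus': True, 'xvt': True, 'xix': False}
```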
@@ -3390,6 +3621,10 @@ def main():
     # If not requested to exclude, get URLs from virustotal.com if we have an API key
     if not args.xvt and VIRUSTOTAL_API_KEY != '' and stopProgram is None:
         getVirusTotalUrls()
+
+    # If not requested to exclude, get URLs from intelx.io if we have an API key
+    if not args.xix and INTELX_API_KEY != '' and stopProgram is None:
+        getIntelxUrls()

     # Output results of all searches
     processURLOutput()
{waymore-4.9.dist-info → waymore-5.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: waymore
-Version: 4.9
+Version: 5.1
 Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan & VirusTotal!
 Home-page: https://github.com/xnl-h4ck3r/waymore
 Author: @xnl-h4ck3r
@@ -15,7 +15,7 @@ Requires-Dist: tldextract

 <center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>

-## About - v4.9
+## About - v5.1

 The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.

@@ -23,7 +23,7 @@ The idea behind **waymore** is to find even more links from the Wayback Machine
 👉 Also, other tools do not currenrtly deal with the rate limiting now in place by the sources, and will often just stop with incomplete results and not let you know they are incomplete.

 Anyone who does bug bounty will have likely used the amazing [waybackurls](https://github.com/tomnomnom/waybackurls) by @TomNomNoms. This tool gets URLs from [web.archive.org](https://web.archive.org) and additional links (if any) from one of the index collections on [index.commoncrawl.org](http://index.commoncrawl.org/).
-You would have also likely used the amazing [gau](https://github.com/lc/gau) by @hacker\_ which also finds URL's from wayback archive, Common Crawl, but also from Alien Vault and
+You would have also likely used the amazing [gau](https://github.com/lc/gau) by @hacker\_ which also finds URL's from wayback archive, Common Crawl, but also from Alien Vault, URLScan, Virus Total and Intelligence X.
 Now **waymore** gets URL's from ALL of those sources too (with ability to filter more to get what you want):

 - Wayback Machine (web.archive.org)
@@ -31,6 +31,7 @@ Now **waymore** gets URL's from ALL of those sources too (with ability to filter
 - Alien Vault OTX (otx.alienvault.com)
 - URLScan (urlscan.io)
 - Virus Total (virustotal.com)
+- Intelligence X (intelx.io) - PAID SOURCE ONLY

 👉 It's a point that many seem to miss, so I'll just add it again :) ... The biggest difference between **waymore** and other tools is that it can also **download the archived responses** for URLs on wayback machine so that you can then search these for even more links, developer comments, extra parameters, etc. etc.

@@ -83,9 +84,9 @@ pipx install git+https://github.com/xnl-h4ck3r/waymore.git
 | -n | --no-subs | Don't include subdomains of the target domain (only used if input is not a domain with a specific path). |
 | -f | --filter-responses-only | The initial links from sources will not be filtered, only the responses that are downloaded, e.g. it maybe useful to still see all available paths from the links, even if you don't want to check the content. |
 | -fc | | Filter HTTP status codes for retrieved URLs and responses. Comma separated list of codes (default: the `FILTER_CODE` values from `config.yml`). Passing this argument will override the value from `config.yml` |
-| -ft | | Filter MIME Types for retrieved URLs and responses. Comma separated list of MIME Types (default: the `FILTER_MIME` values from `config.yml`). Passing this argument will override the value from `config.yml`. **NOTE: This will NOT be applied to Alien Vault OTX
+| -ft | | Filter MIME Types for retrieved URLs and responses. Comma separated list of MIME Types (default: the `FILTER_MIME` values from `config.yml`). Passing this argument will override the value from `config.yml`. **NOTE: This will NOT be applied to Alien Vault OTX, Virus Total and Intelligence X because they don't have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined - these will always be included. Consider excluding sources if this matters to you.**. |
 | -mc | | Only Match HTTP status codes for retrieved URLs and responses. Comma separated list of codes. Passing this argument overrides the config `FILTER_CODE` and `-fc`. |
-| -mt | | Only MIME Types for retrieved URLs and responses. Comma separated list of MIME types. Passing this argument overrides the config `FILTER_MIME` and `-ft`. **NOTE: This will NOT be applied to Alien Vault OTX
+| -mt | | Only MIME Types for retrieved URLs and responses. Comma separated list of MIME types. Passing this argument overrides the config `FILTER_MIME` and `-ft`. **NOTE: This will NOT be applied to Alien Vault OTX, Virus Total and Intelligence X because they don't have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined - these will always be included. Consider excluding sources if this matters to you.**. |
 | -l | --limit | How many responses will be saved (if `-mode R` or `-mode B` is passed). A positive value will get the **first N** results, a negative value will get the **last N** results. A value of 0 will get **ALL** responses (default: 5000) |
 | -from | --from-date | What date to get responses from. If not specified it will get from the earliest possible results. A partial value can be passed, e.g. `2016`, `201805`, etc. |
 | -to | --to-date | What date to get responses to. If not specified it will get to the latest possible results. A partial value can be passed, e.g. `2021`, `202112`, etc. |
@@ -97,6 +98,7 @@ pipx install git+https://github.com/xnl-h4ck3r/waymore.git
 | -xav | | Exclude checks for links from alienvault.com |
 | -xus | | Exclude checks for links from urlscan.io |
 | -xvt | | Exclude checks for links from virustotal.com |
+| -xix | | Exclude checks for links from Intelligence X.com |
 | -lcc | | Limit the number of Common Crawl index collections searched, e.g. `-lcc 10` will just search the latest `10` collections (default: 1). As of November 2024 there are currently 106 collections. Setting to `0` will search **ALL** collections. If you don't want to search Common Crawl at all, use the `-xcc` option. |
 | -lcy | | Limit the number of Common Crawl index collections searched by the year of the index data. The earliest index has data from 2008. Setting to 0 (default) will search collections or any year (but in conjuction with `-lcc`). For example, if you are only interested in data from 2015 and after, pass `-lcy 2015`. This will override the value of `-lcc` if passed. If you don't want to search Common Crawl at all, use the `-xcc` option. |
 | -t | --timeout | This is for archived responses only! How many seconds to wait for the server to send data before giving up (default: 30) |
@@ -164,8 +166,9 @@ The `config.yml` file (typically in `~/.config/waymore/`) have values that can b
 - `CONTINUE_RESPONSES_IF_PIPED` - If retrieving archive responses doesn't complete, you will be prompted next time whether you want to continue with the previous run. However, if `stdout` is piped to another process it is assumed you don't want to have an interactive prompt. A value of `True` (default) will determine assure the previous run will be continued. if you want a fresh run every time then set to `False`.
 - `WEBHOOK_DISCORD` - If the `--notify-discord` argument is passed, `knoxnl` will send a notification to this Discord wehook when a successful XSS is found.
 - `DEFAULT_OUTPUT_DIR` - This is the default location of any output files written if the `-oU` and `-oR` arguments are not used. If the value of this key is blank, then it will default to the location of the `config.yml` file.
+- `INTELX_API_KEY` - You can sign up to [intelx.io here](https://intelx.io/product). It requires a paid API key to do the `/phonebook/search` through their API (as of 2024-09-01, the Phonebook service has been restricted to paid users due to constant abuse by spam accounts).

-**NOTE: The MIME types cannot be filtered for Alien Vault OTX
+**NOTE: The MIME types cannot be filtered for Alien Vault OTX, Virus Total and Intelligence X because they don't have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined for a URL. In these cases, URLs will be included regardless of filter or match. Bear this in mind and consider excluding certain providers if this is important.**

 ## Output

@@ -281,7 +284,7 @@ If you come across any problems at all, or have ideas for improvements, please f

 ## TODO

-- Add an `-
+- Add an `-oos` argument that accepts a file of Out Of Scope subdomains/URLs that will not be returned in the output, or have any responses downloaded

 ## References

@@ -290,6 +293,7 @@ If you come across any problems at all, or have ideas for improvements, please f
 - [Alien Vault OTX API](https://otx.alienvault.com/assets/static/external_api.html)
 - [URLScan API](https://urlscan.io/docs/api/)
 - [VirusTotal API (v2)](https://docs.virustotal.com/v2.0/reference/getting-started)
+- [Intelligence X SDK](https://github.com/IntelligenceX/SDK?tab=readme-ov-file#intelligence-x-public-sdk)

 Good luck and good hunting!
 If you really love the tool (or any others), or they helped you find an awesome bounty, consider [BUYING ME A COFFEE!](https://ko-fi.com/xnlh4ck3r) ☕ (I could use the caffeine!)
waymore-5.1.dist-info/RECORD
ADDED
@@ -0,0 +1,8 @@
+waymore/__init__.py,sha256=X4ON0rponPxoQ0b-Wv7zvwCPFlzC2oSmg_nHdJmpyis,17
+waymore/waymore.py,sha256=sG4cpeFN0cOfO06AgetlTbs20fVUbpxQr4g_RuHAlaw,188638
+waymore-5.1.dist-info/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
+waymore-5.1.dist-info/METADATA,sha256=npHpoTL5ceG210zvaBtXHVSfWqE2Gh_ekR_beYLTRx0,50674
+waymore-5.1.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+waymore-5.1.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
+waymore-5.1.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
+waymore-5.1.dist-info/RECORD,,
waymore-4.9.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
-waymore/__init__.py,sha256=3oueXc07OVc1C6HkNlbSrHDdAoM-CsAgJQ4LAyBP4LA,17
-waymore/waymore.py,sha256=BUWxqHcC_ZuIeRHqRc4lKZDluBptx6H-jWHsRoO7jq4,178141
-waymore-4.9.dist-info/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
-waymore-4.9.dist-info/METADATA,sha256=2_Ru3GCk7zMVuyqFotCBcw6nbRYMUCuWAZ-oX_YdQMY,49511
-waymore-4.9.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
-waymore-4.9.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
-waymore-4.9.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
-waymore-4.9.dist-info/RECORD,,
File without changes
|
|
File without changes
|
|
File without changes
|