waymore 4.3__py3-none-any.whl → 4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
waymore/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__="4.3"
1
+ __version__="4.5"
waymore/waymore.py CHANGED
@@ -706,23 +706,55 @@ def fixArchiveOrgUrl(url):
706
706
  url = url[0:newline]
707
707
  return url
708
708
 
709
+ # Add a link to the linksFound collection for archived responses (includes timestamp prefix)
710
+ def linksFoundResponseAdd(link):
711
+ global linksFound, argsInput, argsInputHostname
712
+
713
+ try:
714
+ if inputIsDomainANDPath:
715
+ checkInput = argsInput
716
+ else:
717
+ checkInput = argsInputHostname
718
+
719
+ # Remove the timestamp
720
+ linkWithoutTimestamp = link.split('/', 1)[-1]
721
+
722
+ # If the link specifies port 80 or 443, e.g. http://example.com:80, then remove the port
723
+ parsed = urlparse(linkWithoutTimestamp.strip())
724
+ if parsed.port in (80, 443):
725
+ new_netloc = parsed.hostname
726
+ parsed_url = parsed._replace(netloc=new_netloc).geturl()
727
+ else:
728
+ parsed_url = linkWithoutTimestamp
729
+
730
+ # Don't write it if the link does not contain the requested domain (this can sometimes happen)
731
+ if parsed_url.find(checkInput) >= 0:
732
+ linksFound.add(link)
733
+ except Exception as e:
734
+ linksFound.add(link)
735
+
709
736
  # Add a link to the linksFound collection
710
737
  def linksFoundAdd(link):
711
738
  global linksFound, argsInput, argsInputHostname
712
- # If the link specifies port 80 or 443, e.g. http://example.com:80, then remove the port
739
+
713
740
  try:
714
741
  if inputIsDomainANDPath:
715
742
  checkInput = argsInput
716
743
  else:
717
744
  checkInput = argsInputHostname
745
+
746
+ # If the link specifies port 80 or 443, e.g. http://example.com:80, then remove the port
747
+ parsed = urlparse(link.strip())
748
+ if parsed.port in (80, 443):
749
+ new_netloc = parsed.hostname
750
+ parsed_url = parsed._replace(netloc=new_netloc).geturl()
751
+ else:
752
+ parsed_url = link
753
+
718
754
  # Don't write it if the link does not contain the requested domain (this can sometimes happen)
719
- if link.find(checkInput) >= 0:
720
- parsed = urlparse(link.strip())
721
- if parsed.netloc.find(':80') >= 0 or parsed.netloc.fnd(':443') >= 0:
722
- newNetloc = parsed.netloc.split(':')[0]
723
- parsed = parsed._replace(netloc=newNetloc).geturl()
724
- linksFound.add(parsed)
725
- except:
755
+ if parsed_url.find(checkInput) >= 0:
756
+ linksFound.add(link)
757
+ except Exception as e:
726
758
  linksFound.add(link)
727
759
 
728
760
  def processArchiveUrl(url):
@@ -1352,11 +1384,15 @@ def getAlienVaultUrls():
1352
1384
  # Carry on if something was found
1353
1385
  if resp.text.lower().find('"error": "') < 0:
1354
1386
 
1355
- # Get the JSON response
1356
- jsonResp = json.loads(resp.text.strip())
1357
-
1358
- # Try to get the number of results
1359
- totalUrls = jsonResp['full_size']
1387
+ try:
1388
+ # Get the JSON response
1389
+ jsonResp = json.loads(resp.text.strip())
1390
+
1391
+ # Try to get the number of results
1392
+ totalUrls = int(jsonResp['full_size'])
1393
+ except:
1394
+ writerr(colored(getSPACER('[ ERR ] There was an unexpected response from the Alien Vault API'),'red'))
1395
+ totalUrls = 0
1360
1396
 
1361
1397
  # If there are results, carry on
1362
1398
  if totalUrls > 0 or args.check_only:
@@ -1556,19 +1592,28 @@ def getURLScanUrls():
1556
1592
  writerr(colored(getSPACER('[ ' + str(resp.status_code) + ' ] Unable to get links from urlscan.io'),'red'))
1557
1593
  return
1558
1594
 
1559
- # Get the JSON response
1560
- jsonResp = json.loads(resp.text.strip())
1595
+ try:
1596
+ # Get the JSON response
1597
+ jsonResp = json.loads(resp.text.strip())
1561
1598
 
1562
- # Get the number of results
1563
- totalUrls = jsonResp['total']
1599
+ # Get the number of results
1600
+ totalUrls = int(jsonResp['total'])
1601
+ except:
1602
+ writerr(colored(getSPACER('[ ERR ] There was an unexpected response from the URLScan API'),'red'))
1603
+ totalUrls = 0
1564
1604
 
1605
+ # Carry on if something was found
1565
1606
  if args.check_only:
1566
- hasMore = jsonResp['has_more']
1567
- if hasMore:
1568
- write(colored('Get URLs from URLScan: ','cyan')+colored('UNKNOWN requests','white'))
1569
- else:
1570
- write(colored('Get URLs from URLScan: ','cyan')+colored('1 request','white'))
1607
+ try:
1608
+ hasMore = jsonResp['has_more']
1609
+ if hasMore:
1610
+ write(colored('Get URLs from URLScan: ','cyan')+colored('UNKNOWN requests','white'))
1611
+ else:
1612
+ write(colored('Get URLs from URLScan: ','cyan')+colored('1 request','white'))
1613
+ except:
1614
+ pass
1571
1615
  checkURLScan = 1
1616
+
1572
1617
  else:
1573
1618
  # Carry on if something was found
1574
1619
  if int(totalUrls) > 0:
@@ -1714,6 +1759,7 @@ def processWayBackPage(url):
1714
1759
  if not stopSource:
1715
1760
  try:
1716
1761
  # Choose a random user agent string to use for any requests
1762
+ resp = None
1717
1763
  userAgent = random.choice(USER_AGENT)
1718
1764
  page = url.split('page=')[1]
1719
1765
  session = requests.Session()
@@ -1785,8 +1831,11 @@ def processWayBackPage(url):
1785
1831
  results = line.decode("utf-8")
1786
1832
  foundUrl = fixArchiveOrgUrl(str(results).split(' ')[1])
1787
1833
 
1788
- # Check the URL exclusions
1789
- match = re.search(r'('+re.escape(FILTER_URL).replace(',','|')+')', foundUrl, flags=re.IGNORECASE)
1834
+ # If --filter-responses-only wasn't used, then check the URL exclusions
1835
+ if args.filter_responses_only:
1836
+ match = None
1837
+ else:
1838
+ match = re.search(r'('+re.escape(FILTER_URL).replace(',','|')+')', foundUrl, flags=re.IGNORECASE)
1790
1839
  if match is None:
1791
1840
  # Only get MIME Types if --verbose option was selected
1792
1841
  if verbose():
@@ -2300,29 +2349,33 @@ def getVirusTotalUrls():
2300
2349
  return
2301
2350
 
2302
2351
  # Get the JSON response
2303
- jsonResp = json.loads(resp.text.strip())
2352
+ try:
2353
+ jsonResp = json.loads(resp.text.strip())
2304
2354
 
2305
- # Get the different URLs
2306
- if args.no_subs:
2307
- subDomains = []
2308
- else:
2355
+ # Get the different URLs
2356
+ if args.no_subs:
2357
+ subDomains = []
2358
+ else:
2359
+ try:
2360
+ subDomains = jsonResp['subdomains']
2361
+ except Exception as e:
2362
+ subDomains = []
2363
+ try:
2364
+ detectedUrls = [entry['url'] for entry in jsonResp.get('detected_urls', [])]
2365
+ except Exception as e:
2366
+ detectedUrls = []
2309
2367
  try:
2310
- subDomains = jsonResp['subdomains']
2368
+ undetectedUrls = [entry[0] for entry in jsonResp.get('undetected_urls', [])]
2311
2369
  except Exception as e:
2312
- subDomains = []
2313
- try:
2314
- detectedUrls = [entry['url'] for entry in jsonResp.get('detected_urls', [])]
2315
- except Exception as e:
2316
- detectedUrls = []
2317
- try:
2318
- undetectedUrls = [entry[0] for entry in jsonResp.get('undetected_urls', [])]
2319
- except Exception as e:
2320
- undetectedUrls = []
2321
- try:
2322
- totalUrls = set(subDomains + detectedUrls + undetectedUrls)
2323
- except Exception as e:
2370
+ undetectedUrls = []
2371
+ try:
2372
+ totalUrls = set(subDomains + detectedUrls + undetectedUrls)
2373
+ except Exception as e:
2374
+ totalUrls = []
2375
+ except:
2376
+ writerr(colored(getSPACER('[ ERR ] There was an unexpected response from the VirusTotal API'),'red'))
2324
2377
  totalUrls = []
2325
-
2378
+
2326
2379
  if args.check_only:
2327
2380
  write(colored('Get URLs from VirusTotal: ','cyan')+colored('1 request','white'))
2328
2381
  checkVirusTotal = 1
@@ -2347,7 +2400,7 @@ def getVirusTotalUrls():
2347
2400
 
2348
2401
  except Exception as e:
2349
2402
  writerr(colored('ERROR getVirusTotalUrls 1: ' + str(e), 'red'))
2350
-
2403
+
2351
2404
  def processResponses():
2352
2405
  """
2353
2406
  Get archived responses from Wayback Machine (archive.org)
@@ -2513,13 +2566,14 @@ def processResponses():
2513
2566
  except:
2514
2567
  pass
2515
2568
 
2516
- # Go through the response to save the links found
2569
+ # Go through the response to save the links found
2517
2570
  for line in resp.iter_lines():
2518
2571
  try:
2519
2572
  results = line.decode("utf-8")
2520
- timestamp = results.split(' ')[0]
2521
- originalUrl = results.split(' ')[1]
2522
- linksFoundAdd(timestamp+'/'+originalUrl)
2573
+ parts = results.split(' ', 2)
2574
+ timestamp = parts[0]
2575
+ originalUrl = parts[1]
2576
+ linksFoundResponseAdd(timestamp+'/'+originalUrl)
2523
2577
  except Exception as e:
2524
2578
  writerr(colored(getSPACER('ERROR processResponses 3: Cannot to get link from line: '+str(line)), 'red'))
2525
2579
 
@@ -2540,6 +2594,16 @@ def processResponses():
2540
2594
 
2541
2595
  # Get the total number of responses we will try to get and set the current file count to the success count
2542
2596
  totalResponses = len(linkRequests)
2597
+
2598
+ # If there are no responses to download, display an error and exit
2599
+ if totalResponses == 0:
2600
+ try:
2601
+ if originalUrl:
2602
+ writerr(colored(getSPACER('Failed to get links from Wayback Machine (archive.org) - there were results (e.g. "'+originalUrl+'") but they didn\'t match the input you gave. Check input and try again.'), 'red'))
2603
+ except:
2604
+ writerr(colored(getSPACER('Failed to get links from Wayback Machine (archive.org) - check input and try again.'), 'red'))
2605
+ return
2606
+
2543
2607
  fileCount = successCount
2544
2608
 
2545
2609
  if args.check_only:
@@ -1,12 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: waymore
3
- Version: 4.3
3
+ Version: 4.5
4
4
  Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan & VirusTotal!
5
5
  Home-page: https://github.com/xnl-h4ck3r/waymore
6
6
  Author: @xnl-h4ck3r
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
- Requires-Dist: argparse
10
9
  Requires-Dist: requests
11
10
  Requires-Dist: pyyaml
12
11
  Requires-Dist: termcolor
@@ -16,7 +15,7 @@ Requires-Dist: tldextract
16
15
 
17
16
  <center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
18
17
 
19
- ## About - v4.3
18
+ ## About - v4.5
20
19
 
21
20
  The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
22
21
 
@@ -0,0 +1,8 @@
1
+ waymore/__init__.py,sha256=HpBSj4W3_snlRrPgOuCuVP107OOZenaFQECvPnsC9V4,17
2
+ waymore/waymore.py,sha256=kfGA3T_cDADuhZ_78Ta22fxnqlGVUm56yvIncEfnZDs,170779
3
+ waymore-4.5.dist-info/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
4
+ waymore-4.5.dist-info/METADATA,sha256=v57_NUSUTSGqA1fZQb9UQgFUoOIDcc9AQeDZbRyL7kk,47221
5
+ waymore-4.5.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
6
+ waymore-4.5.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
7
+ waymore-4.5.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
8
+ waymore-4.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: setuptools (70.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,8 +0,0 @@
1
- waymore/__init__.py,sha256=-TFFyw9iukscHpq2I58oEz2oLQ995GbajzvC6Iz9ddM,17
2
- waymore/waymore.py,sha256=SWTqBUa-btDe6cWjRcL3w-ef1uK45LpfztCgvgtQPSM,168145
3
- waymore-4.3.dist-info/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
4
- waymore-4.3.dist-info/METADATA,sha256=q7_uq3p1kLMMARqUWTnA33rhxUluolsyAykMv7Ot598,47245
5
- waymore-4.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
- waymore-4.3.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
7
- waymore-4.3.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
8
- waymore-4.3.dist-info/RECORD,,
File without changes