waymore 4.1.tar.gz → 4.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: waymore
- Version: 4.1
+ Version: 4.2
  Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan & VirusTotal!
  Home-page: https://github.com/xnl-h4ck3r/waymore
  Author: @xnl-h4ck3r
@@ -16,7 +16,7 @@ Requires-Dist: tldextract
 
  <center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
 
- ## About - v4.1
+ ## About - v4.2
 
  The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
 
@@ -1,6 +1,6 @@
  <center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
 
- ## About - v4.1
+ ## About - v4.2
 
  The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
 
@@ -0,0 +1 @@
+ __version__="4.2"
@@ -1780,23 +1780,31 @@ def processWayBackPage(url):
              return
 
          # Get the URLs and MIME types. Each line is a separate JSON string
-         for line in resp.iter_lines():
-             results = line.decode("utf-8")
-             # Only get MIME Types if --verbose option was selected
-             if verbose():
-                 try:
-                     linkMimes.add(str(results).split(' ')[2])
-                 except Exception as e:
-                     if verbose():
-                         writerr(colored(getSPACER('ERROR processWayBackPage 2: Cannot get MIME type from line: ' + str(line)),'red'))
-                         write(resp.text)
-             try:
+         try:
+             for line in resp.iter_lines():
+                 results = line.decode("utf-8")
                  foundUrl = fixArchiveOrgUrl(str(results).split(' ')[1])
-                 linksFoundAdd(foundUrl)
-             except Exception as e:
-                 if verbose():
-                     writerr(colored(getSPACER('ERROR processWayBackPage 3: Cannot get link from line: ' + str(line)),'red'))
-                     write(resp.text)
+ 
+                 # Check the URL exclusions
+                 match = re.search(r'('+re.escape(FILTER_URL).replace(',','|')+')', foundUrl, flags=re.IGNORECASE)
+                 if match is None:
+                     # Only get MIME Types if --verbose option was selected
+                     if verbose():
+                         try:
+                             linkMimes.add(str(results).split(' ')[2])
+                         except Exception as e:
+                             if verbose():
+                                 writerr(colored(getSPACER('ERROR processWayBackPage 2: Cannot get MIME type from line: ' + str(line)),'red'))
+                                 write(resp.text)
+                     try:
+                         linksFoundAdd(foundUrl)
+                     except Exception as e:
+                         if verbose():
+                             writerr(colored(getSPACER('ERROR processWayBackPage 3: Cannot get link from line: ' + str(line)),'red'))
+                             write(resp.text)
+         except Exception as e:
+             if verbose():
+                 writerr(colored(getSPACER('ERROR processWayBackPage 4: ' + str(line)),'red'))
      else:
          pass
  except Exception as e:
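The notable changes in this hunk: each Wayback Machine result line is now checked against the URL exclusion list before being recorded, and the whole loop is wrapped in an outer try/except (the new "processWayBackPage 4" error path) so one malformed line no longer aborts processing of the page. The exclusion check turns the comma-separated FILTER_URL string into a single case-insensitive alternation regex. A minimal standalone sketch of that pattern-building trick; the FILTER_URL value below is an assumption for illustration, not waymore's actual default:

import re

# Hypothetical exclusion list in waymore's comma-separated FILTER_URL
# format; the real default value may differ.
FILTER_URL = '.css,.jpg,.jpeg,.png,.svg,.woff,.ttf'

def is_excluded(found_url):
    # re.escape() protects the dots but leaves commas untouched, so
    # replacing commas with '|' afterwards yields a valid alternation
    # like r'(\.css|\.jpg|...)' - the same construction used in the diff
    pattern = r'(' + re.escape(FILTER_URL).replace(',', '|') + r')'
    return re.search(pattern, found_url, flags=re.IGNORECASE) is not None

print(is_excluded('https://example.com/assets/logo.PNG'))  # True: '.png' matches case-insensitively
print(is_excluded('https://example.com/api/users'))        # False: URL is kept

The escape-then-replace order is what makes this safe: escaping first means user-supplied dots and other metacharacters in the filter list cannot change the regex's meaning, while the commas survive escaping unchanged and can be swapped for alternation bars.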
@@ -2422,12 +2430,12 @@ def processResponses():
          # This is useful for filtering out captures that are 'too dense' or when looking for unique captures."
          if args.capture_interval == 'none': # get all
              collapse = ''
-         elif args.capture_interval == 'h': # get at most 1 capture per hour
-             collapse = 'timestamp:10'
-         elif args.capture_interval == 'd': # get at most 1 capture per day
-             collapse = 'timestamp:8'
-         elif args.capture_interval == 'm': # get at most 1 capture per month
-             collapse = 'timestamp:6'
+         elif args.capture_interval == 'h': # get at most 1 capture per URL per hour
+             collapse = 'timestamp:10,original'
+         elif args.capture_interval == 'd': # get at most 1 capture per URL per day
+             collapse = 'timestamp:8,original'
+         elif args.capture_interval == 'm': # get at most 1 capture per URL per month
+             collapse = 'timestamp:6,original'
 
          url = WAYBACK_URL.replace('{DOMAIN}',subs + quote(argsInput) + path).replace('{COLLAPSE}',collapse) + filterMIME + filterCode + filterLimit + filterFrom + filterTo + filterKeywords
 
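This change concerns the Wayback CDX API's collapse feature. Capture timestamps are formatted YYYYMMDDhhmmss, so collapsing on a 10-, 8- or 6-digit prefix groups captures by hour, day or month; appending ',original' (the original-URL field) makes the grouping per URL per interval, as the updated comments say, instead of one capture per interval across all URLs. A minimal sketch of how that value lands in the query string; the WAYBACK_URL template below is a simplified stand-in, not waymore's exact template:

from urllib.parse import quote

# Simplified stand-in for waymore's WAYBACK_URL template (an assumption;
# the real template carries more placeholders and parameters).
WAYBACK_URL = ('https://web.archive.org/cdx/search/cdx?url={DOMAIN}'
               '&collapse={COLLAPSE}&fl=timestamp,original,mimetype,statuscode')

def build_cdx_url(domain, capture_interval):
    # Timestamps look like 20240131235959, so a prefix of 10/8/6 digits
    # identifies the hour/day/month; ',original' adds the URL to the key
    prefix = {'h': 10, 'd': 8, 'm': 6}.get(capture_interval)
    collapse = '' if prefix is None else f'timestamp:{prefix},original'
    return WAYBACK_URL.replace('{DOMAIN}', quote(domain)).replace('{COLLAPSE}', collapse)

print(build_cdx_url('example.com', 'd'))
# .../cdx?url=example.com&collapse=timestamp:8,original&fl=...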
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: waymore
- Version: 4.1
+ Version: 4.2
  Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan & VirusTotal!
  Home-page: https://github.com/xnl-h4ck3r/waymore
  Author: @xnl-h4ck3r
@@ -16,7 +16,7 @@ Requires-Dist: tldextract
 
  <center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
 
- ## About - v4.1
+ ## About - v4.2
 
  The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
 
@@ -1 +0,0 @@
- __version__="4.1"