waymore 4.3__tar.gz → 4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: waymore
3
- Version: 4.3
3
+ Version: 4.4
4
4
  Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan & VirusTotal!
5
5
  Home-page: https://github.com/xnl-h4ck3r/waymore
6
6
  Author: @xnl-h4ck3r
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
- Requires-Dist: argparse
10
9
  Requires-Dist: requests
11
10
  Requires-Dist: pyyaml
12
11
  Requires-Dist: termcolor
@@ -16,7 +15,7 @@ Requires-Dist: tldextract
16
15
 
17
16
  <center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
18
17
 
19
- ## About - v4.3
18
+ ## About - v4.4
20
19
 
21
20
  The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
22
21
 
@@ -1,6 +1,6 @@
1
1
  <center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
2
2
 
3
- ## About - v4.3
3
+ ## About - v4.4
4
4
 
5
5
  The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
6
6
 
@@ -34,7 +34,7 @@ setup(
34
34
  author="@xnl-h4ck3r",
35
35
  url="https://github.com/xnl-h4ck3r/waymore",
36
36
  py_modules=["waymore"],
37
- install_requires=["argparse","requests","pyyaml","termcolor","psutil","urlparse3","tldextract"],
37
+ install_requires=["requests","pyyaml","termcolor","psutil","urlparse3","tldextract"],
38
38
  entry_points={
39
39
  'console_scripts': [
40
40
  'waymore = waymore.waymore:main',
@@ -0,0 +1 @@
1
+ __version__="4.4"
@@ -706,23 +706,55 @@ def fixArchiveOrgUrl(url):
706
706
  url = url[0:newline]
707
707
  return url
708
708
 
709
+ # Add a link to the linksFound collection for archived responses (included timestamp preifx)
710
+ def linksFoundResponseAdd(link):
711
+ global linksFound, argsInput, argsInputHostname
712
+
713
+ try:
714
+ if inputIsDomainANDPath:
715
+ checkInput = argsInput
716
+ else:
717
+ checkInput = argsInputHostname
718
+
719
+ # Remove the timestamp
720
+ linkWithoutTimestamp = link.split('/', 1)[-1]
721
+
722
+ # If the link specifies port 80 or 443, e.g. http://example.com:80, then remove the port
723
+ parsed = urlparse(linkWithoutTimestamp.strip())
724
+ if parsed.port in (80, 443):
725
+ new_netloc = parsed.hostname
726
+ parsed_url = parsed._replace(netloc=new_netloc).geturl()
727
+ else:
728
+ parsed_url = linkWithoutTimestamp
729
+
730
+ # Don't write it if the link does not contain the requested domain (this can sometimes happen)
731
+ if parsed_url.find(checkInput) >= 0:
732
+ linksFound.add(link)
733
+ except Exception as e:
734
+ linksFound.add(link)
735
+
709
736
# Add a link to the linksFound collection
def linksFoundAdd(link):
    """Add a link to linksFound if it refers to the requested input.

    Any explicit default port (:80 or :443) is removed from a copy of the
    URL before the domain check. NOTE(review): the ORIGINAL link (including
    any default port) is what gets stored — the normalized form is used
    only for matching; the link is also stored unchanged if checking fails.

    Args:
        link: The URL string to consider adding.
    """
    global linksFound, argsInput, argsInputHostname

    try:
        # Compare against the full input (domain + path) or just the hostname,
        # depending on what the user supplied
        if inputIsDomainANDPath:
            checkInput = argsInput
        else:
            checkInput = argsInputHostname

        # If the link specifies port 80 or 443, e.g. http://example.com:80,
        # then remove the port for the purposes of the domain check
        parsed = urlparse(link.strip())
        if parsed.port in (80, 443):
            parsed_url = parsed._replace(netloc=parsed.hostname).geturl()
        else:
            parsed_url = link

        # Don't write it if the link does not contain the requested domain
        # (this can sometimes happen)
        if parsed_url.find(checkInput) >= 0:
            linksFound.add(link)
    except Exception:
        # Best effort: if the URL can't be parsed/checked, keep the link anyway
        linksFound.add(link)

 
728
760
  def processArchiveUrl(url):
@@ -2347,7 +2379,7 @@ def getVirusTotalUrls():
2347
2379
 
2348
2380
  except Exception as e:
2349
2381
  writerr(colored('ERROR getVirusTotalUrls 1: ' + str(e), 'red'))
2350
-
2382
+
2351
2383
  def processResponses():
2352
2384
  """
2353
2385
  Get archived responses from Wayback Machine (archive.org)
@@ -2513,13 +2545,14 @@ def processResponses():
2513
2545
  except:
2514
2546
  pass
2515
2547
 
2516
- # Go through the response to save the links found
2548
+ # Go through the response to save the links found
2517
2549
  for line in resp.iter_lines():
2518
2550
  try:
2519
2551
  results = line.decode("utf-8")
2520
- timestamp = results.split(' ')[0]
2521
- originalUrl = results.split(' ')[1]
2522
- linksFoundAdd(timestamp+'/'+originalUrl)
2552
+ parts = results.split(' ', 2)
2553
+ timestamp = parts[0]
2554
+ originalUrl = parts[1]
2555
+ linksFoundResponseAdd(timestamp+'/'+originalUrl)
2523
2556
  except Exception as e:
2524
2557
  writerr(colored(getSPACER('ERROR processResponses 3: Cannot to get link from line: '+str(line)), 'red'))
2525
2558
 
@@ -2540,6 +2573,16 @@ def processResponses():
2540
2573
 
2541
2574
  # Get the total number of responses we will try to get and set the current file count to the success count
2542
2575
  totalResponses = len(linkRequests)
2576
+
2577
+ # If there are no responses to download, display an error and exit
2578
+ if totalResponses == 0:
2579
+ try:
2580
+ if originalUrl:
2581
+ writerr(colored(getSPACER('Failed to get links from Wayback Machine (archive.org) - there were results (e.g. "'+originalUrl+'") but they didn\'t match the input you gave. Check input and try again.'), 'red'))
2582
+ except:
2583
+ writerr(colored(getSPACER('Failed to get links from Wayback Machine (archive.org) - check input and try again.'), 'red'))
2584
+ return
2585
+
2543
2586
  fileCount = successCount
2544
2587
 
2545
2588
  if args.check_only:
@@ -1,12 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: waymore
3
- Version: 4.3
3
+ Version: 4.4
4
4
  Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan & VirusTotal!
5
5
  Home-page: https://github.com/xnl-h4ck3r/waymore
6
6
  Author: @xnl-h4ck3r
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
- Requires-Dist: argparse
10
9
  Requires-Dist: requests
11
10
  Requires-Dist: pyyaml
12
11
  Requires-Dist: termcolor
@@ -16,7 +15,7 @@ Requires-Dist: tldextract
16
15
 
17
16
  <center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
18
17
 
19
- ## About - v4.3
18
+ ## About - v4.4
20
19
 
21
20
  The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
22
21
 
@@ -1,4 +1,3 @@
1
- argparse
2
1
  requests
3
2
  pyyaml
4
3
  termcolor
@@ -1 +0,0 @@
1
- __version__="4.3"
File without changes
File without changes