waymore 7.4__py3-none-any.whl → 7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
waymore/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "7.4"
1
+ __version__ = "7.5"
waymore/waymore.py CHANGED
@@ -19,6 +19,7 @@ import threading
19
19
  from datetime import datetime, timedelta
20
20
  from pathlib import Path
21
21
  from signal import SIGINT, signal
22
+ from typing import Optional
22
23
  from urllib.parse import urlparse
23
24
 
24
25
  import requests
@@ -129,9 +130,64 @@ ALIENVAULT_URL = "https://otx.alienvault.com/api/v1/indicators/{TYPE}/{DOMAIN}/u
129
130
  URLSCAN_URL = "https://urlscan.io/api/v1/search/?q=domain:{DOMAIN}{DATERANGE}&size=10000"
130
131
  URLSCAN_DOM_URL = "https://urlscan.io/dom/"
131
132
  VIRUSTOTAL_URL = "https://www.virustotal.com/vtapi/v2/domain/report?apikey={APIKEY}&domain={DOMAIN}"
132
- INTELX_SEARCH_URL = "https://2.intelx.io/phonebook/search"
133
- INTELX_RESULTS_URL = "https://2.intelx.io/phonebook/search/result?id="
134
- INTELX_ACCOUNT_URL = "https://2.intelx.io/authenticate/info"
133
# Paid endpoint first, free endpoint as fallback
INTELX_BASES = ["https://2.intelx.io", "https://free.intelx.io"]

# Thread-local holder for the active IntelX base URL and the endpoint URLs derived from it
intelx_tls = threading.local()


def _applyIntelxBase(base: str) -> None:
    """Store base, and the three endpoint URLs built from it, on the current thread's storage."""
    intelx_tls.INTELX_BASE = base
    intelx_tls.INTELX_SEARCH_URL = f"{base}/phonebook/search"
    intelx_tls.INTELX_RESULTS_URL = f"{base}/phonebook/search/result?id="
    intelx_tls.INTELX_ACCOUNT_URL = f"{base}/authenticate/info"


def initIntelxTls() -> None:
    """
    Initialize thread-local storage for IntelX if not already done.

    The first entry of INTELX_BASES is used as the default base. If the calling thread
    already has an INTELX_BASE attribute, nothing is changed.
    """
    if not hasattr(intelx_tls, "INTELX_BASE"):
        _applyIntelxBase(INTELX_BASES[0])


def setIntelxBase(base: str) -> None:
    """
    Update IntelX URLs to use the provided base (thread-local).

    Args:
        base: Base URL (scheme and host, no trailing slash) that the search, results and
            account URLs are built from.
    """
    # Every attribute is overwritten here, so no separate initialization step is needed first
    _applyIntelxBase(base)
155
+
156
+
157
def chooseIntelxBase(api_key: str) -> Optional[requests.Response]:
    """
    Find the first IntelX base URL that accepts the API key and make it the active one.

    Every base in INTELX_BASES is probed in order (paid, then free) with a GET to its
    /authenticate/info endpoint, passing api_key in the X-Key header. The first base that
    answers 200 is selected with setIntelxBase() and its response is returned immediately.

    Returns the 200 response when a base works. Otherwise returns the last response that
    was received, so callers can inspect status/JSON, or None if no request completed or
    an unexpected error occurred.
    """
    initIntelxTls()
    try:
        probeSession = requests.Session()
        for scheme in ("https://", "http://"):
            probeSession.mount(scheme, HTTP_ADAPTER)

        lastResponse = None
        for candidate in INTELX_BASES:
            # A new random user agent is picked for every probe
            probeHeaders = {"User-Agent": random.choice(USER_AGENT), "X-Key": api_key}
            try:
                lastResponse = probeSession.get(
                    f"{candidate}/authenticate/info",
                    headers=probeHeaders,
                )
                if lastResponse.status_code == 200:
                    setIntelxBase(candidate)
                    return lastResponse
            except Exception as e:
                writerr(colored(f"IntelX - [ ERR ] Problem probing {candidate}: {e}", "red"))
            # Any non-200 status (401/403 included) or a failed request: try the next base
        return lastResponse
    except Exception as e:
        writerr(colored(f"IntelX - [ ERR ] Unexpected error in chooseIntelxBase: {e}", "red"))
        return None
190
+
135
191
 
136
192
  # User Agents to use when making requests, chosen at random
137
193
  USER_AGENT = [
@@ -559,7 +615,7 @@ def showOptions():
559
615
  write(
560
616
  colored("Intelligence X API Key:", "magenta")
561
617
  + colored(
562
- " {none} - You require a paid API Key from https://intelx.io/product",
618
+ " {none} - You require a Academia or Paid API Key from https://intelx.io/product",
563
619
  "white",
564
620
  )
565
621
  )
@@ -4600,7 +4656,10 @@ def processIntelxUrl(url):
4600
4656
 
4601
4657
  # Add link if it passed filters
4602
4658
  if addLink:
4603
- linksFoundAdd(url, linksFoundIntelx)
4659
+ # Clean the link to remove any █ (\u2588) characters from the link. These can be present in the IntelX results when the Academia plan is used
4660
+ url = url.replace("\u2588", "").strip()
4661
+ if url != "":
4662
+ linksFoundAdd(url, linksFoundIntelx)
4604
4663
 
4605
4664
  except Exception as e:
4606
4665
  writerr(colored("ERROR processIntelxUrl 1: " + str(e), "red"))
@@ -4611,86 +4670,107 @@ def processIntelxType(target, credits):
4611
4670
  target: 1 - Domains
4612
4671
  target: 3 - URLs
4613
4672
  """
4673
+ initIntelxTls()
4614
4674
  global intelxAPIIssue
4615
4675
  try:
4616
- try:
4617
- requestsMade = 0
4676
+ attempts = 0
4677
+ resp = None
4678
+ # Choose a random user agent string to use for any requests and reuse session
4679
+ userAgent = random.choice(USER_AGENT)
4680
+ session = requests.Session()
4681
+ session.mount("https://", HTTP_ADAPTER)
4682
+ session.mount("http://", HTTP_ADAPTER)
4618
4683
 
4619
- # Choose a random user agent string to use for any requests
4620
- userAgent = random.choice(USER_AGENT)
4621
- session = requests.Session()
4622
- session.mount("https://", HTTP_ADAPTER)
4623
- session.mount("http://", HTTP_ADAPTER)
4624
- # Pass the API key in the X-Key header too.
4625
- resp = session.post(
4626
- INTELX_SEARCH_URL,
4627
- data='{"term":"' + quote(argsInputHostname) + '","target":' + str(target) + "}",
4628
- headers={"User-Agent": userAgent, "X-Key": INTELX_API_KEY},
4629
- )
4630
- requestsMade = requestsMade + 1
4631
- except Exception as e:
4632
- write(
4633
- colored(
4634
- "IntelX - [ ERR ] Unable to get links from intelx.io: " + str(e),
4635
- "red",
4684
+ while attempts < 2:
4685
+ attempts += 1
4686
+ try:
4687
+ requestsMade = 0
4688
+ # Pass the API key in the X-Key header too.
4689
+ resp = session.post(
4690
+ intelx_tls.INTELX_SEARCH_URL,
4691
+ data='{"term":"' + quote(argsInputHostname) + '","target":' + str(target) + "}",
4692
+ headers={"User-Agent": userAgent, "X-Key": INTELX_API_KEY},
4636
4693
  )
4637
- )
4638
- return
4694
+ requestsMade = requestsMade + 1
4695
+ except Exception as e:
4696
+ write(
4697
+ colored(
4698
+ "IntelX - [ ERR ] Unable to get links from intelx.io: " + str(e),
4699
+ "red",
4700
+ )
4701
+ )
4702
+ return
4639
4703
 
4640
- # Deal with any errors
4641
- if resp.status_code == 429:
4642
- intelxAPIIssue = True
4643
- writerr(
4644
- colored(
4645
- "IntelX - [ 429 ] Rate limit reached so unable to get links.",
4646
- "red",
4704
+ # Deal with any errors
4705
+ if resp.status_code == 200:
4706
+ break
4707
+ elif resp.status_code == 429:
4708
+ intelxAPIIssue = True
4709
+ writerr(
4710
+ colored(
4711
+ "IntelX - [ 429 ] Rate limit reached so unable to get links.",
4712
+ "red",
4713
+ )
4647
4714
  )
4648
- )
4649
- return
4650
- elif resp.status_code == 401:
4651
- intelxAPIIssue = True
4652
- writerr(
4653
- colored(
4654
- "IntelX - [ 401 ] Not authorized. The source requires a paid API key. Check your API key is correct.",
4655
- "red",
4715
+ return
4716
+ elif resp.status_code == 401:
4717
+ # Retry with free endpoint if paid endpoint was used and auth failed
4718
+ if intelx_tls.INTELX_BASE != INTELX_BASES[-1]:
4719
+ setIntelxBase(INTELX_BASES[-1])
4720
+ continue
4721
+ intelxAPIIssue = True
4722
+ writerr(
4723
+ colored(
4724
+ "IntelX - [ 401 ] Not authorized. Check your API key is correct.",
4725
+ "red",
4726
+ )
4656
4727
  )
4657
- )
4658
- return
4659
- elif resp.status_code == 402:
4660
- intelxAPIIssue = True
4661
- if credits.startswith("0/"):
4728
+ return
4729
+ elif resp.status_code == 402:
4730
+ # If we were on paid, fall back to free and retry once
4731
+ if intelx_tls.INTELX_BASE != INTELX_BASES[-1]:
4732
+ setIntelxBase(INTELX_BASES[-1])
4733
+ continue
4734
+ intelxAPIIssue = True
4735
+ if credits.startswith("0/"):
4736
+ writerr(
4737
+ colored(
4738
+ "IntelX - [ 402 ] You have run out of daily credits on Intelx ("
4739
+ + credits
4740
+ + ").",
4741
+ "red",
4742
+ )
4743
+ )
4744
+ else:
4745
+ writerr(
4746
+ colored(
4747
+ "IntelX - [ 402 ] It appears you have run out of daily credits on Intelx.",
4748
+ "red",
4749
+ )
4750
+ )
4751
+ return
4752
+ elif resp.status_code == 403:
4753
+ intelxAPIIssue = True
4662
4754
  writerr(
4663
4755
  colored(
4664
- "IntelX - [ 402 ] You have run out of daily credits on Intelx ("
4665
- + credits
4666
- + ").",
4756
+ "IntelX - [ 403 ] Permission denied. Check your API key is correct.",
4667
4757
  "red",
4668
4758
  )
4669
4759
  )
4760
+ return
4670
4761
  else:
4671
4762
  writerr(
4672
4763
  colored(
4673
- "IntelX - [ 402 ] It appears you have run out of daily credits on Intelx.",
4764
+ "IntelX - [ "
4765
+ + str(resp.status_code)
4766
+ + " ] Unable to get links from intelx.io",
4674
4767
  "red",
4675
4768
  )
4676
4769
  )
4677
- return
4678
- elif resp.status_code == 403:
4679
- intelxAPIIssue = True
4680
- writerr(
4681
- colored(
4682
- "IntelX - [ 403 ] Permission denied. Check your API key is correct.",
4683
- "red",
4684
- )
4685
- )
4686
- return
4687
- elif resp.status_code != 200:
4688
- writerr(
4689
- colored(
4690
- "IntelX - [ " + str(resp.status_code) + " ] Unable to get links from intelx.io",
4691
- "red",
4692
- )
4693
- )
4770
+ return
4771
+
4772
+ # Double check we have a valid response
4773
+ if resp is None or resp.status_code != 200:
4694
4774
  return
4695
4775
 
4696
4776
  # Get the JSON response
@@ -4714,7 +4794,7 @@ def processIntelxType(target, credits):
4714
4794
  break
4715
4795
  try:
4716
4796
  resp = session.get(
4717
- INTELX_RESULTS_URL + id,
4797
+ intelx_tls.INTELX_RESULTS_URL + id,
4718
4798
  headers={"User-Agent": userAgent, "X-Key": INTELX_API_KEY},
4719
4799
  )
4720
4800
  requestsMade = requestsMade + 1
@@ -4769,19 +4849,13 @@ def processIntelxType(target, credits):
4769
4849
 
4770
4850
  def getIntelxAccountInfo() -> str:
4771
4851
  """
4772
- Get the account info and return the number of Credits remainiing from the /phonebook/search
4852
+ Get the account info and return the number of Credits remaining from the /phonebook/search
4773
4853
  """
4854
+ initIntelxTls()
4774
4855
  try:
4775
- # Choose a random user agent string to use for any requests
4776
- userAgent = random.choice(USER_AGENT)
4777
- session = requests.Session()
4778
- session.mount("https://", HTTP_ADAPTER)
4779
- session.mount("http://", HTTP_ADAPTER)
4780
- # Pass the API key in the X-Key header too.
4781
- resp = session.get(
4782
- INTELX_ACCOUNT_URL,
4783
- headers={"User-Agent": userAgent, "X-Key": INTELX_API_KEY},
4784
- )
4856
+ resp = chooseIntelxBase(INTELX_API_KEY)
4857
+ if resp is None or resp.status_code != 200:
4858
+ return "Unknown"
4785
4859
  jsonResp = json.loads(resp.text.strip())
4786
4860
  credits = str(
4787
4861
  jsonResp.get("paths", {}).get("/phonebook/search", {}).get("Credit", "Unknown")
@@ -4812,6 +4886,7 @@ def getIntelxUrls():
4812
4886
 
4813
4887
  stopSourceIntelx = False
4814
4888
  linksFoundIntelx = set()
4889
+ initIntelxTls()
4815
4890
 
4816
4891
  credits = getIntelxAccountInfo()
4817
4892
  if verbose():
@@ -4822,7 +4897,7 @@ def getIntelxUrls():
4822
4897
  + "): ",
4823
4898
  "magenta",
4824
4899
  )
4825
- + colored(INTELX_SEARCH_URL + "\n", "white")
4900
+ + colored(intelx_tls.INTELX_SEARCH_URL + "\n", "white")
4826
4901
  )
4827
4902
 
4828
4903
  if not args.check_only:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: waymore
3
- Version: 7.4
3
+ Version: 7.5
4
4
  Summary: Find way more from the Wayback Machine, Common Crawl, Alien Vault OTX, URLScan, VirusTotal & Intelligence X!
5
5
  Home-page: https://github.com/xnl-h4ck3r/waymore
6
6
  Author: xnl-h4ck3r
@@ -21,12 +21,12 @@ Dynamic: license-file
21
21
 
22
22
  <center><img src="https://github.com/xnl-h4ck3r/waymore/blob/main/waymore/images/title.png"></center>
23
23
 
24
- ## About - v7.4
24
+ ## About - v7.5
25
25
 
26
- The idea behind **waymore** is to find even more links from the Wayback Machine than other existing tools.
26
+ The idea behind **waymore** is to find even more links from the Wayback Machine (plus other sources) than other existing tools.
27
27
 
28
- 👉 The biggest difference between **waymore** and other tools is that it can also **download the archived responses** for URLs on wayback machine so that you can then search these for even more links, developer comments, extra parameters, etc. etc.
29
- 👉 Also, other tools do not currenrtly deal with the rate limiting now in place by the sources, and will often just stop with incomplete results and not let you know they are incomplete.
28
+ 👉 The biggest difference between **waymore** and other tools is that it can also **download the archived responses** for URLs on wayback machine (and URLScan) so that you can then search these for even more links, developer comments, extra parameters, etc. etc.
29
+ 👉 Also, other tools do not currently deal with the rate limiting now in place by the sources, and will often just stop with incomplete results and not let you know they are incomplete.
30
30
 
31
31
  Anyone who does bug bounty will have likely used the amazing [waybackurls](https://github.com/tomnomnom/waybackurls) by @TomNomNoms. This tool gets URLs from [web.archive.org](https://web.archive.org) and additional links (if any) from one of the index collections on [index.commoncrawl.org](http://index.commoncrawl.org/).
32
32
  You would have also likely used the amazing [gau](https://github.com/lc/gau) by @hacker\_ which also finds URLs from the Wayback archive and Common Crawl, but also from Alien Vault, URLScan, Virus Total and Intelligence X.
@@ -37,7 +37,7 @@ Now **waymore** gets URL's from ALL of those sources too (with ability to filter
37
37
  - Alien Vault OTX (otx.alienvault.com)
38
38
  - URLScan (urlscan.io)
39
39
  - Virus Total (virustotal.com)
40
- - Intelligence X (intelx.io) - PAID SOURCE ONLY
40
+ - Intelligence X (intelx.io) - ACADEMIA OR PAID TIERS ONLY
41
41
 
42
42
  👉 It's a point that many seem to miss, so I'll just add it again :) ... The biggest difference between **waymore** and other tools is that it can also **download the archived responses** for URLs on wayback machine so that you can then search these for even more links, developer comments, extra parameters, etc. etc.
43
43
 
@@ -175,7 +175,7 @@ The `config.yml` file (typically in `~/.config/waymore/`) have values that can b
175
175
  - `TELEGRAM_BOT_TOKEN` - If the `--notify-telegram` argument is passed, `waymore` will use this token to send a notification to Telegram.
176
176
  - `TELEGRAM_CHAT_ID` - If the `--notify-telegram` argument is passed, `waymore` will send the notification to this chat ID.
177
177
  - `DEFAULT_OUTPUT_DIR` - This is the default location of any output files written if the `-oU` and `-oR` arguments are not used. If the value of this key is blank, then it will default to the location of the `config.yml` file.
178
- - `INTELX_API_KEY` - You can sign up to [intelx.io here](https://intelx.io/product). It requires a paid API key to do the `/phonebook/search` through their API (as of 2024-09-01, the Phonebook service has been restricted to paid users due to constant abuse by spam accounts).
178
+ - `INTELX_API_KEY` - You can sign up to [intelx.io here](https://intelx.io/product). It requires an academia or paid API key to do the `/phonebook/search` through their API (as of 2024-09-01, the Phonebook service has been restricted to academia or paid users due to constant abuse by spam accounts). You can get a free API key for academic use if you sign up with a valid academic email address.
179
179
 
180
180
  **NOTE: The MIME types cannot be filtered for Alien Vault OTX, Virus Total and Intelligence X because they don't have the ability to filter on MIME Type. Sometimes URLScan does not have a MIME Type defined for a URL. In these cases, URLs will be included regardless of filter or match. Bear this in mind and consider excluding certain providers if this is important.**
181
181
 
@@ -0,0 +1,8 @@
1
+ waymore/__init__.py,sha256=Bj0SHUhuftOvJsN9gwdGRcvQ8yI3Mk5ytfgq6DQVu74,20
2
+ waymore/waymore.py,sha256=4GnaBKY0bcDhHQsIUc258IG5IeuPolsP8OfwL90AUUQ,280938
3
+ waymore-7.5.dist-info/licenses/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
4
+ waymore-7.5.dist-info/METADATA,sha256=QAIem6CNbsbyY4lgbwQoy4-qVWFV-qsSOxs7OZRU__Y,52640
5
+ waymore-7.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ waymore-7.5.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
7
+ waymore-7.5.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
8
+ waymore-7.5.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- waymore/__init__.py,sha256=ewDGl3zNlo7l0-Jn6yNrqHtDpC0IF0FjF6rbWzh5_YA,20
2
- waymore/waymore.py,sha256=GT4WU-jse7nbFcODS-Ss0jUdq0iGOSLo4K7JjS3iulY,277670
3
- waymore-7.4.dist-info/licenses/LICENSE,sha256=o_jq62xZ1YxI8tqzQKbNtqr3RW2i5sh0rk6ixCJEroU,1068
4
- waymore-7.4.dist-info/METADATA,sha256=-q0twpS_Y1Dag-gkPMyNSuM1dwkpdTYZ1U8CaDNNkkQ,52474
5
- waymore-7.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
- waymore-7.4.dist-info/entry_points.txt,sha256=YHy5EUf3r_7OTkt9jvylLjNeg7Z5yvIVm5RUAyfNcN4,49
7
- waymore-7.4.dist-info/top_level.txt,sha256=RFTphkWaRu1N7lUWIPUjabgCPQ3ETmNllF7qze4JJ_s,8
8
- waymore-7.4.dist-info/RECORD,,
File without changes