swiftshadow 1.0.2__tar.gz → 1.2.0__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: swiftshadow
3
- Version: 1.0.2
3
+ Version: 1.2.0
4
4
  Summary: Free IP Proxy rotator for python
5
5
  Home-page: https://github.com/sachin-sankar/swiftshadow
6
6
  Author: Sachin Sankar
7
7
  Author-email: mail.sachinsankar@gmail.com
8
- Classifier: Development Status :: 2 - Pre-Alpha
8
+ Classifier: Development Status :: 5 - Production/Stable
9
9
  Description-Content-Type: text/markdown
10
10
  License-File: LICENSE
11
11
  Requires-Dist: requests
@@ -14,6 +14,9 @@ Requires-Dist: requests
14
14
 
15
15
  ![PyPI - Downloads](https://img.shields.io/pypi/dm/swiftshadow) ![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/sachin-sankar/swiftshadow?include_prereleases&style=flat)
16
16
 
17
+ > [!TIP]
18
+ > I'm refactoring the library for better speed and maintainability. Future updates might have breaking changes, but I'll keep you posted!
19
+
17
20
  ## About
18
21
 
19
22
  Swiftshadow is a powerful Python library designed to simplify the process of rotating IP proxies for web scraping, data mining, and other automated tasks. With its advanced features, Swiftshadow can help you overcome many of the challenges associated with web scraping, including blocked IP addresses and other forms of detection.
@@ -2,6 +2,9 @@
2
2
 
3
3
  ![PyPI - Downloads](https://img.shields.io/pypi/dm/swiftshadow) ![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/sachin-sankar/swiftshadow?include_prereleases&style=flat)
4
4
 
5
+ > [!TIP]
6
+ > I'm refactoring the library for better speed and maintainability. Future updates might have breaking changes, but I'll keep you posted!
7
+
5
8
  ## About
6
9
 
7
10
  Swiftshadow is a powerful Python library designed to simplify the process of rotating IP proxies for web scraping, data mining, and other automated tasks. With its advanced features, Swiftshadow can help you overcome many of the challenges associated with web scraping, including blocked IP addresses and other forms of detection.
@@ -19,10 +19,10 @@ setup(
19
19
  description="Free IP Proxy rotator for python",
20
20
  long_description=long_description,
21
21
  long_description_content_type="text/markdown",
22
- version="1.0.2",
22
+ version="1.2.0",
23
23
  packages=find_packages(where=".", exclude=["tests"]),
24
24
  install_requires=["requests"],
25
25
  classifiers=[
26
- "Development Status :: 2 - Pre-Alpha",
26
+ "Development Status :: 5 - Production/Stable",
27
27
  ],
28
28
  )
@@ -0,0 +1,25 @@
1
+ from swiftshadow.providers import Providers
2
+
3
+
4
+ def QuickProxy(countries: list = [], protocol: str = "http"):
5
+ """
6
+ This function is a faster alternative to `Proxy` class.
7
+ No caching is done.
8
+
9
+ Args:
10
+ countries: ISO 3166-2 Two letter country codes to filter proxies.
11
+ protocol: HTTP/HTTPS protocol to filter proxies.
12
+
13
+ Returns:
14
+ proxyObject (dict): A working proxy object.
15
+ """
16
+ for providerDict in Providers:
17
+ if protocol not in providerDict["protocols"]:
18
+ continue
19
+ if (len(countries) != 0) and (not providerDict["countryFilter"]):
20
+ continue
21
+ try:
22
+ return providerDict["provider"](1, countries, protocol)[0]
23
+ except Exception:
24
+ continue
25
+ return None
@@ -1,5 +1,4 @@
1
1
  from datetime import datetime, timezone, timedelta
2
- from time import sleep
3
2
 
4
3
 
5
4
  def getExpiry(expiryIn):
@@ -2,9 +2,17 @@ from requests import get
2
2
  from random import choice
3
3
  from json import dump, load
4
4
  from swiftshadow.helpers import log
5
- from swiftshadow.providers import Proxyscrape, Scrapingant, Providers
5
+ from swiftshadow.providers import Providers
6
6
  import swiftshadow.cache as cache
7
- import os
7
+ import logging
8
+ import sys
9
+
10
+ logger = logging.getLogger("swiftshadow")
11
+ logger.setLevel(logging.INFO)
12
+ logFormat = logging.Formatter("%(asctime)s - %(name)s [%(levelname)s]:%(message)s")
13
+ streamhandler = logging.StreamHandler(stream=sys.stdout)
14
+ streamhandler.setFormatter(logFormat)
15
+ logger.addHandler(streamhandler)
8
16
 
9
17
 
10
18
  class Proxy:
@@ -16,6 +24,8 @@ class Proxy:
16
24
  autoRotate: bool = False,
17
25
  cachePeriod: int = 10,
18
26
  cacheFolder: str = "",
27
+ debug: bool = False,
28
+ logToFile: bool = False,
19
29
  ):
20
30
  """
21
31
  The one class for everything.
@@ -29,6 +39,8 @@ class Proxy:
29
39
  autoRotate: Rotates proxy when `Proxy.proxy()` function is called.
30
40
  cachePeriod: Time to cache proxies in minutes.
31
41
  cacheFolder: Folder to store cache file.
42
+ debug: Sets Log Level to Debug.
43
+ logToFile: Whether to pipe log to a log file. If cacheFolder is set log file is saved there.
32
44
 
33
45
  Returns:
34
46
  proxyClass (swiftshadow.Proxy): `swiftshadow.Proxy` class instance
@@ -51,22 +63,17 @@ class Proxy:
51
63
  self.cacheFilePath = ".swiftshadow.json"
52
64
  else:
53
65
  self.cacheFilePath = f"{cacheFolder}/.swiftshadow.json"
54
-
55
- self.update()
56
-
57
- def checkIp(self, ip, cc, protocol):
58
- if (ip[1] == cc or cc == None) and ip[2] == protocol:
59
- proxy = {ip[2]: ip[0]}
60
- try:
61
- oip = get(f"{protocol}://ipinfo.io/ip", proxies=proxy).text
62
- except:
63
- return False
64
- if oip.count(".") == 3 and oip != self.mip:
65
- return True
66
+ if debug:
67
+ logger.setLevel(logging.DEBUG)
68
+ if logToFile:
69
+ if cacheFolder == "":
70
+ logFilePath = "swiftshadow.log"
66
71
  else:
67
- return False
68
- else:
69
- return False
72
+ logFilePath = f"{cacheFolder}/swiftshadow.log"
73
+ fileHandler = logging.FileHandler(logFilePath)
74
+ fileHandler.setFormatter(logFormat)
75
+ logger.addHandler(fileHandler)
76
+ self.update()
70
77
 
71
78
  def update(self):
72
79
  try:
@@ -75,8 +82,7 @@ class Proxy:
75
82
  self.expiry = data[0]
76
83
  expired = cache.checkExpiry(self.expiry)
77
84
  if not expired:
78
- log(
79
- "info",
85
+ logger.info(
80
86
  "Loaded proxies from cache",
81
87
  )
82
88
  self.proxies = data[1]
@@ -84,22 +90,25 @@ class Proxy:
84
90
  self.current = self.proxies[0]
85
91
  return
86
92
  else:
87
- log(
88
- "info",
89
- "Cache expired. Updating cache...",
93
+ logger.info(
94
+ "Cache expired. Updating cache.",
90
95
  )
91
96
  except FileNotFoundError:
92
- log("error", "No cache found. Cache will be created after update")
97
+ logger.info("No cache found. Cache will be created after update")
93
98
 
94
99
  self.proxies = []
95
- self.proxies.extend(Proxyscrape(self.maxProxies, self.countries, self.protocol))
96
- if len(self.proxies) != self.maxProxies:
100
+ for providerDict in Providers:
101
+ if self.protocol not in providerDict["protocols"]:
102
+ continue
103
+ if (len(self.countries) != 0) and (not providerDict["countryFilter"]):
104
+ continue
97
105
  self.proxies.extend(
98
- Scrapingant(self.maxProxies, self.countries, self.protocol)
106
+ providerDict["provider"](self.maxProxies, self.countries, self.protocol)
99
107
  )
108
+ if len(self.proxies) >= self.maxProxies:
109
+ break
100
110
  if len(self.proxies) == 0:
101
- log(
102
- "warning",
111
+ logger.warning(
103
112
  "No proxies found for current settings. To prevent runtime error updating the proxy list again.",
104
113
  )
105
114
  self.update()
@@ -134,32 +143,3 @@ class Proxy:
134
143
  return choice(self.proxies)
135
144
  else:
136
145
  return self.current
137
-
138
-
139
- class ProxyChains:
140
- def __init__(
141
- self, countries: list = [], protocol: str = "http", maxProxies: int = 10
142
- ):
143
- self.countries = [i.upper() for i in countries]
144
- self.protocol = protocol
145
- self.maxProxies = maxProxies
146
- self.update()
147
-
148
- def update(self):
149
- proxies = []
150
- for provider in Providers:
151
- print(len(proxies))
152
- if len(proxies) == self.maxProxies:
153
- break
154
- log("INFO", f"{provider}")
155
- for proxyDict in provider(self.maxProxies, self.countries, self.protocol):
156
- proxyRaw = list(proxyDict.items())[0]
157
- proxy = f'{proxyRaw[0]} {proxyRaw[1].replace(":"," ")}'
158
- proxies.append(proxy)
159
- proxies = "\n".join(proxies)
160
- configFileName = "swiftshadow-proxychains.conf"
161
- config = f"random_chain\nchain_len=1\nproxy_dns\n[ProxyList]\n{proxies}"
162
- with open(configFileName, "w") as file:
163
- file.write(config)
164
- cmd = f"proxychains -f {os.path.abspath(configFileName)}"
165
- os.system(cmd)
@@ -0,0 +1,22 @@
1
+ from requests import get
2
+ from datetime import datetime
3
+
4
+
5
+ def checkProxy(proxy):
6
+ proxyDict = {proxy[1]: proxy[0]}
7
+ try:
8
+ resp = get(
9
+ f"{proxy[1]}://checkip.amazonaws.com", proxies=proxyDict, timeout=2
10
+ ).text
11
+ if resp.count(".") == 3:
12
+ return True
13
+ return False
14
+ except Exception as e:
15
+ return False
16
+
17
+
18
+ def log(level, message):
19
+ level = level.upper()
20
+ print(
21
+ f'{datetime.now().strftime("%d/%m/%Y %H:%M:%S")} - [swiftshadow] - {level} : {message}'
22
+ )
@@ -0,0 +1,57 @@
1
+ from requests import get
2
+ from swiftshadow.helpers import checkProxy
3
+
4
+ def Monosans(max, countries=[],protocol="http"):
5
+ raw = get('https://raw.githubusercontent.com/monosans/proxy-list/main/proxies.json').json()
6
+ results = []
7
+ count = 0
8
+ for proxy in raw:
9
+ if count == max:
10
+ return results
11
+ if proxy['protocol'] == protocol:
12
+ if len(countries) != 0 and proxy['geolocation']['country']['iso_code'] not in countries:
13
+ continue
14
+ proxy = [f"{proxy['host']}:{proxy['port']}", proxy['protocol']]
15
+ if checkProxy(proxy):
16
+ results.append(proxy)
17
+ count += 1
18
+ return results
19
+
20
+ def Thespeedx(max,countries=[],protocol='http'):
21
+ results = []
22
+ count =0
23
+ raw = get('https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt').text
24
+ for line in raw.splitlines():
25
+ if count == max:
26
+ break
27
+ proxy = [line,'http']
28
+ if checkProxy(proxy):
29
+ results.append(proxy)
30
+ print(proxy,True)
31
+ count +=1
32
+ else:
33
+ continue
34
+ return results
35
+
36
+ def ProxyScrape(max,countries=[],protocol='http'):
37
+ baseUrl = 'https://api.proxyscrape.com/v3/free-proxy-list/get?request=displayproxies&protocol=http&proxy_format=ipport&format=json'
38
+ results = []
39
+ count = 0
40
+ if len(countries) == 0:
41
+ apiUrl = baseUrl + '&country=all'
42
+ else:
43
+ apiUrl = baseUrl + '&country=' + ','.join([i.upper() for i in countries])
44
+ raw = get(apiUrl).json()
45
+ for ipRaw in raw['proxies']:
46
+ if count == max:
47
+ break
48
+ proxy = [ipRaw['proxy'],'http']
49
+ if checkProxy(proxy):
50
+ results.append(proxy)
51
+ count += 1
52
+ else:
53
+ print(proxy,False)
54
+ continue
55
+ return results
56
+
57
+ Providers = [{'provider':Monosans,'countryFilter':True,'protocols':['http']},{'provider':Thespeedx,'countryFilter':False,'protocols':['http']},{'provider':ProxyScrape,'countryFilter':True,'protocols':['http']}]
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: swiftshadow
3
- Version: 1.0.2
3
+ Version: 1.2.0
4
4
  Summary: Free IP Proxy rotator for python
5
5
  Home-page: https://github.com/sachin-sankar/swiftshadow
6
6
  Author: Sachin Sankar
7
7
  Author-email: mail.sachinsankar@gmail.com
8
- Classifier: Development Status :: 2 - Pre-Alpha
8
+ Classifier: Development Status :: 5 - Production/Stable
9
9
  Description-Content-Type: text/markdown
10
10
  License-File: LICENSE
11
11
  Requires-Dist: requests
@@ -14,6 +14,9 @@ Requires-Dist: requests
14
14
 
15
15
  ![PyPI - Downloads](https://img.shields.io/pypi/dm/swiftshadow) ![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/sachin-sankar/swiftshadow?include_prereleases&style=flat)
16
16
 
17
+ > [!TIP]
18
+ > I'm refactoring the library for better speed and maintainability. Future updates might have breaking changes, but I'll keep you posted!
19
+
17
20
  ## About
18
21
 
19
22
  Swiftshadow is a powerful Python library designed to simplify the process of rotating IP proxies for web scraping, data mining, and other automated tasks. With its advanced features, Swiftshadow can help you overcome many of the challenges associated with web scraping, including blocked IP addresses and other forms of detection.
@@ -1,19 +0,0 @@
1
- from swiftshadow.providers import Proxyscrape, Scrapingant
2
-
3
-
4
- def QuickProxy(countries: list = [], protocol: str = "http"):
5
- """
6
- This function is a faster alternative to `Proxy` class.
7
- No caching is done.
8
-
9
- Args:
10
- countries: ISO 3166-2 Two letter country codes to filter proxies.
11
- protocol: HTTP/HTTPS protocol to filter proxies.
12
-
13
- Returns:
14
- proxyObject (dict): A working proxy object.
15
- """
16
- try:
17
- return Proxyscrape(1, countries=countries, protocol=protocol)[0]
18
- except:
19
- return Scrapingant(1, countries=countries, protocol=protocol)[0]
@@ -1,34 +0,0 @@
1
- from swiftshadow.constants import CountryCodes
2
- from requests import get
3
- from datetime import datetime
4
-
5
-
6
- def getCountryCode(countryName):
7
- try:
8
- return CountryCodes[countryName]
9
- except KeyError:
10
- for name in list(CountryCodes.keys()):
11
- if countryName in name:
12
- return CountryCodes[name]
13
-
14
-
15
- def checkProxy(proxy, countries):
16
- if countries != []:
17
- if proxy[-1].upper() not in countries:
18
- return False
19
- proxyDict = {proxy[1]: proxy[0]}
20
- try:
21
- resp = get(f"{proxy[1]}://ipinfo.io/ip", proxies=proxyDict, timeout=2).text
22
- if resp.count(".") == 3:
23
- return True
24
- return False
25
- except Exception as e:
26
- # log('error',str(e))
27
- return False
28
-
29
-
30
- def log(level, message):
31
- level = level.upper()
32
- print(
33
- f'{datetime.now().strftime("%d/%m/%Y %H:%M:%S")} - [swiftshadow] - {level} : {message}'
34
- )
@@ -1,52 +0,0 @@
1
- from requests import get
2
- from swiftshadow.helpers import getCountryCode, checkProxy
3
-
4
-
5
- def Scrapingant(max, countries=[], protocol="http"):
6
- result = []
7
- count = 0
8
- raw = get("https://scrapingant.com/proxies").text
9
- rows = [i.split("<td>") for i in raw.split("<tr>")]
10
-
11
- def clean(text):
12
- return text[: text.find("<")].strip()
13
-
14
- for row in rows[2:]:
15
- if count == max:
16
- return result
17
- zprotocol = clean(row[3]).lower()
18
- if zprotocol != protocol:
19
- continue
20
- cleaned = [
21
- clean(row[1]) + ":" + clean(row[2]),
22
- protocol,
23
- getCountryCode(clean(row[4].split(" ", 1)[1])),
24
- ]
25
- if checkProxy(cleaned, countries):
26
- result.append({cleaned[1]: cleaned[0]})
27
- count += 1
28
- return result
29
-
30
-
31
- def Proxyscrape(max, countries=[], protocol="http"):
32
- result = []
33
- count = 0
34
- query = "https://api.proxyscrape.com/v2/?timeout=5000&request=displayproxies&protocol=http"
35
- if countries == []:
36
- query += "&country=all"
37
- else:
38
- query += "&country=" + ",".join(countries)
39
- if protocol == "https":
40
- query += "&ssl=yes"
41
- ips = get(query).text
42
- for ip in ips.split("\n"):
43
- if count == max:
44
- return result
45
- proxy = [ip.strip(), protocol, "all"]
46
- if checkProxy(proxy, []):
47
- result.append({proxy[1]: proxy[0]})
48
- count += 1
49
- return result
50
-
51
-
52
- Providers = [Proxyscrape, Scrapingant]
File without changes
File without changes