webtoolkit 0.0.185__tar.gz → 0.0.187__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/PKG-INFO +3 -2
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/pyproject.toml +1 -1
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/domaincache.py +17 -2
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/urllocation.py +2 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/LICENSE +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/README.md +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/__init__.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/baseurl.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/contentinterface.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/contentlinkparser.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/contentmoderation.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/contenttext.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/crawlers/__init__.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/crawlers/crawlerinterface.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/crawlers/requestscrawler.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/handlers/__init__.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/handlers/defaulturlhandler.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/handlers/handlerchannelodysee.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/handlers/handlerchannelyoutube.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/handlers/handlerhttppage.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/handlers/handlerinterface.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/handlers/handlers.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/handlers/handlervideoodysee.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/handlers/handlervideoyoutube.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/pages.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/remoteserver.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/remoteurl.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/request.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/response.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/statuses.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/__init__.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fake/__init__.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fake/codeproject.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fake/firebog.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fake/geekwirecom.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fake/githubcom.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fake/hackernews.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fake/instance.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fake/opmlfile.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fake/reddit.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fake/returndislike.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fake/robotstxtcom.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fake/thehill.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fake/warhammercommunity.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fake/youtube.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fakeinternet.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fakeinternetcontents.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/fakeresponse.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/tests/mocks.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/utils/dateutils.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/utils/logger.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/webconfig.py +0 -0
- {webtoolkit-0.0.185 → webtoolkit-0.0.187}/webtoolkit/webtools.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: webtoolkit
|
|
3
|
-
Version: 0.0.185
|
|
3
|
+
Version: 0.0.187
|
|
4
4
|
Summary: Web tools and interfaces for Internet data processing.
|
|
5
5
|
License: GPL3
|
|
6
6
|
Author: Iwan Grozny
|
|
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
16
|
Requires-Dist: beautifulsoup4 (>=4.13.5,<5.0.0)
|
|
16
17
|
Requires-Dist: brutefeedparser (>=0.10.5,<0.11.0)
|
|
17
18
|
Requires-Dist: lxml (>=5.4.0,<6.0.0)
|
|
@@ -176,6 +176,9 @@ class DomainCache(object):
|
|
|
176
176
|
respect_robots_txt = True
|
|
177
177
|
|
|
178
178
|
def get_object(url, url_builder):
|
|
179
|
+
"""
|
|
180
|
+
API
|
|
181
|
+
"""
|
|
179
182
|
|
|
180
183
|
if DomainCache.object is None:
|
|
181
184
|
DomainCache.object = DomainCache(
|
|
@@ -193,7 +196,7 @@ class DomainCache(object):
|
|
|
193
196
|
respect_robots_txt=True,
|
|
194
197
|
):
|
|
195
198
|
"""
|
|
196
|
-
@note Not
|
|
199
|
+
@note Not API
|
|
197
200
|
"""
|
|
198
201
|
self.cache_size = cache_size
|
|
199
202
|
self.cache = {}
|
|
@@ -209,6 +212,18 @@ class DomainCache(object):
|
|
|
209
212
|
|
|
210
213
|
return self.cache[input_url]["domain"]
|
|
211
214
|
|
|
215
|
+
def get_length(self):
|
|
216
|
+
"""
|
|
217
|
+
Returns length of cache
|
|
218
|
+
"""
|
|
219
|
+
return len(self.cache)
|
|
220
|
+
|
|
221
|
+
def get_max_length(self):
|
|
222
|
+
"""
|
|
223
|
+
Returns maximum length of cache
|
|
224
|
+
"""
|
|
225
|
+
return self.cache_size
|
|
226
|
+
|
|
212
227
|
def read_info(self, domain_url):
|
|
213
228
|
return DomainCacheInfo(
|
|
214
229
|
domain_url,
|
|
@@ -216,7 +231,7 @@ class DomainCache(object):
|
|
|
216
231
|
)
|
|
217
232
|
|
|
218
233
|
def remove_from_cache(self):
|
|
219
|
-
if
|
|
234
|
+
if self.get_length() < self.get_max_length():
|
|
220
235
|
return
|
|
221
236
|
|
|
222
237
|
thelist = []
|
|
@@ -376,6 +376,8 @@ class UrlLocation(object):
|
|
|
376
376
|
parsed_url = urlparse(url)
|
|
377
377
|
query_params = parse_qs(parsed_url.query)
|
|
378
378
|
param_value = query_params.get("q", [None])[0]
|
|
379
|
+
if not param_value:
|
|
380
|
+
return url
|
|
379
381
|
|
|
380
382
|
param_value = unquote(param_value)
|
|
381
383
|
param_value = UrlLocation.get_cleaned_link(param_value)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|