webtoolkit 0.0.182.tar.gz → 0.0.185.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/PKG-INFO +1 -1
  2. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/pyproject.toml +1 -1
  3. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/baseurl.py +55 -39
  4. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/contentinterface.py +1 -0
  5. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/contentlinkparser.py +9 -7
  6. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/crawlers/crawlerinterface.py +2 -0
  7. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/crawlers/requestscrawler.py +2 -4
  8. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/handlers/defaulturlhandler.py +6 -2
  9. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/handlers/handlerchannelodysee.py +3 -1
  10. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/handlers/handlerchannelyoutube.py +5 -5
  11. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/handlers/handlerinterface.py +2 -1
  12. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/handlers/handlervideoodysee.py +1 -0
  13. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/handlers/handlervideoyoutube.py +3 -1
  14. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/pages.py +18 -14
  15. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/remoteurl.py +80 -45
  16. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/request.py +5 -6
  17. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/response.py +9 -2
  18. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/__init__.py +0 -1
  19. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fakeresponse.py +6 -4
  20. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/mocks.py +12 -4
  21. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/urllocation.py +1 -0
  22. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/webtools.py +126 -31
  23. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/LICENSE +0 -0
  24. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/README.md +0 -0
  25. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/__init__.py +0 -0
  26. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/contentmoderation.py +0 -0
  27. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/contenttext.py +0 -0
  28. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/crawlers/__init__.py +0 -0
  29. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/domaincache.py +0 -0
  30. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/handlers/__init__.py +0 -0
  31. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/handlers/handlerhttppage.py +0 -0
  32. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/handlers/handlers.py +0 -0
  33. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/remoteserver.py +0 -0
  34. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/statuses.py +0 -0
  35. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fake/__init__.py +0 -0
  36. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fake/codeproject.py +0 -0
  37. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fake/firebog.py +0 -0
  38. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fake/geekwirecom.py +0 -0
  39. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fake/githubcom.py +0 -0
  40. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fake/hackernews.py +0 -0
  41. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fake/instance.py +0 -0
  42. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fake/opmlfile.py +0 -0
  43. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fake/reddit.py +0 -0
  44. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fake/returndislike.py +0 -0
  45. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fake/robotstxtcom.py +0 -0
  46. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fake/thehill.py +0 -0
  47. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fake/warhammercommunity.py +0 -0
  48. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fake/youtube.py +0 -0
  49. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fakeinternet.py +0 -0
  50. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/tests/fakeinternetcontents.py +0 -0
  51. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/utils/dateutils.py +0 -0
  52. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/utils/logger.py +0 -0
  53. {webtoolkit-0.0.182 → webtoolkit-0.0.185}/webtoolkit/webconfig.py +0 -0
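Most of the hunks below are mechanical formatting cleanups (the code appears to have been run through a formatter such as black), plus small API touch-ups in webtoolkit/baseurl.py. For orientation, a minimal usage sketch of the BaseUrl class whose hunks follow; the import path and constructor arguments are inferred from this diff, not verified against the installed package:

    from webtoolkit.baseurl import BaseUrl

    url = BaseUrl(url="https://example.com")
    response = url.get_response()       # fetches the page via the selected handler
    if url.is_valid():
        print(url.get_title())
        print(url.get_status_code())    # int | None; 0 when there is no response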
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: webtoolkit
-Version: 0.0.182
+Version: 0.0.185
 Summary: Web tools and interfaces for Internet data processing.
 License: GPL3
 Author: Iwan Grozny
@@ -3,7 +3,7 @@
 
 [tool.poetry]
 name = "webtoolkit"
-version = "0.0.182"
+version = "0.0.185"
 description = "Web tools and interfaces for Internet data processing."
 authors = ["Iwan Grozny <renegat@renegat0x0.ddns.net>"]
 license = "GPL3"
@@ -7,6 +7,7 @@ response = url.get_response()
 """
 
 import base64
+from typing import Any, Callable, Optional, Type
 
 from .utils.dateutils import DateUtils
 
@@ -59,6 +60,7 @@ class BaseUrl(ContentInterface):
     """
     Base Url class capable of reading network pages.
     """
+
     def __init__(self, url=None, request=None, url_builder=None):
         """
         Constructor
@@ -105,7 +107,7 @@ class BaseUrl(ContentInterface):
         Returns available handlers.
         Order is important - from the most precise handler to the most general.
         """
-        #fmt off
+        # fmt off
 
         return [
             YouTubeVideoHandler,
@@ -118,13 +120,13 @@ class BaseUrl(ContentInterface):
             InternetArchive,
             FourChanChannelHandler,
             TwitterUrlHandler,
-            YouTubeChannelHandler, # present here, if somebody wants to call it by name
-            HttpPageHandler, # default
+            YouTubeChannelHandler,  # present here, if somebody wants to call it by name
+            HttpPageHandler,  # default
         ]
-        #fmt on
+        # fmt on
 
     def get_handler_by_name(self, handler_name):
-        """ Returns handler class """
+        """Returns handler class"""
         handlers = self.get_handlers()
         for handler in handlers:
             if handler.__name__ == handler_name:
@@ -218,8 +220,10 @@ class BaseUrl(ContentInterface):
         if self.response:
             if not self.response.is_valid():
                 WebLogger.error(
-                    "Url:{} Response is invalid:{}".format(self.request.url, self.response),
-                    detail_text = str(response_to_json(self.response))
+                    "Url:{} Response is invalid:{}".format(
+                        self.request.url, self.response
+                    ),
+                    detail_text=str(response_to_json(self.response)),
                 )
 
         return self.response
@@ -256,7 +260,7 @@ class BaseUrl(ContentInterface):
         return RequestsCrawler(self.request.url).ping()
 
     def get_handler_implementation(self):
-        """ Returns handler """
+        """Returns handler"""
         url = self.request.url
         if not url:
             return
@@ -269,7 +273,11 @@ class BaseUrl(ContentInterface):
 
         handlers = self.get_handlers()
         for handler in handlers:
-            if self.request.handler_name and self.request.handler_name != "" and self.request.handler_name != handler.__name__:
+            if (
+                self.request.handler_name
+                and self.request.handler_name != ""
+                and self.request.handler_name != handler.__name__
+            ):
                 continue
             if self.request.handler_type and self.request.handler_type != handler:
                 continue
@@ -289,7 +297,7 @@ class BaseUrl(ContentInterface):
         raise NotImplementedError("Protocol has not been implemented")
 
     def get_cleaned_link(self):
-        """ Returns cleaned up link. Free of unwanted args, tracking, sanitized. """
+        """Returns cleaned up link. Free of unwanted args, tracking, sanitized."""
         url = self.request.url
 
         url = url.strip()
@@ -310,7 +318,7 @@ class BaseUrl(ContentInterface):
         return self.request.url
 
     def get_urls(self):
-        """ Returns various link versions for URL """
+        """Returns various link versions for URL"""
        properties = {}
        properties["link"] = self.request.url
        properties["link_request"] = self.request_url
@@ -320,7 +328,7 @@ class BaseUrl(ContentInterface):
         return properties
 
     def get_canonical_url(self):
-        """ Returns canonical link """
+        """Returns canonical link"""
         if self.handler:
             return self.handler.get_canonical_url()
 
@@ -331,7 +339,7 @@ class BaseUrl(ContentInterface):
         return handler.get_canonical_url()
 
     def get_urls_archive(self):
-        """ Returns archive link for URL """
+        """Returns archive link for URL"""
         p = UrlLocation(self.request.url)
         short_url = p.get_protocolless()
 
@@ -348,7 +356,7 @@ class BaseUrl(ContentInterface):
         return "{}".format(self.request)
 
     def is_valid(self):
-        """ Returns indication if URL is valid """
+        """Returns indication if URL is valid"""
         if not self.handler:
             return False
 
@@ -364,54 +372,54 @@ class BaseUrl(ContentInterface):
         return True
 
     def get_title(self):
-        """ Returns title """
+        """Returns title"""
         if self.handler:
             return self.handler.get_title()
 
     def get_description(self):
-        """ Returns description """
+        """Returns description"""
         if self.handler:
             return self.handler.get_description()
 
     def get_language(self):
-        """ Returns language """
+        """Returns language"""
         if self.handler:
             return self.handler.get_language()
 
     def get_thumbnail(self):
-        """ Returns thumbnail """
+        """Returns thumbnail"""
         if self.handler:
             return self.handler.get_thumbnail()
 
     def get_author(self):
-        """ Returns author """
+        """Returns author"""
         if self.handler:
             return self.handler.get_author()
 
     def get_album(self):
-        """ Returns album """
+        """Returns album"""
         if self.handler:
             return self.handler.get_album()
 
     def get_tags(self):
-        """ Returns tags """
+        """Returns tags"""
         if self.handler:
             return self.handler.get_tags()
 
     def get_date_published(self):
-        """ Returns date published """
+        """Returns date published"""
         if self.handler:
             return self.handler.get_date_published()
 
     def get_status_code(self) -> int | None:
-        """ Returns status code """
+        """Returns status code"""
         if self.response:
             return self.response.get_status_code()
 
         return 0
 
     def get_entries(self):
-        """ Returns entries list """
+        """Returns entries list"""
 
         handler = self.get_handler()
         if handler:
@@ -446,7 +454,7 @@ class BaseUrl(ContentInterface):
         return u
 
     def get_feeds(self):
-        """ Returns feeds found for URL """
+        """Returns feeds found for URL"""
         result = []
 
         handler = self.get_handler()
@@ -459,23 +467,24 @@ class BaseUrl(ContentInterface):
         return calculate_hash(text)
 
     def get_hash(self):
-        """ Returns hash for URL """
+        """Returns hash for URL"""
         handler = self.get_handler()
         if handler:
             return handler.get_hash()
 
     def get_body_hash(self):
-        """ Returns body hash for URL """
+        """Returns body hash for URL"""
         handler = self.get_handler()
         if handler:
             return handler.get_body_hash()
 
-    def get_meta_hash(self):
-        """ Returns meta hash for URL """
-        response = self.get_response()
-
+    def get_meta_hash(self) -> Optional[str]:
+        """
+        Calculates and returns a hash of the page's metadata properties.
+        :return: A base64-encoded hash of the properties.
+        """
+        self.get_response()
         properties_data = self.get_properties_data()
-
         properties_hash = self.property_encode(calculate_hash(str(properties_data)))
         return properties_hash
 
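The reworked get_meta_hash above now documents its return value: a base64-encoded hash of the page's property data. In isolation the pattern looks roughly like the sketch below; calculate_hash and property_encode are names taken from the hunk, and the bodies given here are illustrative stand-ins, not the package's actual implementations:

    import base64
    import hashlib

    def calculate_hash(text):
        # stand-in for webtoolkit's calculate_hash helper
        return hashlib.md5(text.encode()).digest()

    def property_encode(digest):
        # stand-in for BaseUrl.property_encode
        return base64.b64encode(digest).decode()

    properties_data = {"title": "Example", "description": "..."}
    properties_hash = property_encode(calculate_hash(str(properties_data)))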
@@ -486,7 +495,7 @@ class BaseUrl(ContentInterface):
         return self.get_properties_data()
 
     def get_all_properties(self, include_social=False):
-        """ Returns all URL properties """
+        """Returns all URL properties"""
         response = self.get_response()
 
         properties_data = self.get_properties()
@@ -540,8 +549,8 @@ class BaseUrl(ContentInterface):
         return all_properties
 
     def get_properties_data(self):
-        """ Returns simple meta properties.
-        TODO there should two functions: get_all_properties and get_properties """
+        """Returns simple meta properties.
+        TODO there should two functions: get_all_properties and get_properties"""
         properties = super().get_properties()
         page_handler = self.get_handler()
 
@@ -567,7 +576,10 @@ class BaseUrl(ContentInterface):
             properties["channel_name"] = page_handler.get_channel_name()
             properties["channel_url"] = page_handler.get_channel_url()
 
-        if type(page_handler) is HttpPageHandler and type(page_handler.p) is HtmlPage:
+        if (
+            type(page_handler) is HttpPageHandler
+            and type(page_handler.p) is HtmlPage
+        ):
             properties["favicon"] = page_handler.p.get_favicon()
             properties["meta title"] = page_handler.p.get_meta_field("title")
             properties["meta description"] = page_handler.p.get_meta_field(
@@ -576,7 +588,9 @@ class BaseUrl(ContentInterface):
             properties["meta keywords"] = page_handler.p.get_meta_field("keywords")
 
             properties["og:title"] = page_handler.p.get_og_field("title")
-            properties["og:description"] = page_handler.p.get_og_field("description")
+            properties["og:description"] = page_handler.p.get_og_field(
+                "description"
+            )
             properties["og:image"] = page_handler.p.get_og_field("image")
             properties["og:site_name"] = page_handler.p.get_og_field("site_name")
             properties["schema:thumbnailUrl"] = page_handler.p.get_schema_field(
@@ -631,11 +645,13 @@ class BaseUrl(ContentInterface):
         """
         Returns indication is access is allowed for bots, robots
         """
-        domain_info = DomainCache.get_object(url =self.request.url, url_builder=self.url_builder)
+        domain_info = DomainCache.get_object(
+            url=self.request.url, url_builder=self.url_builder
+        )
         return domain_info.is_allowed(self.request.url)
 
     def get_social_properties(self):
-        """ Returns social properties """
+        """Returns social properties"""
         url = self.request.url
 
         json_obj = {}
@@ -20,6 +20,7 @@ class ContentInterface(object):
     """
     Content interface
     """
+
     def __init__(self, url, contents):
         self.url = url
         self.contents = contents
@@ -164,10 +164,12 @@ class ContentLinkParser(ContentInterface):
             item = item[wh + 1 :]
 
         # not absolute path
-        if (not item.startswith("http")
+        if (
+            not item.startswith("http")
             and not item.startswith("https")
             and not item.startswith("ftp")
-            and not item.startswith("smb")):
+            and not item.startswith("smb")
+        ):
 
             location = UrlLocation("https://" + item)
             domain = location.get_domain_only()
@@ -179,15 +181,15 @@ class ContentLinkParser(ContentInterface):
             return
         item = self.join_url_parts(url, item)
 
-        if (not item.startswith("http")
+        if (
+            not item.startswith("http")
             and not item.startswith("https")
             and not item.startswith("ftp")
-            and not item.startswith("smb")):
+            and not item.startswith("smb")
+        ):
             item = "https://" + item
 
-        if item.startswith("https:&#x2F;&#x2F") or item.startswith(
-            "http:&#x2F;&#x2F"
-        ):
+        if item.startswith("https:&#x2F;&#x2F") or item.startswith("http:&#x2F;&#x2F"):
             item = ContentLinkParser.decode_url(item)
         return item
 
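The reflowed conditions in ContentLinkParser keep the same normalization rule: a link carrying no recognized scheme gets https:// prepended, and HTML-entity-escaped links are decoded. A standalone sketch of that rule, using html.unescape as a hypothetical stand-in for ContentLinkParser.decode_url:

    import html

    def normalize_link(item):
        # Prepend a scheme when none of the recognized ones is present.
        if (
            not item.startswith("http")
            and not item.startswith("https")
            and not item.startswith("ftp")
            and not item.startswith("smb")
        ):
            item = "https://" + item

        # Decode entity-escaped links such as "https:&#x2F;&#x2F;example.com".
        if item.startswith("https:&#x2F;&#x2F;") or item.startswith("http:&#x2F;&#x2F;"):
            item = html.unescape(item)  # stand-in for ContentLinkParser.decode_url
        return item

    print(normalize_link("example.com/page"))  # https://example.com/page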
@@ -1,6 +1,7 @@
 """
 Crawler interface can be implemented to provide new mechanisms of crawling
 """
+
 import json
 import os
 import base64
@@ -46,6 +47,7 @@ class CrawlerInterface(object):
     Crawler is a tool that allows to obtain contents from the internet.
     There are various tools.
     """
+
     def __init__(self, url=None, request=None):
         """
         @param response_file If set, response is stored in a file
@@ -202,9 +202,7 @@ class RequestsCrawler(CrawlerInterface):
 
         def request_with_timeout(request, stream, result):
             try:
-                result["response"] = self.make_requests_call(
-                    request, stream
-                )
+                result["response"] = self.make_requests_call(request, stream)
             except Exception as e:
                 result["exception"] = e
 
@@ -297,5 +295,5 @@ class RequestsCrawler(CrawlerInterface):
 
     def update_request(self):
         self.request.timeout_s = self.get_timeout_s()
-        #TODO - headers are not set
+        # TODO - headers are not set
         # self.request.request_headers = self.get_request_headers()
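request_with_timeout above follows the usual pattern of running a blocking requests call on a worker thread so the caller can bound the wait itself. A generic sketch of that pattern (names and structure here are illustrative, not the crawler's exact code):

    import threading
    import requests

    def fetch_with_timeout(url, timeout_s):
        result = {}

        def worker():
            try:
                # the blocking network call runs on the worker thread
                result["response"] = requests.get(url, timeout=timeout_s)
            except Exception as e:
                result["exception"] = e

        thread = threading.Thread(target=worker, daemon=True)
        thread.start()
        thread.join(timeout_s)  # stop waiting after timeout_s seconds
        if "exception" in result:
            raise result["exception"]
        return result.get("response")  # None if the call has not finished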
@@ -1,6 +1,7 @@
 """
 Default url handler.
 """
+
 import copy
 from collections import OrderedDict
 from concurrent.futures import ThreadPoolExecutor
@@ -42,7 +43,7 @@ class DefaultUrlHandler(HttpPageHandler):
         else:
             request = PageRequestObject(url)
             request.url = url
-            #request.handler_type = HttpPageHandler # object will be assigned by builder
+            # request.handler_type = HttpPageHandler # object will be assigned by builder
 
             # if we will not hardcode this handler, then it will recursively loop
             request.handler_name = "HttpPageHandler"
@@ -108,6 +109,7 @@ class DefaultCompoundChannelHandler(DefaultChannelHandler):
     """
     Default URL handler which is capable of obtaining data from many network sources automatically.
     """
+
     def __init__(self, url=None, contents=None, request=None, url_builder=None):
         self.responses = []
         self.channel_sources_urls = OrderedDict()
@@ -158,7 +160,9 @@ class DefaultCompoundChannelHandler(DefaultChannelHandler):
         with ThreadPoolExecutor() as executor:
             for channel_source in channel_sources:
                 if channel_source not in self.channel_sources_urls:
-                    handles.append(executor.submit(self.get_response_source, channel_source))
+                    handles.append(
+                        executor.submit(self.get_response_source, channel_source)
+                    )
 
             for handle in handles:
                 url = handle.result()
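The reindented block above fans channel-source fetches out over a thread pool and then collects results in submission order. The shape of that pattern in isolation (get_response_source is the handler method named in the hunk; here it is passed in as a plain callable):

    from concurrent.futures import ThreadPoolExecutor

    def fetch_all(channel_sources, get_response_source):
        handles = []
        with ThreadPoolExecutor() as executor:
            for channel_source in channel_sources:
                handles.append(executor.submit(get_response_source, channel_source))
            # result() blocks until each submitted fetch completes
            return [handle.result() for handle in handles]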
@@ -8,7 +8,9 @@ from .handlerhttppage import HttpPageHandler
 
 class OdyseeChannelHandler(DefaultCompoundChannelHandler):
 
-    def __init__(self, url=None, contents=None, request=None, url_builder=None, channel_code=None):
+    def __init__(
+        self, url=None, contents=None, request=None, url_builder=None, channel_code=None
+    ):
         if channel_code is not None:
             url = self.code2url(channel_code)
 
@@ -150,20 +150,20 @@ class YouTubeChannelHandler(DefaultCompoundChannelHandler):
         wh1 = url.find("youtube.com/user")
         if wh1 >= 0:
             start = wh1 + len("youtube.com/user") + 1
-            wh2 = url.find("/", start+1)
+            wh2 = url.find("/", start + 1)
             if wh2 == -1:
-                return url[start-1:]
+                return url[start - 1 :]
             else:
-                return url[start-1:wh2]
+                return url[start - 1 : wh2]
 
         wh1 = url.find("youtube.com/@")
         if wh1 >= 0:
             start = wh1 + len("youtube.com/@") + 1
             wh2 = url.find("/", start + 1)
             if wh2 == -1:
-                return url[start-1:]
+                return url[start - 1 :]
             else:
-                return url[start-1:wh2]
+                return url[start - 1 : wh2]
 
     def input2code_channel(self, url):
         wh = url.rfind("/")
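The reformatted slices above cut the user or handle segment out of a YouTube channel URL. A quick check of what the "@" branch produces, assuming the behavior is exactly the slicing shown in the hunk:

    def extract_handle(url):
        # mirrors the "youtube.com/@" branch of YouTubeChannelHandler
        wh1 = url.find("youtube.com/@")
        if wh1 >= 0:
            start = wh1 + len("youtube.com/@") + 1
            wh2 = url.find("/", start + 1)
            if wh2 == -1:
                return url[start - 1 :]
            return url[start - 1 : wh2]

    print(extract_handle("https://www.youtube.com/@SomeChannel/videos"))  # SomeChannel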
@@ -1,13 +1,14 @@
 """
 Handler interface that can be implemented to provide more complex logic for reading meta data.
 """
+
 from webtoolkit import DefaultContentPage, calculate_hash_binary, calculate_hash
 
 
 class HandlerInterface(DefaultContentPage):
     """
     Handler interface can be implemented to provide more complex means for obtaining data from the internet.
-    For example to obtain data about YouTube video you can fetch JSON file from yt-dlp, but also ask 
+    For example to obtain data about YouTube video you can fetch JSON file from yt-dlp, but also ask
     return dislike page to obtain dislike ratio.
     """
 
@@ -89,6 +89,7 @@ class OdyseeVideoHandler(DefaultUrlHandler):
 
     def get_feeds(self):
         from .handlerchannelodysee import OdyseeChannelHandler
+
         feeds = OdyseeChannelHandler(channel_code=self.channel_code).get_feeds()
         return feeds
 
@@ -137,7 +137,9 @@ class YouTubeVideoHandler(DefaultCompoundChannelHandler):
         return super().get_social_data()
 
     def get_return_dislike_url_link(self):
-        return "https://returnyoutubedislikeapi.com/votes?videoId=" + self.get_video_code()
+        return (
+            "https://returnyoutubedislikeapi.com/votes?videoId=" + self.get_video_code()
+        )
 
     def get_view_count(self):
         """ """
@@ -25,6 +25,7 @@ class DefaultContentPage(ContentInterface):
     """
     Default content page that does not throw exceptions
     """
+
     def __init__(self, url, contents=""):
         super().__init__(url=url, contents=contents)
 
@@ -66,8 +67,9 @@ class JsonPage(ContentInterface):
     """
     JSON page
     """
+
     def __init__(self, url, contents):
-        """ Constructor """
+        """Constructor"""
         super().__init__(url=url, contents=contents)
 
         self.json_obj = None
@@ -80,9 +82,9 @@ class JsonPage(ContentInterface):
             # to be expected
             pass
 
-        #try:
+        # try:
         #    WebLogger.debug(f"Invalid json:{contents}")
-        #except Exception as E:
+        # except Exception as E:
         #    print(str(E))
 
     def is_valid(self) -> bool:
@@ -128,7 +130,7 @@ class JsonPage(ContentInterface):
 
 class RssPageEntry(ContentInterface):
     def __init__(self, feed_index, feed_entry, url, contents, page_object_properties):
-        """ Constructor """
+        """Constructor"""
         self.feed_index = feed_index
         self.feed_entry = feed_entry
         self.url = url
@@ -138,7 +140,7 @@ class RssPageEntry(ContentInterface):
         super().__init__(url=self.url, contents=contents)
 
     def get_properties(self):
-        """ Returns map of properties """
+        """Returns map of properties"""
         output_map = {}
 
         link = None
@@ -310,7 +312,7 @@ class RssPage(ContentInterface):
     """
 
     def __init__(self, url, contents):
-        """ Constructor """
+        """Constructor"""
         self.feed = None
 
         """
@@ -582,8 +584,9 @@ class RssContentReader(object):
     """
     RSS reader
     """
+
     def __init__(self, url, contents):
-        """ Constructor """
+        """Constructor"""
         self.contents = contents
         self.process()
 
@@ -608,9 +611,10 @@
 
 
 class OpmlPageEntry(ContentInterface):
-    """ OPML Page entry """
+    """OPML Page entry"""
+
     def __init__(self, url, contents, opml_entry):
-        """ Constructor """
+        """Constructor"""
         super().__init__(url=url, contents=contents)
         self.opml_entry = opml_entry
         self.title = None
@@ -718,7 +722,7 @@ class HtmlPage(ContentInterface):
     """
 
     def __init__(self, url, contents):
-        """ Constructor """
+        """Constructor"""
         super().__init__(url=url, contents=contents)
 
         if self.contents:
@@ -1139,9 +1143,9 @@ class HtmlPage(ContentInterface):
         # props["robots_txt_url"] = UrlLocation(self.url).get_robots_txt_url()
         # props["site_maps_urls"] = self.get_site_maps()
 
-        #props["links"] = self.get_links()
-        #props["links_inner"] = self.get_links_inner()
-        #props["links_outer"] = self.get_links_outer()
+        # props["links"] = self.get_links()
+        # props["links_inner"] = self.get_links_inner()
+        # props["links_outer"] = self.get_links_outer()
 
         props["favicons"] = self.get_favicons()
         props["contents"] = self.get_contents()
@@ -1289,7 +1293,7 @@ class XmlPage(ContentInterface):
     """
 
     def __init__(self, url, contents):
-        """ Constructor """
+        """Constructor"""
         super().__init__(url=url, contents=contents)
 
     def is_valid(self) -> bool: