webscout 1.2.6.tar.gz → 1.2.9.tar.gz

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in the public registry.

Potentially problematic release: this version of webscout has been flagged as possibly problematic.
Files changed (40)
  1. {webscout-1.2.6 → webscout-1.2.9}/PKG-INFO +20 -16
  2. {webscout-1.2.6 → webscout-1.2.9}/README.md +19 -15
  3. {webscout-1.2.6 → webscout-1.2.9}/setup.py +5 -4
  4. {webscout-1.2.6 → webscout-1.2.9}/webscout/__init__.py +0 -1
  5. {webscout-1.2.6 → webscout-1.2.9}/webscout/transcriber.py +496 -497
  6. webscout-1.2.9/webscout/version.py +2 -0
  7. {webscout-1.2.6 → webscout-1.2.9}/webscout.egg-info/PKG-INFO +20 -16
  8. webscout-1.2.6/webscout/version.py +0 -2
  9. {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/__init__.py +0 -0
  10. {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/documents/__init__.py +0 -0
  11. {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/documents/query_results_extractor.py +0 -0
  12. {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/documents/webpage_content_extractor.py +0 -0
  13. {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/networks/__init__.py +0 -0
  14. {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/networks/filepath_converter.py +0 -0
  15. {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/networks/google_searcher.py +0 -0
  16. {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/networks/network_configs.py +0 -0
  17. {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/networks/webpage_fetcher.py +0 -0
  18. {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/utilsdw/__init__.py +0 -0
  19. {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/utilsdw/enver.py +0 -0
  20. {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/utilsdw/logger.py +0 -0
  21. {webscout-1.2.6 → webscout-1.2.9}/LICENSE.md +0 -0
  22. {webscout-1.2.6 → webscout-1.2.9}/setup.cfg +0 -0
  23. {webscout-1.2.6 → webscout-1.2.9}/webscout/AI.py +0 -0
  24. {webscout-1.2.6 → webscout-1.2.9}/webscout/AIbase.py +0 -0
  25. {webscout-1.2.6 → webscout-1.2.9}/webscout/AIutel.py +0 -0
  26. {webscout-1.2.6 → webscout-1.2.9}/webscout/DWEBS.py +0 -0
  27. {webscout-1.2.6 → webscout-1.2.9}/webscout/HelpingAI.py +0 -0
  28. {webscout-1.2.6 → webscout-1.2.9}/webscout/LLM.py +0 -0
  29. {webscout-1.2.6 → webscout-1.2.9}/webscout/__main__.py +0 -0
  30. {webscout-1.2.6 → webscout-1.2.9}/webscout/cli.py +0 -0
  31. {webscout-1.2.6 → webscout-1.2.9}/webscout/exceptions.py +0 -0
  32. {webscout-1.2.6 → webscout-1.2.9}/webscout/models.py +0 -0
  33. {webscout-1.2.6 → webscout-1.2.9}/webscout/utils.py +0 -0
  34. {webscout-1.2.6 → webscout-1.2.9}/webscout/webscout_search.py +0 -0
  35. {webscout-1.2.6 → webscout-1.2.9}/webscout/webscout_search_async.py +0 -0
  36. {webscout-1.2.6 → webscout-1.2.9}/webscout.egg-info/SOURCES.txt +0 -0
  37. {webscout-1.2.6 → webscout-1.2.9}/webscout.egg-info/dependency_links.txt +0 -0
  38. {webscout-1.2.6 → webscout-1.2.9}/webscout.egg-info/entry_points.txt +0 -0
  39. {webscout-1.2.6 → webscout-1.2.9}/webscout.egg-info/requires.txt +0 -0
  40. {webscout-1.2.6 → webscout-1.2.9}/webscout.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: webscout
- Version: 1.2.6
+ Version: 1.2.9
  Summary: Search for words, documents, images, videos, news, maps and text translation using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models and now can transcribe yt videos
  Author: OEvortex
  Author-email: helpingai5@gmail.com
@@ -229,17 +229,23 @@ def extract_transcript(video_id):
      try:
          transcript_list = transcriber.list_transcripts(video_id)
          for transcript in transcript_list:
-             transcript_text_list = transcript.fetch()
+             transcript_data_list = transcript.fetch()
              lang = transcript.language
              transcript_text = ""
              if transcript.language_code == 'en':
-                 for line in transcript_text_list:
-                     transcript_text += " " + line["text"]
+                 for line in transcript_data_list:
+                     start_time = line['start']
+                     end_time = start_time + line['duration']
+                     formatted_line = f"{start_time:.2f} - {end_time:.2f}: {line['text']}\n"
+                     transcript_text += formatted_line
                  return transcript_text
              elif transcript.is_translatable:
                  english_transcript_list = transcript.translate('en').fetch()
                  for line in english_transcript_list:
-                     transcript_text += " " + line["text"]
+                     start_time = line['start']
+                     end_time = start_time + line['duration']
+                     formatted_line = f"{start_time:.2f} - {end_time:.2f}: {line['text']}\n"
+                     transcript_text += formatted_line
                  return transcript_text
          print("Transcript extraction failed. Please check the video URL.")
      except Exception as e:
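
The transcriber change above replaces the plain space-joined transcript text with timestamped lines. Below is a minimal sketch of the new formatting step in isolation, using an illustrative entry dict with the 'start', 'duration', and 'text' keys shown in the diff; the sample values are made up, not taken from a real video.

```python
# Minimal sketch of the timestamp formatting introduced in the diff above.
# The entry dict mirrors what transcript.fetch() is expected to return;
# the values here are illustrative only.
entry = {"start": 12.34, "duration": 3.5, "text": "hello world"}

start_time = entry["start"]
end_time = start_time + entry["duration"]
formatted_line = f"{start_time:.2f} - {end_time:.2f}: {entry['text']}\n"

print(formatted_line, end="")  # -> 12.34 - 15.84: hello world
```

Each fetched caption entry is now rendered as a "start - end: text" line, so the returned transcript carries timing information instead of a single run of text.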
@@ -351,11 +357,12 @@ import logging
  import sys
  from itertools import chain
  from random import shuffle
-
  import requests
  from webscout import AsyncWEBS

- # bypass curl-cffi NotImplementedError in windows https://curl-cffi.readthedocs.io/en/latest/faq/
+ # If you have proxies, define them here
+ proxies = None
+
  if sys.platform.lower().startswith("win"):
      asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

@@ -367,24 +374,21 @@ def get_words():

  async def aget_results(word):
      async with AsyncWEBS(proxies=proxies) as WEBS:
-         results = [r async for r in WEBS.text(word, max_results=None)]
+         results = await WEBS.text(word, max_results=None)
      return results

  async def main():
      words = get_words()
      shuffle(words)
-     tasks = []
-     for word in words[:10]:
-         tasks.append(aget_results(word))
+     tasks = [aget_results(word) for word in words[:10]]
      results = await asyncio.gather(*tasks)
      print(f"Done")
      for r in chain.from_iterable(results):
          print(r)
-

- if __name__ == "__main__":
-     logging.basicConfig(level=logging.DEBUG)
-     asyncio.run(main())
+ logging.basicConfig(level=logging.DEBUG)
+
+ await main()
  ```
  It is important to note that the WEBS and AsyncWEBS classes should always be used as a context manager (with statement).
  This ensures proper resource management and cleanup, as the context manager will automatically handle opening and closing the HTTP client connection.
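
As a minimal sketch of the context-manager usage described above: the query string and max_results value are illustrative, and the synchronous WEBS class is assumed to expose the same text() method that the async example in the diff uses.

```python
from webscout import WEBS

# Using WEBS as a context manager ensures the underlying HTTP client
# is opened and closed automatically, as the README recommends.
with WEBS() as webs:
    # Illustrative query and result limit; adjust as needed.
    for result in webs.text("python web scraping", max_results=5):
        print(result)
```

AsyncWEBS follows the same pattern with `async with`, as shown in the diffed example above.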
@@ -685,6 +689,6 @@ def chat(model_name, system_message="You are Jarvis"):# system prompt
      AI.chat()

  if __name__ == "__main__":
-     model_name = "mistralai/Mistral-7B-Instruct-v0.1" # name of the model you wish to use It supports ALL text generation models on deepinfra.com.
+     model_name = "mistralai/Mistral-7B-Instruct-v0.2" # name of the model you wish to use It supports ALL text generation models on deepinfra.com.
      chat(model_name)
  ```
@@ -177,17 +177,23 @@ def extract_transcript(video_id):
      try:
          transcript_list = transcriber.list_transcripts(video_id)
          for transcript in transcript_list:
-             transcript_text_list = transcript.fetch()
+             transcript_data_list = transcript.fetch()
              lang = transcript.language
              transcript_text = ""
              if transcript.language_code == 'en':
-                 for line in transcript_text_list:
-                     transcript_text += " " + line["text"]
+                 for line in transcript_data_list:
+                     start_time = line['start']
+                     end_time = start_time + line['duration']
+                     formatted_line = f"{start_time:.2f} - {end_time:.2f}: {line['text']}\n"
+                     transcript_text += formatted_line
                  return transcript_text
              elif transcript.is_translatable:
                  english_transcript_list = transcript.translate('en').fetch()
                  for line in english_transcript_list:
-                     transcript_text += " " + line["text"]
+                     start_time = line['start']
+                     end_time = start_time + line['duration']
+                     formatted_line = f"{start_time:.2f} - {end_time:.2f}: {line['text']}\n"
+                     transcript_text += formatted_line
                  return transcript_text
          print("Transcript extraction failed. Please check the video URL.")
      except Exception as e:
@@ -299,11 +305,12 @@ import logging
  import sys
  from itertools import chain
  from random import shuffle
-
  import requests
  from webscout import AsyncWEBS

- # bypass curl-cffi NotImplementedError in windows https://curl-cffi.readthedocs.io/en/latest/faq/
+ # If you have proxies, define them here
+ proxies = None
+
  if sys.platform.lower().startswith("win"):
      asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

@@ -315,24 +322,21 @@ def get_words():

  async def aget_results(word):
      async with AsyncWEBS(proxies=proxies) as WEBS:
-         results = [r async for r in WEBS.text(word, max_results=None)]
+         results = await WEBS.text(word, max_results=None)
      return results

  async def main():
      words = get_words()
      shuffle(words)
-     tasks = []
-     for word in words[:10]:
-         tasks.append(aget_results(word))
+     tasks = [aget_results(word) for word in words[:10]]
      results = await asyncio.gather(*tasks)
      print(f"Done")
      for r in chain.from_iterable(results):
          print(r)
-

- if __name__ == "__main__":
-     logging.basicConfig(level=logging.DEBUG)
-     asyncio.run(main())
+ logging.basicConfig(level=logging.DEBUG)
+
+ await main()
  ```
  It is important to note that the WEBS and AsyncWEBS classes should always be used as a context manager (with statement).
  This ensures proper resource management and cleanup, as the context manager will automatically handle opening and closing the HTTP client connection.
@@ -633,6 +637,6 @@ def chat(model_name, system_message="You are Jarvis"):# system prompt
      AI.chat()

  if __name__ == "__main__":
-     model_name = "mistralai/Mistral-7B-Instruct-v0.1" # name of the model you wish to use It supports ALL text generation models on deepinfra.com.
+     model_name = "mistralai/Mistral-7B-Instruct-v0.2" # name of the model you wish to use It supports ALL text generation models on deepinfra.com.
      chat(model_name)
  ```
@@ -1,15 +1,15 @@
  from setuptools import setup, find_packages

- version = None
- with open("webscout/version.py") as version_file:
-     exec(version_file.read())
+ # version = None
+ # with open("webscout/version.py") as version_file:
+ # exec(version_file.read())

  with open("README.md", encoding="utf-8") as f:
      README = f.read()

  setup(
      name="webscout",
-     version="1.2.6",
+     version="1.2.9",
      description="Search for words, documents, images, videos, news, maps and text translation using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models and now can transcribe yt videos",
      long_description=README,
      long_description_content_type="text/markdown",
@@ -54,6 +54,7 @@ setup(
          "tiktoken",
          "tldextract",
          "orjson",
+         # "appdirs"
      ],
      entry_points={
          "console_scripts": [
@@ -9,7 +9,6 @@ from .webscout_search import WEBS
  from .webscout_search_async import AsyncWEBS
  from .version import __version__
  from .DWEBS import DeepWEBS
- from .AIutel import appdir
  from .transcriber import transcriber
