webscout 1.2.6__tar.gz → 1.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- {webscout-1.2.6 → webscout-1.2.9}/PKG-INFO +20 -16
- {webscout-1.2.6 → webscout-1.2.9}/README.md +19 -15
- {webscout-1.2.6 → webscout-1.2.9}/setup.py +5 -4
- {webscout-1.2.6 → webscout-1.2.9}/webscout/__init__.py +0 -1
- {webscout-1.2.6 → webscout-1.2.9}/webscout/transcriber.py +496 -497
- webscout-1.2.9/webscout/version.py +2 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout.egg-info/PKG-INFO +20 -16
- webscout-1.2.6/webscout/version.py +0 -2
- {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/__init__.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/documents/__init__.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/documents/query_results_extractor.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/documents/webpage_content_extractor.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/networks/__init__.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/networks/filepath_converter.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/networks/google_searcher.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/networks/network_configs.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/networks/webpage_fetcher.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/utilsdw/__init__.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/utilsdw/enver.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/DeepWEBS/utilsdw/logger.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/LICENSE.md +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/setup.cfg +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout/AI.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout/AIbase.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout/AIutel.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout/DWEBS.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout/HelpingAI.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout/LLM.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout/__main__.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout/cli.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout/exceptions.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout/models.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout/utils.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout/webscout_search.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout/webscout_search_async.py +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout.egg-info/SOURCES.txt +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout.egg-info/dependency_links.txt +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout.egg-info/entry_points.txt +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout.egg-info/requires.txt +0 -0
- {webscout-1.2.6 → webscout-1.2.9}/webscout.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: webscout
|
|
3
|
-
Version: 1.2.6
|
|
3
|
+
Version: 1.2.9
|
|
4
4
|
Summary: Search for words, documents, images, videos, news, maps and text translation using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models and now can transcribe yt videos
|
|
5
5
|
Author: OEvortex
|
|
6
6
|
Author-email: helpingai5@gmail.com
|
|
@@ -229,17 +229,23 @@ def extract_transcript(video_id):
|
|
|
229
229
|
try:
|
|
230
230
|
transcript_list = transcriber.list_transcripts(video_id)
|
|
231
231
|
for transcript in transcript_list:
|
|
232
|
-
|
|
232
|
+
transcript_data_list = transcript.fetch()
|
|
233
233
|
lang = transcript.language
|
|
234
234
|
transcript_text = ""
|
|
235
235
|
if transcript.language_code == 'en':
|
|
236
|
-
for line in
|
|
237
|
-
|
|
236
|
+
for line in transcript_data_list:
|
|
237
|
+
start_time = line['start']
|
|
238
|
+
end_time = start_time + line['duration']
|
|
239
|
+
formatted_line = f"{start_time:.2f} - {end_time:.2f}: {line['text']}\n"
|
|
240
|
+
transcript_text += formatted_line
|
|
238
241
|
return transcript_text
|
|
239
242
|
elif transcript.is_translatable:
|
|
240
243
|
english_transcript_list = transcript.translate('en').fetch()
|
|
241
244
|
for line in english_transcript_list:
|
|
242
|
-
|
|
245
|
+
start_time = line['start']
|
|
246
|
+
end_time = start_time + line['duration']
|
|
247
|
+
formatted_line = f"{start_time:.2f} - {end_time:.2f}: {line['text']}\n"
|
|
248
|
+
transcript_text += formatted_line
|
|
243
249
|
return transcript_text
|
|
244
250
|
print("Transcript extraction failed. Please check the video URL.")
|
|
245
251
|
except Exception as e:
|
|
@@ -351,11 +357,12 @@ import logging
|
|
|
351
357
|
import sys
|
|
352
358
|
from itertools import chain
|
|
353
359
|
from random import shuffle
|
|
354
|
-
|
|
355
360
|
import requests
|
|
356
361
|
from webscout import AsyncWEBS
|
|
357
362
|
|
|
358
|
-
#
|
|
363
|
+
# If you have proxies, define them here
|
|
364
|
+
proxies = None
|
|
365
|
+
|
|
359
366
|
if sys.platform.lower().startswith("win"):
|
|
360
367
|
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
|
|
361
368
|
|
|
@@ -367,24 +374,21 @@ def get_words():
|
|
|
367
374
|
|
|
368
375
|
async def aget_results(word):
|
|
369
376
|
async with AsyncWEBS(proxies=proxies) as WEBS:
|
|
370
|
-
results =
|
|
377
|
+
results = await WEBS.text(word, max_results=None)
|
|
371
378
|
return results
|
|
372
379
|
|
|
373
380
|
async def main():
|
|
374
381
|
words = get_words()
|
|
375
382
|
shuffle(words)
|
|
376
|
-
tasks = []
|
|
377
|
-
for word in words[:10]:
|
|
378
|
-
tasks.append(aget_results(word))
|
|
383
|
+
tasks = [aget_results(word) for word in words[:10]]
|
|
379
384
|
results = await asyncio.gather(*tasks)
|
|
380
385
|
print(f"Done")
|
|
381
386
|
for r in chain.from_iterable(results):
|
|
382
387
|
print(r)
|
|
383
|
-
|
|
384
388
|
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
389
|
+
logging.basicConfig(level=logging.DEBUG)
|
|
390
|
+
|
|
391
|
+
await main()
|
|
388
392
|
```
|
|
389
393
|
It is important to note that the WEBS and AsyncWEBS classes should always be used as a context manager (with statement).
|
|
390
394
|
This ensures proper resource management and cleanup, as the context manager will automatically handle opening and closing the HTTP client connection.
|
|
@@ -685,6 +689,6 @@ def chat(model_name, system_message="You are Jarvis"):# system prompt
|
|
|
685
689
|
AI.chat()
|
|
686
690
|
|
|
687
691
|
if __name__ == "__main__":
|
|
688
|
-
model_name = "mistralai/Mistral-7B-Instruct-v0.
|
|
692
|
+
model_name = "mistralai/Mistral-7B-Instruct-v0.2" # name of the model you wish to use It supports ALL text generation models on deepinfra.com.
|
|
689
693
|
chat(model_name)
|
|
690
694
|
```
|
|
@@ -177,17 +177,23 @@ def extract_transcript(video_id):
|
|
|
177
177
|
try:
|
|
178
178
|
transcript_list = transcriber.list_transcripts(video_id)
|
|
179
179
|
for transcript in transcript_list:
|
|
180
|
-
|
|
180
|
+
transcript_data_list = transcript.fetch()
|
|
181
181
|
lang = transcript.language
|
|
182
182
|
transcript_text = ""
|
|
183
183
|
if transcript.language_code == 'en':
|
|
184
|
-
for line in
|
|
185
|
-
|
|
184
|
+
for line in transcript_data_list:
|
|
185
|
+
start_time = line['start']
|
|
186
|
+
end_time = start_time + line['duration']
|
|
187
|
+
formatted_line = f"{start_time:.2f} - {end_time:.2f}: {line['text']}\n"
|
|
188
|
+
transcript_text += formatted_line
|
|
186
189
|
return transcript_text
|
|
187
190
|
elif transcript.is_translatable:
|
|
188
191
|
english_transcript_list = transcript.translate('en').fetch()
|
|
189
192
|
for line in english_transcript_list:
|
|
190
|
-
|
|
193
|
+
start_time = line['start']
|
|
194
|
+
end_time = start_time + line['duration']
|
|
195
|
+
formatted_line = f"{start_time:.2f} - {end_time:.2f}: {line['text']}\n"
|
|
196
|
+
transcript_text += formatted_line
|
|
191
197
|
return transcript_text
|
|
192
198
|
print("Transcript extraction failed. Please check the video URL.")
|
|
193
199
|
except Exception as e:
|
|
@@ -299,11 +305,12 @@ import logging
|
|
|
299
305
|
import sys
|
|
300
306
|
from itertools import chain
|
|
301
307
|
from random import shuffle
|
|
302
|
-
|
|
303
308
|
import requests
|
|
304
309
|
from webscout import AsyncWEBS
|
|
305
310
|
|
|
306
|
-
#
|
|
311
|
+
# If you have proxies, define them here
|
|
312
|
+
proxies = None
|
|
313
|
+
|
|
307
314
|
if sys.platform.lower().startswith("win"):
|
|
308
315
|
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
|
|
309
316
|
|
|
@@ -315,24 +322,21 @@ def get_words():
|
|
|
315
322
|
|
|
316
323
|
async def aget_results(word):
|
|
317
324
|
async with AsyncWEBS(proxies=proxies) as WEBS:
|
|
318
|
-
results =
|
|
325
|
+
results = await WEBS.text(word, max_results=None)
|
|
319
326
|
return results
|
|
320
327
|
|
|
321
328
|
async def main():
|
|
322
329
|
words = get_words()
|
|
323
330
|
shuffle(words)
|
|
324
|
-
tasks = []
|
|
325
|
-
for word in words[:10]:
|
|
326
|
-
tasks.append(aget_results(word))
|
|
331
|
+
tasks = [aget_results(word) for word in words[:10]]
|
|
327
332
|
results = await asyncio.gather(*tasks)
|
|
328
333
|
print(f"Done")
|
|
329
334
|
for r in chain.from_iterable(results):
|
|
330
335
|
print(r)
|
|
331
|
-
|
|
332
336
|
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
337
|
+
logging.basicConfig(level=logging.DEBUG)
|
|
338
|
+
|
|
339
|
+
await main()
|
|
336
340
|
```
|
|
337
341
|
It is important to note that the WEBS and AsyncWEBS classes should always be used as a context manager (with statement).
|
|
338
342
|
This ensures proper resource management and cleanup, as the context manager will automatically handle opening and closing the HTTP client connection.
|
|
@@ -633,6 +637,6 @@ def chat(model_name, system_message="You are Jarvis"):# system prompt
|
|
|
633
637
|
AI.chat()
|
|
634
638
|
|
|
635
639
|
if __name__ == "__main__":
|
|
636
|
-
model_name = "mistralai/Mistral-7B-Instruct-v0.
|
|
640
|
+
model_name = "mistralai/Mistral-7B-Instruct-v0.2" # name of the model you wish to use It supports ALL text generation models on deepinfra.com.
|
|
637
641
|
chat(model_name)
|
|
638
642
|
```
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
from setuptools import setup, find_packages
|
|
2
2
|
|
|
3
|
-
version = None
|
|
4
|
-
with open("webscout/version.py") as version_file:
|
|
5
|
-
|
|
3
|
+
# version = None
|
|
4
|
+
# with open("webscout/version.py") as version_file:
|
|
5
|
+
# exec(version_file.read())
|
|
6
6
|
|
|
7
7
|
with open("README.md", encoding="utf-8") as f:
|
|
8
8
|
README = f.read()
|
|
9
9
|
|
|
10
10
|
setup(
|
|
11
11
|
name="webscout",
|
|
12
|
-
version="1.2.6",
|
|
12
|
+
version="1.2.9",
|
|
13
13
|
description="Search for words, documents, images, videos, news, maps and text translation using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models and now can transcribe yt videos",
|
|
14
14
|
long_description=README,
|
|
15
15
|
long_description_content_type="text/markdown",
|
|
@@ -54,6 +54,7 @@ setup(
|
|
|
54
54
|
"tiktoken",
|
|
55
55
|
"tldextract",
|
|
56
56
|
"orjson",
|
|
57
|
+
# "appdirs"
|
|
57
58
|
],
|
|
58
59
|
entry_points={
|
|
59
60
|
"console_scripts": [
|