webscout 2025.10.15__py3-none-any.whl → 2025.10.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/Extra/YTToolkit/README.md +1 -1
- webscout/Extra/tempmail/README.md +3 -3
- webscout/Provider/OPENAI/README.md +1 -1
- webscout/Provider/TTI/bing.py +4 -4
- webscout/__init__.py +1 -1
- webscout/client.py +4 -5
- webscout/litprinter/__init__.py +0 -42
- webscout/scout/README.md +59 -8
- webscout/scout/core/scout.py +62 -0
- webscout/scout/element.py +251 -45
- webscout/search/__init__.py +3 -4
- webscout/search/engines/bing/images.py +5 -2
- webscout/search/engines/bing/news.py +6 -4
- webscout/search/engines/bing/text.py +5 -2
- webscout/search/engines/yahoo/__init__.py +41 -0
- webscout/search/engines/yahoo/answers.py +16 -0
- webscout/search/engines/yahoo/base.py +34 -0
- webscout/search/engines/yahoo/images.py +324 -0
- webscout/search/engines/yahoo/maps.py +16 -0
- webscout/search/engines/yahoo/news.py +258 -0
- webscout/search/engines/yahoo/suggestions.py +140 -0
- webscout/search/engines/yahoo/text.py +273 -0
- webscout/search/engines/yahoo/translate.py +16 -0
- webscout/search/engines/yahoo/videos.py +302 -0
- webscout/search/engines/yahoo/weather.py +220 -0
- webscout/search/http_client.py +1 -1
- webscout/search/yahoo_main.py +54 -0
- webscout/{auth → server}/__init__.py +2 -23
- webscout/server/config.py +84 -0
- webscout/{auth → server}/request_processing.py +3 -28
- webscout/{auth → server}/routes.py +6 -148
- webscout/server/schemas.py +23 -0
- webscout/{auth → server}/server.py +11 -43
- webscout/server/simple_logger.py +84 -0
- webscout/version.py +1 -1
- webscout/version.py.bak +1 -1
- webscout/zeroart/README.md +17 -9
- webscout/zeroart/__init__.py +78 -6
- webscout/zeroart/effects.py +51 -1
- webscout/zeroart/fonts.py +559 -1
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/METADATA +10 -52
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/RECORD +49 -45
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/entry_points.txt +1 -1
- webscout/auth/api_key_manager.py +0 -189
- webscout/auth/auth_system.py +0 -85
- webscout/auth/config.py +0 -175
- webscout/auth/database.py +0 -755
- webscout/auth/middleware.py +0 -248
- webscout/auth/models.py +0 -185
- webscout/auth/rate_limiter.py +0 -254
- webscout/auth/schemas.py +0 -103
- webscout/auth/simple_logger.py +0 -236
- webscout/search/engines/yahoo.py +0 -65
- webscout/search/engines/yahoo_news.py +0 -64
- /webscout/{auth → server}/exceptions.py +0 -0
- /webscout/{auth → server}/providers.py +0 -0
- /webscout/{auth → server}/request_models.py +0 -0
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/WHEEL +0 -0
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/top_level.txt +0 -0
webscout/auth/schemas.py
DELETED
|
@@ -1,103 +0,0 @@
|
|
|
1
|
-
# webscout/auth/schemas.py
|
|
2
|
-
|
|
3
|
-
from datetime import datetime
|
|
4
|
-
from typing import Optional, Dict, Any
|
|
5
|
-
from pydantic import BaseModel, Field
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class APIKeyCreateRequest(BaseModel):
|
|
9
|
-
"""Request model for creating a new API key."""
|
|
10
|
-
username: str = Field(..., min_length=3, max_length=50, description="Username for the API key owner (required)")
|
|
11
|
-
telegram_id: str = Field(..., min_length=1, description="Telegram user ID (required)")
|
|
12
|
-
name: Optional[str] = Field(None, description="Optional name for the API key")
|
|
13
|
-
rate_limit: Optional[int] = Field(10, description="Rate limit per minute (default: 10)")
|
|
14
|
-
expires_in_days: Optional[int] = Field(None, description="Number of days until expiration")
|
|
15
|
-
metadata: Optional[Dict[str, Any]] = Field(default_factory=dict, description="Additional metadata")
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class APIKeyCreateResponse(BaseModel):
|
|
19
|
-
"""Response model for API key creation."""
|
|
20
|
-
api_key: str = Field(..., description="The generated API key")
|
|
21
|
-
key_id: str = Field(..., description="Unique identifier for the API key")
|
|
22
|
-
user_id: str = Field(..., description="User ID associated with the API key")
|
|
23
|
-
name: Optional[str] = Field(None, description="Name of the API key")
|
|
24
|
-
created_at: datetime = Field(..., description="Creation timestamp")
|
|
25
|
-
expires_at: Optional[datetime] = Field(None, description="Expiration timestamp")
|
|
26
|
-
rate_limit: int = Field(..., description="Rate limit per minute")
|
|
27
|
-
|
|
28
|
-
class Config:
|
|
29
|
-
json_encoders = {
|
|
30
|
-
datetime: lambda v: v.isoformat()
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
class APIKeyValidationResponse(BaseModel):
|
|
35
|
-
"""Response model for API key validation."""
|
|
36
|
-
valid: bool = Field(..., description="Whether the API key is valid")
|
|
37
|
-
user_id: Optional[str] = Field(None, description="User ID if key is valid")
|
|
38
|
-
key_id: Optional[str] = Field(None, description="Key ID if key is valid")
|
|
39
|
-
rate_limit: Optional[int] = Field(None, description="Rate limit per minute")
|
|
40
|
-
usage_count: Optional[int] = Field(None, description="Total usage count")
|
|
41
|
-
last_used_at: Optional[datetime] = Field(None, description="Last usage timestamp")
|
|
42
|
-
error: Optional[str] = Field(None, description="Error message if key is invalid")
|
|
43
|
-
|
|
44
|
-
class Config:
|
|
45
|
-
json_encoders = {
|
|
46
|
-
datetime: lambda v: v.isoformat()
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
class UserCreateRequest(BaseModel):
|
|
51
|
-
"""Request model for creating a new user."""
|
|
52
|
-
username: str = Field(..., min_length=3, max_length=50, description="Username for the new user")
|
|
53
|
-
telegram_id: str = Field(..., min_length=1, description="Telegram user ID (required)")
|
|
54
|
-
metadata: Optional[Dict[str, Any]] = Field(default_factory=dict, description="Additional metadata")
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
class UserResponse(BaseModel):
|
|
58
|
-
"""Response model for user information."""
|
|
59
|
-
id: str = Field(..., description="User ID")
|
|
60
|
-
username: str = Field(..., description="Username")
|
|
61
|
-
telegram_id: str = Field(..., description="Telegram user ID")
|
|
62
|
-
created_at: datetime = Field(..., description="Creation timestamp")
|
|
63
|
-
updated_at: datetime = Field(..., description="Last update timestamp")
|
|
64
|
-
is_active: bool = Field(..., description="Whether the user is active")
|
|
65
|
-
metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata")
|
|
66
|
-
|
|
67
|
-
class Config:
|
|
68
|
-
json_encoders = {
|
|
69
|
-
datetime: lambda v: v.isoformat()
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
class RateLimitStatus(BaseModel):
|
|
74
|
-
"""Response model for rate limit status."""
|
|
75
|
-
allowed: bool = Field(..., description="Whether the request is allowed")
|
|
76
|
-
limit: int = Field(..., description="Rate limit per minute")
|
|
77
|
-
remaining: int = Field(..., description="Remaining requests in current window")
|
|
78
|
-
reset_at: datetime = Field(..., description="When the rate limit resets")
|
|
79
|
-
retry_after: Optional[int] = Field(None, description="Seconds to wait before retry")
|
|
80
|
-
|
|
81
|
-
class Config:
|
|
82
|
-
json_encoders = {
|
|
83
|
-
datetime: lambda v: v.isoformat()
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
class ErrorResponse(BaseModel):
|
|
88
|
-
"""Standard error response model."""
|
|
89
|
-
error: str = Field(..., description="Error message")
|
|
90
|
-
code: str = Field(..., description="Error code")
|
|
91
|
-
details: Optional[Dict[str, Any]] = Field(None, description="Additional error details")
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
class HealthCheckResponse(BaseModel):
|
|
95
|
-
"""Health check response model."""
|
|
96
|
-
status: str = Field(..., description="Service status")
|
|
97
|
-
database: str = Field(..., description="Database status")
|
|
98
|
-
timestamp: datetime = Field(..., description="Check timestamp")
|
|
99
|
-
|
|
100
|
-
class Config:
|
|
101
|
-
json_encoders = {
|
|
102
|
-
datetime: lambda v: v.isoformat()
|
|
103
|
-
}
|
webscout/auth/simple_logger.py
DELETED
|
@@ -1,236 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Simple database logging for no-auth mode.
|
|
3
|
-
Logs API requests directly to Supabase without authentication.
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import os
|
|
7
|
-
import uuid
|
|
8
|
-
import asyncio
|
|
9
|
-
from datetime import datetime, timezone
|
|
10
|
-
from typing import Optional, Dict, Any
|
|
11
|
-
import json
|
|
12
|
-
|
|
13
|
-
from webscout.Litlogger import Logger, LogLevel, LogFormat, ConsoleHandler
|
|
14
|
-
import sys
|
|
15
|
-
|
|
16
|
-
# Setup logger
|
|
17
|
-
logger = Logger(
|
|
18
|
-
name="webscout.api.simple_db",
|
|
19
|
-
level=LogLevel.INFO,
|
|
20
|
-
handlers=[ConsoleHandler(stream=sys.stdout)],
|
|
21
|
-
fmt=LogFormat.DEFAULT
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
try:
|
|
25
|
-
from supabase import create_client, Client
|
|
26
|
-
SUPABASE_AVAILABLE = True
|
|
27
|
-
except ImportError:
|
|
28
|
-
logger.warning("Supabase not available. Install with: pip install supabase")
|
|
29
|
-
SUPABASE_AVAILABLE = False
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class SimpleRequestLogger:
|
|
33
|
-
"""Simple request logger for no-auth mode."""
|
|
34
|
-
|
|
35
|
-
def __init__(self):
|
|
36
|
-
self.supabase_client: Optional[Client] = None
|
|
37
|
-
self.initialize_supabase()
|
|
38
|
-
|
|
39
|
-
def initialize_supabase(self):
|
|
40
|
-
"""Initialize Supabase client if credentials are available."""
|
|
41
|
-
if not SUPABASE_AVAILABLE:
|
|
42
|
-
logger.warning("Supabase package not installed. Request logging disabled.")
|
|
43
|
-
return
|
|
44
|
-
|
|
45
|
-
supabase_url = os.getenv("SUPABASE_URL")
|
|
46
|
-
supabase_key = os.getenv("SUPABASE_ANON_KEY")
|
|
47
|
-
|
|
48
|
-
if supabase_url and supabase_key:
|
|
49
|
-
try:
|
|
50
|
-
self.supabase_client = create_client(supabase_url, supabase_key)
|
|
51
|
-
logger.info("Supabase client initialized for request logging")
|
|
52
|
-
|
|
53
|
-
except Exception as e:
|
|
54
|
-
logger.error(f"Failed to initialize Supabase client: {e}")
|
|
55
|
-
self.supabase_client = None
|
|
56
|
-
else:
|
|
57
|
-
logger.info("Supabase credentials not found. Request logging disabled.")
|
|
58
|
-
|
|
59
|
-
async def log_request(
|
|
60
|
-
self,
|
|
61
|
-
request_id: str,
|
|
62
|
-
ip_address: str,
|
|
63
|
-
model: str,
|
|
64
|
-
question: str,
|
|
65
|
-
answer: str,
|
|
66
|
-
provider: Optional[str] = None,
|
|
67
|
-
request_time: Optional[datetime] = None,
|
|
68
|
-
response_time: Optional[datetime] = None,
|
|
69
|
-
processing_time_ms: Optional[float] = None,
|
|
70
|
-
tokens_used: Optional[int] = None,
|
|
71
|
-
error: Optional[str] = None,
|
|
72
|
-
user_agent: Optional[str] = None
|
|
73
|
-
) -> bool:
|
|
74
|
-
"""
|
|
75
|
-
Log API request details to Supabase.
|
|
76
|
-
|
|
77
|
-
Args:
|
|
78
|
-
request_id: Unique identifier for the request
|
|
79
|
-
ip_address: Client IP address
|
|
80
|
-
model: Model used for the request
|
|
81
|
-
question: User's question/prompt
|
|
82
|
-
answer: AI's response
|
|
83
|
-
provider: Provider used (e.g., ChatGPT, Claude, etc.)
|
|
84
|
-
request_time: When the request was received
|
|
85
|
-
response_time: When the response was sent
|
|
86
|
-
processing_time_ms: Processing time in milliseconds
|
|
87
|
-
tokens_used: Number of tokens consumed
|
|
88
|
-
error: Error message if any
|
|
89
|
-
user_agent: User agent string
|
|
90
|
-
|
|
91
|
-
Returns:
|
|
92
|
-
bool: True if logged successfully, False otherwise
|
|
93
|
-
"""
|
|
94
|
-
if not self.supabase_client:
|
|
95
|
-
# Still log to console for debugging
|
|
96
|
-
logger.info(f"Request {request_id}: {model} - {question[:100]}...")
|
|
97
|
-
return False
|
|
98
|
-
|
|
99
|
-
if not request_time:
|
|
100
|
-
request_time = datetime.now(timezone.utc)
|
|
101
|
-
|
|
102
|
-
if not response_time:
|
|
103
|
-
response_time = datetime.now(timezone.utc)
|
|
104
|
-
|
|
105
|
-
try:
|
|
106
|
-
data = {
|
|
107
|
-
"request_id": request_id,
|
|
108
|
-
"ip_address": ip_address,
|
|
109
|
-
"model": model,
|
|
110
|
-
"provider": provider or "unknown",
|
|
111
|
-
"question": question[:2000] if question else "", # Truncate long questions
|
|
112
|
-
"answer": answer[:5000] if answer else "", # Truncate long answers
|
|
113
|
-
"request_time": request_time.isoformat(),
|
|
114
|
-
"response_time": response_time.isoformat(),
|
|
115
|
-
"processing_time_ms": processing_time_ms,
|
|
116
|
-
"tokens_used": tokens_used,
|
|
117
|
-
"error": error[:1000] if error else None, # Truncate long errors
|
|
118
|
-
"user_agent": user_agent[:500] if user_agent else None,
|
|
119
|
-
"created_at": datetime.now(timezone.utc).isoformat()
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
result = self.supabase_client.table("api_requests").insert(data).execute()
|
|
123
|
-
|
|
124
|
-
if result.data:
|
|
125
|
-
logger.info(f"✅ Request {request_id} logged to database")
|
|
126
|
-
return True
|
|
127
|
-
else:
|
|
128
|
-
logger.error(f"❌ Failed to log request {request_id}: No data returned")
|
|
129
|
-
return False
|
|
130
|
-
|
|
131
|
-
except Exception as e:
|
|
132
|
-
logger.error(f"❌ Failed to log request {request_id}: {e}")
|
|
133
|
-
return False
|
|
134
|
-
|
|
135
|
-
async def get_recent_requests(self, limit: int = 10) -> Dict[str, Any]:
|
|
136
|
-
"""Get recent API requests for monitoring."""
|
|
137
|
-
if not self.supabase_client:
|
|
138
|
-
return {"error": "Database not available", "requests": []}
|
|
139
|
-
|
|
140
|
-
try:
|
|
141
|
-
result = self.supabase_client.table("api_requests")\
|
|
142
|
-
.select("request_id, ip_address, model, provider, created_at, processing_time_ms, error")\
|
|
143
|
-
.order("created_at", desc=True)\
|
|
144
|
-
.limit(limit)\
|
|
145
|
-
.execute()
|
|
146
|
-
|
|
147
|
-
return {
|
|
148
|
-
"requests": result.data if result.data else [],
|
|
149
|
-
"count": len(result.data) if result.data else 0
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
except Exception as e:
|
|
153
|
-
logger.error(f"Failed to get recent requests: {e}")
|
|
154
|
-
return {"error": str(e), "requests": []}
|
|
155
|
-
|
|
156
|
-
async def get_stats(self) -> Dict[str, Any]:
|
|
157
|
-
"""Get basic statistics about API usage."""
|
|
158
|
-
if not self.supabase_client:
|
|
159
|
-
return {"error": "Database not available"}
|
|
160
|
-
|
|
161
|
-
try:
|
|
162
|
-
# Get total requests today
|
|
163
|
-
today = datetime.now(timezone.utc).date().isoformat()
|
|
164
|
-
|
|
165
|
-
today_requests = self.supabase_client.table("api_requests")\
|
|
166
|
-
.select("request_id", count="exact")\
|
|
167
|
-
.gte("created_at", f"{today}T00:00:00Z")\
|
|
168
|
-
.execute()
|
|
169
|
-
|
|
170
|
-
# Get requests by model (last 100)
|
|
171
|
-
model_requests = self.supabase_client.table("api_requests")\
|
|
172
|
-
.select("model")\
|
|
173
|
-
.order("created_at", desc=True)\
|
|
174
|
-
.limit(100)\
|
|
175
|
-
.execute()
|
|
176
|
-
|
|
177
|
-
model_counts = {}
|
|
178
|
-
if model_requests.data:
|
|
179
|
-
for req in model_requests.data:
|
|
180
|
-
model = req.get("model", "unknown")
|
|
181
|
-
model_counts[model] = model_counts.get(model, 0) + 1
|
|
182
|
-
|
|
183
|
-
return {
|
|
184
|
-
"today_requests": today_requests.count if hasattr(today_requests, 'count') else 0,
|
|
185
|
-
"model_usage": model_counts,
|
|
186
|
-
"available": True
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
except Exception as e:
|
|
190
|
-
logger.error(f"Failed to get stats: {e}")
|
|
191
|
-
return {"error": str(e), "available": False}
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
# Global instance
|
|
195
|
-
request_logger = SimpleRequestLogger()
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
async def log_api_request(
|
|
199
|
-
request_id: str,
|
|
200
|
-
ip_address: str,
|
|
201
|
-
model: str,
|
|
202
|
-
question: str,
|
|
203
|
-
answer: str,
|
|
204
|
-
**kwargs
|
|
205
|
-
) -> bool:
|
|
206
|
-
"""Convenience function to log API requests."""
|
|
207
|
-
return await request_logger.log_request(
|
|
208
|
-
request_id=request_id,
|
|
209
|
-
ip_address=ip_address,
|
|
210
|
-
model=model,
|
|
211
|
-
question=question,
|
|
212
|
-
answer=answer,
|
|
213
|
-
**kwargs
|
|
214
|
-
)
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
def get_client_ip(request) -> str:
|
|
218
|
-
"""Extract client IP address from request."""
|
|
219
|
-
# Check for X-Forwarded-For header (common with proxies/load balancers)
|
|
220
|
-
forwarded_for = request.headers.get("X-Forwarded-For")
|
|
221
|
-
if forwarded_for:
|
|
222
|
-
# Take the first IP in the chain
|
|
223
|
-
return forwarded_for.split(",")[0].strip()
|
|
224
|
-
|
|
225
|
-
# Check for X-Real-IP header
|
|
226
|
-
real_ip = request.headers.get("X-Real-IP")
|
|
227
|
-
if real_ip:
|
|
228
|
-
return real_ip.strip()
|
|
229
|
-
|
|
230
|
-
# Fall back to direct client IP
|
|
231
|
-
return getattr(request.client, "host", "unknown")
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
def generate_request_id() -> str:
|
|
235
|
-
"""Generate a unique request ID."""
|
|
236
|
-
return str(uuid.uuid4())
|
webscout/search/engines/yahoo.py
DELETED
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
"""Yahoo search engine."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from collections.abc import Mapping
|
|
6
|
-
from secrets import token_urlsafe
|
|
7
|
-
from typing import Any
|
|
8
|
-
from urllib.parse import unquote_plus
|
|
9
|
-
|
|
10
|
-
from ..base import BaseSearchEngine
|
|
11
|
-
from ..results import TextResult
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def extract_url(u: str) -> str:
|
|
15
|
-
"""Sanitize url."""
|
|
16
|
-
if "/RU=" in u:
|
|
17
|
-
start = u.find("/RU=") + 4
|
|
18
|
-
end = u.find("/RK=", start)
|
|
19
|
-
if end == -1:
|
|
20
|
-
end = len(u)
|
|
21
|
-
return unquote_plus(u[start:end])
|
|
22
|
-
return u
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class Yahoo(BaseSearchEngine[TextResult]):
|
|
26
|
-
"""Yahoo search engine."""
|
|
27
|
-
|
|
28
|
-
name = "yahoo"
|
|
29
|
-
category = "text"
|
|
30
|
-
provider = "bing"
|
|
31
|
-
|
|
32
|
-
search_url = "https://search.yahoo.com/search"
|
|
33
|
-
search_method = "GET"
|
|
34
|
-
|
|
35
|
-
items_xpath = "//div[contains(@class, 'relsrch')]"
|
|
36
|
-
elements_xpath: Mapping[str, str] = {
|
|
37
|
-
"title": ".//div[contains(@class, 'Title')]//h3//text()",
|
|
38
|
-
"href": ".//div[contains(@class, 'Title')]//a/@href",
|
|
39
|
-
"body": ".//div[contains(@class, 'Text')]//text()",
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
def build_payload(
|
|
43
|
-
self, query: str, region: str, safesearch: str, timelimit: str | None, page: int = 1, **kwargs: Any
|
|
44
|
-
) -> dict[str, Any]:
|
|
45
|
-
"""Build a payload for the search request."""
|
|
46
|
-
self.search_url = (
|
|
47
|
-
f"https://search.yahoo.com/search;_ylt={token_urlsafe(24 * 3 // 4)};_ylu={token_urlsafe(47 * 3 // 4)}"
|
|
48
|
-
)
|
|
49
|
-
payload = {"p": query}
|
|
50
|
-
if page > 1:
|
|
51
|
-
payload["b"] = f"{(page - 1) * 7 + 1}"
|
|
52
|
-
if timelimit:
|
|
53
|
-
payload["btf"] = timelimit
|
|
54
|
-
return payload
|
|
55
|
-
|
|
56
|
-
def post_extract_results(self, results: list[TextResult]) -> list[TextResult]:
|
|
57
|
-
"""Post-process search results."""
|
|
58
|
-
post_results = []
|
|
59
|
-
for result in results:
|
|
60
|
-
if result.href.startswith("https://www.bing.com/aclick?"):
|
|
61
|
-
continue
|
|
62
|
-
if "/RU=" in result.href:
|
|
63
|
-
result.href = extract_url(result.href)
|
|
64
|
-
post_results.append(result)
|
|
65
|
-
return post_results
|
webscout/search/engines/yahoo_news.py
DELETED
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
"""Yahoo news search engine."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from collections.abc import Mapping
|
|
6
|
-
from secrets import token_urlsafe
|
|
7
|
-
from typing import Any
|
|
8
|
-
|
|
9
|
-
from ..base import BaseSearchEngine
|
|
10
|
-
from ..results import NewsResult
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def extract_image(u: str) -> str:
|
|
14
|
-
"""Sanitize image url."""
|
|
15
|
-
if u and u.startswith("data:image"):
|
|
16
|
-
return ""
|
|
17
|
-
return u
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def extract_source(s: str) -> str:
|
|
21
|
-
"""Remove ' via Yahoo' from string."""
|
|
22
|
-
return s.replace(" via Yahoo", "") if s else s
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class YahooNews(BaseSearchEngine[NewsResult]):
|
|
26
|
-
"""Yahoo news search engine."""
|
|
27
|
-
|
|
28
|
-
name = "yahoo"
|
|
29
|
-
category = "news"
|
|
30
|
-
provider = "bing"
|
|
31
|
-
|
|
32
|
-
search_url = "https://news.search.yahoo.com/search"
|
|
33
|
-
search_method = "GET"
|
|
34
|
-
|
|
35
|
-
items_xpath = "//div[contains(@class, 'NewsArticle')]"
|
|
36
|
-
elements_xpath: Mapping[str, str] = {
|
|
37
|
-
"date": ".//span[contains(@class, 'fc-2nd')]//text()",
|
|
38
|
-
"title": ".//h4//a//text()",
|
|
39
|
-
"url": ".//h4//a/@href",
|
|
40
|
-
"body": ".//p//text()",
|
|
41
|
-
"image": ".//img/@src",
|
|
42
|
-
"source": ".//span[contains(@class, 's-source')]//text()",
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
def build_payload(
|
|
46
|
-
self, query: str, region: str, safesearch: str, timelimit: str | None, page: int = 1, **kwargs: Any
|
|
47
|
-
) -> dict[str, Any]:
|
|
48
|
-
"""Build a payload for the search request."""
|
|
49
|
-
self.search_url = (
|
|
50
|
-
f"https://news.search.yahoo.com/search;_ylt={token_urlsafe(24 * 3 // 4)};_ylu={token_urlsafe(47 * 3 // 4)}"
|
|
51
|
-
)
|
|
52
|
-
payload = {"p": query}
|
|
53
|
-
if page > 1:
|
|
54
|
-
payload["b"] = f"{(page - 1) * 10 + 1}"
|
|
55
|
-
if timelimit:
|
|
56
|
-
payload["btf"] = timelimit
|
|
57
|
-
return payload
|
|
58
|
-
|
|
59
|
-
def post_extract_results(self, results: list[NewsResult]) -> list[NewsResult]:
|
|
60
|
-
"""Post-process search results."""
|
|
61
|
-
for result in results:
|
|
62
|
-
result.image = extract_image(result.image)
|
|
63
|
-
result.source = extract_source(result.source)
|
|
64
|
-
return results
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|