spotifyscraper 2.0.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spotify_scraper/__init__.py +92 -0
- spotify_scraper/__main__.py +10 -0
- spotify_scraper/auth/__init__.py +101 -0
- spotify_scraper/auth/session.py +269 -0
- spotify_scraper/browsers/__init__.py +73 -0
- spotify_scraper/browsers/base.py +206 -0
- spotify_scraper/browsers/requests_browser.py +289 -0
- spotify_scraper/browsers/selenium_browser.py +203 -0
- spotify_scraper/cli/__init__.py +116 -0
- spotify_scraper/cli/commands/__init__.py +10 -0
- spotify_scraper/cli/commands/album.py +157 -0
- spotify_scraper/cli/commands/artist.py +191 -0
- spotify_scraper/cli/commands/download.py +402 -0
- spotify_scraper/cli/commands/playlist.py +226 -0
- spotify_scraper/cli/commands/track.py +143 -0
- spotify_scraper/cli/utils.py +280 -0
- spotify_scraper/client.py +728 -0
- spotify_scraper/config_manager.py +791 -0
- spotify_scraper/constants.py +83 -0
- spotify_scraper/core/client.py +132 -0
- spotify_scraper/core/config.py +307 -0
- spotify_scraper/core/constants.py +78 -0
- spotify_scraper/core/exceptions.py +218 -0
- spotify_scraper/core/scraper.py +179 -0
- spotify_scraper/core/types.py +218 -0
- spotify_scraper/extractors/__init__.py +17 -0
- spotify_scraper/extractors/album.py +423 -0
- spotify_scraper/extractors/artist.py +325 -0
- spotify_scraper/extractors/playlist.py +433 -0
- spotify_scraper/extractors/track.py +328 -0
- spotify_scraper/media/__init__.py +14 -0
- spotify_scraper/media/audio.py +283 -0
- spotify_scraper/media/image.py +252 -0
- spotify_scraper/parsers/json_parser.py +542 -0
- spotify_scraper/py.typed +2 -0
- spotify_scraper/utils/common.py +859 -0
- spotify_scraper/utils/logger.py +134 -0
- spotify_scraper/utils/url.py +533 -0
- spotifyscraper-2.0.1.dist-info/METADATA +411 -0
- spotifyscraper-2.0.1.dist-info/RECORD +44 -0
- spotifyscraper-2.0.1.dist-info/WHEEL +6 -0
- spotifyscraper-2.0.1.dist-info/entry_points.txt +2 -0
- spotifyscraper-2.0.1.dist-info/licenses/LICENSE +21 -0
- spotifyscraper-2.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""SpotifyScraper - Modern Spotify Web Scraper.
|
|
2
|
+
|
|
3
|
+
A fast, modern Python library for extracting data from Spotify's web player.
|
|
4
|
+
Supports tracks, albums, artists, playlists, and lyrics with both requests and
|
|
5
|
+
Selenium backends.
|
|
6
|
+
|
|
7
|
+
This package provides a high-level interface for extracting metadata from Spotify's
|
|
8
|
+
web player without requiring API authentication. It parses Spotify's React-based
|
|
9
|
+
web interface to extract structured data.
|
|
10
|
+
|
|
11
|
+
Key Features:
|
|
12
|
+
- Extract metadata for tracks, albums, artists, and playlists
|
|
13
|
+
- Download preview audio clips and cover images
|
|
14
|
+
- Support for both lightweight (requests) and full (Selenium) browsers
|
|
15
|
+
- No API key required - works with public Spotify web pages
|
|
16
|
+
- Type-safe data structures using TypedDict
|
|
17
|
+
- Comprehensive error handling with specific exception types
|
|
18
|
+
|
|
19
|
+
Typical usage example:
|
|
20
|
+
from spotify_scraper import SpotifyClient
|
|
21
|
+
|
|
22
|
+
# Create a client
|
|
23
|
+
client = SpotifyClient()
|
|
24
|
+
|
|
25
|
+
# Extract track information
|
|
26
|
+
track_url = "https://open.spotify.com/track/..."
track_data = client.get_track_info(track_url)
|
|
27
|
+
print(f"Track: {track_data['name']} by {track_data['artists'][0]['name']}")
|
|
28
|
+
|
|
29
|
+
# Download preview and cover
|
|
30
|
+
client.download_preview_mp3(track_url, path="downloads/")
|
|
31
|
+
client.download_cover(track_url, path="covers/")
|
|
32
|
+
|
|
33
|
+
For authenticated features (e.g., lyrics), provide cookies:
|
|
34
|
+
client = SpotifyClient(cookie_file="cookies.txt")
|
|
35
|
+
track_with_lyrics = client.get_track_info_with_lyrics(track_url)
|
|
36
|
+
|
|
37
|
+
Note:
|
|
38
|
+
This library is designed for educational and personal use. Always respect
|
|
39
|
+
Spotify's Terms of Service and robots.txt when using this library.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
# NOTE: the version/author metadata is defined *before* the imports below and
# that order is kept on purpose, so the names already exist while the
# submodules are being imported.
__version__ = "2.0.1"
__author__ = "Ali Akhtari"
__email__ = "aliakhtari78@hotmail.com"
__license__ = "MIT"
__url__ = "https://github.com/AliAkhtari78/SpotifyScraper"

# Core imports for easy access
from spotify_scraper.client import SpotifyClient
from spotify_scraper.core.config import Config
from spotify_scraper.core.exceptions import (
    AuthenticationError,
    BrowserError,
    ConfigurationError,
    ExtractionError,
    MediaError,
    NetworkError,
    ParsingError,
    SpotifyScraperError,
    URLError,
)

# Utility functions
from spotify_scraper.utils.url import (
    convert_to_embed_url,
    extract_id,
    is_spotify_url,
)

# No backward compatibility needed

# Public API re-exported at package level
__all__ = [
    "SpotifyClient",
    "Config",
    "is_spotify_url",
    "extract_id",
    "convert_to_embed_url",
    "SpotifyScraperError",
    "URLError",
    "ParsingError",
    "ExtractionError",
    "NetworkError",
    "AuthenticationError",
    "BrowserError",
    "MediaError",
    "ConfigurationError",
]

# Package metadata
__title__ = "spotifyscraper"
__description__ = "A modern Python library for extracting data from Spotify's web interface"


def _parse_version_info(version: str) -> tuple:
    """Return the leading numeric release components of *version* as ints.

    Parsing stops at the first segment that is not purely numeric, keeping
    that segment's leading digits if it has any. A plain ``int()`` on every
    segment would raise ``ValueError`` at import time for versions such as
    ``"2.1.0rc1"`` or ``"2.0.1.post1"``.

    Args:
        version: Dotted version string, e.g. ``"2.0.1"``.

    Returns:
        Tuple of ints, e.g. ``(2, 0, 1)``; ``"2.1.0rc1"`` gives ``(2, 1, 0)``.
    """
    numbers = []
    for segment in version.split("."):
        digits = ""
        for char in segment:
            # Explicit ASCII check: str.isdigit() accepts characters int() rejects
            if char not in "0123456789":
                break
            digits += char
        if not digits:
            break
        numbers.append(int(digits))
        if len(digits) != len(segment):
            # A suffix such as "rc1" ends the numeric release part
            break
    return tuple(numbers)


__version_info__ = _parse_version_info(__version__)
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Authentication module for SpotifyScraper.
|
|
3
|
+
|
|
4
|
+
This module handles session management and authentication.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import re
|
|
9
|
+
from typing import Dict, Optional
|
|
10
|
+
|
|
11
|
+
import requests
|
|
12
|
+
|
|
13
|
+
from spotify_scraper.constants import DEFAULT_HEADERS
|
|
14
|
+
from spotify_scraper.core.exceptions import AuthenticationError
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Session:
    """
    Session management class for authentication with Spotify web player.

    Builds ``requests.Session`` objects pre-loaded with cookies, headers
    and proxies. It is designed to be backward compatible with the
    original Request class from SpotifyScraper v1.
    """

    # Netscape-format cookie files mark HttpOnly cookies by prefixing the
    # domain field with this string; such lines are data, not comments.
    _HTTPONLY_PREFIX = "#HttpOnly_"

    def __init__(
        self,
        cookie_file: Optional[str] = None,
        headers: Optional[Dict[str, str]] = None,
        proxy: Optional[Dict[str, str]] = None,
    ):
        """
        Initialize the Session.

        Args:
            cookie_file: Path to a cookies.txt file (optional)
            headers: Custom headers for requests (optional)
            proxy: Proxy configuration (optional)

        Raises:
            AuthenticationError: If ``cookie_file`` is given but cannot be
                read or parsed.
        """
        # Store provided parameters
        self.cookie_file = cookie_file
        self.headers = headers
        self.proxy = proxy

        # ``None`` (rather than an empty dict) means "no cookie file given"
        self.cookie: Optional[Dict[str, str]] = None
        if cookie_file is not None:
            try:
                self.cookie = self._parse_cookie_file()
            except Exception as e:
                logger.error("Failed to load cookies from %s: %s", cookie_file, e)
                raise AuthenticationError(f"Failed to load cookies: {e}") from e
            logger.debug("Loaded cookies from %s", cookie_file)

    def _parse_cookie_file(self) -> Dict[str, str]:
        """
        Parse the tab-separated cookies.txt file at ``self.cookie_file``.

        Lines starting with ``#`` are skipped as comments, except lines
        starting with ``#HttpOnly_``, which are parsed as cookies. Lines
        with fewer than seven tab-separated fields are ignored. The sixth
        field is the cookie name and the seventh its value; a cookie with
        an empty value is kept.

        Returns:
            Dictionary mapping cookie names to values
        """
        cookies: Dict[str, str] = {}
        with open(self.cookie_file, "r", encoding="utf-8") as fp:
            for raw_line in fp:
                # Only drop the line terminator: stripping all whitespace
                # would also eat the trailing tab of an empty-valued cookie.
                line = raw_line.rstrip("\r\n")
                if line.startswith(self._HTTPONLY_PREFIX):
                    line = line[len(self._HTTPONLY_PREFIX):]
                elif line.startswith("#"):
                    continue

                line_fields = line.split("\t")
                if len(line_fields) >= 7:
                    cookies[line_fields[5]] = line_fields[6].rstrip()

        return cookies

    def request(self) -> "requests.Session":
        """
        Create session using requests library and set cookie and headers.

        Custom headers, when given, replace ``DEFAULT_HEADERS`` instead of
        being merged with them.

        Returns:
            Configured requests.Session object
        """
        request_session = requests.Session()

        # Fall back to the package defaults only when no headers were supplied
        if self.headers is None:
            request_session.headers.update(DEFAULT_HEADERS)
        else:
            request_session.headers.update(self.headers)

        if self.cookie is not None:
            request_session.cookies.update(self.cookie)

        if self.proxy is not None:
            request_session.proxies.update(self.proxy)

        logger.debug("Created requests session")

        return request_session
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Session management for SpotifyScraper authentication.
|
|
3
|
+
|
|
4
|
+
This module handles authentication and session management for Spotify access.
|
|
5
|
+
Think of this as the key management system - it handles getting and maintaining
|
|
6
|
+
the credentials needed to access Spotify's data.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
import os
|
|
12
|
+
from datetime import datetime, timedelta
|
|
13
|
+
from typing import Dict, Optional
|
|
14
|
+
|
|
15
|
+
from spotify_scraper.core.constants import (
|
|
16
|
+
SESSION_CACHE_FILE,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Session:
    """
    Manages authentication sessions for Spotify access.

    Holds an optional access token, a cookie dictionary, extra HTTP headers
    and an optional expiry time. The session can report whether its
    credentials are still usable and can be saved to / restored from a
    JSON file. ``refresh()`` is currently a placeholder.
    """

    def __init__(
        self,
        access_token: Optional[str] = None,
        cookies: Optional[Dict[str, str]] = None,
        headers: Optional[Dict[str, str]] = None,
    ):
        """
        Initialize a session for Spotify authentication.

        Args:
            access_token: Spotify access token if available
            cookies: HTTP cookies for authentication
            headers: Additional HTTP headers to include in requests
        """
        self.access_token = access_token
        # Copy the mappings so add_cookies()/clear() never mutate the
        # dictionaries owned by the caller.
        self.cookies: Dict[str, str] = dict(cookies) if cookies else {}
        self.headers: Dict[str, str] = dict(headers) if headers else {}
        self.expires_at: Optional[datetime] = None
        self.is_anonymous = access_token is None

        logger.debug("Initialized Session (anonymous: %s)", self.is_anonymous)

    @property
    def _cookies(self):
        """Compatibility property for accessing cookies."""
        return self.cookies

    def is_valid(self) -> bool:
        """
        Check if the session is currently valid.

        A session is valid when it has an access token or at least one
        cookie, and its expiry time (if any) has not been reached.

        Returns:
            True if session is valid and can be used for requests
        """
        # No authentication method at all -> not valid
        if not self.access_token and not self.cookies:
            return False

        # An expiry time in the past invalidates the session
        if self.expires_at and datetime.now() >= self.expires_at:
            logger.debug("Session has expired")
            return False

        return True

    def refresh(self) -> bool:
        """
        Attempt to refresh the session credentials.

        Not implemented yet: logs a warning and reports failure.

        Returns:
            True if refresh was successful, False otherwise
        """
        # For now, this is a placeholder. A real implementation would:
        # 1. Use refresh tokens to get new access tokens
        # 2. Re-authenticate with stored credentials
        # 3. Prompt user for new authentication if needed

        logger.warning("Session refresh not yet implemented")
        return False

    def set_access_token(self, token: str, expires_in: Optional[int] = None) -> None:
        """
        Set a new access token for the session.

        Args:
            token: The access token to use
            expires_in: Token lifetime in seconds (optional). When omitted
                (or 0) the token is treated as having no known expiry.
        """
        self.access_token = token
        self.is_anonymous = False

        # Always overwrite the expiry: keeping the previous token's
        # expires_at would make a fresh token look expired.
        if expires_in:
            self.expires_at = datetime.now() + timedelta(seconds=expires_in)
        else:
            self.expires_at = None

        logger.debug("Updated session with new access token")

    def add_cookies(self, cookies: Dict[str, str]) -> None:
        """
        Add cookies to the session.

        Args:
            cookies: Dictionary of cookie name-value pairs
        """
        self.cookies.update(cookies)
        logger.debug("Added %s cookies to session", len(cookies))

    def get_auth_headers(self) -> Dict[str, str]:
        """
        Get HTTP headers needed for authenticated requests.

        Returns:
            A copy of the session headers, plus an ``Authorization: Bearer``
            header when an access token is set
        """
        auth_headers = self.headers.copy()

        if self.access_token:
            auth_headers["Authorization"] = f"Bearer {self.access_token}"

        return auth_headers

    def save_to_file(self, filepath: Optional[str] = None) -> bool:
        """
        Save session data to a JSON file for persistence.

        Args:
            filepath: Path to save session data. If None, uses default location.

        Returns:
            True if save was successful, False otherwise
        """
        if filepath is None:
            filepath = SESSION_CACHE_FILE

        try:
            session_data = {
                "access_token": self.access_token,
                "cookies": self.cookies,
                "headers": self.headers,
                "expires_at": self.expires_at.isoformat() if self.expires_at else None,
                "is_anonymous": self.is_anonymous,
            }
            # Serialize first so a failure cannot leave a truncated file behind
            payload = json.dumps(session_data, indent=2)

            # The file holds credentials in plain text, so a newly created
            # file is made readable/writable by the owner only. A file that
            # already exists keeps its current permissions.
            fd = os.open(filepath, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                f.write(payload)

            logger.debug("Saved session to %s", filepath)
            return True

        except Exception as e:
            # Best-effort persistence: report failure instead of raising
            logger.error("Failed to save session: %s", e)
            return False

    @classmethod
    def load_from_file(cls, filepath: Optional[str] = None) -> Optional["Session"]:
        """
        Load session data from a file.

        Args:
            filepath: Path to load session data from. If None, uses default location.

        Returns:
            Session instance if loading was successful, None otherwise
        """
        if filepath is None:
            filepath = SESSION_CACHE_FILE

        if not os.path.exists(filepath):
            logger.debug("Session file %s does not exist", filepath)
            return None

        try:
            with open(filepath, "r", encoding="utf-8") as f:
                session_data = json.load(f)

            session = cls(
                access_token=session_data.get("access_token"),
                cookies=session_data.get("cookies", {}),
                headers=session_data.get("headers", {}),
            )

            # Restore expiration time if available
            expires_at_str = session_data.get("expires_at")
            if expires_at_str:
                session.expires_at = datetime.fromisoformat(expires_at_str)

            # The stored flag wins over the value derived in __init__
            session.is_anonymous = session_data.get("is_anonymous", True)

            logger.debug("Loaded session from %s", filepath)
            return session

        except Exception as e:
            # Unreadable or malformed files are reported as "no session"
            logger.error("Failed to load session: %s", e)
            return None

    def clear(self) -> None:
        """
        Clear all authentication data from the session.

        Removes the token, cookies, headers and expiry, and marks the
        session as anonymous.
        """
        self.access_token = None
        self.cookies.clear()
        self.headers.clear()
        self.expires_at = None
        self.is_anonymous = True

        logger.debug("Cleared session authentication data")
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
# Backward compatibility class for the old Request interface
|
|
235
|
+
class Request:
    """
    Backward compatibility wrapper for the old Request class.

    Accepts the same constructor arguments as the original SpotifyScraper
    Request class and wraps a new-style ``Session``. Only ``headers`` is
    forwarded to that session; ``cookie_file`` and ``proxy`` are kept as
    attributes but are not applied, and a warning is logged when either
    is supplied.
    """

    def __init__(
        self,
        cookie_file: Optional[str] = None,
        headers: Optional[Dict[str, str]] = None,
        proxy: Optional[str] = None,
    ):
        """
        Initialize with the same interface as the original Request class.

        Args:
            cookie_file: Path to cookie file (legacy parameter, not applied)
            headers: HTTP headers to use
            proxy: Proxy URL to use (legacy parameter, not applied)
        """
        # Keep the legacy arguments inspectable instead of discarding them
        self.cookie_file = cookie_file
        self.proxy = proxy

        # Warn instead of silently ignoring arguments the caller expects to
        # take effect.
        if cookie_file is not None:
            logger.warning(
                "Request(cookie_file=%r) is ignored in compatibility mode; "
                "no cookies are loaded into the session",
                cookie_file,
            )
        if proxy is not None:
            logger.warning(
                "Request(proxy=...) is ignored in compatibility mode; "
                "the session has no proxy configured"
            )

        self.session = Session(headers=headers)
        logger.debug("Initialized Request (compatibility mode)")

    def request(self) -> Session:
        """
        Return the wrapped session for use with the old Scraper interface.

        Returns:
            The ``Session`` created in ``__init__``
        """
        return self.session
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Browser factory module for SpotifyScraper.
|
|
3
|
+
|
|
4
|
+
This module provides factory functions for creating browser instances.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
|
|
9
|
+
from spotify_scraper.browsers.base import Browser
|
|
10
|
+
from spotify_scraper.core.exceptions import BrowserError
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def create_browser(browser_type: str = "auto", **kwargs) -> "Browser":
    """
    Create appropriate browser instance.

    Behaviour per ``browser_type``:

    * ``'requests'``: return a ``RequestsBrowser``.
    * ``'selenium'``: return a ``SeleniumBrowser``; if the Selenium backend
      cannot be imported, log a warning and return a ``RequestsBrowser``.
    * ``'auto'``: build a ``RequestsBrowser`` and fetch
      ``https://open.spotify.com`` with it as a smoke test; on any failure
      fall back to ``SeleniumBrowser`` when it can be imported.

    Args:
        browser_type: Type of browser ('requests', 'selenium', or 'auto')
        **kwargs: Additional arguments to pass to browser constructor

    Returns:
        Configured browser instance

    Raises:
        BrowserError: In 'auto' mode, if the requests backend fails and the
            Selenium backend is not importable
        ValueError: If browser_type is invalid
    """
    # Reject bad input up front, before importing any backend
    if browser_type not in ("requests", "selenium", "auto"):
        raise ValueError(f"Unknown browser type: {browser_type}")

    # Import implementation classes here to avoid circular imports
    from spotify_scraper.browsers.requests_browser import RequestsBrowser

    def load_selenium_browser():
        """Return the SeleniumBrowser class, or None if it cannot be imported."""
        try:
            from spotify_scraper.browsers.selenium_browser import SeleniumBrowser
        except ImportError:
            return None
        return SeleniumBrowser

    if browser_type == "requests":
        # Selenium is never imported on this path, so no spurious
        # "not available" warning is logged.
        logger.debug("Creating RequestsBrowser")
        return RequestsBrowser(**kwargs)

    if browser_type == "selenium":
        selenium_cls = load_selenium_browser()
        if selenium_cls is None:
            logger.warning("Selenium requested but not available, falling back to requests")
            return RequestsBrowser(**kwargs)
        logger.debug("Creating SeleniumBrowser")
        return selenium_cls(**kwargs)

    # browser_type == "auto": try requests first, fall back to selenium
    try:
        logger.debug("Trying RequestsBrowser")
        browser = RequestsBrowser(**kwargs)
        # Test browser with a simple request
        browser.get_page_content("https://open.spotify.com")
        return browser
    except Exception as e:
        logger.warning("RequestsBrowser failed: %s", e)

        selenium_cls = load_selenium_browser()
        if selenium_cls is None:
            logger.error("Neither RequestsBrowser nor SeleniumBrowser are working")
            raise BrowserError("Failed to create any browser instance") from e

        logger.debug("Falling back to SeleniumBrowser")
        return selenium_cls(**kwargs)
|