spotifyscraper 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. spotify_scraper/__init__.py +88 -0
  2. spotify_scraper/__main__.py +10 -0
  3. spotify_scraper/auth/__init__.py +100 -0
  4. spotify_scraper/auth/session.py +263 -0
  5. spotify_scraper/browsers/__init__.py +72 -0
  6. spotify_scraper/browsers/base.py +211 -0
  7. spotify_scraper/browsers/requests_browser.py +286 -0
  8. spotify_scraper/browsers/selenium_browser.py +145 -0
  9. spotify_scraper/cli/__init__.py +114 -0
  10. spotify_scraper/cli/commands/__init__.py +10 -0
  11. spotify_scraper/cli/commands/album.py +152 -0
  12. spotify_scraper/cli/commands/artist.py +183 -0
  13. spotify_scraper/cli/commands/download.py +400 -0
  14. spotify_scraper/cli/commands/playlist.py +210 -0
  15. spotify_scraper/cli/commands/track.py +141 -0
  16. spotify_scraper/cli/utils.py +275 -0
  17. spotify_scraper/client.py +718 -0
  18. spotify_scraper/config_manager.py +779 -0
  19. spotify_scraper/constants.py +86 -0
  20. spotify_scraper/core/client.py +132 -0
  21. spotify_scraper/core/config.py +313 -0
  22. spotify_scraper/core/constants.py +75 -0
  23. spotify_scraper/core/exceptions.py +200 -0
  24. spotify_scraper/core/scraper.py +174 -0
  25. spotify_scraper/core/types.py +209 -0
  26. spotify_scraper/exceptions.py +79 -0
  27. spotify_scraper/extractors/__init__.py +17 -0
  28. spotify_scraper/extractors/album.py +354 -0
  29. spotify_scraper/extractors/artist.py +324 -0
  30. spotify_scraper/extractors/playlist.py +360 -0
  31. spotify_scraper/extractors/track.py +327 -0
  32. spotify_scraper/media/__init__.py +14 -0
  33. spotify_scraper/media/audio.py +281 -0
  34. spotify_scraper/media/image.py +248 -0
  35. spotify_scraper/parsers/json_parser.py +432 -0
  36. spotify_scraper/py.typed +2 -0
  37. spotify_scraper/utils/common.py +860 -0
  38. spotify_scraper/utils/logger.py +135 -0
  39. spotify_scraper/utils/url.py +496 -0
  40. spotifyscraper-2.0.0.dist-info/METADATA +606 -0
  41. spotifyscraper-2.0.0.dist-info/RECORD +45 -0
  42. spotifyscraper-2.0.0.dist-info/WHEEL +5 -0
  43. spotifyscraper-2.0.0.dist-info/entry_points.txt +3 -0
  44. spotifyscraper-2.0.0.dist-info/licenses/LICENSE +21 -0
  45. spotifyscraper-2.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,88 @@
1
+ """SpotifyScraper - Modern Spotify Web Scraper.
2
+
3
+ A fast, modern Python library for extracting data from Spotify's web player.
4
+ Supports tracks, albums, artists, playlists, and lyrics with both requests and
5
+ Selenium backends.
6
+
7
+ This package provides a high-level interface for extracting metadata from Spotify's
8
+ web player without requiring API authentication. It parses Spotify's React-based
9
+ web interface to extract structured data.
10
+
11
+ Key Features:
12
+ - Extract metadata for tracks, albums, artists, and playlists
13
+ - Download preview audio clips and cover images
14
+ - Support for both lightweight (requests) and full (Selenium) browsers
15
+ - No API key required - works with public Spotify web pages
16
+ - Type-safe data structures using TypedDict
17
+ - Comprehensive error handling with specific exception types
18
+
19
+ Typical usage example:
20
+ from spotify_scraper import SpotifyClient
21
+
22
+ # Create a client
23
+ client = SpotifyClient()
24
+
25
+ # Extract track information
26
+ track_data = client.get_track_info("https://open.spotify.com/track/...")
27
+ print(f"Track: {track_data['name']} by {track_data['artists'][0]['name']}")
28
+
29
+ # Download preview and cover
30
+ client.download_preview_mp3(track_url, path="downloads/")
31
+ client.download_cover(track_url, path="covers/")
32
+
33
+ For authenticated features (e.g., lyrics), provide cookies:
34
+ client = SpotifyClient(cookie_file="cookies.txt")
35
+ track_with_lyrics = client.get_track_info_with_lyrics(track_url)
36
+
37
+ Note:
38
+ This library is designed for educational and personal use. Always respect
39
+ Spotify's Terms of Service and robots.txt when using this library.
40
+ """
41
+
42
+ __version__ = "2.0.0"
43
+ __author__ = "Ali Akhtari"
44
+ __email__ = "aliakhtari78@hotmail.com"
45
+ __license__ = "MIT"
46
+ __url__ = "https://github.com/AliAkhtari78/SpotifyScraper"
47
+
48
+ # Core imports for easy access
49
+ from spotify_scraper.client import SpotifyClient
50
+ from spotify_scraper.core.exceptions import (
51
+ SpotifyScraperError,
52
+ URLError,
53
+ ParsingError,
54
+ ExtractionError,
55
+ NetworkError,
56
+ AuthenticationError,
57
+ BrowserError,
58
+ MediaError,
59
+ )
60
+
61
+ # Utility functions
62
+ from spotify_scraper.utils.url import (
63
+ is_spotify_url,
64
+ extract_id,
65
+ convert_to_embed_url,
66
+ )
67
+
68
+ # No backward compatibility needed
69
+
70
+ __all__ = [
71
+ "SpotifyClient",
72
+ "is_spotify_url",
73
+ "extract_id",
74
+ "convert_to_embed_url",
75
+ "SpotifyScraperError",
76
+ "URLError",
77
+ "ParsingError",
78
+ "ExtractionError",
79
+ "NetworkError",
80
+ "AuthenticationError",
81
+ "BrowserError",
82
+ "MediaError",
83
+ ]
84
+
85
+ # Package metadata
86
+ __title__ = "spotifyscraper"
87
+ __description__ = "A modern Python library for extracting data from Spotify's web interface"
88
+ __version_info__ = tuple(int(part) for part in __version__.split('.'))
@@ -0,0 +1,10 @@
1
+ """
2
+ Main entry point for running SpotifyScraper as a module.
3
+
4
+ This allows the package to be run with `python -m spotify_scraper`.
5
+ """
6
+
7
+ from spotify_scraper.cli import main
8
+
9
+ if __name__ == "__main__":
10
+ main()
@@ -0,0 +1,100 @@
1
+ """
2
+ Authentication module for SpotifyScraper.
3
+
4
+ This module handles session management and authentication.
5
+ """
6
+
7
+ from typing import Dict, Optional, Any
8
+ import re
9
+ import requests
10
+ import logging
11
+
12
+ from spotify_scraper.exceptions import AuthenticationError
13
+ from spotify_scraper.constants import DEFAULT_HEADERS
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class Session:
    """
    Session management class for authentication with Spotify web player.

    Holds an optional cookie file, custom headers and proxy mapping, and
    builds a configured ``requests.Session`` from them on demand. It is
    designed to be backward compatible with the original Request class
    from SpotifyScraper v1.
    """

    # cookies.txt exporters mark HttpOnly cookies by prefixing the domain
    # field with this token. Such lines are cookie records, not comments.
    _HTTPONLY_PREFIX = "#HttpOnly_"

    def __init__(
        self,
        cookie_file: Optional[str] = None,
        headers: Optional[Dict[str, str]] = None,
        proxy: Optional[Dict[str, str]] = None,
    ):
        """
        Initialize the Session.

        Args:
            cookie_file: Path to a cookies.txt file (optional)
            headers: Custom headers for requests (optional)
            proxy: Proxy configuration (optional)

        Raises:
            AuthenticationError: If ``cookie_file`` is given but cannot be
                read or parsed.
        """
        # Store provided parameters
        self.cookie_file = cookie_file
        self.headers = headers
        self.proxy = proxy

        # Without a cookie file there is nothing to parse
        if cookie_file is None:
            self.cookie = None
            return

        try:
            self.cookie = self._parse_cookie_file()
        except Exception as e:
            logger.error(f"Failed to load cookies from {cookie_file}: {e}")
            raise AuthenticationError(f"Failed to load cookies: {e}") from e
        logger.debug(f"Loaded cookies from {cookie_file}")

    def _parse_cookie_file(self) -> Dict[str, str]:
        """
        Parse a cookies.txt file and return a dictionary of key-value pairs.

        Each record is a tab-separated line with at least seven fields; the
        sixth field is used as the cookie name and the seventh as its value.
        Comment lines (starting with ``#``) and lines with fewer than seven
        fields are skipped. Lines starting with ``#HttpOnly_`` are treated
        as cookie records rather than comments.

        Returns:
            Dictionary of cookies
        """
        cookies: Dict[str, str] = {}
        with open(self.cookie_file, "r", encoding="utf-8") as fp:
            for raw_line in fp:
                line = raw_line.strip()
                if line.startswith(self._HTTPONLY_PREFIX):
                    # Drop the marker so the remaining fields line up
                    line = line[len(self._HTTPONLY_PREFIX):]
                elif line.startswith("#"):
                    continue

                line_fields = line.split("\t")
                if len(line_fields) >= 7:
                    cookies[line_fields[5]] = line_fields[6]

        return cookies

    def request(self) -> "requests.Session":
        """
        Create session using requests library and set cookie and headers.

        ``DEFAULT_HEADERS`` are applied only when no custom headers were
        supplied; otherwise the custom headers are applied instead.

        Returns:
            Configured requests.Session object
        """
        # Create a new session
        request_session = requests.Session()

        # Set headers with defaults if not provided
        if self.headers is None:
            request_session.headers.update(DEFAULT_HEADERS)
        else:
            request_session.headers.update(self.headers)

        # Set cookies if provided
        if self.cookie is not None:
            request_session.cookies.update(self.cookie)

        # Set proxy if provided
        if self.proxy is not None:
            request_session.proxies.update(self.proxy)

        logger.debug("Created requests session")

        return request_session
@@ -0,0 +1,263 @@
1
+ """
2
+ Session management for SpotifyScraper authentication.
3
+
4
+ This module handles authentication and session management for Spotify access.
5
+ Think of this as the key management system - it handles getting and maintaining
6
+ the credentials needed to access Spotify's data.
7
+ """
8
+
9
+ from typing import Optional, Dict, Any, Union
10
+ import logging
11
+ import json
12
+ import os
13
+ from datetime import datetime, timedelta
14
+
15
+ from spotify_scraper.core.exceptions import AuthenticationError
16
+ from spotify_scraper.core.constants import (
17
+ CREDENTIALS_FILE_NAME,
18
+ SESSION_CACHE_FILE,
19
+ DEFAULT_SESSION_TIMEOUT,
20
+ MAX_SESSION_RETRIES,
21
+ )
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class Session:
    """
    Manages authentication sessions for Spotify access.

    A session carries an optional access token, a cookie dictionary, extra
    HTTP headers and an optional expiry time. It can report whether those
    credentials are still usable and can be saved to / restored from a
    JSON file. Automatic refresh is not implemented yet (see ``refresh``).
    """

    def __init__(
        self,
        access_token: Optional[str] = None,
        cookies: Optional[Dict[str, str]] = None,
        headers: Optional[Dict[str, str]] = None,
    ):
        """
        Initialize a session for Spotify authentication.

        Args:
            access_token: Spotify access token if available
            cookies: HTTP cookies for authentication
            headers: Additional HTTP headers to include in requests
        """
        self.access_token = access_token
        # Copy the mappings: add_cookies() and clear() mutate them in place
        # and must not reach back into dictionaries owned by the caller.
        self.cookies: Dict[str, str] = dict(cookies) if cookies else {}
        self.headers: Dict[str, str] = dict(headers) if headers else {}
        self.expires_at: Optional[datetime] = None
        self.is_anonymous = access_token is None

        logger.debug(f"Initialized Session (anonymous: {self.is_anonymous})")

    def is_valid(self) -> bool:
        """
        Check if the session is currently valid.

        A session is considered valid if it has an access token or at least
        one cookie, and its expiry time (if any) has not been reached.

        Returns:
            True if session is valid and can be used for requests
        """
        # If we have no authentication method, session is not valid
        if not self.access_token and not self.cookies:
            return False

        # If we have an expiration time, check if we're still within it
        if self.expires_at is not None and datetime.now() >= self.expires_at:
            logger.debug("Session has expired")
            return False

        return True

    def refresh(self) -> bool:
        """
        Attempt to refresh the session credentials.

        Placeholder: no refresh mechanism exists yet, so this only logs a
        warning and reports failure.

        Returns:
            True if refresh was successful, False otherwise (currently
            always False)
        """
        logger.warning("Session refresh not yet implemented")
        return False

    def set_access_token(self, token: str, expires_in: Optional[int] = None) -> None:
        """
        Set a new access token for the session.

        Args:
            token: The access token to use
            expires_in: Token lifetime in seconds (optional). When omitted,
                the token is treated as having no known expiry.
        """
        self.access_token = token
        self.is_anonymous = False

        # Always recompute the expiry so a fresh token never inherits the
        # deadline of the token it replaces; 0 means "expires immediately".
        if expires_in is not None:
            self.expires_at = datetime.now() + timedelta(seconds=expires_in)
        else:
            self.expires_at = None

        logger.debug("Updated session with new access token")

    def add_cookies(self, cookies: Dict[str, str]) -> None:
        """
        Add cookies to the session.

        Args:
            cookies: Dictionary of cookie name-value pairs
        """
        self.cookies.update(cookies)
        logger.debug(f"Added {len(cookies)} cookies to session")

    def get_auth_headers(self) -> Dict[str, str]:
        """
        Get HTTP headers needed for authenticated requests.

        Returns:
            A new dictionary with the session headers plus a Bearer
            ``Authorization`` header when an access token is set
        """
        auth_headers = self.headers.copy()

        if self.access_token:
            auth_headers["Authorization"] = f"Bearer {self.access_token}"

        return auth_headers

    def save_to_file(self, filepath: Optional[str] = None) -> bool:
        """
        Save session data to a file for persistence.

        The token, cookies, headers, expiry and anonymity flag are written
        as plain-text JSON, so the target file holds usable credentials.

        Args:
            filepath: Path to save session data. If None, uses default location.

        Returns:
            True if save was successful, False otherwise
        """
        if filepath is None:
            filepath = SESSION_CACHE_FILE

        try:
            session_data = {
                "access_token": self.access_token,
                "cookies": self.cookies,
                "headers": self.headers,
                "expires_at": self.expires_at.isoformat() if self.expires_at else None,
                "is_anonymous": self.is_anonymous,
            }

            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(session_data, f, indent=2)

            logger.debug(f"Saved session to {filepath}")
            return True

        except Exception as e:
            # Best-effort contract: report failure instead of raising
            logger.error(f"Failed to save session: {e}")
            return False

    @classmethod
    def load_from_file(cls, filepath: Optional[str] = None) -> Optional["Session"]:
        """
        Load session data from a file.

        Args:
            filepath: Path to load session data from. If None, uses default location.

        Returns:
            Session instance if loading was successful, None otherwise
        """
        if filepath is None:
            filepath = SESSION_CACHE_FILE

        if not os.path.exists(filepath):
            logger.debug(f"Session file {filepath} does not exist")
            return None

        try:
            with open(filepath, "r", encoding="utf-8") as f:
                session_data = json.load(f)

            session = cls(
                access_token=session_data.get("access_token"),
                cookies=session_data.get("cookies"),
                headers=session_data.get("headers"),
            )

            # Restore expiration time if available
            expires_at_str = session_data.get("expires_at")
            if expires_at_str:
                session.expires_at = datetime.fromisoformat(expires_at_str)

            # When the flag is missing, keep the value the constructor
            # derived from the token instead of forcing "anonymous".
            session.is_anonymous = session_data.get("is_anonymous", session.is_anonymous)

            logger.debug(f"Loaded session from {filepath}")
            return session

        except Exception as e:
            # Best-effort contract: unreadable or malformed files yield None
            logger.error(f"Failed to load session: {e}")
            return None

    def clear(self) -> None:
        """
        Clear all authentication data from the session.

        Removes the token, cookies, headers and expiry, and marks the
        session as anonymous again.
        """
        self.access_token = None
        self.cookies.clear()
        self.headers.clear()
        self.expires_at = None
        self.is_anonymous = True

        logger.debug("Cleared session authentication data")
231
+
232
+
233
+ # Backward compatibility class for the old Request interface
234
class Request:
    """
    Backward compatibility wrapper for the old Request class.

    This class provides the same constructor and ``request()`` interface as
    the original SpotifyScraper Request class, but internally builds a
    ``Session``. Cookies from ``cookie_file`` are loaded into that session;
    the proxy is only kept on the instance because ``Session`` takes no
    proxy argument.
    """

    def __init__(self, cookie_file: Optional[str] = None, headers: Optional[Dict[str, str]] = None, proxy: Optional[str] = None):
        """
        Initialize with the same interface as the original Request class.

        Args:
            cookie_file: Path to cookie file (legacy parameter). Loaded on a
                best-effort basis; an unreadable file is logged and ignored.
            headers: HTTP headers to use
            proxy: Proxy URL to use (legacy parameter). Stored as
                ``self.proxy`` but not applied by the wrapped session.
        """
        # Keep the legacy arguments visible instead of silently dropping them
        self.cookie_file = cookie_file
        self.proxy = proxy

        cookies = self._read_cookie_file(cookie_file) if cookie_file else {}
        self.session = Session(cookies=cookies, headers=headers)

        if proxy is not None:
            # Make the gap explicit so callers do not assume proxied traffic
            logger.warning("Request received a proxy, but Session does not apply proxies")
        logger.debug("Initialized Request (compatibility mode)")

    @staticmethod
    def _read_cookie_file(cookie_file: str) -> Dict[str, str]:
        """
        Parse a tab-separated cookies.txt file into a name/value dictionary.

        Lines need at least seven fields; the sixth is the name and the
        seventh the value. ``#`` comment lines are skipped, except lines
        starting with ``#HttpOnly_``, which are read as cookie records.

        Returns:
            Parsed cookies, or an empty dict when the file cannot be read
        """
        httponly_prefix = "#HttpOnly_"
        cookies: Dict[str, str] = {}
        try:
            with open(cookie_file, "r", encoding="utf-8") as fp:
                for raw_line in fp:
                    line = raw_line.strip()
                    if line.startswith(httponly_prefix):
                        line = line[len(httponly_prefix):]
                    elif line.startswith("#"):
                        continue
                    fields = line.split("\t")
                    if len(fields) >= 7:
                        cookies[fields[5]] = fields[6]
        except (OSError, UnicodeDecodeError) as e:
            # The constructor never raised before; stay best-effort
            logger.warning(f"Could not load cookies from {cookie_file}: {e}")
            return {}
        return cookies

    def request(self) -> "Session":
        """
        Return a session object that can be used with the old Scraper interface.

        Returns:
            The Session created in the constructor
        """
        return self.session
@@ -0,0 +1,72 @@
1
+ """
2
+ Browser factory module for SpotifyScraper.
3
+
4
+ This module provides factory functions for creating browser instances.
5
+ """
6
+
7
+ from typing import Optional, Union, Dict, Any
8
+ import logging
9
+
10
+ from spotify_scraper.exceptions import BrowserError
11
+ from spotify_scraper.browsers.base import Browser
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
def _load_selenium_browser():
    """Return the SeleniumBrowser class, or None when it cannot be imported."""
    try:
        from spotify_scraper.browsers.selenium_browser import SeleniumBrowser
    except ImportError:
        return None
    return SeleniumBrowser


def create_browser(browser_type: str = "auto", **kwargs) -> "Browser":
    """
    Create appropriate browser instance.

    Args:
        browser_type: Type of browser ('requests', 'selenium', or 'auto').
            'selenium' falls back to the requests backend when Selenium
            cannot be imported. 'auto' tries the requests backend with a
            probe request to https://open.spotify.com and falls back to
            Selenium if that fails.
        **kwargs: Additional arguments to pass to browser constructor

    Returns:
        Configured browser instance

    Raises:
        BrowserError: If 'auto' is requested, the requests backend fails
            and Selenium is not available
        ValueError: If browser_type is invalid
    """
    # Reject bad input before importing any backend
    if browser_type not in ("requests", "selenium", "auto"):
        raise ValueError(f"Unknown browser type: {browser_type}")

    # Import implementation classes here to avoid circular imports
    from spotify_scraper.browsers.requests_browser import RequestsBrowser

    if browser_type == "requests":
        logger.debug("Creating RequestsBrowser")
        return RequestsBrowser(**kwargs)

    if browser_type == "selenium":
        selenium_cls = _load_selenium_browser()
        if selenium_cls is not None:
            logger.debug("Creating SeleniumBrowser")
            return selenium_cls(**kwargs)
        logger.warning("Selenium requested but not available, falling back to requests")
        return RequestsBrowser(**kwargs)

    # browser_type == "auto": try requests first, fallback to selenium if needed
    try:
        logger.debug("Trying RequestsBrowser")
        browser = RequestsBrowser(**kwargs)
        # Test browser with a simple request
        browser.get("https://open.spotify.com")
        return browser
    except Exception as e:
        logger.warning(f"RequestsBrowser failed: {e}")

        # Selenium is optional, so it is only probed once it is needed.
        # Staying inside the handler keeps `e` in scope for chaining.
        selenium_cls = _load_selenium_browser()
        if selenium_cls is None:
            logger.error("Neither RequestsBrowser nor SeleniumBrowser are working")
            raise BrowserError("Failed to create any browser instance") from e

        logger.debug("Falling back to SeleniumBrowser")
        return selenium_cls(**kwargs)