spotifyscraper 2.0.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. spotify_scraper/__init__.py +92 -0
  2. spotify_scraper/__main__.py +10 -0
  3. spotify_scraper/auth/__init__.py +101 -0
  4. spotify_scraper/auth/session.py +269 -0
  5. spotify_scraper/browsers/__init__.py +73 -0
  6. spotify_scraper/browsers/base.py +206 -0
  7. spotify_scraper/browsers/requests_browser.py +289 -0
  8. spotify_scraper/browsers/selenium_browser.py +203 -0
  9. spotify_scraper/cli/__init__.py +116 -0
  10. spotify_scraper/cli/commands/__init__.py +10 -0
  11. spotify_scraper/cli/commands/album.py +157 -0
  12. spotify_scraper/cli/commands/artist.py +191 -0
  13. spotify_scraper/cli/commands/download.py +402 -0
  14. spotify_scraper/cli/commands/playlist.py +226 -0
  15. spotify_scraper/cli/commands/track.py +143 -0
  16. spotify_scraper/cli/utils.py +280 -0
  17. spotify_scraper/client.py +728 -0
  18. spotify_scraper/config_manager.py +791 -0
  19. spotify_scraper/constants.py +83 -0
  20. spotify_scraper/core/client.py +132 -0
  21. spotify_scraper/core/config.py +307 -0
  22. spotify_scraper/core/constants.py +78 -0
  23. spotify_scraper/core/exceptions.py +218 -0
  24. spotify_scraper/core/scraper.py +179 -0
  25. spotify_scraper/core/types.py +218 -0
  26. spotify_scraper/extractors/__init__.py +17 -0
  27. spotify_scraper/extractors/album.py +423 -0
  28. spotify_scraper/extractors/artist.py +325 -0
  29. spotify_scraper/extractors/playlist.py +433 -0
  30. spotify_scraper/extractors/track.py +328 -0
  31. spotify_scraper/media/__init__.py +14 -0
  32. spotify_scraper/media/audio.py +283 -0
  33. spotify_scraper/media/image.py +252 -0
  34. spotify_scraper/parsers/json_parser.py +542 -0
  35. spotify_scraper/py.typed +2 -0
  36. spotify_scraper/utils/common.py +859 -0
  37. spotify_scraper/utils/logger.py +134 -0
  38. spotify_scraper/utils/url.py +533 -0
  39. spotifyscraper-2.0.1.dist-info/METADATA +411 -0
  40. spotifyscraper-2.0.1.dist-info/RECORD +44 -0
  41. spotifyscraper-2.0.1.dist-info/WHEEL +6 -0
  42. spotifyscraper-2.0.1.dist-info/entry_points.txt +2 -0
  43. spotifyscraper-2.0.1.dist-info/licenses/LICENSE +21 -0
  44. spotifyscraper-2.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,92 @@
1
+ """SpotifyScraper - Modern Spotify Web Scraper.
2
+
3
+ A fast, modern Python library for extracting data from Spotify's web player.
4
+ Supports tracks, albums, artists, playlists, and lyrics with both requests and
5
+ Selenium backends.
6
+
7
+ This package provides a high-level interface for extracting metadata from Spotify's
8
+ web player without requiring API authentication. It parses Spotify's React-based
9
+ web interface to extract structured data.
10
+
11
+ Key Features:
12
+ - Extract metadata for tracks, albums, artists, and playlists
13
+ - Download preview audio clips and cover images
14
+ - Support for both lightweight (requests) and full (Selenium) browsers
15
+ - No API key required - works with public Spotify web pages
16
+ - Type-safe data structures using TypedDict
17
+ - Comprehensive error handling with specific exception types
18
+
19
+ Typical usage example:
20
+ from spotify_scraper import SpotifyClient
21
+
22
+ # Create a client
23
+ client = SpotifyClient()
24
+
25
+ # Extract track information
26
+ track_data = client.get_track_info("https://open.spotify.com/track/...")
27
+ print(f"Track: {track_data['name']} by {track_data['artists'][0]['name']}")
28
+
29
+ # Download preview and cover
30
+ client.download_preview_mp3(track_url, path="downloads/")
31
+ client.download_cover(track_url, path="covers/")
32
+
33
+ For authenticated features (e.g., lyrics), provide cookies:
34
+ client = SpotifyClient(cookie_file="cookies.txt")
35
+ track_with_lyrics = client.get_track_info_with_lyrics(track_url)
36
+
37
+ Note:
38
+ This library is designed for educational and personal use. Always respect
39
+ Spotify's Terms of Service and robots.txt when using this library.
40
+ """
41
+
42
+ __version__ = "2.0.1"
43
+ __author__ = "Ali Akhtari"
44
+ __email__ = "aliakhtari78@hotmail.com"
45
+ __license__ = "MIT"
46
+ __url__ = "https://github.com/AliAkhtari78/SpotifyScraper"
47
+
48
+ # Core imports for easy access
49
+ from spotify_scraper.client import SpotifyClient
50
+ from spotify_scraper.core.config import Config
51
+ from spotify_scraper.core.exceptions import (
52
+ AuthenticationError,
53
+ BrowserError,
54
+ ConfigurationError,
55
+ ExtractionError,
56
+ MediaError,
57
+ NetworkError,
58
+ ParsingError,
59
+ SpotifyScraperError,
60
+ URLError,
61
+ )
62
+
63
+ # Utility functions
64
+ from spotify_scraper.utils.url import (
65
+ convert_to_embed_url,
66
+ extract_id,
67
+ is_spotify_url,
68
+ )
69
+
70
+ # No backward compatibility needed
71
+
72
+ __all__ = [
73
+ "SpotifyClient",
74
+ "Config",
75
+ "is_spotify_url",
76
+ "extract_id",
77
+ "convert_to_embed_url",
78
+ "SpotifyScraperError",
79
+ "URLError",
80
+ "ParsingError",
81
+ "ExtractionError",
82
+ "NetworkError",
83
+ "AuthenticationError",
84
+ "BrowserError",
85
+ "MediaError",
86
+ "ConfigurationError",
87
+ ]
88
+
89
# Package metadata
__title__ = "spotifyscraper"
__description__ = "A modern Python library for extracting data from Spotify's web interface"


def _parse_version_info(version):
    """Return the leading numeric release components of *version* as a tuple.

    Parsing stops at the first dot-separated component that is not purely
    numeric, keeping that component's leading digits if it has any. This
    keeps the package importable when ``__version__`` carries a suffix:

        "2.0.1"      -> (2, 0, 1)
        "2.1.0rc1"   -> (2, 1, 0)
        "2.0.1.dev0" -> (2, 0, 1)

    Args:
        version: Version string whose components are separated by dots.

    Returns:
        Tuple of ints; empty if the string does not start with a digit.
    """
    # Function-scope import: the package's top-level import block is untouched.
    import re

    release = []
    for component in version.split("."):
        match = re.match(r"[0-9]+", component)
        if match is None:
            # Non-numeric component such as "dev0": the release part is over.
            break
        release.append(int(match.group()))
        if match.end() != len(component):
            # Digits followed by a suffix such as "0rc1": keep the number, stop here.
            break
    return tuple(release)


__version_info__ = _parse_version_info(__version__)
@@ -0,0 +1,10 @@
1
+ """
2
+ Main entry point for running SpotifyScraper as a module.
3
+
4
+ This allows the package to be run with `python -m spotify_scraper`.
5
+ """
6
+
7
from spotify_scraper.cli import main

if __name__ == "__main__":
    # Use main()'s return value (if any) as the process exit status instead of
    # discarding it; a return value of None still exits with status 0.
    raise SystemExit(main())
@@ -0,0 +1,101 @@
1
+ """
2
+ Authentication module for SpotifyScraper.
3
+
4
+ This module handles session management and authentication.
5
+ """
6
+
7
+ import logging
8
+ import re
9
+ from typing import Dict, Optional
10
+
11
+ import requests
12
+
13
+ from spotify_scraper.constants import DEFAULT_HEADERS
14
+ from spotify_scraper.core.exceptions import AuthenticationError
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class Session:
    """
    Session management class for authentication with Spotify web player.

    This class builds a configured ``requests.Session`` from an optional
    cookies.txt file, optional custom headers and an optional proxy mapping.
    It is designed to be backward compatible with the original Request class
    from SpotifyScraper v1.
    """

    # Marker that cookie exporters put in front of the domain field of
    # HttpOnly cookies. Such lines start with "#" but are cookies, not comments.
    _HTTPONLY_PREFIX = "#HttpOnly_"

    def __init__(
        self,
        cookie_file: Optional[str] = None,
        headers: Optional[Dict[str, str]] = None,
        proxy: Optional[Dict[str, str]] = None,
    ):
        """
        Initialize the Session.

        Args:
            cookie_file: Path to a cookies.txt file (optional)
            headers: Custom headers for requests (optional)
            proxy: Proxy configuration (optional)

        Raises:
            AuthenticationError: If the cookie file cannot be read or parsed
        """
        # Store provided parameters
        self.cookie_file = cookie_file
        self.headers = headers
        self.proxy = proxy

        # Initialize cookie dictionary (None means "no cookies configured")
        if cookie_file is None:
            self.cookie = None
        else:
            try:
                self.cookie = self._parse_cookie_file()
                logger.debug("Loaded cookies from %s", cookie_file)
            except Exception as e:
                logger.error("Failed to load cookies from %s: %s", cookie_file, e)
                raise AuthenticationError(f"Failed to load cookies: {e}") from e

    def _parse_cookie_file(self) -> Dict[str, str]:
        """
        Parse a cookies.txt file and return a dictionary of key-value pairs.

        Each cookie line is expected to hold at least seven tab-separated
        fields; the sixth is used as the cookie name and the seventh as its
        value. Comment lines (starting with ``#``), blank lines and lines with
        fewer fields are skipped. Lines starting with ``#HttpOnly_`` are
        treated as cookies rather than comments.

        Returns:
            Dictionary of cookies
        """
        prefix = self._HTTPONLY_PREFIX
        cookies: Dict[str, str] = {}
        with open(self.cookie_file, "r", encoding="utf-8") as fp:
            for raw_line in fp:
                line = raw_line.strip()
                if line.startswith(prefix):
                    # HttpOnly cookie: drop the marker so the domain field is clean.
                    line = line[len(prefix):]
                elif line.startswith("#"):
                    # Genuine comment line.
                    continue

                line_fields = line.split("\t")
                if len(line_fields) >= 7:
                    cookies[line_fields[5]] = line_fields[6]

        return cookies

    def request(self) -> "requests.Session":
        """
        Create session using requests library and set cookie and headers.

        Returns:
            Configured requests.Session object
        """
        # Create a new session
        request_session = requests.Session()

        # Use the caller's headers when given, otherwise the package defaults
        if self.headers is None:
            request_session.headers.update(DEFAULT_HEADERS)
        else:
            request_session.headers.update(self.headers)

        # Set cookies if provided
        if self.cookie is not None:
            request_session.cookies.update(self.cookie)

        # Set proxy if provided
        if self.proxy is not None:
            request_session.proxies.update(self.proxy)

        logger.debug("Created requests session")

        return request_session
@@ -0,0 +1,269 @@
1
+ """
2
+ Session management for SpotifyScraper authentication.
3
+
4
+ This module handles authentication and session management for Spotify access.
5
+ Think of this as the key management system - it handles getting and maintaining
6
+ the credentials needed to access Spotify's data.
7
+ """
8
+
9
+ import json
10
+ import logging
11
+ import os
12
+ from datetime import datetime, timedelta
13
+ from typing import Dict, Optional
14
+
15
+ from spotify_scraper.core.constants import (
16
+ SESSION_CACHE_FILE,
17
+ )
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class Session:
    """
    Manages authentication sessions for Spotify access.

    A session holds an optional access token, a cookie dictionary, extra HTTP
    headers and an optional expiry time. It can report whether it is still
    usable, build the headers for authenticated requests, and be saved to or
    restored from a JSON file.

    Automatic credential refresh is not implemented yet (see ``refresh``).
    """

    def __init__(
        self,
        access_token: Optional[str] = None,
        cookies: Optional[Dict[str, str]] = None,
        headers: Optional[Dict[str, str]] = None,
    ):
        """
        Initialize a session for Spotify authentication.

        Args:
            access_token: Spotify access token if available
            cookies: HTTP cookies for authentication
            headers: Additional HTTP headers to include in requests
        """
        self.access_token = access_token
        # Copy the mappings so add_cookies() and clear() never mutate
        # dictionaries that belong to the caller.
        self.cookies: Dict[str, str] = dict(cookies) if cookies else {}
        self.headers: Dict[str, str] = dict(headers) if headers else {}
        self.expires_at: Optional[datetime] = None
        self.is_anonymous = access_token is None

        logger.debug("Initialized Session (anonymous: %s)", self.is_anonymous)

    @property
    def _cookies(self):
        """Compatibility property for accessing cookies."""
        return self.cookies

    def is_valid(self) -> bool:
        """
        Check if the session is currently valid.

        A session is considered valid if it has an access token or cookies
        and, when an expiry time is set, that time has not been reached.

        Returns:
            True if session is valid and can be used for requests
        """
        # If we have no authentication method, session is not valid
        if not self.access_token and not self.cookies:
            return False

        # If we have an expiration time, check if we're still within it
        if self.expires_at and datetime.now() >= self.expires_at:
            logger.debug("Session has expired")
            return False

        return True

    def refresh(self) -> bool:
        """
        Attempt to refresh the session credentials.

        Not implemented yet: this only logs a warning and reports failure.

        Returns:
            Always False for now
        """
        # Placeholder. A real implementation would:
        # 1. Use refresh tokens to get new access tokens
        # 2. Re-authenticate with stored credentials
        # 3. Prompt user for new authentication if needed

        logger.warning("Session refresh not yet implemented")
        return False

    def set_access_token(self, token: str, expires_in: Optional[int] = None) -> None:
        """
        Set a new access token for the session.

        Args:
            token: The access token to use
            expires_in: Token lifetime in seconds (optional). When omitted
                (or 0) the session has no expiry time.
        """
        self.access_token = token
        self.is_anonymous = False

        # Always reset the expiry: an expiry left over from a previous token
        # must not decide whether the new token is considered valid.
        if expires_in:
            self.expires_at = datetime.now() + timedelta(seconds=expires_in)
        else:
            self.expires_at = None

        logger.debug("Updated session with new access token")

    def add_cookies(self, cookies: Dict[str, str]) -> None:
        """
        Add cookies to the session.

        Args:
            cookies: Dictionary of cookie name-value pairs
        """
        self.cookies.update(cookies)
        logger.debug("Added %s cookies to session", len(cookies))

    def get_auth_headers(self) -> Dict[str, str]:
        """
        Get HTTP headers needed for authenticated requests.

        Returns:
            A copy of the session headers, plus a Bearer ``Authorization``
            header when an access token is set
        """
        auth_headers = self.headers.copy()

        if self.access_token:
            auth_headers["Authorization"] = f"Bearer {self.access_token}"

        return auth_headers

    def save_to_file(self, filepath: Optional[str] = None) -> bool:
        """
        Save session data to a JSON file for persistence.

        Args:
            filepath: Path to save session data. If None, uses SESSION_CACHE_FILE.

        Returns:
            True if save was successful, False otherwise (the error is logged)
        """
        if filepath is None:
            filepath = SESSION_CACHE_FILE

        try:
            session_data = {
                "access_token": self.access_token,
                "cookies": self.cookies,
                "headers": self.headers,
                "expires_at": self.expires_at.isoformat() if self.expires_at else None,
                "is_anonymous": self.is_anonymous,
            }

            # NOTE(review): the file contains credentials in plain text and is
            # created with default permissions - consider restricting them.
            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(session_data, f, indent=2)

            logger.debug("Saved session to %s", filepath)
            return True

        except Exception as e:
            logger.error("Failed to save session: %s", e)
            return False

    @classmethod
    def load_from_file(cls, filepath: Optional[str] = None) -> Optional["Session"]:
        """
        Load session data from a JSON file written by ``save_to_file``.

        Args:
            filepath: Path to load session data from. If None, uses SESSION_CACHE_FILE.

        Returns:
            Session instance if loading was successful, None if the file does
            not exist or cannot be read/parsed (the error is logged)
        """
        if filepath is None:
            filepath = SESSION_CACHE_FILE

        if not os.path.exists(filepath):
            logger.debug("Session file %s does not exist", filepath)
            return None

        try:
            with open(filepath, "r", encoding="utf-8") as f:
                session_data = json.load(f)

            session = cls(
                access_token=session_data.get("access_token"),
                cookies=session_data.get("cookies", {}),
                headers=session_data.get("headers", {}),
            )

            # Restore expiration time if available
            expires_at_str = session_data.get("expires_at")
            if expires_at_str:
                session.expires_at = datetime.fromisoformat(expires_at_str)

            # Fall back to the value derived from the access token, so a file
            # without this key does not mark a token-bearing session anonymous.
            session.is_anonymous = session_data.get("is_anonymous", session.is_anonymous)

            logger.debug("Loaded session from %s", filepath)
            return session

        except Exception as e:
            logger.error("Failed to load session: %s", e)
            return None

    def clear(self) -> None:
        """
        Clear all authentication data from the session.

        Removes the access token, cookies, headers and expiry time and marks
        the session as anonymous.
        """
        self.access_token = None
        self.cookies.clear()
        self.headers.clear()
        self.expires_at = None
        self.is_anonymous = True

        logger.debug("Cleared session authentication data")
+
233
+
234
+ # Backward compatibility class for the old Request interface
235
class Request:
    """
    Backward compatibility wrapper for the old Request class.

    This class accepts the same constructor arguments as the original
    SpotifyScraper Request class, but internally only creates a Session
    carrying the given headers. The ``cookie_file`` and ``proxy`` arguments
    are kept on the instance but are not applied to the session.
    """

    def __init__(
        self,
        cookie_file: Optional[str] = None,
        headers: Optional[Dict[str, str]] = None,
        proxy: Optional[str] = None,
    ):
        """
        Initialize with the same interface as the original Request class.

        Args:
            cookie_file: Path to cookie file (legacy parameter, not applied)
            headers: HTTP headers to use
            proxy: Proxy URL to use (legacy parameter, not applied)
        """
        # Keep the legacy arguments inspectable instead of dropping them.
        self.cookie_file = cookie_file
        self.proxy = proxy

        # Tell the caller explicitly which arguments have no effect, rather
        # than silently ignoring them.
        ignored = [
            name
            for name, value in (("cookie_file", cookie_file), ("proxy", proxy))
            if value is not None
        ]
        if ignored:
            logger.warning(
                "Request (compatibility mode) does not apply these arguments: %s",
                ", ".join(ignored),
            )

        self.session = Session(headers=headers)
        logger.debug("Initialized Request (compatibility mode)")

    def request(self) -> Session:
        """
        Return a session object that can be used with the old Scraper interface.

        Returns:
            The Session created in the constructor
        """
        return self.session
@@ -0,0 +1,73 @@
1
+ """
2
+ Browser factory module for SpotifyScraper.
3
+
4
+ This module provides factory functions for creating browser instances.
5
+ """
6
+
7
+ import logging
8
+
9
+ from spotify_scraper.browsers.base import Browser
10
+ from spotify_scraper.core.exceptions import BrowserError
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def _load_selenium_browser():
    """Return the SeleniumBrowser class, or None if its module cannot be imported."""
    try:
        from spotify_scraper.browsers.selenium_browser import SeleniumBrowser
    except ImportError:
        return None
    return SeleniumBrowser


def create_browser(browser_type: str = "auto", **kwargs) -> Browser:
    """
    Create appropriate browser instance.

    Args:
        browser_type: Type of browser ('requests', 'selenium', or 'auto').
            'selenium' falls back to a RequestsBrowser (with a warning) when
            the Selenium backend cannot be imported. 'auto' tries a
            RequestsBrowser first, probing it with a request to
            https://open.spotify.com, and falls back to Selenium on failure.
        **kwargs: Additional arguments to pass to browser constructor

    Returns:
        Configured browser instance

    Raises:
        BrowserError: If 'auto' mode cannot produce a working browser
        ValueError: If browser_type is invalid
    """
    # Reject unknown types first, before any import or log output happens.
    if browser_type not in ("requests", "selenium", "auto"):
        raise ValueError(f"Unknown browser type: {browser_type}")

    # Import implementation classes here to avoid circular imports
    from spotify_scraper.browsers.requests_browser import RequestsBrowser

    if browser_type == "requests":
        logger.debug("Creating RequestsBrowser")
        return RequestsBrowser(**kwargs)

    if browser_type == "selenium":
        selenium_cls = _load_selenium_browser()
        if selenium_cls is None:
            logger.warning("Selenium requested but not available, falling back to requests")
            return RequestsBrowser(**kwargs)
        logger.debug("Creating SeleniumBrowser")
        return selenium_cls(**kwargs)

    # browser_type == "auto": try requests first, fallback to selenium if needed
    try:
        logger.debug("Trying RequestsBrowser")
        browser = RequestsBrowser(**kwargs)
        # Test browser with a simple request
        browser.get_page_content("https://open.spotify.com")
        return browser
    except Exception as e:
        logger.warning("RequestsBrowser failed: %s", e)
        # NOTE(review): the failed RequestsBrowser is not closed here; confirm
        # whether the Browser base class needs an explicit cleanup call.

        # Selenium is only imported once it is actually needed.
        selenium_cls = _load_selenium_browser()
        if selenium_cls is None:
            logger.error("Neither RequestsBrowser nor SeleniumBrowser are working")
            raise BrowserError("Failed to create any browser instance") from e

        logger.debug("Falling back to SeleniumBrowser")
        return selenium_cls(**kwargs)