substack-api 0.1.0__tar.gz → 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,131 @@
1
+ Metadata-Version: 2.2
2
+ Name: substack-api
3
+ Version: 1.0.2
4
+ Summary: Unofficial wrapper for the Substack API
5
+ Requires-Python: >=3.12
6
+ Description-Content-Type: text/markdown
7
+ License-File: LICENSE
8
+ Requires-Dist: requests>=2.32.3
9
+
10
+ # Substack API
11
+
12
+ An unofficial Python client library for interacting with Substack newsletters and content.
13
+
14
+ ## Overview
15
+
16
+ This library provides Python interfaces for interacting with Substack's unofficial API, allowing you to:
17
+
18
+ - Retrieve newsletter posts, podcasts, and recommendations
19
+ - Get user profile information and subscriptions
20
+ - Fetch post content and metadata
21
+ - Search for posts within newsletters
22
+
23
+ ## Installation
24
+
25
+ ```bash
26
+ # Using pip
27
+ pip install substack-api
28
+
29
+ # Using poetry
30
+ poetry add substack-api
31
+ ```
32
+
33
+ ## Usage Examples
34
+
35
+ ### Working with Newsletters
36
+
37
+ ```python
38
+ from substack_api import Newsletter
39
+
40
+ # Initialize a newsletter by its URL
41
+ newsletter = Newsletter("https://example.substack.com")
42
+
43
+ # Get recent posts (returns Post objects)
44
+ recent_posts = newsletter.get_posts(limit=5)
45
+
46
+ # Get posts sorted by popularity
47
+ top_posts = newsletter.get_posts(sorting="top", limit=10)
48
+
49
+ # Search for posts
50
+ search_results = newsletter.search_posts("machine learning", limit=3)
51
+
52
+ # Get podcast episodes
53
+ podcasts = newsletter.get_podcasts(limit=5)
54
+
55
+ # Get recommended newsletters
56
+ recommendations = newsletter.get_recommendations()
57
+
58
+ # Get newsletter authors
59
+ authors = newsletter.get_authors()
60
+ ```
61
+
62
+ ### Working with Posts
63
+
64
+ ```python
65
+ from substack_api import Post
66
+
67
+ # Initialize a post by its URL
68
+ post = Post("https://example.substack.com/p/post-slug")
69
+
70
+ # Get post metadata
71
+ metadata = post.get_metadata()
72
+
73
+ # Get the post's HTML content
74
+ content = post.get_content()
75
+ ```
76
+
77
+ ### Working with Users
78
+
79
+ ```python
80
+ from substack_api import User
81
+
82
+ # Initialize a user by their username
83
+ user = User("username")
84
+
85
+ # Get user profile information
86
+ profile_data = user.get_raw_data()
87
+
88
+ # Get user ID and name
89
+ user_id = user.id
90
+ name = user.name
91
+
92
+ # Get user's subscriptions
93
+ subscriptions = user.get_subscriptions()
94
+ ```
95
+
96
+ ## Limitations
97
+
98
+ - This is an unofficial library and not endorsed by Substack
99
+ - APIs may change without notice, potentially breaking functionality
100
+ - Some features may only work for public content
101
+ - Rate limiting may be enforced by Substack
102
+
103
+ ## Development
104
+
105
+ ### Running Tests
106
+
107
+ ```bash
108
+ # Install dev dependencies
109
+ pip install -e . --group dev  # requires pip >= 25.1; "dev" is a [dependency-groups] entry, not an extra
110
+
111
+ # Run tests
112
+ pytest
113
+ ```
114
+
115
+ ### Contributing
116
+
117
+ Contributions are welcome! Please feel free to submit a Pull Request.
118
+
119
+ 1. Fork the repository
120
+ 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
121
+ 3. Commit your changes (`git commit -m 'Add some amazing feature'`)
122
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
123
+ 5. Open a Pull Request
124
+
125
+ ## License
126
+
127
+ This project is licensed under the MIT License - see the LICENSE file for details.
128
+
129
+ ## Disclaimer
130
+
131
+ This package is not affiliated with, endorsed by, or connected to Substack in any way. It is an independent project created to make Substack content more accessible through Python.
@@ -0,0 +1,122 @@
1
+ # Substack API
2
+
3
+ An unofficial Python client library for interacting with Substack newsletters and content.
4
+
5
+ ## Overview
6
+
7
+ This library provides Python interfaces for interacting with Substack's unofficial API, allowing you to:
8
+
9
+ - Retrieve newsletter posts, podcasts, and recommendations
10
+ - Get user profile information and subscriptions
11
+ - Fetch post content and metadata
12
+ - Search for posts within newsletters
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ # Using pip
18
+ pip install substack-api
19
+
20
+ # Using poetry
21
+ poetry add substack-api
22
+ ```
23
+
24
+ ## Usage Examples
25
+
26
+ ### Working with Newsletters
27
+
28
+ ```python
29
+ from substack_api import Newsletter
30
+
31
+ # Initialize a newsletter by its URL
32
+ newsletter = Newsletter("https://example.substack.com")
33
+
34
+ # Get recent posts (returns Post objects)
35
+ recent_posts = newsletter.get_posts(limit=5)
36
+
37
+ # Get posts sorted by popularity
38
+ top_posts = newsletter.get_posts(sorting="top", limit=10)
39
+
40
+ # Search for posts
41
+ search_results = newsletter.search_posts("machine learning", limit=3)
42
+
43
+ # Get podcast episodes
44
+ podcasts = newsletter.get_podcasts(limit=5)
45
+
46
+ # Get recommended newsletters
47
+ recommendations = newsletter.get_recommendations()
48
+
49
+ # Get newsletter authors
50
+ authors = newsletter.get_authors()
51
+ ```
52
+
53
+ ### Working with Posts
54
+
55
+ ```python
56
+ from substack_api import Post
57
+
58
+ # Initialize a post by its URL
59
+ post = Post("https://example.substack.com/p/post-slug")
60
+
61
+ # Get post metadata
62
+ metadata = post.get_metadata()
63
+
64
+ # Get the post's HTML content
65
+ content = post.get_content()
66
+ ```
67
+
68
+ ### Working with Users
69
+
70
+ ```python
71
+ from substack_api import User
72
+
73
+ # Initialize a user by their username
74
+ user = User("username")
75
+
76
+ # Get user profile information
77
+ profile_data = user.get_raw_data()
78
+
79
+ # Get user ID and name
80
+ user_id = user.id
81
+ name = user.name
82
+
83
+ # Get user's subscriptions
84
+ subscriptions = user.get_subscriptions()
85
+ ```
86
+
87
+ ## Limitations
88
+
89
+ - This is an unofficial library and not endorsed by Substack
90
+ - APIs may change without notice, potentially breaking functionality
91
+ - Some features may only work for public content
92
+ - Rate limiting may be enforced by Substack
93
+
94
+ ## Development
95
+
96
+ ### Running Tests
97
+
98
+ ```bash
99
+ # Install dev dependencies
100
+ pip install -e . --group dev  # requires pip >= 25.1; "dev" is a [dependency-groups] entry, not an extra
101
+
102
+ # Run tests
103
+ pytest
104
+ ```
105
+
106
+ ### Contributing
107
+
108
+ Contributions are welcome! Please feel free to submit a Pull Request.
109
+
110
+ 1. Fork the repository
111
+ 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
112
+ 3. Commit your changes (`git commit -m 'Add some amazing feature'`)
113
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
114
+ 5. Open a Pull Request
115
+
116
+ ## License
117
+
118
+ This project is licensed under the MIT License - see the LICENSE file for details.
119
+
120
+ ## Disclaimer
121
+
122
+ This package is not affiliated with, endorsed by, or connected to Substack in any way. It is an independent project created to make Substack content more accessible through Python.
@@ -0,0 +1,20 @@
1
+ [project]
2
+ name = "substack-api"
3
+ version = "1.0.2"
4
+ description = "Unofficial wrapper for the Substack API"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "requests>=2.32.3",
9
+ ]
10
+
11
+ [dependency-groups]
12
+ dev = [
13
+ "ipykernel>=6.29.5",
14
+ "mike>=2.1.3",
15
+ "mkdocs>=1.6.1",
16
+ "mkdocs-material>=9.6.6",
17
+ "mkdocstrings-python>=1.16.2",
18
+ "pytest>=8.3.4",
19
+ "ruff>=0.9.9",
20
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,6 @@
1
+ from .category import Category
2
+ from .newsletter import Newsletter
3
+ from .post import Post
4
+ from .user import User
5
+
6
+ __all__ = ["User", "Post", "Category", "Newsletter"]
@@ -0,0 +1,180 @@
1
+ from typing import Any, Dict, List, Optional, Tuple
2
+
3
+ import requests
4
+
5
+ # Newsletter is used to build the objects returned by Category.get_newsletters()
6
+ from .newsletter import Newsletter
7
+
8
# Browser-like User-Agent header sent with every request made by this module.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.77 Safari/537.36"
    )
}
11
+
12
+
13
def list_all_categories() -> List[Tuple[str, int]]:
    """
    Fetch every newsletter category as a (name, id) pair

    Returns
    -------
    List[Tuple[str, int]]
        One (category_name, category_id) tuple per entry in the API response

    Raises
    ------
    requests.HTTPError
        If the categories endpoint answers with an error status
    """
    response = requests.get(
        "https://substack.com/api/v1/categories", headers=HEADERS, timeout=30
    )
    response.raise_for_status()

    pairs = []
    for entry in response.json():
        pairs.append((entry["name"], entry["id"]))
    return pairs
27
+
28
+
29
class Category:
    """
    Top-level newsletter category

    A category carries a name and a numeric ID. Only one of the two has to be
    supplied; the missing one is resolved through ``list_all_categories()``.
    Newsletter data for the category is fetched on first use and cached on
    the instance.
    """

    def __init__(self, name: Optional[str] = None, id: Optional[int] = None) -> None:
        """
        Initialize a Category object.

        Parameters
        ----------
        name : Optional[str]
            The name of the category
        id : Optional[int]
            The ID of the category

        Raises
        ------
        ValueError
            If neither name nor id is provided, or if the provided name/id is not found
        """
        if name is None and id is None:
            raise ValueError("Either name or id must be provided")

        self.name = name
        self.id = id
        # Cache for newsletter data; None means "not fetched yet"
        self._newsletters_data: Optional[List[Dict[str, Any]]] = None

        # Compare against None instead of relying on truthiness: a falsy but
        # present value (id=0, name="") must still be resolved or rejected
        # rather than silently leaving the other attribute unset.
        if self.id is None:
            self._get_id_from_name()
        elif self.name is None:
            self._get_name_from_id()

    def __str__(self) -> str:
        return f"{self.name} ({self.id})"

    def __repr__(self) -> str:
        return f"Category(name={self.name}, id={self.id})"

    def _get_id_from_name(self) -> None:
        """
        Lookup category ID based on name

        Raises
        ------
        ValueError
            If the category name is not found
        """
        for cat_name, cat_id in list_all_categories():
            if cat_name == self.name:
                self.id = cat_id
                return
        raise ValueError(f"Category name '{self.name}' not found")

    def _get_name_from_id(self) -> None:
        """
        Lookup category name based on ID

        Raises
        ------
        ValueError
            If the category ID is not found
        """
        for cat_name, cat_id in list_all_categories():
            if cat_id == self.id:
                self.name = cat_name
                return
        raise ValueError(f"Category ID {self.id} not found")

    def _fetch_newsletters_data(
        self, force_refresh: bool = False
    ) -> List[Dict[str, Any]]:
        """
        Fetch the raw newsletter data from the API and cache it

        Parameters
        ----------
        force_refresh : bool
            Whether to force a refresh of the data, ignoring the cache

        Returns
        -------
        List[Dict[str, Any]]
            Full newsletter metadata

        Raises
        ------
        requests.HTTPError
            If any page request answers with an error status
        """
        if self._newsletters_data is not None and not force_refresh:
            return self._newsletters_data

        endpoint = f"https://substack.com/api/v1/category/public/{self.id}/all?page="

        all_newsletters: List[Dict[str, Any]] = []
        page_num = 0
        more = True
        # endpoint doesn't return more than 21 pages
        while more and page_num <= 20:
            r = requests.get(endpoint + str(page_num), headers=HEADERS, timeout=30)
            r.raise_for_status()

            resp = r.json()
            all_newsletters.extend(resp["publications"])
            page_num += 1
            # The response itself says whether another page exists
            more = resp["more"]

        self._newsletters_data = all_newsletters
        return all_newsletters

    def get_newsletter_urls(self) -> List[str]:
        """
        Get only the URLs of newsletters in this category

        Returns
        -------
        List[str]
            List of newsletter URLs
        """
        return [item["base_url"] for item in self._fetch_newsletters_data()]

    def get_newsletters(self) -> List["Newsletter"]:
        """
        Get Newsletter objects for all newsletters in this category

        Returns
        -------
        List[Newsletter]
            List of Newsletter objects
        """
        return [Newsletter(url) for url in self.get_newsletter_urls()]

    def get_newsletter_metadata(self) -> List[Dict[str, Any]]:
        """
        Get full metadata for all newsletters in this category

        Returns
        -------
        List[Dict[str, Any]]
            List of newsletter metadata dictionaries
        """
        return self._fetch_newsletters_data()

    def refresh_data(self) -> None:
        """
        Force refresh of the newsletter data cache
        """
        self._fetch_newsletters_data(force_refresh=True)
@@ -0,0 +1,224 @@
1
+ from time import sleep
2
+ from typing import Any, Dict, List, Optional
3
+
4
+ import requests
5
+
6
# Browser-like User-Agent header sent with every request made by this module.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.77 Safari/537.36"
    )
}
9
+
10
+
11
class Newsletter:
    """
    Newsletter class for interacting with Substack newsletters

    All requests go to endpoints under ``<url>/api/v1``, so ``url`` should be
    the absolute base URL of the publication (including the scheme).
    """

    def __init__(self, url: str) -> None:
        """
        Initialize a Newsletter object.

        Parameters
        ----------
        url : str
            The URL of the Substack newsletter
        """
        self.url = url

    def __str__(self) -> str:
        return f"Newsletter: {self.url}"

    def __repr__(self) -> str:
        return f"Newsletter(url={self.url})"

    @staticmethod
    def _ensure_scheme(domain: str) -> str:
        """
        Return ``domain`` as an absolute URL, defaulting to https

        Values that already start with ``http://`` or ``https://`` are
        returned unchanged.
        """
        if domain.startswith(("http://", "https://")):
            return domain
        return f"https://{domain}"

    def _fetch_paginated_posts(
        self, params: Dict[str, str], limit: Optional[int] = None, page_size: int = 15
    ) -> List[Dict[str, Any]]:
        """
        Helper method to fetch paginated posts with different query parameters

        Parameters
        ----------
        params : Dict[str, str]
            Dictionary of query parameters to include in the API request
        limit : Optional[int]
            Maximum number of posts to return; None means no limit, and a
            value of zero or less yields an empty list without any request
        page_size : int
            Number of posts to retrieve per page request

        Returns
        -------
        List[Dict[str, Any]]
            List of post data dictionaries
        """
        # Nothing can satisfy a non-positive limit, so skip the network entirely
        if limit is not None and limit <= 0:
            return []

        results: List[Dict[str, Any]] = []
        offset = 0

        while True:
            # Copy so the caller's dict is never mutated
            current_params = params.copy()
            current_params.update({"offset": str(offset), "limit": str(page_size)})

            # Let requests build the query string so values such as search
            # terms containing spaces or '&' are URL-encoded correctly
            response = requests.get(
                f"{self.url}/api/v1/archive",
                params=current_params,
                headers=HEADERS,
                timeout=30,
            )

            # Best effort: a failed page ends pagination and whatever was
            # collected so far is returned
            if response.status_code != 200:
                break

            items = response.json()
            if not items:
                break

            results.extend(items)
            offset += page_size

            # Stop once the requested limit is reached
            if limit is not None and len(results) >= limit:
                results = results[:limit]
                break

            # Fewer items than requested means this was the last page
            if len(items) < page_size:
                break

            # Be nice to the API, but only when another request follows
            sleep(0.5)

        # Raw post dictionaries are returned; callers build Post objects
        return results

    def _fetch_post_objects(self, params: Dict[str, str], limit: Optional[int]) -> List:
        """
        Fetch archive entries for ``params`` and wrap each one in a Post
        """
        from .post import Post  # Import here to avoid circular import

        post_data = self._fetch_paginated_posts(params, limit)
        return [Post(item["canonical_url"]) for item in post_data]

    def get_posts(self, sorting: str = "new", limit: Optional[int] = None) -> List:
        """
        Get posts from the newsletter with specified sorting

        Parameters
        ----------
        sorting : str
            Sorting order for the posts ("new", "top", "pinned", or "community")
        limit : Optional[int]
            Maximum number of posts to return

        Returns
        -------
        List[Post]
            List of Post objects
        """
        return self._fetch_post_objects({"sort": sorting}, limit)

    def search_posts(self, query: str, limit: Optional[int] = None) -> List:
        """
        Search posts in the newsletter with the given query

        Parameters
        ----------
        query : str
            Search query string
        limit : Optional[int]
            Maximum number of posts to return

        Returns
        -------
        List[Post]
            List of Post objects matching the search query
        """
        return self._fetch_post_objects({"sort": "new", "search": query}, limit)

    def get_podcasts(self, limit: Optional[int] = None) -> List:
        """
        Get podcast posts from the newsletter

        Parameters
        ----------
        limit : Optional[int]
            Maximum number of podcast posts to return

        Returns
        -------
        List[Post]
            List of Post objects representing podcast posts
        """
        return self._fetch_post_objects({"sort": "new", "type": "podcast"}, limit)

    def get_recommendations(self) -> List["Newsletter"]:
        """
        Get recommended publications for this newsletter

        Returns
        -------
        List[Newsletter]
            List of recommended Newsletter objects; empty when the newsletter
            has no posts, the recommendations request does not return
            status 200, or there are no recommendations
        """
        # First get any post to extract the publication ID
        posts = self.get_posts(limit=1)
        if not posts:
            return []

        publication_id = posts[0].get_metadata()["publication_id"]

        # Now get the recommendations
        endpoint = f"{self.url}/api/v1/recommendations/from/{publication_id}"
        response = requests.get(endpoint, headers=HEADERS, timeout=30)

        if response.status_code != 200:
            return []

        recommendations = response.json()
        if not recommendations:
            return []

        result = []
        for rec in recommendations:
            recpub = rec["recommendedPublication"]
            # Prefer the custom domain when one is set, else the substack subdomain
            domain = recpub.get("custom_domain") or f"{recpub['subdomain']}.substack.com"
            # Add a scheme when missing: a scheme-less URL would make every
            # later request on the returned object fail
            result.append(Newsletter(self._ensure_scheme(domain)))

        return result

    def get_authors(self) -> List:
        """
        Get authors of the newsletter

        Returns
        -------
        List[User]
            List of User objects representing the authors

        Raises
        ------
        requests.HTTPError
            If the authors endpoint answers with an error status
        """
        from .user import User  # Import here to avoid circular import

        r = requests.get(
            f"{self.url}/api/v1/publication/users/ranked?public=true",
            headers=HEADERS,
            timeout=30,
        )
        r.raise_for_status()
        authors = r.json()
        return [User(author["handle"]) for author in authors]