substack-api 1.0.2__tar.gz → 1.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. substack_api-1.1.2/PKG-INFO +221 -0
  2. substack_api-1.1.2/README.md +208 -0
  3. {substack_api-1.0.2 → substack_api-1.1.2}/pyproject.toml +7 -4
  4. substack_api-1.1.2/substack_api/__init__.py +15 -0
  5. substack_api-1.1.2/substack_api/auth.py +106 -0
  6. {substack_api-1.0.2 → substack_api-1.1.2}/substack_api/category.py +2 -0
  7. {substack_api-1.0.2 → substack_api-1.1.2}/substack_api/newsletter.py +42 -21
  8. {substack_api-1.0.2 → substack_api-1.1.2}/substack_api/post.py +32 -3
  9. substack_api-1.1.2/substack_api/user.py +267 -0
  10. substack_api-1.1.2/substack_api.egg-info/PKG-INFO +221 -0
  11. {substack_api-1.0.2 → substack_api-1.1.2}/substack_api.egg-info/SOURCES.txt +4 -1
  12. substack_api-1.1.2/tests/test_auth.py +237 -0
  13. substack_api-1.1.2/tests/test_user_redirects.py +292 -0
  14. substack_api-1.0.2/PKG-INFO +0 -131
  15. substack_api-1.0.2/README.md +0 -122
  16. substack_api-1.0.2/substack_api/__init__.py +0 -6
  17. substack_api-1.0.2/substack_api/user.py +0 -136
  18. substack_api-1.0.2/substack_api.egg-info/PKG-INFO +0 -131
  19. {substack_api-1.0.2 → substack_api-1.1.2}/LICENSE +0 -0
  20. {substack_api-1.0.2 → substack_api-1.1.2}/setup.cfg +0 -0
  21. {substack_api-1.0.2 → substack_api-1.1.2}/substack_api.egg-info/dependency_links.txt +0 -0
  22. {substack_api-1.0.2 → substack_api-1.1.2}/substack_api.egg-info/requires.txt +0 -0
  23. {substack_api-1.0.2 → substack_api-1.1.2}/substack_api.egg-info/top_level.txt +0 -0
  24. {substack_api-1.0.2 → substack_api-1.1.2}/tests/test_category.py +0 -0
  25. {substack_api-1.0.2 → substack_api-1.1.2}/tests/test_newsletter.py +0 -0
  26. {substack_api-1.0.2 → substack_api-1.1.2}/tests/test_post.py +0 -0
  27. {substack_api-1.0.2 → substack_api-1.1.2}/tests/test_user.py +0 -0
@@ -0,0 +1,221 @@
1
+ Metadata-Version: 2.4
2
+ Name: substack-api
3
+ Version: 1.1.2
4
+ Summary: Unofficial wrapper for the Substack API
5
+ Project-URL: Homepage, https://github.com/nhagar/substack_api
6
+ Project-URL: Bug Tracker, https://github.com/nhagar/substack_api/issues
7
+ Project-URL: Documentation, https://nhagar.github.io/substack_api/
8
+ Requires-Python: >=3.12
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: requests>=2.32.3
12
+ Dynamic: license-file
13
+
14
+ # Substack API
15
+
16
+ An unofficial Python client library for interacting with Substack newsletters and content.
17
+
18
+ ## Overview
19
+
20
+ This library provides Python interfaces for interacting with Substack's unofficial API, allowing you to:
21
+
22
+ - Retrieve newsletter posts, podcasts, and recommendations
23
+ - Get user profile information and subscriptions
24
+ - Fetch post content and metadata
25
+ - Search for posts within newsletters
26
+ - Access paywalled content **that you have written or paid for** with user-provided authentication
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ # Using pip
32
+ pip install substack-api
33
+
34
+ # Using poetry
35
+ poetry add substack-api
36
+ ```
37
+
38
+ ## Usage Examples
39
+
40
+ ### Working with Newsletters
41
+
42
+ ```python
43
+ from substack_api import Newsletter
44
+
45
+ # Initialize a newsletter by its URL
46
+ newsletter = Newsletter("https://example.substack.com")
47
+
48
+ # Get recent posts (returns Post objects)
49
+ recent_posts = newsletter.get_posts(limit=5)
50
+
51
+ # Get posts sorted by popularity
52
+ top_posts = newsletter.get_posts(sorting="top", limit=10)
53
+
54
+ # Search for posts
55
+ search_results = newsletter.search_posts("machine learning", limit=3)
56
+
57
+ # Get podcast episodes
58
+ podcasts = newsletter.get_podcasts(limit=5)
59
+
60
+ # Get recommended newsletters
61
+ recommendations = newsletter.get_recommendations()
62
+
63
+ # Get newsletter authors
64
+ authors = newsletter.get_authors()
65
+ ```
66
+
67
+ ### Working with Posts
68
+
69
+ ```python
70
+ from substack_api import Post
71
+
72
+ # Initialize a post by its URL
73
+ post = Post("https://example.substack.com/p/post-slug")
74
+
75
+ # Get post metadata
76
+ metadata = post.get_metadata()
77
+
78
+ # Get the post's HTML content
79
+ content = post.get_content()
80
+ ```
81
+
82
+ ### Accessing Paywalled Content with Authentication
83
+
84
+ To access paywalled content, you need to provide your own session cookies from a logged-in Substack session:
85
+
86
+ ```python
87
+ from substack_api import Newsletter, Post, SubstackAuth
88
+
89
+ # Set up authentication with your cookies
90
+ auth = SubstackAuth(cookies_path="path/to/your/cookies.json")
91
+
92
+ # Use authentication with newsletters
93
+ newsletter = Newsletter("https://example.substack.com", auth=auth)
94
+ posts = newsletter.get_posts(limit=5) # Can now access paywalled posts
95
+
96
+ # Use authentication with individual posts
97
+ post = Post("https://example.substack.com/p/paywalled-post", auth=auth)
98
+ content = post.get_content() # Can now access paywalled content
99
+
100
+ # Check if a post is paywalled
101
+ if post.is_paywalled():
102
+ print("This post requires a subscription")
103
+ ```
104
+
105
+ #### Getting Your Cookies
106
+
107
+ To access paywalled content, you need to export your browser cookies from a logged-in Substack session. The cookies should be in JSON format with the following structure:
108
+
109
+ ```json
110
+ [
111
+ {
112
+ "name": "substack.sid",
113
+ "value": "your_session_id",
114
+ "domain": ".substack.com",
115
+ "path": "/",
116
+ "secure": true
117
+ },
118
+ {
119
+ "name": "substack.lli",
120
+ "value": "your_lli_value",
121
+ "domain": ".substack.com",
122
+ "path": "/",
123
+ "secure": true
124
+ },
125
+ ...
126
+ ]
127
+ ```
128
+
129
+ **Important**: Only use your own cookies from your own authenticated session. **This feature is intended for users to access their own subscribed or authored content programmatically.**
130
+
131
+ ### Working with Users
132
+
133
+ ```python
134
+ from substack_api import User
135
+
136
+ # Initialize a user by their username
137
+ user = User("username")
138
+
139
+ # Get user profile information
140
+ profile_data = user.get_raw_data()
141
+
142
+ # Get user ID and name
143
+ user_id = user.id
144
+ name = user.name
145
+
146
+ # Get user's subscriptions
147
+ subscriptions = user.get_subscriptions()
148
+ ```
149
+
150
+ #### Handling Renamed Accounts
151
+ Substack allows users to change their handle (username) at any time. When this happens, API requests that use the old handle return 404 errors. By default, this library detects the rename and automatically follows the redirect to the user's new handle.
152
+ ##### Automatic Redirect Handling
153
+
154
+ ```python
155
+ from substack_api import User
156
+
157
+ # This will automatically follow redirects if the handle has changed
158
+ user = User("oldhandle") # Will find the user even if they renamed to "newhandle"
159
+
160
+ # Check if a redirect occurred
161
+ if user.was_redirected:
162
+ print(f"User was renamed from {user.original_username} to {user.username}")
163
+ ```
164
+
165
+ ##### Disable Redirect Following
166
+
167
+ If you prefer to handle 404s yourself:
168
+
169
+ ```python
170
+ # Disable automatic redirect following
171
+ user = User("oldhandle", follow_redirects=False)
172
+ ```
173
+
174
+ ##### Manual Handle Resolution
175
+
176
+ You can also manually resolve handle redirects:
177
+
178
+ ```python
179
+ from substack_api import resolve_handle_redirect
180
+
181
+ new_handle = resolve_handle_redirect("oldhandle")
182
+ if new_handle:
183
+ print(f"Handle was renamed to: {new_handle}")
184
+ ```
185
+ ## Limitations
186
+
187
+ - This is an unofficial library and not endorsed by Substack
188
+ - APIs may change without notice, potentially breaking functionality
189
+ - Rate limiting may be enforced by Substack
190
+ - **Authentication requires users to provide their own session cookies**
191
+ - **Users are responsible for complying with Substack's terms of service when using authentication features**
192
+
193
+ ## Development
194
+
195
+ ### Running Tests
196
+
197
+ ```bash
198
+ # Install dev dependencies
199
+ pip install -e ".[dev]"
200
+
201
+ # Run tests
202
+ pytest
203
+ ```
204
+
205
+ ### Contributing
206
+
207
+ Contributions are welcome! Please feel free to submit a Pull Request.
208
+
209
+ 1. Fork the repository
210
+ 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
211
+ 3. Commit your changes (`git commit -m 'Add some amazing feature'`)
212
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
213
+ 5. Open a Pull Request
214
+
215
+ ## License
216
+
217
+ This project is licensed under the MIT License - see the LICENSE file for details.
218
+
219
+ ## Disclaimer
220
+
221
+ This package is not affiliated with, endorsed by, or connected to Substack in any way. It is an independent project created to make Substack content more accessible through Python.
@@ -0,0 +1,208 @@
1
+ # Substack API
2
+
3
+ An unofficial Python client library for interacting with Substack newsletters and content.
4
+
5
+ ## Overview
6
+
7
+ This library provides Python interfaces for interacting with Substack's unofficial API, allowing you to:
8
+
9
+ - Retrieve newsletter posts, podcasts, and recommendations
10
+ - Get user profile information and subscriptions
11
+ - Fetch post content and metadata
12
+ - Search for posts within newsletters
13
+ - Access paywalled content **that you have written or paid for** with user-provided authentication
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ # Using pip
19
+ pip install substack-api
20
+
21
+ # Using poetry
22
+ poetry add substack-api
23
+ ```
24
+
25
+ ## Usage Examples
26
+
27
+ ### Working with Newsletters
28
+
29
+ ```python
30
+ from substack_api import Newsletter
31
+
32
+ # Initialize a newsletter by its URL
33
+ newsletter = Newsletter("https://example.substack.com")
34
+
35
+ # Get recent posts (returns Post objects)
36
+ recent_posts = newsletter.get_posts(limit=5)
37
+
38
+ # Get posts sorted by popularity
39
+ top_posts = newsletter.get_posts(sorting="top", limit=10)
40
+
41
+ # Search for posts
42
+ search_results = newsletter.search_posts("machine learning", limit=3)
43
+
44
+ # Get podcast episodes
45
+ podcasts = newsletter.get_podcasts(limit=5)
46
+
47
+ # Get recommended newsletters
48
+ recommendations = newsletter.get_recommendations()
49
+
50
+ # Get newsletter authors
51
+ authors = newsletter.get_authors()
52
+ ```
53
+
54
+ ### Working with Posts
55
+
56
+ ```python
57
+ from substack_api import Post
58
+
59
+ # Initialize a post by its URL
60
+ post = Post("https://example.substack.com/p/post-slug")
61
+
62
+ # Get post metadata
63
+ metadata = post.get_metadata()
64
+
65
+ # Get the post's HTML content
66
+ content = post.get_content()
67
+ ```
68
+
69
+ ### Accessing Paywalled Content with Authentication
70
+
71
+ To access paywalled content, you need to provide your own session cookies from a logged-in Substack session:
72
+
73
+ ```python
74
+ from substack_api import Newsletter, Post, SubstackAuth
75
+
76
+ # Set up authentication with your cookies
77
+ auth = SubstackAuth(cookies_path="path/to/your/cookies.json")
78
+
79
+ # Use authentication with newsletters
80
+ newsletter = Newsletter("https://example.substack.com", auth=auth)
81
+ posts = newsletter.get_posts(limit=5) # Can now access paywalled posts
82
+
83
+ # Use authentication with individual posts
84
+ post = Post("https://example.substack.com/p/paywalled-post", auth=auth)
85
+ content = post.get_content() # Can now access paywalled content
86
+
87
+ # Check if a post is paywalled
88
+ if post.is_paywalled():
89
+ print("This post requires a subscription")
90
+ ```
91
+
92
+ #### Getting Your Cookies
93
+
94
+ To access paywalled content, you need to export your browser cookies from a logged-in Substack session. The cookies should be in JSON format with the following structure:
95
+
96
+ ```json
97
+ [
98
+ {
99
+ "name": "substack.sid",
100
+ "value": "your_session_id",
101
+ "domain": ".substack.com",
102
+ "path": "/",
103
+ "secure": true
104
+ },
105
+ {
106
+ "name": "substack.lli",
107
+ "value": "your_lli_value",
108
+ "domain": ".substack.com",
109
+ "path": "/",
110
+ "secure": true
111
+ },
112
+ ...
113
+ ]
114
+ ```
115
+
116
+ **Important**: Only use your own cookies from your own authenticated session. **This feature is intended for users to access their own subscribed or authored content programmatically.**
117
+
118
+ ### Working with Users
119
+
120
+ ```python
121
+ from substack_api import User
122
+
123
+ # Initialize a user by their username
124
+ user = User("username")
125
+
126
+ # Get user profile information
127
+ profile_data = user.get_raw_data()
128
+
129
+ # Get user ID and name
130
+ user_id = user.id
131
+ name = user.name
132
+
133
+ # Get user's subscriptions
134
+ subscriptions = user.get_subscriptions()
135
+ ```
136
+
137
+ #### Handling Renamed Accounts
138
+ Substack allows users to change their handle (username) at any time. When this happens, API requests that use the old handle return 404 errors. By default, this library detects the rename and automatically follows the redirect to the user's new handle.
139
+ ##### Automatic Redirect Handling
140
+
141
+ ```python
142
+ from substack_api import User
143
+
144
+ # This will automatically follow redirects if the handle has changed
145
+ user = User("oldhandle") # Will find the user even if they renamed to "newhandle"
146
+
147
+ # Check if a redirect occurred
148
+ if user.was_redirected:
149
+ print(f"User was renamed from {user.original_username} to {user.username}")
150
+ ```
151
+
152
+ ##### Disable Redirect Following
153
+
154
+ If you prefer to handle 404s yourself:
155
+
156
+ ```python
157
+ # Disable automatic redirect following
158
+ user = User("oldhandle", follow_redirects=False)
159
+ ```
160
+
161
+ ##### Manual Handle Resolution
162
+
163
+ You can also manually resolve handle redirects:
164
+
165
+ ```python
166
+ from substack_api import resolve_handle_redirect
167
+
168
+ new_handle = resolve_handle_redirect("oldhandle")
169
+ if new_handle:
170
+ print(f"Handle was renamed to: {new_handle}")
171
+ ```
172
+ ## Limitations
173
+
174
+ - This is an unofficial library and not endorsed by Substack
175
+ - APIs may change without notice, potentially breaking functionality
176
+ - Rate limiting may be enforced by Substack
177
+ - **Authentication requires users to provide their own session cookies**
178
+ - **Users are responsible for complying with Substack's terms of service when using authentication features**
179
+
180
+ ## Development
181
+
182
+ ### Running Tests
183
+
184
+ ```bash
185
+ # Install dev dependencies
186
+ pip install -e ".[dev]"
187
+
188
+ # Run tests
189
+ pytest
190
+ ```
191
+
192
+ ### Contributing
193
+
194
+ Contributions are welcome! Please feel free to submit a Pull Request.
195
+
196
+ 1. Fork the repository
197
+ 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
198
+ 3. Commit your changes (`git commit -m 'Add some amazing feature'`)
199
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
200
+ 5. Open a Pull Request
201
+
202
+ ## License
203
+
204
+ This project is licensed under the MIT License - see the LICENSE file for details.
205
+
206
+ ## Disclaimer
207
+
208
+ This package is not affiliated with, endorsed by, or connected to Substack in any way. It is an independent project created to make Substack content more accessible through Python.
@@ -1,12 +1,10 @@
1
1
  [project]
2
2
  name = "substack-api"
3
- version = "1.0.2"
3
+ version = "1.1.2"
4
4
  description = "Unofficial wrapper for the Substack API"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
7
- dependencies = [
8
- "requests>=2.32.3",
9
- ]
7
+ dependencies = ["requests>=2.32.3"]
10
8
 
11
9
  [dependency-groups]
12
10
  dev = [
@@ -18,3 +16,8 @@ dev = [
18
16
  "pytest>=8.3.4",
19
17
  "ruff>=0.9.9",
20
18
  ]
19
+
20
+ [project.urls]
21
+ "Homepage" = "https://github.com/nhagar/substack_api"
22
+ "Bug Tracker" = "https://github.com/nhagar/substack_api/issues"
23
+ "Documentation" = "https://nhagar.github.io/substack_api/"
@@ -0,0 +1,15 @@
1
+ from .auth import SubstackAuth
2
+ from .category import Category, list_all_categories
3
+ from .newsletter import Newsletter
4
+ from .post import Post
5
+ from .user import User, resolve_handle_redirect
6
+
7
+ __all__ = [
8
+ "User",
9
+ "Post",
10
+ "Category",
11
+ "Newsletter",
12
+ "SubstackAuth",
13
+ "resolve_handle_redirect",
14
+ "list_all_categories",
15
+ ]
@@ -0,0 +1,106 @@
1
+ import json
2
+ import os
3
+
4
+ import requests
5
+
6
+
7
class SubstackAuth:
    """Handles cookie-based authentication for Substack API requests.

    Wraps a ``requests.Session`` configured with browser-like default
    headers and, when the cookie file can be read, the session cookies
    stored in it.
    """

    def __init__(
        self,
        cookies_path: str,
    ):
        """
        Initialize authentication handler.

        Parameters
        ----------
        cookies_path : str
            Path to the JSON file to retrieve session cookies from
        """
        self.cookies_path = cookies_path
        self.session = requests.Session()
        self.authenticated = False

        # Set default headers
        self.session.headers.update(
            {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
                "Accept": "application/json",
                "Content-Type": "application/json",
            }
        )

        # Try to load existing cookies
        if os.path.exists(self.cookies_path):
            # Only claim to be authenticated when the cookies were actually
            # loaded; an unreadable or malformed file must not be reported
            # as a successful login.
            self.authenticated = self.load_cookies()
        else:
            print(f"Cookies file not found at {self.cookies_path}. Please log in.")

        if not self.authenticated:
            self.session.cookies.clear()

    def load_cookies(self) -> bool:
        """
        Load cookies from file.

        The file must contain a JSON list of objects, each with at least
        ``name`` and ``value`` keys; ``domain``, ``path`` (default ``"/"``)
        and ``secure`` (default ``False``) are optional. Every entry is
        validated before the session is modified, so a malformed file never
        leaves a partially populated cookie jar.

        Returns
        -------
        bool
            True if cookies loaded successfully, False if the file could not
            be read, is not valid JSON, or has an unexpected structure
        """
        try:
            with open(self.cookies_path, "r", encoding="utf-8") as f:
                cookies = json.load(f)

            # Parse everything up front; KeyError/TypeError here means the
            # file does not follow the expected cookie structure.
            parsed = [
                (
                    cookie["name"],
                    cookie["value"],
                    {
                        "domain": cookie.get("domain"),
                        "path": cookie.get("path", "/"),
                        "secure": cookie.get("secure", False),
                    },
                )
                for cookie in cookies
            ]
        except (OSError, ValueError, KeyError, TypeError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Failed to load cookies: {str(e)}")
            return False

        for name, value, attrs in parsed:
            self.session.cookies.set(name, value, **attrs)

        return True

    def get(self, url: str, **kwargs) -> "requests.Response":
        """
        Make authenticated GET request.

        Parameters
        ----------
        url : str
            URL to request
        **kwargs
            Additional arguments to pass to requests.get

        Returns
        -------
        requests.Response
            Response object
        """
        return self.session.get(url, **kwargs)

    def post(self, url: str, **kwargs) -> "requests.Response":
        """
        Make authenticated POST request.

        Parameters
        ----------
        url : str
            URL to request
        **kwargs
            Additional arguments to pass to requests.post

        Returns
        -------
        requests.Response
            Response object
        """
        return self.session.post(url, **kwargs)
@@ -1,3 +1,4 @@
1
+ from time import sleep
1
2
  from typing import Any, Dict, List, Optional, Tuple
2
3
 
3
4
  import requests
@@ -127,6 +128,7 @@ class Category:
127
128
  full_url = endpoint + str(page_num)
128
129
  r = requests.get(full_url, headers=HEADERS, timeout=30)
129
130
  r.raise_for_status()
131
+ sleep(2) # Be polite to the server
130
132
 
131
133
  resp = r.json()
132
134
  newsletters = resp["publications"]