substack-api 1.0.2__tar.gz → 1.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- substack_api-1.1.1/PKG-INFO +221 -0
- substack_api-1.1.1/README.md +208 -0
- {substack_api-1.0.2 → substack_api-1.1.1}/pyproject.toml +7 -4
- substack_api-1.1.1/substack_api/__init__.py +14 -0
- substack_api-1.1.1/substack_api/auth.py +106 -0
- {substack_api-1.0.2 → substack_api-1.1.1}/substack_api/newsletter.py +37 -12
- {substack_api-1.0.2 → substack_api-1.1.1}/substack_api/post.py +32 -3
- substack_api-1.1.1/substack_api/user.py +267 -0
- substack_api-1.1.1/substack_api.egg-info/PKG-INFO +221 -0
- {substack_api-1.0.2 → substack_api-1.1.1}/substack_api.egg-info/SOURCES.txt +4 -1
- substack_api-1.1.1/tests/test_auth.py +237 -0
- substack_api-1.1.1/tests/test_user_redirects.py +292 -0
- substack_api-1.0.2/PKG-INFO +0 -131
- substack_api-1.0.2/README.md +0 -122
- substack_api-1.0.2/substack_api/__init__.py +0 -6
- substack_api-1.0.2/substack_api/user.py +0 -136
- substack_api-1.0.2/substack_api.egg-info/PKG-INFO +0 -131
- {substack_api-1.0.2 → substack_api-1.1.1}/LICENSE +0 -0
- {substack_api-1.0.2 → substack_api-1.1.1}/setup.cfg +0 -0
- {substack_api-1.0.2 → substack_api-1.1.1}/substack_api/category.py +0 -0
- {substack_api-1.0.2 → substack_api-1.1.1}/substack_api.egg-info/dependency_links.txt +0 -0
- {substack_api-1.0.2 → substack_api-1.1.1}/substack_api.egg-info/requires.txt +0 -0
- {substack_api-1.0.2 → substack_api-1.1.1}/substack_api.egg-info/top_level.txt +0 -0
- {substack_api-1.0.2 → substack_api-1.1.1}/tests/test_category.py +0 -0
- {substack_api-1.0.2 → substack_api-1.1.1}/tests/test_newsletter.py +0 -0
- {substack_api-1.0.2 → substack_api-1.1.1}/tests/test_post.py +0 -0
- {substack_api-1.0.2 → substack_api-1.1.1}/tests/test_user.py +0 -0
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: substack-api
|
|
3
|
+
Version: 1.1.1
|
|
4
|
+
Summary: Unofficial wrapper for the Substack API
|
|
5
|
+
Project-URL: Homepage, https://github.com/nhagar/substack_api
|
|
6
|
+
Project-URL: Bug Tracker, https://github.com/nhagar/substack_api/issues
|
|
7
|
+
Project-URL: Documentation, https://nhagar.github.io/substack_api/
|
|
8
|
+
Requires-Python: >=3.12
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: requests>=2.32.3
|
|
12
|
+
Dynamic: license-file
|
|
13
|
+
|
|
14
|
+
# Substack API
|
|
15
|
+
|
|
16
|
+
An unofficial Python client library for interacting with Substack newsletters and content.
|
|
17
|
+
|
|
18
|
+
## Overview
|
|
19
|
+
|
|
20
|
+
This library provides Python interfaces for interacting with Substack's unofficial API, allowing you to:
|
|
21
|
+
|
|
22
|
+
- Retrieve newsletter posts, podcasts, and recommendations
|
|
23
|
+
- Get user profile information and subscriptions
|
|
24
|
+
- Fetch post content and metadata
|
|
25
|
+
- Search for posts within newsletters
|
|
26
|
+
- Access paywalled content **that you have written or paid for** with user-provided authentication
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
# Using pip
|
|
32
|
+
pip install substack-api
|
|
33
|
+
|
|
34
|
+
# Using poetry
|
|
35
|
+
poetry add substack-api
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Usage Examples
|
|
39
|
+
|
|
40
|
+
### Working with Newsletters
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from substack_api import Newsletter
|
|
44
|
+
|
|
45
|
+
# Initialize a newsletter by its URL
|
|
46
|
+
newsletter = Newsletter("https://example.substack.com")
|
|
47
|
+
|
|
48
|
+
# Get recent posts (returns Post objects)
|
|
49
|
+
recent_posts = newsletter.get_posts(limit=5)
|
|
50
|
+
|
|
51
|
+
# Get posts sorted by popularity
|
|
52
|
+
top_posts = newsletter.get_posts(sorting="top", limit=10)
|
|
53
|
+
|
|
54
|
+
# Search for posts
|
|
55
|
+
search_results = newsletter.search_posts("machine learning", limit=3)
|
|
56
|
+
|
|
57
|
+
# Get podcast episodes
|
|
58
|
+
podcasts = newsletter.get_podcasts(limit=5)
|
|
59
|
+
|
|
60
|
+
# Get recommended newsletters
|
|
61
|
+
recommendations = newsletter.get_recommendations()
|
|
62
|
+
|
|
63
|
+
# Get newsletter authors
|
|
64
|
+
authors = newsletter.get_authors()
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Working with Posts
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from substack_api import Post
|
|
71
|
+
|
|
72
|
+
# Initialize a post by its URL
|
|
73
|
+
post = Post("https://example.substack.com/p/post-slug")
|
|
74
|
+
|
|
75
|
+
# Get post metadata
|
|
76
|
+
metadata = post.get_metadata()
|
|
77
|
+
|
|
78
|
+
# Get the post's HTML content
|
|
79
|
+
content = post.get_content()
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Accessing Paywalled Content with Authentication
|
|
83
|
+
|
|
84
|
+
To access paywalled content, you need to provide your own session cookies from a logged-in Substack session:
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from substack_api import Newsletter, Post, SubstackAuth
|
|
88
|
+
|
|
89
|
+
# Set up authentication with your cookies
|
|
90
|
+
auth = SubstackAuth(cookies_path="path/to/your/cookies.json")
|
|
91
|
+
|
|
92
|
+
# Use authentication with newsletters
|
|
93
|
+
newsletter = Newsletter("https://example.substack.com", auth=auth)
|
|
94
|
+
posts = newsletter.get_posts(limit=5) # Can now access paywalled posts
|
|
95
|
+
|
|
96
|
+
# Use authentication with individual posts
|
|
97
|
+
post = Post("https://example.substack.com/p/paywalled-post", auth=auth)
|
|
98
|
+
content = post.get_content() # Can now access paywalled content
|
|
99
|
+
|
|
100
|
+
# Check if a post is paywalled
|
|
101
|
+
if post.is_paywalled():
|
|
102
|
+
print("This post requires a subscription")
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
#### Getting Your Cookies
|
|
106
|
+
|
|
107
|
+
To access paywalled content, you need to export your browser cookies from a logged-in Substack session. The cookies should be in JSON format with the following structure:
|
|
108
|
+
|
|
109
|
+
```json
|
|
110
|
+
[
|
|
111
|
+
{
|
|
112
|
+
"name": "substack.sid",
|
|
113
|
+
"value": "your_session_id",
|
|
114
|
+
"domain": ".substack.com",
|
|
115
|
+
"path": "/",
|
|
116
|
+
"secure": true
|
|
117
|
+
},
|
|
118
|
+
{
|
|
119
|
+
"name": "substack.lli",
|
|
120
|
+
"value": "your_lli_value",
|
|
121
|
+
"domain": ".substack.com",
|
|
122
|
+
"path": "/",
|
|
123
|
+
"secure": true
|
|
124
|
+
},
|
|
125
|
+
...
|
|
126
|
+
]
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
**Important**: Only use your own cookies from your own authenticated session. **This feature is intended for users to access their own subscribed or authored content programmatically.**
|
|
130
|
+
|
|
131
|
+
### Working with Users
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from substack_api import User
|
|
135
|
+
|
|
136
|
+
# Initialize a user by their username
|
|
137
|
+
user = User("username")
|
|
138
|
+
|
|
139
|
+
# Get user profile information
|
|
140
|
+
profile_data = user.get_raw_data()
|
|
141
|
+
|
|
142
|
+
# Get user ID and name
|
|
143
|
+
user_id = user.id
|
|
144
|
+
name = user.name
|
|
145
|
+
|
|
146
|
+
# Get user's subscriptions
|
|
147
|
+
subscriptions = user.get_subscriptions()
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
#### Handling Renamed Accounts
|
|
151
|
+
Substack allows users to change their handle (username) at any time. When this happens, the old API endpoints return 404 errors. This library automatically handles these redirects by default.
|
|
152
|
+
##### Automatic Redirect Handling
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
from substack_api import User
|
|
156
|
+
|
|
157
|
+
# This will automatically follow redirects if the handle has changed
|
|
158
|
+
user = User("oldhandle") # Will find the user even if they renamed to "newhandle"
|
|
159
|
+
|
|
160
|
+
# Check if a redirect occurred
|
|
161
|
+
if user.was_redirected:
|
|
162
|
+
print(f"User was renamed from {user.original_username} to {user.username}")
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
##### Disable Redirect Following
|
|
166
|
+
|
|
167
|
+
If you prefer to handle 404s yourself:
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
# Disable automatic redirect following
|
|
171
|
+
user = User("oldhandle", follow_redirects=False)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
##### Manual Handle Resolution
|
|
175
|
+
|
|
176
|
+
You can also manually resolve handle redirects:
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from substack_api import resolve_handle_redirect
|
|
180
|
+
|
|
181
|
+
new_handle = resolve_handle_redirect("oldhandle")
|
|
182
|
+
if new_handle:
|
|
183
|
+
print(f"Handle was renamed to: {new_handle}")
|
|
184
|
+
```
|
|
185
|
+
## Limitations
|
|
186
|
+
|
|
187
|
+
- This is an unofficial library and not endorsed by Substack
|
|
188
|
+
- APIs may change without notice, potentially breaking functionality
|
|
189
|
+
- Rate limiting may be enforced by Substack
|
|
190
|
+
- **Authentication requires users to provide their own session cookies**
|
|
191
|
+
- **Users are responsible for complying with Substack's terms of service when using authentication features**
|
|
192
|
+
|
|
193
|
+
## Development
|
|
194
|
+
|
|
195
|
+
### Running Tests
|
|
196
|
+
|
|
197
|
+
```bash
|
|
198
|
+
# Install dev dependencies
|
|
199
|
+
pip install -e ".[dev]"
|
|
200
|
+
|
|
201
|
+
# Run tests
|
|
202
|
+
pytest
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### Contributing
|
|
206
|
+
|
|
207
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
208
|
+
|
|
209
|
+
1. Fork the repository
|
|
210
|
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
|
211
|
+
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
|
|
212
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
213
|
+
5. Open a Pull Request
|
|
214
|
+
|
|
215
|
+
## License
|
|
216
|
+
|
|
217
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
218
|
+
|
|
219
|
+
## Disclaimer
|
|
220
|
+
|
|
221
|
+
This package is not affiliated with, endorsed by, or connected to Substack in any way. It is an independent project created to make Substack content more accessible through Python.
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# Substack API
|
|
2
|
+
|
|
3
|
+
An unofficial Python client library for interacting with Substack newsletters and content.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
This library provides Python interfaces for interacting with Substack's unofficial API, allowing you to:
|
|
8
|
+
|
|
9
|
+
- Retrieve newsletter posts, podcasts, and recommendations
|
|
10
|
+
- Get user profile information and subscriptions
|
|
11
|
+
- Fetch post content and metadata
|
|
12
|
+
- Search for posts within newsletters
|
|
13
|
+
- Access paywalled content **that you have written or paid for** with user-provided authentication
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Using pip
|
|
19
|
+
pip install substack-api
|
|
20
|
+
|
|
21
|
+
# Using poetry
|
|
22
|
+
poetry add substack-api
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Usage Examples
|
|
26
|
+
|
|
27
|
+
### Working with Newsletters
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from substack_api import Newsletter
|
|
31
|
+
|
|
32
|
+
# Initialize a newsletter by its URL
|
|
33
|
+
newsletter = Newsletter("https://example.substack.com")
|
|
34
|
+
|
|
35
|
+
# Get recent posts (returns Post objects)
|
|
36
|
+
recent_posts = newsletter.get_posts(limit=5)
|
|
37
|
+
|
|
38
|
+
# Get posts sorted by popularity
|
|
39
|
+
top_posts = newsletter.get_posts(sorting="top", limit=10)
|
|
40
|
+
|
|
41
|
+
# Search for posts
|
|
42
|
+
search_results = newsletter.search_posts("machine learning", limit=3)
|
|
43
|
+
|
|
44
|
+
# Get podcast episodes
|
|
45
|
+
podcasts = newsletter.get_podcasts(limit=5)
|
|
46
|
+
|
|
47
|
+
# Get recommended newsletters
|
|
48
|
+
recommendations = newsletter.get_recommendations()
|
|
49
|
+
|
|
50
|
+
# Get newsletter authors
|
|
51
|
+
authors = newsletter.get_authors()
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Working with Posts
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from substack_api import Post
|
|
58
|
+
|
|
59
|
+
# Initialize a post by its URL
|
|
60
|
+
post = Post("https://example.substack.com/p/post-slug")
|
|
61
|
+
|
|
62
|
+
# Get post metadata
|
|
63
|
+
metadata = post.get_metadata()
|
|
64
|
+
|
|
65
|
+
# Get the post's HTML content
|
|
66
|
+
content = post.get_content()
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Accessing Paywalled Content with Authentication
|
|
70
|
+
|
|
71
|
+
To access paywalled content, you need to provide your own session cookies from a logged-in Substack session:
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from substack_api import Newsletter, Post, SubstackAuth
|
|
75
|
+
|
|
76
|
+
# Set up authentication with your cookies
|
|
77
|
+
auth = SubstackAuth(cookies_path="path/to/your/cookies.json")
|
|
78
|
+
|
|
79
|
+
# Use authentication with newsletters
|
|
80
|
+
newsletter = Newsletter("https://example.substack.com", auth=auth)
|
|
81
|
+
posts = newsletter.get_posts(limit=5) # Can now access paywalled posts
|
|
82
|
+
|
|
83
|
+
# Use authentication with individual posts
|
|
84
|
+
post = Post("https://example.substack.com/p/paywalled-post", auth=auth)
|
|
85
|
+
content = post.get_content() # Can now access paywalled content
|
|
86
|
+
|
|
87
|
+
# Check if a post is paywalled
|
|
88
|
+
if post.is_paywalled():
|
|
89
|
+
print("This post requires a subscription")
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
#### Getting Your Cookies
|
|
93
|
+
|
|
94
|
+
To access paywalled content, you need to export your browser cookies from a logged-in Substack session. The cookies should be in JSON format with the following structure:
|
|
95
|
+
|
|
96
|
+
```json
|
|
97
|
+
[
|
|
98
|
+
{
|
|
99
|
+
"name": "substack.sid",
|
|
100
|
+
"value": "your_session_id",
|
|
101
|
+
"domain": ".substack.com",
|
|
102
|
+
"path": "/",
|
|
103
|
+
"secure": true
|
|
104
|
+
},
|
|
105
|
+
{
|
|
106
|
+
"name": "substack.lli",
|
|
107
|
+
"value": "your_lli_value",
|
|
108
|
+
"domain": ".substack.com",
|
|
109
|
+
"path": "/",
|
|
110
|
+
"secure": true
|
|
111
|
+
},
|
|
112
|
+
...
|
|
113
|
+
]
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
**Important**: Only use your own cookies from your own authenticated session. **This feature is intended for users to access their own subscribed or authored content programmatically.**
|
|
117
|
+
|
|
118
|
+
### Working with Users
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
from substack_api import User
|
|
122
|
+
|
|
123
|
+
# Initialize a user by their username
|
|
124
|
+
user = User("username")
|
|
125
|
+
|
|
126
|
+
# Get user profile information
|
|
127
|
+
profile_data = user.get_raw_data()
|
|
128
|
+
|
|
129
|
+
# Get user ID and name
|
|
130
|
+
user_id = user.id
|
|
131
|
+
name = user.name
|
|
132
|
+
|
|
133
|
+
# Get user's subscriptions
|
|
134
|
+
subscriptions = user.get_subscriptions()
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
#### Handling Renamed Accounts
|
|
138
|
+
Substack allows users to change their handle (username) at any time. When this happens, the old API endpoints return 404 errors. This library automatically handles these redirects by default.
|
|
139
|
+
##### Automatic Redirect Handling
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
from substack_api import User
|
|
143
|
+
|
|
144
|
+
# This will automatically follow redirects if the handle has changed
|
|
145
|
+
user = User("oldhandle") # Will find the user even if they renamed to "newhandle"
|
|
146
|
+
|
|
147
|
+
# Check if a redirect occurred
|
|
148
|
+
if user.was_redirected:
|
|
149
|
+
print(f"User was renamed from {user.original_username} to {user.username}")
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
##### Disable Redirect Following
|
|
153
|
+
|
|
154
|
+
If you prefer to handle 404s yourself:
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
# Disable automatic redirect following
|
|
158
|
+
user = User("oldhandle", follow_redirects=False)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
##### Manual Handle Resolution
|
|
162
|
+
|
|
163
|
+
You can also manually resolve handle redirects:
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
from substack_api import resolve_handle_redirect
|
|
167
|
+
|
|
168
|
+
new_handle = resolve_handle_redirect("oldhandle")
|
|
169
|
+
if new_handle:
|
|
170
|
+
print(f"Handle was renamed to: {new_handle}")
|
|
171
|
+
```
|
|
172
|
+
## Limitations
|
|
173
|
+
|
|
174
|
+
- This is an unofficial library and not endorsed by Substack
|
|
175
|
+
- APIs may change without notice, potentially breaking functionality
|
|
176
|
+
- Rate limiting may be enforced by Substack
|
|
177
|
+
- **Authentication requires users to provide their own session cookies**
|
|
178
|
+
- **Users are responsible for complying with Substack's terms of service when using authentication features**
|
|
179
|
+
|
|
180
|
+
## Development
|
|
181
|
+
|
|
182
|
+
### Running Tests
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
# Install dev dependencies
|
|
186
|
+
pip install -e ".[dev]"
|
|
187
|
+
|
|
188
|
+
# Run tests
|
|
189
|
+
pytest
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Contributing
|
|
193
|
+
|
|
194
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
195
|
+
|
|
196
|
+
1. Fork the repository
|
|
197
|
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
|
198
|
+
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
|
|
199
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
200
|
+
5. Open a Pull Request
|
|
201
|
+
|
|
202
|
+
## License
|
|
203
|
+
|
|
204
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
205
|
+
|
|
206
|
+
## Disclaimer
|
|
207
|
+
|
|
208
|
+
This package is not affiliated with, endorsed by, or connected to Substack in any way. It is an independent project created to make Substack content more accessible through Python.
|
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "substack-api"
|
|
3
|
-
version = "1.0.2"
|
|
3
|
+
version = "1.1.1"
|
|
4
4
|
description = "Unofficial wrapper for the Substack API"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
7
|
-
dependencies = [
|
|
8
|
-
"requests>=2.32.3",
|
|
9
|
-
]
|
|
7
|
+
dependencies = ["requests>=2.32.3"]
|
|
10
8
|
|
|
11
9
|
[dependency-groups]
|
|
12
10
|
dev = [
|
|
@@ -18,3 +16,8 @@ dev = [
|
|
|
18
16
|
"pytest>=8.3.4",
|
|
19
17
|
"ruff>=0.9.9",
|
|
20
18
|
]
|
|
19
|
+
|
|
20
|
+
[project.urls]
|
|
21
|
+
"Homepage" = "https://github.com/nhagar/substack_api"
|
|
22
|
+
"Bug Tracker" = "https://github.com/nhagar/substack_api/issues"
|
|
23
|
+
"Documentation" = "https://nhagar.github.io/substack_api/"
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from .auth import SubstackAuth
|
|
2
|
+
from .category import Category
|
|
3
|
+
from .newsletter import Newsletter
|
|
4
|
+
from .post import Post
|
|
5
|
+
from .user import User, resolve_handle_redirect
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"User",
|
|
9
|
+
"Post",
|
|
10
|
+
"Category",
|
|
11
|
+
"Newsletter",
|
|
12
|
+
"SubstackAuth",
|
|
13
|
+
"resolve_handle_redirect",
|
|
14
|
+
]
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
import requests
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SubstackAuth:
|
|
8
|
+
"""Handles authentication for Substack API requests."""
|
|
9
|
+
|
|
10
|
+
def __init__(
|
|
11
|
+
self,
|
|
12
|
+
cookies_path: str,
|
|
13
|
+
):
|
|
14
|
+
"""
|
|
15
|
+
Initialize authentication handler.
|
|
16
|
+
|
|
17
|
+
Parameters
|
|
18
|
+
----------
|
|
19
|
+
cookies_path : str, optional
|
|
20
|
+
Path to retrieve session cookies from
|
|
21
|
+
"""
|
|
22
|
+
self.cookies_path = cookies_path
|
|
23
|
+
self.session = requests.Session()
|
|
24
|
+
self.authenticated = False
|
|
25
|
+
|
|
26
|
+
# Set default headers
|
|
27
|
+
self.session.headers.update(
|
|
28
|
+
{
|
|
29
|
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
|
|
30
|
+
"Accept": "application/json",
|
|
31
|
+
"Content-Type": "application/json",
|
|
32
|
+
}
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
# Try to load existing cookies
|
|
36
|
+
if os.path.exists(self.cookies_path):
|
|
37
|
+
self.load_cookies()
|
|
38
|
+
self.authenticated = True
|
|
39
|
+
else:
|
|
40
|
+
print(f"Cookies file not found at {self.cookies_path}. Please log in.")
|
|
41
|
+
self.authenticated = False
|
|
42
|
+
self.session.cookies.clear()
|
|
43
|
+
|
|
44
|
+
def load_cookies(self) -> bool:
|
|
45
|
+
"""
|
|
46
|
+
Load cookies from file.
|
|
47
|
+
|
|
48
|
+
Returns
|
|
49
|
+
-------
|
|
50
|
+
bool
|
|
51
|
+
True if cookies loaded successfully
|
|
52
|
+
"""
|
|
53
|
+
try:
|
|
54
|
+
with open(self.cookies_path, "r") as f:
|
|
55
|
+
cookies = json.load(f)
|
|
56
|
+
|
|
57
|
+
for cookie in cookies:
|
|
58
|
+
self.session.cookies.set(
|
|
59
|
+
cookie["name"],
|
|
60
|
+
cookie["value"],
|
|
61
|
+
domain=cookie.get("domain"),
|
|
62
|
+
path=cookie.get("path", "/"),
|
|
63
|
+
secure=cookie.get("secure", False),
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
return True
|
|
67
|
+
|
|
68
|
+
except Exception as e:
|
|
69
|
+
print(f"Failed to load cookies: {str(e)}")
|
|
70
|
+
return False
|
|
71
|
+
|
|
72
|
+
def get(self, url: str, **kwargs) -> requests.Response:
|
|
73
|
+
"""
|
|
74
|
+
Make authenticated GET request.
|
|
75
|
+
|
|
76
|
+
Parameters
|
|
77
|
+
----------
|
|
78
|
+
url : str
|
|
79
|
+
URL to request
|
|
80
|
+
**kwargs
|
|
81
|
+
Additional arguments to pass to requests.get
|
|
82
|
+
|
|
83
|
+
Returns
|
|
84
|
+
-------
|
|
85
|
+
requests.Response
|
|
86
|
+
Response object
|
|
87
|
+
"""
|
|
88
|
+
return self.session.get(url, **kwargs)
|
|
89
|
+
|
|
90
|
+
def post(self, url: str, **kwargs) -> requests.Response:
|
|
91
|
+
"""
|
|
92
|
+
Make authenticated POST request.
|
|
93
|
+
|
|
94
|
+
Parameters
|
|
95
|
+
----------
|
|
96
|
+
url : str
|
|
97
|
+
URL to request
|
|
98
|
+
**kwargs
|
|
99
|
+
Additional arguments to pass to requests.post
|
|
100
|
+
|
|
101
|
+
Returns
|
|
102
|
+
-------
|
|
103
|
+
requests.Response
|
|
104
|
+
Response object
|
|
105
|
+
"""
|
|
106
|
+
return self.session.post(url, **kwargs)
|
|
@@ -3,6 +3,8 @@ from typing import Any, Dict, List, Optional
|
|
|
3
3
|
|
|
4
4
|
import requests
|
|
5
5
|
|
|
6
|
+
from substack_api.auth import SubstackAuth
|
|
7
|
+
|
|
6
8
|
HEADERS = {
|
|
7
9
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"
|
|
8
10
|
}
|
|
@@ -13,7 +15,7 @@ class Newsletter:
|
|
|
13
15
|
Newsletter class for interacting with Substack newsletters
|
|
14
16
|
"""
|
|
15
17
|
|
|
16
|
-
def __init__(self, url: str) -> None:
|
|
18
|
+
def __init__(self, url: str, auth: Optional[SubstackAuth] = None) -> None:
|
|
17
19
|
"""
|
|
18
20
|
Initialize a Newsletter object.
|
|
19
21
|
|
|
@@ -21,8 +23,11 @@ class Newsletter:
|
|
|
21
23
|
----------
|
|
22
24
|
url : str
|
|
23
25
|
The URL of the Substack newsletter
|
|
26
|
+
auth : Optional[SubstackAuth]
|
|
27
|
+
Authentication handler for accessing paywalled content
|
|
24
28
|
"""
|
|
25
29
|
self.url = url
|
|
30
|
+
self.auth = auth
|
|
26
31
|
|
|
27
32
|
def __str__(self) -> str:
|
|
28
33
|
return f"Newsletter: {self.url}"
|
|
@@ -30,6 +35,27 @@ class Newsletter:
|
|
|
30
35
|
def __repr__(self) -> str:
|
|
31
36
|
return f"Newsletter(url={self.url})"
|
|
32
37
|
|
|
38
|
+
def _make_request(self, endpoint: str, **kwargs) -> requests.Response:
|
|
39
|
+
"""
|
|
40
|
+
Make a GET request to the specified endpoint with authentication if needed.
|
|
41
|
+
|
|
42
|
+
Parameters
|
|
43
|
+
----------
|
|
44
|
+
endpoint : str
|
|
45
|
+
The API endpoint to request
|
|
46
|
+
**kwargs : Any
|
|
47
|
+
Additional parameters for the request
|
|
48
|
+
|
|
49
|
+
Returns
|
|
50
|
+
-------
|
|
51
|
+
requests.Response
|
|
52
|
+
The response object from the request
|
|
53
|
+
"""
|
|
54
|
+
if self.auth and self.auth.authenticated:
|
|
55
|
+
return self.auth.get(endpoint, **kwargs)
|
|
56
|
+
else:
|
|
57
|
+
return requests.get(endpoint, headers=HEADERS, **kwargs)
|
|
58
|
+
|
|
33
59
|
def _fetch_paginated_posts(
|
|
34
60
|
self, params: Dict[str, str], limit: Optional[int] = None, page_size: int = 15
|
|
35
61
|
) -> List[Dict[str, Any]]:
|
|
@@ -65,7 +91,7 @@ class Newsletter:
|
|
|
65
91
|
endpoint = f"{self.url}/api/v1/archive?{query_string}"
|
|
66
92
|
|
|
67
93
|
# Make the request
|
|
68
|
-
response = requests.get(endpoint, headers=HEADERS, timeout=30)
|
|
94
|
+
response = self._make_request(endpoint, timeout=30)
|
|
69
95
|
|
|
70
96
|
if response.status_code != 200:
|
|
71
97
|
break
|
|
@@ -115,7 +141,7 @@ class Newsletter:
|
|
|
115
141
|
|
|
116
142
|
params = {"sort": sorting}
|
|
117
143
|
post_data = self._fetch_paginated_posts(params, limit)
|
|
118
|
-
return [Post(item["canonical_url"]) for item in post_data]
|
|
144
|
+
return [Post(item["canonical_url"], auth=self.auth) for item in post_data]
|
|
119
145
|
|
|
120
146
|
def search_posts(self, query: str, limit: Optional[int] = None) -> List:
|
|
121
147
|
"""
|
|
@@ -137,7 +163,7 @@ class Newsletter:
|
|
|
137
163
|
|
|
138
164
|
params = {"sort": "new", "search": query}
|
|
139
165
|
post_data = self._fetch_paginated_posts(params, limit)
|
|
140
|
-
return [Post(item["canonical_url"]) for item in post_data]
|
|
166
|
+
return [Post(item["canonical_url"], auth=self.auth) for item in post_data]
|
|
141
167
|
|
|
142
168
|
def get_podcasts(self, limit: Optional[int] = None) -> List:
|
|
143
169
|
"""
|
|
@@ -157,7 +183,7 @@ class Newsletter:
|
|
|
157
183
|
|
|
158
184
|
params = {"sort": "new", "type": "podcast"}
|
|
159
185
|
post_data = self._fetch_paginated_posts(params, limit)
|
|
160
|
-
return [Post(item["canonical_url"]) for item in post_data]
|
|
186
|
+
return [Post(item["canonical_url"], auth=self.auth) for item in post_data]
|
|
161
187
|
|
|
162
188
|
def get_recommendations(self) -> List["Newsletter"]:
|
|
163
189
|
"""
|
|
@@ -177,7 +203,7 @@ class Newsletter:
|
|
|
177
203
|
|
|
178
204
|
# Now get the recommendations
|
|
179
205
|
endpoint = f"{self.url}/api/v1/recommendations/from/{publication_id}"
|
|
180
|
-
response = requests.get(endpoint, headers=HEADERS, timeout=30)
|
|
206
|
+
response = self._make_request(endpoint, timeout=30)
|
|
181
207
|
|
|
182
208
|
if response.status_code != 200:
|
|
183
209
|
return []
|
|
@@ -199,7 +225,9 @@ class Newsletter:
|
|
|
199
225
|
# Avoid circular import
|
|
200
226
|
from .newsletter import Newsletter
|
|
201
227
|
|
|
202
|
-
result = [Newsletter(url) for url in recommended_newsletter_urls]
|
|
228
|
+
result = [
|
|
229
|
+
Newsletter(url, auth=self.auth) for url in recommended_newsletter_urls
|
|
230
|
+
]
|
|
203
231
|
|
|
204
232
|
return result
|
|
205
233
|
|
|
@@ -214,11 +242,8 @@ class Newsletter:
|
|
|
214
242
|
"""
|
|
215
243
|
from .user import User # Import here to avoid circular import
|
|
216
244
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
headers=HEADERS,
|
|
220
|
-
timeout=30,
|
|
221
|
-
)
|
|
245
|
+
endpoint = f"{self.url}/api/v1/publication/users/ranked?public=true"
|
|
246
|
+
r = self._make_request(endpoint, timeout=30)
|
|
222
247
|
r.raise_for_status()
|
|
223
248
|
authors = r.json()
|
|
224
249
|
return [User(author["handle"]) for author in authors]
|