substack-api 1.0.2__tar.gz → 1.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- substack_api-1.1.2/PKG-INFO +221 -0
- substack_api-1.1.2/README.md +208 -0
- {substack_api-1.0.2 → substack_api-1.1.2}/pyproject.toml +7 -4
- substack_api-1.1.2/substack_api/__init__.py +15 -0
- substack_api-1.1.2/substack_api/auth.py +106 -0
- {substack_api-1.0.2 → substack_api-1.1.2}/substack_api/category.py +2 -0
- {substack_api-1.0.2 → substack_api-1.1.2}/substack_api/newsletter.py +42 -21
- {substack_api-1.0.2 → substack_api-1.1.2}/substack_api/post.py +32 -3
- substack_api-1.1.2/substack_api/user.py +267 -0
- substack_api-1.1.2/substack_api.egg-info/PKG-INFO +221 -0
- {substack_api-1.0.2 → substack_api-1.1.2}/substack_api.egg-info/SOURCES.txt +4 -1
- substack_api-1.1.2/tests/test_auth.py +237 -0
- substack_api-1.1.2/tests/test_user_redirects.py +292 -0
- substack_api-1.0.2/PKG-INFO +0 -131
- substack_api-1.0.2/README.md +0 -122
- substack_api-1.0.2/substack_api/__init__.py +0 -6
- substack_api-1.0.2/substack_api/user.py +0 -136
- substack_api-1.0.2/substack_api.egg-info/PKG-INFO +0 -131
- {substack_api-1.0.2 → substack_api-1.1.2}/LICENSE +0 -0
- {substack_api-1.0.2 → substack_api-1.1.2}/setup.cfg +0 -0
- {substack_api-1.0.2 → substack_api-1.1.2}/substack_api.egg-info/dependency_links.txt +0 -0
- {substack_api-1.0.2 → substack_api-1.1.2}/substack_api.egg-info/requires.txt +0 -0
- {substack_api-1.0.2 → substack_api-1.1.2}/substack_api.egg-info/top_level.txt +0 -0
- {substack_api-1.0.2 → substack_api-1.1.2}/tests/test_category.py +0 -0
- {substack_api-1.0.2 → substack_api-1.1.2}/tests/test_newsletter.py +0 -0
- {substack_api-1.0.2 → substack_api-1.1.2}/tests/test_post.py +0 -0
- {substack_api-1.0.2 → substack_api-1.1.2}/tests/test_user.py +0 -0
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: substack-api
|
|
3
|
+
Version: 1.1.2
|
|
4
|
+
Summary: Unofficial wrapper for the Substack API
|
|
5
|
+
Project-URL: Homepage, https://github.com/nhagar/substack_api
|
|
6
|
+
Project-URL: Bug Tracker, https://github.com/nhagar/substack_api/issues
|
|
7
|
+
Project-URL: Documentation, https://nhagar.github.io/substack_api/
|
|
8
|
+
Requires-Python: >=3.12
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: requests>=2.32.3
|
|
12
|
+
Dynamic: license-file
|
|
13
|
+
|
|
14
|
+
# Substack API
|
|
15
|
+
|
|
16
|
+
An unofficial Python client library for interacting with Substack newsletters and content.
|
|
17
|
+
|
|
18
|
+
## Overview
|
|
19
|
+
|
|
20
|
+
This library provides Python interfaces for interacting with Substack's unofficial API, allowing you to:
|
|
21
|
+
|
|
22
|
+
- Retrieve newsletter posts, podcasts, and recommendations
|
|
23
|
+
- Get user profile information and subscriptions
|
|
24
|
+
- Fetch post content and metadata
|
|
25
|
+
- Search for posts within newsletters
|
|
26
|
+
- Access paywalled content **that you have written or paid for** with user-provided authentication
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
# Using pip
|
|
32
|
+
pip install substack-api
|
|
33
|
+
|
|
34
|
+
# Using poetry
|
|
35
|
+
poetry add substack-api
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Usage Examples
|
|
39
|
+
|
|
40
|
+
### Working with Newsletters
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from substack_api import Newsletter
|
|
44
|
+
|
|
45
|
+
# Initialize a newsletter by its URL
|
|
46
|
+
newsletter = Newsletter("https://example.substack.com")
|
|
47
|
+
|
|
48
|
+
# Get recent posts (returns Post objects)
|
|
49
|
+
recent_posts = newsletter.get_posts(limit=5)
|
|
50
|
+
|
|
51
|
+
# Get posts sorted by popularity
|
|
52
|
+
top_posts = newsletter.get_posts(sorting="top", limit=10)
|
|
53
|
+
|
|
54
|
+
# Search for posts
|
|
55
|
+
search_results = newsletter.search_posts("machine learning", limit=3)
|
|
56
|
+
|
|
57
|
+
# Get podcast episodes
|
|
58
|
+
podcasts = newsletter.get_podcasts(limit=5)
|
|
59
|
+
|
|
60
|
+
# Get recommended newsletters
|
|
61
|
+
recommendations = newsletter.get_recommendations()
|
|
62
|
+
|
|
63
|
+
# Get newsletter authors
|
|
64
|
+
authors = newsletter.get_authors()
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Working with Posts
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from substack_api import Post
|
|
71
|
+
|
|
72
|
+
# Initialize a post by its URL
|
|
73
|
+
post = Post("https://example.substack.com/p/post-slug")
|
|
74
|
+
|
|
75
|
+
# Get post metadata
|
|
76
|
+
metadata = post.get_metadata()
|
|
77
|
+
|
|
78
|
+
# Get the post's HTML content
|
|
79
|
+
content = post.get_content()
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Accessing Paywalled Content with Authentication
|
|
83
|
+
|
|
84
|
+
To access paywalled content, you need to provide your own session cookies from a logged-in Substack session:
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from substack_api import Newsletter, Post, SubstackAuth
|
|
88
|
+
|
|
89
|
+
# Set up authentication with your cookies
|
|
90
|
+
auth = SubstackAuth(cookies_path="path/to/your/cookies.json")
|
|
91
|
+
|
|
92
|
+
# Use authentication with newsletters
|
|
93
|
+
newsletter = Newsletter("https://example.substack.com", auth=auth)
|
|
94
|
+
posts = newsletter.get_posts(limit=5) # Can now access paywalled posts
|
|
95
|
+
|
|
96
|
+
# Use authentication with individual posts
|
|
97
|
+
post = Post("https://example.substack.com/p/paywalled-post", auth=auth)
|
|
98
|
+
content = post.get_content() # Can now access paywalled content
|
|
99
|
+
|
|
100
|
+
# Check if a post is paywalled
|
|
101
|
+
if post.is_paywalled():
|
|
102
|
+
print("This post requires a subscription")
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
#### Getting Your Cookies
|
|
106
|
+
|
|
107
|
+
To access paywalled content, you need to export your browser cookies from a logged-in Substack session. The cookies should be in JSON format with the following structure:
|
|
108
|
+
|
|
109
|
+
```json
|
|
110
|
+
[
|
|
111
|
+
{
|
|
112
|
+
"name": "substack.sid",
|
|
113
|
+
"value": "your_session_id",
|
|
114
|
+
"domain": ".substack.com",
|
|
115
|
+
"path": "/",
|
|
116
|
+
"secure": true
|
|
117
|
+
},
|
|
118
|
+
{
|
|
119
|
+
"name": "substack.lli",
|
|
120
|
+
"value": "your_lli_value",
|
|
121
|
+
"domain": ".substack.com",
|
|
122
|
+
"path": "/",
|
|
123
|
+
"secure": true
|
|
124
|
+
},
|
|
125
|
+
...
|
|
126
|
+
]
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
**Important**: Only use your own cookies from your own authenticated session. **This feature is intended for users to access their own subscribed or authored content programmatically.**
|
|
130
|
+
|
|
131
|
+
### Working with Users
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from substack_api import User
|
|
135
|
+
|
|
136
|
+
# Initialize a user by their username
|
|
137
|
+
user = User("username")
|
|
138
|
+
|
|
139
|
+
# Get user profile information
|
|
140
|
+
profile_data = user.get_raw_data()
|
|
141
|
+
|
|
142
|
+
# Get user ID and name
|
|
143
|
+
user_id = user.id
|
|
144
|
+
name = user.name
|
|
145
|
+
|
|
146
|
+
# Get user's subscriptions
|
|
147
|
+
subscriptions = user.get_subscriptions()
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
#### Handling Renamed Accounts
|
|
151
|
+
Substack allows users to change their handle (username) at any time. When this happens, the old API endpoints return 404 errors. This library automatically handles these redirects by default.
|
|
152
|
+
##### Automatic Redirect Handling
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
from substack_api import User
|
|
156
|
+
|
|
157
|
+
# This will automatically follow redirects if the handle has changed
|
|
158
|
+
user = User("oldhandle") # Will find the user even if they renamed to "newhandle"
|
|
159
|
+
|
|
160
|
+
# Check if a redirect occurred
|
|
161
|
+
if user.was_redirected:
|
|
162
|
+
print(f"User was renamed from {user.original_username} to {user.username}")
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
##### Disable Redirect Following
|
|
166
|
+
|
|
167
|
+
If you prefer to handle 404s yourself:
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
# Disable automatic redirect following
|
|
171
|
+
user = User("oldhandle", follow_redirects=False)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
##### Manual Handle Resolution
|
|
175
|
+
|
|
176
|
+
You can also manually resolve handle redirects:
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from substack_api import resolve_handle_redirect
|
|
180
|
+
|
|
181
|
+
new_handle = resolve_handle_redirect("oldhandle")
|
|
182
|
+
if new_handle:
|
|
183
|
+
print(f"Handle was renamed to: {new_handle}")
|
|
184
|
+
```
|
|
185
|
+
## Limitations
|
|
186
|
+
|
|
187
|
+
- This is an unofficial library and not endorsed by Substack
|
|
188
|
+
- APIs may change without notice, potentially breaking functionality
|
|
189
|
+
- Rate limiting may be enforced by Substack
|
|
190
|
+
- **Authentication requires users to provide their own session cookies**
|
|
191
|
+
- **Users are responsible for complying with Substack's terms of service when using authentication features**
|
|
192
|
+
|
|
193
|
+
## Development
|
|
194
|
+
|
|
195
|
+
### Running Tests
|
|
196
|
+
|
|
197
|
+
```bash
|
|
198
|
+
# Install dev dependencies
|
|
199
|
+
pip install -e ".[dev]"
|
|
200
|
+
|
|
201
|
+
# Run tests
|
|
202
|
+
pytest
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### Contributing
|
|
206
|
+
|
|
207
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
208
|
+
|
|
209
|
+
1. Fork the repository
|
|
210
|
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
|
211
|
+
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
|
|
212
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
213
|
+
5. Open a Pull Request
|
|
214
|
+
|
|
215
|
+
## License
|
|
216
|
+
|
|
217
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
218
|
+
|
|
219
|
+
## Disclaimer
|
|
220
|
+
|
|
221
|
+
This package is not affiliated with, endorsed by, or connected to Substack in any way. It is an independent project created to make Substack content more accessible through Python.
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# Substack API
|
|
2
|
+
|
|
3
|
+
An unofficial Python client library for interacting with Substack newsletters and content.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
This library provides Python interfaces for interacting with Substack's unofficial API, allowing you to:
|
|
8
|
+
|
|
9
|
+
- Retrieve newsletter posts, podcasts, and recommendations
|
|
10
|
+
- Get user profile information and subscriptions
|
|
11
|
+
- Fetch post content and metadata
|
|
12
|
+
- Search for posts within newsletters
|
|
13
|
+
- Access paywalled content **that you have written or paid for** with user-provided authentication
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Using pip
|
|
19
|
+
pip install substack-api
|
|
20
|
+
|
|
21
|
+
# Using poetry
|
|
22
|
+
poetry add substack-api
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Usage Examples
|
|
26
|
+
|
|
27
|
+
### Working with Newsletters
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from substack_api import Newsletter
|
|
31
|
+
|
|
32
|
+
# Initialize a newsletter by its URL
|
|
33
|
+
newsletter = Newsletter("https://example.substack.com")
|
|
34
|
+
|
|
35
|
+
# Get recent posts (returns Post objects)
|
|
36
|
+
recent_posts = newsletter.get_posts(limit=5)
|
|
37
|
+
|
|
38
|
+
# Get posts sorted by popularity
|
|
39
|
+
top_posts = newsletter.get_posts(sorting="top", limit=10)
|
|
40
|
+
|
|
41
|
+
# Search for posts
|
|
42
|
+
search_results = newsletter.search_posts("machine learning", limit=3)
|
|
43
|
+
|
|
44
|
+
# Get podcast episodes
|
|
45
|
+
podcasts = newsletter.get_podcasts(limit=5)
|
|
46
|
+
|
|
47
|
+
# Get recommended newsletters
|
|
48
|
+
recommendations = newsletter.get_recommendations()
|
|
49
|
+
|
|
50
|
+
# Get newsletter authors
|
|
51
|
+
authors = newsletter.get_authors()
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Working with Posts
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from substack_api import Post
|
|
58
|
+
|
|
59
|
+
# Initialize a post by its URL
|
|
60
|
+
post = Post("https://example.substack.com/p/post-slug")
|
|
61
|
+
|
|
62
|
+
# Get post metadata
|
|
63
|
+
metadata = post.get_metadata()
|
|
64
|
+
|
|
65
|
+
# Get the post's HTML content
|
|
66
|
+
content = post.get_content()
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Accessing Paywalled Content with Authentication
|
|
70
|
+
|
|
71
|
+
To access paywalled content, you need to provide your own session cookies from a logged-in Substack session:
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from substack_api import Newsletter, Post, SubstackAuth
|
|
75
|
+
|
|
76
|
+
# Set up authentication with your cookies
|
|
77
|
+
auth = SubstackAuth(cookies_path="path/to/your/cookies.json")
|
|
78
|
+
|
|
79
|
+
# Use authentication with newsletters
|
|
80
|
+
newsletter = Newsletter("https://example.substack.com", auth=auth)
|
|
81
|
+
posts = newsletter.get_posts(limit=5) # Can now access paywalled posts
|
|
82
|
+
|
|
83
|
+
# Use authentication with individual posts
|
|
84
|
+
post = Post("https://example.substack.com/p/paywalled-post", auth=auth)
|
|
85
|
+
content = post.get_content() # Can now access paywalled content
|
|
86
|
+
|
|
87
|
+
# Check if a post is paywalled
|
|
88
|
+
if post.is_paywalled():
|
|
89
|
+
print("This post requires a subscription")
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
#### Getting Your Cookies
|
|
93
|
+
|
|
94
|
+
To access paywalled content, you need to export your browser cookies from a logged-in Substack session. The cookies should be in JSON format with the following structure:
|
|
95
|
+
|
|
96
|
+
```json
|
|
97
|
+
[
|
|
98
|
+
{
|
|
99
|
+
"name": "substack.sid",
|
|
100
|
+
"value": "your_session_id",
|
|
101
|
+
"domain": ".substack.com",
|
|
102
|
+
"path": "/",
|
|
103
|
+
"secure": true
|
|
104
|
+
},
|
|
105
|
+
{
|
|
106
|
+
"name": "substack.lli",
|
|
107
|
+
"value": "your_lli_value",
|
|
108
|
+
"domain": ".substack.com",
|
|
109
|
+
"path": "/",
|
|
110
|
+
"secure": true
|
|
111
|
+
},
|
|
112
|
+
...
|
|
113
|
+
]
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
**Important**: Only use your own cookies from your own authenticated session. **This feature is intended for users to access their own subscribed or authored content programmatically.**
|
|
117
|
+
|
|
118
|
+
### Working with Users
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
from substack_api import User
|
|
122
|
+
|
|
123
|
+
# Initialize a user by their username
|
|
124
|
+
user = User("username")
|
|
125
|
+
|
|
126
|
+
# Get user profile information
|
|
127
|
+
profile_data = user.get_raw_data()
|
|
128
|
+
|
|
129
|
+
# Get user ID and name
|
|
130
|
+
user_id = user.id
|
|
131
|
+
name = user.name
|
|
132
|
+
|
|
133
|
+
# Get user's subscriptions
|
|
134
|
+
subscriptions = user.get_subscriptions()
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
#### Handling Renamed Accounts
|
|
138
|
+
Substack allows users to change their handle (username) at any time. When this happens, the old API endpoints return 404 errors. This library automatically handles these redirects by default.
|
|
139
|
+
##### Automatic Redirect Handling
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
from substack_api import User
|
|
143
|
+
|
|
144
|
+
# This will automatically follow redirects if the handle has changed
|
|
145
|
+
user = User("oldhandle") # Will find the user even if they renamed to "newhandle"
|
|
146
|
+
|
|
147
|
+
# Check if a redirect occurred
|
|
148
|
+
if user.was_redirected:
|
|
149
|
+
print(f"User was renamed from {user.original_username} to {user.username}")
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
##### Disable Redirect Following
|
|
153
|
+
|
|
154
|
+
If you prefer to handle 404s yourself:
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
# Disable automatic redirect following
|
|
158
|
+
user = User("oldhandle", follow_redirects=False)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
##### Manual Handle Resolution
|
|
162
|
+
|
|
163
|
+
You can also manually resolve handle redirects:
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
from substack_api import resolve_handle_redirect
|
|
167
|
+
|
|
168
|
+
new_handle = resolve_handle_redirect("oldhandle")
|
|
169
|
+
if new_handle:
|
|
170
|
+
print(f"Handle was renamed to: {new_handle}")
|
|
171
|
+
```
|
|
172
|
+
## Limitations
|
|
173
|
+
|
|
174
|
+
- This is an unofficial library and not endorsed by Substack
|
|
175
|
+
- APIs may change without notice, potentially breaking functionality
|
|
176
|
+
- Rate limiting may be enforced by Substack
|
|
177
|
+
- **Authentication requires users to provide their own session cookies**
|
|
178
|
+
- **Users are responsible for complying with Substack's terms of service when using authentication features**
|
|
179
|
+
|
|
180
|
+
## Development
|
|
181
|
+
|
|
182
|
+
### Running Tests
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
# Install dev dependencies
|
|
186
|
+
pip install -e ".[dev]"
|
|
187
|
+
|
|
188
|
+
# Run tests
|
|
189
|
+
pytest
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Contributing
|
|
193
|
+
|
|
194
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
195
|
+
|
|
196
|
+
1. Fork the repository
|
|
197
|
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
|
198
|
+
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
|
|
199
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
200
|
+
5. Open a Pull Request
|
|
201
|
+
|
|
202
|
+
## License
|
|
203
|
+
|
|
204
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
205
|
+
|
|
206
|
+
## Disclaimer
|
|
207
|
+
|
|
208
|
+
This package is not affiliated with, endorsed by, or connected to Substack in any way. It is an independent project created to make Substack content more accessible through Python.
|
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "substack-api"
|
|
3
|
-
version = "1.0.2"
|
|
3
|
+
version = "1.1.2"
|
|
4
4
|
description = "Unofficial wrapper for the Substack API"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
7
|
-
dependencies = [
|
|
8
|
-
"requests>=2.32.3",
|
|
9
|
-
]
|
|
7
|
+
dependencies = ["requests>=2.32.3"]
|
|
10
8
|
|
|
11
9
|
[dependency-groups]
|
|
12
10
|
dev = [
|
|
@@ -18,3 +16,8 @@ dev = [
|
|
|
18
16
|
"pytest>=8.3.4",
|
|
19
17
|
"ruff>=0.9.9",
|
|
20
18
|
]
|
|
19
|
+
|
|
20
|
+
[project.urls]
|
|
21
|
+
"Homepage" = "https://github.com/nhagar/substack_api"
|
|
22
|
+
"Bug Tracker" = "https://github.com/nhagar/substack_api/issues"
|
|
23
|
+
"Documentation" = "https://nhagar.github.io/substack_api/"
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from .auth import SubstackAuth
|
|
2
|
+
from .category import Category, list_all_categories
|
|
3
|
+
from .newsletter import Newsletter
|
|
4
|
+
from .post import Post
|
|
5
|
+
from .user import User, resolve_handle_redirect
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"User",
|
|
9
|
+
"Post",
|
|
10
|
+
"Category",
|
|
11
|
+
"Newsletter",
|
|
12
|
+
"SubstackAuth",
|
|
13
|
+
"resolve_handle_redirect",
|
|
14
|
+
"list_all_categories",
|
|
15
|
+
]
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
import requests
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SubstackAuth:
    """Handles authentication for Substack API requests.

    Wraps a ``requests.Session`` that carries browser-like default headers
    and, when available, the session cookies read from a JSON file.

    Attributes
    ----------
    cookies_path : str
        Path of the JSON file the cookies are read from.
    session : requests.Session
        Session used by :meth:`get` and :meth:`post`.
    authenticated : bool
        True only when the cookie file existed and was loaded without error.
    """

    def __init__(
        self,
        cookies_path: str,
    ):
        """
        Initialize authentication handler.

        Parameters
        ----------
        cookies_path : str
            Path to retrieve session cookies from
        """
        self.cookies_path = cookies_path
        self.session = requests.Session()
        self.authenticated = False

        # Set default headers
        self.session.headers.update(
            {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
                "Accept": "application/json",
                "Content-Type": "application/json",
            }
        )

        # Try to load existing cookies
        if os.path.exists(self.cookies_path):
            # load_cookies() reports failure through its return value instead
            # of raising, so that value (not the mere existence of the file)
            # decides whether the session counts as authenticated.
            self.authenticated = self.load_cookies()
        else:
            print(f"Cookies file not found at {self.cookies_path}. Please log in.")

        if not self.authenticated:
            # Drop anything a failed, partially completed load left behind.
            self.session.cookies.clear()

    def load_cookies(self) -> bool:
        """
        Load cookies from file.

        The file must contain a JSON list of objects, each with at least
        ``name`` and ``value`` keys; ``domain``, ``path`` (default ``"/"``)
        and ``secure`` (default ``False``) are used when present.

        Returns
        -------
        bool
            True if cookies loaded successfully, False if reading, parsing
            or applying them failed (the error is printed, not raised)
        """
        try:
            with open(self.cookies_path, "r", encoding="utf-8") as f:
                cookies = json.load(f)

            for cookie in cookies:
                self.session.cookies.set(
                    cookie["name"],
                    cookie["value"],
                    domain=cookie.get("domain"),
                    path=cookie.get("path", "/"),
                    secure=cookie.get("secure", False),
                )

            return True

        except Exception as e:
            # Deliberately broad: a bad cookie file must not crash callers,
            # it only means the session stays unauthenticated.
            print(f"Failed to load cookies: {str(e)}")
            return False

    def get(self, url: str, **kwargs) -> requests.Response:
        """
        Make authenticated GET request.

        Parameters
        ----------
        url : str
            URL to request
        **kwargs
            Additional arguments to pass to requests.get

        Returns
        -------
        requests.Response
            Response object
        """
        return self.session.get(url, **kwargs)

    def post(self, url: str, **kwargs) -> requests.Response:
        """
        Make authenticated POST request.

        Parameters
        ----------
        url : str
            URL to request
        **kwargs
            Additional arguments to pass to requests.post

        Returns
        -------
        requests.Response
            Response object
        """
        return self.session.post(url, **kwargs)
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from time import sleep
|
|
1
2
|
from typing import Any, Dict, List, Optional, Tuple
|
|
2
3
|
|
|
3
4
|
import requests
|
|
@@ -127,6 +128,7 @@ class Category:
|
|
|
127
128
|
full_url = endpoint + str(page_num)
|
|
128
129
|
r = requests.get(full_url, headers=HEADERS, timeout=30)
|
|
129
130
|
r.raise_for_status()
|
|
131
|
+
sleep(2) # Be polite to the server
|
|
130
132
|
|
|
131
133
|
resp = r.json()
|
|
132
134
|
newsletters = resp["publications"]
|