substack-api 0.1.0__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- substack_api-1.0.2/PKG-INFO +131 -0
- substack_api-1.0.2/README.md +122 -0
- substack_api-1.0.2/pyproject.toml +20 -0
- substack_api-1.0.2/setup.cfg +4 -0
- substack_api-1.0.2/substack_api/__init__.py +6 -0
- substack_api-1.0.2/substack_api/category.py +180 -0
- substack_api-1.0.2/substack_api/newsletter.py +224 -0
- substack_api-1.0.2/substack_api/post.py +95 -0
- substack_api-1.0.2/substack_api/user.py +136 -0
- substack_api-1.0.2/substack_api.egg-info/PKG-INFO +131 -0
- substack_api-1.0.2/substack_api.egg-info/SOURCES.txt +17 -0
- substack_api-1.0.2/substack_api.egg-info/dependency_links.txt +1 -0
- substack_api-1.0.2/substack_api.egg-info/requires.txt +1 -0
- substack_api-1.0.2/substack_api.egg-info/top_level.txt +1 -0
- substack_api-1.0.2/tests/test_category.py +261 -0
- substack_api-1.0.2/tests/test_newsletter.py +300 -0
- substack_api-1.0.2/tests/test_post.py +158 -0
- substack_api-1.0.2/tests/test_user.py +176 -0
- substack_api-0.1.0/PKG-INFO +0 -61
- substack_api-0.1.0/README.md +0 -42
- substack_api-0.1.0/pyproject.toml +0 -17
- substack_api-0.1.0/substack_api/__init__.py +0 -0
- substack_api-0.1.0/substack_api/newsletter.py +0 -178
- substack_api-0.1.0/substack_api/user.py +0 -77
- {substack_api-0.1.0 → substack_api-1.0.2}/LICENSE +0 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: substack-api
|
|
3
|
+
Version: 1.0.2
|
|
4
|
+
Summary: Unofficial wrapper for the Substack API
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Dist: requests>=2.32.3
|
|
9
|
+
|
|
10
|
+
# Substack API
|
|
11
|
+
|
|
12
|
+
An unofficial Python client library for interacting with Substack newsletters and content.
|
|
13
|
+
|
|
14
|
+
## Overview
|
|
15
|
+
|
|
16
|
+
This library provides Python interfaces for interacting with Substack's unofficial API, allowing you to:
|
|
17
|
+
|
|
18
|
+
- Retrieve newsletter posts, podcasts, and recommendations
|
|
19
|
+
- Get user profile information and subscriptions
|
|
20
|
+
- Fetch post content and metadata
|
|
21
|
+
- Search for posts within newsletters
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Using pip
|
|
27
|
+
pip install substack-api
|
|
28
|
+
|
|
29
|
+
# Using poetry
|
|
30
|
+
poetry add substack-api
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Usage Examples
|
|
34
|
+
|
|
35
|
+
### Working with Newsletters
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from substack_api import Newsletter
|
|
39
|
+
|
|
40
|
+
# Initialize a newsletter by its URL
|
|
41
|
+
newsletter = Newsletter("https://example.substack.com")
|
|
42
|
+
|
|
43
|
+
# Get recent posts (returns Post objects)
|
|
44
|
+
recent_posts = newsletter.get_posts(limit=5)
|
|
45
|
+
|
|
46
|
+
# Get posts sorted by popularity
|
|
47
|
+
top_posts = newsletter.get_posts(sorting="top", limit=10)
|
|
48
|
+
|
|
49
|
+
# Search for posts
|
|
50
|
+
search_results = newsletter.search_posts("machine learning", limit=3)
|
|
51
|
+
|
|
52
|
+
# Get podcast episodes
|
|
53
|
+
podcasts = newsletter.get_podcasts(limit=5)
|
|
54
|
+
|
|
55
|
+
# Get recommended newsletters
|
|
56
|
+
recommendations = newsletter.get_recommendations()
|
|
57
|
+
|
|
58
|
+
# Get newsletter authors
|
|
59
|
+
authors = newsletter.get_authors()
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Working with Posts
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from substack_api import Post
|
|
66
|
+
|
|
67
|
+
# Initialize a post by its URL
|
|
68
|
+
post = Post("https://example.substack.com/p/post-slug")
|
|
69
|
+
|
|
70
|
+
# Get post metadata
|
|
71
|
+
metadata = post.get_metadata()
|
|
72
|
+
|
|
73
|
+
# Get the post's HTML content
|
|
74
|
+
content = post.get_content()
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### Working with Users
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from substack_api import User
|
|
81
|
+
|
|
82
|
+
# Initialize a user by their username
|
|
83
|
+
user = User("username")
|
|
84
|
+
|
|
85
|
+
# Get user profile information
|
|
86
|
+
profile_data = user.get_raw_data()
|
|
87
|
+
|
|
88
|
+
# Get user ID and name
|
|
89
|
+
user_id = user.id
|
|
90
|
+
name = user.name
|
|
91
|
+
|
|
92
|
+
# Get user's subscriptions
|
|
93
|
+
subscriptions = user.get_subscriptions()
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Limitations
|
|
97
|
+
|
|
98
|
+
- This is an unofficial library and not endorsed by Substack
|
|
99
|
+
- APIs may change without notice, potentially breaking functionality
|
|
100
|
+
- Some features may only work for public content
|
|
101
|
+
- Rate limiting may be enforced by Substack
|
|
102
|
+
|
|
103
|
+
## Development
|
|
104
|
+
|
|
105
|
+
### Running Tests
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
# Install dev dependencies
|
|
109
|
+
pip install -e . --group dev  # dev tools live in [dependency-groups]; needs pip >= 25.1
|
|
110
|
+
|
|
111
|
+
# Run tests
|
|
112
|
+
pytest
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Contributing
|
|
116
|
+
|
|
117
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
118
|
+
|
|
119
|
+
1. Fork the repository
|
|
120
|
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
|
121
|
+
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
|
|
122
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
123
|
+
5. Open a Pull Request
|
|
124
|
+
|
|
125
|
+
## License
|
|
126
|
+
|
|
127
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
128
|
+
|
|
129
|
+
## Disclaimer
|
|
130
|
+
|
|
131
|
+
This package is not affiliated with, endorsed by, or connected to Substack in any way. It is an independent project created to make Substack content more accessible through Python.
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# Substack API
|
|
2
|
+
|
|
3
|
+
An unofficial Python client library for interacting with Substack newsletters and content.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
This library provides Python interfaces for interacting with Substack's unofficial API, allowing you to:
|
|
8
|
+
|
|
9
|
+
- Retrieve newsletter posts, podcasts, and recommendations
|
|
10
|
+
- Get user profile information and subscriptions
|
|
11
|
+
- Fetch post content and metadata
|
|
12
|
+
- Search for posts within newsletters
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
# Using pip
|
|
18
|
+
pip install substack-api
|
|
19
|
+
|
|
20
|
+
# Using poetry
|
|
21
|
+
poetry add substack-api
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Usage Examples
|
|
25
|
+
|
|
26
|
+
### Working with Newsletters
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from substack_api import Newsletter
|
|
30
|
+
|
|
31
|
+
# Initialize a newsletter by its URL
|
|
32
|
+
newsletter = Newsletter("https://example.substack.com")
|
|
33
|
+
|
|
34
|
+
# Get recent posts (returns Post objects)
|
|
35
|
+
recent_posts = newsletter.get_posts(limit=5)
|
|
36
|
+
|
|
37
|
+
# Get posts sorted by popularity
|
|
38
|
+
top_posts = newsletter.get_posts(sorting="top", limit=10)
|
|
39
|
+
|
|
40
|
+
# Search for posts
|
|
41
|
+
search_results = newsletter.search_posts("machine learning", limit=3)
|
|
42
|
+
|
|
43
|
+
# Get podcast episodes
|
|
44
|
+
podcasts = newsletter.get_podcasts(limit=5)
|
|
45
|
+
|
|
46
|
+
# Get recommended newsletters
|
|
47
|
+
recommendations = newsletter.get_recommendations()
|
|
48
|
+
|
|
49
|
+
# Get newsletter authors
|
|
50
|
+
authors = newsletter.get_authors()
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Working with Posts
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
from substack_api import Post
|
|
57
|
+
|
|
58
|
+
# Initialize a post by its URL
|
|
59
|
+
post = Post("https://example.substack.com/p/post-slug")
|
|
60
|
+
|
|
61
|
+
# Get post metadata
|
|
62
|
+
metadata = post.get_metadata()
|
|
63
|
+
|
|
64
|
+
# Get the post's HTML content
|
|
65
|
+
content = post.get_content()
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Working with Users
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
from substack_api import User
|
|
72
|
+
|
|
73
|
+
# Initialize a user by their username
|
|
74
|
+
user = User("username")
|
|
75
|
+
|
|
76
|
+
# Get user profile information
|
|
77
|
+
profile_data = user.get_raw_data()
|
|
78
|
+
|
|
79
|
+
# Get user ID and name
|
|
80
|
+
user_id = user.id
|
|
81
|
+
name = user.name
|
|
82
|
+
|
|
83
|
+
# Get user's subscriptions
|
|
84
|
+
subscriptions = user.get_subscriptions()
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Limitations
|
|
88
|
+
|
|
89
|
+
- This is an unofficial library and not endorsed by Substack
|
|
90
|
+
- APIs may change without notice, potentially breaking functionality
|
|
91
|
+
- Some features may only work for public content
|
|
92
|
+
- Rate limiting may be enforced by Substack
|
|
93
|
+
|
|
94
|
+
## Development
|
|
95
|
+
|
|
96
|
+
### Running Tests
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
# Install dev dependencies
|
|
100
|
+
pip install -e . --group dev  # dev tools live in [dependency-groups]; needs pip >= 25.1
|
|
101
|
+
|
|
102
|
+
# Run tests
|
|
103
|
+
pytest
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Contributing
|
|
107
|
+
|
|
108
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
109
|
+
|
|
110
|
+
1. Fork the repository
|
|
111
|
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
|
112
|
+
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
|
|
113
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
114
|
+
5. Open a Pull Request
|
|
115
|
+
|
|
116
|
+
## License
|
|
117
|
+
|
|
118
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
119
|
+
|
|
120
|
+
## Disclaimer
|
|
121
|
+
|
|
122
|
+
This package is not affiliated with, endorsed by, or connected to Substack in any way. It is an independent project created to make Substack content more accessible through Python.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "substack-api"
|
|
3
|
+
version = "1.0.2"
|
|
4
|
+
description = "Unofficial wrapper for the Substack API"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.12"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"requests>=2.32.3",
|
|
9
|
+
]
|
|
10
|
+
|
|
11
|
+
[dependency-groups]
|
|
12
|
+
dev = [
|
|
13
|
+
"ipykernel>=6.29.5",
|
|
14
|
+
"mike>=2.1.3",
|
|
15
|
+
"mkdocs>=1.6.1",
|
|
16
|
+
"mkdocs-material>=9.6.6",
|
|
17
|
+
"mkdocstrings-python>=1.16.2",
|
|
18
|
+
"pytest>=8.3.4",
|
|
19
|
+
"ruff>=0.9.9",
|
|
20
|
+
]
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
2
|
+
|
|
3
|
+
import requests
|
|
4
|
+
|
|
5
|
+
# Add Newsletter import
|
|
6
|
+
from .newsletter import Newsletter
|
|
7
|
+
|
|
8
|
+
# Browser-like User-Agent passed as `headers=` on the HTTP requests in this module.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.77 Safari/537.36"
    )
}
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def list_all_categories() -> List[Tuple[str, int]]:
    """
    Fetch every newsletter category as a (name, id) pair

    Issues a single GET request to the Substack categories endpoint and
    raises (via ``raise_for_status``) if the response is an HTTP error.

    Returns
    -------
    List[Tuple[str, int]]
        One (category_name, category_id) tuple per category in the response
    """
    response = requests.get(
        "https://substack.com/api/v1/categories", headers=HEADERS, timeout=30
    )
    response.raise_for_status()

    pairs = []
    for entry in response.json():
        pairs.append((entry["name"], entry["id"]))
    return pairs
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Category:
    """
    Top-level newsletter category

    A category is identified by a name and a numeric ID. Whichever of the two
    is missing at construction time is looked up via ``list_all_categories()``.
    Newsletter data for the category is fetched on first use and cached on
    the instance.
    """

    def __init__(self, name: Optional[str] = None, id: Optional[int] = None) -> None:
        """
        Initialize a Category object.

        Parameters
        ----------
        name : Optional[str]
            The name of the category
        id : Optional[int]
            The ID of the category

        Raises
        ------
        ValueError
            If neither name nor id is provided, or if the provided name/id is not found
        """
        if name is None and id is None:
            raise ValueError("Either name or id must be provided")

        self.name = name
        self.id = id
        self._newsletters_data: Optional[List[Dict[str, Any]]] = None  # Cache for newsletter data

        # Retrieve the missing attribute if only one of name or id is provided.
        # Compare against None instead of relying on truthiness so that
        # falsy-but-provided values (id=0, name="") still trigger the lookup
        # rather than leaving the object half-initialised.
        if self.id is None:
            self._get_id_from_name()
        elif self.name is None:
            self._get_name_from_id()

    def __str__(self) -> str:
        return f"{self.name} ({self.id})"

    def __repr__(self) -> str:
        return f"Category(name={self.name}, id={self.id})"

    def _get_id_from_name(self) -> None:
        """
        Lookup category ID based on name

        Raises
        ------
        ValueError
            If the category name is not found
        """
        # Loop variables are named explicitly to avoid shadowing the builtin `id`
        for category_name, category_id in list_all_categories():
            if category_name == self.name:
                self.id = category_id
                return
        raise ValueError(f"Category name '{self.name}' not found")

    def _get_name_from_id(self) -> None:
        """
        Lookup category name based on ID

        Raises
        ------
        ValueError
            If the category ID is not found
        """
        for category_name, category_id in list_all_categories():
            if category_id == self.id:
                self.name = category_name
                return
        raise ValueError(f"Category ID {self.id} not found")

    def _fetch_newsletters_data(
        self, force_refresh: bool = False
    ) -> List[Dict[str, Any]]:
        """
        Fetch the raw newsletter data from the API and cache it

        Pages are requested one at a time, starting at page 0, until the
        response's "more" flag is falsy or page 20 has been fetched.

        Parameters
        ----------
        force_refresh : bool
            Whether to force a refresh of the data, ignoring the cache

        Returns
        -------
        List[Dict[str, Any]]
            Full newsletter metadata
        """
        if self._newsletters_data is not None and not force_refresh:
            return self._newsletters_data

        endpoint = f"https://substack.com/api/v1/category/public/{self.id}/all?page="

        all_newsletters: List[Dict[str, Any]] = []
        page_num = 0
        more = True
        # endpoint doesn't return more than 21 pages
        while more and page_num <= 20:
            r = requests.get(endpoint + str(page_num), headers=HEADERS, timeout=30)
            r.raise_for_status()

            resp = r.json()
            all_newsletters.extend(resp["publications"])
            page_num += 1
            more = resp["more"]

        self._newsletters_data = all_newsletters
        return all_newsletters

    def get_newsletter_urls(self) -> List[str]:
        """
        Get only the URLs of newsletters in this category

        Returns
        -------
        List[str]
            The "base_url" value of every newsletter record
        """
        return [item["base_url"] for item in self._fetch_newsletters_data()]

    def get_newsletters(self) -> List["Newsletter"]:
        """
        Get Newsletter objects for all newsletters in this category

        Returns
        -------
        List[Newsletter]
            List of Newsletter objects, one per newsletter URL
        """
        return [Newsletter(url) for url in self.get_newsletter_urls()]

    def get_newsletter_metadata(self) -> List[Dict[str, Any]]:
        """
        Get full metadata for all newsletters in this category

        Returns
        -------
        List[Dict[str, Any]]
            List of newsletter metadata dictionaries (the cached list itself)
        """
        return self._fetch_newsletters_data()

    def refresh_data(self) -> None:
        """
        Force refresh of the newsletter data cache
        """
        self._fetch_newsletters_data(force_refresh=True)
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
from time import sleep
|
|
2
|
+
from typing import Any, Dict, List, Optional
|
|
3
|
+
|
|
4
|
+
import requests
|
|
5
|
+
|
|
6
|
+
# Browser-like User-Agent passed as `headers=` on the HTTP requests in this module.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.77 Safari/537.36"
    )
}
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Newsletter:
    """
    Newsletter class for interacting with Substack newsletters
    """

    def __init__(self, url: str) -> None:
        """
        Initialize a Newsletter object.

        Parameters
        ----------
        url : str
            The URL of the Substack newsletter; API endpoints are built by
            appending paths to this value, so it should include the scheme
        """
        self.url = url

    def __str__(self) -> str:
        return f"Newsletter: {self.url}"

    def __repr__(self) -> str:
        return f"Newsletter(url={self.url})"

    @staticmethod
    def _publication_url(publication: Dict[str, Any]) -> str:
        """
        Build an absolute URL for a publication record

        Prefers a non-empty "custom_domain" and otherwise falls back to
        "<subdomain>.substack.com". A scheme is added when missing because
        the other methods pass ``f"{self.url}/..."`` straight to
        ``requests.get``, which rejects scheme-less URLs.
        """
        domain = publication.get("custom_domain") or (
            f"{publication['subdomain']}.substack.com"
        )
        if domain.startswith(("http://", "https://")):
            return domain
        return f"https://{domain}"

    def _archive_endpoint(self, params: Dict[str, str]) -> str:
        """
        Build the archive endpoint URL with a properly encoded query string
        """
        from urllib.parse import urlencode  # Local import, like the other lazy imports here

        # urlencode escapes reserved characters (&, =, #, spaces) in values,
        # so arbitrary search queries cannot corrupt the query string
        return f"{self.url}/api/v1/archive?{urlencode(params)}"

    def _fetch_paginated_posts(
        self, params: Dict[str, str], limit: Optional[int] = None, page_size: int = 15
    ) -> List[Dict[str, Any]]:
        """
        Helper method to fetch paginated posts with different query parameters

        Pagination stops on the first non-200 response, the first empty page,
        a page shorter than ``page_size``, or once ``limit`` items have been
        collected.

        Parameters
        ----------
        params : Dict[str, str]
            Dictionary of query parameters to include in the API request
        limit : Optional[int]
            Maximum number of posts to return; None means no limit, and a
            value of 0 or less returns an empty list without any request
        page_size : int
            Number of posts to retrieve per page request

        Returns
        -------
        List[Dict[str, Any]]
            List of post data dictionaries
        """
        # An explicit limit of zero means "nothing", not "everything"
        if limit is not None and limit <= 0:
            return []

        results: List[Dict[str, Any]] = []
        offset = 0

        while True:
            # Merge the caller's params with the current paging window
            current_params = {**params, "offset": str(offset), "limit": str(page_size)}
            response = requests.get(
                self._archive_endpoint(current_params), headers=HEADERS, timeout=30
            )

            # Best effort: a failed page ends pagination but keeps what we have
            if response.status_code != 200:
                break

            items = response.json()
            if not items:
                break

            results.extend(items)
            offset += page_size

            # Check if we've reached the requested limit
            if limit is not None and len(results) >= limit:
                results = results[:limit]
                break

            # Fewer items than requested means this was the last page
            if len(items) < page_size:
                break

            # Be nice to the API (only when another request will follow)
            sleep(0.5)

        # Raw post dictionaries are returned; callers wrap them in Post objects
        return results

    def get_posts(self, sorting: str = "new", limit: Optional[int] = None) -> List:
        """
        Get posts from the newsletter with specified sorting

        Parameters
        ----------
        sorting : str
            Sorting order for the posts ("new", "top", "pinned", or "community")
        limit : Optional[int]
            Maximum number of posts to return

        Returns
        -------
        List[Post]
            List of Post objects
        """
        from .post import Post  # Import here to avoid circular import

        post_data = self._fetch_paginated_posts({"sort": sorting}, limit)
        return [Post(item["canonical_url"]) for item in post_data]

    def search_posts(self, query: str, limit: Optional[int] = None) -> List:
        """
        Search posts in the newsletter with the given query

        Parameters
        ----------
        query : str
            Search query string
        limit : Optional[int]
            Maximum number of posts to return

        Returns
        -------
        List[Post]
            List of Post objects matching the search query
        """
        from .post import Post  # Import here to avoid circular import

        post_data = self._fetch_paginated_posts({"sort": "new", "search": query}, limit)
        return [Post(item["canonical_url"]) for item in post_data]

    def get_podcasts(self, limit: Optional[int] = None) -> List:
        """
        Get podcast posts from the newsletter

        Parameters
        ----------
        limit : Optional[int]
            Maximum number of podcast posts to return

        Returns
        -------
        List[Post]
            List of Post objects representing podcast posts
        """
        from .post import Post  # Import here to avoid circular import

        post_data = self._fetch_paginated_posts({"sort": "new", "type": "podcast"}, limit)
        return [Post(item["canonical_url"]) for item in post_data]

    def get_recommendations(self) -> List["Newsletter"]:
        """
        Get recommended publications for this newsletter

        Returns
        -------
        List[Newsletter]
            List of recommended Newsletter objects, each with an absolute
            URL; empty if the newsletter has no posts, the request does not
            return 200, or there are no recommendations
        """
        # First get any post to extract the publication ID
        posts = self.get_posts(limit=1)
        if not posts:
            return []

        publication_id = posts[0].get_metadata()["publication_id"]

        # Now get the recommendations
        endpoint = f"{self.url}/api/v1/recommendations/from/{publication_id}"
        response = requests.get(endpoint, headers=HEADERS, timeout=30)

        if response.status_code != 200:
            return []

        recommendations = response.json()
        if not recommendations:
            return []

        return [
            Newsletter(self._publication_url(rec["recommendedPublication"]))
            for rec in recommendations
        ]

    def get_authors(self) -> List:
        """
        Get authors of the newsletter

        Raises (via ``raise_for_status``) if the response is an HTTP error.

        Returns
        -------
        List[User]
            List of User objects, one per author "handle" in the response
        """
        from .user import User  # Import here to avoid circular import

        r = requests.get(
            f"{self.url}/api/v1/publication/users/ranked?public=true",
            headers=HEADERS,
            timeout=30,
        )
        r.raise_for_status()
        return [User(author["handle"]) for author in r.json()]
|