thordata-sdk 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +4 -40
- thordata/async_client.py +503 -1796
- thordata/client.py +444 -1322
- thordata/core/__init__.py +23 -0
- thordata/core/async_http_client.py +91 -0
- thordata/core/http_client.py +79 -0
- thordata/core/tunnel.py +287 -0
- thordata/enums.py +41 -380
- thordata/exceptions.py +70 -19
- thordata/models.py +37 -1193
- thordata/retry.py +1 -1
- thordata/tools/__init__.py +38 -0
- thordata/tools/base.py +42 -0
- thordata/tools/code.py +39 -0
- thordata/tools/ecommerce.py +251 -0
- thordata/tools/professional.py +155 -0
- thordata/tools/search.py +115 -0
- thordata/tools/social.py +374 -0
- thordata/tools/travel.py +100 -0
- thordata/tools/video.py +154 -0
- thordata/types/__init__.py +77 -0
- thordata/types/common.py +141 -0
- thordata/types/proxy.py +340 -0
- thordata/types/serp.py +224 -0
- thordata/types/task.py +156 -0
- thordata/types/universal.py +66 -0
- thordata/unlimited.py +67 -0
- thordata_sdk-1.6.0.dist-info/METADATA +287 -0
- thordata_sdk-1.6.0.dist-info/RECORD +35 -0
- {thordata_sdk-1.4.0.dist-info → thordata_sdk-1.6.0.dist-info}/WHEEL +1 -1
- thordata/_example_utils.py +0 -77
- thordata/demo.py +0 -138
- thordata_sdk-1.4.0.dist-info/METADATA +0 -208
- thordata_sdk-1.4.0.dist-info/RECORD +0 -18
- {thordata_sdk-1.4.0.dist-info → thordata_sdk-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-1.4.0.dist-info → thordata_sdk-1.6.0.dist-info}/top_level.txt +0 -0
thordata/tools/social.py
ADDED
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Social Media Scraper Tools.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
|
|
9
|
+
from .base import ToolRequest
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TikTok:
    """Namespace grouping TikTok scraper request types.

    Each inner dataclass is a ``ToolRequest`` describing one scraper task.
    ``SPIDER_ID`` / ``SPIDER_NAME`` are deliberately unannotated, so the
    dataclass machinery treats them as plain class attributes rather than
    init fields (only annotated names become dataclass fields).
    """

    @dataclass
    class Post(ToolRequest):
        """TikTok Post Information Scraper by URL"""

        # Task identifier / site name sent to the scraper platform —
        # presumably must match Thordata's tool catalogue; confirm against client usage.
        SPIDER_ID = "tiktok_posts_by-url"
        SPIDER_NAME = "tiktok.com"
        url: str  # post URL to scrape
        country: str | None = None  # optional geo hint

    @dataclass
    class PostsByKeywords(ToolRequest):
        """TikTok Post Information Scraper by Keywords"""

        SPIDER_ID = "tiktok_posts_by-keywords"
        SPIDER_NAME = "tiktok.com"
        search_keyword: str
        num_of_posts: int | None = None  # cap on results; None presumably means platform default
        posts_to_not_include: str | None = None  # NOTE(review): format (CSV of IDs/URLs?) not shown here — confirm
        country: str | None = None

    @dataclass
    class PostsByProfileUrl(ToolRequest):
        """TikTok Post Information Scraper by Profile URL"""

        SPIDER_ID = "tiktok_posts_by-profileurl"
        SPIDER_NAME = "tiktok.com"
        url: str  # profile URL
        start_date: str | None = None  # date window — string format not shown here; confirm expected format
        end_date: str | None = None
        num_of_posts: int | None = None
        what_to_collect: str | None = None
        post_type: str | None = None
        posts_to_not_include: str | None = None
        country: str | None = None

    @dataclass
    class PostsByListUrl(ToolRequest):
        """TikTok Post Information Scraper by List URL"""

        SPIDER_ID = "tiktok_posts_by-listurl"
        SPIDER_NAME = "tiktok.com"
        url: str
        num_of_posts: int | None = None

    @dataclass
    class Comment(ToolRequest):
        """TikTok Comment Scraper"""

        SPIDER_ID = "tiktok_comment_by-url"
        SPIDER_NAME = "tiktok.com"
        url: str
        page_turning: int | None = None  # number of pages to fetch — presumably; confirm semantics

    @dataclass
    class Profile(ToolRequest):
        """TikTok Profile Information Scraper by URL"""

        SPIDER_ID = "tiktok_profiles_by-url"
        SPIDER_NAME = "tiktok.com"
        url: str  # Profile URL (e.g. https://www.tiktok.com/@user)
        country: str | None = None

    @dataclass
    class ProfilesByListUrl(ToolRequest):
        """TikTok Profile Information Scraper by List URL"""

        SPIDER_ID = "tiktok_profiles_by-listurl"
        SPIDER_NAME = "tiktok.com"
        search_url: str
        country: str | None = None
        page_turning: int | None = None

    @dataclass
    class Shop(ToolRequest):
        """TikTok Shop Information Scraper by URL"""

        SPIDER_ID = "tiktok_shop_by-url"
        SPIDER_NAME = "tiktok.com"
        url: str

    @dataclass
    class ShopByCategoryUrl(ToolRequest):
        """TikTok Shop Information Scraper by Category URL"""

        SPIDER_ID = "tiktok_shop_by-category-url"
        SPIDER_NAME = "tiktok.com"
        category_url: str

    @dataclass
    class ShopByKeywords(ToolRequest):
        """TikTok Shop Information Scraper by Keywords"""

        SPIDER_ID = "tiktok_shop_by-keywords"
        SPIDER_NAME = "tiktok.com"
        keyword: str
        # Default search entry point; override to target a regional shop domain.
        domain: str = "https://www.tiktok.com/shop"
        page_turning: int | None = None
class Facebook:
    """Namespace grouping Facebook scraper request types.

    ``SPIDER_ID`` / ``SPIDER_NAME`` are unannotated class attributes (not
    dataclass init fields); annotated names below them are the request
    parameters.
    """

    @dataclass
    class PostDetails(ToolRequest):
        """Facebook Post Details Scraper"""

        SPIDER_ID = "facebook_post_by-posts-url"
        SPIDER_NAME = "facebook.com"
        url: str  # post URL

    @dataclass
    class Posts(ToolRequest):
        """Facebook Posts Scraper by Keywords"""

        SPIDER_ID = "facebook_post_by-keywords"
        SPIDER_NAME = "facebook.com"
        keyword: str
        recent_posts: bool | None = None  # restrict to recent posts when True — presumably; confirm
        date: str | None = None  # Year 2025 etc.
        number: int = 10  # number of posts to collect (default 10)

    @dataclass
    class EventByEventListUrl(ToolRequest):
        """Facebook Events Scraper by Event List URL"""

        SPIDER_ID = "facebook_event_by-eventlist-url"
        SPIDER_NAME = "facebook.com"
        url: str
        # NOTE(review): typed str rather than bool — likely a platform-side
        # string flag; confirm accepted values.
        upcoming_events_only: str | None = None

    @dataclass
    class EventBySearchUrl(ToolRequest):
        """Facebook Events Scraper by Search URL"""

        SPIDER_ID = "facebook_event_by-search-url"
        SPIDER_NAME = "facebook.com"
        url: str

    @dataclass
    class EventByEventsUrl(ToolRequest):
        """Facebook Events Scraper by Events URL"""

        SPIDER_ID = "facebook_event_by-events-url"
        SPIDER_NAME = "facebook.com"
        url: str

    @dataclass
    class Profile(ToolRequest):
        """Facebook Profile Scraper"""

        SPIDER_ID = "facebook_profile_by-profiles-url"
        SPIDER_NAME = "facebook.com"
        url: str  # profile URL

    @dataclass
    class Comment(ToolRequest):
        """Facebook Post Comments Scraper"""

        SPIDER_ID = "facebook_comment_by-comments-url"
        SPIDER_NAME = "facebook.com"
        url: str
        get_all_replies: str | None = None  # string flag — accepted values not shown here; confirm
        limit_records: str | None = None
        comments_sort: str | None = None  # All comments
class Instagram:
    """Namespace grouping Instagram scraper request types.

    ``SPIDER_ID`` / ``SPIDER_NAME`` are unannotated class attributes (not
    dataclass init fields); annotated names are the request parameters.
    """

    @dataclass
    class Profile(ToolRequest):
        """Instagram Profile Scraper by Username"""

        SPIDER_ID = "ins_profiles_by-username"
        SPIDER_NAME = "instagram.com"
        username: str  # Instagram handle (without URL)

    @dataclass
    class ProfileByUrl(ToolRequest):
        """Instagram Profile Scraper by Profile URL"""

        SPIDER_ID = "ins_profiles_by-profileurl"
        SPIDER_NAME = "instagram.com"
        profileurl: str

    @dataclass
    class Post(ToolRequest):
        """Instagram Post Information Scraper by Profile URL"""

        SPIDER_ID = "ins_posts_by-profileurl"
        SPIDER_NAME = "instagram.com"
        profileurl: str
        # camelCase mirrors the upstream API parameter name — do not rename.
        resultsLimit: int = 10
        start_date: str | None = None  # date window — string format not shown here; confirm
        end_date: str | None = None
        post_type: str | None = None  # Post or Reel

    @dataclass
    class PostByUrl(ToolRequest):
        """Instagram Post Information Scraper by Post URL"""

        SPIDER_ID = "ins_posts_by-posturl"
        SPIDER_NAME = "instagram.com"
        posturl: str

    @dataclass
    class Reel(ToolRequest):
        """Instagram Reel Information Scraper by URL"""

        SPIDER_ID = "ins_reel_by-url"
        SPIDER_NAME = "instagram.com"
        url: str

    @dataclass
    class AllReel(ToolRequest):
        """Instagram All Reel Information Scraper by URL"""

        SPIDER_ID = "ins_allreel_by-url"
        SPIDER_NAME = "instagram.com"
        url: str
        num_of_posts: int | None = None
        posts_to_not_include: str | None = None
        start_date: str | None = None
        end_date: str | None = None

    @dataclass
    class ReelByListUrl(ToolRequest):
        """Instagram Reel Information Scraper by List URL"""

        SPIDER_ID = "ins_reel_by-listurl"
        SPIDER_NAME = "instagram.com"
        url: str
        num_of_posts: int | None = None
        posts_to_not_include: str | None = None
        start_date: str | None = None
        end_date: str | None = None

    @dataclass
    class Comment(ToolRequest):
        """Instagram Post Comment Scraper"""

        SPIDER_ID = "ins_comment_by-posturl"
        SPIDER_NAME = "instagram.com"
        posturl: str
class Twitter:
    """Namespace grouping Twitter/X scraper request types.

    ``SPIDER_ID`` / ``SPIDER_NAME`` are unannotated class attributes (not
    dataclass init fields). Note ``SPIDER_NAME`` is ``"x.com"``, matching
    the rebranded domain.
    """

    @dataclass
    class Profile(ToolRequest):
        """Twitter(X) Profile Scraper by Profile URL"""

        SPIDER_ID = "twitter_profile_by-profileurl"
        SPIDER_NAME = "x.com"
        url: str  # profile URL

    @dataclass
    class ProfileByUsername(ToolRequest):
        """Twitter(X) Profile Scraper by Username"""

        SPIDER_ID = "twitter_profile_by-username"
        SPIDER_NAME = "x.com"
        user_name: str  # handle without URL

    @dataclass
    class Post(ToolRequest):
        """Twitter(X) Post Information Scraper by Post URL"""

        SPIDER_ID = "twitter_post_by-posturl"
        SPIDER_NAME = "x.com"
        url: str  # Post URL (e.g. https://x.com/user/status/123)

    @dataclass
    class PostByProfileUrl(ToolRequest):
        """Twitter(X) Post Information Scraper by Profile URL"""

        SPIDER_ID = "twitter_post_by-profileurl"
        SPIDER_NAME = "x.com"
        url: str  # Profile URL
class LinkedIn:
    """Namespace grouping LinkedIn scraper request types.

    ``SPIDER_ID`` / ``SPIDER_NAME`` are unannotated class attributes (not
    dataclass init fields); annotated names are the request parameters.
    """

    @dataclass
    class Company(ToolRequest):
        """LinkedIn Company Information Scraper"""

        SPIDER_ID = "linkedin_company_information_by-url"
        SPIDER_NAME = "linkedin.com"
        url: str  # company page URL

    @dataclass
    class Jobs(ToolRequest):
        """LinkedIn Job Listing Scraper by Job Listing URL"""

        SPIDER_ID = "linkedin_job_listings_information_by-job-listing-url"
        SPIDER_NAME = "linkedin.com"
        job_listing_url: str
        page_turning: int | None = None  # number of result pages to fetch — presumably; confirm

    @dataclass
    class JobByUrl(ToolRequest):
        """LinkedIn Job Listing Scraper by Job URL"""

        SPIDER_ID = "linkedin_job_listings_information_by-job-url"
        SPIDER_NAME = "linkedin.com"
        job_url: str

    @dataclass
    class JobByKeyword(ToolRequest):
        """LinkedIn Job Listing Scraper by Keyword"""

        SPIDER_ID = "linkedin_job_listings_information_by-keyword"
        SPIDER_NAME = "linkedin.com"
        # Required search parameters.
        location: str
        keyword: str
        # Optional search filters — accepted values are defined by the
        # platform, not visible here; confirm against the API docs.
        time_range: str | None = None
        experience_level: str | None = None
        job_type: str | None = None
        remote: str | None = None
        company: str | None = None
        selective_search: str | None = None
        jobs_to_not_include: str | None = None
        location_radius: str | None = None
        page_turning: int | None = None
class Reddit:
    """Namespace grouping Reddit scraper request types.

    ``SPIDER_ID`` / ``SPIDER_NAME`` are unannotated class attributes (not
    dataclass init fields); annotated names are the request parameters.
    """

    @dataclass
    class Posts(ToolRequest):
        """Reddit Post Information Scraper by URL"""

        SPIDER_ID = "reddit_posts_by-url"
        SPIDER_NAME = "reddit.com"
        url: str  # post URL

    @dataclass
    class PostsByKeywords(ToolRequest):
        """Reddit Post Information Scraper by Keywords"""

        SPIDER_ID = "reddit_posts_by-keywords"
        SPIDER_NAME = "reddit.com"
        keyword: str
        date: str | None = None  # All time
        num_of_posts: int | None = None
        sort_by: str | None = None  # accepted values not shown here; confirm against API docs

    @dataclass
    class PostsBySubredditUrl(ToolRequest):
        """Reddit Post Information Scraper by Subreddit URL"""

        SPIDER_ID = "reddit_posts_by-subredditurl"
        SPIDER_NAME = "reddit.com"
        url: str  # subreddit URL
        sort_by: str | None = None
        num_of_posts: int | None = None
        sort_by_time: str | None = None  # All Time

    @dataclass
    class Comment(ToolRequest):
        """Reddit Post Comment Scraper"""

        SPIDER_ID = "reddit_comment_by-url"
        SPIDER_NAME = "reddit.com"
        url: str
        days_back: int | None = None  # only comments newer than N days — presumably; confirm
        load_all_replies: str | None = None  # string flag — accepted values not shown here
        comment_limit: int | None = None
thordata/tools/travel.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Travel & Real Estate Scraper Tools (Booking, Zillow, Airbnb)
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
|
|
9
|
+
from .base import ToolRequest
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Booking:
    """Namespace for Booking.com tools.

    ``SPIDER_ID`` / ``SPIDER_NAME`` are unannotated class attributes (not
    dataclass init fields); annotated names are the request parameters.
    """

    @dataclass
    class HotelByUrl(ToolRequest):
        """Booking Hotel Information Scraper by URL"""

        SPIDER_ID = "booking_hotellist_by-url"
        SPIDER_NAME = "booking.com"
        url: str  # hotel or hotel-list URL on booking.com
class Zillow:
    """Namespace for Zillow tools.

    ``SPIDER_ID`` / ``SPIDER_NAME`` are unannotated class attributes (not
    dataclass init fields); annotated names are the request parameters.
    """

    @dataclass
    class PriceByUrl(ToolRequest):
        """Zillow Property Price History Information Scraper by URL"""

        SPIDER_ID = "zillow_price_by-url"
        SPIDER_NAME = "zillow.com"
        url: str  # property URL

    @dataclass
    class ProductByUrl(ToolRequest):
        """Zillow Property Details Information Scraper by URL"""

        SPIDER_ID = "zillow_product_by-url"
        SPIDER_NAME = "zillow.com"
        url: str

    @dataclass
    class ProductByFilter(ToolRequest):
        """Zillow Property Details Information Scraper by Filter"""

        SPIDER_ID = "zillow_product_by-filter"
        SPIDER_NAME = "zillow.com"
        keywords_location: str  # location / keyword search term
        # camelCase / PascalCase mirror upstream API parameter names — do not rename.
        listingCategory: str | None = None  # For Rent, For Sale
        HomeType: str | None = None  # Houses
        days_on_zillow: str | None = None  # Any
        maximum: int | None = None  # cap on number of results — presumably; confirm

    @dataclass
    class ProductByListUrl(ToolRequest):
        """Zillow Property Details Information Scraper by List URL"""

        SPIDER_ID = "zillow_product_by-listurl"
        SPIDER_NAME = "zillow.com"
        url: str
        maximum: int | None = None
class Airbnb:
    """Namespace for Airbnb tools.

    ``SPIDER_ID`` / ``SPIDER_NAME`` are unannotated class attributes (not
    dataclass init fields); annotated names are the request parameters.
    """

    @dataclass
    class ProductBySearchUrl(ToolRequest):
        """Airbnb Properties Information Scraper by Search URL"""

        SPIDER_ID = "airbnb_product_by-searchurl"
        SPIDER_NAME = "airbnb.com"
        searchurl: str  # results-page URL to scrape
        country: str | None = None

    @dataclass
    class ProductByLocation(ToolRequest):
        """Airbnb Properties Information Scraper by Location"""

        SPIDER_ID = "airbnb_product_by-location"
        SPIDER_NAME = "airbnb.com"
        location: str
        check_in: str | None = None  # date strings — expected format not shown here; confirm
        check_out: str | None = None
        # Guest counts are typed str, matching the upstream API's string
        # parameters — presumably numeric strings; confirm.
        num_of_adults: str | None = None
        num_of_children: str | None = None
        num_of_infants: str | None = None
        num_of_pets: str | None = None
        country: str | None = None
        currency: str | None = None

    @dataclass
    class ProductByUrl(ToolRequest):
        """Airbnb Properties Information Scraper by URL"""

        SPIDER_ID = "airbnb_product_by-url"
        SPIDER_NAME = "airbnb.com"
        url: str  # listing URL
        country: str | None = None
thordata/tools/video.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Video & Audio Scraper Tools (YouTube, etc.)
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
|
|
9
|
+
from ..types.common import CommonSettings
|
|
10
|
+
from .base import ToolRequest, VideoToolRequest
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class YouTube:
    """Namespace for YouTube tools.

    Two request base types are used here: classes deriving from
    ``VideoToolRequest`` are submitted through the video builder, while
    classes deriving from ``ToolRequest`` use the standard builder (each
    docstring states which). ``SPIDER_ID`` / ``SPIDER_NAME`` are
    unannotated class attributes, so they are not dataclass init fields.
    """

    @dataclass
    class VideoDownload(VideoToolRequest):
        """YouTube Video File Scraper (Download). Uses video_builder."""

        SPIDER_ID = "youtube_video_by-url"
        SPIDER_NAME = "youtube.com"

        url: str  # Video URL
        # Mutable default handled correctly via default_factory.
        common_settings: CommonSettings = field(default_factory=CommonSettings)

    @dataclass
    class AudioDownload(VideoToolRequest):
        """YouTube Audio File Scraper (Download). Uses video_builder."""

        SPIDER_ID = "youtube_audio_by-url"
        SPIDER_NAME = "youtube.com"

        url: str
        common_settings: CommonSettings = field(default_factory=CommonSettings)

    @dataclass
    class SubtitleDownload(VideoToolRequest):
        """YouTube Subtitle File Scraper. Uses video_builder."""

        SPIDER_ID = "youtube_transcript_by-id"
        SPIDER_NAME = "youtube.com"

        video_id: str  # YouTube video ID (not full URL)
        subtitles_type: str | None = None  # Auto generated / user uploaded
        common_settings: CommonSettings = field(default_factory=CommonSettings)

    @dataclass
    class Profile(VideoToolRequest):
        """YouTube Profile Scraper by Keyword. Uses video_builder."""

        SPIDER_ID = "youtube_profiles_by-keyword"
        SPIDER_NAME = "youtube.com"

        keyword: str
        page_turning: int = 1  # pages of results to fetch — presumably; confirm semantics
        common_settings: CommonSettings = field(default_factory=CommonSettings)

    @dataclass
    class ProfileByUrl(VideoToolRequest):
        """YouTube Profile Scraper by URL. Uses video_builder."""

        SPIDER_ID = "youtube_profiles_by-url"
        SPIDER_NAME = "youtube.com"

        url: str  # Channel URL
        common_settings: CommonSettings = field(default_factory=CommonSettings)

    @dataclass
    class Comments(VideoToolRequest):
        """YouTube Comment Information Scraper. Uses video_builder."""

        SPIDER_ID = "youtube_comment_by-id"
        SPIDER_NAME = "youtube.com"

        video_id: str
        num_of_comments: int | None = None
        sort_by: str | None = None  # Top comments / Newest first
        common_settings: CommonSettings = field(default_factory=CommonSettings)

    @dataclass
    class VideoInfo(VideoToolRequest):
        """YouTube Video Basic Information Scraper. Uses video_builder."""

        SPIDER_ID = "youtube_product_by-id"
        SPIDER_NAME = "youtube.com"

        video_id: str
        common_settings: CommonSettings = field(default_factory=CommonSettings)

    @dataclass
    class VideoPostByUrl(ToolRequest):
        """YouTube Video Post Scraper by URL. Uses standard builder."""

        SPIDER_ID = "youtube_video-post_by-url"
        SPIDER_NAME = "youtube.com"

        url: str  # Channel Video URL
        order_by: str | None = None
        # NOTE(review): count-like params below are typed str, matching the
        # upstream API's string parameters — presumably numeric strings; confirm.
        start_index: str | None = None
        num_of_posts: str | None = None

    @dataclass
    class VideoPostBySearchFilters(ToolRequest):
        """YouTube Video Post Scraper by Search Filters. Uses standard builder."""

        SPIDER_ID = "youtube_video-post_by-search-filters"
        SPIDER_NAME = "youtube.com"

        keyword_search: str
        features: str | None = None
        # Field name shadows the `type` builtin but mirrors the upstream API
        # parameter name — do not rename.
        type: str | None = None  # Videos
        duration: str | None = None
        upload_date: str | None = None
        num_of_posts: str | None = None

    @dataclass
    class VideoPostByHashtag(ToolRequest):
        """YouTube Video Post Scraper by Hashtag. Uses standard builder."""

        SPIDER_ID = "youtube_video-post_by-hashtag"
        SPIDER_NAME = "youtube.com"

        hashtag: str
        num_of_posts: str | None = None

    @dataclass
    class VideoPostByPodcastUrl(ToolRequest):
        """YouTube Video Post Scraper by Podcast URL. Uses standard builder."""

        SPIDER_ID = "youtube_video-post_by-podcast-url"
        SPIDER_NAME = "youtube.com"

        url: str  # Playlist URL
        num_of_posts: str | None = None

    @dataclass
    class VideoPostByExplore(ToolRequest):
        """YouTube Video Post Scraper by Explore URL. Uses standard builder."""

        SPIDER_ID = "youtube_video-post_by-explore"
        SPIDER_NAME = "youtube.com"

        url: str
        all_tabs: str | None = None  # string flag — accepted values not shown here; confirm
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Thordata Data Types and Models.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from .common import (
|
|
6
|
+
CommonSettings,
|
|
7
|
+
Continent,
|
|
8
|
+
Country,
|
|
9
|
+
Device,
|
|
10
|
+
OutputFormat,
|
|
11
|
+
ThordataBaseConfig,
|
|
12
|
+
normalize_enum_value,
|
|
13
|
+
)
|
|
14
|
+
from .proxy import (
|
|
15
|
+
ProxyConfig,
|
|
16
|
+
ProxyHost,
|
|
17
|
+
ProxyPort,
|
|
18
|
+
ProxyProduct,
|
|
19
|
+
ProxyServer,
|
|
20
|
+
ProxyType,
|
|
21
|
+
ProxyUser,
|
|
22
|
+
ProxyUserList,
|
|
23
|
+
SessionType,
|
|
24
|
+
StaticISPProxy,
|
|
25
|
+
StickySession,
|
|
26
|
+
)
|
|
27
|
+
from .serp import (
|
|
28
|
+
BingSearchType,
|
|
29
|
+
Engine,
|
|
30
|
+
GoogleSearchType,
|
|
31
|
+
GoogleTbm,
|
|
32
|
+
SerpRequest,
|
|
33
|
+
TimeRange,
|
|
34
|
+
)
|
|
35
|
+
from .task import (
|
|
36
|
+
DataFormat,
|
|
37
|
+
ScraperTaskConfig,
|
|
38
|
+
TaskStatus,
|
|
39
|
+
TaskStatusResponse,
|
|
40
|
+
UsageStatistics,
|
|
41
|
+
VideoTaskConfig,
|
|
42
|
+
)
|
|
43
|
+
from .universal import UniversalScrapeRequest
|
|
44
|
+
|
|
45
|
+
__all__ = [
|
|
46
|
+
"CommonSettings",
|
|
47
|
+
"Device",
|
|
48
|
+
"OutputFormat",
|
|
49
|
+
"ThordataBaseConfig",
|
|
50
|
+
"Continent",
|
|
51
|
+
"Country",
|
|
52
|
+
"normalize_enum_value",
|
|
53
|
+
"ProxyConfig",
|
|
54
|
+
"ProxyProduct",
|
|
55
|
+
"ProxyServer",
|
|
56
|
+
"ProxyType",
|
|
57
|
+
"ProxyUser",
|
|
58
|
+
"ProxyUserList",
|
|
59
|
+
"SessionType",
|
|
60
|
+
"StaticISPProxy",
|
|
61
|
+
"StickySession",
|
|
62
|
+
"ProxyHost",
|
|
63
|
+
"ProxyPort",
|
|
64
|
+
"BingSearchType",
|
|
65
|
+
"Engine",
|
|
66
|
+
"GoogleSearchType",
|
|
67
|
+
"GoogleTbm",
|
|
68
|
+
"SerpRequest",
|
|
69
|
+
"TimeRange",
|
|
70
|
+
"DataFormat",
|
|
71
|
+
"ScraperTaskConfig",
|
|
72
|
+
"TaskStatus",
|
|
73
|
+
"TaskStatusResponse",
|
|
74
|
+
"UsageStatistics",
|
|
75
|
+
"VideoTaskConfig",
|
|
76
|
+
"UniversalScrapeRequest",
|
|
77
|
+
]
|