thordata-sdk 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +1 -1
- thordata/async_client.py +55 -13
- thordata/client.py +64 -13
- thordata/enums.py +2 -2
- thordata/exceptions.py +80 -20
- thordata/models.py +1 -1
- thordata/retry.py +1 -1
- thordata/tools/__init__.py +11 -1
- thordata/tools/code.py +17 -4
- thordata/tools/ecommerce.py +194 -10
- thordata/tools/professional.py +155 -0
- thordata/tools/search.py +47 -5
- thordata/tools/social.py +225 -41
- thordata/tools/travel.py +100 -0
- thordata/tools/video.py +80 -7
- thordata/types/serp.py +6 -2
- thordata/types/task.py +75 -9
- thordata/types/universal.py +37 -5
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.7.0.dist-info}/METADATA +63 -7
- thordata_sdk-1.7.0.dist-info/RECORD +35 -0
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.7.0.dist-info}/WHEEL +1 -1
- thordata/_example_utils.py +0 -77
- thordata/demo.py +0 -138
- thordata_sdk-1.5.0.dist-info/RECORD +0 -35
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.7.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.7.0.dist-info}/top_level.txt +0 -0
thordata/tools/social.py
CHANGED
|
@@ -12,12 +12,47 @@ from .base import ToolRequest
|
|
|
12
12
|
class TikTok:
|
|
13
13
|
@dataclass
|
|
14
14
|
class Post(ToolRequest):
|
|
15
|
-
"""TikTok Post Information Scraper"""
|
|
15
|
+
"""TikTok Post Information Scraper by URL"""
|
|
16
16
|
|
|
17
17
|
SPIDER_ID = "tiktok_posts_by-url"
|
|
18
18
|
SPIDER_NAME = "tiktok.com"
|
|
19
19
|
url: str
|
|
20
|
-
|
|
20
|
+
country: str | None = None
|
|
21
|
+
|
|
22
|
+
@dataclass
|
|
23
|
+
class PostsByKeywords(ToolRequest):
|
|
24
|
+
"""TikTok Post Information Scraper by Keywords"""
|
|
25
|
+
|
|
26
|
+
SPIDER_ID = "tiktok_posts_by-keywords"
|
|
27
|
+
SPIDER_NAME = "tiktok.com"
|
|
28
|
+
search_keyword: str
|
|
29
|
+
num_of_posts: int | None = None
|
|
30
|
+
posts_to_not_include: str | None = None
|
|
31
|
+
country: str | None = None
|
|
32
|
+
|
|
33
|
+
@dataclass
|
|
34
|
+
class PostsByProfileUrl(ToolRequest):
|
|
35
|
+
"""TikTok Post Information Scraper by Profile URL"""
|
|
36
|
+
|
|
37
|
+
SPIDER_ID = "tiktok_posts_by-profileurl"
|
|
38
|
+
SPIDER_NAME = "tiktok.com"
|
|
39
|
+
url: str
|
|
40
|
+
start_date: str | None = None
|
|
41
|
+
end_date: str | None = None
|
|
42
|
+
num_of_posts: int | None = None
|
|
43
|
+
what_to_collect: str | None = None
|
|
44
|
+
post_type: str | None = None
|
|
45
|
+
posts_to_not_include: str | None = None
|
|
46
|
+
country: str | None = None
|
|
47
|
+
|
|
48
|
+
@dataclass
|
|
49
|
+
class PostsByListUrl(ToolRequest):
|
|
50
|
+
"""TikTok Post Information Scraper by List URL"""
|
|
51
|
+
|
|
52
|
+
SPIDER_ID = "tiktok_posts_by-listurl"
|
|
53
|
+
SPIDER_NAME = "tiktok.com"
|
|
54
|
+
url: str
|
|
55
|
+
num_of_posts: int | None = None
|
|
21
56
|
|
|
22
57
|
@dataclass
|
|
23
58
|
class Comment(ToolRequest):
|
|
@@ -30,33 +65,62 @@ class TikTok:
|
|
|
30
65
|
|
|
31
66
|
@dataclass
|
|
32
67
|
class Profile(ToolRequest):
|
|
33
|
-
"""TikTok Profile Information Scraper"""
|
|
68
|
+
"""TikTok Profile Information Scraper by URL"""
|
|
34
69
|
|
|
35
70
|
SPIDER_ID = "tiktok_profiles_by-url"
|
|
36
71
|
SPIDER_NAME = "tiktok.com"
|
|
37
|
-
|
|
38
72
|
url: str # Profile URL (e.g. https://www.tiktok.com/@user)
|
|
39
|
-
|
|
73
|
+
country: str | None = None
|
|
74
|
+
|
|
75
|
+
@dataclass
|
|
76
|
+
class ProfilesByListUrl(ToolRequest):
|
|
77
|
+
"""TikTok Profile Information Scraper by List URL"""
|
|
40
78
|
|
|
79
|
+
SPIDER_ID = "tiktok_profiles_by-listurl"
|
|
80
|
+
SPIDER_NAME = "tiktok.com"
|
|
81
|
+
search_url: str
|
|
41
82
|
country: str | None = None
|
|
42
83
|
page_turning: int | None = None
|
|
43
84
|
|
|
44
85
|
@dataclass
|
|
45
86
|
class Shop(ToolRequest):
|
|
46
|
-
"""TikTok Shop Information Scraper"""
|
|
87
|
+
"""TikTok Shop Information Scraper by URL"""
|
|
47
88
|
|
|
48
89
|
SPIDER_ID = "tiktok_shop_by-url"
|
|
49
90
|
SPIDER_NAME = "tiktok.com"
|
|
50
91
|
url: str
|
|
51
|
-
|
|
52
|
-
|
|
92
|
+
|
|
93
|
+
@dataclass
|
|
94
|
+
class ShopByCategoryUrl(ToolRequest):
|
|
95
|
+
"""TikTok Shop Information Scraper by Category URL"""
|
|
96
|
+
|
|
97
|
+
SPIDER_ID = "tiktok_shop_by-category-url"
|
|
98
|
+
SPIDER_NAME = "tiktok.com"
|
|
99
|
+
category_url: str
|
|
100
|
+
|
|
101
|
+
@dataclass
|
|
102
|
+
class ShopByKeywords(ToolRequest):
|
|
103
|
+
"""TikTok Shop Information Scraper by Keywords"""
|
|
104
|
+
|
|
105
|
+
SPIDER_ID = "tiktok_shop_by-keywords"
|
|
106
|
+
SPIDER_NAME = "tiktok.com"
|
|
107
|
+
keyword: str
|
|
108
|
+
domain: str = "https://www.tiktok.com/shop"
|
|
53
109
|
page_turning: int | None = None
|
|
54
110
|
|
|
55
111
|
|
|
56
112
|
class Facebook:
|
|
113
|
+
@dataclass
|
|
114
|
+
class PostDetails(ToolRequest):
|
|
115
|
+
"""Facebook Post Details Scraper"""
|
|
116
|
+
|
|
117
|
+
SPIDER_ID = "facebook_post_by-posts-url"
|
|
118
|
+
SPIDER_NAME = "facebook.com"
|
|
119
|
+
url: str
|
|
120
|
+
|
|
57
121
|
@dataclass
|
|
58
122
|
class Posts(ToolRequest):
|
|
59
|
-
"""Facebook Posts Scraper"""
|
|
123
|
+
"""Facebook Posts Scraper by Keywords"""
|
|
60
124
|
|
|
61
125
|
SPIDER_ID = "facebook_post_by-keywords"
|
|
62
126
|
SPIDER_NAME = "facebook.com"
|
|
@@ -66,27 +130,70 @@ class Facebook:
|
|
|
66
130
|
number: int = 10
|
|
67
131
|
|
|
68
132
|
@dataclass
|
|
69
|
-
class
|
|
70
|
-
"""Facebook
|
|
133
|
+
class EventByEventListUrl(ToolRequest):
|
|
134
|
+
"""Facebook Events Scraper by Event List URL"""
|
|
71
135
|
|
|
72
|
-
SPIDER_ID = "
|
|
136
|
+
SPIDER_ID = "facebook_event_by-eventlist-url"
|
|
137
|
+
SPIDER_NAME = "facebook.com"
|
|
138
|
+
url: str
|
|
139
|
+
upcoming_events_only: str | None = None
|
|
140
|
+
|
|
141
|
+
@dataclass
|
|
142
|
+
class EventBySearchUrl(ToolRequest):
|
|
143
|
+
"""Facebook Events Scraper by Search URL"""
|
|
144
|
+
|
|
145
|
+
SPIDER_ID = "facebook_event_by-search-url"
|
|
146
|
+
SPIDER_NAME = "facebook.com"
|
|
147
|
+
url: str
|
|
148
|
+
|
|
149
|
+
@dataclass
|
|
150
|
+
class EventByEventsUrl(ToolRequest):
|
|
151
|
+
"""Facebook Events Scraper by Events URL"""
|
|
152
|
+
|
|
153
|
+
SPIDER_ID = "facebook_event_by-events-url"
|
|
154
|
+
SPIDER_NAME = "facebook.com"
|
|
155
|
+
url: str
|
|
156
|
+
|
|
157
|
+
@dataclass
|
|
158
|
+
class Profile(ToolRequest):
|
|
159
|
+
"""Facebook Profile Scraper"""
|
|
160
|
+
|
|
161
|
+
SPIDER_ID = "facebook_profile_by-profiles-url"
|
|
162
|
+
SPIDER_NAME = "facebook.com"
|
|
163
|
+
url: str
|
|
164
|
+
|
|
165
|
+
@dataclass
|
|
166
|
+
class Comment(ToolRequest):
|
|
167
|
+
"""Facebook Post Comments Scraper"""
|
|
168
|
+
|
|
169
|
+
SPIDER_ID = "facebook_comment_by-comments-url"
|
|
73
170
|
SPIDER_NAME = "facebook.com"
|
|
74
171
|
url: str
|
|
172
|
+
get_all_replies: str | None = None
|
|
173
|
+
limit_records: str | None = None
|
|
174
|
+
comments_sort: str | None = None # All comments
|
|
75
175
|
|
|
76
176
|
|
|
77
177
|
class Instagram:
|
|
78
178
|
@dataclass
|
|
79
179
|
class Profile(ToolRequest):
|
|
80
|
-
"""Instagram Profile Scraper"""
|
|
180
|
+
"""Instagram Profile Scraper by Username"""
|
|
81
181
|
|
|
82
182
|
SPIDER_ID = "ins_profiles_by-username"
|
|
83
183
|
SPIDER_NAME = "instagram.com"
|
|
84
184
|
username: str
|
|
85
|
-
|
|
185
|
+
|
|
186
|
+
@dataclass
|
|
187
|
+
class ProfileByUrl(ToolRequest):
|
|
188
|
+
"""Instagram Profile Scraper by Profile URL"""
|
|
189
|
+
|
|
190
|
+
SPIDER_ID = "ins_profiles_by-profileurl"
|
|
191
|
+
SPIDER_NAME = "instagram.com"
|
|
192
|
+
profileurl: str
|
|
86
193
|
|
|
87
194
|
@dataclass
|
|
88
195
|
class Post(ToolRequest):
|
|
89
|
-
"""Instagram Post Information Scraper"""
|
|
196
|
+
"""Instagram Post Information Scraper by Profile URL"""
|
|
90
197
|
|
|
91
198
|
SPIDER_ID = "ins_posts_by-profileurl"
|
|
92
199
|
SPIDER_NAME = "instagram.com"
|
|
@@ -96,14 +203,45 @@ class Instagram:
|
|
|
96
203
|
end_date: str | None = None
|
|
97
204
|
post_type: str | None = None # Post or Reel
|
|
98
205
|
|
|
206
|
+
@dataclass
|
|
207
|
+
class PostByUrl(ToolRequest):
|
|
208
|
+
"""Instagram Post Information Scraper by Post URL"""
|
|
209
|
+
|
|
210
|
+
SPIDER_ID = "ins_posts_by-posturl"
|
|
211
|
+
SPIDER_NAME = "instagram.com"
|
|
212
|
+
posturl: str
|
|
213
|
+
|
|
99
214
|
@dataclass
|
|
100
215
|
class Reel(ToolRequest):
|
|
101
|
-
"""Instagram Reel Information Scraper"""
|
|
216
|
+
"""Instagram Reel Information Scraper by URL"""
|
|
102
217
|
|
|
103
218
|
SPIDER_ID = "ins_reel_by-url"
|
|
104
219
|
SPIDER_NAME = "instagram.com"
|
|
105
220
|
url: str
|
|
221
|
+
|
|
222
|
+
@dataclass
|
|
223
|
+
class AllReel(ToolRequest):
|
|
224
|
+
"""Instagram All Reel Information Scraper by URL"""
|
|
225
|
+
|
|
226
|
+
SPIDER_ID = "ins_allreel_by-url"
|
|
227
|
+
SPIDER_NAME = "instagram.com"
|
|
228
|
+
url: str
|
|
229
|
+
num_of_posts: int | None = None
|
|
230
|
+
posts_to_not_include: str | None = None
|
|
231
|
+
start_date: str | None = None
|
|
232
|
+
end_date: str | None = None
|
|
233
|
+
|
|
234
|
+
@dataclass
|
|
235
|
+
class ReelByListUrl(ToolRequest):
|
|
236
|
+
"""Instagram Reel Information Scraper by List URL"""
|
|
237
|
+
|
|
238
|
+
SPIDER_ID = "ins_reel_by-listurl"
|
|
239
|
+
SPIDER_NAME = "instagram.com"
|
|
240
|
+
url: str
|
|
106
241
|
num_of_posts: int | None = None
|
|
242
|
+
posts_to_not_include: str | None = None
|
|
243
|
+
start_date: str | None = None
|
|
244
|
+
end_date: str | None = None
|
|
107
245
|
|
|
108
246
|
@dataclass
|
|
109
247
|
class Comment(ToolRequest):
|
|
@@ -117,30 +255,35 @@ class Instagram:
|
|
|
117
255
|
class Twitter:
|
|
118
256
|
@dataclass
|
|
119
257
|
class Profile(ToolRequest):
|
|
120
|
-
"""Twitter(X) Profile Scraper"""
|
|
258
|
+
"""Twitter(X) Profile Scraper by Profile URL"""
|
|
121
259
|
|
|
122
|
-
SPIDER_ID = "
|
|
123
|
-
SPIDER_NAME = "
|
|
260
|
+
SPIDER_ID = "twitter_profile_by-profileurl"
|
|
261
|
+
SPIDER_NAME = "x.com"
|
|
124
262
|
url: str
|
|
125
|
-
max_number_of_posts: int | None = None
|
|
126
|
-
user_name: str | None = None
|
|
127
263
|
|
|
128
264
|
@dataclass
|
|
129
|
-
class
|
|
130
|
-
"""
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
"""
|
|
265
|
+
class ProfileByUsername(ToolRequest):
|
|
266
|
+
"""Twitter(X) Profile Scraper by Username"""
|
|
267
|
+
|
|
268
|
+
SPIDER_ID = "twitter_profile_by-username"
|
|
269
|
+
SPIDER_NAME = "x.com"
|
|
270
|
+
user_name: str
|
|
136
271
|
|
|
137
|
-
|
|
138
|
-
|
|
272
|
+
@dataclass
|
|
273
|
+
class Post(ToolRequest):
|
|
274
|
+
"""Twitter(X) Post Information Scraper by Post URL"""
|
|
139
275
|
|
|
276
|
+
SPIDER_ID = "twitter_post_by-posturl"
|
|
277
|
+
SPIDER_NAME = "x.com"
|
|
140
278
|
url: str # Post URL (e.g. https://x.com/user/status/123)
|
|
141
279
|
|
|
142
|
-
|
|
143
|
-
|
|
280
|
+
@dataclass
|
|
281
|
+
class PostByProfileUrl(ToolRequest):
|
|
282
|
+
"""Twitter(X) Post Information Scraper by Profile URL"""
|
|
283
|
+
|
|
284
|
+
SPIDER_ID = "twitter_post_by-profileurl"
|
|
285
|
+
SPIDER_NAME = "x.com"
|
|
286
|
+
url: str # Profile URL
|
|
144
287
|
|
|
145
288
|
|
|
146
289
|
class LinkedIn:
|
|
@@ -154,30 +297,70 @@ class LinkedIn:
|
|
|
154
297
|
|
|
155
298
|
@dataclass
|
|
156
299
|
class Jobs(ToolRequest):
|
|
157
|
-
"""LinkedIn Job Listing Scraper"""
|
|
300
|
+
"""LinkedIn Job Listing Scraper by Job Listing URL"""
|
|
158
301
|
|
|
159
302
|
SPIDER_ID = "linkedin_job_listings_information_by-job-listing-url"
|
|
160
303
|
SPIDER_NAME = "linkedin.com"
|
|
161
304
|
job_listing_url: str
|
|
305
|
+
page_turning: int | None = None
|
|
306
|
+
|
|
307
|
+
@dataclass
|
|
308
|
+
class JobByUrl(ToolRequest):
|
|
309
|
+
"""LinkedIn Job Listing Scraper by Job URL"""
|
|
310
|
+
|
|
311
|
+
SPIDER_ID = "linkedin_job_listings_information_by-job-url"
|
|
312
|
+
SPIDER_NAME = "linkedin.com"
|
|
313
|
+
job_url: str
|
|
314
|
+
|
|
315
|
+
@dataclass
|
|
316
|
+
class JobByKeyword(ToolRequest):
|
|
317
|
+
"""LinkedIn Job Listing Scraper by Keyword"""
|
|
318
|
+
|
|
319
|
+
SPIDER_ID = "linkedin_job_listings_information_by-keyword"
|
|
320
|
+
SPIDER_NAME = "linkedin.com"
|
|
162
321
|
location: str
|
|
163
|
-
|
|
322
|
+
keyword: str
|
|
323
|
+
time_range: str | None = None
|
|
324
|
+
experience_level: str | None = None
|
|
325
|
+
job_type: str | None = None
|
|
326
|
+
remote: str | None = None
|
|
327
|
+
company: str | None = None
|
|
328
|
+
selective_search: str | None = None
|
|
329
|
+
jobs_to_not_include: str | None = None
|
|
330
|
+
location_radius: str | None = None
|
|
164
331
|
page_turning: int | None = None
|
|
165
|
-
keyword: str | None = None
|
|
166
|
-
remote: str | None = None # On_site, Remote, Hybrid
|
|
167
332
|
|
|
168
333
|
|
|
169
334
|
class Reddit:
|
|
170
335
|
@dataclass
|
|
171
336
|
class Posts(ToolRequest):
|
|
172
|
-
"""Reddit Post Information Scraper"""
|
|
337
|
+
"""Reddit Post Information Scraper by URL"""
|
|
173
338
|
|
|
174
339
|
SPIDER_ID = "reddit_posts_by-url"
|
|
175
340
|
SPIDER_NAME = "reddit.com"
|
|
176
341
|
url: str
|
|
177
|
-
|
|
178
|
-
|
|
342
|
+
|
|
343
|
+
@dataclass
|
|
344
|
+
class PostsByKeywords(ToolRequest):
|
|
345
|
+
"""Reddit Post Information Scraper by Keywords"""
|
|
346
|
+
|
|
347
|
+
SPIDER_ID = "reddit_posts_by-keywords"
|
|
348
|
+
SPIDER_NAME = "reddit.com"
|
|
349
|
+
keyword: str
|
|
350
|
+
date: str | None = None # All time
|
|
351
|
+
num_of_posts: int | None = None
|
|
352
|
+
sort_by: str | None = None
|
|
353
|
+
|
|
354
|
+
@dataclass
|
|
355
|
+
class PostsBySubredditUrl(ToolRequest):
|
|
356
|
+
"""Reddit Post Information Scraper by Subreddit URL"""
|
|
357
|
+
|
|
358
|
+
SPIDER_ID = "reddit_posts_by-subredditurl"
|
|
359
|
+
SPIDER_NAME = "reddit.com"
|
|
360
|
+
url: str
|
|
361
|
+
sort_by: str | None = None
|
|
179
362
|
num_of_posts: int | None = None
|
|
180
|
-
|
|
363
|
+
sort_by_time: str | None = None # All Time
|
|
181
364
|
|
|
182
365
|
@dataclass
|
|
183
366
|
class Comment(ToolRequest):
|
|
@@ -187,4 +370,5 @@ class Reddit:
|
|
|
187
370
|
SPIDER_NAME = "reddit.com"
|
|
188
371
|
url: str
|
|
189
372
|
days_back: int | None = None
|
|
190
|
-
load_all_replies:
|
|
373
|
+
load_all_replies: str | None = None
|
|
374
|
+
comment_limit: int | None = None
|
thordata/tools/travel.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Travel & Real Estate Scraper Tools (Booking, Zillow, Airbnb)
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
|
|
9
|
+
from .base import ToolRequest
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Booking:
|
|
13
|
+
"""Namespace for Booking.com tools."""
|
|
14
|
+
|
|
15
|
+
@dataclass
|
|
16
|
+
class HotelByUrl(ToolRequest):
|
|
17
|
+
"""Booking Hotel Information Scraper by URL"""
|
|
18
|
+
|
|
19
|
+
SPIDER_ID = "booking_hotellist_by-url"
|
|
20
|
+
SPIDER_NAME = "booking.com"
|
|
21
|
+
url: str
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Zillow:
|
|
25
|
+
"""Namespace for Zillow tools."""
|
|
26
|
+
|
|
27
|
+
@dataclass
|
|
28
|
+
class PriceByUrl(ToolRequest):
|
|
29
|
+
"""Zillow Property Price History Information Scraper by URL"""
|
|
30
|
+
|
|
31
|
+
SPIDER_ID = "zillow_price_by-url"
|
|
32
|
+
SPIDER_NAME = "zillow.com"
|
|
33
|
+
url: str
|
|
34
|
+
|
|
35
|
+
@dataclass
|
|
36
|
+
class ProductByUrl(ToolRequest):
|
|
37
|
+
"""Zillow Property Details Information Scraper by URL"""
|
|
38
|
+
|
|
39
|
+
SPIDER_ID = "zillow_product_by-url"
|
|
40
|
+
SPIDER_NAME = "zillow.com"
|
|
41
|
+
url: str
|
|
42
|
+
|
|
43
|
+
@dataclass
|
|
44
|
+
class ProductByFilter(ToolRequest):
|
|
45
|
+
"""Zillow Property Details Information Scraper by Filter"""
|
|
46
|
+
|
|
47
|
+
SPIDER_ID = "zillow_product_by-filter"
|
|
48
|
+
SPIDER_NAME = "zillow.com"
|
|
49
|
+
keywords_location: str
|
|
50
|
+
listingCategory: str | None = None # For Rent, For Sale
|
|
51
|
+
HomeType: str | None = None # Houses
|
|
52
|
+
days_on_zillow: str | None = None # Any
|
|
53
|
+
maximum: int | None = None
|
|
54
|
+
|
|
55
|
+
@dataclass
|
|
56
|
+
class ProductByListUrl(ToolRequest):
|
|
57
|
+
"""Zillow Property Details Information Scraper by List URL"""
|
|
58
|
+
|
|
59
|
+
SPIDER_ID = "zillow_product_by-listurl"
|
|
60
|
+
SPIDER_NAME = "zillow.com"
|
|
61
|
+
url: str
|
|
62
|
+
maximum: int | None = None
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class Airbnb:
|
|
66
|
+
"""Namespace for Airbnb tools."""
|
|
67
|
+
|
|
68
|
+
@dataclass
|
|
69
|
+
class ProductBySearchUrl(ToolRequest):
|
|
70
|
+
"""Airbnb Properties Information Scraper by Search URL"""
|
|
71
|
+
|
|
72
|
+
SPIDER_ID = "airbnb_product_by-searchurl"
|
|
73
|
+
SPIDER_NAME = "airbnb.com"
|
|
74
|
+
searchurl: str
|
|
75
|
+
country: str | None = None
|
|
76
|
+
|
|
77
|
+
@dataclass
|
|
78
|
+
class ProductByLocation(ToolRequest):
|
|
79
|
+
"""Airbnb Properties Information Scraper by Location"""
|
|
80
|
+
|
|
81
|
+
SPIDER_ID = "airbnb_product_by-location"
|
|
82
|
+
SPIDER_NAME = "airbnb.com"
|
|
83
|
+
location: str
|
|
84
|
+
check_in: str | None = None
|
|
85
|
+
check_out: str | None = None
|
|
86
|
+
num_of_adults: str | None = None
|
|
87
|
+
num_of_children: str | None = None
|
|
88
|
+
num_of_infants: str | None = None
|
|
89
|
+
num_of_pets: str | None = None
|
|
90
|
+
country: str | None = None
|
|
91
|
+
currency: str | None = None
|
|
92
|
+
|
|
93
|
+
@dataclass
|
|
94
|
+
class ProductByUrl(ToolRequest):
|
|
95
|
+
"""Airbnb Properties Information Scraper by URL"""
|
|
96
|
+
|
|
97
|
+
SPIDER_ID = "airbnb_product_by-url"
|
|
98
|
+
SPIDER_NAME = "airbnb.com"
|
|
99
|
+
url: str
|
|
100
|
+
country: str | None = None
|
thordata/tools/video.py
CHANGED
|
@@ -46,14 +46,23 @@ class YouTube:
|
|
|
46
46
|
|
|
47
47
|
@dataclass
|
|
48
48
|
class Profile(VideoToolRequest):
|
|
49
|
-
"""YouTube Profile Scraper. Uses video_builder."""
|
|
49
|
+
"""YouTube Profile Scraper by Keyword. Uses video_builder."""
|
|
50
50
|
|
|
51
51
|
SPIDER_ID = "youtube_profiles_by-keyword"
|
|
52
52
|
SPIDER_NAME = "youtube.com"
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
keyword: str
|
|
55
55
|
page_turning: int = 1
|
|
56
|
-
|
|
56
|
+
common_settings: CommonSettings = field(default_factory=CommonSettings)
|
|
57
|
+
|
|
58
|
+
@dataclass
|
|
59
|
+
class ProfileByUrl(VideoToolRequest):
|
|
60
|
+
"""YouTube Profile Scraper by URL. Uses video_builder."""
|
|
61
|
+
|
|
62
|
+
SPIDER_ID = "youtube_profiles_by-url"
|
|
63
|
+
SPIDER_NAME = "youtube.com"
|
|
64
|
+
|
|
65
|
+
url: str # Channel URL
|
|
57
66
|
common_settings: CommonSettings = field(default_factory=CommonSettings)
|
|
58
67
|
|
|
59
68
|
@dataclass
|
|
@@ -69,13 +78,77 @@ class YouTube:
|
|
|
69
78
|
common_settings: CommonSettings = field(default_factory=CommonSettings)
|
|
70
79
|
|
|
71
80
|
@dataclass
|
|
72
|
-
class VideoInfo(
|
|
73
|
-
"""YouTube Video
|
|
81
|
+
class VideoInfo(VideoToolRequest):
|
|
82
|
+
"""YouTube Video Basic Information Scraper. Uses video_builder."""
|
|
83
|
+
|
|
84
|
+
SPIDER_ID = "youtube_product_by-id"
|
|
85
|
+
SPIDER_NAME = "youtube.com"
|
|
86
|
+
|
|
87
|
+
video_id: str
|
|
88
|
+
common_settings: CommonSettings = field(default_factory=CommonSettings)
|
|
89
|
+
|
|
90
|
+
@dataclass
|
|
91
|
+
class VideoPostByUrl(ToolRequest):
|
|
92
|
+
"""YouTube Video Post Scraper by URL. Uses standard builder."""
|
|
74
93
|
|
|
75
|
-
# Note: This one does NOT inherit from VideoToolRequest because it uses the standard builder
|
|
76
|
-
# and doesn't support common_settings in the same way.
|
|
77
94
|
SPIDER_ID = "youtube_video-post_by-url"
|
|
78
95
|
SPIDER_NAME = "youtube.com"
|
|
79
96
|
|
|
80
97
|
url: str # Channel Video URL
|
|
98
|
+
order_by: str | None = None
|
|
99
|
+
start_index: str | None = None
|
|
81
100
|
num_of_posts: str | None = None
|
|
101
|
+
|
|
102
|
+
@dataclass
|
|
103
|
+
class VideoPostBySearchFilters(ToolRequest):
|
|
104
|
+
"""YouTube Video Post Scraper by Search Filters. Uses standard builder."""
|
|
105
|
+
|
|
106
|
+
SPIDER_ID = "youtube_video-post_by-search-filters"
|
|
107
|
+
SPIDER_NAME = "youtube.com"
|
|
108
|
+
|
|
109
|
+
keyword_search: str
|
|
110
|
+
features: str | None = None
|
|
111
|
+
type: str | None = None # Videos
|
|
112
|
+
duration: str | None = None
|
|
113
|
+
upload_date: str | None = None
|
|
114
|
+
num_of_posts: str | None = None
|
|
115
|
+
|
|
116
|
+
@dataclass
|
|
117
|
+
class VideoPostByHashtag(ToolRequest):
|
|
118
|
+
"""YouTube Video Post Scraper by Hashtag. Uses standard builder."""
|
|
119
|
+
|
|
120
|
+
SPIDER_ID = "youtube_video-post_by-hashtag"
|
|
121
|
+
SPIDER_NAME = "youtube.com"
|
|
122
|
+
|
|
123
|
+
hashtag: str
|
|
124
|
+
num_of_posts: str | None = None
|
|
125
|
+
|
|
126
|
+
@dataclass
|
|
127
|
+
class VideoPostByPodcastUrl(ToolRequest):
|
|
128
|
+
"""YouTube Video Post Scraper by Podcast URL. Uses standard builder."""
|
|
129
|
+
|
|
130
|
+
SPIDER_ID = "youtube_video-post_by-podcast-url"
|
|
131
|
+
SPIDER_NAME = "youtube.com"
|
|
132
|
+
|
|
133
|
+
url: str # Playlist URL
|
|
134
|
+
num_of_posts: str | None = None
|
|
135
|
+
|
|
136
|
+
@dataclass
|
|
137
|
+
class VideoPostByKeyword(ToolRequest):
|
|
138
|
+
"""YouTube Video Post Scraper by Keyword. Uses standard builder."""
|
|
139
|
+
|
|
140
|
+
SPIDER_ID = "youtube_video-post_by-keyword"
|
|
141
|
+
SPIDER_NAME = "youtube.com"
|
|
142
|
+
|
|
143
|
+
keyword: str
|
|
144
|
+
num_of_posts: str | None = None
|
|
145
|
+
|
|
146
|
+
@dataclass
|
|
147
|
+
class VideoPostByExplore(ToolRequest):
|
|
148
|
+
"""YouTube Video Post Scraper by Explore URL. Uses standard builder."""
|
|
149
|
+
|
|
150
|
+
SPIDER_ID = "youtube_video-post_by-explore"
|
|
151
|
+
SPIDER_NAME = "youtube.com"
|
|
152
|
+
|
|
153
|
+
url: str
|
|
154
|
+
all_tabs: str | None = None
|
thordata/types/serp.py
CHANGED
|
@@ -117,7 +117,7 @@ class SerpRequest(ThordataBaseConfig):
|
|
|
117
117
|
render_js: bool | None = None
|
|
118
118
|
no_cache: bool | None = None
|
|
119
119
|
|
|
120
|
-
# Output
|
|
120
|
+
# Output format: "json" (json=1), "html" (json=3), "light_json" (json=4), or "both" (json=2)
|
|
121
121
|
output_format: str = "json"
|
|
122
122
|
|
|
123
123
|
# Advanced Google
|
|
@@ -155,13 +155,17 @@ class SerpRequest(ThordataBaseConfig):
|
|
|
155
155
|
}
|
|
156
156
|
|
|
157
157
|
# JSON output handling
|
|
158
|
+
# Dashboard mapping: json=1 (json), json=3 (html), json=4 (light json), json=2 (both)
|
|
158
159
|
fmt = self.output_format.lower()
|
|
159
160
|
if fmt == "json":
|
|
160
161
|
payload["json"] = "1"
|
|
161
162
|
elif fmt == "html":
|
|
162
|
-
|
|
163
|
+
payload["json"] = "3"
|
|
164
|
+
elif fmt in ("light_json", "light-json", "lightjson"):
|
|
165
|
+
payload["json"] = "4"
|
|
163
166
|
elif fmt in ("2", "both", "json+html"):
|
|
164
167
|
payload["json"] = "2"
|
|
168
|
+
# If no json param is set, default to HTML (legacy behavior)
|
|
165
169
|
|
|
166
170
|
# Query param handling
|
|
167
171
|
if engine == "yandex":
|