thordata-sdk 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/tools/social.py CHANGED
@@ -12,12 +12,47 @@ from .base import ToolRequest
 class TikTok:
     @dataclass
     class Post(ToolRequest):
-        """TikTok Post Information Scraper"""
+        """TikTok Post Information Scraper by URL"""

         SPIDER_ID = "tiktok_posts_by-url"
         SPIDER_NAME = "tiktok.com"
         url: str
-        page_turning: int | None = None
+        country: str | None = None
+
+    @dataclass
+    class PostsByKeywords(ToolRequest):
+        """TikTok Post Information Scraper by Keywords"""
+
+        SPIDER_ID = "tiktok_posts_by-keywords"
+        SPIDER_NAME = "tiktok.com"
+        search_keyword: str
+        num_of_posts: int | None = None
+        posts_to_not_include: str | None = None
+        country: str | None = None
+
+    @dataclass
+    class PostsByProfileUrl(ToolRequest):
+        """TikTok Post Information Scraper by Profile URL"""
+
+        SPIDER_ID = "tiktok_posts_by-profileurl"
+        SPIDER_NAME = "tiktok.com"
+        url: str
+        start_date: str | None = None
+        end_date: str | None = None
+        num_of_posts: int | None = None
+        what_to_collect: str | None = None
+        post_type: str | None = None
+        posts_to_not_include: str | None = None
+        country: str | None = None
+
+    @dataclass
+    class PostsByListUrl(ToolRequest):
+        """TikTok Post Information Scraper by List URL"""
+
+        SPIDER_ID = "tiktok_posts_by-listurl"
+        SPIDER_NAME = "tiktok.com"
+        url: str
+        num_of_posts: int | None = None

     @dataclass
     class Comment(ToolRequest):
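
In 1.7.0 the single TikTok post scraper is split by input type: Post keeps the per-post URL flow, while keyword, profile-URL, and list-URL searches get their own request classes. A minimal construction sketch, assuming the ToolRequest base (not shown in this diff) adds no required constructor fields of its own:

from thordata.tools.social import TikTok

# Field names come directly from the dataclasses above.
req = TikTok.PostsByKeywords(search_keyword="street food", num_of_posts=50)

# Spider routing constants are class attributes, not constructor arguments.
print(req.SPIDER_ID)    # tiktok_posts_by-keywords
print(req.SPIDER_NAME)  # tiktok.com

# How the request object is submitted is client-specific and outside this diff.
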
@@ -30,33 +65,62 @@ class TikTok:

     @dataclass
     class Profile(ToolRequest):
-        """TikTok Profile Information Scraper"""
+        """TikTok Profile Information Scraper by URL"""

         SPIDER_ID = "tiktok_profiles_by-url"
         SPIDER_NAME = "tiktok.com"
-
         url: str # Profile URL (e.g. https://www.tiktok.com/@user)
-        search_url: str | None = None
+        country: str | None = None
+
+    @dataclass
+    class ProfilesByListUrl(ToolRequest):
+        """TikTok Profile Information Scraper by List URL"""

+        SPIDER_ID = "tiktok_profiles_by-listurl"
+        SPIDER_NAME = "tiktok.com"
+        search_url: str
         country: str | None = None
         page_turning: int | None = None

     @dataclass
     class Shop(ToolRequest):
-        """TikTok Shop Information Scraper"""
+        """TikTok Shop Information Scraper by URL"""

         SPIDER_ID = "tiktok_shop_by-url"
         SPIDER_NAME = "tiktok.com"
         url: str
-        category_url: str | None = None
-        keyword: str | None = None
+
+    @dataclass
+    class ShopByCategoryUrl(ToolRequest):
+        """TikTok Shop Information Scraper by Category URL"""
+
+        SPIDER_ID = "tiktok_shop_by-category-url"
+        SPIDER_NAME = "tiktok.com"
+        category_url: str
+
+    @dataclass
+    class ShopByKeywords(ToolRequest):
+        """TikTok Shop Information Scraper by Keywords"""
+
+        SPIDER_ID = "tiktok_shop_by-keywords"
+        SPIDER_NAME = "tiktok.com"
+        keyword: str
+        domain: str = "https://www.tiktok.com/shop"
         page_turning: int | None = None


 class Facebook:
+    @dataclass
+    class PostDetails(ToolRequest):
+        """Facebook Post Details Scraper"""
+
+        SPIDER_ID = "facebook_post_by-posts-url"
+        SPIDER_NAME = "facebook.com"
+        url: str
+
     @dataclass
     class Posts(ToolRequest):
-        """Facebook Posts Scraper"""
+        """Facebook Posts Scraper by Keywords"""

         SPIDER_ID = "facebook_post_by-keywords"
         SPIDER_NAME = "facebook.com"
@@ -66,27 +130,70 @@ class Facebook:
         number: int = 10

     @dataclass
-    class PostDetails(ToolRequest):
-        """Facebook Post Details Scraper"""
+    class EventByEventListUrl(ToolRequest):
+        """Facebook Events Scraper by Event List URL"""

-        SPIDER_ID = "facebook_post_by-posts-url"
+        SPIDER_ID = "facebook_event_by-eventlist-url"
+        SPIDER_NAME = "facebook.com"
+        url: str
+        upcoming_events_only: str | None = None
+
+    @dataclass
+    class EventBySearchUrl(ToolRequest):
+        """Facebook Events Scraper by Search URL"""
+
+        SPIDER_ID = "facebook_event_by-search-url"
+        SPIDER_NAME = "facebook.com"
+        url: str
+
+    @dataclass
+    class EventByEventsUrl(ToolRequest):
+        """Facebook Events Scraper by Events URL"""
+
+        SPIDER_ID = "facebook_event_by-events-url"
+        SPIDER_NAME = "facebook.com"
+        url: str
+
+    @dataclass
+    class Profile(ToolRequest):
+        """Facebook Profile Scraper"""
+
+        SPIDER_ID = "facebook_profile_by-profiles-url"
+        SPIDER_NAME = "facebook.com"
+        url: str
+
+    @dataclass
+    class Comment(ToolRequest):
+        """Facebook Post Comments Scraper"""
+
+        SPIDER_ID = "facebook_comment_by-comments-url"
         SPIDER_NAME = "facebook.com"
         url: str
+        get_all_replies: str | None = None
+        limit_records: str | None = None
+        comments_sort: str | None = None # All comments


 class Instagram:
     @dataclass
     class Profile(ToolRequest):
-        """Instagram Profile Scraper"""
+        """Instagram Profile Scraper by Username"""

         SPIDER_ID = "ins_profiles_by-username"
         SPIDER_NAME = "instagram.com"
         username: str
-        profileurl: str | None = None
+
+    @dataclass
+    class ProfileByUrl(ToolRequest):
+        """Instagram Profile Scraper by Profile URL"""
+
+        SPIDER_ID = "ins_profiles_by-profileurl"
+        SPIDER_NAME = "instagram.com"
+        profileurl: str

     @dataclass
     class Post(ToolRequest):
-        """Instagram Post Information Scraper"""
+        """Instagram Post Information Scraper by Profile URL"""

         SPIDER_ID = "ins_posts_by-profileurl"
         SPIDER_NAME = "instagram.com"
@@ -96,14 +203,45 @@ class Instagram:
         end_date: str | None = None
         post_type: str | None = None # Post or Reel

+    @dataclass
+    class PostByUrl(ToolRequest):
+        """Instagram Post Information Scraper by Post URL"""
+
+        SPIDER_ID = "ins_posts_by-posturl"
+        SPIDER_NAME = "instagram.com"
+        posturl: str
+
     @dataclass
     class Reel(ToolRequest):
-        """Instagram Reel Information Scraper"""
+        """Instagram Reel Information Scraper by URL"""

         SPIDER_ID = "ins_reel_by-url"
         SPIDER_NAME = "instagram.com"
         url: str
+
+    @dataclass
+    class AllReel(ToolRequest):
+        """Instagram All Reel Information Scraper by URL"""
+
+        SPIDER_ID = "ins_allreel_by-url"
+        SPIDER_NAME = "instagram.com"
+        url: str
+        num_of_posts: int | None = None
+        posts_to_not_include: str | None = None
+        start_date: str | None = None
+        end_date: str | None = None
+
+    @dataclass
+    class ReelByListUrl(ToolRequest):
+        """Instagram Reel Information Scraper by List URL"""
+
+        SPIDER_ID = "ins_reel_by-listurl"
+        SPIDER_NAME = "instagram.com"
+        url: str
         num_of_posts: int | None = None
+        posts_to_not_include: str | None = None
+        start_date: str | None = None
+        end_date: str | None = None

     @dataclass
     class Comment(ToolRequest):
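
Reel scraping now has three request shapes: Reel for a single reel URL, AllReel for everything on an account, and ReelByListUrl for a list page, with the latter two sharing the same pagination and date-window fields. For example:

from thordata.tools.social import Instagram

reels = Instagram.AllReel(
    url="https://www.instagram.com/nasa/reels/",  # placeholder account reels URL
    num_of_posts=20,
    start_date="2024-01-01",  # date format is not documented in this diff; ISO dates are an assumption
    end_date="2024-03-31",
)
print(reels.SPIDER_ID)  # ins_allreel_by-url
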
@@ -117,30 +255,35 @@ class Instagram:
 class Twitter:
     @dataclass
     class Profile(ToolRequest):
-        """Twitter(X) Profile Scraper"""
+        """Twitter(X) Profile Scraper by Profile URL"""

-        SPIDER_ID = "twitter_profiles_by-url"
-        SPIDER_NAME = "twitter.com"
+        SPIDER_ID = "twitter_profile_by-profileurl"
+        SPIDER_NAME = "x.com"
         url: str
-        max_number_of_posts: int | None = None
-        user_name: str | None = None

     @dataclass
-    class Post(ToolRequest):
-        """
-        Twitter(X) Post Information Scraper
-        Updates based on integration snippet:
-        - SPIDER_NAME is 'x.com'
-        - Only 'url' is required.
-        """
+    class ProfileByUsername(ToolRequest):
+        """Twitter(X) Profile Scraper by Username"""
+
+        SPIDER_ID = "twitter_profile_by-username"
+        SPIDER_NAME = "x.com"
+        user_name: str

-        SPIDER_ID = "twitter_by-posturl_by-url"
-        SPIDER_NAME = "x.com" # Updated from snippet
+    @dataclass
+    class Post(ToolRequest):
+        """Twitter(X) Post Information Scraper by Post URL"""

+        SPIDER_ID = "twitter_post_by-posturl"
+        SPIDER_NAME = "x.com"
         url: str # Post URL (e.g. https://x.com/user/status/123)

-        start_date: str | None = None
-        end_date: str | None = None
+    @dataclass
+    class PostByProfileUrl(ToolRequest):
+        """Twitter(X) Post Information Scraper by Profile URL"""
+
+        SPIDER_ID = "twitter_post_by-profileurl"
+        SPIDER_NAME = "x.com"
+        url: str # Profile URL


 class LinkedIn:
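
The Twitter(X) classes are effectively re-keyed: both spider IDs change ("twitter_profiles_by-url" → "twitter_profile_by-profileurl", "twitter_by-posturl_by-url" → "twitter_post_by-posturl"), SPIDER_NAME moves from "twitter.com" to "x.com", and the old max_number_of_posts, user_name, start_date, and end_date fields are dropped or relocated. A sketch of the new shapes, under the same ToolRequest assumption:

from thordata.tools.social import Twitter

profile = Twitter.Profile(url="https://x.com/nasa")
by_name = Twitter.ProfileByUsername(user_name="nasa")      # user_name is now its own request class
post = Twitter.Post(url="https://x.com/nasa/status/123")   # placeholder status URL; date filters were removed

assert profile.SPIDER_NAME == "x.com"  # previously "twitter.com"
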
@@ -154,30 +297,70 @@ class LinkedIn:

     @dataclass
     class Jobs(ToolRequest):
-        """LinkedIn Job Listing Scraper"""
+        """LinkedIn Job Listing Scraper by Job Listing URL"""

         SPIDER_ID = "linkedin_job_listings_information_by-job-listing-url"
         SPIDER_NAME = "linkedin.com"
         job_listing_url: str
+        page_turning: int | None = None
+
+    @dataclass
+    class JobByUrl(ToolRequest):
+        """LinkedIn Job Listing Scraper by Job URL"""
+
+        SPIDER_ID = "linkedin_job_listings_information_by-job-url"
+        SPIDER_NAME = "linkedin.com"
+        job_url: str
+
+    @dataclass
+    class JobByKeyword(ToolRequest):
+        """LinkedIn Job Listing Scraper by Keyword"""
+
+        SPIDER_ID = "linkedin_job_listings_information_by-keyword"
+        SPIDER_NAME = "linkedin.com"
         location: str
-        job_url: str | None = None
+        keyword: str
+        time_range: str | None = None
+        experience_level: str | None = None
+        job_type: str | None = None
+        remote: str | None = None
+        company: str | None = None
+        selective_search: str | None = None
+        jobs_to_not_include: str | None = None
+        location_radius: str | None = None
         page_turning: int | None = None
-        keyword: str | None = None
-        remote: str | None = None # On_site, Remote, Hybrid


 class Reddit:
     @dataclass
     class Posts(ToolRequest):
-        """Reddit Post Information Scraper"""
+        """Reddit Post Information Scraper by URL"""

         SPIDER_ID = "reddit_posts_by-url"
         SPIDER_NAME = "reddit.com"
         url: str
-        keyword: str | None = None
-        subreddit_url: str | None = None
+
+    @dataclass
+    class PostsByKeywords(ToolRequest):
+        """Reddit Post Information Scraper by Keywords"""
+
+        SPIDER_ID = "reddit_posts_by-keywords"
+        SPIDER_NAME = "reddit.com"
+        keyword: str
+        date: str | None = None # All time
+        num_of_posts: int | None = None
+        sort_by: str | None = None
+
+    @dataclass
+    class PostsBySubredditUrl(ToolRequest):
+        """Reddit Post Information Scraper by Subreddit URL"""
+
+        SPIDER_ID = "reddit_posts_by-subredditurl"
+        SPIDER_NAME = "reddit.com"
+        url: str
+        sort_by: str | None = None
         num_of_posts: int | None = None
-        sort_by: str | None = None # Relevance, Hot, Top, New
+        sort_by_time: str | None = None # All Time

     @dataclass
     class Comment(ToolRequest):
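
LinkedIn keyword search is pulled out of Jobs into JobByKeyword with a much richer filter set, and Reddit post scraping is split into URL, keyword, and subreddit-URL variants. For example:

from thordata.tools.social import LinkedIn, Reddit

job_search = LinkedIn.JobByKeyword(
    location="Berlin",
    keyword="data engineer",
    remote="Remote",     # accepted filter values are not documented in this diff
    page_turning=2,
)

sub_posts = Reddit.PostsBySubredditUrl(
    url="https://www.reddit.com/r/Python/",  # placeholder subreddit URL
    num_of_posts=25,
)
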
@@ -187,4 +370,5 @@ class Reddit:
         SPIDER_NAME = "reddit.com"
         url: str
         days_back: int | None = None
-        load_all_replies: bool | None = None
+        load_all_replies: str | None = None
+        comment_limit: int | None = None
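
Note the type change on Reddit.Comment.load_all_replies (bool | None → str | None) alongside the new comment_limit field; callers passing True/False need to switch to the string form the spider expects, whose exact literals are not documented here. For example:

from thordata.tools.social import Reddit

comments = Reddit.Comment(
    url="https://www.reddit.com/r/Python/comments/abc123/example/",  # placeholder post URL
    days_back=7,
    load_all_replies="true",  # assumption: string literal rather than the old bool
    comment_limit=100,
)
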
@@ -0,0 +1,100 @@
+"""
+Travel & Real Estate Scraper Tools (Booking, Zillow, Airbnb)
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from .base import ToolRequest
+
+
+class Booking:
+    """Namespace for Booking.com tools."""
+
+    @dataclass
+    class HotelByUrl(ToolRequest):
+        """Booking Hotel Information Scraper by URL"""
+
+        SPIDER_ID = "booking_hotellist_by-url"
+        SPIDER_NAME = "booking.com"
+        url: str
+
+
+class Zillow:
+    """Namespace for Zillow tools."""
+
+    @dataclass
+    class PriceByUrl(ToolRequest):
+        """Zillow Property Price History Information Scraper by URL"""
+
+        SPIDER_ID = "zillow_price_by-url"
+        SPIDER_NAME = "zillow.com"
+        url: str
+
+    @dataclass
+    class ProductByUrl(ToolRequest):
+        """Zillow Property Details Information Scraper by URL"""
+
+        SPIDER_ID = "zillow_product_by-url"
+        SPIDER_NAME = "zillow.com"
+        url: str
+
+    @dataclass
+    class ProductByFilter(ToolRequest):
+        """Zillow Property Details Information Scraper by Filter"""
+
+        SPIDER_ID = "zillow_product_by-filter"
+        SPIDER_NAME = "zillow.com"
+        keywords_location: str
+        listingCategory: str | None = None # For Rent, For Sale
+        HomeType: str | None = None # Houses
+        days_on_zillow: str | None = None # Any
+        maximum: int | None = None
+
+    @dataclass
+    class ProductByListUrl(ToolRequest):
+        """Zillow Property Details Information Scraper by List URL"""
+
+        SPIDER_ID = "zillow_product_by-listurl"
+        SPIDER_NAME = "zillow.com"
+        url: str
+        maximum: int | None = None
+
+
+class Airbnb:
+    """Namespace for Airbnb tools."""
+
+    @dataclass
+    class ProductBySearchUrl(ToolRequest):
+        """Airbnb Properties Information Scraper by Search URL"""
+
+        SPIDER_ID = "airbnb_product_by-searchurl"
+        SPIDER_NAME = "airbnb.com"
+        searchurl: str
+        country: str | None = None
+
+    @dataclass
+    class ProductByLocation(ToolRequest):
+        """Airbnb Properties Information Scraper by Location"""
+
+        SPIDER_ID = "airbnb_product_by-location"
+        SPIDER_NAME = "airbnb.com"
+        location: str
+        check_in: str | None = None
+        check_out: str | None = None
+        num_of_adults: str | None = None
+        num_of_children: str | None = None
+        num_of_infants: str | None = None
+        num_of_pets: str | None = None
+        country: str | None = None
+        currency: str | None = None
+
+    @dataclass
+    class ProductByUrl(ToolRequest):
+        """Airbnb Properties Information Scraper by URL"""
+
+        SPIDER_ID = "airbnb_product_by-url"
+        SPIDER_NAME = "airbnb.com"
+        url: str
+        country: str | None = None
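
This hunk adds a brand-new module with Booking, Zillow, and Airbnb namespaces, but its file path is not shown in the diff header, so the import below is hypothetical. A sketch of the new request shapes:

from thordata.tools import travel  # hypothetical module name; the new file's actual path is not shown in this diff

listings = travel.Zillow.ProductByFilter(
    keywords_location="Austin, TX",
    listingCategory="For Sale",
    HomeType="Houses",
    maximum=50,
)

stays = travel.Airbnb.ProductByLocation(
    location="Lisbon",
    check_in="2025-06-01",  # date format not documented here; ISO dates are an assumption
    check_out="2025-06-07",
    num_of_adults="2",      # these counts are typed str | None, so they are passed as strings
)
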
thordata/tools/video.py CHANGED
@@ -46,14 +46,23 @@ class YouTube:

     @dataclass
     class Profile(VideoToolRequest):
-        """YouTube Profile Scraper. Uses video_builder."""
+        """YouTube Profile Scraper by Keyword. Uses video_builder."""

         SPIDER_ID = "youtube_profiles_by-keyword"
         SPIDER_NAME = "youtube.com"

-        url: str # Channel URL
+        keyword: str
         page_turning: int = 1
-        keyword: str | None = None
+        common_settings: CommonSettings = field(default_factory=CommonSettings)
+
+    @dataclass
+    class ProfileByUrl(VideoToolRequest):
+        """YouTube Profile Scraper by URL. Uses video_builder."""
+
+        SPIDER_ID = "youtube_profiles_by-url"
+        SPIDER_NAME = "youtube.com"
+
+        url: str # Channel URL
         common_settings: CommonSettings = field(default_factory=CommonSettings)

     @dataclass
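
This is a breaking change for YouTube.Profile: the class now keys on keyword (the old url field is gone) and gains common_settings, while channel-URL lookups move to the new ProfileByUrl. A sketch, assuming VideoToolRequest and CommonSettings (defined elsewhere in this module) require no extra constructor arguments:

from thordata.tools.video import YouTube

by_keyword = YouTube.Profile(keyword="space documentaries")         # url= no longer exists on Profile
by_url = YouTube.ProfileByUrl(url="https://www.youtube.com/@NASA")  # channel-URL lookups now live here
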
@@ -69,13 +78,77 @@ class YouTube:
         common_settings: CommonSettings = field(default_factory=CommonSettings)

     @dataclass
-    class VideoInfo(ToolRequest):
-        """YouTube Video Post Scraper (Metadata only). Standard builder."""
+    class VideoInfo(VideoToolRequest):
+        """YouTube Video Basic Information Scraper. Uses video_builder."""
+
+        SPIDER_ID = "youtube_product_by-id"
+        SPIDER_NAME = "youtube.com"
+
+        video_id: str
+        common_settings: CommonSettings = field(default_factory=CommonSettings)
+
+    @dataclass
+    class VideoPostByUrl(ToolRequest):
+        """YouTube Video Post Scraper by URL. Uses standard builder."""

-        # Note: This one does NOT inherit from VideoToolRequest because it uses the standard builder
-        # and doesn't support common_settings in the same way.
         SPIDER_ID = "youtube_video-post_by-url"
         SPIDER_NAME = "youtube.com"

         url: str # Channel Video URL
+        order_by: str | None = None
+        start_index: str | None = None
         num_of_posts: str | None = None
+
+    @dataclass
+    class VideoPostBySearchFilters(ToolRequest):
+        """YouTube Video Post Scraper by Search Filters. Uses standard builder."""
+
+        SPIDER_ID = "youtube_video-post_by-search-filters"
+        SPIDER_NAME = "youtube.com"
+
+        keyword_search: str
+        features: str | None = None
+        type: str | None = None # Videos
+        duration: str | None = None
+        upload_date: str | None = None
+        num_of_posts: str | None = None
+
+    @dataclass
+    class VideoPostByHashtag(ToolRequest):
+        """YouTube Video Post Scraper by Hashtag. Uses standard builder."""
+
+        SPIDER_ID = "youtube_video-post_by-hashtag"
+        SPIDER_NAME = "youtube.com"
+
+        hashtag: str
+        num_of_posts: str | None = None
+
+    @dataclass
+    class VideoPostByPodcastUrl(ToolRequest):
+        """YouTube Video Post Scraper by Podcast URL. Uses standard builder."""
+
+        SPIDER_ID = "youtube_video-post_by-podcast-url"
+        SPIDER_NAME = "youtube.com"
+
+        url: str # Playlist URL
+        num_of_posts: str | None = None
+
+    @dataclass
+    class VideoPostByKeyword(ToolRequest):
+        """YouTube Video Post Scraper by Keyword. Uses standard builder."""
+
+        SPIDER_ID = "youtube_video-post_by-keyword"
+        SPIDER_NAME = "youtube.com"
+
+        keyword: str
+        num_of_posts: str | None = None
+
+    @dataclass
+    class VideoPostByExplore(ToolRequest):
+        """YouTube Video Post Scraper by Explore URL. Uses standard builder."""
+
+        SPIDER_ID = "youtube_video-post_by-explore"
+        SPIDER_NAME = "youtube.com"
+
+        url: str
+        all_tabs: str | None = None
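
VideoInfo is repurposed here: it now rides the video_builder path and takes a video_id, while its old URL-based spider ("youtube_video-post_by-url") becomes VideoPostByUrl, joined by search-filter, hashtag, podcast-URL, keyword, and explore variants. A sketch of the new entry points, with the same caveats about the unseen base classes:

from thordata.tools.video import YouTube

info = YouTube.VideoInfo(video_id="dQw4w9WgXcQ")  # placeholder video id
search = YouTube.VideoPostBySearchFilters(
    keyword_search="rocket launch",
    upload_date="This year",  # accepted filter values are not documented in this diff
    num_of_posts="10",        # typed str | None in the dataclass
)
posts = YouTube.VideoPostByUrl(url="https://www.youtube.com/@NASA/videos")  # the old VideoInfo URL flow
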
thordata/types/serp.py CHANGED
@@ -117,7 +117,7 @@ class SerpRequest(ThordataBaseConfig):
     render_js: bool | None = None
     no_cache: bool | None = None

-    # Output
+    # Output format: "json" (json=1), "html" (json=3), "light_json" (json=4), or "both" (json=2)
     output_format: str = "json"

     # Advanced Google
@@ -155,13 +155,17 @@ class SerpRequest(ThordataBaseConfig):
         }

         # JSON output handling
+        # Dashboard mapping: json=1 (json), json=3 (html), json=4 (light json), json=2 (both)
         fmt = self.output_format.lower()
         if fmt == "json":
             payload["json"] = "1"
         elif fmt == "html":
-            pass # No json param means HTML
+            payload["json"] = "3"
+        elif fmt in ("light_json", "light-json", "lightjson"):
+            payload["json"] = "4"
         elif fmt in ("2", "both", "json+html"):
             payload["json"] = "2"
+        # If no json param is set, default to HTML (legacy behavior)

         # Query param handling
         if engine == "yandex":
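
The practical effect of this hunk: output_format="html" now sends json=3 instead of omitting the parameter, and a new "light_json" format maps to json=4. A standalone sketch of the same mapping for reference (the real logic lives inside SerpRequest's payload builder, whose full signature is not shown here):

def json_param(output_format: str) -> str | None:
    """Mirror of the output_format -> json parameter mapping introduced above."""
    fmt = output_format.lower()
    if fmt == "json":
        return "1"
    if fmt == "html":
        return "3"  # previously no json param was sent for HTML
    if fmt in ("light_json", "light-json", "lightjson"):
        return "4"
    if fmt in ("2", "both", "json+html"):
        return "2"
    return None  # unknown formats fall back to the legacy HTML behavior (no json param)


assert json_param("light_json") == "4"
assert json_param("html") == "3"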