thordata-sdk 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -12,9 +12,10 @@ from .base import ToolRequest
12
12
  class Amazon:
13
13
  """Namespace for Amazon tools."""
14
14
 
15
+ # --- Product Details (5 methods) ---
15
16
  @dataclass
16
- class Product(ToolRequest):
17
- """Amazon Product Details Scraper"""
17
+ class ProductByAsin(ToolRequest):
18
+ """Amazon Product Details Scraper by ASIN."""
18
19
 
19
20
  SPIDER_ID = "amazon_product_by-asin"
20
21
  SPIDER_NAME = "amazon.com"
@@ -22,16 +23,112 @@ class Amazon:
22
23
  asin: str
23
24
  domain: str = "amazon.com"
24
25
 
26
+ # Backward compatible alias
27
+ Product = ProductByAsin
28
+
25
29
  @dataclass
26
- class GlobalProduct(ToolRequest):
27
- """Amazon Global Product Details Scraper"""
30
+ class ProductByUrl(ToolRequest):
31
+ """Amazon Product Details Scraper by URL."""
28
32
 
29
- SPIDER_ID = "amazon_global-product_by-url"
33
+ SPIDER_ID = "amazon_product_by-url"
30
34
  SPIDER_NAME = "amazon.com"
31
35
 
32
36
  url: str
33
37
  zip_code: str | None = None
34
38
 
39
+ @dataclass
40
+ class ProductByKeywords(ToolRequest):
41
+ """Amazon Product Details Scraper by Keywords."""
42
+
43
+ SPIDER_ID = "amazon_product_by-keywords"
44
+ SPIDER_NAME = "amazon.com"
45
+
46
+ keyword: str
47
+ page_turning: int | None = None
48
+ lowest_price: float | None = None
49
+ highest_price: float | None = None
50
+
51
+ @dataclass
52
+ class ProductByCategoryUrl(ToolRequest):
53
+ """Amazon Product Details Scraper by Category URL."""
54
+
55
+ SPIDER_ID = "amazon_product_by-category-url"
56
+ SPIDER_NAME = "amazon.com"
57
+
58
+ url: str
59
+ sort_by: str | None = None
60
+ page_turning: int | None = None
61
+
62
+ @dataclass
63
+ class ProductByBestSellers(ToolRequest):
64
+ """Amazon Product Details Scraper by Best Sellers URL."""
65
+
66
+ SPIDER_ID = "amazon_product_by-best-sellers"
67
+ SPIDER_NAME = "amazon.com"
68
+
69
+ url: str
70
+ page_turning: int | None = None
71
+
72
+ # --- Other Amazon Tools ---
73
+
74
+ @dataclass
75
+ class GlobalProductByUrl(ToolRequest):
76
+ """Amazon Global Product Details Scraper by URL"""
77
+
78
+ SPIDER_ID = "amazon_global-product_by-url"
79
+ SPIDER_NAME = "amazon.com"
80
+
81
+ url: str
82
+
83
+ # Backward compatible alias
84
+ GlobalProduct = GlobalProductByUrl
85
+
86
+ @dataclass
87
+ class GlobalProductByCategoryUrl(ToolRequest):
88
+ """Amazon Global Product Details Scraper by Category URL"""
89
+
90
+ SPIDER_ID = "amazon_global-product_by-category-url"
91
+ SPIDER_NAME = "amazon.com"
92
+
93
+ url: str
94
+ sort_by: str | None = None
95
+ get_sponsored: str | None = None
96
+ maximum: int | None = None
97
+
98
+ @dataclass
99
+ class GlobalProductBySellerUrl(ToolRequest):
100
+ """Amazon Global Product Details Scraper by Seller URL"""
101
+
102
+ SPIDER_ID = "amazon_global-product_by-seller-url"
103
+ SPIDER_NAME = "amazon.com"
104
+
105
+ url: str
106
+ maximum: int | None = None
107
+
108
+ @dataclass
109
+ class GlobalProductByKeywords(ToolRequest):
110
+ """Amazon Global Product Details Scraper by Keywords"""
111
+
112
+ SPIDER_ID = "amazon_global-product_by-keywords"
113
+ SPIDER_NAME = "amazon.com"
114
+
115
+ keyword: str
116
+ domain: str = "https://www.amazon.com"
117
+ lowest_price: str | None = None
118
+ highest_price: str | None = None
119
+ page_turning: int | None = None
120
+
121
+ @dataclass
122
+ class GlobalProductByKeywordsBrand(ToolRequest):
123
+ """Amazon Global Product Details Scraper by Keywords and Brand"""
124
+
125
+ SPIDER_ID = "amazon_global-product_by-keywords-brand"
126
+ SPIDER_NAME = "amazon.com"
127
+
128
+ keyword: str
129
+ brands: str
130
+ page_turning: int | None = None
131
+
35
132
  @dataclass
36
133
  class Review(ToolRequest):
37
134
  """Amazon Product Review Scraper"""
@@ -59,9 +156,96 @@ class Amazon:
59
156
  SPIDER_NAME = "amazon.com"
60
157
 
61
158
  keyword: str
62
- domain: str = "amazon.com"
159
+ domain: str = "https://www.amazon.com/"
63
160
  page_turning: int = 1
64
- sort_by: str | None = None # Best Sellers, Newest Arrivals, etc.
65
- min_price: float | None = None
66
- max_price: float | None = None
67
- get_sponsored: bool | None = None
161
+
162
+
163
+ class eBay:
164
+ """Namespace for eBay tools."""
165
+
166
+ @dataclass
167
+ class ProductByUrl(ToolRequest):
168
+ """eBay Information Scraper by URL"""
169
+
170
+ SPIDER_ID = "ebay_ebay_by-url"
171
+ SPIDER_NAME = "ebay.com"
172
+ url: str
173
+
174
+ @dataclass
175
+ class ProductByCategoryUrl(ToolRequest):
176
+ """eBay Information Scraper by Category URL"""
177
+
178
+ SPIDER_ID = "ebay_ebay_by-category-url"
179
+ SPIDER_NAME = "ebay.com"
180
+ url: str
181
+ count: str | None = None
182
+
183
+ @dataclass
184
+ class ProductByKeywords(ToolRequest):
185
+ """eBay Information Scraper by Keywords"""
186
+
187
+ SPIDER_ID = "ebay_ebay_by-keywords"
188
+ SPIDER_NAME = "ebay.com"
189
+ keywords: str
190
+ count: str | None = None
191
+
192
+ @dataclass
193
+ class ProductByListUrl(ToolRequest):
194
+ """eBay Information Scraper by List URL"""
195
+
196
+ SPIDER_ID = "ebay_ebay_by-listurl"
197
+ SPIDER_NAME = "ebay.com"
198
+ url: str
199
+ count: str | None = None
200
+
201
+
202
+ class Walmart:
203
+ """Namespace for Walmart tools."""
204
+
205
+ @dataclass
206
+ class ProductByUrl(ToolRequest):
207
+ """Walmart Product Information Scraper by URL"""
208
+
209
+ SPIDER_ID = "walmart_product_by-url"
210
+ SPIDER_NAME = "walmart.com"
211
+ url: str
212
+ all_variations: str | None = None
213
+
214
+ @dataclass
215
+ class ProductByCategoryUrl(ToolRequest):
216
+ """Walmart Product Information Scraper by Category URL"""
217
+
218
+ SPIDER_ID = "walmart_product_by-category-url"
219
+ SPIDER_NAME = "walmart.com"
220
+ category_url: str
221
+ all_variations: str | None = None
222
+ page_turning: int | None = None
223
+
224
+ @dataclass
225
+ class ProductBySku(ToolRequest):
226
+ """Walmart Product Information Scraper by SKU"""
227
+
228
+ SPIDER_ID = "walmart_product_by-sku"
229
+ SPIDER_NAME = "walmart.com"
230
+ sku: str
231
+ all_variations: str | None = None
232
+
233
+ @dataclass
234
+ class ProductByKeywords(ToolRequest):
235
+ """Walmart Product Information Scraper by Keywords"""
236
+
237
+ SPIDER_ID = "walmart_product_by-keywords"
238
+ SPIDER_NAME = "walmart.com"
239
+ keyword: str
240
+ domain: str = "https://www.walmart.com/"
241
+ all_variations: str | None = None
242
+ page_turning: int | None = None
243
+
244
+ @dataclass
245
+ class ProductByZipcodes(ToolRequest):
246
+ """Walmart Product Information Scraper by Zipcodes"""
247
+
248
+ SPIDER_ID = "walmart_product_by-zipcodes"
249
+ SPIDER_NAME = "walmart.com"
250
+ url: str
251
+ zip_code: str | None = None
@@ -0,0 +1,155 @@
1
+ """
2
+ Professional Platform Scraper Tools (Indeed, Glassdoor, Crunchbase, etc.)
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass
8
+
9
+ from .base import ToolRequest
10
+
11
+
12
+ class Indeed:
13
+ """Namespace for Indeed tools."""
14
+
15
+ @dataclass
16
+ class JobByUrl(ToolRequest):
17
+ """Indeed Job Listings Scraper by Job URL"""
18
+
19
+ SPIDER_ID = "indeed_job-listings_by-job-url"
20
+ SPIDER_NAME = "indeed.com"
21
+ job_url: str
22
+
23
+ @dataclass
24
+ class JobByKeyword(ToolRequest):
25
+ """Indeed Job Listings Scraper by Keyword"""
26
+
27
+ SPIDER_ID = "indeed_job-listings_by-keyword"
28
+ SPIDER_NAME = "indeed.com"
29
+ keyword: str
30
+ location: str
31
+ country: str | None = None
32
+ domain: str | None = None
33
+ date_posted: str | None = None
34
+ posted_by: str | None = None
35
+ pay: str | None = None
36
+ location_radius: str | None = None
37
+
38
+ @dataclass
39
+ class CompanyByListUrl(ToolRequest):
40
+ """Indeed Companies Info Scraper by Company List URL"""
41
+
42
+ SPIDER_ID = "indeed_companies-info_by-company-list-url"
43
+ SPIDER_NAME = "indeed.com"
44
+ company_list_url: str
45
+
46
+ @dataclass
47
+ class CompanyByKeyword(ToolRequest):
48
+ """Indeed Companies Info Scraper by Keyword"""
49
+
50
+ SPIDER_ID = "indeed_companies-info_by-keyword"
51
+ SPIDER_NAME = "indeed.com"
52
+ keyword: str
53
+
54
+ @dataclass
55
+ class CompanyByIndustryAndState(ToolRequest):
56
+ """Indeed Companies Info Scraper by Industry and State"""
57
+
58
+ SPIDER_ID = "indeed_companies-info_by-industry-and-state"
59
+ SPIDER_NAME = "indeed.com"
60
+ industry: str
61
+ state: str | None = None
62
+
63
+ @dataclass
64
+ class CompanyByUrl(ToolRequest):
65
+ """Indeed Companies Info Scraper by Company URL"""
66
+
67
+ SPIDER_ID = "indeed_companies-info_by-company-url"
68
+ SPIDER_NAME = "indeed.com"
69
+ company_url: str
70
+
71
+
72
+ class Glassdoor:
73
+ """Namespace for Glassdoor tools."""
74
+
75
+ @dataclass
76
+ class CompanyByUrl(ToolRequest):
77
+ """Glassdoor Company Overview Information Scraper by URL"""
78
+
79
+ SPIDER_ID = "glassdoor_company_by-url"
80
+ SPIDER_NAME = "glassdoor.com"
81
+ url: str
82
+
83
+ @dataclass
84
+ class CompanyByInputFilter(ToolRequest):
85
+ """Glassdoor Company Overview Information Scraper by Input Filter"""
86
+
87
+ SPIDER_ID = "glassdoor_company_by-inputfilter"
88
+ SPIDER_NAME = "glassdoor.com"
89
+ company_name: str
90
+ location: str | None = None
91
+ industries: str | None = None
92
+ Job_title: str | None = None # Note: capital J in API
93
+
94
+ @dataclass
95
+ class CompanyByKeywords(ToolRequest):
96
+ """Glassdoor Company Overview Information Scraper by Keywords"""
97
+
98
+ SPIDER_ID = "glassdoor_company_by-keywords"
99
+ SPIDER_NAME = "glassdoor.com"
100
+ search_url: str
101
+ max_search_results: int | None = None
102
+
103
+ @dataclass
104
+ class CompanyByListUrl(ToolRequest):
105
+ """Glassdoor Company Overview Information Scraper by List URL"""
106
+
107
+ SPIDER_ID = "glassdoor_company_by-listurl"
108
+ SPIDER_NAME = "glassdoor.com"
109
+ url: str
110
+
111
+ @dataclass
112
+ class JobByUrl(ToolRequest):
113
+ """Glassdoor Job Information Scraper by URL"""
114
+
115
+ SPIDER_ID = "glassdoor_joblistings_by-url"
116
+ SPIDER_NAME = "glassdoor.com"
117
+ url: str
118
+
119
+ @dataclass
120
+ class JobByKeywords(ToolRequest):
121
+ """Glassdoor Job Information Scraper by Keywords"""
122
+
123
+ SPIDER_ID = "glassdoor_joblistings_by-keywords"
124
+ SPIDER_NAME = "glassdoor.com"
125
+ keyword: str
126
+ location: str
127
+ country: str | None = None
128
+
129
+ @dataclass
130
+ class JobByListUrl(ToolRequest):
131
+ """Glassdoor Job Information Scraper by List URL"""
132
+
133
+ SPIDER_ID = "glassdoor_joblistings_by-listurl"
134
+ SPIDER_NAME = "glassdoor.com"
135
+ url: str
136
+
137
+
138
+ class Crunchbase:
139
+ """Namespace for Crunchbase tools."""
140
+
141
+ @dataclass
142
+ class CompanyByUrl(ToolRequest):
143
+ """Crunchbase Company Information Scraper by URL"""
144
+
145
+ SPIDER_ID = "crunchbase_company_by-url"
146
+ SPIDER_NAME = "crunchbase.com"
147
+ url: str
148
+
149
+ @dataclass
150
+ class CompanyByKeywords(ToolRequest):
151
+ """Crunchbase Company Information Scraper by Keywords"""
152
+
153
+ SPIDER_ID = "crunchbase_company_by-keywords"
154
+ SPIDER_NAME = "crunchbase.com"
155
+ keyword: str
thordata/tools/search.py CHANGED
@@ -13,13 +13,47 @@ class GoogleMaps:
13
13
  """Namespace for Google Maps tools."""
14
14
 
15
15
  @dataclass
16
- class Details(ToolRequest):
17
- """Google Maps Details Information Scraper"""
16
+ class DetailsByUrl(ToolRequest):
17
+ """Google Maps Details Scraper by URL."""
18
18
 
19
19
  SPIDER_ID = "google_map-details_by-url"
20
20
  SPIDER_NAME = "google.com"
21
21
 
22
- url: str # Google Maps URL
22
+ url: str
23
+
24
+ @dataclass
25
+ class DetailsByCid(ToolRequest):
26
+ """Google Maps Details Scraper by CID."""
27
+
28
+ SPIDER_ID = "google_map-details_by-cid"
29
+ SPIDER_NAME = "google.com"
30
+
31
+ CID: str
32
+
33
+ @dataclass
34
+ class DetailsByLocation(ToolRequest):
35
+ """Google Maps Details Scraper by Location keyword + country (+ optional lat/long/zoom).""" # noqa: E501
36
+
37
+ SPIDER_ID = "google_map-details_by-location"
38
+ SPIDER_NAME = "google.com"
39
+
40
+ country: str
41
+ keyword: str
42
+ lat: str | None = None
43
+ long: str | None = None
44
+ zoom_level: str | None = None
45
+
46
+ @dataclass
47
+ class DetailsByPlaceId(ToolRequest):
48
+ """Google Maps Details Scraper by Place ID."""
49
+
50
+ SPIDER_ID = "google_map-details_by-placeid"
51
+ SPIDER_NAME = "google.com"
52
+
53
+ place_id: str
54
+
55
+ # Backward compatible alias: keep old name working
56
+ Details = DetailsByUrl
23
57
 
24
58
  @dataclass
25
59
  class Reviews(ToolRequest):
@@ -37,14 +71,22 @@ class GoogleShopping:
37
71
 
38
72
  @dataclass
39
73
  class Product(ToolRequest):
40
- """Google Shopping Information Scraper"""
74
+ """Google Shopping Information Scraper by URL"""
41
75
 
42
76
  SPIDER_ID = "google_shopping_by-url"
43
77
  SPIDER_NAME = "google.com"
44
-
45
78
  url: str
46
79
  country: str | None = None # e.g. "US"
47
80
 
81
+ @dataclass
82
+ class ProductByKeywords(ToolRequest):
83
+ """Google Shopping Information Scraper by Keywords"""
84
+
85
+ SPIDER_ID = "google_shopping_by-keywords"
86
+ SPIDER_NAME = "google.com"
87
+ keyword: str
88
+ country: str | None = None # e.g. "US"
89
+
48
90
 
49
91
  class GooglePlay:
50
92
  """Namespace for Google Play Store tools."""