thordata-sdk 1.5.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +1 -1
- thordata/async_client.py +12 -7
- thordata/client.py +12 -7
- thordata/enums.py +2 -2
- thordata/exceptions.py +70 -19
- thordata/models.py +1 -1
- thordata/retry.py +1 -1
- thordata/tools/__init__.py +11 -1
- thordata/tools/code.py +17 -4
- thordata/tools/ecommerce.py +194 -10
- thordata/tools/professional.py +155 -0
- thordata/tools/search.py +47 -5
- thordata/tools/social.py +225 -41
- thordata/tools/travel.py +100 -0
- thordata/tools/video.py +80 -7
- thordata/types/task.py +16 -4
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.6.0.dist-info}/METADATA +63 -7
- thordata_sdk-1.6.0.dist-info/RECORD +35 -0
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.6.0.dist-info}/WHEEL +1 -1
- thordata/_example_utils.py +0 -77
- thordata/demo.py +0 -138
- thordata_sdk-1.5.0.dist-info/RECORD +0 -35
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.6.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Professional Platform Scraper Tools (Indeed, Glassdoor, Crunchbase, etc.)
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
|
|
9
|
+
from .base import ToolRequest
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Indeed:
    """Namespace for Indeed tools.

    Every nested request class targets ``SPIDER_NAME = "indeed.com"`` and
    differs only in its ``SPIDER_ID`` and the input fields it declares.
    """

    @dataclass
    class JobByUrl(ToolRequest):
        """Indeed Job Listings Scraper by Job URL"""

        # Spider identifiers: unannotated, so not declared as dataclass
        # fields by this class.
        SPIDER_ID = "indeed_job-listings_by-job-url"
        SPIDER_NAME = "indeed.com"

        # Required input (no default).
        job_url: str

    @dataclass
    class JobByKeyword(ToolRequest):
        """Indeed Job Listings Scraper by Keyword"""

        SPIDER_ID = "indeed_job-listings_by-keyword"
        SPIDER_NAME = "indeed.com"

        # Required inputs (no defaults).
        keyword: str
        location: str

        # Optional inputs; each defaults to None.
        # NOTE(review): how a None value is sent (or omitted) is decided by
        # ToolRequest, which is not visible here -- confirm before relying
        # on it.
        country: str | None = None
        domain: str | None = None
        date_posted: str | None = None
        posted_by: str | None = None
        pay: str | None = None
        location_radius: str | None = None

    @dataclass
    class CompanyByListUrl(ToolRequest):
        """Indeed Companies Info Scraper by Company List URL"""

        SPIDER_ID = "indeed_companies-info_by-company-list-url"
        SPIDER_NAME = "indeed.com"

        # Required input (no default).
        company_list_url: str

    @dataclass
    class CompanyByKeyword(ToolRequest):
        """Indeed Companies Info Scraper by Keyword"""

        SPIDER_ID = "indeed_companies-info_by-keyword"
        SPIDER_NAME = "indeed.com"

        # Required input (no default).
        keyword: str

    @dataclass
    class CompanyByIndustryAndState(ToolRequest):
        """Indeed Companies Info Scraper by Industry and State"""

        SPIDER_ID = "indeed_companies-info_by-industry-and-state"
        SPIDER_NAME = "indeed.com"

        # ``industry`` is required; ``state`` is optional despite the
        # class name mentioning both.
        industry: str
        state: str | None = None

    @dataclass
    class CompanyByUrl(ToolRequest):
        """Indeed Companies Info Scraper by Company URL"""

        SPIDER_ID = "indeed_companies-info_by-company-url"
        SPIDER_NAME = "indeed.com"

        # Required input (no default).
        company_url: str
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class Glassdoor:
    """Namespace for Glassdoor tools.

    Every nested request class targets ``SPIDER_NAME = "glassdoor.com"`` and
    differs only in its ``SPIDER_ID`` and the input fields it declares.
    """

    @dataclass
    class CompanyByUrl(ToolRequest):
        """Glassdoor Company Overview Information Scraper by URL"""

        # Spider identifiers: unannotated, so not declared as dataclass
        # fields by this class.
        SPIDER_ID = "glassdoor_company_by-url"
        SPIDER_NAME = "glassdoor.com"

        # Required input (no default).
        url: str

    @dataclass
    class CompanyByInputFilter(ToolRequest):
        """Glassdoor Company Overview Information Scraper by Input Filter"""

        SPIDER_ID = "glassdoor_company_by-inputfilter"
        SPIDER_NAME = "glassdoor.com"

        # Required input (no default).
        company_name: str

        # Optional filters; each defaults to None.
        location: str | None = None
        industries: str | None = None
        # The capital "J" is deliberate: the field name mirrors the API's
        # spelling, so do not normalise it to snake_case.
        Job_title: str | None = None  # Note: capital J in API

    @dataclass
    class CompanyByKeywords(ToolRequest):
        """Glassdoor Company Overview Information Scraper by Keywords"""

        SPIDER_ID = "glassdoor_company_by-keywords"
        SPIDER_NAME = "glassdoor.com"

        # NOTE(review): despite the "by keywords" name, the required input
        # declared here is a search URL, not a keyword -- confirm against
        # the API before changing.
        search_url: str
        max_search_results: int | None = None

    @dataclass
    class CompanyByListUrl(ToolRequest):
        """Glassdoor Company Overview Information Scraper by List URL"""

        SPIDER_ID = "glassdoor_company_by-listurl"
        SPIDER_NAME = "glassdoor.com"

        # Required input (no default).
        url: str

    @dataclass
    class JobByUrl(ToolRequest):
        """Glassdoor Job Information Scraper by URL"""

        SPIDER_ID = "glassdoor_joblistings_by-url"
        SPIDER_NAME = "glassdoor.com"

        # Required input (no default).
        url: str

    @dataclass
    class JobByKeywords(ToolRequest):
        """Glassdoor Job Information Scraper by Keywords"""

        SPIDER_ID = "glassdoor_joblistings_by-keywords"
        SPIDER_NAME = "glassdoor.com"

        # Required inputs (no defaults).
        keyword: str
        location: str

        # Optional input; defaults to None.
        country: str | None = None

    @dataclass
    class JobByListUrl(ToolRequest):
        """Glassdoor Job Information Scraper by List URL"""

        SPIDER_ID = "glassdoor_joblistings_by-listurl"
        SPIDER_NAME = "glassdoor.com"

        # Required input (no default).
        url: str
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class Crunchbase:
    """Namespace for Crunchbase tools.

    Both nested request classes target ``SPIDER_NAME = "crunchbase.com"`` and
    differ only in their ``SPIDER_ID`` and the single input field declared.
    """

    @dataclass
    class CompanyByUrl(ToolRequest):
        """Crunchbase Company Information Scraper by URL"""

        # Spider identifiers: unannotated, so not declared as dataclass
        # fields by this class.
        SPIDER_ID = "crunchbase_company_by-url"
        SPIDER_NAME = "crunchbase.com"

        # Required input (no default).
        url: str

    @dataclass
    class CompanyByKeywords(ToolRequest):
        """Crunchbase Company Information Scraper by Keywords"""

        SPIDER_ID = "crunchbase_company_by-keywords"
        SPIDER_NAME = "crunchbase.com"

        # Required input (no default). The field is singular ``keyword``
        # even though the class and spider id say "keywords".
        keyword: str
|
thordata/tools/search.py
CHANGED
|
@@ -13,13 +13,47 @@ class GoogleMaps:
|
|
|
13
13
|
"""Namespace for Google Maps tools."""
|
|
14
14
|
|
|
15
15
|
@dataclass
|
|
16
|
-
class
|
|
17
|
-
"""Google Maps Details
|
|
16
|
+
class DetailsByUrl(ToolRequest):
|
|
17
|
+
"""Google Maps Details Scraper by URL."""
|
|
18
18
|
|
|
19
19
|
SPIDER_ID = "google_map-details_by-url"
|
|
20
20
|
SPIDER_NAME = "google.com"
|
|
21
21
|
|
|
22
|
-
url: str
|
|
22
|
+
url: str
|
|
23
|
+
|
|
24
|
+
@dataclass
|
|
25
|
+
class DetailsByCid(ToolRequest):
|
|
26
|
+
"""Google Maps Details Scraper by CID."""
|
|
27
|
+
|
|
28
|
+
SPIDER_ID = "google_map-details_by-cid"
|
|
29
|
+
SPIDER_NAME = "google.com"
|
|
30
|
+
|
|
31
|
+
CID: str
|
|
32
|
+
|
|
33
|
+
@dataclass
|
|
34
|
+
class DetailsByLocation(ToolRequest):
|
|
35
|
+
"""Google Maps Details Scraper by Location keyword + country (+ optional lat/long/zoom).""" # noqa: E501
|
|
36
|
+
|
|
37
|
+
SPIDER_ID = "google_map-details_by-location"
|
|
38
|
+
SPIDER_NAME = "google.com"
|
|
39
|
+
|
|
40
|
+
country: str
|
|
41
|
+
keyword: str
|
|
42
|
+
lat: str | None = None
|
|
43
|
+
long: str | None = None
|
|
44
|
+
zoom_level: str | None = None
|
|
45
|
+
|
|
46
|
+
@dataclass
|
|
47
|
+
class DetailsByPlaceId(ToolRequest):
|
|
48
|
+
"""Google Maps Details Scraper by Place ID."""
|
|
49
|
+
|
|
50
|
+
SPIDER_ID = "google_map-details_by-placeid"
|
|
51
|
+
SPIDER_NAME = "google.com"
|
|
52
|
+
|
|
53
|
+
place_id: str
|
|
54
|
+
|
|
55
|
+
# Backward compatible alias: keep old name working
|
|
56
|
+
Details = DetailsByUrl
|
|
23
57
|
|
|
24
58
|
@dataclass
|
|
25
59
|
class Reviews(ToolRequest):
|
|
@@ -37,14 +71,22 @@ class GoogleShopping:
|
|
|
37
71
|
|
|
38
72
|
@dataclass
|
|
39
73
|
class Product(ToolRequest):
|
|
40
|
-
"""Google Shopping Information Scraper"""
|
|
74
|
+
"""Google Shopping Information Scraper by URL"""
|
|
41
75
|
|
|
42
76
|
SPIDER_ID = "google_shopping_by-url"
|
|
43
77
|
SPIDER_NAME = "google.com"
|
|
44
|
-
|
|
45
78
|
url: str
|
|
46
79
|
country: str | None = None # e.g. "US"
|
|
47
80
|
|
|
81
|
+
@dataclass
|
|
82
|
+
class ProductByKeywords(ToolRequest):
|
|
83
|
+
"""Google Shopping Information Scraper by Keywords"""
|
|
84
|
+
|
|
85
|
+
SPIDER_ID = "google_shopping_by-keywords"
|
|
86
|
+
SPIDER_NAME = "google.com"
|
|
87
|
+
keyword: str
|
|
88
|
+
country: str | None = None # e.g. "US"
|
|
89
|
+
|
|
48
90
|
|
|
49
91
|
class GooglePlay:
|
|
50
92
|
"""Namespace for Google Play Store tools."""
|
thordata/tools/social.py
CHANGED
|
@@ -12,12 +12,47 @@ from .base import ToolRequest
|
|
|
12
12
|
class TikTok:
|
|
13
13
|
@dataclass
|
|
14
14
|
class Post(ToolRequest):
|
|
15
|
-
"""TikTok Post Information Scraper"""
|
|
15
|
+
"""TikTok Post Information Scraper by URL"""
|
|
16
16
|
|
|
17
17
|
SPIDER_ID = "tiktok_posts_by-url"
|
|
18
18
|
SPIDER_NAME = "tiktok.com"
|
|
19
19
|
url: str
|
|
20
|
-
|
|
20
|
+
country: str | None = None
|
|
21
|
+
|
|
22
|
+
@dataclass
|
|
23
|
+
class PostsByKeywords(ToolRequest):
|
|
24
|
+
"""TikTok Post Information Scraper by Keywords"""
|
|
25
|
+
|
|
26
|
+
SPIDER_ID = "tiktok_posts_by-keywords"
|
|
27
|
+
SPIDER_NAME = "tiktok.com"
|
|
28
|
+
search_keyword: str
|
|
29
|
+
num_of_posts: int | None = None
|
|
30
|
+
posts_to_not_include: str | None = None
|
|
31
|
+
country: str | None = None
|
|
32
|
+
|
|
33
|
+
@dataclass
|
|
34
|
+
class PostsByProfileUrl(ToolRequest):
|
|
35
|
+
"""TikTok Post Information Scraper by Profile URL"""
|
|
36
|
+
|
|
37
|
+
SPIDER_ID = "tiktok_posts_by-profileurl"
|
|
38
|
+
SPIDER_NAME = "tiktok.com"
|
|
39
|
+
url: str
|
|
40
|
+
start_date: str | None = None
|
|
41
|
+
end_date: str | None = None
|
|
42
|
+
num_of_posts: int | None = None
|
|
43
|
+
what_to_collect: str | None = None
|
|
44
|
+
post_type: str | None = None
|
|
45
|
+
posts_to_not_include: str | None = None
|
|
46
|
+
country: str | None = None
|
|
47
|
+
|
|
48
|
+
@dataclass
|
|
49
|
+
class PostsByListUrl(ToolRequest):
|
|
50
|
+
"""TikTok Post Information Scraper by List URL"""
|
|
51
|
+
|
|
52
|
+
SPIDER_ID = "tiktok_posts_by-listurl"
|
|
53
|
+
SPIDER_NAME = "tiktok.com"
|
|
54
|
+
url: str
|
|
55
|
+
num_of_posts: int | None = None
|
|
21
56
|
|
|
22
57
|
@dataclass
|
|
23
58
|
class Comment(ToolRequest):
|
|
@@ -30,33 +65,62 @@ class TikTok:
|
|
|
30
65
|
|
|
31
66
|
@dataclass
|
|
32
67
|
class Profile(ToolRequest):
|
|
33
|
-
"""TikTok Profile Information Scraper"""
|
|
68
|
+
"""TikTok Profile Information Scraper by URL"""
|
|
34
69
|
|
|
35
70
|
SPIDER_ID = "tiktok_profiles_by-url"
|
|
36
71
|
SPIDER_NAME = "tiktok.com"
|
|
37
|
-
|
|
38
72
|
url: str # Profile URL (e.g. https://www.tiktok.com/@user)
|
|
39
|
-
|
|
73
|
+
country: str | None = None
|
|
74
|
+
|
|
75
|
+
@dataclass
|
|
76
|
+
class ProfilesByListUrl(ToolRequest):
|
|
77
|
+
"""TikTok Profile Information Scraper by List URL"""
|
|
40
78
|
|
|
79
|
+
SPIDER_ID = "tiktok_profiles_by-listurl"
|
|
80
|
+
SPIDER_NAME = "tiktok.com"
|
|
81
|
+
search_url: str
|
|
41
82
|
country: str | None = None
|
|
42
83
|
page_turning: int | None = None
|
|
43
84
|
|
|
44
85
|
@dataclass
|
|
45
86
|
class Shop(ToolRequest):
|
|
46
|
-
"""TikTok Shop Information Scraper"""
|
|
87
|
+
"""TikTok Shop Information Scraper by URL"""
|
|
47
88
|
|
|
48
89
|
SPIDER_ID = "tiktok_shop_by-url"
|
|
49
90
|
SPIDER_NAME = "tiktok.com"
|
|
50
91
|
url: str
|
|
51
|
-
|
|
52
|
-
|
|
92
|
+
|
|
93
|
+
@dataclass
|
|
94
|
+
class ShopByCategoryUrl(ToolRequest):
|
|
95
|
+
"""TikTok Shop Information Scraper by Category URL"""
|
|
96
|
+
|
|
97
|
+
SPIDER_ID = "tiktok_shop_by-category-url"
|
|
98
|
+
SPIDER_NAME = "tiktok.com"
|
|
99
|
+
category_url: str
|
|
100
|
+
|
|
101
|
+
@dataclass
|
|
102
|
+
class ShopByKeywords(ToolRequest):
|
|
103
|
+
"""TikTok Shop Information Scraper by Keywords"""
|
|
104
|
+
|
|
105
|
+
SPIDER_ID = "tiktok_shop_by-keywords"
|
|
106
|
+
SPIDER_NAME = "tiktok.com"
|
|
107
|
+
keyword: str
|
|
108
|
+
domain: str = "https://www.tiktok.com/shop"
|
|
53
109
|
page_turning: int | None = None
|
|
54
110
|
|
|
55
111
|
|
|
56
112
|
class Facebook:
|
|
113
|
+
@dataclass
|
|
114
|
+
class PostDetails(ToolRequest):
|
|
115
|
+
"""Facebook Post Details Scraper"""
|
|
116
|
+
|
|
117
|
+
SPIDER_ID = "facebook_post_by-posts-url"
|
|
118
|
+
SPIDER_NAME = "facebook.com"
|
|
119
|
+
url: str
|
|
120
|
+
|
|
57
121
|
@dataclass
|
|
58
122
|
class Posts(ToolRequest):
|
|
59
|
-
"""Facebook Posts Scraper"""
|
|
123
|
+
"""Facebook Posts Scraper by Keywords"""
|
|
60
124
|
|
|
61
125
|
SPIDER_ID = "facebook_post_by-keywords"
|
|
62
126
|
SPIDER_NAME = "facebook.com"
|
|
@@ -66,27 +130,70 @@ class Facebook:
|
|
|
66
130
|
number: int = 10
|
|
67
131
|
|
|
68
132
|
@dataclass
|
|
69
|
-
class
|
|
70
|
-
"""Facebook
|
|
133
|
+
class EventByEventListUrl(ToolRequest):
|
|
134
|
+
"""Facebook Events Scraper by Event List URL"""
|
|
71
135
|
|
|
72
|
-
SPIDER_ID = "
|
|
136
|
+
SPIDER_ID = "facebook_event_by-eventlist-url"
|
|
137
|
+
SPIDER_NAME = "facebook.com"
|
|
138
|
+
url: str
|
|
139
|
+
upcoming_events_only: str | None = None
|
|
140
|
+
|
|
141
|
+
@dataclass
|
|
142
|
+
class EventBySearchUrl(ToolRequest):
|
|
143
|
+
"""Facebook Events Scraper by Search URL"""
|
|
144
|
+
|
|
145
|
+
SPIDER_ID = "facebook_event_by-search-url"
|
|
146
|
+
SPIDER_NAME = "facebook.com"
|
|
147
|
+
url: str
|
|
148
|
+
|
|
149
|
+
@dataclass
|
|
150
|
+
class EventByEventsUrl(ToolRequest):
|
|
151
|
+
"""Facebook Events Scraper by Events URL"""
|
|
152
|
+
|
|
153
|
+
SPIDER_ID = "facebook_event_by-events-url"
|
|
154
|
+
SPIDER_NAME = "facebook.com"
|
|
155
|
+
url: str
|
|
156
|
+
|
|
157
|
+
@dataclass
|
|
158
|
+
class Profile(ToolRequest):
|
|
159
|
+
"""Facebook Profile Scraper"""
|
|
160
|
+
|
|
161
|
+
SPIDER_ID = "facebook_profile_by-profiles-url"
|
|
162
|
+
SPIDER_NAME = "facebook.com"
|
|
163
|
+
url: str
|
|
164
|
+
|
|
165
|
+
@dataclass
|
|
166
|
+
class Comment(ToolRequest):
|
|
167
|
+
"""Facebook Post Comments Scraper"""
|
|
168
|
+
|
|
169
|
+
SPIDER_ID = "facebook_comment_by-comments-url"
|
|
73
170
|
SPIDER_NAME = "facebook.com"
|
|
74
171
|
url: str
|
|
172
|
+
get_all_replies: str | None = None
|
|
173
|
+
limit_records: str | None = None
|
|
174
|
+
comments_sort: str | None = None # All comments
|
|
75
175
|
|
|
76
176
|
|
|
77
177
|
class Instagram:
|
|
78
178
|
@dataclass
|
|
79
179
|
class Profile(ToolRequest):
|
|
80
|
-
"""Instagram Profile Scraper"""
|
|
180
|
+
"""Instagram Profile Scraper by Username"""
|
|
81
181
|
|
|
82
182
|
SPIDER_ID = "ins_profiles_by-username"
|
|
83
183
|
SPIDER_NAME = "instagram.com"
|
|
84
184
|
username: str
|
|
85
|
-
|
|
185
|
+
|
|
186
|
+
@dataclass
|
|
187
|
+
class ProfileByUrl(ToolRequest):
|
|
188
|
+
"""Instagram Profile Scraper by Profile URL"""
|
|
189
|
+
|
|
190
|
+
SPIDER_ID = "ins_profiles_by-profileurl"
|
|
191
|
+
SPIDER_NAME = "instagram.com"
|
|
192
|
+
profileurl: str
|
|
86
193
|
|
|
87
194
|
@dataclass
|
|
88
195
|
class Post(ToolRequest):
|
|
89
|
-
"""Instagram Post Information Scraper"""
|
|
196
|
+
"""Instagram Post Information Scraper by Profile URL"""
|
|
90
197
|
|
|
91
198
|
SPIDER_ID = "ins_posts_by-profileurl"
|
|
92
199
|
SPIDER_NAME = "instagram.com"
|
|
@@ -96,14 +203,45 @@ class Instagram:
|
|
|
96
203
|
end_date: str | None = None
|
|
97
204
|
post_type: str | None = None # Post or Reel
|
|
98
205
|
|
|
206
|
+
@dataclass
|
|
207
|
+
class PostByUrl(ToolRequest):
|
|
208
|
+
"""Instagram Post Information Scraper by Post URL"""
|
|
209
|
+
|
|
210
|
+
SPIDER_ID = "ins_posts_by-posturl"
|
|
211
|
+
SPIDER_NAME = "instagram.com"
|
|
212
|
+
posturl: str
|
|
213
|
+
|
|
99
214
|
@dataclass
|
|
100
215
|
class Reel(ToolRequest):
|
|
101
|
-
"""Instagram Reel Information Scraper"""
|
|
216
|
+
"""Instagram Reel Information Scraper by URL"""
|
|
102
217
|
|
|
103
218
|
SPIDER_ID = "ins_reel_by-url"
|
|
104
219
|
SPIDER_NAME = "instagram.com"
|
|
105
220
|
url: str
|
|
221
|
+
|
|
222
|
+
@dataclass
|
|
223
|
+
class AllReel(ToolRequest):
|
|
224
|
+
"""Instagram All Reel Information Scraper by URL"""
|
|
225
|
+
|
|
226
|
+
SPIDER_ID = "ins_allreel_by-url"
|
|
227
|
+
SPIDER_NAME = "instagram.com"
|
|
228
|
+
url: str
|
|
229
|
+
num_of_posts: int | None = None
|
|
230
|
+
posts_to_not_include: str | None = None
|
|
231
|
+
start_date: str | None = None
|
|
232
|
+
end_date: str | None = None
|
|
233
|
+
|
|
234
|
+
@dataclass
|
|
235
|
+
class ReelByListUrl(ToolRequest):
|
|
236
|
+
"""Instagram Reel Information Scraper by List URL"""
|
|
237
|
+
|
|
238
|
+
SPIDER_ID = "ins_reel_by-listurl"
|
|
239
|
+
SPIDER_NAME = "instagram.com"
|
|
240
|
+
url: str
|
|
106
241
|
num_of_posts: int | None = None
|
|
242
|
+
posts_to_not_include: str | None = None
|
|
243
|
+
start_date: str | None = None
|
|
244
|
+
end_date: str | None = None
|
|
107
245
|
|
|
108
246
|
@dataclass
|
|
109
247
|
class Comment(ToolRequest):
|
|
@@ -117,30 +255,35 @@ class Instagram:
|
|
|
117
255
|
class Twitter:
|
|
118
256
|
@dataclass
|
|
119
257
|
class Profile(ToolRequest):
|
|
120
|
-
"""Twitter(X) Profile Scraper"""
|
|
258
|
+
"""Twitter(X) Profile Scraper by Profile URL"""
|
|
121
259
|
|
|
122
|
-
SPIDER_ID = "
|
|
123
|
-
SPIDER_NAME = "
|
|
260
|
+
SPIDER_ID = "twitter_profile_by-profileurl"
|
|
261
|
+
SPIDER_NAME = "x.com"
|
|
124
262
|
url: str
|
|
125
|
-
max_number_of_posts: int | None = None
|
|
126
|
-
user_name: str | None = None
|
|
127
263
|
|
|
128
264
|
@dataclass
|
|
129
|
-
class
|
|
130
|
-
"""
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
"""
|
|
265
|
+
class ProfileByUsername(ToolRequest):
|
|
266
|
+
"""Twitter(X) Profile Scraper by Username"""
|
|
267
|
+
|
|
268
|
+
SPIDER_ID = "twitter_profile_by-username"
|
|
269
|
+
SPIDER_NAME = "x.com"
|
|
270
|
+
user_name: str
|
|
136
271
|
|
|
137
|
-
|
|
138
|
-
|
|
272
|
+
@dataclass
|
|
273
|
+
class Post(ToolRequest):
|
|
274
|
+
"""Twitter(X) Post Information Scraper by Post URL"""
|
|
139
275
|
|
|
276
|
+
SPIDER_ID = "twitter_post_by-posturl"
|
|
277
|
+
SPIDER_NAME = "x.com"
|
|
140
278
|
url: str # Post URL (e.g. https://x.com/user/status/123)
|
|
141
279
|
|
|
142
|
-
|
|
143
|
-
|
|
280
|
+
@dataclass
|
|
281
|
+
class PostByProfileUrl(ToolRequest):
|
|
282
|
+
"""Twitter(X) Post Information Scraper by Profile URL"""
|
|
283
|
+
|
|
284
|
+
SPIDER_ID = "twitter_post_by-profileurl"
|
|
285
|
+
SPIDER_NAME = "x.com"
|
|
286
|
+
url: str # Profile URL
|
|
144
287
|
|
|
145
288
|
|
|
146
289
|
class LinkedIn:
|
|
@@ -154,30 +297,70 @@ class LinkedIn:
|
|
|
154
297
|
|
|
155
298
|
@dataclass
|
|
156
299
|
class Jobs(ToolRequest):
|
|
157
|
-
"""LinkedIn Job Listing Scraper"""
|
|
300
|
+
"""LinkedIn Job Listing Scraper by Job Listing URL"""
|
|
158
301
|
|
|
159
302
|
SPIDER_ID = "linkedin_job_listings_information_by-job-listing-url"
|
|
160
303
|
SPIDER_NAME = "linkedin.com"
|
|
161
304
|
job_listing_url: str
|
|
305
|
+
page_turning: int | None = None
|
|
306
|
+
|
|
307
|
+
@dataclass
|
|
308
|
+
class JobByUrl(ToolRequest):
|
|
309
|
+
"""LinkedIn Job Listing Scraper by Job URL"""
|
|
310
|
+
|
|
311
|
+
SPIDER_ID = "linkedin_job_listings_information_by-job-url"
|
|
312
|
+
SPIDER_NAME = "linkedin.com"
|
|
313
|
+
job_url: str
|
|
314
|
+
|
|
315
|
+
@dataclass
|
|
316
|
+
class JobByKeyword(ToolRequest):
|
|
317
|
+
"""LinkedIn Job Listing Scraper by Keyword"""
|
|
318
|
+
|
|
319
|
+
SPIDER_ID = "linkedin_job_listings_information_by-keyword"
|
|
320
|
+
SPIDER_NAME = "linkedin.com"
|
|
162
321
|
location: str
|
|
163
|
-
|
|
322
|
+
keyword: str
|
|
323
|
+
time_range: str | None = None
|
|
324
|
+
experience_level: str | None = None
|
|
325
|
+
job_type: str | None = None
|
|
326
|
+
remote: str | None = None
|
|
327
|
+
company: str | None = None
|
|
328
|
+
selective_search: str | None = None
|
|
329
|
+
jobs_to_not_include: str | None = None
|
|
330
|
+
location_radius: str | None = None
|
|
164
331
|
page_turning: int | None = None
|
|
165
|
-
keyword: str | None = None
|
|
166
|
-
remote: str | None = None # On_site, Remote, Hybrid
|
|
167
332
|
|
|
168
333
|
|
|
169
334
|
class Reddit:
|
|
170
335
|
@dataclass
|
|
171
336
|
class Posts(ToolRequest):
|
|
172
|
-
"""Reddit Post Information Scraper"""
|
|
337
|
+
"""Reddit Post Information Scraper by URL"""
|
|
173
338
|
|
|
174
339
|
SPIDER_ID = "reddit_posts_by-url"
|
|
175
340
|
SPIDER_NAME = "reddit.com"
|
|
176
341
|
url: str
|
|
177
|
-
|
|
178
|
-
|
|
342
|
+
|
|
343
|
+
@dataclass
|
|
344
|
+
class PostsByKeywords(ToolRequest):
|
|
345
|
+
"""Reddit Post Information Scraper by Keywords"""
|
|
346
|
+
|
|
347
|
+
SPIDER_ID = "reddit_posts_by-keywords"
|
|
348
|
+
SPIDER_NAME = "reddit.com"
|
|
349
|
+
keyword: str
|
|
350
|
+
date: str | None = None # All time
|
|
351
|
+
num_of_posts: int | None = None
|
|
352
|
+
sort_by: str | None = None
|
|
353
|
+
|
|
354
|
+
@dataclass
|
|
355
|
+
class PostsBySubredditUrl(ToolRequest):
|
|
356
|
+
"""Reddit Post Information Scraper by Subreddit URL"""
|
|
357
|
+
|
|
358
|
+
SPIDER_ID = "reddit_posts_by-subredditurl"
|
|
359
|
+
SPIDER_NAME = "reddit.com"
|
|
360
|
+
url: str
|
|
361
|
+
sort_by: str | None = None
|
|
179
362
|
num_of_posts: int | None = None
|
|
180
|
-
|
|
363
|
+
sort_by_time: str | None = None # All Time
|
|
181
364
|
|
|
182
365
|
@dataclass
|
|
183
366
|
class Comment(ToolRequest):
|
|
@@ -187,4 +370,5 @@ class Reddit:
|
|
|
187
370
|
SPIDER_NAME = "reddit.com"
|
|
188
371
|
url: str
|
|
189
372
|
days_back: int | None = None
|
|
190
|
-
load_all_replies:
|
|
373
|
+
load_all_replies: str | None = None
|
|
374
|
+
comment_limit: int | None = None
|