yad2-scraper 0.4.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {yad2_scraper-0.4.0 → yad2_scraper-0.5.0}/PKG-INFO +2 -3
- {yad2_scraper-0.4.0 → yad2_scraper-0.5.0}/pyproject.toml +2 -2
- yad2_scraper-0.5.0/yad2_scraper/next_data.py +50 -0
- {yad2_scraper-0.4.0 → yad2_scraper-0.5.0}/yad2_scraper/query.py +2 -2
- {yad2_scraper-0.4.0 → yad2_scraper-0.5.0}/yad2_scraper/utils.py +16 -1
- yad2_scraper-0.5.0/yad2_scraper/vehicles/__init__.py +5 -0
- yad2_scraper-0.5.0/yad2_scraper/vehicles/category.py +15 -0
- yad2_scraper-0.5.0/yad2_scraper/vehicles/next_data.py +318 -0
- yad2_scraper-0.5.0/yad2_scraper/vehicles/query.py +25 -0
- yad2_scraper-0.5.0/yad2_scraper/vehicles/tag.py +63 -0
- yad2_scraper-0.5.0/yad2_scraper/vehicles/urls.py +16 -0
- yad2_scraper-0.4.0/yad2_scraper/next_data.py +0 -27
- {yad2_scraper-0.4.0 → yad2_scraper-0.5.0}/LICENSE +0 -0
- {yad2_scraper-0.4.0 → yad2_scraper-0.5.0}/README.md +0 -0
- {yad2_scraper-0.4.0 → yad2_scraper-0.5.0}/yad2_scraper/__init__.py +0 -0
- {yad2_scraper-0.4.0 → yad2_scraper-0.5.0}/yad2_scraper/category.py +0 -0
- {yad2_scraper-0.4.0 → yad2_scraper-0.5.0}/yad2_scraper/constants.py +0 -0
- {yad2_scraper-0.4.0 → yad2_scraper-0.5.0}/yad2_scraper/exceptions.py +0 -0
- {yad2_scraper-0.4.0 → yad2_scraper-0.5.0}/yad2_scraper/scraper.py +0 -0
@@ -1,14 +1,13 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: yad2-scraper
|
3
|
-
Version: 0.4.0
|
3
|
+
Version: 0.5.0
|
4
4
|
Summary: Scrape Yad2 in Python.
|
5
5
|
License: LICENSE
|
6
6
|
Author: dav ost
|
7
7
|
Author-email: davidost2003@gmail.com
|
8
|
-
Requires-Python: >=3.7
|
8
|
+
Requires-Python: >=3.8
|
9
9
|
Classifier: License :: Other/Proprietary License
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
11
|
-
Classifier: Programming Language :: Python :: 3.7
|
12
11
|
Classifier: Programming Language :: Python :: 3.8
|
13
12
|
Classifier: Programming Language :: Python :: 3.9
|
14
13
|
Classifier: Programming Language :: Python :: 3.10
|
@@ -1,13 +1,13 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "yad2-scraper"
|
3
|
-
version = "0.4.0"
|
3
|
+
version = "0.5.0"
|
4
4
|
description = "Scrape Yad2 in Python."
|
5
5
|
authors = ["dav ost <davidost2003@gmail.com>"]
|
6
6
|
license = "LICENSE"
|
7
7
|
readme = "README.md"
|
8
8
|
|
9
9
|
[tool.poetry.dependencies]
|
10
|
-
python = ">=3.7"
|
10
|
+
python = ">=3.8"
|
11
11
|
httpx = "^0.24.0"
|
12
12
|
httpcore = ">=0.15.0"
|
13
13
|
fake-useragent = "^0.1.11"
|
@@ -0,0 +1,50 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from enum import Enum
|
3
|
+
from typing import List, Union
|
4
|
+
|
5
|
+
from yad2_scraper.utils import safe_access
|
6
|
+
|
7
|
+
FieldTypes = Union[str, int]
|
8
|
+
|
9
|
+
safe_access_optional_keys = safe_access(exceptions=(KeyError, TypeError), default=None)
|
10
|
+
|
11
|
+
|
12
|
+
class SafeAccessOptionalKeysMeta(type):
    """Metaclass that makes a class's accessors tolerant of missing keys.

    While the class is being created, every plain function and every property
    in its body is wrapped with ``safe_access_optional_keys``. A ``KeyError``
    or ``TypeError`` raised inside a wrapped accessor is replaced by the
    default configured on ``safe_access_optional_keys``.

    Two kinds of attributes are deliberately left untouched:

    * ``__init__`` / ``__new__`` -- swallowing a ``TypeError`` there would hide
      a wrong constructor call and hand back a half-initialised instance.
    * callables that are not plain functions (nested classes, builtins,
      partials, ...) -- wrapping them would replace the object with a function
      and break ``isinstance`` / subclassing.
    """

    def __new__(cls, name, bases, dictionary):
        # Imported locally so the block does not depend on a new file-level import.
        from types import FunctionType

        unwrapped_names = ("__init__", "__new__")

        # Build a fresh namespace instead of mutating the one being iterated.
        namespace = {}
        for attr_name, attr_value in dictionary.items():
            if isinstance(attr_value, property):
                # Re-create the property with each existing accessor wrapped;
                # absent accessors stay None so the property keeps its shape.
                attr_value = property(
                    safe_access_optional_keys(attr_value.fget) if attr_value.fget else None,
                    safe_access_optional_keys(attr_value.fset) if attr_value.fset else None,
                    safe_access_optional_keys(attr_value.fdel) if attr_value.fdel else None,
                    attr_value.__doc__,
                )
            elif isinstance(attr_value, FunctionType) and attr_name not in unwrapped_names:
                attr_value = safe_access_optional_keys(attr_value)
            namespace[attr_name] = attr_value

        return super().__new__(cls, name, bases, namespace)
|
25
|
+
|
26
|
+
|
27
|
+
class Field(str, Enum):
    """Key names used to select one sub-field of a structured value.

    Members are ``str`` instances, so they can be used directly as dictionary
    keys when indexing a structured value.
    """

    # Identifier sub-field.
    ID = "id"
    # Display-text sub-field.
    TEXT = "text"
    # English display-text sub-field.
    ENGLISH_TEXT = "textEng"
|
31
|
+
|
32
|
+
|
33
|
+
def convert_string_date_to_datetime(date_string: str) -> datetime:
    """Parse an ISO 8601 date/time string into a ``datetime``.

    A trailing ``"Z"`` (the ISO 8601 UTC designator) is rewritten to
    ``"+00:00"`` before parsing, because ``datetime.fromisoformat`` only
    accepts ``"Z"`` from Python 3.11 on and this package supports older
    interpreters. Strings without the designator are parsed exactly as before.

    :param date_string: the ISO 8601 formatted value to parse.
    :return: the parsed ``datetime``; timezone-aware when the input carries an
        offset or the ``"Z"`` designator, naive otherwise.
    :raises ValueError: if the string is not a format ``fromisoformat`` accepts.
    :raises TypeError: if ``date_string`` is not a string (raised by
        ``fromisoformat`` itself, so non-string input behaves as it did before).
    """
    # Guard with isinstance so non-string input still reaches fromisoformat and
    # fails with its own TypeError instead of an AttributeError here.
    if isinstance(date_string, str) and date_string.endswith("Z"):
        date_string = f"{date_string[:-1]}+00:00"
    return datetime.fromisoformat(date_string)
|
35
|
+
|
36
|
+
|
37
|
+
class NextData:
    """Wrapper around a page's parsed next-data dictionary.

    NOTE(review): the key names suggest this is the Next.js ``__NEXT_DATA__``
    payload -- confirm against the code that loads it.
    """

    def __init__(self, data: dict):
        """Store the parsed dictionary without copying or validating it."""
        self.data = data

    @property
    def json(self) -> dict:
        """The wrapped dictionary itself (same object as ``data``)."""
        return self.data

    @property
    def queries(self) -> List[dict]:
        """The value stored under props -> pageProps -> dehydratedState -> queries.

        Raises whatever the underlying lookups raise (e.g. ``KeyError``) when a
        level of that path is absent.
        """
        node = self.data
        for key in ("props", "pageProps", "dehydratedState", "queries"):
            node = node[key]
        return node

    def __getitem__(self, item):
        """Delegate subscript access to the wrapped dictionary."""
        return self.data[item]
|
@@ -2,7 +2,7 @@ from pydantic import BaseModel
|
|
2
2
|
from enum import Enum
|
3
3
|
from typing import Optional, Tuple
|
4
4
|
|
5
|
-
|
5
|
+
NumberRange = Tuple[int, int]
|
6
6
|
|
7
7
|
|
8
8
|
class OrderBy(int, Enum):
|
@@ -27,7 +27,7 @@ def format_number_range(number_range: Optional[Tuple[int, int]]) -> Optional[str
|
|
27
27
|
class QueryFilters(BaseModel):
|
28
28
|
page: Optional[int] = None
|
29
29
|
order_by: Optional[OrderBy] = None
|
30
|
-
price_range: Optional[Tuple[int, int]] = None
|
30
|
+
price_range: Optional[NumberRange] = None
|
31
31
|
...
|
32
32
|
|
33
33
|
def to_params(self) -> dict:
|
@@ -1,5 +1,6 @@
|
|
1
|
+
import functools
|
1
2
|
from bs4 import BeautifulSoup, Tag
|
2
|
-
from typing import Union, List
|
3
|
+
from typing import Union, List, Tuple, Any
|
3
4
|
|
4
5
|
|
5
6
|
def join_url(url: str, path: str) -> str:
|
@@ -19,3 +20,17 @@ def find_html_tag_by_class_substring(e: Union[BeautifulSoup, Tag], tag_name: str
|
|
19
20
|
|
20
21
|
def find_all_html_tags_by_class_substring(e: Union[BeautifulSoup, Tag], tag_name: str, substring: str) -> List[Tag]:
    """Find every ``tag_name`` element under ``e`` whose class contains ``substring``.

    :param e: the soup or tag to search within.
    :param tag_name: the element name passed to ``find_all``.
    :param substring: text that must occur inside the element's class value.
    :return: whatever ``e.find_all`` returns for that filter.
    """

    def class_contains_substring(class_name):
        # Elements without a class reach the filter with a falsy value; the
        # ``and`` short-circuits so ``in`` is never applied to it.
        return class_name and substring in class_name

    return e.find_all(tag_name, class_=class_contains_substring)
|
23
|
+
|
24
|
+
|
25
|
+
def safe_access(exceptions: Tuple = (), default: Any = None):
    """Build a decorator that turns selected exceptions into a default value.

    :param exceptions: exception classes to intercept; the empty default
        intercepts nothing, so every exception propagates.
    :param default: the object returned in place of an intercepted exception
        (the same object on every call).
    :return: a decorator; the decorated callable keeps the original's metadata
        (name, docstring, ``__wrapped__``).
    """

    def decorator(func):
        def wrapper(*args, **kwargs):
            try:
                result = func(*args, **kwargs)
            except exceptions:
                result = default
            return result

        # Same effect as decorating ``wrapper`` with ``functools.wraps(func)``.
        return functools.update_wrapper(wrapper, func)

    return decorator
|
@@ -0,0 +1,15 @@
|
|
1
|
+
from typing import List, Optional
|
2
|
+
|
3
|
+
from yad2_scraper.category import Yad2Category
|
4
|
+
from yad2_scraper.vehicles.tag import VehicleTag
|
5
|
+
from yad2_scraper.vehicles.next_data import VehiclesNextData
|
6
|
+
|
7
|
+
|
8
|
+
class Yad2VehiclesCategory(Yad2Category):
    """Category page specialised for vehicle listings."""

    def get_vehicle_tags(self) -> List[VehicleTag]:
        """Wrap every ``div`` whose class contains ``feedItemBox`` in a ``VehicleTag``."""
        feed_item_boxes = self.find_all_tags_by_class_substring("div", "feedItemBox")
        return list(map(VehicleTag, feed_item_boxes))

    def load_next_data(self) -> Optional[VehiclesNextData]:
        """Load the page's next data and re-wrap it as ``VehiclesNextData``.

        Returns ``None`` when the base implementation yields a falsy result.

        NOTE(review): the base result is handed to ``VehiclesNextData`` as-is;
        confirm that it is the shape ``VehiclesNextData`` expects for ``data``.
        """
        base_next_data = super().load_next_data()
        if not base_next_data:
            return None
        return VehiclesNextData(base_next_data)
|
@@ -0,0 +1,318 @@
|
|
1
|
+
import itertools
|
2
|
+
from datetime import datetime
|
3
|
+
from typing import List, Any, Iterator, Optional
|
4
|
+
|
5
|
+
from yad2_scraper.next_data import (
|
6
|
+
SafeAccessOptionalKeysMeta,
|
7
|
+
NextData,
|
8
|
+
Field,
|
9
|
+
FieldTypes,
|
10
|
+
convert_string_date_to_datetime
|
11
|
+
)
|
12
|
+
from yad2_scraper.utils import join_url
|
13
|
+
from yad2_scraper.vehicles.urls import VEHICLES_URL
|
14
|
+
|
15
|
+
|
16
|
+
class VehicleData(metaclass=SafeAccessOptionalKeysMeta):
    """Read-only accessors over one vehicle listing dictionary.

    ``SafeAccessOptionalKeysMeta`` wraps the accessors below, so a ``KeyError``
    or ``TypeError`` raised while reading the dictionary (a missing key, or
    indexing into a ``None`` parent) produces ``None`` instead of propagating.
    Every accessor is therefore annotated as optional.

    Accessors that take a ``field`` argument read a structured value and
    return the sub-field selected by ``field`` (see ``Field``).

    The date accessors parse their value with
    ``convert_string_date_to_datetime``; a ``ValueError`` from a malformed
    string is not one of the suppressed exceptions and still propagates.
    """

    def __init__(self, data: dict):
        """Keep a reference to the raw listing dictionary."""
        self.data = data

    # --- identity / price ---------------------------------------------------

    @property
    def token(self) -> Optional[str]:
        return self["token"]

    @property
    def page_link(self) -> Optional[str]:
        """URL of the listing page, or ``None`` when the listing has no token."""
        token = self.token
        if token is None:
            # Without this guard the link would end in the literal "item/None".
            return None
        return join_url(VEHICLES_URL, f"item/{token}")

    @property
    def price(self) -> Optional[int]:
        return self["price"]

    # --- customer -----------------------------------------------------------

    @property
    def customer(self) -> Optional[dict]:
        return self["customer"]

    @property
    def customer_name(self) -> Optional[str]:
        return self.customer["name"]

    @property
    def customer_phone(self) -> Optional[str]:
        return self.customer["phone"]

    # --- address ------------------------------------------------------------

    @property
    def address(self) -> Optional[dict]:
        return self["address"]

    def top_area(self, field: Field = Field.TEXT) -> Optional[FieldTypes]:
        """Selected sub-field of ``address.topArea``."""
        return self["address"]["topArea"][field]

    def area(self, field: Field = Field.TEXT) -> Optional[FieldTypes]:
        """Selected sub-field of ``address.area``."""
        return self["address"]["area"][field]

    def city(self, field: Field = Field.TEXT) -> Optional[FieldTypes]:
        """Selected sub-field of ``address.city``."""
        return self["address"]["city"][field]

    # --- listing metadata ---------------------------------------------------

    @property
    def metadata(self) -> Optional[dict]:
        return self["metaData"]

    @property
    def video(self) -> Optional[str]:
        return self.metadata["video"]

    @property
    def cover_image(self) -> Optional[str]:
        return self.metadata["coverImage"]

    @property
    def images(self) -> Any:
        # NOTE(review): originally annotated ``str``; the plural name suggests
        # a list of image URLs -- confirm against a real payload.
        return self.metadata["images"]

    @property
    def description(self) -> Optional[str]:
        return self.metadata["description"]

    # --- listing dates ------------------------------------------------------

    @property
    def dates(self) -> Optional[dict]:
        return self["dates"]

    @property
    def updated_at(self) -> Optional[datetime]:
        return convert_string_date_to_datetime(self.dates["updatedAt"])

    @property
    def created_at(self) -> Optional[datetime]:
        return convert_string_date_to_datetime(self.dates["createdAt"])

    @property
    def ends_at(self) -> Optional[datetime]:
        return convert_string_date_to_datetime(self.dates["endsAt"])

    @property
    def rebounced_at(self) -> Optional[datetime]:
        return convert_string_date_to_datetime(self.dates["rebouncedAt"])

    # --- vehicle description ------------------------------------------------

    def manufacturer(self, field: Field = Field.TEXT) -> Optional[FieldTypes]:
        """Selected sub-field of ``manufacturer``."""
        return self["manufacturer"][field]

    def color(self, field: Field = Field.TEXT) -> Optional[FieldTypes]:
        """Selected sub-field of ``color``."""
        return self["color"][field]

    @property
    def km(self) -> Optional[int]:
        return self["km"]

    @property
    def hand(self) -> Optional[FieldTypes]:
        """The ``id`` sub-field of ``hand``.

        This is a property, so no ``field`` argument can ever be supplied; the
        getter previously declared one defaulting to ``Field.ID``, which is the
        value used here.
        """
        return self["hand"][Field.ID]

    @property
    def engine_volume(self) -> Optional[int]:
        return self["engineVolume"]

    @property
    def horse_power(self) -> Optional[int]:
        return self["horsePower"]

    @property
    def previous_owner(self) -> Optional[FieldTypes]:
        """The ``text`` sub-field of ``previousOwner``.

        This is a property, so no ``field`` argument can ever be supplied; the
        getter previously declared one defaulting to ``Field.TEXT``, which is
        the value used here.
        """
        return self["previousOwner"][Field.TEXT]

    @property
    def above_price(self) -> Optional[int]:
        return self["abovePrice"]

    @property
    def tags(self) -> Optional[List[dict]]:
        return self["tags"]

    @property
    def is_contact_lead_supported(self) -> Optional[bool]:
        return self["isContactLeadSupported"]

    # --- vehicle dates ------------------------------------------------------

    @property
    def vehicle_dates(self) -> Optional[dict]:
        return self["vehicleDates"]

    @property
    def year_of_production(self) -> Optional[int]:
        return self.vehicle_dates["yearOfProduction"]

    @property
    def month_of_production(self) -> Optional[int]:
        return self.vehicle_dates["monthOfProduction"]["id"]

    @property
    def test_date(self) -> Optional[datetime]:
        return convert_string_date_to_datetime(self.vehicle_dates["testDate"])

    # --- model / drivetrain -------------------------------------------------

    def model(self, field: Field = Field.TEXT) -> Optional[FieldTypes]:
        """Selected sub-field of ``model``."""
        return self["model"][field]

    @property
    def sub_model(self) -> Optional[str]:
        return self["subModel"]

    def gear_box(self, field: Field = Field.TEXT) -> Optional[FieldTypes]:
        """Selected sub-field of ``gearBox``."""
        return self["gearBox"][field]

    def car_family_types(self, field: Field = Field.TEXT) -> Optional[List[FieldTypes]]:
        """Selected sub-field of every entry of ``carFamilyType``.

        If any single entry lacks the sub-field the whole result is ``None``.
        """
        return [obj[field] for obj in self["carFamilyType"]]

    def engine_type(self, field: Field = Field.TEXT) -> Optional[FieldTypes]:
        """Selected sub-field of ``engineType``."""
        return self["engineType"][field]

    @property
    def seats(self) -> Optional[int]:
        return self["seats"]

    @property
    def number_of_doors(self) -> Optional[int]:
        return self["numberOfDoors"]

    @property
    def owner(self) -> Optional[str]:
        return self["owner"]["text"]

    @property
    def body_type(self) -> Optional[str]:
        return self["bodyType"]["text"]

    @property
    def combined_fuel_consumption(self) -> Optional[float]:
        return self["combinedFuelConsumption"]

    @property
    def power_train_architecture(self) -> Optional[str]:
        return self["powertrainArchitecture"]

    def car_tags(self, field: Field = Field.TEXT) -> Optional[List[FieldTypes]]:
        """Selected sub-field of every entry of ``carTag``.

        If any single entry lacks the sub-field the whole result is ``None``.
        """
        return [obj[field] for obj in self["carTag"]]

    # --- specification ------------------------------------------------------

    @property
    def specification(self) -> Optional[dict]:
        return self["specification"]

    @property
    def has_air_conditioner(self) -> Optional[bool]:
        return self.specification["airConditioner"]

    @property
    def has_power_steering(self) -> Optional[bool]:
        return self.specification["powerSteering"]

    @property
    def has_magnesium_wheel(self) -> Optional[bool]:
        return self.specification["magnesiumWheel"]

    @property
    def has_tire_pressure_monitoring_system(self) -> Optional[bool]:
        return self.specification["tirePressureMonitoringSystem"]

    @property
    def has_abs(self) -> Optional[bool]:
        return self.specification["abs"]

    @property
    def air_bags(self) -> Optional[int]:
        return self.specification["airBags"]

    @property
    def has_control_stability(self) -> Optional[bool]:
        return self.specification["controlStability"]

    @property
    def has_electric_window(self) -> Optional[int]:
        return self.specification["electricWindow"]

    @property
    def has_breaking_assist_system(self) -> Optional[bool]:
        return self.specification["breakingAssistSystem"]

    @property
    def has_reverse_camera(self) -> Optional[bool]:
        return self.specification["reverseCamera"]

    @property
    def has_adaptive_cruise_control(self) -> Optional[bool]:
        return self.specification["adaptiveCruiseControl"]

    @property
    def has_high_beams_auto_control(self) -> Optional[bool]:
        return self.specification["highBeamsAutoControl"]

    @property
    def has_blind_spot_assist(self) -> Optional[bool]:
        return self.specification["blindSpotAssist"]

    @property
    def has_identify_pedestrians(self) -> Optional[bool]:
        return self.specification["identifyPedestrians"]

    @property
    def has_seat_belts_sensors(self) -> Optional[bool]:
        return self.specification["seatBeltsSensors"]

    @property
    def has_identifying_dangerous_nearing(self) -> Optional[bool]:
        return self.specification["identifyingDangerousNearing"]

    @property
    def has_auto_lighting_in_forward(self) -> Optional[bool]:
        return self.specification["autoLightingInForward"]

    @property
    def has_identify_traffic_signs(self) -> Optional[bool]:
        return self.specification["identifyTrafficSigns"]

    def ignition(self, field: Field = Field.TEXT) -> Optional[FieldTypes]:
        """Selected sub-field of ``specification.ignition``."""
        return self.specification["ignition"][field]

    @property
    def safety_points(self) -> Optional[int]:
        return self.specification["safetyPoints"]

    @property
    def is_handicapped_friendly(self) -> Optional[bool]:
        return self.specification["isHandicappedFriendly"]

    @property
    def has_sun_roof(self) -> Optional[bool]:
        return self.specification["sunRoof"]

    @property
    def is_turbo(self) -> Optional[bool]:
        return self.specification["isTurbo"]

    @property
    def has_road_deviation_control(self) -> Optional[bool]:
        return self.specification["roadDeviationControl"]

    @property
    def has_forward_distance_monitor(self) -> Optional[bool]:
        return self.specification["forwardDistanceMonitor"]

    @property
    def has_box(self) -> Optional[bool]:
        return self.specification["box"]

    # --- raw access ---------------------------------------------------------

    def __getitem__(self, key: str) -> Any:
        """Subscript access to the raw listing dictionary."""
        return self.data[key]
|
303
|
+
|
304
|
+
|
305
|
+
class VehiclesNextData(NextData):
    """Next data of a vehicles page, with iteration over the listed vehicles."""

    def iterate_vehicles(self) -> Iterator[VehicleData]:
        """Yield a ``VehicleData`` for every vehicle dictionary found in the queries.

        For each query, ``state.data`` is expected to be a dictionary whose
        values are lists of vehicle dictionaries. Anything that does not match
        that shape is skipped rather than raising: a missing, empty or
        non-dictionary ``data``; a group value that is not a list or tuple;
        and group entries that are not dictionaries.

        :raises KeyError: if a query has no ``"state"`` entry, or the queries
            path itself is missing (raised by ``queries``).
        """
        for query in self.queries:
            data = query["state"].get("data")

            # Lists, scalars and None carry no vehicle groups.
            if not isinstance(data, dict):
                continue

            for group in data.values():
                # Scalar or None values would make a blind iteration raise.
                if not isinstance(group, (list, tuple)):
                    continue

                for vehicle_data in group:
                    if isinstance(vehicle_data, dict):
                        yield VehicleData(vehicle_data)

    def __getitem__(self, item):
        """Delegate subscript access to the wrapped data."""
        return self.data[item]
|
@@ -0,0 +1,25 @@
|
|
1
|
+
from enum import Enum
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
from yad2_scraper.query import QueryFilters, OrderBy, NumberRange, format_number_range
|
5
|
+
|
6
|
+
|
7
|
+
class OrderVehiclesBy(int, Enum):
    """Sort orders available for vehicle searches.

    The first three members reuse the integer values of the generic
    ``OrderBy`` enum; the last two exist only on this enum.
    """

    # Shared with the generic OrderBy enum.
    DATE = OrderBy.DATE.value
    PRICE_LOWEST_TO_HIGHEST = OrderBy.PRICE_LOWEST_TO_HIGHEST.value
    PRICE_HIGHEST_TO_LOWEST = OrderBy.PRICE_HIGHEST_TO_LOWEST.value

    # Defined only for vehicles.
    DISTANCE_LOWEST_TO_HIGHEST = 5
    YEAR_HIGHEST_TO_LOWEST = 6
|
13
|
+
|
14
|
+
|
15
|
+
class VehiclesQueryFilters(QueryFilters):
    """Query filters for vehicle searches: the base filters plus a year range."""

    # Pair of years, formatted by ``format_number_range``; ``None`` when unset.
    year_range: Optional[NumberRange] = None

    def to_params(self) -> dict:
        """Return the base query parameters with a ``"year"`` entry added.

        The ``"year"`` value is whatever ``format_number_range`` returns for
        ``year_range``. It replaces any ``"year"`` entry produced by the base
        class.
        """
        # Copy first so the mapping returned by the base class is not mutated.
        params = dict(super().to_params())
        params["year"] = format_number_range(self.year_range)
        return params
|
24
|
+
|
25
|
+
# TODO: add QueryParams class for each vehicle type (some share the same attributes - sometimes with different enums)
|
@@ -0,0 +1,63 @@
|
|
1
|
+
from functools import cached_property
|
2
|
+
from bs4 import Tag
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from yad2_scraper.utils import join_url, find_html_tag_by_class_substring
|
6
|
+
from yad2_scraper.vehicles.urls import VEHICLES_URL
|
7
|
+
|
8
|
+
YEAR_AND_HAND_TAG_SEPARATOR = " • "
|
9
|
+
|
10
|
+
|
11
|
+
class VehicleTag:
    """Accessors over the HTML element of a single vehicle feed item.

    Each accessor looks up a child element by tag name and a substring of its
    class, then reads an attribute or the stripped text from it. Values read
    from the HTML are cached per instance; values derived from them are
    recomputed on each access.
    """

    def __init__(self, tag: Tag):
        """Keep a reference to the feed item's root element."""
        self.tag = tag

    @cached_property
    def relative_link(self) -> str:
        """The ``href`` of the ``a`` element whose class contains ``itemLink``."""
        link_tag = self.find_tag_by_class_substring("a", "itemLink")
        return link_tag["href"]

    @property
    def page_link(self) -> str:
        """``relative_link`` joined onto the vehicles base URL."""
        return join_url(VEHICLES_URL, self.relative_link)

    @cached_property
    def image_url(self) -> str:
        """The ``src`` of the ``img`` element whose class contains ``image``."""
        image_tag = self.find_tag_by_class_substring("img", "image")
        return image_tag["src"]

    @cached_property
    def model(self) -> str:
        """Stripped text of the ``span`` whose class contains ``heading``."""
        heading_tag = self.find_tag_by_class_substring("span", "heading")
        return heading_tag.text.strip()

    @cached_property
    def marketing_text(self) -> str:
        """Stripped text of the ``span`` whose class contains ``marketingText``."""
        marketing_tag = self.find_tag_by_class_substring("span", "marketingText")
        return marketing_tag.text.strip()

    @cached_property
    def year_and_hand_string(self) -> str:
        """Stripped text of the ``span`` whose class contains ``yearAndHand``."""
        year_and_hand_tag = self.find_tag_by_class_substring("span", "yearAndHand")
        return year_and_hand_tag.text.strip()

    def _split_year_and_hand(self):
        """Split ``year_and_hand_string`` into its two separator-delimited parts.

        Raises ``ValueError`` unless the split yields exactly two parts.
        """
        year_part, hand_part = self.year_and_hand_string.split(YEAR_AND_HAND_TAG_SEPARATOR)
        return year_part, hand_part

    @property
    def year(self) -> int:
        """The part before the separator, converted to ``int``."""
        year_part, _ = self._split_year_and_hand()
        return int(year_part)

    @property
    def hand(self) -> int:
        """The second whitespace-separated word after the separator, as ``int``.

        Raises ``ValueError`` unless that part consists of exactly two words.
        """
        _, hand_part = self._split_year_and_hand()
        _label, hand_number = hand_part.split()
        return int(hand_number)

    @cached_property
    def price_string(self) -> str:
        """Stripped text of the ``span`` whose class contains ``price``."""
        price_tag = self.find_tag_by_class_substring("span", "price")
        return price_tag.text.strip()

    @property
    def price(self) -> Optional[int]:
        """The numeric price, or ``None`` when the text is not in the expected form.

        The text must be exactly two whitespace-separated words, the first a
        number that may contain thousands commas.
        """
        try:
            amount_text, _suffix = self.price_string.split()
            digits = amount_text.replace(",", "")
            parsed_price = int(digits)
        except ValueError:
            # Raised both by a wrong word count and by a non-numeric amount.
            return None
        return parsed_price

    def find_tag_by_class_substring(self, tag_name: str, substring: str) -> Tag:
        """Look up a child of ``tag`` by element name and class substring."""
        return find_html_tag_by_class_substring(self.tag, tag_name, substring)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
from typing import Literal, get_args
|
2
|
+
|
3
|
+
from yad2_scraper.utils import join_url
|
4
|
+
from yad2_scraper.constants import BASE_URL
|
5
|
+
|
6
|
+
VEHICLES_URL = join_url(BASE_URL, "vehicles")
|
7
|
+
|
8
|
+
VehicleType = Literal["cars", "motorcycles", "scooters", "trucks", "watercraft", "others"]
|
9
|
+
|
10
|
+
_VALID_VEHICLE_TYPES = get_args(VehicleType)
|
11
|
+
|
12
|
+
|
13
|
+
def get_vehicle_url(vehicle_type: VehicleType) -> str:
    """Return the URL of the listing page for one vehicle type.

    :param vehicle_type: one of the literals declared by ``VehicleType``.
    :return: ``VEHICLES_URL`` joined with ``vehicle_type``.
    :raises ValueError: if ``vehicle_type`` is not a declared vehicle type.
    """
    if vehicle_type in _VALID_VEHICLE_TYPES:
        return join_url(VEHICLES_URL, vehicle_type)

    raise ValueError(f"Invalid vehicle type: {repr(vehicle_type)}. Expected one of {_VALID_VEHICLE_TYPES}")
|
@@ -1,27 +0,0 @@
|
|
1
|
-
from enum import Enum
|
2
|
-
from typing import List, Union
|
3
|
-
|
4
|
-
|
5
|
-
class Field(str, Enum):
|
6
|
-
ID = "id"
|
7
|
-
TEXT = "text"
|
8
|
-
ENGLISH_TEXT = "textEng"
|
9
|
-
|
10
|
-
|
11
|
-
FieldTypes = Union[str, int]
|
12
|
-
|
13
|
-
|
14
|
-
class NextData:
|
15
|
-
def __init__(self, data: dict):
|
16
|
-
self.data = data
|
17
|
-
|
18
|
-
@property
|
19
|
-
def json(self) -> dict:
|
20
|
-
return self.data
|
21
|
-
|
22
|
-
@property
|
23
|
-
def queries(self) -> List[dict]:
|
24
|
-
return self.data["props"]["pageProps"]["dehydratedState"]["queries"]
|
25
|
-
|
26
|
-
def __getitem__(self, item):
|
27
|
-
return self.data[item]
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|