PyPI - yad2-scraper - Versions diffs - 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

yad2-scraper 0.5.0 (py3-none-any.whl) → 0.5.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

yad2_scraper/__init__.py +98 -3
yad2_scraper/category.py +6 -0
yad2_scraper/exceptions.py +11 -0
yad2_scraper/next_data.py +14 -5
yad2_scraper/query.py +5 -3
yad2_scraper/scraper.py +130 -1
yad2_scraper/utils.py +9 -8
yad2_scraper/vehicles/__init__.py +1 -1
yad2_scraper/vehicles/category.py +2 -0
yad2_scraper/vehicles/next_data.py +2 -0
yad2_scraper/vehicles/query.py +3 -2
yad2_scraper/vehicles/tag.py +2 -0
yad2_scraper/vehicles/urls.py +10 -6
yad2_scraper-0.5.1.dist-info/METADATA +164 -0
yad2_scraper-0.5.1.dist-info/RECORD +18 -0
{yad2_scraper-0.5.0.dist-info → yad2_scraper-0.5.1.dist-info}/WHEEL +1 -1
yad2_scraper-0.5.0.dist-info/METADATA +0 -26
yad2_scraper-0.5.0.dist-info/RECORD +0 -18
{yad2_scraper-0.5.0.dist-info → yad2_scraper-0.5.1.dist-info}/LICENSE +0 -0

yad2_scraper/__init__.py CHANGED Viewed

@@ -1,4 +1,99 @@
-from .scraper import Yad2Scraper
-from .query import QueryFilters, OrderBy
+from typing import Optional, Type
+from .scraper import Yad2Scraper, Category
+from .query import QueryFilters, OrderBy, NumberRange
 from .category import Yad2Category
-from .next_data import NextData
+from .next_data import NextData, Field
+from .utils import any_param_specified
+from .vehicles import (
+    Yad2VehiclesCategory,
+    VehiclesQueryFilters,
+    OrderVehiclesBy,
+    VehicleCategory,
+    get_vehicle_category_url
+)
+_default_scraper = None
+def get_default_scraper() -> Yad2Scraper:
+    """
+    Retrieves the default instance of the Yad2Scraper. If an instance does not already exist, it will be created.
+    Returns:
+        Yad2Scraper: The default instance of the Yad2Scraper.
+    Notes:
+        The default scraper is a singleton instance that is reused across multiple calls.
+    """
+    global _default_scraper
+    if not _default_scraper:
+        _default_scraper = Yad2Scraper()
+    return _default_scraper
+def fetch_category(
+        url: str,
+        category_type: Type[Category] = Yad2Category,
+        page: Optional[int] = None,
+        order_by: Optional[OrderBy] = None,
+        price_range: [NumberRange] = None
+) -> Category:
+    """
+    Fetches a specific category from the given URL, while applying optional filters.
+    Args:
+        url (str): The URL of the category to fetch.
+        category_type (Type[Category], optional): The type of category to return (default is `Yad2Category`).
+        page (Optional[int], optional): The page number for pagination (default is None).
+        order_by (Optional[OrderBy], optional): The sorting order for the results (default is None).
+        price_range (Optional[NumberRange], optional): The price range filter for the results (default is None).
+    Returns:
+        Category: An instance of the specified `category_type`, populated with the fetched data.
+    Notes:
+        This method uses the default scraper to retrieve the category.
+    """
+    if any_param_specified(page, order_by, price_range):
+        params = QueryFilters(page=page, order_by=order_by, price_range=price_range)
+    else:
+        params = None
+    default_scraper = get_default_scraper()
+    return default_scraper.fetch_category(url, category_type, params=params)
+def fetch_vehicle_category(
+        vehicle_category: VehicleCategory,
+        page: Optional[int] = None,
+        order_by: Optional[OrderVehiclesBy] = None,
+        price_range: [NumberRange] = None,
+        year_range: [NumberRange] = None
+) -> Yad2VehiclesCategory:
+    """
+    Fetches a specific vehicle category, while applying optional filters.
+    Args:
+        vehicle_category (VehicleCategory): The vehicle category to fetch.
+        page (Optional[int], optional): The page number for pagination (default is None).
+        order_by (Optional[OrderVehiclesBy], optional): The sorting order for the results (default is None).
+        price_range (Optional[NumberRange], optional): The price range filter for the results (default is None).
+        year_range (Optional[NumberRange], optional): The year range filter for the results (default is None).
+    Returns:
+        Yad2VehiclesCategory: An instance of `Yad2VehiclesCategory`, populated with the fetched vehicle category data.
+    Notes:
+        This method uses the default scraper to fetch the vehicle category.
+    """
+    if any_param_specified(page, order_by, price_range, year_range):
+        params = VehiclesQueryFilters(page=page, order_by=order_by, price_range=price_range, year_range=year_range)
+    else:
+        params = None
+    url = get_vehicle_category_url(vehicle_category)
+    default_scraper = get_default_scraper()
+    return default_scraper.fetch_category(url, Yad2VehiclesCategory, params=params)

yad2_scraper/category.py CHANGED Viewed

@@ -8,18 +8,24 @@ from yad2_scraper.constants import NEXT_DATA_SCRIPT_ID
 class Yad2Category:
+    """Represents a Yad2 category parsed from an HTML page."""
     def __init__(self, soup: BeautifulSoup):
+        """Initialize with a BeautifulSoup object."""
         self.soup = soup
     @classmethod
     def from_html_io(cls, html_io: Union[TextIO, BinaryIO]):
+        """Create an instance from an HTML file-like object."""
         html = html_io.read()
         soup = BeautifulSoup(html, "html.parser")
         return cls(soup)
     def load_next_data(self) -> Optional[NextData]:
+        """Extract and parse Next.js data from the page."""
         tag = self.soup.find("script", id=NEXT_DATA_SCRIPT_ID)
         return NextData(json.loads(tag.string)) if tag else None
     def find_all_tags_by_class_substring(self, tag_name: str, substring: str) -> List[Tag]:
+        """Find all HTML tags with a class containing the given substring."""
         return find_all_html_tags_by_class_substring(self.soup, tag_name, substring)

yad2_scraper/exceptions.py CHANGED Viewed

@@ -3,29 +3,40 @@ from typing import List, Union
 class ResponseError(Exception):
+    """Represents an error response from an HTTP request."""
     def __init__(self, msg: str, request: httpx.Request, response: httpx.Response):
+        """Initialize with an error message, request, and response objects."""
         super().__init__(msg)
         self.request = request
         self.response = response
 class AntiBotDetectedError(ResponseError):
+    """Raised when an anti-bot mechanism is detected."""
     pass
 class UnexpectedContentError(ResponseError):
+    """Raised when the response content is not as expected."""
     pass
 class MaxAttemptsExceededError(Exception):
+    """Raised when the maximum number of attempts is exceeded."""
     def __init__(self, msg: str, max_attempts: int, errors: List[BaseException] = None):
+        """Initialize with an error message, max attempts, and optional errors."""
         super().__init__(msg)
         self.max_attempts = max_attempts
         self.errors = errors
 class MaxRequestAttemptsExceededError(MaxAttemptsExceededError):
+    """Raised when all HTTP request attempts fail."""
     def __init__(self, method: str, url: str, max_attempts: int, errors: List[Union[httpx.HTTPError, ResponseError]]):
+        """Initialize with request method, URL, max attempts, and error list."""
         msg = f"All {max_attempts} attempts for {method} request to '{url}' have failed"
         super().__init__(msg, max_attempts, errors)
         self.method = method

yad2_scraper/next_data.py CHANGED Viewed

@@ -6,19 +6,21 @@ from yad2_scraper.utils import safe_access
 FieldTypes = Union[str, int]
-safe_access_optional_keys = safe_access(exceptions=(KeyError, TypeError), default=None)
+_safe_access_optional_keys = safe_access(exceptions=(KeyError, TypeError), default=None)
 class SafeAccessOptionalKeysMeta(type):
+    """Metaclass that wraps methods and properties with safe access handling."""
     def __new__(cls, name, bases, dictionary):
         for attr_name, attr_value in dictionary.items():
             if callable(attr_value):  # Wrap methods
-                dictionary[attr_name] = safe_access_optional_keys(attr_value)
+                dictionary[attr_name] = _safe_access_optional_keys(attr_value)
             elif isinstance(attr_value, property):  # Wrap properties
                 dictionary[attr_name] = property(
-                    safe_access_optional_keys(attr_value.fget) if attr_value.fget else None,
-                    safe_access_optional_keys(attr_value.fset) if attr_value.fset else None,
-                    safe_access_optional_keys(attr_value.fdel) if attr_value.fdel else None,
+                    _safe_access_optional_keys(attr_value.fget) if attr_value.fget else None,
+                    _safe_access_optional_keys(attr_value.fset) if attr_value.fset else None,
+                    _safe_access_optional_keys(attr_value.fdel) if attr_value.fdel else None,
                     attr_value.__doc__,
                 )
         return super().__new__(cls, name, bases, dictionary)
@@ -31,20 +33,27 @@ class Field(str, Enum):
 def convert_string_date_to_datetime(date_string: str) -> datetime:
+    """Convert an ISO format string to a datetime object."""
     return datetime.fromisoformat(date_string)
 class NextData:
+    """Represents structured Next.js data."""
     def __init__(self, data: dict):
+        """Initialize with Next.js data dictionary."""
         self.data = data
     @property
     def json(self) -> dict:
+        """Return raw JSON data."""
         return self.data
     @property
     def queries(self) -> List[dict]:
+        """Extract query data from Next.js state."""
         return self.data["props"]["pageProps"]["dehydratedState"]["queries"]
     def __getitem__(self, item):
+        """Allow dictionary-style access to data."""
         return self.data[item]

yad2_scraper/query.py CHANGED Viewed

@@ -13,6 +13,7 @@ class OrderBy(int, Enum):
 def format_number_range(number_range: Optional[Tuple[int, int]]) -> Optional[str]:
+    """Format a number range as 'min_value-max_value'."""
     if number_range is None:
         return None
@@ -25,12 +26,13 @@ def format_number_range(number_range: Optional[Tuple[int, int]]) -> Optional[str
 class QueryFilters(BaseModel):
+    """Pydantic model representing query filters for querying a resource."""
     page: Optional[int] = None
     order_by: Optional[OrderBy] = None
     price_range: Optional[NumberRange] = None
-    ...
     def to_params(self) -> dict:
+        """Convert filter fields to query parameters."""
         return {
             "page": self.page,
             "Order": self.order_by,
@@ -38,9 +40,9 @@ class QueryFilters(BaseModel):
         }
     def to_clean_params(self):
+        """Return query parameters excluding None values."""
         return {key: value for key, value in self.to_params().items() if value is not None}
-    # TODO: add helper methods for managing the attribute values
     def __iter__(self):
+        """Allow iteration over the clean query parameters."""
         yield from self.to_clean_params().items()

yad2_scraper/scraper.py CHANGED Viewed

@@ -24,6 +24,8 @@ logger = logging.getLogger(__name__)
 class Yad2Scraper:
+    """A scraper for fetching data from the Yad2 website, with robust features"""
     def __init__(
             self,
             client: Optional[httpx.Client] = None,
@@ -32,6 +34,16 @@ class Yad2Scraper:
             wait_strategy: Optional[WaitStrategy] = None,
             max_request_attempts: int = 1
     ):
+        """
+        Initializes the Yad2Scraper with provided parameters.
+        Args:
+            client (Optional[httpx.Client]): An optional custom HTTP client. If not provided, a default client is used.
+            request_defaults (Optional[Dict[str, Any]]): Default parameters for requests such as headers, params, etc.
+            randomize_user_agent (bool): If True, a random User-Agent will be set for each request. Defaults to True.
+            wait_strategy (Optional[WaitStrategy]): A function to determine the wait time between requests.
+            max_request_attempts (int): The maximum number of retry attempts for failed requests. Defaults to 1.
+        """
         self.client = client or httpx.Client(
             headers=DEFAULT_REQUEST_HEADERS,
             follow_redirects=ALLOW_REQUEST_REDIRECTS,
@@ -41,14 +53,32 @@ class Yad2Scraper:
         self.randomize_user_agent = randomize_user_agent
         self.wait_strategy = wait_strategy
         self.max_request_attempts = max_request_attempts
+        self._request_count = 0
         logger.debug(f"Scraper initialized with client: {self.client}")
+    @property
+    def request_count(self) -> int:
+        """Returns the number of requests made by the scraper so far."""
+        return self._request_count
     def set_user_agent(self, user_agent: str) -> None:
+        """
+        Sets the User-Agent header for requests.
+        Args:
+            user_agent (str): The User-Agent string to be used in HTTP requests.
+        """
         self.client.headers["User-Agent"] = user_agent
         logger.debug(f"User-Agent client header set to: '{user_agent}'")
     def set_no_script(self, no_script: bool) -> None:
+        """
+        Sets the "noscript" cookie in the client's cookies to control JavaScript content.
+        Args:
+            no_script (bool): If True, the "noscript" cookie is set to "1". If False, it's set to "0".
+        """
         value = "1" if no_script else "0"
         self.client.cookies.set("noscript", value)
         logger.debug(f"NoScript (noscript) client cookie set to: '{value}'")
@@ -56,18 +86,44 @@ class Yad2Scraper:
     def fetch_category(
             self,
             url: str,
-            category_type: Type[Category] = Yad2Category,
+            category_type: Type[Category],
             params: Optional[QueryParamTypes] = None
     ) -> Category:
+        """
+        Fetches and returns a category page from a given URL.
+        Args:
+            url (str): The URL of the category page.
+            category_type (Type[Category]): The class type of the category to be fetched.
+            params (Optional[QueryParamTypes]): Query parameters to be included in the request.
+        Returns:
+            Category: The fetched category, parsed from HTML.
+        """
         logger.debug(f"Fetching category from URL: '{url}'")
         response = self.get(url, params)
         logger.debug(f"Category fetched successfully from URL: '{url}'")
         return category_type.from_html_io(response)
     def get(self, url: str, params: Optional[QueryParamTypes] = None) -> httpx.Response:
+        """Sends a GET request to the specified URL."""
         return self.request("GET", url, params=params)
     def request(self, method: str, url: str, params: Optional[QueryParamTypes] = None) -> httpx.Response:
+        """
+        Sends an HTTP request with multiple attempts logic.
+        Args:
+            method (str): The HTTP method (e.g., "GET", "POST").
+            url (str): The URL to send the request to.
+            params (Optional[QueryParamTypes]): Query parameters to be included in the request.
+        Returns:
+            httpx.Response: The HTTP response object.
+        Raises:
+            MaxRequestAttemptsExceededError: If the request exceeds the maximum number of attempts.
+        """
         if not isinstance(self.max_request_attempts, int):
             raise TypeError(f"max_request_attempts must be of type 'int', but got {type(self.max_request_attempts)}")
@@ -92,11 +148,28 @@ class Yad2Scraper:
         raise max_attempts_error from error_list[-1]  # multiple errors exist, raise from the last one
     def close(self) -> None:
+        """Closes the HTTP client and logs the closure."""
         logger.debug("Closing scraper client")
         self.client.close()
         logger.info("Scraper client closed")
     def _send_request(self, method: str, url: str, request_options: Dict[str, Any], attempt: int) -> httpx.Response:
+        """
+        Sends an HTTP request with the specified method to the given URL, applying all necessary actions.
+        Args:
+            method (str): The HTTP method (e.g., 'GET', 'POST').
+            url (str): The target URL for the request.
+            request_options (Dict[str, Any]): Additional request options, including headers and parameters.
+            attempt (int): The current attempt number for the request.
+        Returns:
+            httpx.Response: The HTTP response object received from the server.
+        Raises:
+            AntiBotDetectedError: If the response contains Anti-Bot content.
+            UnexpectedContentError: If a GET request does not contain expected content.
+        """
         if self.randomize_user_agent:
             self._set_random_user_agent(request_options)
@@ -105,12 +178,22 @@ class Yad2Scraper:
         logger.info(f"Sending {method} request to URL: '{url}' {self._format_attempt_info(attempt)}")
         response = self.client.request(method, url, **request_options)
+        self._request_count += 1
         logger.debug(f"Received response {response.status_code} from '{url}' {self._format_attempt_info(attempt)}")
         self._validate_response(response)
         return response
     def _prepare_request_options(self, params: Optional[QueryParamTypes] = None) -> Dict[str, Any]:
+        """
+        Prepares the request options to be passed to the HTTP client's request method, based on the default options.
+        Args:
+            params (Optional[QueryParamTypes]): Optional query parameters to include in the request.
+        Returns:
+            Dict[str, Any]: A dictionary of the request options, including headers and query parameters.
+        """
         logger.debug("Preparing request options from defaults")
         request_options = self.request_defaults.copy()
@@ -122,11 +205,23 @@ class Yad2Scraper:
     @staticmethod
     def _set_random_user_agent(request_options: Dict[str, str]):
+        """
+        Sets a random User-Agent header in the request options.
+        Args:
+            request_options (Dict[str, str]): The request options to update with the random User-Agent.
+        """
         user_agent = fua.random
         request_options.setdefault("headers", {})["User-Agent"] = user_agent
         logger.debug(f"Updated request options with random User-Agent header: '{user_agent}'")
     def _apply_wait_strategy(self, attempt: int):
+        """
+        Applies a wait time before making a request based on the wait strategy for the given attempt.
+        Args:
+            attempt (int): The current attempt number to calculate the wait time.
+        """
         wait_time = self.wait_strategy(attempt)
         if not wait_time:
             return
@@ -136,6 +231,17 @@ class Yad2Scraper:
     @staticmethod
     def _validate_response(response: httpx.Response):
+        """
+        Validates the response to ensure it is successful.
+        Args:
+            response (httpx.Response): The HTTP response object to validate.
+        Raises:
+            httpx.HTTPStatusError: If a status error occurred.
+            AntiBotDetectedError: If the response contains Anti-Bot content.
+            UnexpectedContentError: If a GET response does not contain expected content.
+        """
         response.raise_for_status()
         if ANTIBOT_CONTENT_IDENTIFIER in response.content:
@@ -154,12 +260,35 @@ class Yad2Scraper:
         logger.debug("Response validation succeeded")
     def _format_attempt_info(self, attempt: int) -> str:
+        """
+        Formats a string representing the current attempt number and total attempt count.
+        Args:
+            attempt (int): The current attempt number.
+        Returns:
+            str: A formatted string representing the attempt info, e.g., "(attempt 1/5)".
+        """
         return f"(attempt {attempt}/{self.max_request_attempts})"
     def __enter__(self):
+        """
+        Prepares the scraper to be used in a `with` statement, allowing for resource management.
+        Returns:
+            Yad2Scraper: The scraper instance to be used within the `with` block.
+        """
         logger.debug("Entering scraper context")
         return self
     def __exit__(self, exc_type, exc_val, exc_tb):
+        """
+        Cleans up resources and closes the scraper client when exiting the `with` statement.
+        Args:
+            exc_type: The exception type (if any).
+            exc_val: The exception value (if any).
+            exc_tb: The traceback object (if any).
+        """
         logger.debug("Exiting scraper context")
         self.close()

yad2_scraper/utils.py CHANGED Viewed

@@ -2,27 +2,30 @@ import functools
 from bs4 import BeautifulSoup, Tag
 from typing import Union, List, Tuple, Any
+def any_param_specified(*params: Any) -> bool:
+    """Check if any parameter is not None."""
+    return any(param is not None for param in params)
 def join_url(url: str, path: str) -> str:
+    """Join a base URL with a path, ensuring proper slashes."""
     return url.rstrip("/") + "/" + path.lstrip("/")
 def get_parent_url(url: str) -> str:
+    """Return the parent URL by removing the last segment."""
     if url.count("/") <= 2:
         return url
     return url.rstrip("/").rsplit("/", 1)[0]
 def find_html_tag_by_class_substring(e: Union[BeautifulSoup, Tag], tag_name: str, substring: str) -> Tag:
+    """Find the first HTML tag with a class containing the given substring."""
     return e.find(tag_name, class_=lambda class_name: class_name and substring in class_name)
 def find_all_html_tags_by_class_substring(e: Union[BeautifulSoup, Tag], tag_name: str, substring: str) -> List[Tag]:
+    """Find all HTML tags with a class containing the given substring."""
     return e.find_all(tag_name, class_=lambda class_name: class_name and substring in class_name)
 def safe_access(exceptions: Tuple = (), default: Any = None):
+    """Decorator to safely execute a function, returning a default value on exception."""
     def decorator(func):
         @functools.wraps(func)
         def wrapper(*args, **kwargs):
@@ -30,7 +33,5 @@ def safe_access(exceptions: Tuple = (), default: Any = None):
                 return func(*args, **kwargs)
             except exceptions:
                 return default
         return wrapper
-    return decorator
+    return decorator

yad2_scraper/vehicles/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from .urls import VEHICLES_URL, VehicleType, get_vehicle_url
+from .urls import VEHICLES_URL, VehicleCategory, get_vehicle_category_url
 from .query import VehiclesQueryFilters, OrderVehiclesBy
 from .category import Yad2VehiclesCategory
 from .tag import VehicleTag

yad2_scraper/vehicles/category.py CHANGED Viewed

@@ -7,9 +7,11 @@ from yad2_scraper.vehicles.next_data import VehiclesNextData
 class Yad2VehiclesCategory(Yad2Category):
     def get_vehicle_tags(self) -> List[VehicleTag]:
+        """Retrieve and return a list of vehicle tags from the current category."""
         tags = self.find_all_tags_by_class_substring("div", "feedItemBox")
         return [VehicleTag(tag) for tag in tags]
     def load_next_data(self) -> Optional[VehiclesNextData]:
+        """Extract and parse Next.js data from the current vehicle page."""
         next_data = super().load_next_data()
         return VehiclesNextData(next_data) if next_data else None

yad2_scraper/vehicles/next_data.py CHANGED Viewed

@@ -14,6 +14,8 @@ from yad2_scraper.vehicles.urls import VEHICLES_URL
 class VehicleData(metaclass=SafeAccessOptionalKeysMeta):
+    """Represents structured Next.js data of a specific vehicle category."""
     def __init__(self, data: dict):
         self.data = data

yad2_scraper/vehicles/query.py CHANGED Viewed

@@ -13,13 +13,14 @@ class OrderVehiclesBy(int, Enum):
 class VehiclesQueryFilters(QueryFilters):
+    """Pydantic model representing query filters for querying a vehicle resource."""
     year_range: Optional[NumberRange] = None
-    ...
     def to_params(self) -> dict:
+        """Convert filter fields to query parameters, including 'year'."""
         return {
             **super().to_params(),
             "year": format_number_range(self.year_range)
         }
-# TODO: add QueryParams class for each vehicle type (some share the same attributes - sometimes with different enums)
+# TODO: add QueryParams class for each vehicle category (some share the same attributes, sometimes with different enums)

yad2_scraper/vehicles/tag.py CHANGED Viewed

@@ -9,6 +9,8 @@ YEAR_AND_HAND_TAG_SEPARATOR = " • "
 class VehicleTag:
+    """Represents a vehicle listing on the webpage, providing access to various details"""
     def __init__(self, tag: Tag):
         self.tag = tag

yad2_scraper/vehicles/urls.py CHANGED Viewed

@@ -5,12 +5,16 @@ from yad2_scraper.constants import BASE_URL
 VEHICLES_URL = join_url(BASE_URL, "vehicles")
-VehicleType = Literal["cars", "motorcycles", "scooters", "trucks", "watercraft", "others"]
+VehicleCategory = Literal["cars", "motorcycles", "scooters", "trucks", "watercraft", "others"]
-_VALID_VEHICLE_TYPES = get_args(VehicleType)
+_VALID_VEHICLE_CATEGORIES = get_args(VehicleCategory)
-def get_vehicle_url(vehicle_type: VehicleType) -> str:
-    if vehicle_type not in _VALID_VEHICLE_TYPES:
-        raise ValueError(f"Invalid vehicle type: {repr(vehicle_type)}. Expected one of {_VALID_VEHICLE_TYPES}")
-    return join_url(VEHICLES_URL, vehicle_type)
+def get_vehicle_category_url(vehicle_category: VehicleCategory) -> str:
+    """Generate the URL for the specified vehicle category."""
+    if vehicle_category not in _VALID_VEHICLE_CATEGORIES:
+        raise ValueError(
+            f"Invalid vehicle category: {repr(vehicle_category)}. Expected one of {_VALID_VEHICLE_CATEGORIES}"
+        )
+    return join_url(VEHICLES_URL, vehicle_category)

yad2_scraper-0.5.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,164 @@
+Metadata-Version: 2.3
+Name: yad2-scraper
+Version: 0.5.1
+Summary: Scrape Yad2 in Python.
+License: LICENSE
+Author: dav ost
+Author-email: davidost2003@gmail.com
+Requires-Python: >=3.8
+Classifier: License :: Other/Proprietary License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Dist: beautifulsoup4 (>=4.11.1,<5.0.0)
+Requires-Dist: fake-useragent (>=0.1.11,<0.2.0)
+Requires-Dist: httpcore (>=0.15.0)
+Requires-Dist: httpx (>=0.24.0,<0.25.0)
+Requires-Dist: pydantic (>=1.10.0,<2.0.0)
+Description-Content-Type: text/markdown
+# Yad2 Scraper
+A Python package for scraping listings from [Yad2](https://www.yad2.co.il/), Israel's leading classifieds platform.
+This package provides a simple and flexible interface to fetch data, filter results, and extract relevant information.
+__NOTE__: Currently, the package primarily supports the **vehicles category**.
+Support for additional categories may be added in future updates.
+---
+## Features
+- **Fetch Listings**: Retrieve listings by category (e.g., vehicles or real estate).
+- **Filter Results**: Apply filters such as price range, year range, and sorting order.
+- **Dynamic URL Generation**: Generate URLs for specific categories and filters.
+- **Type-Safe API**: Uses Python type hints (`Literal`, `Optional`, etc.) for better code clarity and safety.
+- **Extensible**: Easily extendable to support additional categories and filters.
+---
+## Installation
+Install the package using `pip`:
+```bash
+pip install yad2-scraper
+```
+## Usage
+### Fetching Category Listings
+To fetch any category, use the `fetch_category` function:
+```python
+from yad2_scraper import fetch_category, Yad2Category
+# Fetch real estate category (returns a generic Yad2Category object)
+real_estate_category_page1 = fetch_category("https://www.yad2.co.il/realestate/forsale", page=1)
+...
+real_estate_category_page2 = fetch_category("https://www.yad2.co.il/realestate/forsale", page=2)
+...
+```
+### Fetching Vehicle Listings
+To fetch vehicle listings for a specific category, use the `fetch_vehicle_category` function:
+```python
+from yad2_scraper import fetch_vehicle_category, OrderVehiclesBy, Field
+# Fetch cars category
+cars_category = fetch_vehicle_category("cars")
+for car_data in cars_category.load_next_data().iterate_vehicles():
+    print(car_data.model(Field.ENGLISH_TEXT))
+    print(car_data.test_date)
+    print(car_data.price)
+    ...
+# Fetch motorcycles category
+motorcycle_categories = fetch_vehicle_category(
+    "motorcycles",
+    price_range=(5000, 15000),
+    year_range=(2010, 2020),
+    order_by=OrderVehiclesBy.PRICE_LOWEST_TO_HIGHEST
+)
+for motorcycle_tag in motorcycle_categories.get_vehicle_tags():
+    print(motorcycle_tag.page_link)
+    print(motorcycle_tag.hand)
+    print(motorcycle_tag.price)
+    ...
+```
+### The Scraper Object
+The `Yad2Scraper` class is the core of the package.
+It handles HTTP requests, parses responses, and provides methods to fetch and filter vehicle listings.
+#### Creating a Scraper Instance
+You can create a `Yad2Scraper` instance manually or use the default scraper provided by the package:
+```python
+from yad2_scraper import Yad2Scraper, get_default_scraper
+# Create a custom scraper instance
+scraper = Yad2Scraper()
+# Use the default scraper
+default_scraper = get_default_scraper()
+```
+#### Fetching Category Listings
+The `fetch_category` method is used to fetch listings for a specific category.
+It takes a URL, a `Category` type, and optionally query params as arguments:
+```python
+from yad2_scraper import Yad2Scraper, Yad2Category, QueryFilters, OrderBy
+from yad2_scraper.vehicles import (
+    Yad2VehiclesCategory,
+    VehiclesQueryFilters,
+    OrderVehiclesBy,
+    get_vehicle_category_url
+)
+# Fetch businesses for sale category with filters
+scraper = Yad2Scraper()
+url = "https://www.yad2.co.il/products/businesses-for-sale"
+query_filters = QueryFilters(price_range=(10000, 250000), order_by=OrderBy.PRICE_LOWEST_TO_HIGHEST)
+real_estate_category = scraper.fetch_category(url, Yad2Category, params=query_filters)
+# Fetch watercraft (vehicle) category with filters
+url = get_vehicle_category_url("watercraft")
+query_filters = VehiclesQueryFilters(year_range=(2010, 2020), order_by=OrderVehiclesBy.DATE)
+watercraft_category = scraper.fetch_category(url, Yad2VehiclesCategory, params=query_filters)
+```
+#### Attributes & Methods
+The `Yad2Scraper` object contains a lot of additional attributes & methods which you can use.
+Please check out the actual code documentation for more details.
+## Contributing
+Contributions are welcome! Here’s how you can get started:
+1. Fork the repository.
+2. Create a new branch for your feature or bugfix.
+3. Write tests for your changes.
+4. Submit a pull request.
+## License
+This project is licensed under the MIT License. See the LICENSE file for details.
+## Support
+For questions, issues, or feature requests, please open an issue on the GitHub repository.

yad2_scraper-0.5.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,18 @@
+yad2_scraper/__init__.py,sha256=oLANQo7jrtR5ex1tv4sM5ppaW9JpHS70Knsp0ZgVzm0,3708
+yad2_scraper/category.py,sha256=SQ2eg0-fQ9hEaNryYpWVFaJqCx1d65t2_E_S3qpuw9g,1230
+yad2_scraper/constants.py,sha256=8zXJ31fRqkDIOJp96BRK1PJofGXX8SG64YcfmJnVW8Q,910
+yad2_scraper/exceptions.py,sha256=5yentEUBuEGItwRcjtZY89A19rvFErcTy4S4GUtY_WY,1526
+yad2_scraper/next_data.py,sha256=OcZ7ingXSd6sLNkqQPz6NVTeEDbMkOai9QONFErc3FI,1977
+yad2_scraper/query.py,sha256=HPBoLE6xFjsmvBFR2ULvPq96XXl-2zOqXt7LnHgetIk,1438
+yad2_scraper/scraper.py,sha256=VA-P24Gvn1y5Pkn_n3hDdpVl1aeEnLoC82eBYteAbWQ,11816
+yad2_scraper/utils.py,sha256=UDpFKel_TJa0dJv1FV-CVqA8-uaFo_hDcooiFAkSZI8,1578
+yad2_scraper/vehicles/__init__.py,sha256=dxjZcNv3ExnN3fKW-m1oqKiX9YC7gj8lqpIa3uWo9iI,242
+yad2_scraper/vehicles/category.py,sha256=HdUGCVpC1jw2V-2XvyAC4pPlVQR6cwHyVKDxS3pfQhc,744
+yad2_scraper/vehicles/next_data.py,sha256=lEIWcTP7BOFDC3lL0FhBGp6u-7hsgGdbbrH0iw0Ux20,9203
+yad2_scraper/vehicles/query.py,sha256=ieIJSGJELcgzqtJh6bQXalvDg743LnI2RYrAyHDIH80,912
+yad2_scraper/vehicles/tag.py,sha256=Wj7v2c8IPQLYHVkfzP1UiulKKJE4yLqnbeh81nvWZhU,2052
+yad2_scraper/vehicles/urls.py,sha256=zxipWjm0SXn2gGOBWw9VqKAJ59mhIGpzd_fTYitpW8c,715
+yad2_scraper-0.5.1.dist-info/LICENSE,sha256=JCpnDxMx2kE40e0UQ1svSmifrLWg2Gni5VTkJR68thY,1065
+yad2_scraper-0.5.1.dist-info/METADATA,sha256=SLeA6BPi1idJ20WWWbl7AW-hC_u1_vKPRmUTg4_VhVI,5225
+yad2_scraper-0.5.1.dist-info/WHEEL,sha256=7dDg4QLnNKTvwIDR9Ac8jJaAmBC_owJrckbC0jjThyA,88
+yad2_scraper-0.5.1.dist-info/RECORD,,

{yad2_scraper-0.5.0.dist-info → yad2_scraper-0.5.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 2.0.1
+Generator: poetry-core 2.1.0
 Root-Is-Purelib: true
 Tag: py3-none-any

yad2_scraper-0.5.0.dist-info/METADATA DELETED Viewed

@@ -1,26 +0,0 @@
-Metadata-Version: 2.3
-Name: yad2-scraper
-Version: 0.5.0
-Summary: Scrape Yad2 in Python.
-License: LICENSE
-Author: dav ost
-Author-email: davidost2003@gmail.com
-Requires-Python: >=3.8
-Classifier: License :: Other/Proprietary License
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
-Requires-Dist: beautifulsoup4 (>=4.11.1,<5.0.0)
-Requires-Dist: fake-useragent (>=0.1.11,<0.2.0)
-Requires-Dist: httpcore (>=0.15.0)
-Requires-Dist: httpx (>=0.24.0,<0.25.0)
-Requires-Dist: pydantic (>=1.10.0,<2.0.0)
-Description-Content-Type: text/markdown
-# yad2-scraper
-Scrape Yad2 in Python.

yad2_scraper-0.5.0.dist-info/RECORD DELETED Viewed

@@ -1,18 +0,0 @@
-yad2_scraper/__init__.py,sha256=UUiIk6TAHTAP4IY86bIR4TcY3VVMTCyEF0Sq1MSneMM,141
-yad2_scraper/category.py,sha256=KXLyjMOlPzu3xj08-uRmffAMD83DbqFVm-y1-T83Djw,910
-yad2_scraper/constants.py,sha256=8zXJ31fRqkDIOJp96BRK1PJofGXX8SG64YcfmJnVW8Q,910
-yad2_scraper/exceptions.py,sha256=_IcuDdJPKAznSUp_c3fLEuTnDdKf0NLJqpRPs0IzdXw,979
-yad2_scraper/next_data.py,sha256=fOatioaBxR7LZgRnXp35CoOkR7-Adv6rW_YKBQpSYj8,1585
-yad2_scraper/query.py,sha256=nURdupTnMbxgglJz7tdWSqnp4UG61nwWM1vjWQaylWE,1196
-yad2_scraper/scraper.py,sha256=sgDpfnKlBSDIWEb2enpQ5O9E5fJvXz3cDOnGXHGCJL4,6653
-yad2_scraper/utils.py,sha256=y6ErH2HcoCJn7OreNj4lvW--iOA7dv1LUIPa537GVjg,1070
-yad2_scraper/vehicles/__init__.py,sha256=4-4vVFu836nLzaTf1KTlddrjSk7dX3Nu9hm3cj1EKIU,229
-yad2_scraper/vehicles/category.py,sha256=BrH-aZY6hNlHtSqBmleifb7yY5R-76J2GAj9Bfd0Ulw,584
-yad2_scraper/vehicles/next_data.py,sha256=0xUbEwmj8CsWc0uqoW9hbM4FW26e4IWBiv-UcraSwrw,9125
-yad2_scraper/vehicles/query.py,sha256=VhL-E-sgpLxenZVvNgdCNWY15hMtoP0Oyv6SH_N3e04,757
-yad2_scraper/vehicles/tag.py,sha256=YTeCfVnaPnHz9CYRnfcQljEbNqynBDdlbX0HNPiB-XY,1960
-yad2_scraper/vehicles/urls.py,sha256=-aEtV_1elqHFdLIxBZglY0e0-UHGqQab5Rh5qKUyBtg,573
-yad2_scraper-0.5.0.dist-info/LICENSE,sha256=JCpnDxMx2kE40e0UQ1svSmifrLWg2Gni5VTkJR68thY,1065
-yad2_scraper-0.5.0.dist-info/METADATA,sha256=00MRqHUY9r2qiSRFgIZPCwli-aZgc_FJS2c0lshmKdY,875
-yad2_scraper-0.5.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
-yad2_scraper-0.5.0.dist-info/RECORD,,

{yad2_scraper-0.5.0.dist-info → yad2_scraper-0.5.1.dist-info}/LICENSE RENAMED Viewed

File without changes

yad2-scraper 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

yad2-scraper 0.5.0py3-none-any.whl → 0.5.1py3-none-any.whl