uk_bin_collection 0.152.0__py3-none-any.whl → 0.152.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +92 -58
- uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +69 -24
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +24 -47
- uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +14 -3
- uk_bin_collection/uk_bin_collection/councils/CheltenhamBoroughCouncil.py +12 -12
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +24 -3
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +4 -0
- uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +114 -261
- uk_bin_collection/uk_bin_collection/councils/DartfordBoroughCouncil.py +13 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +17 -2
- uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +14 -1
- uk_bin_collection/uk_bin_collection/councils/EastbourneBoroughCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +59 -45
- uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +2 -0
- uk_bin_collection/uk_bin_collection/councils/EppingForestDistrictCouncil.py +47 -15
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +13 -1
- uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +2 -3
- uk_bin_collection/uk_bin_collection/councils/HerefordshireCouncil.py +13 -2
- uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +18 -4
- uk_bin_collection/uk_bin_collection/councils/LewesDistrictCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +16 -4
- uk_bin_collection/uk_bin_collection/councils/MaidstoneBoroughCouncil.py +42 -47
- uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +13 -6
- uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py +2 -1
- uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +14 -9
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +2 -2
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +50 -14
- uk_bin_collection/uk_bin_collection/councils/SouthRibbleCouncil.py +115 -65
- uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +10 -5
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +1 -3
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +3 -0
- {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/METADATA +179 -1
- {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/RECORD +36 -34
- {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/entry_points.txt +0 -0
uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py:

@@ -1,18 +1,39 @@
-from typing import Any, Dict
-
+from typing import Any, Dict, Optional
+
+from bs4 import BeautifulSoup, NavigableString, Tag
+
+from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 """
 This module provides bin collection data for Cheshire East Council.
 """
 
-
 class CouncilClass(AbstractGetBinDataClass):
     """
     A class to fetch and parse bin collection data for Cheshire East Council.
     """
 
     def parse_data(self, page: Any, **kwargs: Any) -> Dict[str, Any]:
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://online.cheshireeast.gov.uk/MyCollectionDay/SearchByAjax/GetBartecJobList?uprn={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
+        # Add warning suppression for the insecure request
+        import urllib3
+
+        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+        # Make request with SSL verification disabled
+        page = requests.get(url, verify=False)
+
         soup = BeautifulSoup(page.text, features="html.parser")
 
         bin_data_dict: Dict[str, Any] = {"bins": []}
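Most of the requests-based hunks in this release repeat the same identifier-resolution shape: prefer a `uprn` kwarg, falling back to a stored `url` for configs created under the old system. A minimal standalone sketch of that flow, with a stand-in `check_uprn` (the real helper lives in `uk_bin_collection.uk_bin_collection.common`) and a hypothetical endpoint:

```python
from typing import Any


def check_uprn(uprn: str) -> None:
    # Stand-in for common.check_uprn, assumed to raise on a missing/invalid UPRN.
    if not uprn or not str(uprn).isdigit():
        raise ValueError(f"Invalid UPRN: {uprn!r}")


def resolve_url(**kwargs: Any) -> str:
    try:
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)
        url = f"https://example.invalid/collections?uprn={user_uprn}"  # hypothetical endpoint
        if not user_uprn:
            # Fallback: older configs stored a full URL rather than a UPRN.
            url = kwargs.get("url")
    except Exception as e:
        raise ValueError(f"Error getting identifier: {str(e)}")
    return url


print(resolve_url(uprn="100012345678"))
```

Note that, as written, the `if not user_uprn:` branch is only reachable when `check_uprn` tolerates an empty value; if it raises instead, the `except` clause converts the fallback case into a `ValueError`.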
uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py:

@@ -41,6 +41,10 @@ class CouncilClass(AbstractGetBinDataClass):
         check_uprn(user_uprn)
         check_postcode(user_postcode)
 
+        # Ensure UPRN starts with "UPRN"
+        if not user_uprn.startswith("UPRN"):
+            user_uprn = f"UPRN{user_uprn}"
+
         # Create Selenium webdriver
         user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
         driver = create_webdriver(web_driver, headless, user_agent, __name__)
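The `startswith` guard makes the prefixing idempotent, so values already stored in the prefixed form pass through untouched. A quick self-contained check of that behaviour (the function name is illustrative, not from the package):

```python
def normalise_uprn(user_uprn: str) -> str:
    # Ensure UPRN starts with "UPRN", without ever double-prefixing.
    if not user_uprn.startswith("UPRN"):
        user_uprn = f"UPRN{user_uprn}"
    return user_uprn


assert normalise_uprn("100012345678") == "UPRN100012345678"
assert normalise_uprn("UPRN100012345678") == "UPRN100012345678"  # unchanged
```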
uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py:

@@ -1,237 +1,16 @@
 import time
 
 from bs4 import BeautifulSoup
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import Select, WebDriverWait
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
-def get_headers(base_url: str, method: str) -> dict[str, str]:
-    """
-    Gets request headers
-    :rtype: dict[str, str]
-    :param base_url: Base URL to use
-    :param method: Method to use
-    :return: Request headers
-    """
-    headers = {
-        "Accept-Encoding": "gzip, deflate, br",
-        "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8",
-        "Cache-Control": "max-age=0",
-        "Connection": "keep-alive",
-        "Host": "service.croydon.gov.uk",
-        "Origin": base_url,
-        "sec-ch-ua": '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"',
-        "sec-ch-ua-mobile": "?0",
-        "sec-ch-ua-platform": "Windows",
-        "Sec-Fetch-Dest": "document",
-        "Sec-Fetch-User": "?1",
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
-        " Chrome/109.0.0.0 Safari/537.36",
-    }
-    if method.lower() == "post":
-        headers["Accept"] = "application/json, text/javascript, */*; q=0.01"
-        headers["Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8"
-        headers["Sec-Fetch-Mode"] = "cors"
-        headers["Sec-Fetch-Mode"] = "same-origin"
-        headers["X-Requested-With"] = "XMLHttpRequest"
-    else:
-        headers["Accept"] = (
-            "text/html,application/xhtml+xml,application/xml;"
-            "q=0.9,image/avif,image/webp,image/apng,*/*;"
-            "q=0.8,application/signed-exchange;v=b3;q=0.9"
-        )
-        headers["Sec-Fetch-Mode"] = "navigate"
-        headers["Sec-Fetch-Mode"] = "none"
-    return headers
-
-
-def get_session_storage_global() -> object:
-    """
-    Gets session storage global object
-    :rtype: object
-    :return: Session storage global object
-    """
-    return {
-        "destination_stack": [
-            "w/webpage/bin-day-enter-address",
-            "w/webpage/your-bin-collection-details?context_record_id=86086077"
-            "&webpage_token=5c047b2c10b4aad66bef2054aac6bea52ad7a5e185ffdf7090b01f8ddc96728f",
-            "w/webpage/bin-day-enter-address",
-            "w/webpage/your-bin-collection-details?context_record_id=86085229"
-            "&webpage_token=cf1b8fd6213f4823277d98c1dd8a992e6ebef1fabc7d892714e5d9dade448c37",
-            "w/webpage/bin-day-enter-address",
-            "w/webpage/your-bin-collection-details?context_record_id=86084221"
-            "&webpage_token=7f52fb51019bf0e6bfe9647b1b31000124bd92a9d95781f1557f58b3ed40da52",
-            "w/webpage/bin-day-enter-address",
-            "w/webpage/your-bin-collection-details?context_record_id=86083209"
-            "&webpage_token=de50c265da927336f526d9d9a44947595c3aa38965aa8c495ac2fb73d272ece8",
-            "w/webpage/bin-day-enter-address",
-        ],
-        "last_context_record_id": "86086077",
-    }
-
-
-def get_csrf_token(s: requests.session, base_url: str) -> str:
-    """
-    Gets a CSRF token
-    :rtype: str
-    :param s: requests.Session() to use
-    :param base_url: Base URL to use
-    :return: CSRF token
-    """
-    csrf_token = ""
-    response = s.get(
-        base_url + "/wasteservices/w/webpage/bin-day-enter-address",
-        headers=get_headers(base_url, "GET"),
-    )
-    if response.status_code == 200:
-        soup = BeautifulSoup(response.text, features="html.parser")
-        soup.prettify()
-        app_body = soup.find("div", {"class": "app-body"})
-        script = app_body.find("script", {"type": "text/javascript"}).string
-        p = re.compile("var CSRF = ('|\")(.*?)('|\");")
-        m = p.search(script)
-        csrf_token = m.groups()[1]
-    else:
-        raise ValueError(
-            "Code 1: Failed to get a CSRF token. Please ensure the council website is online first,"
-            " then open an issue on GitHub."
-        )
-    return csrf_token
-
-
-def get_address_id(
-    s: requests.session, base_url: str, csrf_token: str, postcode: str, paon: str
-) -> str:
-    """
-    Gets the address ID
-    :rtype: str
-    :param s: requests.Session() to use
-    :param base_url: Base URL to use
-    :param csrf_token: CSRF token to use
-    :param postcode: Postcode to use
-    :param paon: House number/address to find
-    :return: address ID
-    """
-    address_id = "0"
-    # Get the addresses for the postcode
-    form_data = {
-        "code_action": "search",
-        "code_params": '{"search_item":"' + postcode + '","is_ss":true}',
-        "fragment_action": "handle_event",
-        "fragment_id": "PCF0020408EECEC1",
-        "fragment_collection_class": "formtable",
-        "fragment_collection_editable_values": '{"PCF0021449EECEC1":"1"}',
-        "_session_storage": json.dumps(
-            {
-                "/wasteservices/w/webpage/bin-day-enter-address": {},
-                "_global": get_session_storage_global(),
-            }
-        ),
-        "action_cell_id": "PCL0005629EECEC1",
-        "action_page_id": "PAG0000898EECEC1",
-        "form_check_ajax": csrf_token,
-    }
-    response = s.post(
-        base_url
-        + "/wasteservices/w/webpage/bin-day-enter-address?webpage_subpage_id=PAG0000898EECEC1"
-        "&webpage_token=faab02e1f62a58f7bad4c2ae5b8622e19846b97dde2a76f546c4bb1230cee044"
-        "&widget_action=fragment_action",
-        headers=get_headers(base_url, "POST"),
-        data=form_data,
-    )
-    if response.status_code == 200:
-        json_response = json.loads(response.text)
-        addresses = json_response["response"]["items"]
-        # Find the matching address id for the paon
-        for address in addresses:
-            # Check for full matches first
-            if address.get("dropdown_display_field") == paon:
-                address_id = address.get("id")
-                break
-        # Check for matching start if no full match found
-        if address_id == "0":
-            for address in addresses:
-                if address.get("dropdown_display_field").split()[0] == paon.strip():
-                    address_id = address.get("id")
-                    break
-        # Check match was found
-        if address_id == "0":
-            raise ValueError(
-                "Code 2: No matching address for house number/full address found."
-            )
-    else:
-        raise ValueError("Code 3: No addresses found for provided postcode.")
-    return address_id
-
-
-def get_collection_data(
-    s: requests.session, base_url: str, csrf_token: str, address_id: str
-) -> str:
-    """
-    Gets the collection data
-    :rtype: str
-    :param s: requests.Session() to use
-    :param base_url: Base URL to use
-    :param csrf_token: CSRF token to use
-    :param address_id: Address id to use
-    :param retries: Retries count
-    :return: Collection data
-    """
-    collection_data = ""
-    if address_id != "0":
-        form_data = {
-            "form_check": csrf_token,
-            "submitted_page_id": "PAG0000898EECEC1",
-            "submitted_widget_group_id": "PWG0002644EECEC1",
-            "submitted_widget_group_type": "modify",
-            "submission_token": "63e9126bacd815.12997577",
-            "payload[PAG0000898EECEC1][PWG0002644EECEC1][PCL0005629EECEC1][formtable]"
-            "[C_63e9126bacfb3][PCF0020408EECEC1]": address_id,
-            "payload[PAG0000898EECEC1][PWG0002644EECEC1][PCL0005629EECEC1][formtable]"
-            "[C_63e9126bacfb3][PCF0021449EECEC1]": "1",
-            "payload[PAG0000898EECEC1][PWG0002644EECEC1][PCL0005629EECEC1][formtable]"
-            "[C_63e9126bacfb3][PCF0020072EECEC1]": "Next",
-            "submit_fragment_id": "PCF0020072EECEC1",
-            "_session_storage": json.dumps({"_global": get_session_storage_global()}),
-            "_update_page_content_request": 1,
-            "form_check_ajax": csrf_token,
-        }
-        response = s.post(
-            base_url
-            + "/wasteservices/w/webpage/bin-day-enter-address?webpage_subpage_id=PAG0000898EECEC1"
-            "&webpage_token=faab02e1f62a58f7bad4c2ae5b8622e19846b97dde2a76f546c4bb1230cee044",
-            headers=get_headers(base_url, "POST"),
-            data=form_data,
-        )
-        if response.status_code == 200 and len(response.text) > 0:
-            json_response = json.loads(response.text)
-            form_data = {
-                "_dummy": 1,
-                "_session_storage": json.dumps(
-                    {"_global": get_session_storage_global()}
-                ),
-                "_update_page_content_request": 1,
-                "form_check_ajax": csrf_token,
-            }
-            response = s.post(
-                base_url + json_response["redirect_url"],
-                headers=get_headers(base_url, "POST"),
-                data=form_data,
-            )
-            if response.status_code == 200 and len(response.text) > 0:
-                json_response = json.loads(response.text)
-                collection_data = json_response["data"]
-            else:
-                raise ValueError("Code 4: Failed to get bin data.")
-        else:
-            raise ValueError(
-                "Code 5: Failed to get bin data. Too many requests. Please wait a few minutes before trying again."
-            )
-    return collection_data
-
-
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -240,47 +19,121 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-
-
-
-
-
-
-
+        driver = None
+        try:
+            user_postcode = kwargs.get("postcode")
+            if not user_postcode:
+                raise ValueError("No postcode provided.")
+            check_postcode(user_postcode)
+
+            user_paon = kwargs.get("paon")
+            check_paon(user_paon)
+            headless = kwargs.get("headless")
+            web_driver = kwargs.get("web_driver")
+            driver = create_webdriver(web_driver, headless, None, __name__)
+            page = "https://service.croydon.gov.uk/wasteservices/w/webpage/bin-day-enter-address"
+
+            driver.maximize_window()
+
+            driver.get(page)
+
+            postcode_input = WebDriverWait(driver, 60).until(
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, 'input[data-ts_identifier="postcode_input"]')
+                )
+            )
 
-
-        csrf_token = get_csrf_token(s, base_url)
-        # Next, get the address_id
-        address_id = get_address_id(s, base_url, csrf_token, postcode, paon)
-        # Finally, use the address_id to get the collection data
-        collection_data = get_collection_data(s, base_url, csrf_token, address_id)
-        if collection_data != "":
-            soup = BeautifulSoup(collection_data, features="html.parser")
-            soup.prettify()
+            postcode_input.send_keys(user_postcode + Keys.ENTER)
 
-
-
-
+            time.sleep(5)
+            # Wait for address box to be visible
+            select_address_input = WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable(
+                    (By.CSS_SELECTOR, 'select[data-ts_identifier="address_selection"]')
+                )
             )
 
-            #
-
+            # Select address based on house number (paon)
+            select = Select(select_address_input)
+            paon = str(user_paon)  # Ensure paon is a string for comparison
+            address_found = False
 
-            for
-
-
-
-
-
-                    collection_date, "%A %d %B %Y"
-                ).strftime(date_format),
-            }
-            data["bins"].append(dict_data)
+            for option in select.options:
+                # Look for house number pattern with surrounding spaces to avoid partial matches
+                if f" {paon} " in f" {option.text} ":
+                    select.select_by_value(option.get_attribute("value"))
+                    address_found = True
+                    break
 
-            if
+            if not address_found:
                 raise ValueError(
-                    "
-
+                    f"Address with house number {paon} not found in the dropdown."
+                )
+
+            # Click the "Next" button
+            next_button = WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable(
+                    (By.CSS_SELECTOR, 'input[type="submit"][value="Next"]')
+                )
+            )
+            next_button.click()
+
+            # Wait for the bin collection content to load
+            collection_content = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located(
+                    (
+                        By.XPATH,
+                        '//*[@id="mats_content_wrapper"]/div[2]/div[2]/div[2]/div/div[1]/div/div[3]/div/div/div/div',
+                    )
                 )
+            )
 
-
+            soup = BeautifulSoup(driver.page_source, "html.parser")
+
+            bin_data = {"bins": []}
+
+            # Find all bin collection sections
+            bin_sections = soup.find_all("div", {"class": "listing_template_record"})
+
+            for section in bin_sections:
+                # Get bin type from h2 tag
+                bin_type_elem = section.find("h2")
+                if bin_type_elem:
+                    bin_type = bin_type_elem.text.strip()
+
+                    # Find collection date span
+                    date_span = section.find("span", {"class": "value-as-text"})
+                    if date_span:
+                        collection_date_string = date_span.text.strip()
+
+                        # Convert date string to required format
+                        try:
+                            # Parse the date string (e.g., "Sunday 1 June 2025")
+                            parsed_date = datetime.strptime(
+                                collection_date_string, "%A %d %B %Y"
+                            )
+                            # Format as dd/mm/yyyy
+                            formatted_date = parsed_date.strftime("%d/%m/%Y")
+
+                            # Create bin entry
+                            bin_info = {
+                                "type": bin_type,
+                                "collectionDate": formatted_date,
+                            }
+                            bin_data["bins"].append(bin_info)
+                        except ValueError as e:
+                            print(f"Error parsing date '{collection_date_string}': {e}")
+
+            if not bin_data["bins"]:
+                raise ValueError("No bin collection data found")
+
+        except Exception as e:
+            # Here you can log the exception if needed
+            print(f"An error occurred: {e}")
+            # Optionally, re-raise the exception if you want it to propagate
+            raise
+        finally:
+            # This block ensures that the driver is closed regardless of an exception
+            if driver:
+                driver.quit()
+            return bin_data
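The rewritten Croydon scraper drops the hand-rolled POSTs (headers, CSRF token, hard-coded session-storage payloads) in favour of a browser driven through explicit waits. A trimmed sketch of the wait-then-select idiom it now relies on, using real Selenium APIs but a hypothetical page and selectors:

```python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait

driver = webdriver.Chrome()
try:
    driver.get("https://example.invalid/bin-day-enter-address")  # hypothetical page

    # Block until the dropdown is interactable rather than sleeping a fixed time.
    dropdown = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "select#address"))  # hypothetical selector
    )

    select = Select(dropdown)
    house_number = "12"
    for option in select.options:
        # Pad with spaces, as the hunk above does, so "12" cannot match "112".
        if f" {house_number} " in f" {option.text} ":
            select.select_by_value(option.get_attribute("value"))
            break
finally:
    driver.quit()
```

Explicit waits poll until the condition holds (or the timeout raises), which is why the new code tolerates the council site's slow page transitions without the fixed retry logic the old request-based flow needed.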
uk_bin_collection/uk_bin_collection/councils/DartfordBoroughCouncil.py:

@@ -1,4 +1,5 @@
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -12,7 +13,19 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://windmz.dartford.gov.uk/ufs/WS_CHECK_COLLECTIONS.eb?UPRN={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
         # Make a BS4 object
+        page = requests.get(url)
         soup = BeautifulSoup(page.text, features="html.parser")
         soup.prettify()
 
uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py:

@@ -1,12 +1,27 @@
-from bs4 import BeautifulSoup
-from datetime import datetime
 import re
+from datetime import datetime
+
+from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *  # Consider specific imports
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://collections.dover.gov.uk/property/{user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
+        # Make a BS4 object
+        page = requests.get(url)
         soup = BeautifulSoup(page.text, "html.parser")
 
         bins_data = {"bins": []}
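Dartford, Dover, and East Devon all gain the same two-step body once the URL is resolved: fetch with `requests`, parse with BeautifulSoup, and accumulate results into a `{"bins": [...]}` dict. A bare-bones skeleton of that shape, with a hypothetical URL and markup:

```python
import requests
from bs4 import BeautifulSoup

page = requests.get("https://example.invalid/property/100012345678")  # hypothetical URL
soup = BeautifulSoup(page.text, "html.parser")

bins_data = {"bins": []}
for row in soup.select("div.bin-collection"):  # hypothetical selector
    bins_data["bins"].append(
        {
            "type": row.select_one("h3").get_text(strip=True),
            "collectionDate": row.select_one("span.date").get_text(strip=True),
        }
    )
print(bins_data)
```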
uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py:

@@ -3,7 +3,8 @@ from datetime import datetime
 
 import pandas as pd
 from bs4 import BeautifulSoup
-
+
+from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
@@ -15,7 +16,19 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://eastdevon.gov.uk/recycling-and-waste/recycling-waste-information/when-is-my-bin-collected/future-collections-calendar/?UPRN={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
         # Make a BS4 object
+        page = requests.get(url)
         soup = BeautifulSoup(page.text, features="html.parser")
         soup.prettify()
 
uk_bin_collection/uk_bin_collection/councils/EastbourneBoroughCouncil.py (new file):

@@ -0,0 +1,76 @@
+# Lewes Borough Council uses the same script.
+
+from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://environmentfirst.co.uk/house.php?uprn={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
+        # Make a BS4 object
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, features="html.parser")
+        soup.prettify()
+
+        # Get the paragraph lines from the page
+        data = {"bins": []}
+        page_text = soup.find("div", {"class": "collect"}).find_all("p")
+
+        # Parse the correct lines (find them, remove the ordinal indicator and make them the correct format date) and
+        # then add them to the dictionary
+        rubbish_day = datetime.strptime(
+            remove_ordinal_indicator_from_date_string(
+                page_text[2].find_next("strong").text
+            ),
+            "%d %B %Y",
+        ).strftime(date_format)
+        dict_data = {
+            "type": "Rubbish",
+            "collectionDate": rubbish_day,
+        }
+        data["bins"].append(dict_data)
+        recycling_day = datetime.strptime(
+            remove_ordinal_indicator_from_date_string(
+                page_text[4].find_next("strong").text
+            ),
+            "%d %B %Y",
+        ).strftime(date_format)
+        dict_data = {
+            "type": "Recycling",
+            "collectionDate": recycling_day,
+        }
+        data["bins"].append(dict_data)
+
+        if len(page_text) > 5:
+            garden_day = datetime.strptime(
+                remove_ordinal_indicator_from_date_string(
+                    page_text[6].find_next("strong").text
+                ),
+                "%d %B %Y",
+            ).strftime(date_format)
+            dict_data = {
+                "type": "Garden",
+                "collectionDate": garden_day,
+            }
+            data["bins"].append(dict_data)
+
+        return data
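The Eastbourne/Lewes script leans on `remove_ordinal_indicator_from_date_string` and `date_format` from `common`. A self-contained approximation of that date handling (the regex and the output format here are assumptions, not the helper's actual source):

```python
import re
from datetime import datetime

date_format = "%d/%m/%Y"  # assumed; the real constant lives in common.py


def remove_ordinal_indicator_from_date_string(date_string: str) -> str:
    # Strip st/nd/rd/th only when attached to a digit: "1st June" -> "1 June".
    return re.sub(r"(?<=\d)(st|nd|rd|th)", "", date_string)


rubbish_day = datetime.strptime(
    remove_ordinal_indicator_from_date_string("1st June 2025"), "%d %B %Y"
).strftime(date_format)
print(rubbish_day)  # 01/06/2025
```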