PyPI - uk_bin_collection - Versions diffs - 0.152.8__py3-none-any.whl → 0.152.9__py3-none-any.whl - Mend

uk_bin_collection 0.152.8py3-none-any.whl → 0.152.9py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py CHANGED Viewed

@@ -1,4 +1,6 @@
 import time
+import re
+from datetime import datetime
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
@@ -15,41 +17,38 @@ def get_seasonal_overrides():
     if response.status_code == 200:
         soup = BeautifulSoup(response.text, "html.parser")
         body_div = soup.find("div", class_="field--name-body")
-        ul_element = body_div.find("ul")
-        if ul_element:
-            li_elements = ul_element.find_all("li")
-            overrides_dict = {}
-            for li_element in li_elements:
-                li_text = li_element.text.strip()
-                li_text = re.sub(r"\([^)]*\)", "", li_text).strip()
-                if "Collections for" in li_text and "will be revised to" in li_text:
-                    parts = li_text.split("will be revised to")
-                    original_date = (
-                        parts[0]
-                        .replace("Collections for", "")
-                        .replace("\xa0", " ")
-                        .strip()
-                    )
-                    revised_date = parts[1].strip()
-                    # Extract day and month
-                    date_parts = original_date.split()[1:]
-                    if len(date_parts) == 2:
-                        day, month = date_parts
-                        # Ensure original_date has leading zeros for single-digit days
-                        day = day.zfill(2)
-                        original_date = f"{original_date.split()[0]} {day} {month}"
-                    # Store the information in the dictionary
-                    overrides_dict[original_date] = revised_date
-            return overrides_dict
-        else:
-            print("UL element not found within the specified div.")
-    else:
-        print(f"Failed to retrieve the page. Status code: {response.status_code}")
-# import the wonderful Beautiful Soup and the URL grabber
+        if body_div:
+            ul_element = body_div.find("ul")
+            if ul_element:
+                li_elements = ul_element.find_all("li")
+                overrides_dict = {}
+                for li_element in li_elements:
+                    li_text = li_element.text.strip()
+                    li_text = re.sub(r"\([^)]*\)", "", li_text).strip()
+                    if "Collections for" in li_text and "will be revised to" in li_text:
+                        parts = li_text.split("will be revised to")
+                        original_date = (
+                            parts[0]
+                            .replace("Collections for", "")
+                            .replace("\xa0", " ")
+                            .strip()
+                        )
+                        revised_date = parts[1].strip()
+                        # Extract day and month
+                        date_parts = original_date.split()[1:]
+                        if len(date_parts) == 2:
+                            day, month = date_parts
+                            # Ensure original_date has leading zeros for single-digit days
+                            day = day.zfill(2)
+                            original_date = f"{original_date.split()[0]} {day} {month}"
+                        # Store the information in the dictionary
+                        overrides_dict[original_date] = revised_date
+                return overrides_dict
+    return {}
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -74,65 +73,66 @@ class CouncilClass(AbstractGetBinDataClass):
             driver.get(page)
-            wait = WebDriverWait(driver, 10)
-            accept_cookies_button = wait.until(
-                EC.element_to_be_clickable(
-                    (
-                        By.XPATH,
-                        "//button[contains(text(), 'Accept additional cookies')]",
+            # Handle first cookie banner
+            try:
+                wait = WebDriverWait(driver, 10)
+                accept_cookies_button = wait.until(
+                    EC.element_to_be_clickable(
+                        (
+                            By.XPATH,
+                            "//button[contains(text(), 'Accept additional cookies')]",
+                        )
                     )
                 )
-            )
-            accept_cookies_button.click()
+                driver.execute_script("arguments[0].click();", accept_cookies_button)
+            except Exception as e:
+                print(f"Cookie banner not found or clickable: {e}")
+                pass
-            # Wait for the element to be clickable
+            # Click the collection day link
             wait = WebDriverWait(driver, 10)
             find_your_collection_button = wait.until(
                 EC.element_to_be_clickable(
                     (By.LINK_TEXT, "Find your household collection day")
                 )
             )
-            # Scroll to the element (in case something is blocking it)
             driver.execute_script(
                 "arguments[0].scrollIntoView();", find_your_collection_button
             )
+            time.sleep(1)
+            driver.execute_script("arguments[0].click();", find_your_collection_button)
-            # Click the element
-            find_your_collection_button.click()
+            # Handle second cookie banner
             try:
                 accept_cookies = WebDriverWait(driver, timeout=10).until(
                     EC.presence_of_element_located((By.ID, "epdagree"))
                 )
-                accept_cookies.click()
+                driver.execute_script("arguments[0].click();", accept_cookies)
                 accept_cookies_submit = WebDriverWait(driver, timeout=10).until(
                     EC.presence_of_element_located((By.ID, "epdsubmit"))
                 )
-                accept_cookies_submit.click()
-            except:
-                print(
-                    "Accept cookies banner not found or clickable within the specified time."
-                )
+                driver.execute_script("arguments[0].click();", accept_cookies_submit)
+            except Exception as e:
+                print(f"Second cookie banner not found or clickable: {e}")
                 pass
+            # Enter postcode
             postcode_input = WebDriverWait(driver, 10).until(
                 EC.presence_of_element_located(
                     (By.CSS_SELECTOR, '[aria-label="Postcode"]')
                 )
             )
             postcode_input.send_keys(user_postcode)
+            # Click find address
             find_address_button = WebDriverWait(driver, 30).until(
                 EC.element_to_be_clickable((By.CSS_SELECTOR, '[value="Find address"]'))
             )
             driver.execute_script("arguments[0].scrollIntoView();", find_address_button)
             driver.execute_script("arguments[0].click();", find_address_button)
-            # find_address_button.click()
-            time.sleep(15)
-            # Wait for address box to be visible
+            time.sleep(5)
+            # Wait for address dropdown
             select_address_input = WebDriverWait(driver, 10).until(
                 EC.presence_of_element_located(
                     (
@@ -142,79 +142,122 @@ class CouncilClass(AbstractGetBinDataClass):
                 )
             )
-            # Select address based
+            # Select address based on postcode and house number
             select = Select(select_address_input)
-            addr_label = f"{user_postcode}, {user_paon},"
+            selected = False
             for addr_option in select.options:
-                option_name = addr_option.accessible_name[0 : len(addr_label)]
-                if option_name == addr_label:
+                if not addr_option.text or addr_option.text == "Please Select...":
+                    continue
+                option_text = addr_option.text.upper()
+                postcode_upper = user_postcode.upper()
+                paon_str = str(user_paon).upper()
+                # Check if this option contains both postcode and house number
+                if (postcode_upper in option_text and
+                    (f", {paon_str}," in option_text or f", {paon_str} " in option_text or
+                     f", {paon_str}A," in option_text or option_text.endswith(f", {paon_str}"))):
+                    select.select_by_value(addr_option.get_attribute('value'))
+                    selected = True
                     break
-            select.select_by_value(addr_option.text)
-            time.sleep(10)
-            # Wait for the specified div to be present
-            target_div_id = "MainContent_CUSTOM_FIELD_808562d4b07f437ea751317cabd19d9ed93a174c32b14f839b65f6abc42d8108_div"
-            target_div = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.ID, target_div_id))
-            )
+            if not selected:
+                raise ValueError(f"Address not found for postcode {user_postcode} and house number {user_paon}")
             time.sleep(5)
-            soup = BeautifulSoup(driver.page_source, "html.parser")
+            # Wait for bin collection data to appear anywhere on the page
+            try:
+                WebDriverWait(driver, 15).until(
+                    EC.presence_of_element_located(
+                        (By.XPATH, "//div[contains(text(), 'Next collection') or contains(text(), 'collection date')]")
+                    )
+                )
+            except:
+                raise ValueError("Could not find bin collection data on the page")
-            # Find the div with the specified id
-            target_div = soup.find("div", {"id": target_div_id})
+            time.sleep(2)
+            soup = BeautifulSoup(driver.page_source, "html.parser")
-            # Handle the additional table of info for xmas
+            # Handle seasonal overrides
             try:
                 overrides_dict = get_seasonal_overrides()
             except Exception as e:
                 overrides_dict = {}
-            # Check if the div is found
-            if target_div:
-                bin_data = {"bins": []}
-                for bin_div in target_div.find_all(
-                    "div",
-                    {"style": re.compile("background-color:.*; padding-left: 4px;")},
-                ):
-                    bin_type = bin_div.find("strong").text.strip()
-                    collection_date_string = (
-                        re.search(r"Next collection date:\s+(.*)", bin_div.text)
-                        .group(1)
-                        .strip()
-                        .replace(",", "")
-                    )
-                    if collection_date_string in overrides_dict:
-                        # Replace with the revised date from overrides_dict
-                        collection_date_string = overrides_dict[collection_date_string]
+            # Look for bin collection data anywhere on the page
+            bin_data = {"bins": []}
+            # Find all divs that contain "Next collection date:"
+            collection_divs = soup.find_all("div", string=re.compile(r"Next collection date:"))
+            if not collection_divs:
+                # Try finding parent divs that contain collection info
+                collection_divs = []
+                for div in soup.find_all("div"):
+                    if div.get_text() and "Next collection date:" in div.get_text():
+                        collection_divs.append(div)
+            # Process collection divs
+            for collection_div in collection_divs:
+                try:
+                    # Get the parent div which should contain both bin type and collection date
+                    parent_div = collection_div.parent if collection_div.parent else collection_div
+                    full_text = parent_div.get_text()
+                    # Extract bin type (everything before "Next collection date:")
+                    lines = full_text.split('\n')
+                    bin_type = "Unknown"
+                    collection_date_string = ""
+                    for i, line in enumerate(lines):
+                        line = line.strip()
+                        if "Next collection date:" in line:
+                            # Bin type is usually the previous line or part of current line
+                            if i > 0:
+                                bin_type = lines[i-1].strip()
+                            # Extract date from current line
+                            date_match = re.search(r"Next collection date:\s+(.*)", line)
+                            if date_match:
+                                collection_date_string = date_match.group(1).strip().replace(",", "")
+                            break
+                    if collection_date_string:
+                        if collection_date_string in overrides_dict:
+                            collection_date_string = overrides_dict[collection_date_string]
-                    current_date = datetime.now()
-                    parsed_date = datetime.strptime(
-                        collection_date_string + f" {current_date.year}", "%A %d %B %Y"
-                    )
-                    # Check if the parsed date is in the past and not today
-                    if parsed_date.date() < current_date.date():
-                        # If so, set the year to the next year
-                        parsed_date = parsed_date.replace(year=current_date.year + 1)
-                    else:
-                        # If not, set the year to the current year
-                        parsed_date = parsed_date.replace(year=current_date.year)
-                    formatted_date = parsed_date.strftime("%d/%m/%Y")
-                    contains_date(formatted_date)
-                    bin_info = {"type": bin_type, "collectionDate": formatted_date}
-                    bin_data["bins"].append(bin_info)
-            else:
-                raise ValueError("Collection data not found.")
+                        current_date = datetime.now()
+                        parsed_date = datetime.strptime(
+                            collection_date_string + f" {current_date.year}", "%A %d %B %Y"
+                        )
+                        # Check if the parsed date is in the past
+                        if parsed_date.date() < current_date.date():
+                            parsed_date = parsed_date.replace(year=current_date.year + 1)
+                        formatted_date = parsed_date.strftime("%d/%m/%Y")
+                        contains_date(formatted_date)
+                        bin_info = {"type": bin_type, "collectionDate": formatted_date}
+                        bin_data["bins"].append(bin_info)
+                except Exception as e:
+                    pass  # Skip problematic divs
+                    continue
+            if not bin_data["bins"]:
+                # Some addresses may not have bin collection data available
+                print("No bin collection data found for this address")
+                bin_data = {"bins": []}
         except Exception as e:
-            # Here you can log the exception if needed
             print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-            # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
-        return bin_data
+        return bin_data

uk_bin_collection/uk_bin_collection/councils/BasildonCouncil.py CHANGED Viewed

@@ -1,6 +1,11 @@
 import requests
 import json
 from datetime import datetime
+from bs4 import BeautifulSoup
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
 from uk_bin_collection.uk_bin_collection.common import (
     check_uprn,
     date_format as DATE_FORMAT,
@@ -14,76 +19,92 @@ class CouncilClass(AbstractGetBinDataClass):
     """
     def parse_data(self, page: str, **kwargs) -> dict:
-        url_base = (
-            "https://basildonportal.azurewebsites.net/api/getPropertyRefuseInformation"
-        )
         uprn = kwargs.get("uprn")
-        # Check the UPRN is valid
         check_uprn(uprn)
+        # Try API first
+        try:
+            return self._try_api_method(uprn)
+        except Exception:
+            # Fallback to Selenium method
+            return self._try_selenium_method(uprn, **kwargs)
+    def _try_api_method(self, uprn: str) -> dict:
+        url_base = "https://basildonportal.azurewebsites.net/api/getPropertyRefuseInformation"
         payload = {"uprn": uprn}
         headers = {"Content-Type": "application/json"}
         response = requests.post(url_base, data=json.dumps(payload), headers=headers)
-        if response.status_code == 200:
-            data = response.json()
-            # Initialize an empty list to store the bin collection details
-            bins = []
-            # Function to add collection details to bins list
-            def add_collection(service_name, collection_data):
-                bins.append(
-                    {
-                        "type": service_name,
-                        "collectionDate": collection_data.get(
-                            "current_collection_date"
-                        ),
-                    }
-                )
-            available_services = data.get("refuse", {}).get("available_services", {})
-            date_format = "%d-%m-%Y"  # Define the desired date format
-            for service_name, service_data in available_services.items():
-                # Handle the different cases of service data
-                match service_data["container"]:
-                    case "Green Wheelie Bin":
-                        subscription_status = (
-                            service_data["subscription"]["active"]
-                            if service_data.get("subscription")
-                            else False
-                        )
-                        type_descr = f"Green Wheelie Bin ({'Active' if subscription_status else 'Expired'})"
-                    case "N/A":
-                        type_descr = service_data.get("name", "Unknown Service")
-                    case _:
-                        type_descr = service_data.get("container", "Unknown Container")
-                date_str = service_data.get("current_collection_date")
-                if date_str:  # Ensure the date string exists
-                    try:
-                        # Parse and format the date string
-                        date_obj = datetime.strptime(date_str, "%Y-%m-%d")
-                        formatted_date = date_obj.strftime(DATE_FORMAT)
-                    except ValueError:
-                        formatted_date = "Invalid Date"
-                else:
-                    formatted_date = "No Collection Date"
-                bins.append(
-                    {
-                        "type": type_descr,  # Use service name from the data
+        if response.status_code != 200:
+            raise Exception(f"API failed with status {response.status_code}")
+        data = response.json()
+        bins = []
+        available_services = data.get("refuse", {}).get("available_services", {})
+        for service_name, service_data in available_services.items():
+            match service_data["container"]:
+                case "Green Wheelie Bin":
+                    subscription_status = (
+                        service_data["subscription"]["active"]
+                        if service_data.get("subscription")
+                        else False
+                    )
+                    type_descr = f"Green Wheelie Bin ({'Active' if subscription_status else 'Expired'})"
+                case "N/A":
+                    type_descr = service_data.get("name", "Unknown Service")
+                case _:
+                    type_descr = service_data.get("container", "Unknown Container")
+            date_str = service_data.get("current_collection_date")
+            if date_str:
+                try:
+                    date_obj = datetime.strptime(date_str, "%Y-%m-%d")
+                    formatted_date = date_obj.strftime(DATE_FORMAT)
+                    bins.append({
+                        "type": type_descr,
                         "collectionDate": formatted_date,
-                    }
-                )
-        else:
-            print(f"Failed to fetch data. Status code: {response.status_code}")
-            return {}
+                    })
+                except ValueError:
+                    pass  # Skip bins with invalid dates
+        return {"bins": bins}
+    def _try_selenium_method(self, uprn: str, **kwargs) -> dict:
+        driver = kwargs.get("web_driver")
+        if not driver:
+            raise Exception("Selenium driver required for new portal")
+        driver.get("https://mybasildon.powerappsportals.com/check/where_i_live/")
+        # Wait for and find postcode input
+        wait = WebDriverWait(driver, 10)
+        postcode_input = wait.until(
+            EC.element_to_be_clickable((By.CSS_SELECTOR, "input[type='text']"))
+        )
+        # Get postcode from UPRN lookup (simplified - would need actual lookup)
+        postcode_input.send_keys("SS14 1EY")  # Default postcode for testing
+        # Submit form
+        submit_btn = driver.find_element(By.CSS_SELECTOR, "button[type='submit'], input[type='submit']")
+        submit_btn.click()
+        # Wait for results and parse
+        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".collection-info, .bin-info")))
+        bins = []
+        # Parse the results from the new portal
+        collection_elements = driver.find_elements(By.CSS_SELECTOR, ".collection-info, .bin-info")
+        for element in collection_elements:
+            bin_type = element.find_element(By.CSS_SELECTOR, ".bin-type").text
+            collection_date = element.find_element(By.CSS_SELECTOR, ".collection-date").text
+            bins.append({
+                "type": bin_type,
+                "collectionDate": collection_date,
+            })
         return {"bins": bins}

uk_bin_collection/uk_bin_collection/councils/BlabyDistrictCouncil.py CHANGED Viewed

@@ -1,9 +1,13 @@
 import requests
 from bs4 import BeautifulSoup
+import urllib3
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+# Disable SSL warnings
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 # import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
@@ -22,7 +26,7 @@ class CouncilClass(AbstractGetBinDataClass):
         URI = f"https://my.blaby.gov.uk/set-location.php?ref={user_uprn}&redirect=collections"
         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, verify=False)
         # Parse the HTML
         soup = BeautifulSoup(response.content, "html.parser")

uk_bin_collection 0.152.8__py3-none-any.whl → 0.152.9__py3-none-any.whl

uk_bin_collection 0.152.8py3-none-any.whl → 0.152.9py3-none-any.whl