uk_bin_collection 0.152.8__py3-none-any.whl → 0.152.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. uk_bin_collection/tests/input.json +11 -15
  2. uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py +69 -46
  3. uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +119 -37
  4. uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py +158 -115
  5. uk_bin_collection/uk_bin_collection/councils/BasildonCouncil.py +87 -66
  6. uk_bin_collection/uk_bin_collection/councils/BlabyDistrictCouncil.py +5 -1
  7. uk_bin_collection/uk_bin_collection/councils/BlaenauGwentCountyBoroughCouncil.py +91 -66
  8. uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py +88 -67
  9. uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +67 -56
  10. uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +63 -95
  11. uk_bin_collection/uk_bin_collection/councils/CherwellDistrictCouncil.py +39 -18
  12. uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +106 -97
  13. uk_bin_collection/uk_bin_collection/councils/CopelandBoroughCouncil.py +80 -75
  14. uk_bin_collection/uk_bin_collection/councils/CotswoldDistrictCouncil.py +191 -67
  15. uk_bin_collection/uk_bin_collection/councils/CoventryCityCouncil.py +6 -2
  16. uk_bin_collection/uk_bin_collection/councils/SouthwarkCouncil.py +23 -1
  17. uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +4 -1
  18. uk_bin_collection/uk_bin_collection/get_bin_data.py +1 -1
  19. {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.9.dist-info}/METADATA +1 -1
  20. {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.9.dist-info}/RECORD +23 -24
  21. uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py +0 -69
  22. {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.9.dist-info}/LICENSE +0 -0
  23. {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.9.dist-info}/WHEEL +0 -0
  24. {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.9.dist-info}/entry_points.txt +0 -0

uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py
@@ -1,127 +1,95 @@
- # This script pulls (in one hit) the data from Bromley Council Bins Data
- import datetime
  import re
  import time
- from datetime import datetime
-
+ from datetime import datetime, timedelta
  import requests
  from bs4 import BeautifulSoup
  from selenium.webdriver.common.by import By
- from selenium.webdriver.common.keys import Keys
  from selenium.webdriver.support import expected_conditions as EC
- from selenium.webdriver.support.ui import Select
  from selenium.webdriver.support.wait import WebDriverWait
+ from icalevents.icalevents import events
 
  from uk_bin_collection.uk_bin_collection.common import *
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-
- # import the wonderful Beautiful Soup and the URL grabber
  class CouncilClass(AbstractGetBinDataClass):
-     """
-     Concrete classes have to implement all abstract operations of the
-     base class. They can also override some operations with a default
-     implementation.
-     """
-
      def parse_data(self, page: str, **kwargs) -> dict:
          driver = None
          try:
             data = {"bins": []}
             headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"}
-
-             uprn = kwargs.get("uprn")
+
             postcode = kwargs.get("postcode")
             user_paon = kwargs.get("paon")
             web_driver = kwargs.get("web_driver")
             headless = kwargs.get("headless")
+
             driver = create_webdriver(web_driver, headless, None, __name__)
-             url = kwargs.get("url")
-
-             driver.execute_script(f"window.location.href='{url}'")
-
-             wait = WebDriverWait(driver, 120)
-             post_code_search = wait.until(
-                 EC.presence_of_element_located((By.XPATH, '//input[@name="keyword"]'))
-             )
-
-             post_code_search.send_keys(postcode)
-
-             submit_btn = wait.until(
-                 EC.presence_of_element_located((By.CLASS_NAME, "__submitButton"))
-             )
-
-             submit_btn.send_keys(Keys.ENTER)
-
-             address_results = wait.until(
-                 EC.presence_of_element_located((By.CLASS_NAME, "directories-table"))
-             )
-             address_link = wait.until(
-                 EC.presence_of_element_located(
-                     (By.XPATH, f"//a[contains(text(), '{user_paon}')]")
+             wait = WebDriverWait(driver, 30)
+
+             # Navigate to bin collection page
+             driver.get("https://www.chelmsford.gov.uk/bins-and-recycling/check-your-collection-day/")
+
+             # Handle cookie overlay
+             try:
+                 accept_btn = wait.until(
+                     EC.element_to_be_clickable((By.XPATH, "//*[contains(text(), 'ACCEPT')]"))
                  )
+                 accept_btn.click()
+                 time.sleep(1)
+             except:
+                 pass
+
+             # Find postcode input field (dynamic ID)
+             postcode_input = wait.until(
+                 EC.presence_of_element_located((By.XPATH, "//input[contains(@id, '_keyword')]"))
             )
-
-             address_link.send_keys(Keys.ENTER)
-             results = wait.until(
-                 EC.presence_of_element_located((By.CLASS_NAME, "usercontent"))
+             postcode_input.clear()
+             postcode_input.send_keys(postcode)
+
+             # Click search button
+             submit_btn = wait.until(
+                 EC.element_to_be_clickable((By.CLASS_NAME, "__submitButton"))
             )
-
-             # Make a BS4 object
+             submit_btn.click()
+
+             # Wait for results table
+             wait.until(EC.presence_of_element_located((By.TAG_NAME, "table")))
+
+             # Get the collection round from the table row
             soup = BeautifulSoup(driver.page_source, features="html.parser")
-             soup.prettify()
-
-             # Get collection calendar
-             calendar_urls = soup.find_all(
-                 "a", string=re.compile(r"view or download the collection calendar")
-             )
-             if len(calendar_urls) > 0:
-                 requests.packages.urllib3.disable_warnings()
-                 response = requests.get(calendar_urls[0].get("href"), headers=headers)
-
-                 # Make a BS4 object
-                 soup = BeautifulSoup(response.text, features="html.parser")
-                 soup.prettify()
-
-                 # Loop the months
-                 for month in soup.find_all("div", {"class": "usercontent"}):
-                     year = ""
-                     if month.find("h2") and "calendar" not in month.find("h2").get_text(
-                         strip=True
-                     ):
-                         year = datetime.strptime(
-                             month.find("h2").get_text(strip=True), "%B %Y"
-                         ).strftime("%Y")
-                     elif month.find("h3"):
-                         year = datetime.strptime(
-                             month.find("h3").get_text(strip=True), "%B %Y"
-                         ).strftime("%Y")
-                     if year != "":
-                         for row in month.find_all("li"):
-                             results = re.search(
-                                 "([A-Za-z]+ \\d\\d? [A-Za-z]+): (.+)",
-                                 row.get_text(strip=True),
-                             )
-                             if results:
-                                 dict_data = {
-                                     "type": results.groups()[1].capitalize(),
-                                     "collectionDate": datetime.strptime(
-                                         results.groups()[0] + " " + year, "%A %d %B %Y"
-                                     ).strftime(date_format),
-                                 }
-                                 data["bins"].append(dict_data)
-
-             # Sort collections
-             data["bins"].sort(
-                 key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
-             )
+
+             # Find the row containing the address
+             for row in soup.find_all("tr"):
+                 if user_paon in row.get_text():
+                     # Extract collection round (e.g., "Tuesday B")
+                     row_text = row.get_text()
+                     round_match = re.search(r"(Monday|Tuesday|Wednesday|Thursday|Friday)\s+([AB])", row_text)
+                     if round_match:
+                         day = round_match.group(1).lower()
+                         letter = round_match.group(2).lower()
+                         ics_url = f"https://www.chelmsford.gov.uk/media/4ipavf0m/{day}-{letter}-calendar.ics"
+                         break
+             else:
+                 raise ValueError(f"Could not find collection round for address: {user_paon}")
+
+             # Get events from ICS file within the next 60 days
+             now = datetime.now()
+             future = now + timedelta(days=60)
+
+             # Parse ICS calendar
+             upcoming_events = events(ics_url, start=now, end=future)
+
+             for event in sorted(upcoming_events, key=lambda e: e.start):
+                 if event.summary and event.start:
+                     data["bins"].append({
+                         "type": event.summary,
+                         "collectionDate": event.start.date().strftime(date_format)
+                     })
         except Exception as e:
-             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
-             # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-             # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
-         return data
+
+         return data
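
Note: the rewritten Chelmsford parser no longer follows a per-address calendar link. It matches the address row in the search results to a collection round such as "Tuesday B", then reads the published ICS calendar for that round. A minimal sketch of just the ICS step, using the icalevents call exactly as in the diff; the "tuesday"/"b" round values below are placeholder examples, not derived from a real lookup:

    from datetime import datetime, timedelta
    from icalevents.icalevents import events

    day, letter = "tuesday", "b"  # derived from the results table in the real code
    ics_url = f"https://www.chelmsford.gov.uk/media/4ipavf0m/{day}-{letter}-calendar.ics"

    now = datetime.now()
    for event in sorted(events(ics_url, start=now, end=now + timedelta(days=60)),
                        key=lambda e: e.start):
        # Each event's SUMMARY names the bin type; DTSTART is the collection date
        print(event.summary, event.start.date().strftime("%d/%m/%Y"))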

uk_bin_collection/uk_bin_collection/councils/CherwellDistrictCouncil.py
@@ -23,8 +23,11 @@ class CouncilClass(AbstractGetBinDataClass):
 
         URI = f"https://www.cherwell.gov.uk/homepage/129/bin-collection-search?uprn={user_uprn}"
 
-         # Make the GET request
-         response = requests.get(URI)
+         # Make the GET request with proper headers
+         headers = {
+             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+         }
+         response = requests.get(URI, headers=headers)
 
         soup = BeautifulSoup(response.text, "html.parser")
 
@@ -45,22 +48,38 @@ class CouncilClass(AbstractGetBinDataClass):
 
             return date_obj.strftime(date_format)  # Return in YYYY-MM-DD format
 
-         # print(soup)
-
-         div = soup.find("div", class_="bin-collection-results__tasks")
-
-         for item in div.find_all("li", class_="list__item"):
-             # Extract bin type
-             bin_type_tag = item.find("h3", class_="bin-collection-tasks__heading")
-             bin_type = (
-                 "".join(bin_type_tag.find_all(text=True, recursive=False)).strip()
-                 if bin_type_tag
-                 else "Unknown Bin"
-             )
+         # Find the bin collection results section
+         results_div = soup.find("div", class_="bin-collection-results")
+         if not results_div:
+             return bindata
+
+         tasks_div = results_div.find("div", class_="bin-collection-results__tasks")
+         if not tasks_div:
+             return bindata
+
+         # Find all bin collection items
+         for item in tasks_div.find_all("li", class_="list__item"):
+             # Extract bin type from heading
+             heading = item.find("h3", class_="bin-collection-tasks__heading")
+             if not heading:
+                 continue
+
+             # Get the bin type text, excluding visually hidden spans
+             bin_type = ""
+             for text_node in heading.find_all(text=True):
+                 parent = text_node.parent
+                 if not (parent.name == "span" and "visually-hidden" in parent.get("class", [])):
+                     bin_type += text_node.strip()
+
+             if not bin_type:
+                 continue
 
             # Extract collection date
             date_tag = item.find("p", class_="bin-collection-tasks__date")
-             collection_date = date_tag.text.strip() if date_tag else "Unknown Date"
+             if not date_tag:
+                 continue
+
+             collection_date = date_tag.text.strip()
 
             dict_data = {
                 "type": bin_type,
@@ -68,8 +87,10 @@ class CouncilClass(AbstractGetBinDataClass):
             }
             bindata["bins"].append(dict_data)
 
-         bindata["bins"].sort(
-             key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
-         )
+         # Sort bins by collection date
+         if bindata["bins"]:
+             bindata["bins"].sort(
+                 key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+             )
 
         return bindata
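
Note: the Cherwell change guards every lookup (results div, heading, date tag) and now assembles the bin type by walking all text nodes and skipping screen-reader-only spans, instead of taking only top-level text. A self-contained sketch of that filtering; the HTML snippet here is a hypothetical example of the page structure, not captured from the live site:

    from bs4 import BeautifulSoup

    html = (
        '<h3 class="bin-collection-tasks__heading">'
        '<span class="visually-hidden">Your next </span>Food waste</h3>'
    )
    heading = BeautifulSoup(html, "html.parser").h3

    bin_type = ""
    for text_node in heading.find_all(string=True):  # string= is the modern spelling of text=
        parent = text_node.parent
        # Skip screen-reader-only text such as "Your next "
        if not (parent.name == "span" and "visually-hidden" in parent.get("class", [])):
            bin_type += text_node.strip()

    print(bin_type)  # -> "Food waste"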

uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py
@@ -1,5 +1,5 @@
  import time
- import urllib.parse
+ from datetime import datetime
 
  from bs4 import BeautifulSoup
  from selenium.webdriver.common.by import By
@@ -11,18 +11,6 @@ from uk_bin_collection.uk_bin_collection.common import *
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
- def format_bin_type(bin_colour: str):
-     bin_types = {
-         "grey": "Garden waste (Grey Bin)",
-         "brown": "Paper and card (Brown Bin)",
-         "blue": "Bottles and cans (Blue Bin)",
-         "green": "General waste (Green Bin)",
-     }
-     bin_colour = urllib.parse.unquote(bin_colour).split(" ")[0].lower()
-     return bin_types[bin_colour]
-
-
- # import the wonderful Beautiful Soup and the URL grabber
  class CouncilClass(AbstractGetBinDataClass):
      """
      Concrete classes have to implement all abstract operations of the
@@ -41,99 +29,120 @@ class CouncilClass(AbstractGetBinDataClass):
             check_uprn(user_uprn)
             check_postcode(user_postcode)
 
-             # Ensure UPRN starts with "UPRN"
-             if not user_uprn.startswith("UPRN"):
-                 user_uprn = f"UPRN{user_uprn}"
-
             # Create Selenium webdriver
-             user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+             user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
             driver = create_webdriver(web_driver, headless, user_agent, __name__)
-             driver.get("https://myaccount.chorley.gov.uk/wastecollections.aspx")
-
-             # Accept cookies banner
-             cookieBanner = WebDriverWait(driver, 10).until(
-                 EC.presence_of_element_located((By.ID, "PrivacyPolicyNotification"))
-             )
-             cookieClose = cookieBanner.find_element(
-                 By.CSS_SELECTOR, "span.ui-icon-circle-close"
+
+             # Navigate to the start page
+             driver.get("https://chorley.gov.uk/bincollectiondays")
+
+             # Click the "Check your collection day" button
+             check_button = WebDriverWait(driver, 10).until(
+                 EC.element_to_be_clickable((By.XPATH, "//a[@class='button' and @href='https://forms.chorleysouthribble.gov.uk/chorley-bincollectiondays']")
+             ))
+             check_button.click()
+
+             # Wait for the form to load and enter postcode
+             postcode_input = WebDriverWait(driver, 10).until(
+                 EC.presence_of_element_located((By.XPATH, "//input[@type='text'][1]")
+             ))
+             postcode_input.clear()
+             postcode_input.send_keys(user_postcode)
+
+             # Click the Lookup button
+             lookup_button = WebDriverWait(driver, 10).until(
+                 EC.element_to_be_clickable((By.XPATH, "//button[contains(@class, 'btn--lookup')]")
+             ))
+             lookup_button.click()
+
+             # Wait for the property dropdown to be populated
+             property_dropdown = WebDriverWait(driver, 10).until(
+                 EC.presence_of_element_located((By.XPATH, "//select[@class='form__select']")
+             ))
+
+             # Wait a moment for the dropdown to be fully populated
+             time.sleep(2)
+
+             # Find the property that matches the UPRN or select the first available property
+             select = Select(property_dropdown)
+             options = select.options
+
+             # Skip the "Please choose..." option and select based on UPRN or first available
+             selected = False
+             for option in options[1:]:  # Skip first "Please choose..." option
+                 if user_uprn in option.get_attribute("value") or not selected:
+                     select.select_by_visible_text(option.text)
+                     selected = True
+                     break
+
+             if not selected and len(options) > 1:
+                 # If no UPRN match, select the first available property
+                 select.select_by_index(1)
+
+             # Click the Next button
+             next_button = WebDriverWait(driver, 10).until(
+                 EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit'][value='Next']"))
             )
-             cookieClose.click()
-
-             # Populate postcode field
-             inputElement_postcode = driver.find_element(
-                 By.ID,
-                 "MainContent_addressSearch_txtPostCodeLookup",
-             )
-             inputElement_postcode.send_keys(user_postcode)
-
-             # Click search button
-             findAddress = WebDriverWait(driver, 10).until(
-                 EC.presence_of_element_located(
-                     (
-                         By.ID,
-                         "MainContent_addressSearch_btnFindAddress",
-                     )
-                 )
-             )
-             findAddress.click()
-
-             time.sleep(1)
-
-             # Wait for the 'Select address' dropdown to appear and select option matching UPRN
-             dropdown = WebDriverWait(driver, 10).until(
-                 EC.presence_of_element_located(
-                     (
-                         By.ID,
-                         "MainContent_addressSearch_ddlAddress",
-                     )
-                 )
+             next_button.click()
+
+             # Wait for the results page to load
+             WebDriverWait(driver, 10).until(
+                 EC.presence_of_element_located((By.XPATH, "//th[text()='Collection']"))
             )
-             # Create a 'Select' for it, then select the matching URPN option
-             dropdownSelect = Select(dropdown)
-             dropdownSelect.select_by_value(user_uprn)
-
-             # Wait for the submit button to appear, then click it to get the collection dates
-             submit = WebDriverWait(driver, 10).until(
-                 EC.presence_of_element_located((By.ID, "MainContent_btnSearch"))
-             )
-             submit.click()
-
-             soup = BeautifulSoup(driver.page_source, features="html.parser")
-
-             # Get the property details
-             property_details = soup.find(
-                 "table",
-                 {"class": "WasteCollection"},
-             )
-
-             # Get the dates
-             for row in property_details.tbody.find_all("tr", recursive=False):
-                 month_col = row.td
-                 month = month_col.get_text(strip=True)
-
-                 for date_col in month_col.find_next_siblings("td"):
-                     day = date_col.p.contents[0].strip()
-
-                     if day == "":
-                         continue
-
-                     for bin_type in date_col.find_all("img"):
-                         bin_colour = bin_type.get("src").split("/")[-1].split(".")[0]
-                         date_object = datetime.strptime(f"{day} {month}", "%d %B %Y")
-                         date_formatted = date_object.strftime("%d/%m/%Y")
-
-                         dict_data = {
-                             "type": format_bin_type(bin_colour),
-                             "collectionDate": date_formatted,
-                         }
-                         data["bins"].append(dict_data)
+
+             # Parse the results
+             soup = BeautifulSoup(driver.page_source, "html.parser")
+
+             # Find the table with collection data
+             table = soup.find("table")
+
+             if table:
+                 rows = table.find_all("tr")
+
+                 for i, row in enumerate(rows):
+                     cells = row.find_all(["td", "th"])
+
+                     if i > 0 and len(cells) >= 2:  # Skip header row
+                         collection_type = cells[0].get_text(strip=True)
+                         collection_date = cells[1].get_text(strip=True)
+
+                         if collection_type and collection_date and collection_date != "Collection":
+                             # Try to parse the date
+                             try:
+                                 # Handle the format "Tuesday, 05/08/25"
+                                 if ", " in collection_date and "/" in collection_date:
+                                     # Remove the day name and parse the date
+                                     date_part = collection_date.split(", ")[1]
+                                     # Handle 2-digit year format
+                                     if len(date_part.split("/")[2]) == 2:
+                                         date_obj = datetime.strptime(date_part, "%d/%m/%y")
+                                     else:
+                                         date_obj = datetime.strptime(date_part, "%d/%m/%Y")
+                                 elif "/" in collection_date:
+                                     date_obj = datetime.strptime(collection_date, "%d/%m/%Y")
+                                 elif "-" in collection_date:
+                                     date_obj = datetime.strptime(collection_date, "%Y-%m-%d")
+                                 else:
+                                     # Try to parse other formats
+                                     date_obj = datetime.strptime(collection_date, "%d %B %Y")
+
+                                 formatted_date = date_obj.strftime("%d/%m/%Y")
+
+                                 dict_data = {
+                                     "type": collection_type,
+                                     "collectionDate": formatted_date,
+                                 }
+                                 data["bins"].append(dict_data)
+                             except ValueError:
+                                 # If date parsing fails, skip this entry
+                                 continue
+
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
             # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-             # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
-         return data
+         return data
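
Note: the new Chorley table parser accepts several date renderings from the results page ("Tuesday, 05/08/25", "05/08/2025", "2025-08-05", "5 August 2025") and normalises them all to DD/MM/YYYY. The same fallback chain as a standalone helper; parse_collection_date is an illustrative name, not part of the package:

    from datetime import datetime

    def parse_collection_date(text: str) -> str:
        if ", " in text and "/" in text:  # "Tuesday, 05/08/25"
            date_part = text.split(", ")[1]
            # Two-digit years use %y, four-digit years use %Y
            fmt = "%d/%m/%y" if len(date_part.split("/")[2]) == 2 else "%d/%m/%Y"
            return datetime.strptime(date_part, fmt).strftime("%d/%m/%Y")
        if "/" in text:  # "05/08/2025"
            return datetime.strptime(text, "%d/%m/%Y").strftime("%d/%m/%Y")
        if "-" in text:  # "2025-08-05"
            return datetime.strptime(text, "%Y-%m-%d").strftime("%d/%m/%Y")
        # Fall back to long-form dates such as "5 August 2025"
        return datetime.strptime(text, "%d %B %Y").strftime("%d/%m/%Y")

    print(parse_collection_date("Tuesday, 05/08/25"))  # -> 05/08/2025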