uk_bin_collection 0.151.0__py3-none-any.whl → 0.152.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
- uk_bin_collection/tests/input.json +137 -66
- uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +69 -24
- uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py +149 -0
- uk_bin_collection/uk_bin_collection/councils/BarkingDagenham.py +11 -2
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +24 -47
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +11 -2
- uk_bin_collection/uk_bin_collection/councils/BroadlandDistrictCouncil.py +21 -6
- uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +14 -3
- uk_bin_collection/uk_bin_collection/councils/CheltenhamBoroughCouncil.py +12 -12
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +24 -2
- uk_bin_collection/uk_bin_collection/councils/ChichesterDistrictCouncil.py +105 -53
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +4 -0
- uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +114 -261
- uk_bin_collection/uk_bin_collection/councils/DartfordBoroughCouncil.py +13 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +17 -2
- uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +14 -1
- uk_bin_collection/uk_bin_collection/councils/EastbourneBoroughCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +59 -45
- uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +2 -0
- uk_bin_collection/uk_bin_collection/councils/EppingForestDistrictCouncil.py +47 -15
- uk_bin_collection/uk_bin_collection/councils/FermanaghOmaghDistrictCouncil.py +102 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +13 -1
- uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +2 -3
- uk_bin_collection/uk_bin_collection/councils/HerefordshireCouncil.py +13 -2
- uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +18 -4
- uk_bin_collection/uk_bin_collection/councils/LewesDistrictCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +16 -4
- uk_bin_collection/uk_bin_collection/councils/MaidstoneBoroughCouncil.py +42 -47
- uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +13 -6
- uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py +61 -39
- uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +14 -9
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +2 -2
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +50 -14
- uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py +140 -0
- uk_bin_collection/uk_bin_collection/councils/SouthRibbleCouncil.py +115 -65
- uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +10 -5
- uk_bin_collection/uk_bin_collection/councils/TewkesburyBoroughCouncil.py +40 -0
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +1 -3
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +3 -0
- {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/RECORD +44 -38
- {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/entry_points.txt +0 -0
```diff
@@ -1,7 +1,8 @@
 from bs4 import BeautifulSoup
+from dateutil.relativedelta import relativedelta
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
-from dateutil.relativedelta import relativedelta
 
 
 # import the wonderful Beautiful Soup and the URL grabber
```
```diff
@@ -18,9 +19,20 @@ class CouncilClass(AbstractGetBinDataClass):
         collections = []
         curr_date = datetime.today()
 
-
-
-
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://liverpool.gov.uk/Bins/BinDatesTable?UPRN={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
+        # Make a BS4 object
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, "html.parser")
+        soup.prettify
 
         # Get all table rows on the page - enumerate gives us an index, which is handy for to keep a row count.
         # In this case, the first (0th) row is headings, so we can skip it, then parse the other data.
```
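The new Liverpool lookup prefers a UPRN and keeps a stored URL as a backwards-compatible fallback. A minimal sketch of that pattern in isolation (the function name and error handling below are illustrative, not part of the package):

```python
import requests
from bs4 import BeautifulSoup


def fetch_bin_page(uprn: str | None, stored_url: str | None) -> BeautifulSoup:
    """Build the UPRN URL when possible, else fall back to a stored URL."""
    if uprn:
        url = f"https://liverpool.gov.uk/Bins/BinDatesTable?UPRN={uprn}"
    elif stored_url:
        url = stored_url
    else:
        raise ValueError("Neither a UPRN nor a fallback URL was provided")
    page = requests.get(url, timeout=30)
    page.raise_for_status()
    return BeautifulSoup(page.text, "html.parser")
```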
```diff
@@ -3,6 +3,7 @@ from datetime import datetime
 
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import Select
 from selenium.webdriver.support.wait import WebDriverWait
```
```diff
@@ -10,8 +11,6 @@ from selenium.webdriver.support.wait import WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-# import the wonderful Beautiful Soup and the URL grabber
-
 
 class CouncilClass(AbstractGetBinDataClass):
     """
```
```diff
@@ -30,7 +29,7 @@ class CouncilClass(AbstractGetBinDataClass):
         web_driver = kwargs.get("web_driver")
         headless = kwargs.get("headless")
         check_postcode(user_postcode)
-
+
         # Create Selenium webdriver
         driver = create_webdriver(web_driver, headless, None, __name__)
         driver.get(page)
```
```diff
@@ -41,68 +40,64 @@
             driver.switch_to.frame(iframe_presense)
 
             wait = WebDriverWait(driver, 60)
-
+
             # Postal code input
             inputElement_postcodesearch = wait.until(
                 EC.element_to_be_clickable((By.NAME, "postcode"))
             )
             inputElement_postcodesearch.send_keys(user_postcode)
-
-
-
-
-
-            dropdown_select.click()
-
+
+            time.sleep(5)
+
+            inputElement_postcodesearch.send_keys(Keys.TAB + Keys.DOWN)
+
             dropdown = wait.until(
-                EC.element_to_be_clickable(
+                EC.element_to_be_clickable(
+                    (By.XPATH, f"//div[contains(text(), ' {user_paon}')]")
+                )
             )
             dropdown.click()
 
-            #
-            WebDriverWait(driver,
+            # This website is horrible!
+            WebDriverWait(driver, 20).until(
                 EC.text_to_be_present_in_element(
-                    (By.CSS_SELECTOR, "
-                )
-            )
-
-            # Wait for 'Searching for...' to be removed from page
-            WebDriverWait(driver, timeout=15).until(
-                EC.none_of(
-                    EC.text_to_be_present_in_element(
-                        (By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
-                    )
+                    (By.CSS_SELECTOR, "div.col-collection-panel"), "Next collection"
                 )
             )
 
             # Even then it can still be adding data to the page...
             time.sleep(5)
 
-
-
-
-            def is_a_collection_date(t):
-                return any("Next collection" in c for c in t.children)
-
-            for next_collection in soup.find_all(is_a_collection_date):
-                bin_info = list(
-                    next_collection.parent.select_one("div:nth-child(1)").children
-                )
-                if not bin_info:
-                    continue
-                bin = bin_info[0].get_text()
-                date = next_collection.select_one("strong").get_text(strip=True)
-                bin_date = datetime.strptime(date, "%d %b %Y")
-                dict_data = {
-                    "type": bin,
-                    "collectionDate": bin_date.strftime(date_format),
-                }
-                bin_data["bins"].append(dict_data)
-
-            bin_data["bins"].sort(
-                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+            # Scraping via Selenium rather than BeautifulSoup, to ensure eveything's loaded
+            collection_panels = driver.find_elements(
+                By.CSS_SELECTOR, "div.col-collection-panel"
             )
 
+            for panel in collection_panels:
+                try:
+                    # Get bin type (e.g., General waste, Food waste)
+                    bin_type = panel.find_element(
+                        By.CSS_SELECTOR, "h3.collectionDataHeader"
+                    ).text.strip()
+                    # Get next collection date
+                    lines = panel.find_elements(By.CSS_SELECTOR, "ul li")
+                    for line in lines:
+                        if "Next collection" in line.text:
+                            date_str = (
+                                line.text.split("Next collection")[1]
+                                .strip(": ")
+                                .strip()
+                            )
+                            bin_date = datetime.strptime(date_str, "%d/%m/%Y")
+                            bin_data["bins"].append(
+                                {
+                                    "type": bin_type,
+                                    "collectionDate": bin_date.strftime(date_format),
+                                }
+                            )
+                except Exception as inner_e:
+                    print(f"Skipping one panel due to error: {inner_e}")
+
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
```
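This rewrite stops parsing a stale BeautifulSoup snapshot and instead queries the live DOM through Selenium, gated on an explicit wait for the panel text. The core wait-then-query pattern, reduced to a sketch using the same selectors as the diff:

```python
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait


def wait_for_collection_panels(driver, timeout: int = 20):
    # Block until at least one panel contains the expected text, so the
    # subsequent find_elements call sees fully rendered content.
    WebDriverWait(driver, timeout).until(
        EC.text_to_be_present_in_element(
            (By.CSS_SELECTOR, "div.col-collection-panel"), "Next collection"
        )
    )
    return driver.find_elements(By.CSS_SELECTOR, "div.col-collection-panel")
```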
```diff
@@ -7,14 +7,21 @@ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataC
 
 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
-
-
-
-
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://bincollection.newham.gov.uk/Details/Index/{user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
 
         # Make a BS4 object
-
-        soup.
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, "html.parser")
+        soup.prettify
 
         # Form a JSON wrapper
         data = {"bins": []}
```
```diff
@@ -1,4 +1,5 @@
 from datetime import datetime
+from time import sleep
 
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
```
```diff
@@ -9,8 +10,6 @@ from selenium.webdriver.support.wait import WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-# import the wonderful Beautiful Soup and the URL grabber
-
 
 
 class CouncilClass(AbstractGetBinDataClass):
     """
```
```diff
@@ -34,82 +33,105 @@
             headless = kwargs.get("headless")
             check_uprn(user_uprn)
             check_postcode(user_postcode)
-
+
             driver = create_webdriver(web_driver, headless, None, __name__)
             driver.get(page)
 
-            # If you bang in the house number (or property name) and postcode in the box it should find your property
-
             iframe_presense = WebDriverWait(driver, 30).until(
                 EC.presence_of_element_located((By.ID, "fillform-frame-1"))
             )
 
             driver.switch_to.frame(iframe_presense)
             wait = WebDriverWait(driver, 60)
+
             inputElement_postcodesearch = wait.until(
                 EC.element_to_be_clickable((By.NAME, "postcode_search"))
             )
-
             inputElement_postcodesearch.send_keys(str(user_postcode))
 
-            # Wait for the 'Select your property' dropdown to appear and select the first result
             dropdown = wait.until(EC.element_to_be_clickable((By.NAME, "selAddress")))
-
             dropdown_options = wait.until(
                 EC.presence_of_element_located((By.CLASS_NAME, "lookup-option"))
             )
 
-            # Create a 'Select' for it, then select the first address in the list
-            # (Index 0 is "Make a selection from the list")
             drop_down_values = Select(dropdown)
             option_element = wait.until(
                 EC.presence_of_element_located(
                     (By.CSS_SELECTOR, f'option.lookup-option[value="{str(user_uprn)}"]')
                 )
             )
-
             drop_down_values.select_by_value(str(user_uprn))
 
-            # Wait for the 'View more' link to appear, then click it to get the full set of dates
             h3_element = wait.until(
                 EC.presence_of_element_located(
                     (By.XPATH, "//th[contains(text(), 'Waste Collection')]")
                 )
             )
 
+            sleep(10)
+
             soup = BeautifulSoup(driver.page_source, features="html.parser")
+            print("Parsing HTML content...")
+
+            collection_rows = soup.find_all("tr")
+
+            for row in collection_rows:
+                cells = row.find_all("td")
+                if len(cells) == 3:  # Date, Image, Bin Type
+                    # Extract date carefully
+                    date_labels = cells[0].find_all("label")
+                    collection_date = None
+                    for label in date_labels:
+                        label_text = label.get_text().strip()
+                        if contains_date(label_text):
+                            collection_date = label_text
+                            break
+
+                    # Extract bin type
+                    bin_label = cells[2].find("label")
+                    bin_types = bin_label.get_text().strip() if bin_label else None
+
+                    if collection_date and bin_types:
+                        print(f"Found collection: {collection_date} - {bin_types}")
+
+                        # Handle combined collections
+                        if "&" in bin_types:
+                            if "Burgundy" in bin_types:
+                                data["bins"].append(
+                                    {
+                                        "type": "Burgundy Bin",
+                                        "collectionDate": datetime.strptime(
+                                            collection_date, "%d/%m/%Y"
+                                        ).strftime(date_format),
+                                    }
+                                )
+                            if "Green" in bin_types:
+                                data["bins"].append(
+                                    {
+                                        "type": "Green Bin",
+                                        "collectionDate": datetime.strptime(
+                                            collection_date, "%d/%m/%Y"
+                                        ).strftime(date_format),
+                                    }
+                                )
+                        else:
+                            if "Black" in bin_types:
+                                data["bins"].append(
+                                    {
+                                        "type": "Black Bin",
+                                        "collectionDate": datetime.strptime(
+                                            collection_date, "%d/%m/%Y"
+                                        ).strftime(date_format),
+                                    }
+                                )
+
+            print(f"Found {len(data['bins'])} collections")
+            print(f"Final data: {data}")
 
-            target_h3 = soup.find("h3", string="Collection Details")
-            tables_after_h3 = target_h3.parent.parent.find_next("table")
-
-            table_rows = tables_after_h3.find_all("tr")
-            for row in table_rows:
-                rowdata = row.find_all("td")
-                if len(rowdata) == 3:
-                    labels = rowdata[0].find_all("label")
-                    # Strip the day (i.e., Monday) out of the collection date string for parsing
-                    if len(labels) >= 2:
-                        date_label = labels[1]
-                        datestring = date_label.text.strip()
-
-                        # Add the bin type and collection date to the 'data' dictionary
-                        data["bins"].append(
-                            {
-                                "type": rowdata[2].text.strip(),
-                                "collectionDate": datetime.strptime(
-                                    datestring, "%d/%m/%Y"
-                                ).strftime(
-                                    date_format
-                                ),  # Format the date as needed
-                            }
-                        )
         except Exception as e:
-            # Here you can log the exception if needed
             print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-            # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
         return data
```
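The new parser fans a combined entry such as "Burgundy & Green" out into one record per bin colour. A simplified equivalent of that branching (colour names taken from the diff; the real code special-cases Black for single entries):

```python
from datetime import datetime

DATE_FORMAT = "%d/%m/%Y"  # assumed output format for this sketch


def split_bin_types(bin_types: str, collection_date: str) -> list[dict]:
    """Turn "Burgundy & Green" into two records, "Black" into one."""
    parsed = datetime.strptime(collection_date, "%d/%m/%Y")
    return [
        {"type": f"{colour} Bin", "collectionDate": parsed.strftime(DATE_FORMAT)}
        for colour in ("Burgundy", "Green", "Black")
        if colour in bin_types
    ]
```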
```diff
@@ -43,9 +43,13 @@
         collectionDate = (
             cells[1].get_text(strip=True) + " " + datetime.now().strftime("%Y")
         )
-
-
-
+
+        if len(cells) > 2:
+            nextCollectionDate = (
+                cells[2].get_text(strip=True) + " " + datetime.now().strftime("%Y")
+            )
+        else:
+            nextCollectionDate = ""
 
         # Make each Bin element in the JSON
         dict_data = {
```
```diff
@@ -59,12 +63,13 @@
         data["bins"].append(dict_data)
 
         # Make each next Bin element in the JSON
-
-
-
-
-
-
+        if nextCollectionDate != "":
+            dict_data = {
+                "type": binType,
+                "collectionDate": get_next_occurrence_from_day_month(
+                    datetime.strptime(nextCollectionDate, "%A %d %B %Y")
+                ).strftime(date_format),
+            }
 
         # Add data to the main JSON Wrapper
         data["bins"].append(dict_data)
```
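`get_next_occurrence_from_day_month` comes from the package's common module and its implementation is not shown in this diff; from the call site it evidently rolls a parsed day-and-month forward to its next future occurrence. A hypothetical stand-in with that behaviour:

```python
from datetime import datetime


def next_occurrence_from_day_month(parsed: datetime) -> datetime:
    # Hypothetical: the real helper lives in uk_bin_collection's common
    # module and may differ. Roll the day/month forward to the next
    # occurrence that is not in the past (leap days ignored for brevity).
    today = datetime.now()
    candidate = parsed.replace(year=today.year)
    if candidate.date() < today.date():
        candidate = candidate.replace(year=today.year + 1)
    return candidate
```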
```diff
@@ -81,9 +81,9 @@
         # The regular calendar only shows until end of March 2026, work out how many weeks that is
         weeks_total = math.floor((datetime(2026, 4, 1) - datetime.now()).days / 7)
 
-        # The garden calendar only shows until end of November
+        # The garden calendar only shows until end of November 2025, work out how many weeks that is
         garden_weeks_total = math.floor(
-            (datetime(
+            (datetime(2025, 12, 1) - datetime.now()).days / 7
         )
 
         regular_collections, garden_collections, special_collections = [], [], []
```
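Both horizons reduce to the same arithmetic: the number of whole weeks between now and a fixed cut-off date. Worked in isolation:

```python
import math
from datetime import datetime

# Whole weeks until the regular calendar's horizon (1 April 2026)...
weeks_total = math.floor((datetime(2026, 4, 1) - datetime.now()).days / 7)

# ...and until the garden calendar's horizon (1 December 2025).
garden_weeks_total = math.floor((datetime(2025, 12, 1) - datetime.now()).days / 7)

print(weeks_total, garden_weeks_total)
```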
```diff
@@ -1,7 +1,10 @@
 import time
 
 from bs4 import BeautifulSoup
+from selenium.common.exceptions import TimeoutException
 from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import WebDriverWait
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
```
```diff
@@ -42,28 +45,61 @@
             driver = create_webdriver(web_driver, headless, None, __name__)
             driver.get(page)
 
-
+            # Create wait object
+            wait = WebDriverWait(driver, 20)
 
-            #
-
-
-
-
-
+            # Wait for and click cookie button
+            cookie_button = wait.until(
+                EC.element_to_be_clickable((By.ID, "ccc-notify-accept"))
+            )
+            cookie_button.click()
+
+            # Wait for and find house number input
+            inputElement_hn = wait.until(
+                EC.presence_of_element_located(
+                    (
+                        By.ID,
+                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtHouse",
+                    )
+                )
             )
-
-
-
+
+            # Wait for and find postcode input
+            inputElement_pc = wait.until(
+                EC.presence_of_element_located(
+                    (
+                        By.ID,
+                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtPostcode",
+                    )
+                )
             )
 
+            # Enter details
             inputElement_pc.send_keys(user_postcode)
             inputElement_hn.send_keys(user_paon)
 
-
-
-
-
+            # Click lookup button and wait for results
+            lookup_button = wait.until(
+                EC.element_to_be_clickable(
+                    (
+                        By.ID,
+                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_butLookup",
+                    )
+                )
+            )
+            lookup_button.click()
+
+            # Wait for results to load
+            route_summary = wait.until(
+                EC.presence_of_element_located(
+                    (
+                        By.ID,
+                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
+                    )
+                )
+            )
 
+            # Get page source after everything has loaded
             soup = BeautifulSoup(driver.page_source, features="html.parser")
 
             # Work out which bins can be collected for this address. Glass bins are only on some houses due to pilot programme.
```
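The page here is ASP.NET WebForms, hence the long auto-generated control IDs. If those prefixes ever change, matching on a stable ID suffix is a common defensive alternative (illustrative only; the scraper itself uses the full IDs):

```python
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def find_by_id_suffix(driver, suffix: str, timeout: int = 20):
    # e.g. find_by_id_suffix(driver, "NCCAddressLookup_txtHouse") matches the
    # element regardless of the WebForms naming-container prefix.
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, f"[id$='{suffix}']"))
    )
```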
```diff
@@ -0,0 +1,140 @@
+import time
+import re
+import requests
+from datetime import datetime
+from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import WebDriverWait
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+def get_street_from_postcode(postcode: str, api_key: str) -> str:
+    url = "https://maps.googleapis.com/maps/api/geocode/json"
+    params = {"address": postcode, "key": api_key}
+    response = requests.get(url, params=params)
+    data = response.json()
+
+    if data["status"] != "OK":
+        raise ValueError(f"API error: {data['status']}")
+
+    for component in data["results"][0]["address_components"]:
+        if "route" in component["types"]:
+            return component["long_name"]
+
+    raise ValueError("No street (route) found in the response.")
+
+class CouncilClass(AbstractGetBinDataClass):
+    def parse_data(self, page: str, **kwargs) -> dict:
+        driver = None
+        bin_data = {"bins": []}
+        try:
+            user_postcode = kwargs.get("postcode")
+            if not user_postcode:
+                raise ValueError("No postcode provided.")
+            check_postcode(user_postcode)
+
+            headless = kwargs.get("headless")
+            web_driver = kwargs.get("web_driver")
+            driver = create_webdriver(web_driver, headless, None, __name__)
+            page = "https://www.slough.gov.uk/bin-collections"
+            driver.get(page)
+
+            # Accept cookies
+            WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable((By.ID, "ccc-recommended-settings"))
+            ).click()
+
+            # Enter the street name into the address search
+            address_input = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.ID, "keyword_directory25"))
+            )
+            user_address = get_street_from_postcode(user_postcode, "AIzaSyBDLULT7EIlNtHerswPtfmL15Tt3Oc0bV8")
+            address_input.send_keys(user_address + Keys.ENTER)
+
+            # Wait for address results to load
+            WebDriverWait(driver, 10).until(
+                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "span.list__link-text"))
+            )
+            span_elements = driver.find_elements(By.CSS_SELECTOR, "span.list__link-text")
+
+            for span in span_elements:
+                if user_address.lower() in span.text.lower():
+                    span.click()
+                    break
+            else:
+                raise Exception(f"No link found containing address: {user_address}")
+
+            # Wait for address detail page
+            WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.CSS_SELECTOR, "section.site-content"))
+            )
+            soup = BeautifulSoup(driver.page_source, "html.parser")
+
+            # Extract each bin link and type
+            for heading in soup.select("dt.definition__heading"):
+                heading_text = heading.get_text(strip=True)
+                if "bin day details" in heading_text.lower():
+                    bin_type = heading_text.split()[0].capitalize() + " bin"
+                    dd = heading.find_next_sibling("dd")
+                    link = dd.find("a", href=True)
+
+                    if link:
+                        bin_url = link["href"]
+                        if not bin_url.startswith("http"):
+                            bin_url = "https://www.slough.gov.uk" + bin_url
+
+                        # Visit the child page
+                        print(f"Navigating to {bin_url}")
+                        driver.get(bin_url)
+                        WebDriverWait(driver, 10).until(
+                            EC.presence_of_element_located((By.CSS_SELECTOR, "div.page-content"))
+                        )
+                        child_soup = BeautifulSoup(driver.page_source, "html.parser")
+
+                        editor_div = child_soup.find("div", class_="editor")
+                        if not editor_div:
+                            print("No editor div found on bin detail page.")
+                            continue
+
+                        ul = editor_div.find("ul")
+                        if not ul:
+                            print("No <ul> with dates found in editor div.")
+                            continue
+
+                        for li in ul.find_all("li"):
+                            raw_text = li.get_text(strip=True).replace(".", "")
+
+                            if "no collection" in raw_text.lower() or "no collections" in raw_text.lower():
+                                print(f"Ignoring non-collection note: {raw_text}")
+                                continue
+
+                            raw_date = raw_text
+
+                            try:
+                                parsed_date = datetime.strptime(raw_date, "%d %B %Y")
+                            except ValueError:
+                                raw_date_cleaned = raw_date.split("(")[0].strip()
+                                try:
+                                    parsed_date = datetime.strptime(raw_date_cleaned, "%d %B %Y")
+                                except Exception:
+                                    print(f"Could not parse date: {raw_text}")
+                                    continue
+
+                            formatted_date = parsed_date.strftime("%d/%m/%Y")
+                            contains_date(formatted_date)
+                            bin_data["bins"].append({
+                                "type": bin_type,
+                                "collectionDate": formatted_date
+                            })
+
+                            print(f"Type: {bin_type}, Date: {formatted_date}")
+
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            raise
+        finally:
+            if driver:
+                driver.quit()
+        return bin_data
```
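The new Slough scraper first resolves the postcode to a street name through the Google Geocoding API (the module-level `get_street_from_postcode` above), then drives the site's directory search with it. Hypothetical usage, with a placeholder postcode and key:

```python
# Assumes a valid Google Geocoding API key; the postcode is a placeholder.
street = get_street_from_postcode("SL1 1XX", "YOUR_GOOGLE_API_KEY")
print(street)  # e.g. "High Street", the value typed into keyword_directory25
```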