PyPI - uk_bin_collection - Versions diffs - 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl - Mend

uk_bin_collection 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (35) hide show

uk_bin_collection/uk_bin_collection/councils/RotherhamCouncil.py CHANGED Viewed

@@ -1,57 +1,89 @@
-from bs4 import BeautifulSoup
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+import requests
+from datetime import datetime
-# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
-    Concrete classes have to implement all abstract operations of the
-    base class. They can also override some operations with a default
-    implementation.
+    Rotherham collections via the public JSON API.
+    Returns the same shape as before:
+      {"bins": [{"type": "Black Bin", "collectionDate": "Tuesday, 29 September 2025"}, ...]}
+    Accepts kwargs['premisesid'] (recommended) or a numeric kwargs['uprn'].
     """
     def parse_data(self, page: str, **kwargs) -> dict:
-        user_uprn = kwargs.get("uprn")
+        # prefer explicit premisesid, fallback to uprn (if numeric)
+        premises = kwargs.get("premisesid")
+        uprn = kwargs.get("uprn")
-        check_uprn(user_uprn)
+        if uprn:
+            # preserve original behaviour where check_uprn exists for validation,
+            # but don't fail if uprn is intended as a simple premises id number.
+            try:
+                check_uprn(uprn)
+            except Exception:
+                # silently continue — user may have passed a numeric premises id as uprn
+                pass
+            if not premises and str(uprn).strip().isdigit():
+                premises = str(uprn).strip()
+        if not premises:
+            raise ValueError("No premises ID supplied. Pass 'premisesid' in kwargs or a numeric 'uprn'.")
+        api_url = "https://bins.azurewebsites.net/api/getcollections"
+        params = {
+            "premisesid": str(premises),
+            "localauthority": kwargs.get("localauthority", "Rotherham"),
+        }
         headers = {
-            "Content-Type": "application/x-www-form-urlencoded",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
+            "User-Agent": "UKBinCollectionData/1.0 (+https://github.com/robbrad/UKBinCollectionData)"
         }
-        response = requests.post(
-            "https://www.rotherham.gov.uk/bin-collections?address={}&submit=Submit".format(
-                user_uprn
-            ),
-            headers=headers
-        )
-        # Make a BS4 object
-        soup = BeautifulSoup(response.text, features="html.parser")
-        soup.prettify()
-        data = {"bins": []}
+        try:
+            resp = requests.get(api_url, params=params, headers=headers, timeout=10)
+        except Exception as exc:
+            print(f"Error contacting Rotherham API: {exc}")
+            return {"bins": []}
+        if resp.status_code != 200:
+            print(f"Rotherham API request failed ({resp.status_code}). URL: {resp.url}")
+            return {"bins": []}
-        table = soup.select("table")[0]
+        try:
+            collections = resp.json()
+        except ValueError:
+            print("Rotherham API returned non-JSON response.")
+            return {"bins": []}
+        data = {"bins": []}
+        seen = set()  # dedupe identical (type, date) pairs
+        for item in collections:
+            bin_type = item.get("BinType") or item.get("bintype") or "Unknown"
+            date_str = item.get("CollectionDate") or item.get("collectionDate")
+            if not date_str:
+                continue
-        if table:
-            rows = table.select("tr")
+            # API gives ISO date like '2025-09-29' (or possibly '2025-09-29T00:00:00').
+            try:
+                iso_date = date_str.split("T")[0]
+                parsed = datetime.strptime(iso_date, "%Y-%m-%d")
+                formatted = parsed.strftime(date_format)
+            except Exception:
+                # skip malformed dates
+                continue
-            for index, row in enumerate(rows):
-                bin_info_cell = row.select("td")
-                if bin_info_cell:
-                    bin_type = bin_info_cell[0].get_text(separator=" ", strip=True)
-                    bin_collection = bin_info_cell[1]
+            key = (bin_type.strip().lower(), formatted)
+            if key in seen:
+                continue
+            seen.add(key)
-                    if bin_collection:
-                        dict_data = {
-                            "type": bin_type.title(),
-                            "collectionDate": datetime.strptime(
-                                bin_collection.get_text(strip=True), "%A, %d %B %Y"
-                            ).strftime(date_format),
-                        }
+            dict_data = {"type": bin_type.title(), "collectionDate": formatted}
+            data["bins"].append(dict_data)
-                    data["bins"].append(dict_data)
-        else:
-            print("Something went wrong. Please open a GitHub issue.")
+        if not data["bins"]:
+            # helpful debugging note
+            print(f"Rotherham API returned no collection entries for premisesid={premises}")
-        return data
+        return data

uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py CHANGED Viewed

@@ -1,7 +1,8 @@
 from bs4 import BeautifulSoup
+from lxml import etree
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
-from lxml import etree
 # import the wonderful Beautiful Soup and the URL grabber
@@ -20,7 +21,8 @@ class CouncilClass(AbstractGetBinDataClass):
         collections = []
         # Convert the XML to JSON and load the next collection data
-        result = soup.find("p").contents[0].text.replace("\\", "")[1:-1]
+        result = soup.find("p").contents[0]
         json_data = json.loads(result)["NextCollection"]
         # Get general waste data

uk_bin_collection/uk_bin_collection/councils/SandwellBoroughCouncil.py CHANGED Viewed

@@ -28,17 +28,10 @@ class CouncilClass(AbstractGetBinDataClass):
         "Referer": "https://my.sandwell.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=",
     }
     LOOKUPS = [
-        (
-            "58a1a71694992",
-            "DWDate",
-            [
-                "Recycling (Blue)",
-                "Household Waste (Grey)",
-                "Food Waste (Brown)",
-                "Batteries",
-            ],
-        ),
-        ("56b1cdaf6bb43", "GWDate", ["Garden Waste (Green)"]),
+        ("686295a88a750", "GWDate", ["Garden Waste (Green)"]),
+        ("686294de50729", "DWDate", ["Household Waste (Grey)"]),
+        ("6863a78a1dd8e", "FWDate", ["Food Waste (Brown)"]),
+        ("68629dd642423", "MDRDate", ["Recycling (Blue)"]),
     ]
     def parse_data(self, page: str, **kwargs) -> dict:

uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py CHANGED Viewed

@@ -1,15 +1,18 @@
-import time
 import re
-import requests
+import time
 from datetime import datetime
+import requests
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 def get_street_from_postcode(postcode: str, api_key: str) -> str:
     url = "https://maps.googleapis.com/maps/api/geocode/json"
     params = {"address": postcode, "key": api_key}
@@ -25,6 +28,7 @@ def get_street_from_postcode(postcode: str, api_key: str) -> str:
     raise ValueError("No street (route) found in the response.")
 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
@@ -37,10 +41,10 @@ class CouncilClass(AbstractGetBinDataClass):
             headless = kwargs.get("headless")
             web_driver = kwargs.get("web_driver")
-            driver = create_webdriver(web_driver, headless, None, __name__)
+            UserAgent = "Mozilla/5.0"
+            driver = create_webdriver(web_driver, headless, UserAgent, __name__)
             page = "https://www.slough.gov.uk/bin-collections"
             driver.get(page)
             # Accept cookies
             WebDriverWait(driver, 10).until(
                 EC.element_to_be_clickable((By.ID, "ccc-recommended-settings"))
@@ -50,14 +54,20 @@ class CouncilClass(AbstractGetBinDataClass):
             address_input = WebDriverWait(driver, 10).until(
                 EC.presence_of_element_located((By.ID, "keyword_directory25"))
             )
-            user_address = get_street_from_postcode(user_postcode, "AIzaSyBDLULT7EIlNtHerswPtfmL15Tt3Oc0bV8")
+            user_address = get_street_from_postcode(
+                user_postcode, "AIzaSyBDLULT7EIlNtHerswPtfmL15Tt3Oc0bV8"
+            )
             address_input.send_keys(user_address + Keys.ENTER)
             # Wait for address results to load
             WebDriverWait(driver, 10).until(
-                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "span.list__link-text"))
+                EC.presence_of_all_elements_located(
+                    (By.CSS_SELECTOR, "span.list__link-text")
+                )
+            )
+            span_elements = driver.find_elements(
+                By.CSS_SELECTOR, "span.list__link-text"
             )
-            span_elements = driver.find_elements(By.CSS_SELECTOR, "span.list__link-text")
             for span in span_elements:
                 if user_address.lower() in span.text.lower():
@@ -68,7 +78,9 @@ class CouncilClass(AbstractGetBinDataClass):
             # Wait for address detail page
             WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.CSS_SELECTOR, "section.site-content"))
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, "section.site-content")
+                )
             )
             soup = BeautifulSoup(driver.page_source, "html.parser")
@@ -86,28 +98,33 @@ class CouncilClass(AbstractGetBinDataClass):
                             bin_url = "https://www.slough.gov.uk" + bin_url
                         # Visit the child page
-                        print(f"Navigating to {bin_url}")
+                        # print(f"Navigating to {bin_url}")
                         driver.get(bin_url)
                         WebDriverWait(driver, 10).until(
-                            EC.presence_of_element_located((By.CSS_SELECTOR, "div.page-content"))
+                            EC.presence_of_element_located(
+                                (By.CSS_SELECTOR, "div.page-content")
+                            )
                         )
                         child_soup = BeautifulSoup(driver.page_source, "html.parser")
                         editor_div = child_soup.find("div", class_="editor")
                         if not editor_div:
-                            print("No editor div found on bin detail page.")
+                            # print("No editor div found on bin detail page.")
                             continue
                         ul = editor_div.find("ul")
                         if not ul:
-                            print("No <ul> with dates found in editor div.")
+                            # print("No <ul> with dates found in editor div.")
                             continue
                     for li in ul.find_all("li"):
                         raw_text = li.get_text(strip=True).replace(".", "")
-                        if "no collection" in raw_text.lower() or "no collections" in raw_text.lower():
-                            print(f"Ignoring non-collection note: {raw_text}")
+                        if (
+                            "no collection" in raw_text.lower()
+                            or "no collections" in raw_text.lower()
+                        ):
+                            # print(f"Ignoring non-collection note: {raw_text}")
                             continue
                         raw_date = raw_text
@@ -117,19 +134,20 @@ class CouncilClass(AbstractGetBinDataClass):
                         except ValueError:
                             raw_date_cleaned = raw_date.split("(")[0].strip()
                             try:
-                                parsed_date = datetime.strptime(raw_date_cleaned, "%d %B %Y")
+                                parsed_date = datetime.strptime(
+                                    raw_date_cleaned, "%d %B %Y"
+                                )
                             except Exception:
                                 print(f"Could not parse date: {raw_text}")
                                 continue
                         formatted_date = parsed_date.strftime("%d/%m/%Y")
                         contains_date(formatted_date)
-                        bin_data["bins"].append({
-                            "type": bin_type,
-                            "collectionDate": formatted_date
-                        })
+                        bin_data["bins"].append(
+                            {"type": bin_type, "collectionDate": formatted_date}
+                        )
-                        print(f"Type: {bin_type}, Date: {formatted_date}")
+                        # print(f"Type: {bin_type}, Date: {formatted_date}")
         except Exception as e:
             print(f"An error occurred: {e}")
@@ -137,4 +155,4 @@ class CouncilClass(AbstractGetBinDataClass):
         finally:
             if driver:
                 driver.quit()
-        return bin_data
+        return bin_data

uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py CHANGED Viewed

@@ -1,4 +1,9 @@
+import datetime
 from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.wait import WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
@@ -13,6 +18,7 @@ class CouncilClass(AbstractGetBinDataClass):
     """
     def parse_data(self, page: str, **kwargs) -> dict:
+<<<<<<< HEAD
         user_postcode = kwargs.get("postcode")
         check_postcode(user_postcode)
         user_uprn = kwargs.get("uprn")
@@ -43,10 +49,16 @@ class CouncilClass(AbstractGetBinDataClass):
                 i["data-for"]: i.get("value", "")
                 for i in soup.select("input[data-for]")
             }
-            payload_salt = soup.select_one('input[id="pSalt"]').get("value")
-            payload_protected = soup.select_one('input[id="pPageItemsProtected"]').get(
-                "value"
-            )
+            # Check if required form elements exist
+            salt_element = soup.select_one('input[id="pSalt"]')
+            protected_element = soup.select_one('input[id="pPageItemsProtected"]')
+            if not salt_element or not protected_element:
+                raise Exception("Required form elements not found. The council website may have changed or be unavailable.")
+            payload_salt = salt_element.get("value")
+            payload_protected = protected_element.get("value")
             # Add the PostCode and 'SEARCH' to the payload
             payload["p_request"] = "SEARCH"
@@ -123,10 +135,16 @@ class CouncilClass(AbstractGetBinDataClass):
                 i["data-for"]: i.get("value", "")
                 for i in soup.select("input[data-for]")
             }
-            payload_salt = soup.select_one('input[id="pSalt"]').get("value")
-            payload_protected = soup.select_one('input[id="pPageItemsProtected"]').get(
-                "value"
-            )
+            # Check if required form elements exist
+            salt_element = soup.select_one('input[id="pSalt"]')
+            protected_element = soup.select_one('input[id="pPageItemsProtected"]')
+            if not salt_element or not protected_element:
+                raise Exception("Required form elements not found. The council website may have changed or be unavailable.")
+            payload_salt = salt_element.get("value")
+            payload_protected = protected_element.get("value")
             # Add the UPRN and 'SUBMIT' to the payload
             payload["p_request"] = "SUBMIT"
@@ -187,18 +205,115 @@ class CouncilClass(AbstractGetBinDataClass):
             # Create a BeautifulSoup object from the page's HTML
             soup = BeautifulSoup(resource.text, "html.parser")
+=======
+        driver = None
+        try:
+>>>>>>> master
             data = {"bins": []}
+            url = kwargs.get("url")
+            user_paon = kwargs.get("paon")
+            user_postcode = kwargs.get("postcode")
+            web_driver = kwargs.get("web_driver")
+            headless = kwargs.get("headless")
+            check_paon(user_paon)
+            check_postcode(user_postcode)
+            # Use a realistic user agent to help bypass Cloudflare
+            user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+            driver = create_webdriver(web_driver, headless, user_agent, __name__)
+            driver.get("https://www.somerset.gov.uk/collection-days")
+            # Wait for the postcode field to appear then populate it
+            inputElement_postcode = WebDriverWait(driver, 30).until(
+                EC.presence_of_element_located((By.ID, "postcodeSearch"))
+            )
+            inputElement_postcode.send_keys(user_postcode)
+            # Click search button
+            findAddress = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.CLASS_NAME, "govuk-button"))
+            )
+            findAddress.click()
+            # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
+            WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable(
+                    (
+                        By.XPATH,
+                        "//select[@id='addressSelect']//option[contains(., '"
+                        + user_paon
+                        + "')]",
+                    )
+                )
+            ).click()
+            # Wait for the collections table to appear
+            WebDriverWait(driver, 20).until(
+                EC.presence_of_element_located(
+                    (
+                        By.XPATH,
+                        "//h2[contains(@class,'mt-4') and contains(@class,'govuk-heading-s') and normalize-space(.)='Your next collections']",
+                    )
+                )
+            )
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+            collections = soup.find_all("div", {"class": "p-2"})
+            for collection in collections:
+                bin_type = collection.find("h3").get_text()
+                next_collection = soup.find("div", {"class": "fw-bold"}).get_text()
+                following_collection = soup.find(
+                    lambda t: (
+                        t.name == "div"
+                        and t.get_text(strip=True).lower().startswith("followed by")
+                    )
+                ).get_text()
+                next_collection_date = datetime.strptime(next_collection, "%A %d %B")
+                following_collection_date = datetime.strptime(
+                    following_collection, "followed by %A %d %B"
+                )
+                current_date = datetime.now()
+                next_collection_date = next_collection_date.replace(
+                    year=current_date.year
+                )
+                following_collection_date = following_collection_date.replace(
+                    year=current_date.year
+                )
+                next_collection_date = get_next_occurrence_from_day_month(
+                    next_collection_date
+                )
+                following_collection_date = get_next_occurrence_from_day_month(
+                    following_collection_date
+                )
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": next_collection_date.strftime(date_format),
+                }
+                data["bins"].append(dict_data)
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": following_collection_date.strftime(date_format),
+                }
+                data["bins"].append(dict_data)
-            # Loop through the items on the page and build a JSON object for ingestion
-            for item in soup.select(".t-MediaList-item"):
-                for value in item.select(".t-MediaList-body"):
-                    dict_data = {
-                        "type": value.select("span")[1].get_text(strip=True).title(),
-                        "collectionDate": datetime.strptime(
-                            value.select(".t-MediaList-desc")[0].get_text(strip=True),
-                            "%A, %d %B, %Y",
-                        ).strftime(date_format),
-                    }
-                    data["bins"].append(dict_data)
-            return data
+        except Exception as e:
+            # Here you can log the exception if needed
+            print(f"An error occurred: {e}")
+            # Optionally, re-raise the exception if you want it to propagate
+            raise
+        finally:
+            # This block ensures that the driver is closed regardless of an exception
+            if driver:
+                driver.quit()
+        return data

uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py CHANGED Viewed

@@ -6,17 +6,16 @@ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataC
 def format_bin_data(key: str, date: datetime):
     formatted_date = date.strftime(date_format)
-    if re.match(r"^R\d+$", key) is not None:
-        # RX matches both general waste and recycling
-        return [
-            ("General Waste (Black Bin)", formatted_date),
-            ("Recycling & Food Waste", formatted_date),
-        ]
-    elif re.match(r"^G\d+$", key) is not None:
+    servicename = key.get("hso_servicename")
+    print(servicename)
+    if re.match(r"^Recycl", servicename) is not None:
+        return [ ("Recycling", formatted_date) ]
+    elif re.match(r"^Refuse", servicename) is not None:
+        return [("General Waste (Black Bin)", formatted_date)]
+    elif re.match(r"^Garden", servicename) is not None:
         return [("Garden Waste (Green Bin)", formatted_date)]
-    elif re.match(r"^C\d+$", key) is not None:
-        return [("Recycling & Food Waste", formatted_date)]
+    elif re.match(r"^Food", servicename) is not None:
+        return [("Food Waste", formatted_date)]
     else:
         return None
@@ -27,37 +26,34 @@ class CouncilClass(AbstractGetBinDataClass):
         check_uprn(uprn)
         api_url = (
-            f"https://webapps.southglos.gov.uk/Webservices/SGC.RefuseCollectionService/RefuseCollectionService"
-            f".svc/getCollections/{uprn}"
+            f"https://api.southglos.gov.uk/wastecomp/GetCollectionDetails"
+            f"?uprn={uprn}"
         )
         headers = {"content-type": "application/json"}
         response = requests.get(api_url, headers=headers)
-        json_response = json.loads(response.content)
+        json_response = response.json()
         if not json_response:
             raise ValueError("No collection data found for provided UPRN.")
-        collection_data = json_response[0]
+        collection_data = json_response.get('value')
         today = datetime.today()
         eight_weeks = datetime.today() + timedelta(days=8 * 7)
         data = {"bins": []}
         collection_tuple = []
-        for key in collection_data:
-            if key == "CalendarName":
-                continue
-            item = collection_data[key]
+        for collection in collection_data:
+            print(collection)
+            item = collection.get('hso_nextcollection')
             if item == "":
                 continue
-            collection_date = datetime.strptime(item, date_format)
+            collection_date = datetime.fromisoformat(item)
             if today.date() <= collection_date.date() <= eight_weeks.date():
-                bin_data = format_bin_data(key, collection_date)
+                bin_data = format_bin_data(collection, collection_date)
                 if bin_data is not None:
                     for bin_date in bin_data:
                         collection_tuple.append(bin_date)

uk_bin_collection 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl

uk_bin_collection 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl