uk_bin_collection-0.154.0-py3-none-any.whl → uk_bin_collection-0.158.0-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- uk_bin_collection/tests/input.json +21 -10
- uk_bin_collection/uk_bin_collection/councils/AberdeenCityCouncil.py +0 -1
- uk_bin_collection/uk_bin_collection/councils/DacorumBoroughCouncil.py +22 -13
- uk_bin_collection/uk_bin_collection/councils/EastDunbartonshireCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/IslingtonCouncil.py +8 -5
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +23 -10
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughSutton.py +60 -49
- uk_bin_collection/uk_bin_collection/councils/MidSuffolkDistrictCouncil.py +70 -92
- uk_bin_collection/uk_bin_collection/councils/NewForestCouncil.py +104 -47
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +138 -21
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +182 -3
- uk_bin_collection/uk_bin_collection/councils/OxfordCityCouncil.py +1 -0
- uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +170 -13
- uk_bin_collection/uk_bin_collection/councils/RotherhamCouncil.py +70 -38
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +136 -21
- uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +18 -22
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +138 -21
- {uk_bin_collection-0.154.0.dist-info → uk_bin_collection-0.158.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.154.0.dist-info → uk_bin_collection-0.158.0.dist-info}/RECORD +22 -21
- {uk_bin_collection-0.154.0.dist-info → uk_bin_collection-0.158.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.154.0.dist-info → uk_bin_collection-0.158.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.154.0.dist-info → uk_bin_collection-0.158.0.dist-info}/entry_points.txt +0 -0
```diff
@@ -1,16 +1,14 @@
 import logging
-import pickle
 import time
 
-import requests
 from bs4 import BeautifulSoup
 from selenium import webdriver
+from selenium.common.exceptions import NoSuchElementException
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import Select
 from selenium.webdriver.support.wait import WebDriverWait
-from uk_bin_collection.uk_bin_collection.common import *
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
```
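These import changes track the rework below: `pickle` and `requests` drop out along with the old requests-based cookie bootstrap, a duplicated wildcard import of the `common` module is removed, and `NoSuchElementException` is added to drive the new legacy-page fallback. The `forms.newforest.gov.uk` URL later in these hunks places them in `NewForestCouncil.py` from the file list above.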
```diff
@@ -23,17 +21,64 @@ logging.basicConfig(
 
 class CouncilClass(AbstractGetBinDataClass):
 
+    def get_legacy_bins(self, page: str) -> []:
+
+        logging.info("Extracting legacy bin collection data")
+        soup = BeautifulSoup(page, features="html.parser")
+        legacy_bins = []
+
+        # Rubbish and recycling
+        rubbish_recycling = soup.find(
+            "span", class_="CTID-77-_ eb-77-Override-textControl"
+        )
+        if rubbish_recycling:
+            match = re.search(r"collected weekly on (\w+)", rubbish_recycling.text)
+            if match:
+                day_name = match.group(1)
+                next_collection = get_next_day_of_week(day_name)
+                legacy_bins.append(
+                    {
+                        "type": "Rubbish and recycling",
+                        "collectionDate": next_collection,
+                    }
+                )
+                logging.info(f"Rubbish and Recycling: {str(next_collection)}")
+
+        # Glass collection
+        glass_collection = soup.find("span", class_="CTID-78-_ eb-78-textControl")
+        if glass_collection:
+            match = re.search(
+                r"next collection is\s+(\d{2}/\d{2}/\d{4})", glass_collection.text
+            )
+            if match:
+                legacy_bins.append(
+                    {"type": "Glass collection", "collectionDate": match.group(1)}
+                )
+                logging.info(f"Glass: {str(match.group(1))}")
+
+        # Garden waste
+        garden_waste = soup.find("div", class_="eb-2HIpCnWC-Override-EditorInput")
+        if garden_waste:
+            match = re.search(r"(\d{2}/\d{2}/\d{4})", garden_waste.text)
+            if match:
+                legacy_bins.append(
+                    {"type": "Garden waste", "collectionDate": match.group(1)}
+                )
+                logging.info(f"Garden: {str(match.group(1))}")
+
+        # return bins
+        return legacy_bins
+
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
         try:
-
-            collections = []
+            bins = []
             user_uprn = kwargs.get("uprn")
             user_postcode = kwargs.get("postcode")
             web_driver = kwargs.get("web_driver")
             headless = kwargs.get("headless")
             check_postcode(user_postcode)
-            url = "https://forms.newforest.gov.uk/ufs/
+            url = "https://forms.newforest.gov.uk/ufs/FIND_MY_BIN_BAR.eb"
 
             # Get session cookies using requests
 
```
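`get_next_day_of_week` arrives via the wildcard import from the package's `common` module. As a rough sketch of the behaviour `get_legacy_bins` appears to rely on (an assumption for illustration, not the library's actual source), it resolves a weekday name to the next date that weekday falls on:

```python
from datetime import datetime, timedelta

# Hedged sketch only: the real helper lives in
# uk_bin_collection.uk_bin_collection.common and may differ in detail.
def get_next_day_of_week(day_name: str, date_format: str = "%d/%m/%Y") -> str:
    days = ["Monday", "Tuesday", "Wednesday", "Thursday",
            "Friday", "Saturday", "Sunday"]
    today = datetime.now()
    # 0 means the named day is today; otherwise count forward to it.
    offset = (days.index(day_name.capitalize()) - today.weekday()) % 7
    return (today + timedelta(days=offset)).strftime(date_format)
```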
```diff
@@ -52,10 +97,20 @@ class CouncilClass(AbstractGetBinDataClass):
 
             logging.info("Entering postcode")
             input_element_postcode = wait.until(
-                EC.
+                EC.element_to_be_clickable(
+                    (By.XPATH, '//input[@id="CTID-JmLqCKl2-_-A"]')
+                )
+            )
+
+            driver.execute_script(
+                "arguments[0].scrollIntoView();", input_element_postcode
             )
 
-
+            logging.info(f"Entering postcode '{str(user_postcode)}'")
+            # Force the value through the DOM cos send_keys just don't work for some reason :(
+            driver.execute_script(
+                f"arguments[0].value='{str(user_postcode)}'", input_element_postcode
+            )
 
             logging.info("Searching for postcode")
             input_element_postcode_btn = wait.until(
```
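The "force the value through the DOM" workaround sets the field's `value` property directly because `send_keys` proved unreliable here. One caveat: page scripts that listen for `input` or `change` events will not see a bare property assignment, so a common companion fix (shown only as a sketch, not part of this release) is to dispatch the event in the same script call:

```python
# Sketch: set the value and notify any JS listeners in one go.
driver.execute_script(
    "arguments[0].value = arguments[1];"
    "arguments[0].dispatchEvent(new Event('input', { bubbles: true }));",
    input_element_postcode,
    user_postcode,
)
```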
```diff
@@ -66,7 +121,9 @@ class CouncilClass(AbstractGetBinDataClass):
 
             logging.info("Waiting for address dropdown")
             input_element_postcode_dropdown = wait.until(
-                EC.
+                EC.element_to_be_clickable(
+                    (By.XPATH, '//select[@id="CTID-KOeKcmrC-_-A"]')
+                )
             )
 
             logging.info("Selecting address")
```
```diff
@@ -86,51 +143,51 @@ class CouncilClass(AbstractGetBinDataClass):
 
             input_element_address_btn.click()
 
-
-
-
-
+            # Be patient, clicks take time!
+            time.sleep(2)
+            # logging.info(driver.page_source)
+
+            try:
+                link_element = driver.find_element(
+                    By.XPATH,
+                    '//a[contains(text(),"Find your current bin collection day")]',
+                )
+                logging.info(
+                    "Found override panel span, search for link and use old logic"
                 )
-            )
 
-
-            soup = BeautifulSoup(driver.page_source, features="html.parser")
-            bins = []
+                link_element.click()
 
-
-            rubbish_recycling = soup.find(
-                "span", class_="CTID-77-_ eb-77-Override-textControl"
-            )
-            if rubbish_recycling:
-                match = re.search(r"collected weekly on (\w+)", rubbish_recycling.text)
-                if match:
-                    day_name = match.group(1)
-                    next_collection = get_next_day_of_week(day_name)
-                    bins.append(
-                        {
-                            "type": "Rubbish and recycling",
-                            "collectionDate": next_collection,
-                        }
-                    )
+                # Be patient, clicks take time!
+                time.sleep(2)
 
-
-
-
-
-
-
-
-
-
+                bins = self.get_legacy_bins(driver.page_source)
+
+            except NoSuchElementException:
+                logging.info("Waiting for bin collection table")
+                collections_table = wait.until(
+                    EC.presence_of_element_located(
+                        (
+                            By.XPATH,
+                            '//table[contains(@class,"eb-1j4UaesZ-tableContent")]',
+                        )
                     )
+                )
 
-
-
-
-
-
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+            rows = soup.find_all(class_="eb-1j4UaesZ-tableRow")
+
+            for row in rows:
+                cols = row.find_all("td")
+                date_string = cols[0].findChild("div").findChild("div").get_text()
+                bin_type = cols[1].findChild("div").findChild("div").get_text()
+
+                col_date = datetime.strptime(date_string, "%A %B %d, %Y")
                 bins.append(
-                        {
+                    {
+                        "type": bin_type,
+                        "collectionDate": datetime.strftime(col_date, date_format),
+                    }
                 )
 
             return {"bins": bins}
```
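The legacy path clicks through to the old form and reuses `get_legacy_bins`; otherwise the code waits for the new-style results table, whose dates the `"%A %B %d, %Y"` format implies look like "Friday June 13, 2025" (the sample string below is illustrative):

```python
from datetime import datetime

# Illustrative only: shows the date shape "%A %B %d, %Y" expects.
col_date = datetime.strptime("Friday June 13, 2025", "%A %B %d, %Y")
print(col_date.date())  # 2025-06-13
```

The next set of hunks belongs to a different module from the file list; the `iportal.itouchvision.com` endpoint further down points at one of the iTouchVision-backed councils (the +138/-21 counts match `NewportCityCouncil.py` and `TestValleyBoroughCouncil.py`), though the diff itself does not name the file.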
```diff
@@ -1,4 +1,9 @@
+import datetime
+
 from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.wait import WebDriverWait
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
```
```diff
@@ -13,6 +18,7 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
+<<<<<<< HEAD
         user_postcode = kwargs.get("postcode")
         check_postcode(user_postcode)
         user_uprn = kwargs.get("uprn")
```
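Note that this hunk adds a literal `<<<<<<< HEAD` merge-conflict marker to the published module; its `=======` and `>>>>>>> master` counterparts appear in the `@@ -187,18 +205,117 @@` hunk below. Conflict markers are not valid Python, so the module raises a `SyntaxError` on import as released.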
```diff
@@ -43,10 +49,16 @@ class CouncilClass(AbstractGetBinDataClass):
                 i["data-for"]: i.get("value", "")
                 for i in soup.select("input[data-for]")
             }
-
-
-
-            )
+
+            # Check if required form elements exist
+            salt_element = soup.select_one('input[id="pSalt"]')
+            protected_element = soup.select_one('input[id="pPageItemsProtected"]')
+
+            if not salt_element or not protected_element:
+                raise Exception("Required form elements not found. The council website may have changed or be unavailable.")
+
+            payload_salt = salt_element.get("value")
+            payload_protected = protected_element.get("value")
 
             # Add the PostCode and 'SEARCH' to the payload
             payload["p_request"] = "SEARCH"
```
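The guard matters because BeautifulSoup's `select_one` returns `None` when nothing matches, so the previous chained `.get("value")` would fail with an opaque `AttributeError`. The same check could be factored into a helper, sketched here with a hypothetical name (`require_input_value` is not part of the package):

```python
from bs4 import BeautifulSoup

# Hypothetical helper illustrating the guard pattern introduced above.
def require_input_value(soup: BeautifulSoup, element_id: str) -> str:
    element = soup.select_one(f'input[id="{element_id}"]')
    if element is None:
        raise Exception(
            f"Required form element '{element_id}' not found. "
            "The council website may have changed or be unavailable."
        )
    return element.get("value")

# payload_salt = require_input_value(soup, "pSalt")
# payload_protected = require_input_value(soup, "pPageItemsProtected")
```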
```diff
@@ -123,10 +135,16 @@ class CouncilClass(AbstractGetBinDataClass):
                 i["data-for"]: i.get("value", "")
                 for i in soup.select("input[data-for]")
             }
-
-
-
-            )
+
+            # Check if required form elements exist
+            salt_element = soup.select_one('input[id="pSalt"]')
+            protected_element = soup.select_one('input[id="pPageItemsProtected"]')
+
+            if not salt_element or not protected_element:
+                raise Exception("Required form elements not found. The council website may have changed or be unavailable.")
+
+            payload_salt = salt_element.get("value")
+            payload_protected = protected_element.get("value")
 
             # Add the UPRN and 'SUBMIT' to the payload
             payload["p_request"] = "SUBMIT"
```
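The identical guard is duplicated verbatim at this second lookup (the UPRN `SUBMIT` stage); a shared helper along the lines of the `require_input_value` sketch above would cover both call sites.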
```diff
@@ -187,18 +205,117 @@ class CouncilClass(AbstractGetBinDataClass):
 
         # Create a BeautifulSoup object from the page's HTML
         soup = BeautifulSoup(resource.text, "html.parser")
+=======
+        driver = None
+        try:
+>>>>>>> master
         data = {"bins": []}
+            url = kwargs.get("url")
+            user_paon = kwargs.get("paon")
+            user_postcode = kwargs.get("postcode")
+            web_driver = kwargs.get("web_driver")
+            headless = kwargs.get("headless")
+            check_paon(user_paon)
+            check_postcode(user_postcode)
+
+            # Use a realistic user agent to help bypass Cloudflare
+            user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+            driver = create_webdriver(web_driver, headless, user_agent, __name__)
+            driver.get(
+                "https://iportal.itouchvision.com/icollectionday/collection-day/?uuid=6CDD2A34C912312074D8E2410531401A8C00EFF7&lang=en"
+            )
+
+            # Wait for the postcode field to appear then populate it
+            inputElement_postcode = WebDriverWait(driver, 30).until(
+                EC.presence_of_element_located((By.ID, "postcodeSearch"))
+            )
+            inputElement_postcode.send_keys(user_postcode)
+
+            # Click search button
+            findAddress = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.CLASS_NAME, "govuk-button"))
+            )
+            findAddress.click()
+
+            # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
+            WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable(
+                    (
+                        By.XPATH,
+                        "//select[@id='addressSelect']//option[contains(., '"
+                        + user_paon
+                        + "')]",
+                    )
+                )
+            ).click()
+
+            # Wait for the collections table to appear
+            WebDriverWait(driver, 20).until(
+                EC.presence_of_element_located(
+                    (
+                        By.XPATH,
+                        "//h2[contains(@class,'mt-4') and contains(@class,'govuk-heading-s') and normalize-space(.)='Your next collections']",
+                    )
+                )
+            )
+
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+
+            collections = soup.find_all("div", {"class": "p-2"})
+
+            for collection in collections:
+                bin_type = collection.find("h3").get_text()
+
+                next_collection = soup.find("div", {"class": "fw-bold"}).get_text()
+
+                following_collection = soup.find(
+                    lambda t: (
+                        t.name == "div"
+                        and t.get_text(strip=True).lower().startswith("followed by")
+                    )
+                ).get_text()
+
+                next_collection_date = datetime.strptime(next_collection, "%A %d %B")
+
+                following_collection_date = datetime.strptime(
+                    following_collection, "followed by %A %d %B"
+                )
+
+                current_date = datetime.now()
+                next_collection_date = next_collection_date.replace(
+                    year=current_date.year
+                )
+                following_collection_date = following_collection_date.replace(
+                    year=current_date.year
+                )
+
+                next_collection_date = get_next_occurrence_from_day_month(
+                    next_collection_date
+                )
+
+                following_collection_date = get_next_occurrence_from_day_month(
+                    following_collection_date
+                )
+
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": next_collection_date.strftime(date_format),
+                }
+                data["bins"].append(dict_data)
+
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": following_collection_date.strftime(date_format),
+                }
+                data["bins"].append(dict_data)
 
-
-
-
-
-
-
-
-
-
-
-            data["bins"].append(dict_data)
-
-        return data
+        except Exception as e:
+            # Here you can log the exception if needed
+            print(f"An error occurred: {e}")
+            # Optionally, re-raise the exception if you want it to propagate
+            raise
+        finally:
+            # This block ensures that the driver is closed regardless of an exception
+            if driver:
+                driver.quit()
+        return data
```
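`"%A %d %B"` carries no year, so `strptime` defaults to 1900; the code stamps in the current year and then defers to `get_next_occurrence_from_day_month` from the `common` module for the year-end rollover. A minimal sketch of the behaviour the code appears to assume (not the helper's actual source):

```python
from datetime import datetime

# Hedged sketch: if the day/month has already passed this year,
# the next occurrence must fall in the following year.
def get_next_occurrence_from_day_month(candidate: datetime) -> datetime:
    today = datetime.now()
    if candidate.date() < today.date():
        candidate = candidate.replace(year=candidate.year + 1)
    return candidate
```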
```diff
@@ -30,7 +30,12 @@ class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
        try:
+<<<<<<< HEAD
+            # Use the new URL as mentioned in the issue
+            page = "http://bincollection.northumberland.gov.uk"
+=======
             page = "https://bincollection.northumberland.gov.uk/postcode"
+>>>>>>> master
 
             data = {"bins": []}
 
```
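The `bincollection.northumberland.gov.uk` URLs identify this as `NorthumberlandCouncil.py` (+182/-3 in the file list). As with the previous module, both sides of an unresolved merge conflict ship in the release (the HEAD side below carries a large heuristic fallback scraper), leaving the file syntactically invalid as published.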
```diff
@@ -50,6 +55,182 @@ class CouncilClass(AbstractGetBinDataClass):
             # Create wait object
             wait = WebDriverWait(driver, 20)
 
+<<<<<<< HEAD
+            # The new site may have different structure, so we'll need to adapt
+            # Try to find postcode and house number inputs
+            try:
+                # Look for postcode input field
+                postcode_input = wait.until(
+                    EC.presence_of_element_located(
+                        (By.XPATH, "//input[contains(@name, 'postcode') or contains(@id, 'postcode') or contains(@placeholder, 'postcode')]")
+                    )
+                )
+
+                # Look for house number input field
+                house_input = wait.until(
+                    EC.presence_of_element_located(
+                        (By.XPATH, "//input[contains(@name, 'house') or contains(@id, 'house') or contains(@name, 'number') or contains(@placeholder, 'house')]")
+                    )
+                )
+
+                # Enter details
+                postcode_input.send_keys(user_postcode)
+                house_input.send_keys(user_paon)
+
+                # Look for submit button
+                submit_button = wait.until(
+                    EC.element_to_be_clickable(
+                        (By.XPATH, "//button[@type='submit'] | //input[@type='submit'] | //button[contains(text(), 'Search')] | //input[contains(@value, 'Search')]")
+                    )
+                )
+                submit_button.click()
+
+                # Wait for results to load
+                time.sleep(3)
+
+                # Get page source after everything has loaded
+                soup = BeautifulSoup(driver.page_source, features="html.parser")
+
+                # Look for collection dates and bin types in the results
+                # This is a generic approach that looks for common patterns
+                import re
+                from datetime import datetime
+
+                # Look for date patterns in the page
+                date_pattern = r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\d{1,2}\s+(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*\s+\d{2,4}\b'
+                page_text = soup.get_text()
+                dates = re.findall(date_pattern, page_text, re.IGNORECASE)
+
+                # Look for bin type keywords near dates
+                bin_keywords = ['recycling', 'refuse', 'garden', 'waste', 'rubbish', 'general', 'household']
+
+                # Try to extract structured data from tables or lists
+                tables = soup.find_all('table')
+                for table in tables:
+                    rows = table.find_all('tr')
+                    for row in rows:
+                        cells = row.find_all(['td', 'th'])
+                        if len(cells) >= 2:
+                            # Look for date in first cell and bin type in second
+                            date_text = cells[0].get_text().strip()
+                            type_text = cells[1].get_text().strip()
+
+                            # Try to parse date
+                            try:
+                                if re.match(r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}', date_text):
+                                    date_obj = datetime.strptime(date_text, '%d/%m/%Y')
+                                elif re.match(r'\d{1,2}\s+\w+\s+\d{4}', date_text):
+                                    date_obj = datetime.strptime(date_text, '%d %B %Y')
+                                else:
+                                    continue
+
+                                if any(keyword in type_text.lower() for keyword in bin_keywords):
+                                    data["bins"].append({
+                                        "type": type_text,
+                                        "collectionDate": date_obj.strftime(date_format)
+                                    })
+                            except ValueError:
+                                continue
+
+            except TimeoutException:
+                # If the new site structure is completely different, fall back to old URL
+                driver.get("https://www.northumberland.gov.uk/Waste/Household-waste/Household-bin-collections/Bin-Calendars.aspx")
+
+                # Wait for and click cookie button if present
+                try:
+                    cookie_button = wait.until(
+                        EC.element_to_be_clickable((By.ID, "ccc-notify-accept"))
+                    )
+                    cookie_button.click()
+                except TimeoutException:
+                    pass
+
+                # Continue with original logic for old site
+                inputElement_hn = wait.until(
+                    EC.presence_of_element_located(
+                        (
+                            By.ID,
+                            "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtHouse",
+                        )
+                    )
+                )
+
+                inputElement_pc = wait.until(
+                    EC.presence_of_element_located(
+                        (
+                            By.ID,
+                            "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtPostcode",
+                        )
+                    )
+                )
+
+                inputElement_pc.send_keys(user_postcode)
+                inputElement_hn.send_keys(user_paon)
+
+                lookup_button = wait.until(
+                    EC.element_to_be_clickable(
+                        (
+                            By.ID,
+                            "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_butLookup",
+                        )
+                    )
+                )
+                lookup_button.click()
+
+                route_summary = wait.until(
+                    EC.presence_of_element_located(
+                        (
+                            By.ID,
+                            "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
+                        )
+                    )
+                )
+
+                soup = BeautifulSoup(driver.page_source, features="html.parser")
+
+                bins_collected = list(
+                    map(
+                        str.strip,
+                        soup.find(
+                            "span",
+                            id="p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
+                        )
+                        .text.replace("Routes found: ", "")
+                        .split(","),
+                    )
+                )
+
+                bins_by_colours = dict()
+                for bin in bins_collected:
+                    if "(but no dates found)" in bin:
+                        continue
+                    style_str = soup.find("span", string=bin)["style"]
+                    bin_colour = self.extract_styles(style_str)["background-color"].upper()
+                    bins_by_colours[bin_colour] = bin
+
+                calander_tables = soup.find_all("table", title="Calendar")
+                for table in calander_tables:
+                    rows = table.find_all("tr")
+                    month_and_year = (
+                        rows[0].find("table", class_="calCtrlTitle").find("td").string
+                    )
+                    bin_days = table.find_all("td", class_="calCtrlDay")
+                    for day in bin_days:
+                        day_styles = self.extract_styles(day["style"])
+                        if "background-color" in day_styles:
+                            colour = day_styles["background-color"].upper()
+                            date = time.strptime(
+                                f"{day.string} {month_and_year}", "%d %B %Y"
+                            )
+
+                            data["bins"].append(
+                                {
+                                    "type": bins_by_colours[colour],
+                                    "collectionDate": time.strftime(date_format, date),
+                                }
+                            )
+
+=======
             # Wait for and click cookie button
             cookie_button = wait.until(
                 EC.element_to_be_clickable(
```
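One inconsistency in the HEAD-side heuristic above: the date regex admits `-` separators and two-digit years (e.g. "13-06-25"), but the follow-up `strptime` calls only try `%d/%m/%Y` and `%d %B %Y`, so several shapes the regex matches can never parse and are silently skipped. A tolerant loop over candidate formats is one way to reconcile the two (a sketch, not package code):

```python
from datetime import datetime
from typing import Optional

# Sketch: try each format the regex can produce until one parses.
def parse_flexible_date(text: str) -> Optional[datetime]:
    formats = ("%d/%m/%Y", "%d-%m-%Y", "%d/%m/%y", "%d-%m-%y",
               "%d %B %Y", "%d %b %Y")
    for fmt in formats:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None
```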
```diff
@@ -133,13 +314,11 @@ class CouncilClass(AbstractGetBinDataClass):
                         "collectionDate": time.strftime(date_format, collection_date),
                     }
                 )
+>>>>>>> master
         except Exception as e:
-            # Here you can log the exception if needed
             print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-            # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
             return data
```
```diff
@@ -25,6 +25,7 @@ class CouncilClass(AbstractGetBinDataClass):
         URI = "https://www.oxford.gov.uk/xfp/form/142#q6ad4e3bf432c83230a0347a6eea6c805c672efeb_0"
 
         session = requests.Session()
+        session.headers.update({'User-Agent': 'HomeAssistant UK Bin Collection integration'})
         token_response = session.get(session_uri)
         soup = BeautifulSoup(token_response.text, "html.parser")
         token = soup.find("input", {"name": "__token"}).attrs["value"]
```