PyPI - uk_bin_collection - Versions diffs - 0.150.1__py3-none-any.whl → 0.152.0__py3-none-any.whl - Mend

uk_bin_collection 0.150.1__py3-none-any.whl → 0.152.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

uk_bin_collection/uk_bin_collection/councils/ChichesterDistrictCouncil.py CHANGED Viewed

@@ -1,110 +1,162 @@
 import time
 from datetime import datetime
-from selenium.webdriver.support.ui import Select
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.support.ui import Select
-from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support.ui import WebDriverWait, Select
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+date_format = "%d/%m/%Y"
-# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
-    """
-    Concrete classes have to implement all abstract operations of the
-    base class. They can also override some operations with a default
-    implementation.
-    """
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
         try:
-            # Make a BS4 object
             page = "https://www.chichester.gov.uk/checkyourbinday"
             user_postcode = kwargs.get("postcode")
-            user_uprn = kwargs.get("uprn")
+            house_number = kwargs.get("paon")
             web_driver = kwargs.get("web_driver")
             headless = kwargs.get("headless")
-            house_number = kwargs.get("paon")
             driver = create_webdriver(web_driver, headless, None, __name__)
             driver.get(page)
             wait = WebDriverWait(driver, 60)
-            inputElement_postcodesearch = wait.until(
+            input_postcode = wait.until(
                 EC.visibility_of_element_located(
                     (By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPPOSTCODE")
                 )
             )
+            input_postcode.send_keys(user_postcode)
-            inputElement_postcodesearch.send_keys(user_postcode)
-            inputElement_postcodesearch_btn = wait.until(
-                EC.visibility_of_element_located(
-                    (By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPSEARCH")
-                )
-            )
-            inputElement_postcodesearch_btn.send_keys(Keys.ENTER)
-            inputElement_select_address = wait.until(
+            search_button = wait.until(
                 EC.element_to_be_clickable(
-                    (By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPADDRESS")
+                    (By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPSEARCH")
                 )
             )
-            dropdown_element = driver.find_element(
-                By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPADDRESS"
-            )
+            search_button.send_keys(Keys.ENTER)
-            # Now create a Select object based on the found element
-            dropdown = Select(dropdown_element)
+            self.smart_select_address(driver, house_number)
-            # Select the option by visible text
-            dropdown.select_by_visible_text(house_number)
-            results = wait.until(
-                EC.element_to_be_clickable(
+            wait.until(
+                EC.presence_of_element_located(
                     (By.CLASS_NAME, "bin-collection-dates-container")
                 )
             )
             soup = BeautifulSoup(driver.page_source, features="html.parser")
-            soup.prettify()
+            table = soup.find("table", class_="defaultgeneral bin-collection-dates")
+            rows = table.find_all("tr") if table else []
-            # Extract data from the table
             bin_collection_data = []
-            rows = soup.find(
-                "table", class_="defaultgeneral bin-collection-dates"
-            ).find_all("tr")
             for row in rows:
                 cells = row.find_all("td")
                 if cells:
                     date_str = cells[0].text.strip()
                     bin_type = cells[1].text.strip()
-                    # Convert date string to the required format DD/MM/YYYY
                     date_obj = datetime.strptime(date_str, "%d %B %Y")
-                    date_formatted = date_obj.strftime(date_format)
-                    bin_collection_data.append(
-                        {"collectionDate": date_formatted, "type": bin_type}
-                    )
+                    formatted_date = date_obj.strftime(date_format)
+                    bin_collection_data.append({
+                        "collectionDate": formatted_date,
+                        "type": bin_type
+                    })
-            # Convert to JSON
-            json_data = {"bins": bin_collection_data}
+            print(bin_collection_data)
+            return {"bins": bin_collection_data}
         except Exception as e:
-            # Here you can log the exception if needed
             print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-            # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
-        return json_data
+    def smart_select_address(self, driver, house_number: str):
+        dropdown_id = "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPADDRESS"
+        print("Waiting for address dropdown...")
+        def dropdown_has_addresses(d):
+            try:
+                dropdown_el = d.find_element(By.ID, dropdown_id)
+                select = Select(dropdown_el)
+                return len(select.options) > 1
+            except StaleElementReferenceException:
+                return False
+        WebDriverWait(driver, 30).until(dropdown_has_addresses)
+        dropdown_el = driver.find_element(By.ID, dropdown_id)
+        dropdown = Select(dropdown_el)
+        print("Address dropdown options:")
+        for opt in dropdown.options:
+            print(f"- {opt.text.strip()}")
+        user_input_clean = house_number.lower().strip()
+        found = False
+        for option in dropdown.options:
+            option_text_clean = option.text.lower().strip()
+            print(f"Comparing: {repr(option_text_clean)} == {repr(user_input_clean)}")
+            if (
+                option_text_clean == user_input_clean
+                or option_text_clean.startswith(f"{user_input_clean},")
+            ):
+                try:
+                    option.click()
+                    found = True
+                    print(f"Strict match clicked: {option.text.strip()}")
+                    break
+                except StaleElementReferenceException:
+                    print("Stale during click, retrying...")
+                    dropdown_el = driver.find_element(By.ID, dropdown_id)
+                    dropdown = Select(dropdown_el)
+                    for fresh_option in dropdown.options:
+                        if fresh_option.text.lower().strip() == option_text_clean:
+                            fresh_option.click()
+                            found = True
+                            print(f"Strict match clicked after refresh: {fresh_option.text.strip()}")
+                            break
+            if found:
+                break
+        if not found:
+            print("No strict match found, trying fuzzy match...")
+            for option in dropdown.options:
+                option_text_clean = option.text.lower().strip()
+                if user_input_clean in option_text_clean:
+                    try:
+                        option.click()
+                        found = True
+                        print(f"Fuzzy match clicked: {option.text.strip()}")
+                        break
+                    except StaleElementReferenceException:
+                        print("Stale during fuzzy click, retrying...")
+                        dropdown_el = driver.find_element(By.ID, dropdown_id)
+                        dropdown = Select(dropdown_el)
+                        for fresh_option in dropdown.options:
+                            if fresh_option.text.lower().strip() == option_text_clean:
+                                fresh_option.click()
+                                found = True
+                                print(f"Fuzzy match clicked after refresh: {fresh_option.text.strip()}")
+                                break
+                if found:
+                    break
+        if not found:
+            all_opts = [opt.text.strip() for opt in dropdown.options]
+            raise Exception(
+                f"Could not find address '{house_number}' in options: {all_opts}"
+            )

uk_bin_collection/uk_bin_collection/councils/FermanaghOmaghDistrictCouncil.py ADDED Viewed

@@ -0,0 +1,102 @@
+import difflib
+from datetime import date, datetime
+import requests
+from bs4 import BeautifulSoup
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+    base_url = "https://fermanaghomagh.isl-fusion.com/"
+    def parse_data(self, page: str, **kwargs) -> dict:
+        """
+        This function will make a request to the search endpoint with the postcode, extract the
+        house numbers from the responses, then retrieve the ID of the entry with the house number that matches,
+        to then retrieve the bin schedule.
+        The API here is a weird combination of HTML in json responses.
+        """
+        postcode = kwargs.get("postcode")
+        paon = kwargs.get("paon")
+        if not postcode:
+            raise ValueError("Must provide a postcode")
+        if not paon:
+            raise ValueError("Must provide a house number")
+        search_url = f"{self.base_url}/address/{postcode}"
+        requests.packages.urllib3.disable_warnings()
+        s = requests.Session()
+        response = s.get(search_url)
+        response.raise_for_status()
+        address_data = response.json()
+        address_list = address_data["html"]
+        soup = BeautifulSoup(address_list, features="html.parser")
+        address_by_id = {}
+        for li in soup.find_all("li"):
+            link = li.find_all("a")[0]
+            address_id = link.attrs["href"]
+            address = link.text
+            address_by_id[address_id] = address
+        addresses = list(address_by_id.values())
+        common = difflib.SequenceMatcher(
+            a=addresses[0], b=addresses[1]
+        ).find_longest_match()
+        extra_bit = addresses[0][common.a : common.a + common.size]
+        ids_by_paon = {
+            a.replace(extra_bit, ""): a_id.replace("/view/", "").replace("/", "")
+            for a_id, a in address_by_id.items()
+        }
+        property_id = ids_by_paon.get(paon)
+        if not property_id:
+            raise ValueError(
+                f"Invalid house number, valid values are {', '.join(ids_by_paon.keys())}"
+            )
+        today = date.today()
+        calendar_url = (
+            f"{self.base_url}/calendar/{property_id}/{today.strftime('%Y-%m-%d')}"
+        )
+        response = s.get(calendar_url)
+        response.raise_for_status()
+        calendar_data = response.json()
+        next_collections = calendar_data["nextCollections"]
+        collections = list(next_collections["collections"].values())
+        data = {"bins": []}
+        for collection in collections:
+            collection_date = datetime.strptime(collection["date"], "%Y-%m-%d")
+            bins = [c["name"] for c in collection["collections"].values()]
+            for bin in bins:
+                data["bins"].append(
+                    {
+                        "type": bin,
+                        "collectionDate": collection_date.strftime(date_format),
+                    }
+                )
+        return data

uk_bin_collection/uk_bin_collection/councils/MaidstoneBoroughCouncil.py ADDED Viewed

@@ -0,0 +1,115 @@
+import time
+from datetime import datetime
+from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import Select
+from selenium.webdriver.support.wait import WebDriverWait
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+    def parse_data(self, page: str, **kwargs) -> dict:
+        driver = None
+        try:
+            page = "https://my.maidstone.gov.uk/service/Find-your-bin-day"
+            bin_data = {"bins": []}
+            user_paon = kwargs.get("paon")
+            user_postcode = kwargs.get("postcode")
+            web_driver = kwargs.get("web_driver")
+            headless = kwargs.get("headless")
+            check_postcode(user_postcode)
+            # Create Selenium webdriver
+            driver = create_webdriver(web_driver, headless, None, __name__)
+            driver.get(page)
+            iframe_presense = WebDriverWait(driver, 30).until(
+                EC.presence_of_element_located((By.ID, "fillform-frame-1"))
+            )
+            driver.switch_to.frame(iframe_presense)
+            wait = WebDriverWait(driver, 60)
+            # Postal code input
+            inputElement_postcodesearch = wait.until(
+                EC.element_to_be_clickable((By.NAME, "postcode"))
+            )
+            inputElement_postcodesearch.send_keys(user_postcode)
+            # Wait for the 'Select address' dropdown to be updated
+            dropdown_select = wait.until(
+                EC.presence_of_element_located((By.XPATH, "//span[contains(text(), 'Select...')]"))
+            )
+            dropdown_select.click()
+            dropdown = wait.until(
+                EC.element_to_be_clickable((By.XPATH, f"//div[contains(text(), ' {user_paon}')]"))
+            )
+            dropdown.click()
+            # Wait for 'Searching for...' to be added to page
+            WebDriverWait(driver, timeout=15).until(
+                EC.text_to_be_present_in_element(
+                    (By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
+                )
+            )
+            # Wait for 'Searching for...' to be removed from page
+            WebDriverWait(driver, timeout=15).until(
+                EC.none_of(
+                    EC.text_to_be_present_in_element(
+                        (By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
+                    )
+                )
+            )
+            # Even then it can still be adding data to the page...
+            time.sleep(5)
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+            # This is ugly but there is literally no consistency to the HTML
+            def is_a_collection_date(t):
+                return any("Next collection" in c for c in t.children)
+            for next_collection in soup.find_all(is_a_collection_date):
+                bin_info = list(
+                    next_collection.parent.select_one("div:nth-child(1)").children
+                )
+                if not bin_info:
+                    continue
+                bin = bin_info[0].get_text()
+                date = next_collection.select_one("strong").get_text(strip=True)
+                bin_date = datetime.strptime(date, "%d %b %Y")
+                dict_data = {
+                    "type": bin,
+                    "collectionDate": bin_date.strftime(date_format),
+                }
+                bin_data["bins"].append(dict_data)
+            bin_data["bins"].sort(
+                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+            )
+        except Exception as e:
+            # Here you can log the exception if needed
+            print(f"An error occurred: {e}")
+            # Optionally, re-raise the exception if you want it to propagate
+            raise
+        finally:
+            # This block ensures that the driver is closed regardless of an exception
+            if driver:
+                driver.quit()
+        return bin_data

uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from datetime import datetime
+from time import sleep
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
@@ -9,8 +10,6 @@ from selenium.webdriver.support.wait import WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
-# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
@@ -34,82 +33,105 @@ class CouncilClass(AbstractGetBinDataClass):
             headless = kwargs.get("headless")
             check_uprn(user_uprn)
             check_postcode(user_postcode)
-            # Create Selenium webdriver
             driver = create_webdriver(web_driver, headless, None, __name__)
             driver.get(page)
-            # If you bang in the house number (or property name) and postcode in the box it should find your property
             iframe_presense = WebDriverWait(driver, 30).until(
                 EC.presence_of_element_located((By.ID, "fillform-frame-1"))
             )
             driver.switch_to.frame(iframe_presense)
             wait = WebDriverWait(driver, 60)
             inputElement_postcodesearch = wait.until(
                 EC.element_to_be_clickable((By.NAME, "postcode_search"))
             )
             inputElement_postcodesearch.send_keys(str(user_postcode))
-            # Wait for the 'Select your property' dropdown to appear and select the first result
             dropdown = wait.until(EC.element_to_be_clickable((By.NAME, "selAddress")))
             dropdown_options = wait.until(
                 EC.presence_of_element_located((By.CLASS_NAME, "lookup-option"))
             )
-            # Create a 'Select' for it, then select the first address in the list
-            # (Index 0 is "Make a selection from the list")
             drop_down_values = Select(dropdown)
             option_element = wait.until(
                 EC.presence_of_element_located(
                     (By.CSS_SELECTOR, f'option.lookup-option[value="{str(user_uprn)}"]')
                 )
             )
             drop_down_values.select_by_value(str(user_uprn))
-            # Wait for the 'View more' link to appear, then click it to get the full set of dates
             h3_element = wait.until(
                 EC.presence_of_element_located(
                     (By.XPATH, "//th[contains(text(), 'Waste Collection')]")
                 )
             )
+            sleep(10)
             soup = BeautifulSoup(driver.page_source, features="html.parser")
+            print("Parsing HTML content...")
+            collection_rows = soup.find_all("tr")
+            for row in collection_rows:
+                cells = row.find_all("td")
+                if len(cells) == 3:  # Date, Image, Bin Type
+                    # Extract date carefully
+                    date_labels = cells[0].find_all("label")
+                    collection_date = None
+                    for label in date_labels:
+                        label_text = label.get_text().strip()
+                        if contains_date(label_text):
+                            collection_date = label_text
+                            break
+                    # Extract bin type
+                    bin_label = cells[2].find("label")
+                    bin_types = bin_label.get_text().strip() if bin_label else None
+                    if collection_date and bin_types:
+                        print(f"Found collection: {collection_date} - {bin_types}")
+                        # Handle combined collections
+                        if "&" in bin_types:
+                            if "Burgundy" in bin_types:
+                                data["bins"].append(
+                                    {
+                                        "type": "Burgundy Bin",
+                                        "collectionDate": datetime.strptime(
+                                            collection_date, "%d/%m/%Y"
+                                        ).strftime(date_format),
+                                    }
+                                )
+                            if "Green" in bin_types:
+                                data["bins"].append(
+                                    {
+                                        "type": "Green Bin",
+                                        "collectionDate": datetime.strptime(
+                                            collection_date, "%d/%m/%Y"
+                                        ).strftime(date_format),
+                                    }
+                                )
+                        else:
+                            if "Black" in bin_types:
+                                data["bins"].append(
+                                    {
+                                        "type": "Black Bin",
+                                        "collectionDate": datetime.strptime(
+                                            collection_date, "%d/%m/%Y"
+                                        ).strftime(date_format),
+                                    }
+                                )
+            print(f"Found {len(data['bins'])} collections")
+            print(f"Final data: {data}")
-            target_h3 = soup.find("h3", string="Collection Details")
-            tables_after_h3 = target_h3.parent.parent.find_next("table")
-            table_rows = tables_after_h3.find_all("tr")
-            for row in table_rows:
-                rowdata = row.find_all("td")
-                if len(rowdata) == 3:
-                    labels = rowdata[0].find_all("label")
-                    # Strip the day (i.e., Monday) out of the collection date string for parsing
-                    if len(labels) >= 2:
-                        date_label = labels[1]
-                        datestring = date_label.text.strip()
-                    # Add the bin type and collection date to the 'data' dictionary
-                    data["bins"].append(
-                        {
-                            "type": rowdata[2].text.strip(),
-                            "collectionDate": datetime.strptime(
-                                datestring, "%d/%m/%Y"
-                            ).strftime(
-                                date_format
-                            ),  # Format the date as needed
-                        }
-                    )
         except Exception as e:
-            # Here you can log the exception if needed
             print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-            # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
         return data

uk_bin_collection 0.150.1__py3-none-any.whl → 0.152.0__py3-none-any.whl

uk_bin_collection 0.150.1__py3-none-any.whl → 0.152.0__py3-none-any.whl