PyPI - uk_bin_collection - Versions diffs - 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl - Mend

uk_bin_collection 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py CHANGED Viewed

@@ -1,4 +1,9 @@
+import datetime
 from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.wait import WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
@@ -13,6 +18,7 @@ class CouncilClass(AbstractGetBinDataClass):
     """
     def parse_data(self, page: str, **kwargs) -> dict:
+<<<<<<< HEAD
         user_postcode = kwargs.get("postcode")
         check_postcode(user_postcode)
         user_uprn = kwargs.get("uprn")
@@ -43,10 +49,16 @@ class CouncilClass(AbstractGetBinDataClass):
                 i["data-for"]: i.get("value", "")
                 for i in soup.select("input[data-for]")
             }
-            payload_salt = soup.select_one('input[id="pSalt"]').get("value")
-            payload_protected = soup.select_one('input[id="pPageItemsProtected"]').get(
-                "value"
-            )
+            # Check if required form elements exist
+            salt_element = soup.select_one('input[id="pSalt"]')
+            protected_element = soup.select_one('input[id="pPageItemsProtected"]')
+            if not salt_element or not protected_element:
+                raise Exception("Required form elements not found. The council website may have changed or be unavailable.")
+            payload_salt = salt_element.get("value")
+            payload_protected = protected_element.get("value")
             # Add the PostCode and 'SEARCH' to the payload
             payload["p_request"] = "SEARCH"
@@ -123,10 +135,16 @@ class CouncilClass(AbstractGetBinDataClass):
                 i["data-for"]: i.get("value", "")
                 for i in soup.select("input[data-for]")
             }
-            payload_salt = soup.select_one('input[id="pSalt"]').get("value")
-            payload_protected = soup.select_one('input[id="pPageItemsProtected"]').get(
-                "value"
-            )
+            # Check if required form elements exist
+            salt_element = soup.select_one('input[id="pSalt"]')
+            protected_element = soup.select_one('input[id="pPageItemsProtected"]')
+            if not salt_element or not protected_element:
+                raise Exception("Required form elements not found. The council website may have changed or be unavailable.")
+            payload_salt = salt_element.get("value")
+            payload_protected = protected_element.get("value")
             # Add the UPRN and 'SUBMIT' to the payload
             payload["p_request"] = "SUBMIT"
@@ -187,18 +205,117 @@ class CouncilClass(AbstractGetBinDataClass):
             # Create a BeautifulSoup object from the page's HTML
             soup = BeautifulSoup(resource.text, "html.parser")
+=======
+        driver = None
+        try:
+>>>>>>> master
             data = {"bins": []}
+            url = kwargs.get("url")
+            user_paon = kwargs.get("paon")
+            user_postcode = kwargs.get("postcode")
+            web_driver = kwargs.get("web_driver")
+            headless = kwargs.get("headless")
+            check_paon(user_paon)
+            check_postcode(user_postcode)
+            # Use a realistic user agent to help bypass Cloudflare
+            user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+            driver = create_webdriver(web_driver, headless, user_agent, __name__)
+            driver.get(
+                "https://iportal.itouchvision.com/icollectionday/collection-day/?uuid=6CDD2A34C912312074D8E2410531401A8C00EFF7&lang=en"
+            )
+            # Wait for the postcode field to appear then populate it
+            inputElement_postcode = WebDriverWait(driver, 30).until(
+                EC.presence_of_element_located((By.ID, "postcodeSearch"))
+            )
+            inputElement_postcode.send_keys(user_postcode)
+            # Click search button
+            findAddress = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.CLASS_NAME, "govuk-button"))
+            )
+            findAddress.click()
+            # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
+            WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable(
+                    (
+                        By.XPATH,
+                        "//select[@id='addressSelect']//option[contains(., '"
+                        + user_paon
+                        + "')]",
+                    )
+                )
+            ).click()
+            # Wait for the collections table to appear
+            WebDriverWait(driver, 20).until(
+                EC.presence_of_element_located(
+                    (
+                        By.XPATH,
+                        "//h2[contains(@class,'mt-4') and contains(@class,'govuk-heading-s') and normalize-space(.)='Your next collections']",
+                    )
+                )
+            )
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+            collections = soup.find_all("div", {"class": "p-2"})
+            for collection in collections:
+                bin_type = collection.find("h3").get_text()
+                next_collection = soup.find("div", {"class": "fw-bold"}).get_text()
+                following_collection = soup.find(
+                    lambda t: (
+                        t.name == "div"
+                        and t.get_text(strip=True).lower().startswith("followed by")
+                    )
+                ).get_text()
+                next_collection_date = datetime.strptime(next_collection, "%A %d %B")
+                following_collection_date = datetime.strptime(
+                    following_collection, "followed by %A %d %B"
+                )
+                current_date = datetime.now()
+                next_collection_date = next_collection_date.replace(
+                    year=current_date.year
+                )
+                following_collection_date = following_collection_date.replace(
+                    year=current_date.year
+                )
+                next_collection_date = get_next_occurrence_from_day_month(
+                    next_collection_date
+                )
+                following_collection_date = get_next_occurrence_from_day_month(
+                    following_collection_date
+                )
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": next_collection_date.strftime(date_format),
+                }
+                data["bins"].append(dict_data)
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": following_collection_date.strftime(date_format),
+                }
+                data["bins"].append(dict_data)
-            # Loop through the items on the page and build a JSON object for ingestion
-            for item in soup.select(".t-MediaList-item"):
-                for value in item.select(".t-MediaList-body"):
-                    dict_data = {
-                        "type": value.select("span")[1].get_text(strip=True).title(),
-                        "collectionDate": datetime.strptime(
-                            value.select(".t-MediaList-desc")[0].get_text(strip=True),
-                            "%A, %d %B, %Y",
-                        ).strftime(date_format),
-                    }
-                    data["bins"].append(dict_data)
-            return data
+        except Exception as e:
+            # Here you can log the exception if needed
+            print(f"An error occurred: {e}")
+            # Optionally, re-raise the exception if you want it to propagate
+            raise
+        finally:
+            # This block ensures that the driver is closed regardless of an exception
+            if driver:
+                driver.quit()
+        return data

uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py CHANGED Viewed

@@ -125,23 +125,6 @@ class CouncilClass(AbstractGetBinDataClass):
             # Wait for the page to load - giving it extra time
             time.sleep(5)
-            # Use only the selector that we know works
-            # print("Looking for bin type elements...")
-            try:
-                bin_type_selector = (
-                    By.CSS_SELECTOR,
-                    "div.formatting_bold.formatting_size_bigger.formatting span.value-as-text",
-                )
-                WebDriverWait(driver, 15).until(
-                    EC.presence_of_element_located(bin_type_selector)
-                )
-                # print(f"Found bin type elements with selector: {bin_type_selector}")
-            except TimeoutException:
-                # print("Could not find bin type elements. Taking screenshot for debugging...")
-                screenshot_path = f"bin_type_error_{int(time.time())}.png"
-                driver.save_screenshot(screenshot_path)
-                # print(f"Screenshot saved to {screenshot_path}")
             # Create BS4 object from driver's page source
             # print("Parsing page with BeautifulSoup...")
             soup = BeautifulSoup(driver.page_source, features="html.parser")
@@ -149,122 +132,37 @@ class CouncilClass(AbstractGetBinDataClass):
             # Initialize data dictionary
             data = {"bins": []}
-            # Looking for bin types in the exact HTML structure
-            bin_type_elements = soup.select(
-                "div.page_cell.contains_widget:first-of-type div.formatting_bold.formatting_size_bigger.formatting span.value-as-text"
-            )
-            # print(f"Found {len(bin_type_elements)} bin type elements")
-            # Look specifically for date elements with the exact structure
-            date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
-            hidden_dates = soup.select(
-                "div.col-sm-12.font-xs-3xl input[type='hidden'][value*='/']"
-            )
-            # print(f"Found {len(bin_type_elements)} bin types and {len(date_elements)} date elements")
-            # We need a smarter way to match bin types with their dates
-            bin_count = 0
+            for row in soup.select(".listing_template_row"):
+                # Title (waste stream) is the first <p> in the section
+                first_p = row.find("p")
+                if not first_p:
+                    continue
+                stream = first_p.get_text(" ", strip=True)
-            # Map of bin types to their collection dates
-            bin_date_map = {}
+                for p in row.find_all("p"):
+                    t = p.get_text("\n", strip=True)
-            # Extract all date strings that look like actual dates
-            date_texts = []
-            date_pattern = re.compile(
-                r"(?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)\s+\d+(?:st|nd|rd|th)?\s+\w+\s+\d{4}",
-                re.IGNORECASE,
-            )
+                    if re.search(r"\bNext collection\b", t, flags=re.I):
+                        # Expect format: "Next collection\nTuesday 16th September 2025"
+                        parts = [x.strip() for x in t.split("\n") if x.strip()]
+                        if len(parts) >= 2:
+                            next_collection_display = parts[-1]  # last line
-            for element in date_elements:
-                text = element.get_text(strip=True)
-                if date_pattern.search(text):
-                    date_texts.append(text)
-                    # print(f"Found valid date text: {text}")
-            # Find hidden date inputs with values in DD/MM/YYYY format
-            hidden_date_values = []
-            for hidden in hidden_dates:
-                value = hidden.get("value", "")
-                if re.match(r"\d{1,2}/\d{1,2}/\d{4}", value):
-                    hidden_date_values.append(value)
-                    # print(f"Found hidden date value: {value}")
-            # When filtering date elements
-            date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
-            valid_date_elements = []
-            for element in date_elements:
-                text = element.get_text(strip=True)
-                if contains_date(text):
-                    valid_date_elements.append(element)
-                    # print(f"Found valid date element: {text}")
-                else:
-                    pass
-                    # print(f"Skipping non-date element: {text}")
-            # print(f"Found {len(bin_type_elements)} bin types and {len(valid_date_elements)} valid date elements")
-            # When processing each bin type
-            for i, bin_type_elem in enumerate(bin_type_elements):
-                bin_type = bin_type_elem.get_text(strip=True)
-                # Try to find a date for this bin type
-                date_text = None
-                # Look for a valid date element
-                if i < len(valid_date_elements):
-                    date_elem = valid_date_elements[i]
-                    date_text = date_elem.get_text(strip=True)
-                # If we don't have a valid date yet, try using the hidden input
-                if not date_text or not contains_date(date_text):
-                    if i < len(hidden_dates):
-                        date_value = hidden_dates[i].get("value")
-                        if contains_date(date_value):
-                            date_text = date_value
-                # Skip if we don't have a valid date
-                if not date_text or not contains_date(date_text):
-                    # print(f"No valid date found for bin type: {bin_type}")
-                    continue
+                # Build record
+                next_date = datetime.strptime(
+                    remove_ordinal_indicator_from_date_string(next_collection_display),
+                    "%A %d %B %Y",
+                )
-                # print(f"Found bin type: {bin_type} with date: {date_text}")
+                # Create bin entry
+                bin_entry = {
+                    "type": stream,
+                    "collectionDate": next_date.strftime(date_format),
+                }
-                try:
-                    # Clean up the date text
-                    date_text = remove_ordinal_indicator_from_date_string(date_text)
-                    # Try to parse the date
-                    try:
-                        collection_date = datetime.strptime(
-                            date_text, "%A %d %B %Y"
-                        ).date()
-                    except ValueError:
-                        try:
-                            collection_date = datetime.strptime(
-                                date_text, "%d/%m/%Y"
-                            ).date()
-                        except ValueError:
-                            # Last resort
-                            collection_date = parse(date_text).date()
-                    # Create bin entry
-                    bin_entry = {
-                        "type": bin_type,
-                        "collectionDate": collection_date.strftime(date_format),
-                    }
-                    # Add to data
-                    data["bins"].append(bin_entry)
-                    bin_count += 1
-                    # print(f"Added bin entry: {bin_entry}")
-                except Exception as e:
-                    pass
-                    # print(f"Error parsing date '{date_text}': {str(e)}")
-            # print(f"Successfully parsed {bin_count} bin collections")
+                # Add to data
+                data["bins"].append(bin_entry)
+                # print(f"Added bin entry: {bin_entry}")
             if not data["bins"]:
                 # print("No bin data found. Saving page for debugging...")

uk_bin_collection 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl

uk_bin_collection 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl