PyPI - uk_bin_collection - Versions diffs - 0.152.8__py3-none-any.whl → 0.152.10__py3-none-any.whl - Mend

uk_bin_collection 0.152.8__py3-none-any.whl → 0.152.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

uk_bin_collection/uk_bin_collection/councils/BlaenauGwentCountyBoroughCouncil.py CHANGED Viewed

@@ -3,19 +3,14 @@ from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import Select
 from selenium.webdriver.support.wait import WebDriverWait
+import re
+import time
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
-# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
-    """
-    Concrete classes have to implement all abstract operations of the
-    base class. They can also override some operations with a default
-    implementation.
-    """
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
         try:
@@ -29,85 +24,115 @@ class CouncilClass(AbstractGetBinDataClass):
             # Create Selenium webdriver
             driver = create_webdriver(web_driver, headless, None, __name__)
-            driver.get(
-                "https://iportal.itouchvision.com/icollectionday/collection-day/?uuid=238D5F9796C12643D190E3505931401A8C003F0D&lang=en"
+            # Navigate to the main page first
+            driver.get("https://www.blaenau-gwent.gov.uk/en/resident/waste-recycling/")
+            # Handle cookie overlay if present
+            try:
+                # Wait a moment for any overlays to appear
+                WebDriverWait(driver, 3).until(
+                    EC.presence_of_element_located((By.ID, "ccc-overlay"))
+                )
+                # Try to find and click cookie accept buttons
+                cookie_buttons = [
+                    "//button[contains(text(), 'Accept')]",
+                    "//button[contains(text(), 'OK')]",
+                    "//button[@id='ccc-recommended-settings']",
+                    "//button[contains(@class, 'cookie')]"
+                ]
+                for button_xpath in cookie_buttons:
+                    try:
+                        cookie_button = driver.find_element(By.XPATH, button_xpath)
+                        if cookie_button.is_displayed():
+                            cookie_button.click()
+                            break
+                    except:
+                        continue
+            except:
+                pass  # No cookie overlay found
+            # Find and extract the collection day URL
+            find_collection_link = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.XPATH, "//a[contains(text(), 'Find Your Collection Day')]"))
             )
+            collection_url = find_collection_link.get_attribute("href")
+            # Navigate to the collection portal
+            driver.get(collection_url)
-            # Wait for the postcode field to appear then populate it
-            inputElement_postcode = WebDriverWait(driver, 10).until(
+            # Wait for the postcode field and enter postcode
+            postcode_input = WebDriverWait(driver, 10).until(
                 EC.presence_of_element_located((By.ID, "postcodeSearch"))
             )
-            inputElement_postcode.send_keys(user_postcode)
+            postcode_input.send_keys(user_postcode)
-            # Click search button
-            findAddress = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
-                    (By.XPATH, '//button[@class="govuk-button mt-4"]')
-                )
+            # Click Find button
+            find_button = WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Find')]"))
             )
-            findAddress.click()
+            find_button.click()
-            # Wait for the dropdown to be visible
+            # Wait for address dropdown and select by UPRN
             WebDriverWait(driver, 10).until(
                 EC.presence_of_element_located((By.ID, "addressSelect"))
             )
             dropdown = Select(driver.find_element(By.ID, "addressSelect"))
             dropdown.select_by_value(user_uprn)
-            # Wait for the collections table to appear
-            WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
-                    (
-                        By.XPATH,
-                        '//div[@class="ant-row d-flex justify-content-between mb-4 mt-2 css-2rgkd4"]',
-                    )
-                )
+            # Wait for collection data to load
+            time.sleep(3)  # Give JavaScript time to process the selection
+            # Wait for the actual collection data to appear
+            WebDriverWait(driver, 20).until(
+                lambda d: "Your next collections" in d.page_source and ("Recycling" in d.page_source or "Refuse" in d.page_source)
             )
             soup = BeautifulSoup(driver.page_source, features="html.parser")
-            recyclingcalendar = soup.find(
-                "div",
-                {
-                    "class": "ant-row d-flex justify-content-between mb-4 mt-2 css-2rgkd4"
-                },
-            )
-            rows = recyclingcalendar.find_all(
-                "div",
-                {
-                    "class": "ant-col ant-col-xs-12 ant-col-sm-12 ant-col-md-12 ant-col-lg-12 ant-col-xl-12 css-2rgkd4"
-                },
-            )
-            current_year = datetime.now().year
-            current_month = datetime.now().month
-            for row in rows:
-                BinType = row.find("h3").text
-                collectiondate = datetime.strptime(
-                    row.find("div", {"class": "text-white fw-bold"}).text,
-                    "%A %d %B",
-                )
-                if (current_month > 10) and (collectiondate.month < 3):
-                    collectiondate = collectiondate.replace(year=(current_year + 1))
-                else:
-                    collectiondate = collectiondate.replace(year=current_year)
-                dict_data = {
-                    "type": BinType,
-                    "collectionDate": collectiondate.strftime("%d/%m/%Y"),
-                }
-                data["bins"].append(dict_data)
+            page_text = soup.get_text()
+            # Find the collections section in the text
+            if "Your next collections" in page_text:
+                # Extract the section after "Your next collections"
+                collections_section = page_text.split("Your next collections")[1]
+                collections_section = collections_section.split("Related content")[0]  # Stop at Related content
+                # Use regex to find collection patterns
+                # Pattern to match: "Collection Type" followed by "Day Date Month" (stopping before 'followed')
+                pattern = r'(Recycling collection|Refuse Bin)([A-Za-z]+ \d+ [A-Za-z]+)(?=followed|$|[A-Z])'
+                matches = re.findall(pattern, collections_section)
+                for bin_type, date_text in matches:
+                    try:
+                        # Clean up the date text
+                        date_text = date_text.strip()
+                        if "followed by" in date_text:
+                            date_text = date_text.split("followed by")[0].strip()
+                        # Parse the date
+                        collection_date = datetime.strptime(date_text, "%A %d %B")
+                        # Set the correct year
+                        current_year = datetime.now().year
+                        current_month = datetime.now().month
+                        if (current_month > 10) and (collection_date.month < 3):
+                            collection_date = collection_date.replace(year=(current_year + 1))
+                        else:
+                            collection_date = collection_date.replace(year=current_year)
+                        dict_data = {
+                            "type": bin_type,
+                            "collectionDate": collection_date.strftime("%d/%m/%Y"),
+                        }
+                        data["bins"].append(dict_data)
+                    except ValueError:
+                        pass  # Skip if date parsing fails
         except Exception as e:
-            # Here you can log the exception if needed
             print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-            # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
-        return data
+        return data

uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py CHANGED Viewed

@@ -1,83 +1,119 @@
 from datetime import datetime
+import time
-import requests
 from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import Select, WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 class CouncilClass(AbstractGetBinDataClass):
-    """
-    Concrete classes have to implement all abstract operations of the
-    base class. They can also override some operations with a default
-    implementation.
-    """
     def parse_data(self, page: str, **kwargs) -> dict:
         user_uprn = kwargs.get("uprn")
         user_postcode = kwargs.get("postcode")
+        web_driver = kwargs.get("web_driver")
+        headless = kwargs.get("headless")
         check_uprn(user_uprn)
         check_postcode(user_postcode)
         bindata = {"bins": []}
-        API_URL = "https://www.broxbourne.gov.uk/xfp/form/205"
-        post_data = {
-            "page": "490",
-            "locale": "en_GB",
-            "qacf7e570cf99fae4cb3a2e14d5a75fd0d6561058_0_0": user_postcode,
-            "qacf7e570cf99fae4cb3a2e14d5a75fd0d6561058_1_0": user_uprn,
-            "next": "Next",
-        }
-        r = requests.post(API_URL, data=post_data)
-        r.raise_for_status()
-        soup = BeautifulSoup(r.content, features="html.parser")
-        soup.prettify()
-        form__instructions = soup.find(attrs={"class": "form__instructions"})
-        table = form__instructions.find("table")
-        rows = table.find_all("tr")
-        current_year = datetime.now().year
-        current_month = datetime.now().month
-        # Process each row into a list of dictionaries
-        for row in rows[1:]:  # Skip the header row
-            columns = row.find_all("td")
-            collection_date_text = (
-                columns[0].get_text(separator=" ").replace("\xa0", " ").strip()
+        # Use a realistic user agent to help bypass Cloudflare
+        user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+        driver = create_webdriver(web_driver, headless, user_agent, __name__)
+        try:
+            driver.get("https://www.broxbourne.gov.uk/bin-collection-date")
+            # Wait for Cloudflare challenge to complete
+            print("Waiting for page to load (Cloudflare check)...")
+            try:
+                WebDriverWait(driver, 45).until(
+                    lambda d: "Just a moment" not in d.title and d.title != "" and len(d.find_elements(By.TAG_NAME, "input")) > 0
+                )
+                print(f"Page loaded: {driver.title}")
+            except:
+                print(f"Timeout waiting for page load. Current title: {driver.title}")
+                # Try to continue anyway
+                pass
+            time.sleep(8)
+            # Handle cookie banner with multiple attempts
+            try:
+                cookie_btn = WebDriverWait(driver, 15).until(
+                    EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Allow all')]"))
+                )
+                cookie_btn.click()
+            except:
+                pass
+            # Find postcode input
+            postcode_input = WebDriverWait(driver, 20).until(
+                EC.element_to_be_clickable((By.XPATH, "//input[@autocomplete='postal-code']"))
             )
-            service = columns[1].get_text(separator=" ").replace("\xa0", " ").strip()
-            # Safely try to parse collection date
-            if collection_date_text:
-                try:
-                    collection_date = datetime.strptime(
-                        collection_date_text, "%a %d %b"
-                    )
-                    if collection_date.month == 1 and current_month != 1:
-                        collection_date = collection_date.replace(year=current_year + 1)
-                    else:
-                        collection_date = collection_date.replace(year=current_year)
-                    formatted_collection_date = collection_date.strftime(
-                        "%d/%m/%Y"
-                    )  # Use your desired date format
-                    dict_data = {
-                        "type": service,
-                        "collectionDate": formatted_collection_date,
-                    }
-                    bindata["bins"].append(dict_data)
-                except ValueError:
-                    # Skip invalid collection_date
-                    continue
-        # Sort valid bins by collectionDate
-        bindata["bins"].sort(
-            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
-        )
-        return bindata
+            postcode_input.clear()
+            postcode_input.send_keys(user_postcode)
+            # Press Enter to lookup
+            postcode_input.send_keys(Keys.RETURN)
+            # Select address
+            address_select = WebDriverWait(driver, 15).until(
+                EC.presence_of_element_located((By.XPATH, "//select"))
+            )
+            Select(address_select).select_by_value(user_uprn)
+            # Click Next button
+            next_btn = WebDriverWait(driver, 15).until(
+                EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Next')]"))
+            )
+            next_btn.click()
+            # Get results
+            WebDriverWait(driver, 15).until(
+                EC.presence_of_element_located((By.XPATH, "//h1[contains(text(), 'When is my bin collection date?')]"))
+            )
+            table = WebDriverWait(driver, 15).until(
+                EC.presence_of_element_located((By.XPATH, "//h1[contains(text(), 'When is my bin collection date?')]/following::table[1]"))
+            )
+            soup = BeautifulSoup(table.get_attribute('outerHTML'), 'html.parser')
+            rows = soup.find_all('tr')
+            current_year = datetime.now().year
+            current_month = datetime.now().month
+            for row in rows[1:]:
+                columns = row.find_all('td')
+                if len(columns) >= 2:
+                    collection_date_text = columns[0].get_text().strip()
+                    service = columns[1].get_text().strip()
+                    if collection_date_text:
+                        try:
+                            collection_date = datetime.strptime(collection_date_text, "%a %d %b")
+                            if collection_date.month == 1 and current_month != 1:
+                                collection_date = collection_date.replace(year=current_year + 1)
+                            else:
+                                collection_date = collection_date.replace(year=current_year)
+                            bindata["bins"].append({
+                                "type": service,
+                                "collectionDate": collection_date.strftime("%d/%m/%Y")
+                            })
+                        except ValueError:
+                            continue
+            bindata["bins"].sort(key=lambda x: datetime.strptime(x["collectionDate"], "%d/%m/%Y"))
+        finally:
+            driver.quit()
+        return bindata

uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py CHANGED Viewed

@@ -20,6 +20,7 @@ class CouncilClass(AbstractGetBinDataClass):
             data = {"bins": []}
             user_paon = kwargs.get("paon")
             user_postcode = kwargs.get("postcode")
+            user_uprn = kwargs.get("uprn")
             web_driver = kwargs.get("web_driver")
             headless = kwargs.get("headless")
             check_paon(user_paon)
@@ -27,9 +28,13 @@ class CouncilClass(AbstractGetBinDataClass):
             # Create Selenium webdriver
             driver = create_webdriver(web_driver, headless, None, __name__)
-            driver.get(
-                "https://iapp.itouchvision.com/iappcollectionday/collection-day/?uuid=FA353FC74600CBE61BE409534D00A8EC09BDA3AC&lang=en"
+            driver.get(kwargs.get("url"))
+            # Click "Check now" button
+            check_now_button = WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable((By.XPATH, "//a[contains(text(), 'Check now')]"))
             )
+            check_now_button.click()
             # Wait for the postcode field to appear then populate it
             inputElement_postcode = WebDriverWait(driver, 10).until(
@@ -37,71 +42,77 @@ class CouncilClass(AbstractGetBinDataClass):
             )
             inputElement_postcode.send_keys(user_postcode)
-            # Click search button
-            findAddress = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
-                    (By.XPATH, '//button[@class="govuk-button mt-4"]')
-                )
+            # Click Find button
+            find_button = WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Find')]"))
             )
-            findAddress.click()
+            find_button.click()
-            # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
-            WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.XPATH,
-                        "//select[@id='addressSelect']//option[contains(., '"
-                        + user_paon
-                        + "')]",
-                    )
+            # Wait for the address dropdown and select by UPRN
+            if user_uprn:
+                address_option = WebDriverWait(driver, 10).until(
+                    EC.element_to_be_clickable((By.XPATH, f"//option[@value='{user_uprn}']"))
                 )
-            ).click()
-            # Wait for the collections table to appear
-            WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
-                    (
-                        By.XPATH,
-                        '//div[@class="ant-row d-flex justify-content-between mb-4 mt-2 css-2rgkd4"]',
+                address_option.click()
+            else:
+                # Fallback to selecting by address text
+                address_option = WebDriverWait(driver, 10).until(
+                    EC.element_to_be_clickable(
+                        (By.XPATH, f"//select[@id='addressSelect']//option[contains(., '{user_paon}')]")
                     )
                 )
-            )
-            soup = BeautifulSoup(driver.page_source, features="html.parser")
+                address_option.click()
-            recyclingcalendar = soup.find(
-                "div",
-                {
-                    "class": "ant-row d-flex justify-content-between mb-4 mt-2 css-2rgkd4"
-                },
-            )
+            # Wait a moment for the page to update after address selection
+            import time
+            time.sleep(2)
-            rows = recyclingcalendar.find_all(
-                "div",
-                {
-                    "class": "ant-col ant-col-xs-12 ant-col-sm-12 ant-col-md-12 ant-col-lg-12 ant-col-xl-12 css-2rgkd4"
-                },
-            )
+            # Wait for collection information to appear - try multiple possible selectors
+            try:
+                WebDriverWait(driver, 15).until(
+                    EC.presence_of_element_located((By.XPATH, "//h2[contains(text(), 'Your next collections')]"))
+                )
+            except:
+                # Alternative wait for collection data structure
+                WebDriverWait(driver, 10).until(
+                    EC.presence_of_element_located((By.XPATH, "//div[contains(@class, 'ant-row') and contains(@class, 'd-flex')]//h3[@class='text-white']"))
+                )
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+            # Find all collection items with the specific structure - try multiple class patterns
+            collection_items = soup.find_all("div", class_=lambda x: x and "ant-col" in x and "ant-col-xs-12" in x)
+            if not collection_items:
+                # Fallback to finding items by structure
+                collection_items = soup.find_all("div", class_=lambda x: x and "p-2" in x and "d-flex" in x and "flex-column" in x)
             current_year = datetime.now().year
             current_month = datetime.now().month
-            for row in rows:
-                BinType = row.find("h3").text
-                collectiondate = datetime.strptime(
-                    row.find("div", {"class": "text-white fw-bold"}).text,
-                    "%A %d %B",
-                )
-                if (current_month > 10) and (collectiondate.month < 3):
-                    collectiondate = collectiondate.replace(year=(current_year + 1))
-                else:
-                    collectiondate = collectiondate.replace(year=current_year)
-                dict_data = {
-                    "type": BinType,
-                    "collectionDate": collectiondate.strftime("%d/%m/%Y"),
-                }
-                data["bins"].append(dict_data)
+            for item in collection_items:
+                # Extract bin type from h3 element
+                bin_type_elem = item.find("h3", class_="text-white")
+                # Extract date from div with specific classes
+                date_elem = item.find("div", class_="text-white fw-bold")
+                if bin_type_elem and date_elem:
+                    bin_type = bin_type_elem.get_text().strip()
+                    date_text = date_elem.get_text().strip()
+                    try:
+                        collection_date = datetime.strptime(date_text, "%A %d %B")
+                        if (current_month > 10) and (collection_date.month < 3):
+                            collection_date = collection_date.replace(year=(current_year + 1))
+                        else:
+                            collection_date = collection_date.replace(year=current_year)
+                        dict_data = {
+                            "type": bin_type,
+                            "collectionDate": collection_date.strftime("%d/%m/%Y"),
+                        }
+                        data["bins"].append(dict_data)
+                    except ValueError:
+                        continue
         except Exception as e:
             # Here you can log the exception if needed

uk_bin_collection 0.152.8__py3-none-any.whl → 0.152.10__py3-none-any.whl

uk_bin_collection 0.152.8__py3-none-any.whl → 0.152.10__py3-none-any.whl