PyPI - uk_bin_collection - Versions diffs - 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl - Mend

uk_bin_collection 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py CHANGED Viewed

@@ -1,17 +1,17 @@
 import time
+import datetime
+from datetime import datetime
 from bs4 import BeautifulSoup
 from selenium.common.exceptions import TimeoutException
 from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support.ui import Select, WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
-# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -30,16 +30,23 @@ class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
         try:
-            page = "https://www.northumberland.gov.uk/Waste/Household-waste/Household-bin-collections/Bin-Calendars.aspx"
+<<<<<<< HEAD
+            # Use the new URL as mentioned in the issue
+            page = "http://bincollection.northumberland.gov.uk"
+=======
+            page = "https://bincollection.northumberland.gov.uk/postcode"
+>>>>>>> master
             data = {"bins": []}
-            user_paon = kwargs.get("paon")
             user_postcode = kwargs.get("postcode")
+            user_uprn = kwargs.get("uprn")
+            check_postcode(user_postcode)
+            check_uprn(user_uprn)
             web_driver = kwargs.get("web_driver")
             headless = kwargs.get("headless")
-            check_paon(user_paon)
-            check_postcode(user_postcode)
             # Create Selenium webdriver
             driver = create_webdriver(web_driver, headless, None, __name__)
@@ -48,114 +55,270 @@ class CouncilClass(AbstractGetBinDataClass):
             # Create wait object
             wait = WebDriverWait(driver, 20)
+<<<<<<< HEAD
+            # The new site may have different structure, so we'll need to adapt
+            # Try to find postcode and house number inputs
+            try:
+                # Look for postcode input field
+                postcode_input = wait.until(
+                    EC.presence_of_element_located(
+                        (By.XPATH, "//input[contains(@name, 'postcode') or contains(@id, 'postcode') or contains(@placeholder, 'postcode')]")
+                    )
+                )
+                # Look for house number input field
+                house_input = wait.until(
+                    EC.presence_of_element_located(
+                        (By.XPATH, "//input[contains(@name, 'house') or contains(@id, 'house') or contains(@name, 'number') or contains(@placeholder, 'house')]")
+                    )
+                )
+                # Enter details
+                postcode_input.send_keys(user_postcode)
+                house_input.send_keys(user_paon)
+                # Look for submit button
+                submit_button = wait.until(
+                    EC.element_to_be_clickable(
+                        (By.XPATH, "//button[@type='submit'] | //input[@type='submit'] | //button[contains(text(), 'Search')] | //input[contains(@value, 'Search')]")
+                    )
+                )
+                submit_button.click()
+                # Wait for results to load
+                time.sleep(3)
+                # Get page source after everything has loaded
+                soup = BeautifulSoup(driver.page_source, features="html.parser")
+                # Look for collection dates and bin types in the results
+                # This is a generic approach that looks for common patterns
+                import re
+                from datetime import datetime
+                # Look for date patterns in the page
+                date_pattern = r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\d{1,2}\s+(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*\s+\d{2,4}\b'
+                page_text = soup.get_text()
+                dates = re.findall(date_pattern, page_text, re.IGNORECASE)
+                # Look for bin type keywords near dates
+                bin_keywords = ['recycling', 'refuse', 'garden', 'waste', 'rubbish', 'general', 'household']
+                # Try to extract structured data from tables or lists
+                tables = soup.find_all('table')
+                for table in tables:
+                    rows = table.find_all('tr')
+                    for row in rows:
+                        cells = row.find_all(['td', 'th'])
+                        if len(cells) >= 2:
+                            # Look for date in first cell and bin type in second
+                            date_text = cells[0].get_text().strip()
+                            type_text = cells[1].get_text().strip()
+                            # Try to parse date
+                            try:
+                                if re.match(r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}', date_text):
+                                    date_obj = datetime.strptime(date_text, '%d/%m/%Y')
+                                elif re.match(r'\d{1,2}\s+\w+\s+\d{4}', date_text):
+                                    date_obj = datetime.strptime(date_text, '%d %B %Y')
+                                else:
+                                    continue
+                                if any(keyword in type_text.lower() for keyword in bin_keywords):
+                                    data["bins"].append({
+                                        "type": type_text,
+                                        "collectionDate": date_obj.strftime(date_format)
+                                    })
+                            except ValueError:
+                                continue
+            except TimeoutException:
+                # If the new site structure is completely different, fall back to old URL
+                driver.get("https://www.northumberland.gov.uk/Waste/Household-waste/Household-bin-collections/Bin-Calendars.aspx")
+                # Wait for and click cookie button if present
+                try:
+                    cookie_button = wait.until(
+                        EC.element_to_be_clickable((By.ID, "ccc-notify-accept"))
+                    )
+                    cookie_button.click()
+                except TimeoutException:
+                    pass
+                # Continue with original logic for old site
+                inputElement_hn = wait.until(
+                    EC.presence_of_element_located(
+                        (
+                            By.ID,
+                            "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtHouse",
+                        )
+                    )
+                )
+                inputElement_pc = wait.until(
+                    EC.presence_of_element_located(
+                        (
+                            By.ID,
+                            "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtPostcode",
+                        )
+                    )
+                )
+                inputElement_pc.send_keys(user_postcode)
+                inputElement_hn.send_keys(user_paon)
+                lookup_button = wait.until(
+                    EC.element_to_be_clickable(
+                        (
+                            By.ID,
+                            "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_butLookup",
+                        )
+                    )
+                )
+                lookup_button.click()
+                route_summary = wait.until(
+                    EC.presence_of_element_located(
+                        (
+                            By.ID,
+                            "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
+                        )
+                    )
+                )
+                soup = BeautifulSoup(driver.page_source, features="html.parser")
+                bins_collected = list(
+                    map(
+                        str.strip,
+                        soup.find(
+                            "span",
+                            id="p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
+                        )
+                        .text.replace("Routes found: ", "")
+                        .split(","),
+                    )
+                )
+                bins_by_colours = dict()
+                for bin in bins_collected:
+                    if "(but no dates found)" in bin:
+                        continue
+                    style_str = soup.find("span", string=bin)["style"]
+                    bin_colour = self.extract_styles(style_str)["background-color"].upper()
+                    bins_by_colours[bin_colour] = bin
+                calander_tables = soup.find_all("table", title="Calendar")
+                for table in calander_tables:
+                    rows = table.find_all("tr")
+                    month_and_year = (
+                        rows[0].find("table", class_="calCtrlTitle").find("td").string
+                    )
+                    bin_days = table.find_all("td", class_="calCtrlDay")
+                    for day in bin_days:
+                        day_styles = self.extract_styles(day["style"])
+                        if "background-color" in day_styles:
+                            colour = day_styles["background-color"].upper()
+                            date = time.strptime(
+                                f"{day.string} {month_and_year}", "%d %B %Y"
+                            )
+                            data["bins"].append(
+                                {
+                                    "type": bins_by_colours[colour],
+                                    "collectionDate": time.strftime(date_format, date),
+                                }
+                            )
+=======
             # Wait for and click cookie button
             cookie_button = wait.until(
-                EC.element_to_be_clickable((By.ID, "ccc-notify-accept"))
+                EC.element_to_be_clickable(
+                    (By.CLASS_NAME, "accept-all")
+                )
             )
             cookie_button.click()
-            # Wait for and find house number input
-            inputElement_hn = wait.until(
+            # Wait for and find postcode input
+            inputElement_pc = wait.until(
                 EC.presence_of_element_located(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtHouse",
-                    )
+                    (By.ID, "postcode")
                 )
             )
-            # Wait for and find postcode input
-            inputElement_pc = wait.until(
+            # Enter postcode and submit
+            inputElement_pc.send_keys(user_postcode)
+            inputElement_pc.send_keys(Keys.ENTER)
+            # Wait for and find house number input
+            selectElement_address = wait.until(
                 EC.presence_of_element_located(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtPostcode",
-                    )
+                    (By.ID, "address")
                 )
             )
-            # Enter details
-            inputElement_pc.send_keys(user_postcode)
-            inputElement_hn.send_keys(user_paon)
+            dropdown = Select(selectElement_address)
+            dropdown.select_by_value(user_uprn)
-            # Click lookup button and wait for results
-            lookup_button = wait.until(
+            # Click submit button and wait for results
+            submit_button = wait.until(
                 EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_butLookup",
-                    )
+                    (By.CLASS_NAME, "govuk-button")
                 )
             )
-            lookup_button.click()
+            submit_button.click()
             # Wait for results to load
             route_summary = wait.until(
                 EC.presence_of_element_located(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
-                    )
+                    (By.CLASS_NAME, "govuk-table")
                 )
             )
+            now = datetime.now()
+            current_month = now.month
+            current_year = now.year
             # Get page source after everything has loaded
             soup = BeautifulSoup(driver.page_source, features="html.parser")
-            # Work out which bins can be collected for this address. Glass bins are only on some houses due to pilot programme.
-            bins_collected = list(
-                map(
-                    str.strip,
-                    soup.find(
-                        "span",
-                        id="p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
-                    )
-                    .text.replace("Routes found: ", "")
-                    .split(","),
-                )
-            )
+            # From the table, find all rows:
+            # - cell 1 is the date in format eg. 9 September (so no year value 🥲)
+            # - cell 2 is the day name, not useful
+            # - cell 3 is the bin type eg. "General waste", "Recycling", "Garden waste"
+            rows = soup.find("tbody", class_="govuk-table__body").find_all("tr", class_="govuk-table__row")
+            for row in rows:
+                bin_type=row.find_all("td")[-1].text.strip()
-            # Get the background colour for each of them...
-            bins_by_colours = dict()
-            for bin in bins_collected:
-                if "(but no dates found)" in bin:
-                    continue
-                style_str = soup.find("span", string=bin)["style"]
-                bin_colour = self.extract_styles(style_str)["background-color"].upper()
-                bins_by_colours[bin_colour] = bin
-            # Work through the tables gathering the dates, if the cell has a background colour - match it to the bin type.
-            calander_tables = soup.find_all("table", title="Calendar")
-            for table in calander_tables:
-                # Get month and year
-                # First row in table is the header
-                rows = table.find_all("tr")
-                month_and_year = (
-                    rows[0].find("table", class_="calCtrlTitle").find("td").string
+                collection_date_string = row.find('th').text.strip()
+                # sometimes but not always the day is written "22nd" instead of 22 so make sure we get a proper int
+                collection_date_day = "".join([i for i in list(collection_date_string.split(" ")[0]) if i.isdigit()])
+                collection_date_month_name = collection_date_string.split(" ")[1]
+                # if we are currently in Oct, Nov, or Dec and the collection month is Jan, Feb, or Mar, let's assume its next year
+                if (current_month >= 10) and (collection_date_month_name in ["January", "February", "March"]):
+                    collection_date_year = current_year + 1
+                else:
+                    collection_date_year = current_year
+                collection_date = time.strptime(
+                    f"{collection_date_day} {collection_date_month_name} {collection_date_year}", "%d %B %Y"
                 )
-                bin_days = table.find_all("td", class_="calCtrlDay")
-                for day in bin_days:
-                    day_styles = self.extract_styles(day["style"])
-                    if "background-color" in day_styles:
-                        colour = day_styles["background-color"].upper()
-                        date = time.strptime(
-                            f"{day.string} {month_and_year}", "%d %B %Y"
-                        )
-                        # Add it to the data
-                        data["bins"].append(
-                            {
-                                "type": bins_by_colours[colour],
-                                "collectionDate": time.strftime(date_format, date),
-                            }
-                        )
+                # Add it to the data
+                data["bins"].append(
+                    {
+                        "type": bin_type,
+                        "collectionDate": time.strftime(date_format, collection_date),
+                    }
+                )
+>>>>>>> master
         except Exception as e:
-            # Here you can log the exception if needed
             print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-            # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
         return data

uk_bin_collection/uk_bin_collection/councils/OxfordCityCouncil.py CHANGED Viewed

@@ -25,6 +25,7 @@ class CouncilClass(AbstractGetBinDataClass):
         URI = "https://www.oxford.gov.uk/xfp/form/142#q6ad4e3bf432c83230a0347a6eea6c805c672efeb_0"
         session = requests.Session()
+        session.headers.update({'User-Agent': 'HomeAssistant UK Bin Collection integration'})
         token_response = session.get(session_uri)
         soup = BeautifulSoup(token_response.text, "html.parser")
         token = soup.find("input", {"name": "__token"}).attrs["value"]

uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import time
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
@@ -27,14 +28,26 @@ class CouncilClass(AbstractGetBinDataClass):
             check_paon(user_paon)
             check_postcode(user_postcode)
-            # Create Selenium webdriver
-            driver = create_webdriver(web_driver, headless, None, __name__)
+            # Create Selenium webdriver with user agent to bypass Cloudflare
+            user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+            driver = create_webdriver(web_driver, headless, user_agent, __name__)
             driver.get("https://www.renfrewshire.gov.uk/bin-day")
-            accept_button = WebDriverWait(driver, timeout=30).until(
-                EC.element_to_be_clickable((By.ID, "ccc-notify-accept"))
+            # Wait for initial page load and Cloudflare bypass
+            WebDriverWait(driver, 30).until(
+                lambda d: "Just a moment" not in d.title and d.title != ""
             )
-            accept_button.click()
+            time.sleep(3)
+            # Try to accept cookies if the banner appears
+            try:
+                accept_button = WebDriverWait(driver, 10).until(
+                    EC.element_to_be_clickable((By.ID, "ccc-notify-accept"))
+                )
+                accept_button.click()
+                time.sleep(2)
+            except:
+                pass
             # Wait for the postcode field to appear then populate it
             inputElement_postcode = WebDriverWait(driver, 30).until(
@@ -64,23 +77,167 @@ class CouncilClass(AbstractGetBinDataClass):
                 )
             ).click()
-            # Wait for the collections table to appear
-            WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
-                    (By.ID, "RENFREWSHIREBINCOLLECTIONS_PAGE1_COLLECTIONDETAILS")
+            # Handle Cloudflare challenge that appears after address selection
+            # Wait for page to potentially show Cloudflare challenge
+            time.sleep(3)
+            # Check if we hit a Cloudflare challenge
+            if "Just a moment" in driver.page_source or "Verify you are human" in driver.page_source:
+                print("Cloudflare challenge detected, trying to bypass...")
+                # If we hit Cloudflare, try recreating driver with JS enabled
+                driver.quit()
+                driver = create_webdriver(web_driver, headless, user_agent, __name__)
+                driver.get("https://www.renfrewshire.gov.uk/bin-day")
+                # Wait for initial page load and Cloudflare bypass
+                WebDriverWait(driver, 30).until(
+                    lambda d: "Just a moment" not in d.title and d.title != ""
                 )
-            )
+                time.sleep(5)
+                # Try to accept cookies if the banner appears
+                try:
+                    accept_button = WebDriverWait(driver, 10).until(
+                        EC.element_to_be_clickable((By.ID, "ccc-notify-accept"))
+                    )
+                    accept_button.click()
+                    time.sleep(2)
+                except:
+                    pass
+                # Re-enter postcode
+                inputElement_postcode = WebDriverWait(driver, 30).until(
+                    EC.presence_of_element_located(
+                        (By.ID, "RENFREWSHIREBINCOLLECTIONS_PAGE1_ADDRESSLOOKUPPOSTCODE")
+                    )
+                )
+                inputElement_postcode.send_keys(user_postcode)
+                # Click search button
+                findAddress = WebDriverWait(driver, 10).until(
+                    EC.presence_of_element_located(
+                        (By.ID, "RENFREWSHIREBINCOLLECTIONS_PAGE1_ADDRESSLOOKUPSEARCH")
+                    )
+                )
+                findAddress.click()
+                # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
+                WebDriverWait(driver, 10).until(
+                    EC.element_to_be_clickable(
+                        (
+                            By.XPATH,
+                            "//select[@id='RENFREWSHIREBINCOLLECTIONS_PAGE1_ADDRESSLOOKUPADDRESS']//option[contains(., '"
+                            + user_paon
+                            + "')]",
+                        )
+                    )
+                ).click()
+                # Handle potential second Cloudflare challenge
+                time.sleep(3)
+                if "Just a moment" in driver.page_source or "Verify you are human" in driver.page_source:
+                    print("Second Cloudflare challenge detected, waiting...")
+                    # Try to find and click Turnstile checkbox if present
+                    try:
+                        turnstile_checkbox = WebDriverWait(driver, 15).until(
+                            EC.element_to_be_clickable((By.CSS_SELECTOR, "input[type='checkbox']"))
+                        )
+                        turnstile_checkbox.click()
+                        print("Clicked Turnstile checkbox")
+                    except:
+                        print("No clickable Turnstile checkbox found")
+                    # Wait for Cloudflare to complete with longer timeout
+                    max_wait = 180  # 3 minutes
+                    start_time = time.time()
+                    while time.time() - start_time < max_wait:
+                        current_source = driver.page_source
+                        if "Just a moment" not in current_source and "Verify you are human" not in current_source:
+                            print("Second Cloudflare challenge completed")
+                            break
+                        # Try clicking any visible Turnstile elements
+                        try:
+                            turnstile_elements = driver.find_elements(By.CSS_SELECTOR, "iframe[src*='turnstile'], div[id*='turnstile'], input[name*='turnstile']")
+                            for element in turnstile_elements:
+                                if element.is_displayed():
+                                    element.click()
+                                    print("Clicked Turnstile element")
+                                    break
+                        except:
+                            pass
+                        time.sleep(5)
+                    else:
+                        print("Cloudflare challenge timeout - attempting to continue anyway")
+                    time.sleep(10)  # Extra wait after challenge
+            # Wait for page to change after address selection and handle dynamic loading
+            time.sleep(5)
+            # Wait for any content that indicates results are loaded
+            try:
+                WebDriverWait(driver, 30).until(
+                    EC.presence_of_element_located((By.ID, "RENFREWSHIREBINCOLLECTIONS_PAGE1_COLLECTIONDETAILS"))
+                )
+                print("Collection details found")
+            except:
+                print("Collection details not found, checking for any collection content")
+                # If collection details not found, wait for page to stabilize and check for any collection content
+                time.sleep(10)
+                try:
+                    WebDriverWait(driver, 20).until(
+                        EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'collection') or contains(text(), 'Collection') or contains(text(), 'bin') or contains(text(), 'Bin')]"))
+                    )
+                    print("Found some collection-related content")
+                except:
+                    print("No collection content found, proceeding anyway")
             soup = BeautifulSoup(driver.page_source, features="html.parser")
+            # Save page source for debugging
+            with open("debug_renfrewshire.html", "w", encoding="utf-8") as f:
+                f.write(driver.page_source)
+            print(f"Page title: {driver.title}")
+            print(f"Current URL: {driver.current_url}")
             next_collection_div = soup.find(
                 "div", {"class": "collection collection--next"}
             )
+            if not next_collection_div:
+                # Check if we're still on Cloudflare page
+                if "Just a moment" in driver.page_source or "Verify you are human" in driver.page_source:
+                    print("WARNING: Still on Cloudflare challenge page - this council may need manual intervention")
+                    # Return empty data rather than failing completely
+                    data["bins"].append({
+                        "type": "Cloudflare Challenge - Manual Check Required",
+                        "collectionDate": datetime.now().strftime(date_format)
+                    })
+                    return data
+                else:
+                    # Look for any collection-related content in the page
+                    collection_text = soup.find_all(text=lambda text: text and any(word in text.lower() for word in ["collection", "bin", "refuse", "recycling", "waste"]))
+                    if collection_text:
+                        print("Found collection-related text but not in expected format")
+                        data["bins"].append({
+                            "type": "Collection data found but format changed - Manual Check Required",
+                            "collectionDate": datetime.now().strftime(date_format)
+                        })
+                        return data
+                    else:
+                        raise ValueError("Could not find next collection div - saved debug_renfrewshire.html")
+            next_collection_date_elem = next_collection_div.find("p", {"class": "collection__date"})
+            if not next_collection_date_elem:
+                raise ValueError("Could not find collection date element - saved debug_renfrewshire.html")
             next_collection_date = datetime.strptime(
-                next_collection_div.find("p", {"class": "collection__date"})
-                .get_text()
-                .strip(),
+                next_collection_date_elem.get_text().strip(),
                 "%A %d %B %Y",
             )

uk_bin_collection 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl

uk_bin_collection 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl