uk_bin_collection 0.152.8__py3-none-any.whl → 0.152.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +14 -20
- uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py +69 -46
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +119 -37
- uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py +158 -115
- uk_bin_collection/uk_bin_collection/councils/BasildonCouncil.py +87 -66
- uk_bin_collection/uk_bin_collection/councils/BlabyDistrictCouncil.py +5 -1
- uk_bin_collection/uk_bin_collection/councils/BlaenauGwentCountyBoroughCouncil.py +91 -66
- uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py +103 -67
- uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +67 -56
- uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +63 -95
- uk_bin_collection/uk_bin_collection/councils/CherwellDistrictCouncil.py +39 -18
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +106 -97
- uk_bin_collection/uk_bin_collection/councils/CopelandBoroughCouncil.py +80 -75
- uk_bin_collection/uk_bin_collection/councils/CotswoldDistrictCouncil.py +191 -67
- uk_bin_collection/uk_bin_collection/councils/CoventryCityCouncil.py +6 -2
- uk_bin_collection/uk_bin_collection/councils/EastHertsCouncil.py +27 -116
- uk_bin_collection/uk_bin_collection/councils/EastLothianCouncil.py +27 -39
- uk_bin_collection/uk_bin_collection/councils/EastRenfrewshireCouncil.py +61 -56
- uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py +80 -10
- uk_bin_collection/uk_bin_collection/councils/FermanaghOmaghDistrictCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py +112 -36
- uk_bin_collection/uk_bin_collection/councils/SouthwarkCouncil.py +23 -1
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +4 -1
- uk_bin_collection/uk_bin_collection/get_bin_data.py +1 -1
- {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.10.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.10.dist-info}/RECORD +29 -30
- uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py +0 -69
- {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.10.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.10.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.10.dist-info}/entry_points.txt +0 -0
@@ -1,93 +1,98 @@
-
-
+import requests
 from bs4 import BeautifulSoup

 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


+# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
-
-
+    base class. They can also override some operations with a default
+    implementation.
     """

     def parse_data(self, page: str, **kwargs) -> dict:
-        uprn = kwargs.get("uprn")
-        check_uprn(uprn)
-        council = "CPL"

-
+        user_uprn = kwargs.get("uprn")
+        postcode = kwargs.get("postcode")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        URI = "https://waste.cumberland.gov.uk/renderform?t=25&k=E43CEB1FB59F859833EF2D52B16F3F4EBE1CAB6A"
+
+        s = requests.Session()
+
+        # Make the GET request
+        response = s.get(URI)
+
+        # Make a BS4 object
+        soup = BeautifulSoup(response.content, features="html.parser")
+
+        # print(soup)
+
+        token = (soup.find("input", {"name": "__RequestVerificationToken"})).get(
+            "value"
+        )
+
+        formguid = (soup.find("input", {"name": "FormGuid"})).get("value")
+
+        # print(token)
+        # print(formguid)
+
         headers = {
-            "Content-Type": "
-            "
-            "
+            "Content-Type": "application/x-www-form-urlencoded",
+            "Origin": "https://waste.cumberland.gov.uk",
+            "Referer": "https://waste.cumberland.gov.uk/renderform?t=25&k=E43CEB1FB59F859833EF2D52B16F3F4EBE1CAB6A",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 OPR/98.0.0.0",
+            "X-Requested-With": "XMLHttpRequest",
         }
-
-
-
-
-
-            "
-            "
-
-
-            "
+
+        payload = {
+            "__RequestVerificationToken": token,
+            "FormGuid": formguid,
+            "ObjectTemplateID": "25",
+            "Trigger": "submit",
+            "CurrentSectionID": "33",
+            "TriggerCtl": "",
+            "FF265": f"U{user_uprn}",
+            "FF265lbltxt": "Please select your address",
+            "FF265-text": postcode
+        }
+
+        # print(payload)
+
+        response = s.post(
+            "https://waste.cumberland.gov.uk/renderform/Form",
             headers=headers,
-            data=
+            data=payload,
+        )
+
+        soup = BeautifulSoup(response.content, features="html.parser")
+        for row in soup.find_all("div", class_="resirow"):
+            # Extract the type of collection (e.g., Recycling, Refuse)
+            collection_type_div = row.find("div", class_="col")
+            collection_type = (
+                collection_type_div.get("class")[1]
+                if collection_type_div
+                else "Unknown"
+            )
+
+            # Extract the collection date
+            date_div = row.find("div", style="width:360px;")
+            collection_date = date_div.text.strip() if date_div else "Unknown"
+
+            dict_data = {
+                "type": collection_type,
+                "collectionDate": datetime.strptime(
+                    collection_date, "%A %d %B %Y"
+                ).strftime(date_format),
+            }
+            bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
         )

-
-        raise ValueError("No bin data found for provided UPRN.")
-
-        # Get HTML from SOAP response
-        xmltree = ElementTree.fromstring(response.text)
-        html = xmltree.find(
-            ".//{http://webaspx-collections.azurewebsites.net/}getRoundCalendarForUPRNResult"
-        ).text
-        # Parse with BS4
-        soup = BeautifulSoup(html, features="html.parser")
-        soup.prettify()
-
-        data = {"bins": []}
-        for bin_type in ["Refuse", "Recycling", "Garden"]:
-            bin_el = soup.find("b", string=bin_type)
-            if bin_el:
-                bin_info = bin_el.next_sibling.split(": ")[1]
-                collection_date = ""
-                results = re.search("([A-Za-z]+ \\d\\d? [A-Za-z]+) then", bin_info)
-                if results:
-                    if results[1] == "Today":
-                        date = datetime.now()
-                    elif results[1] == "Tomorrow":
-                        date = datetime.now() + timedelta(days=1)
-                    else:
-                        date = get_next_occurrence_from_day_month(
-                            datetime.strptime(
-                                results[1] + " " + datetime.now().strftime("%Y"),
-                                "%a %d %b %Y",
-                            )
-                        )
-                    if date:
-                        collection_date = date.strftime(date_format)
-                else:
-                    results2 = re.search("([A-Za-z]+) then", bin_info)
-                    if results2:
-                        if results2[1] == "Today":
-                            collection_date = datetime.now().strftime(date_format)
-                        elif results2[1] == "Tomorrow":
-                            collection_date = (
-                                datetime.now() + timedelta(days=1)
-                            ).strftime(date_format)
-                        else:
-                            collection_date = results2[1]
-
-                if collection_date != "":
-                    dict_data = {
-                        "type": bin_type,
-                        "collectionDate": collection_date,
-                    }
-                    data["bins"].append(dict_data)
-
-        return data
+        return bindata
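
The rewrite above drops the old webaspx SOAP calendar lookup in favour of scraping Cumberland's renderform service directly: one GET to harvest the __RequestVerificationToken and FormGuid hidden inputs, then a POST that submits the UPRN through form field FF265. A minimal standalone sketch of that token-then-POST flow; the UPRN and postcode values here are placeholders, everything else is taken from the diff:

import requests
from bs4 import BeautifulSoup

URI = "https://waste.cumberland.gov.uk/renderform?t=25&k=E43CEB1FB59F859833EF2D52B16F3F4EBE1CAB6A"

s = requests.Session()
soup = BeautifulSoup(s.get(URI).content, "html.parser")

payload = {
    # Anti-forgery values harvested from the rendered form
    "__RequestVerificationToken": soup.find("input", {"name": "__RequestVerificationToken"})["value"],
    "FormGuid": soup.find("input", {"name": "FormGuid"})["value"],
    "ObjectTemplateID": "25",
    "Trigger": "submit",
    "CurrentSectionID": "33",
    "TriggerCtl": "",
    "FF265": "U100110696239",  # placeholder UPRN, prefixed with "U" as in the diff
    "FF265lbltxt": "Please select your address",
    "FF265-text": "CA28 7XY",  # placeholder postcode
}

response = s.post(
    "https://waste.cumberland.gov.uk/renderform/Form",
    headers={
        "Content-Type": "application/x-www-form-urlencoded",
        "X-Requested-With": "XMLHttpRequest",
    },
    data=payload,
)
print(response.status_code)  # the .resirow divs in this response carry the dates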
@@ -1,5 +1,6 @@
 import time
-
+import re
+from datetime import datetime, timedelta

 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
@@ -11,8 +12,6 @@ from selenium.webdriver.support.wait import WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

-# import the wonderful Beautiful Soup and the URL grabber
-

 class CouncilClass(AbstractGetBinDataClass):
     """
@@ -30,7 +29,8 @@ class CouncilClass(AbstractGetBinDataClass):

         house_number = kwargs.get("paon")
         postcode = kwargs.get("postcode")
-
+        # Use house_number as full address since it contains the complete address
+        full_address = house_number if house_number else f"{house_number}, {postcode}"
         web_driver = kwargs.get("web_driver")
         headless = kwargs.get("headless")

@@ -38,81 +38,205 @@
             driver = create_webdriver(web_driver, headless, None, __name__)
             driver.get(page)

-            #
+            # Wait for page to load completely
             wait = WebDriverWait(driver, 60)
-
-
-            )
-
-
-
-
-                EC.
-
-
-
-
-
-
-
-
-            )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+            # Wait for the Salesforce Lightning page to be fully loaded
+            print("Waiting for Salesforce Lightning components to load...")
+            time.sleep(10)
+
+            # Wait for the address input field to be present
+            try:
+                wait.until(EC.presence_of_element_located((By.XPATH, "//label[contains(text(), 'Enter your address')]")))
+                print("Address label found")
+                time.sleep(5)  # Additional wait for the input field to be ready
+            except Exception as e:
+                print(f"Address label not found: {e}")
+
+            # Find the address input field using the label
+            try:
+                address_entry_field = driver.find_element(By.XPATH, "//label[contains(text(), 'Enter your address')]/following-sibling::*//input")
+                print("Found address input field using label xpath")
+            except Exception as e:
+                print(f"Could not find address input field: {e}")
+                raise Exception("Could not find address input field")
+
+            # Clear any existing text and enter the address
+            try:
+                address_entry_field.clear()
+                address_entry_field.send_keys(str(full_address))
+                print(f"Entered address: {full_address}")
+            except Exception as e:
+                print(f"Error entering address: {e}")
+                raise
+
+            # Click the input field again to trigger the dropdown
+            try:
+                address_entry_field.click()
+                print("Clicked input field to trigger dropdown")
+                time.sleep(3)  # Wait for dropdown to appear
+            except Exception as e:
+                print(f"Error clicking input field: {e}")
+
+            # Wait for and click the dropdown option
+            try:
+                dropdown_wait = WebDriverWait(driver, 10)
+                dropdown_option = dropdown_wait.until(EC.element_to_be_clickable((By.XPATH, "//li[@role='presentation']")))
+                dropdown_option.click()
+                print("Clicked dropdown option")
+                time.sleep(2)
+            except Exception as e:
+                print(f"Error clicking dropdown option: {e}")
+                raise
+
+            # Find and click the Next button
+            try:
+                next_wait = WebDriverWait(driver, 10)
+                next_button = next_wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Next')]")))
+                next_button.click()
+                print("Clicked Next button")
+                time.sleep(5)  # Wait for the bin collection data to load
+            except Exception as e:
+                print(f"Error clicking Next button: {e}")
+                raise
+
+            # Wait for the bin collection data table to load
+            try:
+                table_wait = WebDriverWait(driver, 15)
+                table_wait.until(EC.presence_of_element_located((By.XPATH, "//span[contains(text(), 'Collection Day')]")))
+                print("Bin collection data table loaded")
+                time.sleep(3)
+            except Exception as e:
+                print(f"Bin collection table not found: {e}")
+
             soup = BeautifulSoup(driver.page_source, features="html.parser")
-
-            rows = soup.find_all("tr", class_="slds-hint-parent")
             current_year = datetime.now().year

+            # Try multiple approaches to find bin collection data
+            rows = []
+
+            # Try different table row selectors
+            table_selectors = [
+                "tr.slds-hint-parent",
+                "tr[class*='slds']",
+                "table tr",
+                ".slds-table tr",
+                "tbody tr"
+            ]
+
+            for selector in table_selectors:
+                rows = soup.select(selector)
+                if rows:
+                    break
+
+            # If no table rows found, try to find any elements containing collection info
+            if not rows:
+                # Look for any elements that might contain bin collection information
+                collection_elements = soup.find_all(text=re.compile(r'(bin|collection|waste|recycling)', re.I))
+                if collection_elements:
+                    # Try to extract information from the surrounding elements
+                    for element in collection_elements[:10]:  # Limit to first 10 matches
+                        parent = element.parent
+                        if parent:
+                            text = parent.get_text().strip()
+                            if text and len(text) > 10:  # Only consider substantial text
+                                # Try to extract date patterns
+                                date_patterns = re.findall(r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\d{1,2}\s+\w+\s+\d{4}\b', text)
+                                if date_patterns:
+                                    data["bins"].append({
+                                        "type": "General Collection",
+                                        "collectionDate": date_patterns[0]
+                                    })
+                                    break
+
+            # Process table rows if found
             for row in rows:
-
-
-
-
-
-
-
-
-                )
-
-
-
-
+                try:
+                    columns = row.find_all(["td", "th"])
+                    if len(columns) >= 2:
+                        # Try to identify container type and date
+                        container_type = "Unknown"
+                        collection_date = ""
+
+                        # Look for header cell (th) for container type
+                        th_element = row.find("th")
+                        if th_element:
+                            container_type = th_element.get_text().strip()
+                        elif columns:
+                            # If no th, use first column as type
+                            container_type = columns[0].get_text().strip()
+
+                        # Look for date in subsequent columns
+                        for col in columns[1:] if th_element else columns[1:]:
+                            col_text = col.get_text().strip()
+                            if col_text:
+                                if col_text.lower() == "today":
+                                    collection_date = datetime.now().strftime("%d/%m/%Y")
+                                    break
+                                elif col_text.lower() == "tomorrow":
+                                    collection_date = (datetime.now() + timedelta(days=1)).strftime("%d/%m/%Y")
+                                    break
+                                else:
+                                    # Try to parse various date formats
+                                    try:
+                                        # Clean the text
+                                        clean_text = re.sub(r"[^a-zA-Z0-9,\s/-]", "", col_text).strip()
+
+                                        # Try different date parsing approaches
+                                        date_formats = [
+                                            "%a, %d %B",
+                                            "%d %B %Y",
+                                            "%d/%m/%Y",
+                                            "%d-%m-%Y",
+                                            "%B %d, %Y"
+                                        ]
+
+                                        for fmt in date_formats:
+                                            try:
+                                                parsed_date = datetime.strptime(clean_text, fmt)
+                                                if fmt == "%a, %d %B":  # Add year if missing
+                                                    if parsed_date.replace(year=current_year) < datetime.now():
+                                                        parsed_date = parsed_date.replace(year=current_year + 1)
+                                                    else:
+                                                        parsed_date = parsed_date.replace(year=current_year)
+                                                collection_date = parsed_date.strftime("%d/%m/%Y")
+                                                break
+                                            except ValueError:
+                                                continue
+
+                                        if collection_date:
+                                            break
+                                    except Exception:
+                                        continue
+
+                        # Add to data if we have both type and date
+                        if container_type and collection_date and container_type.lower() != "unknown":
+                            data["bins"].append({
+                                "type": container_type,
+                                "collectionDate": collection_date
+                            })
+                except Exception as e:
+                    print(f"Error processing row: {e}")
+                    continue
+
+            # If no data found, add a debug entry
+            if not data["bins"]:
+                print("No bin collection data found. Page source:")
+                print(driver.page_source[:1000])  # Print first 1000 chars for debugging

-                # Parse the date from the string
-                parsed_date = datetime.strptime(collection_day, "%a, %d %B")
-                if parsed_date < datetime(
-                    parsed_date.year, parsed_date.month, parsed_date.day
-                ):
-                    parsed_date = parsed_date.replace(year=current_year + 1)
-                else:
-                    parsed_date = parsed_date.replace(year=current_year)
-                # Format the date as %d/%m/%Y
-                formatted_date = parsed_date.strftime("%d/%m/%Y")
-
-                # Add the bin type and collection date to the 'data' dictionary
-                data["bins"].append(
-                    {"type": container_type, "collectionDate": formatted_date}
-                )
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
+            print(f"Full address used: {full_address}")
+            print(f"Page URL: {page}")
+            # Add some debug information
+            if driver:
+                print(f"Current page title: {driver.title}")
+                print(f"Current URL: {driver.current_url}")
             # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
             # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
-        return data
+        return data
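
Most of the added bulk above is one explicit-wait pattern repeated per step, since the Salesforce Lightning form renders asynchronously. Reduced to a standalone sketch; the URL, selector, and address here are placeholders, not the council's real values:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

driver = webdriver.Chrome()
try:
    driver.get("https://example.org/bin-day")  # placeholder URL
    wait = WebDriverWait(driver, 60)
    # Block until the input is actually interactable instead of sleeping blindly
    field = wait.until(
        EC.element_to_be_clickable(
            (By.XPATH, "//label[contains(text(), 'Enter your address')]/following-sibling::*//input")
        )
    )
    field.send_keys("1 Example Street")  # placeholder address
finally:
    driver.quit()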
@@ -18,6 +18,10 @@ class CouncilClass(AbstractGetBinDataClass):

         bindata = {"bins": []}
         curr_date = datetime.today()
+
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        }

         soup = BeautifulSoup(page.content, features="html.parser")
         button = soup.find(
@@ -25,10 +29,10 @@ class CouncilClass(AbstractGetBinDataClass):
             text="Find out which bin will be collected when and sign up for a free email reminder.",
         )

-        if button:
+        if button and button.get("href"):
             URI = button["href"]
             # Make the GET request
-            response = requests.get(URI)
+            response = requests.get(URI, headers=headers)
             soup = BeautifulSoup(response.content, features="html.parser")
             divs = soup.find_all("div", {"class": "editor"})
             for div in divs:
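
Besides the safer href guard, this change threads the new browser-style User-Agent into the follow-up GET, presumably because the council site rejects or blocks the default python-requests agent. In isolation the pattern is just the following; the URL is a placeholder:

import requests

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
# Without the header some council servers return an error or a block page
response = requests.get("https://example.org/bin-collection", headers=headers)
response.raise_for_status()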
@@ -1,7 +1,6 @@
-
-
-from
-from selenium.webdriver.support.wait import WebDriverWait
+import json
+import requests
+from datetime import datetime

 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
@@ -15,116 +14,28 @@ class CouncilClass(AbstractGetBinDataClass):
     """

     def parse_data(self, page: str, **kwargs) -> dict:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                EC.presence_of_element_located((By.ID, "main-content"))
-            )
-
-            # Wait for the property number field to appear then populate it
-            inputElement_number = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "address_name_number",
-                    )
-                )
-            )
-            inputElement_number.send_keys(user_paon)
-
-            # Wait for the postcode field to appear then populate it
-            inputElement_postcode = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "address_postcode",
-                    )
-                )
-            )
-            inputElement_postcode.send_keys(user_postcode)
-
-            # Click search button
-            continueButton = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "Submit",
-                    )
-                )
-            )
-            continueButton.click()
-
-            # Wait for the 'Search Results' to appear and select the first result
-            property = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.CSS_SELECTOR,
-                        "li.app-subnav__section-item a",
-                        # "app-subnav__link govuk-link clicker colordarkblue fontfamilyArial fontsize12rem",
-                        # "//a[starts-with(@aria-label, '{user_paon}')]",
-                    )
-                )
-            )
-            property.click()
-
-            upcoming_scheduled_collections = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
-                    (By.ID, "upcoming-scheduled-collections")
-                )
-            )
-
-            soup = BeautifulSoup(driver.page_source, features="html.parser")
-
-            collections = []
-            for collection in soup.find_all(
-                "u1",
-                class_="displayinlineblock justifycontentleft alignitemscenter margin0 padding0",
-            ):
-                date = collection.find(
-                    "p", string=lambda text: text and "/" in text
-                ).text.strip()  # Extract date
-                service = collection.find(
-                    "p", string=lambda text: text and "Collection Service" in text
-                ).text.strip()  # Extract service type
-                collections.append({"date": date, "service": service})
-
-            # Print the parsed data
-            for item in collections:
-
-                dict_data = {
-                    "type": item["service"],
-                    "collectionDate": item["date"],
-                }
-                bindata["bins"].append(dict_data)
-
-        except Exception as e:
-            # Here you can log the exception if needed
-            print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
-            raise
-        finally:
-            # This block ensures that the driver is closed regardless of an exception
-            if driver:
-                driver.quit()
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        # Make API request
+        api_url = f"https://east-herts.co.uk/api/services/{user_uprn}"
+        response = requests.get(api_url)
+        response.raise_for_status()
+
+        data = response.json()
+        today = datetime.now().date()
+
+        for service in data.get("services", []):
+            collection_date_str = service.get("collectionDate")
+            if collection_date_str:
+                collection_date = datetime.strptime(collection_date_str, "%Y-%m-%d").date()
+                # Only include future dates
+                if collection_date >= today:
+                    dict_data = {
+                        "type": service.get("binType", ""),
+                        "collectionDate": collection_date.strftime("%d/%m/%Y"),
+                    }
+                    bindata["bins"].append(dict_data)
+
         return bindata
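
Net effect of the two hunks above: the Selenium journey is gone and East Herts is now a single JSON call per UPRN, filtered to future dates. A standalone sketch of the same logic; the UPRN is a placeholder, and the services/binType/collectionDate response shape is as read from the diff:

from datetime import datetime

import requests

uprn = "10033104539"  # placeholder UPRN
data = requests.get(f"https://east-herts.co.uk/api/services/{uprn}").json()

today = datetime.now().date()
for service in data.get("services", []):
    date_str = service.get("collectionDate")
    if not date_str:
        continue
    when = datetime.strptime(date_str, "%Y-%m-%d").date()  # API dates are ISO
    if when >= today:
        print(service.get("binType", ""), when.strftime("%d/%m/%Y"))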