uk_bin_collection 0.152.11__py3-none-any.whl → 0.154.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +16 -21
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +45 -120
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +4 -1
- uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +15 -36
- uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +75 -100
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +55 -24
- uk_bin_collection/uk_bin_collection/councils/EastHertsCouncil.py +82 -24
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +32 -34
- uk_bin_collection/uk_bin_collection/councils/FarehamBoroughCouncil.py +5 -2
- uk_bin_collection/uk_bin_collection/councils/FolkstoneandHytheDistrictCouncil.py +22 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/HartlepoolBoroughCouncil.py +3 -1
- uk_bin_collection/uk_bin_collection/councils/HinckleyandBosworthBoroughCouncil.py +7 -1
- uk_bin_collection/uk_bin_collection/councils/IpswichBoroughCouncil.py +3 -1
- uk_bin_collection/uk_bin_collection/councils/LichfieldDistrictCouncil.py +7 -1
- uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py +17 -6
- uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py +26 -128
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +63 -79
- uk_bin_collection/uk_bin_collection/councils/NorwichCityCouncil.py +67 -66
- uk_bin_collection/uk_bin_collection/councils/NuneatonBedworthBoroughCouncil.py +19 -7
- uk_bin_collection/uk_bin_collection/councils/RunnymedeBoroughCouncil.py +7 -1
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +4 -2
- uk_bin_collection/uk_bin_collection/councils/SandwellBoroughCouncil.py +4 -11
- uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py +39 -21
- uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +4 -0
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +16 -13
- uk_bin_collection/uk_bin_collection/councils/WiltshireCouncil.py +47 -29
- {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/RECORD +33 -33
- {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/entry_points.txt +0 -0
uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py

@@ -1,17 +1,17 @@
 import time
+import datetime
 
+from datetime import datetime
 from bs4 import BeautifulSoup
 from selenium.common.exceptions import TimeoutException
 from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support.ui import Select, WebDriverWait
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-# import the wonderful Beautiful Soup and the URL grabber
-
-
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -30,16 +30,18 @@ class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
         try:
-            page = "https://
+            page = "https://bincollection.northumberland.gov.uk/postcode"
 
             data = {"bins": []}
 
-            user_paon = kwargs.get("paon")
             user_postcode = kwargs.get("postcode")
+            user_uprn = kwargs.get("uprn")
+
+            check_postcode(user_postcode)
+            check_uprn(user_uprn)
+
             web_driver = kwargs.get("web_driver")
             headless = kwargs.get("headless")
-            check_paon(user_paon)
-            check_postcode(user_postcode)
 
             # Create Selenium webdriver
             driver = create_webdriver(web_driver, headless, None, __name__)
@@ -50,105 +52,87 @@ class CouncilClass(AbstractGetBinDataClass):
 
             # Wait for and click cookie button
             cookie_button = wait.until(
-                EC.element_to_be_clickable(
+                EC.element_to_be_clickable(
+                    (By.CLASS_NAME, "accept-all")
+                )
             )
             cookie_button.click()
 
-            # Wait for and find
-
+            # Wait for and find postcode input
+            inputElement_pc = wait.until(
                 EC.presence_of_element_located(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtHouse",
-                    )
+                    (By.ID, "postcode")
                 )
             )
 
-            #
-            inputElement_pc
+            # Enter postcode and submit
+            inputElement_pc.send_keys(user_postcode)
+            inputElement_pc.send_keys(Keys.ENTER)
+
+            # Wait for and find house number input
+            selectElement_address = wait.until(
                 EC.presence_of_element_located(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtPostcode",
-                    )
+                    (By.ID, "address")
                 )
             )
 
-
-
-            inputElement_hn.send_keys(user_paon)
+            dropdown = Select(selectElement_address)
+            dropdown.select_by_value(user_uprn)
 
-            # Click
-
+            # Click submit button and wait for results
+            submit_button = wait.until(
                 EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_butLookup",
-                    )
+                    (By.CLASS_NAME, "govuk-button")
                 )
             )
-
+            submit_button.click()
 
             # Wait for results to load
             route_summary = wait.until(
                 EC.presence_of_element_located(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
-                    )
+                    (By.CLASS_NAME, "govuk-table")
                 )
             )
 
+            now = datetime.now()
+            current_month = now.month
+            current_year = now.year
+
             # Get page source after everything has loaded
             soup = BeautifulSoup(driver.page_source, features="html.parser")
 
-            #
-
-
-
-
-
-
-
-
-
+            # From the table, find all rows:
+            # - cell 1 is the date in format eg. 9 September (so no year value 🥲)
+            # - cell 2 is the day name, not useful
+            # - cell 3 is the bin type eg. "General waste", "Recycling", "Garden waste"
+            rows = soup.find("tbody", class_="govuk-table__body").find_all("tr", class_="govuk-table__row")
+
+            for row in rows:
+                bin_type=row.find_all("td")[-1].text.strip()
+
+                collection_date_string = row.find('th').text.strip()
+
+                # sometimes but not always the day is written "22nd" instead of 22 so make sure we get a proper int
+                collection_date_day = "".join([i for i in list(collection_date_string.split(" ")[0]) if i.isdigit()])
+                collection_date_month_name = collection_date_string.split(" ")[1]
+
+                # if we are currently in Oct, Nov, or Dec and the collection month is Jan, Feb, or Mar, let's assume its next year
+                if (current_month >= 10) and (collection_date_month_name in ["January", "February", "March"]):
+                    collection_date_year = current_year + 1
+                else:
+                    collection_date_year = current_year
+
+                collection_date = time.strptime(
+                    f"{collection_date_day} {collection_date_month_name} {collection_date_year}", "%d %B %Y"
                 )
-            )
 
-
-
-
-
-
-
-            bin_colour = self.extract_styles(style_str)["background-color"].upper()
-            bins_by_colours[bin_colour] = bin
-
-            # Work through the tables gathering the dates, if the cell has a background colour - match it to the bin type.
-            calander_tables = soup.find_all("table", title="Calendar")
-            for table in calander_tables:
-                # Get month and year
-                # First row in table is the header
-                rows = table.find_all("tr")
-                month_and_year = (
-                    rows[0].find("table", class_="calCtrlTitle").find("td").string
+                # Add it to the data
+                data["bins"].append(
+                    {
+                        "type": bin_type,
+                        "collectionDate": time.strftime(date_format, collection_date),
+                    }
                 )
-                bin_days = table.find_all("td", class_="calCtrlDay")
-                for day in bin_days:
-                    day_styles = self.extract_styles(day["style"])
-                    if "background-color" in day_styles:
-                        colour = day_styles["background-color"].upper()
-                        date = time.strptime(
-                            f"{day.string} {month_and_year}", "%d %B %Y"
-                        )
-
-                        # Add it to the data
-                        data["bins"].append(
-                            {
-                                "type": bins_by_colours[colour],
-                                "collectionDate": time.strftime(date_format, date),
-                            }
-                        )
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
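The trickiest part of the rewritten Northumberland scraper is the date handling: the GOV.UK-styled results table lists days as "9 September" or "22nd September" with no year, so the code strips any ordinal suffix and infers the year from the current month. A standalone sketch of that normalisation (the helper name is illustrative, not part of the package):

```python
import time
from datetime import datetime


def normalise_collection_date(date_string: str, today: datetime) -> time.struct_time:
    """Sketch of the diff's rule: a Jan-Mar date seen in Oct-Dec rolls to next year."""
    day_part, month_name = date_string.split(" ")[:2]
    day = "".join(ch for ch in day_part if ch.isdigit())  # "22nd" -> "22"

    year = today.year
    if today.month >= 10 and month_name in ["January", "February", "March"]:
        year += 1  # collection listed late in the year but falling early next year

    return time.strptime(f"{day} {month_name} {year}", "%d %B %Y")


# e.g. scraping in November 2025: "22nd January" resolves to 22 January 2026
print(normalise_collection_date("22nd January", datetime(2025, 11, 5)))
```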
uk_bin_collection/uk_bin_collection/councils/NorwichCityCouncil.py

@@ -1,5 +1,3 @@
-import time
-
 import requests
 from bs4 import BeautifulSoup
 
@@ -17,76 +15,79 @@ class CouncilClass(AbstractGetBinDataClass):
 
     def parse_data(self, page: str, **kwargs) -> dict:
 
-
-
+        user_postcode = kwargs.get("postcode")
+        user_paon = kwargs.get("paon")
+        check_postcode(user_postcode)
+        check_paon(user_paon)
         bindata = {"bins": []}
 
-
-
-
-
-
-
-
-
-
-
-
-
+        URI = "https://bnr-wrp.whitespacews.com/"
+
+        session = requests.Session()
+
+        # get link from first page as has some kind of unique hash
+        r = session.get(
+            URI,
+        )
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        alink = soup.find("a", text="View my collections")
+
+        if alink is None:
+            raise Exception("Initial page did not load correctly")
+
+        # greplace 'seq' query string to skip next step
+        nextpageurl = alink["href"].replace("seq=1", "seq=2")
+
+        data = {
+            "address_name_number": user_paon,
+            "address_postcode": user_postcode,
         }
 
-
-
-
-
-        soup = BeautifulSoup(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        for bin in bin_types:
-            collections.append(
-                (
-                    bin.capitalize(),
-                    datetime.strptime(strong[1].text.strip(), date_format),
-                )
-            )
-            i += 2
-
-        if len(strong) > 3:
-            collections.append(
-                ("Garden", datetime.strptime(strong[4].text.strip(), date_format))
-            )
-
-        ordered_data = sorted(collections, key=lambda x: x[1])
-        for item in ordered_data:
+        # get list of addresses
+        r = session.post(nextpageurl, data)
+        r.raise_for_status()
+
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        # get first address (if you don't enter enough argument values this won't find the right address)
+        alink = soup.find("div", id="property_list").find("a")
+
+        if alink is None:
+            raise Exception("Address not found")
+
+        nextpageurl = URI + alink["href"]
+
+        # get collection page
+        r = session.get(
+            nextpageurl,
+        )
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        if soup.find("span", id="waste-hint"):
+            raise Exception("No scheduled services at this address")
+
+        u1s = soup.find("section", id="scheduled-collections").find_all("u1")
+
+        for u1 in u1s:
+            lis = u1.find_all("li", recursive=False)
+
+            date = lis[1].text.replace("\n", "")
+            bin_type = lis[2].text.replace("\n", "")
+
             dict_data = {
-                "type":
-                "collectionDate":
+                "type": bin_type,
+                "collectionDate": datetime.strptime(
+                    date,
+                    "%d/%m/%Y",
+                ).strftime(date_format),
             }
             bindata["bins"].append(dict_data)
 
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+        )
+
         return bindata
uk_bin_collection/uk_bin_collection/councils/NuneatonBedworthBoroughCouncil.py

@@ -1,23 +1,29 @@
+import re
+import urllib.parse
+
+import requests
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-from bs4 import BeautifulSoup
-import urllib.parse
-import requests
-import re
-
 
 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
 
         data = {"bins": []}
 
+        headers = {
+            "Origin": "https://www.nuneatonandbedworth.gov.uk/",
+            "Referer": "https://www.nuneatonandbedworth.gov.uk/",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         street = urllib.parse.quote_plus(kwargs.get("paon"))
         base_url = "https://www.nuneatonandbedworth.gov.uk/"
         search_query = f"directory/search?directoryID=3&showInMap=&keywords={street}&search=Search+directory"
 
-        search_response = requests.get(base_url + search_query)
+        search_response = requests.get(base_url + search_query, headers=headers)
 
         if search_response.status_code == 200:
             soup = BeautifulSoup(search_response.content, "html.parser")
@@ -56,7 +62,13 @@ class CouncilClass(AbstractGetBinDataClass):
 
     def get_bin_data(self, url) -> dict:
 
-
+        headers = {
+            "Origin": "https://www.nuneatonandbedworth.gov.uk/",
+            "Referer": "https://www.nuneatonandbedworth.gov.uk/",
+            "User-Agent": "Mozilla/5.0",
+        }
+
+        bin_day_response = requests.get(url, headers=headers)
 
         if bin_day_response.status_code == 200:
 
uk_bin_collection/uk_bin_collection/councils/RunnymedeBoroughCouncil.py

@@ -21,10 +21,16 @@ class CouncilClass(AbstractGetBinDataClass):
         check_uprn(user_uprn)
         bindata = {"bins": []}
 
+        headers = {
+            "Origin": "https://www.runnymede.gov.uk",
+            "Referer": "https://www.runnymede.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         URI = f"https://www.runnymede.gov.uk/homepage/150/check-your-bin-collection-day?address={user_uprn}"
 
         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)
 
         soup = BeautifulSoup(response.text, "html.parser")
 
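This is the same fix as the NuneatonBedworth change above: requests to both councils now carry minimal browser-like Origin, Referer, and User-Agent headers, presumably to get past filtering of the default python-requests User-Agent. The pattern in isolation (the UPRN is a placeholder):

```python
import requests

user_uprn = "100000000000"  # placeholder UPRN for illustration

headers = {
    "Origin": "https://www.runnymede.gov.uk",
    "Referer": "https://www.runnymede.gov.uk",
    "User-Agent": "Mozilla/5.0",  # the default "python-requests/x.y" UA is easy to block
}

URI = f"https://www.runnymede.gov.uk/homepage/150/check-your-bin-collection-day?address={user_uprn}"
response = requests.get(URI, headers=headers)
response.raise_for_status()
```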
uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py

@@ -1,7 +1,8 @@
 from bs4 import BeautifulSoup
+from lxml import etree
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
-from lxml import etree
 
 
 # import the wonderful Beautiful Soup and the URL grabber
@@ -20,7 +21,8 @@ class CouncilClass(AbstractGetBinDataClass):
         collections = []
 
         # Convert the XML to JSON and load the next collection data
-        result = soup.find("p").contents[0]
+        result = soup.find("p").contents[0]
+
         json_data = json.loads(result)["NextCollection"]
 
         # Get general waste data
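The Rushmoor change itself is only import ordering and spacing; the step described by the surviving comment is unchanged: the endpoint returns a <p> element whose text is JSON, and the scraper feeds that first text node to json.loads. A minimal sketch (the payload shape beyond the NextCollection key is invented):

```python
import json

from bs4 import BeautifulSoup

# invented stand-in for the real response body
page = '<p>{"NextCollection": {"RefuseDate": "2025-11-05"}}</p>'

soup = BeautifulSoup(page, "html.parser")
result = soup.find("p").contents[0]  # the <p>'s text node, a JSON string
json_data = json.loads(result)["NextCollection"]
print(json_data)
```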
uk_bin_collection/uk_bin_collection/councils/SandwellBoroughCouncil.py

@@ -28,17 +28,10 @@
         "Referer": "https://my.sandwell.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=",
     }
     LOOKUPS = [
-        (
-
-
-
-            "Recycling (Blue)",
-            "Household Waste (Grey)",
-            "Food Waste (Brown)",
-            "Batteries",
-        ],
-        ),
-        ("56b1cdaf6bb43", "GWDate", ["Garden Waste (Green)"]),
+        ("686295a88a750", "GWDate", ["Garden Waste (Green)"]),
+        ("686294de50729", "DWDate", ["Household Waste (Grey)"]),
+        ("6863a78a1dd8e", "FWDate", ["Food Waste (Brown)"]),
+        ("68629dd642423", "MDRDate", ["Recycling (Blue)"]),
     ]
 
     def parse_data(self, page: str, **kwargs) -> dict:
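The Sandwell fix swaps in a fresh set of form IDs and flattens LOOKUPS to one (form_id, date_field, bin_types) tuple per service. The parse_data that consumes the table sits outside this diff, but a lookup table like this is typically driven by a loop along these lines (hypothetical sketch, not the package's code):

```python
LOOKUPS = [
    ("686295a88a750", "GWDate", ["Garden Waste (Green)"]),
    ("686294de50729", "DWDate", ["Household Waste (Grey)"]),
    ("6863a78a1dd8e", "FWDate", ["Food Waste (Brown)"]),
    ("68629dd642423", "MDRDate", ["Recycling (Blue)"]),
]

# hypothetical response mapping each date field to its next collection date
response = {"GWDate": "05/11/2025", "DWDate": "06/11/2025"}

bins = []
for form_id, date_field, bin_types in LOOKUPS:
    date = response.get(date_field)
    if date:
        bins.extend({"type": t, "collectionDate": date} for t in bin_types)
print(bins)
```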
uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py

@@ -1,15 +1,18 @@
-import time
 import re
-import
+import time
 from datetime import datetime
+
+import requests
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
+
 def get_street_from_postcode(postcode: str, api_key: str) -> str:
     url = "https://maps.googleapis.com/maps/api/geocode/json"
     params = {"address": postcode, "key": api_key}
@@ -25,6 +28,7 @@ def get_street_from_postcode(postcode: str, api_key: str) -> str:
 
     raise ValueError("No street (route) found in the response.")
 
+
 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
@@ -37,10 +41,10 @@
 
         headless = kwargs.get("headless")
        web_driver = kwargs.get("web_driver")
-
+        UserAgent = "Mozilla/5.0"
+        driver = create_webdriver(web_driver, headless, UserAgent, __name__)
         page = "https://www.slough.gov.uk/bin-collections"
         driver.get(page)
-
         # Accept cookies
         WebDriverWait(driver, 10).until(
             EC.element_to_be_clickable((By.ID, "ccc-recommended-settings"))
@@ -50,14 +54,20 @@
         address_input = WebDriverWait(driver, 10).until(
             EC.presence_of_element_located((By.ID, "keyword_directory25"))
         )
-        user_address = get_street_from_postcode(
+        user_address = get_street_from_postcode(
+            user_postcode, "AIzaSyBDLULT7EIlNtHerswPtfmL15Tt3Oc0bV8"
+        )
         address_input.send_keys(user_address + Keys.ENTER)
 
         # Wait for address results to load
         WebDriverWait(driver, 10).until(
-            EC.presence_of_all_elements_located(
+            EC.presence_of_all_elements_located(
+                (By.CSS_SELECTOR, "span.list__link-text")
+            )
+        )
+        span_elements = driver.find_elements(
+            By.CSS_SELECTOR, "span.list__link-text"
         )
-        span_elements = driver.find_elements(By.CSS_SELECTOR, "span.list__link-text")
 
         for span in span_elements:
             if user_address.lower() in span.text.lower():
@@ -68,7 +78,9 @@
 
         # Wait for address detail page
         WebDriverWait(driver, 10).until(
-            EC.presence_of_element_located(
+            EC.presence_of_element_located(
+                (By.CSS_SELECTOR, "section.site-content")
+            )
         )
         soup = BeautifulSoup(driver.page_source, "html.parser")
 
@@ -86,28 +98,33 @@
             bin_url = "https://www.slough.gov.uk" + bin_url
 
             # Visit the child page
-            print(f"Navigating to {bin_url}")
+            # print(f"Navigating to {bin_url}")
             driver.get(bin_url)
             WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, "div.page-content")
+                )
             )
             child_soup = BeautifulSoup(driver.page_source, "html.parser")
 
             editor_div = child_soup.find("div", class_="editor")
             if not editor_div:
-                print("No editor div found on bin detail page.")
+                # print("No editor div found on bin detail page.")
                 continue
 
             ul = editor_div.find("ul")
             if not ul:
-                print("No <ul> with dates found in editor div.")
+                # print("No <ul> with dates found in editor div.")
                 continue
 
             for li in ul.find_all("li"):
                 raw_text = li.get_text(strip=True).replace(".", "")
 
-                if
-
+                if (
+                    "no collection" in raw_text.lower()
+                    or "no collections" in raw_text.lower()
+                ):
+                    # print(f"Ignoring non-collection note: {raw_text}")
                     continue
 
                 raw_date = raw_text
@@ -117,19 +134,20 @@
                 except ValueError:
                     raw_date_cleaned = raw_date.split("(")[0].strip()
                     try:
-                        parsed_date = datetime.strptime(
+                        parsed_date = datetime.strptime(
+                            raw_date_cleaned, "%d %B %Y"
+                        )
                     except Exception:
                         print(f"Could not parse date: {raw_text}")
                         continue
 
                 formatted_date = parsed_date.strftime("%d/%m/%Y")
                 contains_date(formatted_date)
-                bin_data["bins"].append(
-                    "type": bin_type,
-
-                })
+                bin_data["bins"].append(
+                    {"type": bin_type, "collectionDate": formatted_date}
+                )
 
-                print(f"Type: {bin_type}, Date: {formatted_date}")
+                # print(f"Type: {bin_type}, Date: {formatted_date}")
 
         except Exception as e:
             print(f"An error occurred: {e}")
@@ -137,4 +155,4 @@
         finally:
             if driver:
                 driver.quit()
-        return bin_data
+        return bin_data
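Beyond reflowed lines and commented-out debug prints, the Slough diff keeps its two-stage date parse: try the raw list-item text first and, on ValueError, strip a trailing parenthetical before retrying with "%d %B %Y". The fallback on its own (sample string invented; the first attempt is assumed to use the same format):

```python
from datetime import datetime


def parse_collection_date(raw_text: str) -> str:
    """Sketch of the fallback in the diff: drop '(...)' and retry."""
    try:
        parsed_date = datetime.strptime(raw_text, "%d %B %Y")
    except ValueError:
        raw_date_cleaned = raw_text.split("(")[0].strip()
        parsed_date = datetime.strptime(raw_date_cleaned, "%d %B %Y")  # may still raise
    return parsed_date.strftime("%d/%m/%Y")


print(parse_collection_date("22 August 2025 (Friday)"))  # -> 22/08/2025
```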
uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py

@@ -77,6 +77,10 @@
             )
             submit.click()
 
+            WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.CLASS_NAME, "bin-collection__month"))
+            )
+
             soup = BeautifulSoup(driver.page_source, features="html.parser")
 
             # Quit Selenium webdriver to release session
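The four added lines close a classic Selenium race: driver.page_source could previously be captured before the results finished rendering, so the scraper now blocks on an explicit wait for the first calendar month element before parsing. The same guard in isolation, as a helper (assumes a live webdriver already on the results page):

```python
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def wait_for_calendar(driver) -> BeautifulSoup:
    """Block until the results have rendered, then hand the HTML to BeautifulSoup."""
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "bin-collection__month"))
    )
    # page_source should now include the rendered collection calendar
    return BeautifulSoup(driver.page_source, features="html.parser")
```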
|