uk_bin_collection 0.152.8__py3-none-any.whl → 0.152.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. uk_bin_collection/tests/input.json +11 -15
  2. uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py +69 -46
  3. uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +119 -37
  4. uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py +158 -115
  5. uk_bin_collection/uk_bin_collection/councils/BasildonCouncil.py +87 -66
  6. uk_bin_collection/uk_bin_collection/councils/BlabyDistrictCouncil.py +5 -1
  7. uk_bin_collection/uk_bin_collection/councils/BlaenauGwentCountyBoroughCouncil.py +91 -66
  8. uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py +88 -67
  9. uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +67 -56
  10. uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +63 -95
  11. uk_bin_collection/uk_bin_collection/councils/CherwellDistrictCouncil.py +39 -18
  12. uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +106 -97
  13. uk_bin_collection/uk_bin_collection/councils/CopelandBoroughCouncil.py +80 -75
  14. uk_bin_collection/uk_bin_collection/councils/CotswoldDistrictCouncil.py +191 -67
  15. uk_bin_collection/uk_bin_collection/councils/CoventryCityCouncil.py +6 -2
  16. uk_bin_collection/uk_bin_collection/councils/SouthwarkCouncil.py +23 -1
  17. uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +4 -1
  18. uk_bin_collection/uk_bin_collection/get_bin_data.py +1 -1
  19. {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.9.dist-info}/METADATA +1 -1
  20. {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.9.dist-info}/RECORD +23 -24
  21. uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py +0 -69
  22. {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.9.dist-info}/LICENSE +0 -0
  23. {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.9.dist-info}/WHEEL +0 -0
  24. {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.9.dist-info}/entry_points.txt +0 -0

uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py
@@ -1,127 +1,95 @@
- # This script pulls (in one hit) the data from Bromley Council Bins Data
- import datetime
  import re
  import time
- from datetime import datetime
-
+ from datetime import datetime, timedelta
  import requests
  from bs4 import BeautifulSoup
  from selenium.webdriver.common.by import By
- from selenium.webdriver.common.keys import Keys
  from selenium.webdriver.support import expected_conditions as EC
- from selenium.webdriver.support.ui import Select
  from selenium.webdriver.support.wait import WebDriverWait
+ from icalevents.icalevents import events
 
  from uk_bin_collection.uk_bin_collection.common import *
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-
- # import the wonderful Beautiful Soup and the URL grabber
  class CouncilClass(AbstractGetBinDataClass):
-     """
-     Concrete classes have to implement all abstract operations of the
-     base class. They can also override some operations with a default
-     implementation.
-     """
-
      def parse_data(self, page: str, **kwargs) -> dict:
          driver = None
          try:
             data = {"bins": []}
             headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"}
-
-             uprn = kwargs.get("uprn")
+
             postcode = kwargs.get("postcode")
             user_paon = kwargs.get("paon")
             web_driver = kwargs.get("web_driver")
             headless = kwargs.get("headless")
+
             driver = create_webdriver(web_driver, headless, None, __name__)
-             url = kwargs.get("url")
-
-             driver.execute_script(f"window.location.href='{url}'")
-
-             wait = WebDriverWait(driver, 120)
-             post_code_search = wait.until(
-                 EC.presence_of_element_located((By.XPATH, '//input[@name="keyword"]'))
-             )
-
-             post_code_search.send_keys(postcode)
-
-             submit_btn = wait.until(
-                 EC.presence_of_element_located((By.CLASS_NAME, "__submitButton"))
-             )
-
-             submit_btn.send_keys(Keys.ENTER)
-
-             address_results = wait.until(
-                 EC.presence_of_element_located((By.CLASS_NAME, "directories-table"))
-             )
-             address_link = wait.until(
-                 EC.presence_of_element_located(
-                     (By.XPATH, f"//a[contains(text(), '{user_paon}')]")
+             wait = WebDriverWait(driver, 30)
+
+             # Navigate to bin collection page
+             driver.get("https://www.chelmsford.gov.uk/bins-and-recycling/check-your-collection-day/")
+
+             # Handle cookie overlay
+             try:
+                 accept_btn = wait.until(
+                     EC.element_to_be_clickable((By.XPATH, "//*[contains(text(), 'ACCEPT')]"))
                  )
+                 accept_btn.click()
+                 time.sleep(1)
+             except:
+                 pass
+
+             # Find postcode input field (dynamic ID)
+             postcode_input = wait.until(
+                 EC.presence_of_element_located((By.XPATH, "//input[contains(@id, '_keyword')]"))
             )
-
-             address_link.send_keys(Keys.ENTER)
-             results = wait.until(
-                 EC.presence_of_element_located((By.CLASS_NAME, "usercontent"))
+             postcode_input.clear()
+             postcode_input.send_keys(postcode)
+
+             # Click search button
+             submit_btn = wait.until(
+                 EC.element_to_be_clickable((By.CLASS_NAME, "__submitButton"))
             )
-
-             # Make a BS4 object
+             submit_btn.click()
+
+             # Wait for results table
+             wait.until(EC.presence_of_element_located((By.TAG_NAME, "table")))
+
+             # Get the collection round from the table row
             soup = BeautifulSoup(driver.page_source, features="html.parser")
-             soup.prettify()
-
-             # Get collection calendar
-             calendar_urls = soup.find_all(
-                 "a", string=re.compile(r"view or download the collection calendar")
-             )
-             if len(calendar_urls) > 0:
-                 requests.packages.urllib3.disable_warnings()
-                 response = requests.get(calendar_urls[0].get("href"), headers=headers)
-
-                 # Make a BS4 object
-                 soup = BeautifulSoup(response.text, features="html.parser")
-                 soup.prettify()
-
-                 # Loop the months
-                 for month in soup.find_all("div", {"class": "usercontent"}):
-                     year = ""
-                     if month.find("h2") and "calendar" not in month.find("h2").get_text(
-                         strip=True
-                     ):
-                         year = datetime.strptime(
-                             month.find("h2").get_text(strip=True), "%B %Y"
-                         ).strftime("%Y")
-                     elif month.find("h3"):
-                         year = datetime.strptime(
-                             month.find("h3").get_text(strip=True), "%B %Y"
-                         ).strftime("%Y")
-                     if year != "":
-                         for row in month.find_all("li"):
-                             results = re.search(
-                                 "([A-Za-z]+ \\d\\d? [A-Za-z]+): (.+)",
-                                 row.get_text(strip=True),
-                             )
-                             if results:
-                                 dict_data = {
-                                     "type": results.groups()[1].capitalize(),
-                                     "collectionDate": datetime.strptime(
-                                         results.groups()[0] + " " + year, "%A %d %B %Y"
-                                     ).strftime(date_format),
-                                 }
-                                 data["bins"].append(dict_data)
-
-             # Sort collections
-             data["bins"].sort(
-                 key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
-             )
+
+             # Find the row containing the address
+             for row in soup.find_all("tr"):
+                 if user_paon in row.get_text():
+                     # Extract collection round (e.g., "Tuesday B")
+                     row_text = row.get_text()
+                     round_match = re.search(r"(Monday|Tuesday|Wednesday|Thursday|Friday)\s+([AB])", row_text)
+                     if round_match:
+                         day = round_match.group(1).lower()
+                         letter = round_match.group(2).lower()
+                         ics_url = f"https://www.chelmsford.gov.uk/media/4ipavf0m/{day}-{letter}-calendar.ics"
+                         break
+             else:
+                 raise ValueError(f"Could not find collection round for address: {user_paon}")
+
+             # Get events from ICS file within the next 60 days
+             now = datetime.now()
+             future = now + timedelta(days=60)
+
+             # Parse ICS calendar
+             upcoming_events = events(ics_url, start=now, end=future)
+
+             for event in sorted(upcoming_events, key=lambda e: e.start):
+                 if event.summary and event.start:
+                     data["bins"].append({
+                         "type": event.summary,
+                         "collectionDate": event.start.date().strftime(date_format)
+                     })
         except Exception as e:
-             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
-             # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-             # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
-         return data
+
+         return data
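
Note: the rewritten Chelmsford parser no longer follows a per-address calendar link. It matches the address row in the search results to a collection round such as "Tuesday B", then reads the published ICS calendar for that round. A minimal sketch of just the ICS step, using the icalevents call exactly as in the diff; the "tuesday"/"b" round values below are placeholder examples, not derived from a real lookup:

    from datetime import datetime, timedelta
    from icalevents.icalevents import events

    day, letter = "tuesday", "b"  # derived from the results table in the real code
    ics_url = f"https://www.chelmsford.gov.uk/media/4ipavf0m/{day}-{letter}-calendar.ics"

    now = datetime.now()
    for event in sorted(events(ics_url, start=now, end=now + timedelta(days=60)),
                        key=lambda e: e.start):
        # Each event's SUMMARY names the bin type; DTSTART is the collection date
        print(event.summary, event.start.date().strftime("%d/%m/%Y"))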

uk_bin_collection/uk_bin_collection/councils/CherwellDistrictCouncil.py
@@ -23,8 +23,11 @@ class CouncilClass(AbstractGetBinDataClass):
 
         URI = f"https://www.cherwell.gov.uk/homepage/129/bin-collection-search?uprn={user_uprn}"
 
-         # Make the GET request
-         response = requests.get(URI)
+         # Make the GET request with proper headers
+         headers = {
+             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+         }
+         response = requests.get(URI, headers=headers)
 
         soup = BeautifulSoup(response.text, "html.parser")
 
@@ -45,22 +48,38 @@ class CouncilClass(AbstractGetBinDataClass):
 
             return date_obj.strftime(date_format)  # Return in YYYY-MM-DD format
 
-         # print(soup)
-
-         div = soup.find("div", class_="bin-collection-results__tasks")
-
-         for item in div.find_all("li", class_="list__item"):
-             # Extract bin type
-             bin_type_tag = item.find("h3", class_="bin-collection-tasks__heading")
-             bin_type = (
-                 "".join(bin_type_tag.find_all(text=True, recursive=False)).strip()
-                 if bin_type_tag
-                 else "Unknown Bin"
-             )
+         # Find the bin collection results section
+         results_div = soup.find("div", class_="bin-collection-results")
+         if not results_div:
+             return bindata
+
+         tasks_div = results_div.find("div", class_="bin-collection-results__tasks")
+         if not tasks_div:
+             return bindata
+
+         # Find all bin collection items
+         for item in tasks_div.find_all("li", class_="list__item"):
+             # Extract bin type from heading
+             heading = item.find("h3", class_="bin-collection-tasks__heading")
+             if not heading:
+                 continue
+
+             # Get the bin type text, excluding visually hidden spans
+             bin_type = ""
+             for text_node in heading.find_all(text=True):
+                 parent = text_node.parent
+                 if not (parent.name == "span" and "visually-hidden" in parent.get("class", [])):
+                     bin_type += text_node.strip()
+
+             if not bin_type:
+                 continue
 
             # Extract collection date
             date_tag = item.find("p", class_="bin-collection-tasks__date")
-             collection_date = date_tag.text.strip() if date_tag else "Unknown Date"
+             if not date_tag:
+                 continue
+
+             collection_date = date_tag.text.strip()
 
             dict_data = {
                 "type": bin_type,
@@ -68,8 +87,10 @@ class CouncilClass(AbstractGetBinDataClass):
             }
             bindata["bins"].append(dict_data)
 
-         bindata["bins"].sort(
-             key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
-         )
+         # Sort bins by collection date
+         if bindata["bins"]:
+             bindata["bins"].sort(
+                 key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+             )
 
         return bindata
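
Note: the Cherwell change guards every lookup (results div, heading, date tag) and now assembles the bin type by walking all text nodes and skipping screen-reader-only spans, instead of taking only top-level text. A self-contained sketch of that filtering; the HTML snippet here is a hypothetical example of the page structure, not captured from the live site:

    from bs4 import BeautifulSoup

    html = (
        '<h3 class="bin-collection-tasks__heading">'
        '<span class="visually-hidden">Your next </span>Food waste</h3>'
    )
    heading = BeautifulSoup(html, "html.parser").h3

    bin_type = ""
    for text_node in heading.find_all(string=True):  # string= is the modern spelling of text=
        parent = text_node.parent
        # Skip screen-reader-only text such as "Your next "
        if not (parent.name == "span" and "visually-hidden" in parent.get("class", [])):
            bin_type += text_node.strip()

    print(bin_type)  # -> "Food waste"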

uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py
@@ -1,5 +1,5 @@
  import time
- import urllib.parse
+ from datetime import datetime
 
  from bs4 import BeautifulSoup
  from selenium.webdriver.common.by import By
@@ -11,18 +11,6 @@ from uk_bin_collection.uk_bin_collection.common import *
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
- def format_bin_type(bin_colour: str):
-     bin_types = {
-         "grey": "Garden waste (Grey Bin)",
-         "brown": "Paper and card (Brown Bin)",
-         "blue": "Bottles and cans (Blue Bin)",
-         "green": "General waste (Green Bin)",
-     }
-     bin_colour = urllib.parse.unquote(bin_colour).split(" ")[0].lower()
-     return bin_types[bin_colour]
-
-
- # import the wonderful Beautiful Soup and the URL grabber
  class CouncilClass(AbstractGetBinDataClass):
      """
      Concrete classes have to implement all abstract operations of the
@@ -41,99 +29,120 @@ class CouncilClass(AbstractGetBinDataClass):
             check_uprn(user_uprn)
             check_postcode(user_postcode)
 
-             # Ensure UPRN starts with "UPRN"
-             if not user_uprn.startswith("UPRN"):
-                 user_uprn = f"UPRN{user_uprn}"
-
             # Create Selenium webdriver
-             user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+             user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
             driver = create_webdriver(web_driver, headless, user_agent, __name__)
-             driver.get("https://myaccount.chorley.gov.uk/wastecollections.aspx")
-
-             # Accept cookies banner
-             cookieBanner = WebDriverWait(driver, 10).until(
-                 EC.presence_of_element_located((By.ID, "PrivacyPolicyNotification"))
-             )
-             cookieClose = cookieBanner.find_element(
-                 By.CSS_SELECTOR, "span.ui-icon-circle-close"
+
+             # Navigate to the start page
+             driver.get("https://chorley.gov.uk/bincollectiondays")
+
+             # Click the "Check your collection day" button
+             check_button = WebDriverWait(driver, 10).until(
+                 EC.element_to_be_clickable((By.XPATH, "//a[@class='button' and @href='https://forms.chorleysouthribble.gov.uk/chorley-bincollectiondays']")
+             ))
+             check_button.click()
+
+             # Wait for the form to load and enter postcode
+             postcode_input = WebDriverWait(driver, 10).until(
+                 EC.presence_of_element_located((By.XPATH, "//input[@type='text'][1]")
+             ))
+             postcode_input.clear()
+             postcode_input.send_keys(user_postcode)
+
+             # Click the Lookup button
+             lookup_button = WebDriverWait(driver, 10).until(
+                 EC.element_to_be_clickable((By.XPATH, "//button[contains(@class, 'btn--lookup')]")
+             ))
+             lookup_button.click()
+
+             # Wait for the property dropdown to be populated
+             property_dropdown = WebDriverWait(driver, 10).until(
+                 EC.presence_of_element_located((By.XPATH, "//select[@class='form__select']")
+             ))
+
+             # Wait a moment for the dropdown to be fully populated
+             time.sleep(2)
+
+             # Find the property that matches the UPRN or select the first available property
+             select = Select(property_dropdown)
+             options = select.options
+
+             # Skip the "Please choose..." option and select based on UPRN or first available
+             selected = False
+             for option in options[1:]:  # Skip first "Please choose..." option
+                 if user_uprn in option.get_attribute("value") or not selected:
+                     select.select_by_visible_text(option.text)
+                     selected = True
+                     break
+
+             if not selected and len(options) > 1:
+                 # If no UPRN match, select the first available property
+                 select.select_by_index(1)
+
+             # Click the Next button
+             next_button = WebDriverWait(driver, 10).until(
+                 EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit'][value='Next']"))
             )
-             cookieClose.click()
-
-             # Populate postcode field
-             inputElement_postcode = driver.find_element(
-                 By.ID,
-                 "MainContent_addressSearch_txtPostCodeLookup",
-             )
-             inputElement_postcode.send_keys(user_postcode)
-
-             # Click search button
-             findAddress = WebDriverWait(driver, 10).until(
-                 EC.presence_of_element_located(
-                     (
-                         By.ID,
-                         "MainContent_addressSearch_btnFindAddress",
-                     )
-                 )
-             )
-             findAddress.click()
-
-             time.sleep(1)
-
-             # Wait for the 'Select address' dropdown to appear and select option matching UPRN
-             dropdown = WebDriverWait(driver, 10).until(
-                 EC.presence_of_element_located(
-                     (
-                         By.ID,
-                         "MainContent_addressSearch_ddlAddress",
-                     )
-                 )
+             next_button.click()
+
+             # Wait for the results page to load
+             WebDriverWait(driver, 10).until(
+                 EC.presence_of_element_located((By.XPATH, "//th[text()='Collection']"))
             )
-             # Create a 'Select' for it, then select the matching URPN option
-             dropdownSelect = Select(dropdown)
-             dropdownSelect.select_by_value(user_uprn)
-
-             # Wait for the submit button to appear, then click it to get the collection dates
-             submit = WebDriverWait(driver, 10).until(
-                 EC.presence_of_element_located((By.ID, "MainContent_btnSearch"))
-             )
-             submit.click()
-
-             soup = BeautifulSoup(driver.page_source, features="html.parser")
-
-             # Get the property details
-             property_details = soup.find(
-                 "table",
-                 {"class": "WasteCollection"},
-             )
-
-             # Get the dates
-             for row in property_details.tbody.find_all("tr", recursive=False):
-                 month_col = row.td
-                 month = month_col.get_text(strip=True)
-
-                 for date_col in month_col.find_next_siblings("td"):
-                     day = date_col.p.contents[0].strip()
-
-                     if day == "":
-                         continue
-
-                     for bin_type in date_col.find_all("img"):
-                         bin_colour = bin_type.get("src").split("/")[-1].split(".")[0]
-                         date_object = datetime.strptime(f"{day} {month}", "%d %B %Y")
-                         date_formatted = date_object.strftime("%d/%m/%Y")
-
-                         dict_data = {
-                             "type": format_bin_type(bin_colour),
-                             "collectionDate": date_formatted,
-                         }
-                         data["bins"].append(dict_data)
+
+             # Parse the results
+             soup = BeautifulSoup(driver.page_source, "html.parser")
+
+             # Find the table with collection data
+             table = soup.find("table")
+
+             if table:
+                 rows = table.find_all("tr")
+
+                 for i, row in enumerate(rows):
+                     cells = row.find_all(["td", "th"])
+
+                     if i > 0 and len(cells) >= 2:  # Skip header row
+                         collection_type = cells[0].get_text(strip=True)
+                         collection_date = cells[1].get_text(strip=True)
+
+                         if collection_type and collection_date and collection_date != "Collection":
+                             # Try to parse the date
+                             try:
+                                 # Handle the format "Tuesday, 05/08/25"
+                                 if ", " in collection_date and "/" in collection_date:
+                                     # Remove the day name and parse the date
+                                     date_part = collection_date.split(", ")[1]
+                                     # Handle 2-digit year format
+                                     if len(date_part.split("/")[2]) == 2:
+                                         date_obj = datetime.strptime(date_part, "%d/%m/%y")
+                                     else:
+                                         date_obj = datetime.strptime(date_part, "%d/%m/%Y")
+                                 elif "/" in collection_date:
+                                     date_obj = datetime.strptime(collection_date, "%d/%m/%Y")
+                                 elif "-" in collection_date:
+                                     date_obj = datetime.strptime(collection_date, "%Y-%m-%d")
+                                 else:
+                                     # Try to parse other formats
+                                     date_obj = datetime.strptime(collection_date, "%d %B %Y")
+
+                                 formatted_date = date_obj.strftime("%d/%m/%Y")
+
+                                 dict_data = {
+                                     "type": collection_type,
+                                     "collectionDate": formatted_date,
+                                 }
+                                 data["bins"].append(dict_data)
+                             except ValueError:
+                                 # If date parsing fails, skip this entry
+                                 continue
+
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
             # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-             # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
-         return data
+         return data
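
Note: the new Chorley table parser accepts several date renderings from the results page ("Tuesday, 05/08/25", "05/08/2025", "2025-08-05", "5 August 2025") and normalises them all to DD/MM/YYYY. The same fallback chain as a standalone helper; parse_collection_date is an illustrative name, not part of the package:

    from datetime import datetime

    def parse_collection_date(text: str) -> str:
        if ", " in text and "/" in text:  # "Tuesday, 05/08/25"
            date_part = text.split(", ")[1]
            # Two-digit years use %y, four-digit years use %Y
            fmt = "%d/%m/%y" if len(date_part.split("/")[2]) == 2 else "%d/%m/%Y"
            return datetime.strptime(date_part, fmt).strftime("%d/%m/%Y")
        if "/" in text:  # "05/08/2025"
            return datetime.strptime(text, "%d/%m/%Y").strftime("%d/%m/%Y")
        if "-" in text:  # "2025-08-05"
            return datetime.strptime(text, "%Y-%m-%d").strftime("%d/%m/%Y")
        # Fall back to long-form dates such as "5 August 2025"
        return datetime.strptime(text, "%d %B %Y").strftime("%d/%m/%Y")

    print(parse_collection_date("Tuesday, 05/08/25"))  # -> 05/08/2025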