uk_bin_collection 0.144.2__py3-none-any.whl → 0.144.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -126,11 +126,14 @@ class CouncilClass(AbstractGetBinDataClass):
126
126
 
127
127
  collections = []
128
128
  for bin in collection_data:
129
+ if not bin["collection"]:
130
+ continue # Skip if there are no collection dates
131
+
129
132
  bin_type = bin["containerName"]
130
133
  next_collection = datetime.strptime(
131
134
  bin["collection"][0]["nextCollectionDate"], "%Y-%m-%dT%H:%M:%S"
132
135
  ).strftime(date_format)
133
- # Could work out next date using the roundDescription and the is_holiday function in common.py
136
+
134
137
  collections.append((bin_type, next_collection))
135
138
 
136
139
  ordered_data = sorted(collections, key=lambda x: x[1])
@@ -0,0 +1,273 @@
1
+ import json
2
+ from datetime import datetime, timedelta
3
+ from typing import Any, Dict
4
+
5
+ from bs4 import BeautifulSoup
6
+ from dateutil.parser import parse
7
+ from selenium.common.exceptions import (
8
+ NoSuchElementException,
9
+ StaleElementReferenceException,
10
+ TimeoutException,
11
+ )
12
+ from selenium.webdriver.common.by import By
13
+ from selenium.webdriver.common.keys import Keys
14
+ from selenium.webdriver.remote.webdriver import WebDriver
15
+ from selenium.webdriver.support import expected_conditions as EC
16
+ from selenium.webdriver.support.wait import WebDriverWait
17
+
18
+ from uk_bin_collection.uk_bin_collection.common import *
19
+ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
20
+
21
# Weekday name -> weekday number, numbered the same way as
# datetime.weekday() (Monday=0 ... Sunday=6). Insertion order runs Monday
# through Sunday, which is the order the names are scanned in when matching
# them against the council's collection-day text.
DAYS_OF_WEEK = {
    day_name: weekday_index
    for weekday_index, day_name in enumerate(
        (
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        )
    )
}
31
+
32
+
33
# This function checks for bank holiday collection changes,
# but the page seems manually written so might break easily
def get_bank_holiday_changes(driver: WebDriver) -> Dict[str, str]:
    """Fetch and parse bank holiday collection changes from the council website.

    Navigates ``driver`` to the Hillingdon bank-holiday page, waits for a
    ``<table>`` to be present, then reads every table whose header cells
    include both "Normal collection day" and "Revised collection day".

    Args:
        driver: Selenium WebDriver to use. It is navigated away from whatever
            page it was previously showing.

    Returns:
        A dict mapping the normal collection date to the revised collection
        date, both formatted as ``%d/%m/%Y``. Rows whose cells cannot be
        parsed as dates are reported via ``print`` and skipped.

    Raises:
        TimeoutException: If no ``<table>`` is present within 10 seconds.
    """
    bank_holiday_url = "https://www.hillingdon.gov.uk/bank-holiday-collections"
    driver.get(bank_holiday_url)

    # Wait for page to load
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.TAG_NAME, "table"))
    )

    # Parse the page
    soup = BeautifulSoup(driver.page_source, features="html.parser")
    changes: Dict[str, str] = {}

    for table in soup.find_all("table"):
        # Only tables carrying both expected headings describe date changes.
        headers = [th.text.strip() for th in table.find_all("th")]
        if (
            "Normal collection day" not in headers
            or "Revised collection day" not in headers
        ):
            continue

        for row in table.find_all("tr")[1:]:  # Skip header row
            cols = row.find_all("td")
            if len(cols) < 2:
                continue

            normal_text = cols[0].text.strip()
            revised_text = cols[1].text.strip()
            try:
                # dayfirst=True: this is a UK site, so an ambiguous numeric
                # date such as 05/06/2025 means 5 June, not 6 May.
                normal_date = parse(
                    normal_text, fuzzy=True, dayfirst=True
                ).strftime("%d/%m/%Y")
                revised_date = parse(
                    revised_text, fuzzy=True, dayfirst=True
                ).strftime("%d/%m/%Y")
            except (ValueError, OverflowError) as e:
                # dateutil raises ParserError (a ValueError) or OverflowError
                # for unparseable text; skip the row rather than abort.
                print(f"Error parsing dates: {e}")
                continue

            changes[normal_date] = revised_date

    return changes
75
+
76
+
77
class CouncilClass(AbstractGetBinDataClass):
    """Selenium-driven scraper for Hillingdon's waste collection day lookup."""

    def parse_data(self, page: str, **kwargs: Any) -> Dict[str, Any]:
        """Look up bin collections for an address on the council website.

        Args:
            page: Not used by this implementation; the lookup page is taken
                from ``kwargs["url"]``.
            **kwargs: Reads ``paon`` (house name/number, matched against the
                address dropdown), ``postcode``, ``web_driver``, ``headless``
                and ``url``.

        Returns:
            ``{"bins": [{"type": str, "collectionDate": "dd/mm/YYYY"}, ...]}``.
            It holds one entry each for "General Waste", "Recycling" and
            "Food Waste" on the next occurrence of the weekly collection day,
            plus one entry per additional ``div.bin--row`` that parses. Dates
            listed on the bank-holiday page are replaced by their revised date.

        Raises:
            Exception: If no address matches, the address list or collection
                table does not appear in time, or the collection day cannot be
                read. Every failure is printed and re-raised; the driver is
                always closed.
        """
        # Local import: base class of all Selenium errors, used to keep the
        # cookie-banner dismissal strictly best-effort.
        from selenium.common.exceptions import WebDriverException

        postcode_input_id = (
            "WASTECOLLECTIONDAYLOOKUPINCLUDEGARDEN_ADDRESSLOOKUPPOSTCODE"
        )
        address_select_id = (
            "WASTECOLLECTIONDAYLOOKUPINCLUDEGARDEN_ADDRESSLOOKUPADDRESS"
        )
        table_id = "WASTECOLLECTIONDAYLOOKUPINCLUDEGARDEN_COLLECTIONTABLE"

        driver = None
        data: Dict[str, Any] = {"bins": []}
        try:
            user_paon = kwargs.get("paon")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            url = kwargs.get("url")

            check_paon(user_paon)
            check_postcode(user_postcode)

            driver = create_webdriver(web_driver, headless, None, __name__)
            driver.get(url)

            wait = WebDriverWait(driver, 30)

            # Handle cookie banner if present. By.CLASS_NAME only accepts a
            # single class name, so the compound class list must be expressed
            # as a CSS selector or it never matches.
            try:
                cookie_button = wait.until(
                    EC.element_to_be_clickable(
                        (
                            By.CSS_SELECTOR,
                            ".btn.btn--cookiemessage.btn--cancel.btn--contrast",
                        )
                    )
                )
                cookie_button.click()
            except WebDriverException:
                # Covers timeout / missing / intercepted click: the banner is
                # optional, so carry on without dismissing it.
                pass

            # Enter postcode
            post_code_input = wait.until(
                EC.element_to_be_clickable((By.ID, postcode_input_id))
            )
            post_code_input.clear()
            post_code_input.send_keys(user_postcode)
            post_code_input.send_keys(Keys.TAB + Keys.ENTER)

            # Wait for address options to populate
            address_option_locator = (
                By.CSS_SELECTOR,
                f"#{address_select_id} option",
            )
            try:
                # Wait for the address dropdown to be present
                wait.until(
                    EC.presence_of_element_located((By.ID, address_select_id))
                )

                # Wait for actual address options to appear. Count only the
                # options of the address dropdown, not every <option> on the
                # page.
                wait.until(
                    lambda d: len(d.find_elements(*address_option_locator)) > 1
                )
            except TimeoutException as exc:
                raise Exception(
                    "Timeout waiting for address options to populate"
                ) from exc

            # Re-query so the elements are fresh after the list populated.
            options = driver.find_elements(*address_option_locator)[
                1:
            ]  # Skip placeholder
            if not options:
                raise Exception(f"No addresses found for postcode: {user_postcode}")

            # Normalize user input by keeping only alphanumeric characters
            normalized_user_input = "".join(
                c for c in user_paon if c.isalnum()
            ).lower()

            # Find matching address in dropdown.
            # NOTE: substring match, so the first option containing the
            # normalized paon wins (e.g. "1" also matches "10 ...").
            for option in options:
                normalized_option = "".join(
                    c for c in option.text if c.isalnum()
                ).lower()
                if normalized_user_input in normalized_option:
                    option.click()
                    break
            else:
                # Fail fast with a clear message instead of timing out later
                # while waiting for a table that will never load.
                raise Exception(
                    f"No address matching '{user_paon}' found for postcode: {user_postcode}"
                )

            # Wait for collection table and day text
            wait.until(EC.presence_of_element_located((By.ID, table_id)))

            def collection_day_populated(d: Any) -> bool:
                """Return True once the collection-day cell has more than one word."""
                try:
                    cell_text = (
                        d.find_element(By.ID, table_id)
                        .find_elements(By.TAG_NAME, "tr")[2]
                        .find_elements(By.TAG_NAME, "td")[1]
                        .text
                    )
                except (IndexError, StaleElementReferenceException):
                    # Table still rendering / being replaced: poll again.
                    return False
                return len(cell_text.strip().split()) > 1

            # Wait for collection day text to be fully populated
            wait.until(collection_day_populated)

            # Parse the table
            soup = BeautifulSoup(driver.page_source, features="html.parser")
            table = soup.find("div", id=table_id).find("table")

            # Get collection day
            collection_day_text = table.find_all("tr")[2].find_all("td")[1].text.strip()
            lowered_day_text = collection_day_text.lower()
            day_of_week = next(
                (day for day in DAYS_OF_WEEK if day.lower() in lowered_day_text),
                None,
            )
            if not day_of_week:
                raise Exception(
                    f"Could not determine collection day from text: '{collection_day_text}'"
                )

            # Calculate next collection date
            today = datetime.now()
            days_ahead = (DAYS_OF_WEEK[day_of_week] - today.weekday()) % 7
            if days_ahead == 0:  # If today is collection day, get next week's date
                days_ahead = 7
            next_collection = today + timedelta(days=days_ahead)

            # Add collection dates for each bin type
            for bin_type in ("General Waste", "Recycling", "Food Waste"):
                data["bins"].append(
                    {
                        "type": bin_type,
                        "collectionDate": next_collection.strftime("%d/%m/%Y"),
                    }
                )

            # Process collection details (rows other than the first bin row)
            for row in soup.select("div.bin--row:not(:first-child)"):
                try:
                    bin_type = row.select_one("div.col-md-3").text.strip()
                    collection_dates_div = row.select("div.col-md-3")[1]
                    # Only the div's own text nodes, not text of child tags.
                    next_collection_text = "".join(
                        collection_dates_div.find_all(text=True, recursive=False)
                    ).strip()
                    cleaned_date_text = remove_ordinal_indicator_from_date_string(
                        next_collection_text
                    )
                    parsed_date = parse(cleaned_date_text, fuzzy=True)
                    bin_date = parsed_date.strftime("%d/%m/%Y")

                    if bin_type and bin_date:
                        data["bins"].append(
                            {
                                "type": bin_type,
                                "collectionDate": bin_date,
                            }
                        )
                except Exception as e:
                    # Deliberately best-effort: one malformed row must not
                    # discard the rest of the results.
                    print(f"Error processing item: {e}")
                    continue

            # Get bank holiday changes
            print("\nChecking for bank holiday collection changes...")
            try:
                bank_holiday_changes = get_bank_holiday_changes(driver)
            except TimeoutException:
                # The bank-holiday page is a secondary source; if it shows no
                # table, keep the collected dates rather than failing outright.
                print(
                    "Bank holiday page did not show a changes table; "
                    "collection dates left unadjusted"
                )
                bank_holiday_changes = {}

            # Apply any bank holiday changes to collection dates
            for bin_data in data["bins"]:
                original_date = bin_data["collectionDate"]
                if original_date in bank_holiday_changes:
                    new_date = bank_holiday_changes[original_date]
                    print(
                        f"Bank holiday change: {bin_data['type']} collection moved from {original_date} to {new_date}"
                    )
                    bin_data["collectionDate"] = new_date

        except Exception as e:
            print(f"An error occurred: {e}")
            raise
        finally:
            if driver:
                driver.quit()

        # Print the final data dictionary for debugging
        print("\nFinal data dictionary:")
        print(json.dumps(data, indent=2))

        return data
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: uk_bin_collection
3
- Version: 0.144.2
3
+ Version: 0.144.4
4
4
  Summary: Python Lib to collect UK Bin Data
5
5
  Author: Robert Bradley
6
6
  Author-email: robbrad182@gmail.com
@@ -1,9 +1,11 @@
1
+ uk_bin_collection/Local_Authority_Boundaries.geojson,sha256=_j-hUiL0--t2ewd_s29-j7_AKRlhagRMmOhXyco-B6I,1175922
1
2
  uk_bin_collection/README.rst,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ uk_bin_collection/map.html,sha256=bBqCQkT4DjBeAL8hrjW5n8UHNxVUqc6XScpGMF60Vzw,3979
2
4
  uk_bin_collection/tests/check_selenium_url_in_input.json.py,sha256=Iecdja0I3XIiY76qmwPgcBqNgYv7n1-b5mg85JpMjg8,7817
3
5
  uk_bin_collection/tests/council_feature_input_parity.py,sha256=DO6Mk4ImYgM5ZCZ-cutwz5RoYYWZRLYx2tr6zIs_9Rc,3843
4
6
  uk_bin_collection/tests/features/environment.py,sha256=VQZjJdJI_kZn08M0j5cUgvKT4k3iTw8icJge1DGOkoA,127
5
7
  uk_bin_collection/tests/features/validate_council_outputs.feature,sha256=SJK-Vc737hrf03tssxxbeg_JIvAH-ddB8f6gU1LTbuQ,251
6
- uk_bin_collection/tests/input.json,sha256=khL_z76OZZqmFtagerGv_djRNUYZC5iGAaLNTvttV2o,122435
8
+ uk_bin_collection/tests/input.json,sha256=fU6qt6mRnMu08OcFwdWhYoRbUImiqIUPE8RAYuEQsRA,132316
7
9
  uk_bin_collection/tests/output.schema,sha256=ZwKQBwYyTDEM4G2hJwfLUVM-5v1vKRvRK9W9SS1sd18,1086
8
10
  uk_bin_collection/tests/step_defs/step_helpers/file_handler.py,sha256=Ygzi4V0S1MIHqbdstUlIqtRIwnynvhu4UtpweJ6-5N8,1474
9
11
  uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=VZ0a81sioJULD7syAYHjvK_-nT_Rd36tUyzPetSA0gk,3475
@@ -50,7 +52,7 @@ uk_bin_collection/uk_bin_collection/councils/BraintreeDistrictCouncil.py,sha256=
50
52
  uk_bin_collection/uk_bin_collection/councils/BrecklandCouncil.py,sha256=PX6A_pDvaN109aSNWmEhm88GFKfkClIkmbwGURWvsks,1744
51
53
  uk_bin_collection/uk_bin_collection/councils/BrentCouncil.py,sha256=ucwokxvASYi_KiOYSOVdaGfC1kfUbII0r6Zl2NE1hnU,4208
52
54
  uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py,sha256=k6qt4cds-Ejd97Z-__pw2BYvGVbFdc9SUfF73PPrTNA,5823
53
- uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py,sha256=kJmmDJz_kQ45DHmG7ocrUpNJonEn0kuXYEDQyZaf9ks,5576
55
+ uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py,sha256=nQeRBKrDcZE2m_EzjUBr9dJ5tcUdGcUuA5FcnLkbLr4,5575
54
56
  uk_bin_collection/uk_bin_collection/councils/BroadlandDistrictCouncil.py,sha256=aelqhh503dx6O2EEmC3AT5tnY39Dc53qcouH8T-mek8,7613
55
57
  uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py,sha256=dii85JLmYU1uMidCEsWVo3stTcq_QqyC65DxG8u1UmE,4302
56
58
  uk_bin_collection/uk_bin_collection/councils/BromsgroveDistrictCouncil.py,sha256=PUfxP8j5Oh9wFHkdjbrJzQli9UzMHZzwrZ2hkThrvhI,1781
@@ -146,8 +148,8 @@ uk_bin_collection/uk_bin_collection/councils/HerefordshireCouncil.py,sha256=JpQh
146
148
  uk_bin_collection/uk_bin_collection/councils/HertsmereBoroughCouncil.py,sha256=ZbSsmqHStd2JtTMAq1Bhcvsj1BYp6ijELyOjZFX2GSw,6435
147
149
  uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py,sha256=x7dfy8mdt2iGl8qJxHb-uBh4u0knmi9MJ6irOJw9WYA,4805
148
150
  uk_bin_collection/uk_bin_collection/councils/HighlandCouncil.py,sha256=GNxDU65QuZHV5va2IrKtcJ6TQoDdwmV03JvkVqOauP4,3291
151
+ uk_bin_collection/uk_bin_collection/councils/Hillingdon.py,sha256=R1enDv5gjwCUT3HKgj8C87xWrwvrutAN6XLu5P7tef8,10532
149
152
  uk_bin_collection/uk_bin_collection/councils/HinckleyandBosworthBoroughCouncil.py,sha256=51vXTKrstfJhb7cLCcrsvA9qKCsptyNMZvy7ML9DasM,2344
150
- uk_bin_collection/uk_bin_collection/councils/HounslowCouncil.py,sha256=LXhJ47rujx7k3naz0tFiTT1l5k6gAYcVdekJN1t_HLY,4564
151
153
  uk_bin_collection/uk_bin_collection/councils/HullCityCouncil.py,sha256=UHcesBoctFVcXDYuwfag43KbcJcopkEDzJ-54NxtK0Q,1851
152
154
  uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py,sha256=dGyhhG6HRjQ2SPeiRwUPTGlk9dPIslagV2k0GjEOn1s,1587
153
155
  uk_bin_collection/uk_bin_collection/councils/IpswichBoroughCouncil.py,sha256=57lmDl_FprG68gUhKQYpOa1M2pudyb1utfoMhUXNwzs,2802
@@ -330,8 +332,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
330
332
  uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=EQWRhZ2pEejlvm0fPyOTsOHKvUZmPnxEYO_OWRGKTjs,1158
331
333
  uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
332
334
  uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=YvmHfZqanwrJ8ToGch34x-L-7yPe31nB_x77_Mgl_vo,4545
333
- uk_bin_collection-0.144.2.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
334
- uk_bin_collection-0.144.2.dist-info/METADATA,sha256=h2gau2rSpNnurR8hcPU1G1O4c69ZM0K3vMNL2tn85PM,19858
335
- uk_bin_collection-0.144.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
336
- uk_bin_collection-0.144.2.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
337
- uk_bin_collection-0.144.2.dist-info/RECORD,,
335
+ uk_bin_collection-0.144.4.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
336
+ uk_bin_collection-0.144.4.dist-info/METADATA,sha256=C4p4EDGWkZ2ZkKT4U_iTeb7BH5tO15Mtfcqpo4LiHG8,19858
337
+ uk_bin_collection-0.144.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
338
+ uk_bin_collection-0.144.4.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
339
+ uk_bin_collection-0.144.4.dist-info/RECORD,,
@@ -1,122 +0,0 @@
1
- import time
2
- from datetime import datetime
3
-
4
- from bs4 import BeautifulSoup
5
- from selenium.webdriver.common.by import By
6
- from selenium.webdriver.support import expected_conditions as EC
7
- from selenium.webdriver.support.ui import Select
8
- from selenium.webdriver.support.wait import WebDriverWait
9
- from selenium.webdriver.common.keys import Keys
10
-
11
- from uk_bin_collection.uk_bin_collection.common import *
12
- from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
13
-
14
-
15
- # import the wonderful Beautiful Soup and the URL grabber
16
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.

    This implementation drives the Hounslow collection day finder with
    Selenium and reads the results with BeautifulSoup.
    """

    def parse_date(self, date_str):
        """Convert a collection heading into a ``dd/mm/YYYY`` string.

        Accepts "This <weekday> - <day> <abbreviated month> <year>",
        "Next <weekday> - <day> <abbreviated month> <year>" and
        "<weekday> <day> <abbreviated month> <year>".

        Raises:
            ValueError: If ``date_str`` matches none of the known formats.
        """
        known_formats = (
            "This %A - %d %b %Y",  # %b: the site uses abbreviated month names
            "Next %A - %d %b %Y",
            "%A %d %b %Y",
        )
        for fmt in known_formats:
            try:
                return datetime.strptime(date_str, fmt).strftime("%d/%m/%Y")
            except ValueError:
                continue
        raise ValueError(f"Date format not recognized: {date_str}")

    def parse_data(self, page: str, **kwargs) -> dict:
        """Look up bin collections for a UPRN on the Hounslow website.

        Args:
            page: Ignored; the finder URL is hard-coded below.
            **kwargs: Reads ``postcode``, ``uprn``, ``web_driver`` and
                ``headless``.

        Returns:
            ``{"bins": [{"type": str, "collectionDate": "dd/mm/YYYY"}, ...]}``
            with one entry per ``li.list-group-item`` found in the ``<ul>``
            following each ``<h4>`` date heading.

        Raises:
            ValueError: If no address option starts with ``"<uprn>|"``, or an
                ``<h4>`` heading is not in a recognised date format.
            Exception: Any Selenium failure is printed and re-raised; the
                driver is always closed.
        """
        driver = None
        try:
            page = "https://www.hounslow.gov.uk/info/20272/recycling_and_waste_collection_day_finder"

            user_postcode = kwargs.get("postcode")
            user_uprn = kwargs.get("uprn")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")

            driver = create_webdriver(web_driver, headless, None, __name__)
            driver.get(page)

            wait = WebDriverWait(driver, 60)

            postcode_input = wait.until(
                EC.element_to_be_clickable((By.ID, "Postcode"))
            )
            postcode_input.send_keys(user_postcode)

            wait.until(EC.element_to_be_clickable((By.ID, "findAddress"))).click()

            # Wait until the address dropdown is both clickable and visible.
            wait.until(EC.element_to_be_clickable((By.ID, "UPRN")))
            select_element = wait.until(
                EC.visibility_of_element_located((By.ID, "UPRN"))
            )

            # Option values look like "<uprn>|..."; pick the one for this UPRN.
            uprn_prefix = f"{user_uprn}|"
            for option in Select(select_element).options:
                if (option.get_attribute("value") or "").startswith(uprn_prefix):
                    option.click()  # Select the matching option
                    break
            else:
                # Fail fast instead of waiting 60s for results that will
                # never load because nothing was selected.
                raise ValueError(
                    f"No address found for UPRN {user_uprn} at postcode {user_postcode}"
                )

            # Block until the results wrapper has rendered.
            wait.until(
                EC.element_to_be_clickable((By.CLASS_NAME, "bin_day_main_wrapper"))
            )

            soup = BeautifulSoup(driver.page_source, features="html.parser")

            # Each <h4> is a collection date; the <ul> after it lists the bins.
            bins_data = []
            for header in soup.find_all("h4"):
                collection_date = self.parse_date(header.get_text(strip=True))

                bin_list = header.find_next_sibling("ul")
                if not bin_list:
                    continue
                for item in bin_list.find_all("li", class_="list-group-item"):
                    bins_data.append(
                        {
                            "type": item.get_text(strip=True),
                            "collectionDate": collection_date,
                        }
                    )

            # Construct the final JSON object
            json_data = {"bins": bins_data}

        except Exception as e:
            # Report, then propagate so the caller sees the failure.
            print(f"An error occurred: {e}")
            raise
        finally:
            # Ensure the driver is closed regardless of an exception.
            if driver:
                driver.quit()
        return json_data