uk_bin_collection 0.144.2__py3-none-any.whl → 0.144.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/Local_Authority_Boundaries.geojson +1 -0
- uk_bin_collection/map.html +108 -0
- uk_bin_collection/tests/input.json +613 -303
- uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py +4 -1
- uk_bin_collection/uk_bin_collection/councils/Hillingdon.py +273 -0
- {uk_bin_collection-0.144.2.dist-info → uk_bin_collection-0.144.4.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.144.2.dist-info → uk_bin_collection-0.144.4.dist-info}/RECORD +10 -8
- uk_bin_collection/uk_bin_collection/councils/HounslowCouncil.py +0 -122
- {uk_bin_collection-0.144.2.dist-info → uk_bin_collection-0.144.4.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.144.2.dist-info → uk_bin_collection-0.144.4.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.144.2.dist-info → uk_bin_collection-0.144.4.dist-info}/entry_points.txt +0 -0
@@ -126,11 +126,14 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
126
126
|
|
127
127
|
collections = []
|
128
128
|
for bin in collection_data:
|
129
|
+
if not bin["collection"]:
|
130
|
+
continue # Skip if there are no collection dates
|
131
|
+
|
129
132
|
bin_type = bin["containerName"]
|
130
133
|
next_collection = datetime.strptime(
|
131
134
|
bin["collection"][0]["nextCollectionDate"], "%Y-%m-%dT%H:%M:%S"
|
132
135
|
).strftime(date_format)
|
133
|
-
|
136
|
+
|
134
137
|
collections.append((bin_type, next_collection))
|
135
138
|
|
136
139
|
ordered_data = sorted(collections, key=lambda x: x[1])
|
@@ -0,0 +1,273 @@
|
|
1
|
+
import json
|
2
|
+
from datetime import datetime, timedelta
|
3
|
+
from typing import Any, Dict
|
4
|
+
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
from dateutil.parser import parse
|
7
|
+
from selenium.common.exceptions import (
|
8
|
+
NoSuchElementException,
|
9
|
+
StaleElementReferenceException,
|
10
|
+
TimeoutException,
|
11
|
+
)
|
12
|
+
from selenium.webdriver.common.by import By
|
13
|
+
from selenium.webdriver.common.keys import Keys
|
14
|
+
from selenium.webdriver.remote.webdriver import WebDriver
|
15
|
+
from selenium.webdriver.support import expected_conditions as EC
|
16
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
17
|
+
|
18
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
19
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
20
|
+
|
21
|
+
# Dictionary mapping day names to their weekday numbers (Monday=0, Sunday=6)
|
22
|
+
# Day names paired with the index that datetime.weekday() reports for them.
# Insertion order is Monday-first; the collection-day lookup iterates this
# mapping and takes the first name found in the page text.
DAYS_OF_WEEK = dict(
    zip(
        (
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ),
        range(7),
    )
)
|
31
|
+
|
32
|
+
|
33
|
+
# This function checks for bank holiday collection changes,
|
34
|
+
# but the page seems manually written so might break easily
|
35
|
+
def get_bank_holiday_changes(driver: WebDriver) -> Dict[str, str]:
    """Fetch and parse bank holiday collection changes from the council website.

    Loads the bank holiday collections page in ``driver`` and reads every
    table whose header cells include both "Normal collection day" and
    "Revised collection day".

    Args:
        driver: Selenium WebDriver used to load the page. It is navigated to
            the bank holiday page, leaving whatever page it was showing.

    Returns:
        Mapping of normal collection date to revised collection date, both
        formatted ``dd/mm/YYYY``. Empty when no table appears within ten
        seconds or when no row could be parsed.
    """
    bank_holiday_url = "https://www.hillingdon.gov.uk/bank-holiday-collections"
    driver.get(bank_holiday_url)

    changes: Dict[str, str] = {}

    # The page is hand-maintained and may have no table at all. Treat that as
    # "no changes" rather than letting the timeout abort the caller.
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, "table"))
        )
    except TimeoutException:
        print("No bank holiday collection table found; skipping changes")
        return changes

    soup = BeautifulSoup(driver.page_source, features="html.parser")

    for table in soup.find_all("table"):
        # Only tables carrying both expected headers describe date changes.
        headers = [th.text.strip() for th in table.find_all("th")]
        if (
            "Normal collection day" not in headers
            or "Revised collection day" not in headers
        ):
            continue

        for row in table.find_all("tr")[1:]:  # Skip header row
            cols = row.find_all("td")
            if len(cols) < 2:
                continue

            # NOTE(review): fuzzy parsing fills missing fields (e.g. the year)
            # from dateutil's default, presumably today's date - confirm this
            # is acceptable around the new year.
            try:
                normal_date = parse(cols[0].text.strip(), fuzzy=True)
                revised_date = parse(cols[1].text.strip(), fuzzy=True)
            except (ValueError, OverflowError) as e:
                # One unparseable row must not discard the remaining rows.
                print(f"Error parsing dates: {e}")
                continue

            changes[normal_date.strftime("%d/%m/%Y")] = revised_date.strftime(
                "%d/%m/%Y"
            )

    return changes
|
75
|
+
|
76
|
+
|
77
|
+
class CouncilClass(AbstractGetBinDataClass):
    """Bin collection scraper that drives Hillingdon's postcode/address lookup form."""

    def parse_data(self, page: str, **kwargs: Any) -> Dict[str, Any]:
        """Drive the council lookup form and return upcoming bin collections.

        Args:
            page: Not used; the form is loaded from ``kwargs["url"]``.
            **kwargs: Reads ``paon`` (matched against the address dropdown
                text), ``postcode``, ``web_driver``, ``headless`` and ``url``.

        Returns:
            ``{"bins": [...]}`` where every entry has a ``type`` and a
            ``collectionDate`` formatted ``dd/mm/YYYY``. Dates found in the
            bank holiday table are replaced by their revised date.

        Raises:
            Exception: If the address dropdown does not populate, no dropdown
                entry matches ``paon``, or no weekday name is found in the
                collection day text. Any error is printed and re-raised.
        """
        table_id = "WASTECOLLECTIONDAYLOOKUPINCLUDEGARDEN_COLLECTIONTABLE"

        def collection_day_populated(drv: WebDriver) -> bool:
            """Return True once the collection day cell holds more than one word."""
            try:
                cell_text = (
                    drv.find_element(By.ID, table_id)
                    .find_elements(By.TAG_NAME, "tr")[2]
                    .find_elements(By.TAG_NAME, "td")[1]
                    .text.strip()
                )
            except (IndexError, StaleElementReferenceException):
                # Table still rendering: rows/cells missing or replaced.
                # Report "not ready" so the wait polls again instead of
                # propagating the error.
                return False
            return len(cell_text.split()) > 1

        driver = None
        try:
            data: Dict[str, Any] = {"bins": []}
            user_paon = kwargs.get("paon")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            url = kwargs.get("url")

            check_paon(user_paon)
            check_postcode(user_postcode)

            driver = create_webdriver(web_driver, headless, None, __name__)
            driver.get(url)

            wait = WebDriverWait(driver, 30)

            # Handle cookie banner if present. The button carries several
            # classes, which By.CLASS_NAME cannot express, so match them all
            # with a CSS selector.
            try:
                cookie_button = wait.until(
                    EC.element_to_be_clickable(
                        (
                            By.CSS_SELECTOR,
                            ".btn.btn--cookiemessage.btn--cancel.btn--contrast",
                        )
                    )
                )
                cookie_button.click()
            except (TimeoutException, NoSuchElementException):
                # No banner shown; nothing to dismiss.
                pass

            # Enter postcode
            post_code_input = wait.until(
                EC.element_to_be_clickable(
                    (
                        By.ID,
                        "WASTECOLLECTIONDAYLOOKUPINCLUDEGARDEN_ADDRESSLOOKUPPOSTCODE",
                    )
                )
            )
            post_code_input.clear()
            post_code_input.send_keys(user_postcode)
            post_code_input.send_keys(Keys.TAB + Keys.ENTER)

            # Wait for address options to populate
            try:
                address_select = wait.until(
                    EC.presence_of_element_located(
                        (
                            By.ID,
                            "WASTECOLLECTIONDAYLOOKUPINCLUDEGARDEN_ADDRESSLOOKUPADDRESS",
                        )
                    )
                )

                # Wait for actual address options to appear
                wait.until(
                    lambda driver: len(driver.find_elements(By.TAG_NAME, "option")) > 1
                )

                # Skip the placeholder entry at the top of the dropdown
                options = address_select.find_elements(By.TAG_NAME, "option")[1:]
                if not options:
                    raise Exception(f"No addresses found for postcode: {user_postcode}")

                # Normalize user input by keeping only alphanumeric characters
                normalized_user_input = "".join(
                    c for c in user_paon if c.isalnum()
                ).lower()

                # Select the first dropdown entry containing the normalized paon
                for option in options:
                    normalized_option = "".join(
                        c for c in option.text if c.isalnum()
                    ).lower()
                    if normalized_user_input in normalized_option:
                        option.click()
                        break
                else:
                    # Without a selection the later table wait would only end
                    # in an opaque timeout; fail here with the real cause.
                    raise Exception(
                        f"No address matching '{user_paon}' found for postcode: {user_postcode}"
                    )
            except TimeoutException:
                raise Exception("Timeout waiting for address options to populate")

            # Wait for collection table and day text
            wait.until(EC.presence_of_element_located((By.ID, table_id)))

            # Wait for collection day text to be fully populated
            wait.until(collection_day_populated)

            # Parse the table
            soup = BeautifulSoup(driver.page_source, features="html.parser")
            table = soup.find("div", id=table_id).find("table")

            # Get collection day: third row, second cell of the table
            collection_day_text = table.find_all("tr")[2].find_all("td")[1].text.strip()
            day_of_week = next(
                (
                    day
                    for day in DAYS_OF_WEEK
                    if day.lower() in collection_day_text.lower()
                ),
                None,
            )
            if not day_of_week:
                raise Exception(
                    f"Could not determine collection day from text: '{collection_day_text}'"
                )

            # Calculate next collection date
            today = datetime.now()
            days_ahead = (DAYS_OF_WEEK[day_of_week] - today.weekday()) % 7
            if days_ahead == 0:  # If today is collection day, get next week's date
                days_ahead = 7
            next_collection = today + timedelta(days=days_ahead)

            # The three weekly bin types all share the computed date
            bin_types = ["General Waste", "Recycling", "Food Waste"]
            for bin_type in bin_types:
                data["bins"].append(
                    {
                        "type": bin_type,
                        "collectionDate": next_collection.strftime("%d/%m/%Y"),
                    }
                )

            # Process collection details listed in the extra bin rows
            bin_rows = soup.select("div.bin--row:not(:first-child)")
            for row in bin_rows:
                try:
                    bin_type = row.select_one("div.col-md-3").text.strip()
                    collection_dates_div = row.select("div.col-md-3")[1]
                    # Only the div's own text nodes, not text of child tags
                    next_collection_text = "".join(
                        collection_dates_div.find_all(text=True, recursive=False)
                    ).strip()
                    cleaned_date_text = remove_ordinal_indicator_from_date_string(
                        next_collection_text
                    )
                    parsed_date = parse(cleaned_date_text, fuzzy=True)
                    bin_date = parsed_date.strftime("%d/%m/%Y")

                    if bin_type and bin_date:
                        data["bins"].append(
                            {
                                "type": bin_type,
                                "collectionDate": bin_date,
                            }
                        )
                except Exception as e:
                    # Best effort: a malformed row must not lose the others.
                    print(f"Error processing item: {e}")
                    continue

            # Get bank holiday changes
            print("\nChecking for bank holiday collection changes...")
            bank_holiday_changes = get_bank_holiday_changes(driver)

            # Apply any bank holiday changes to collection dates
            for bin_data in data["bins"]:
                original_date = bin_data["collectionDate"]
                if original_date in bank_holiday_changes:
                    new_date = bank_holiday_changes[original_date]
                    print(
                        f"Bank holiday change: {bin_data['type']} collection moved from {original_date} to {new_date}"
                    )
                    bin_data["collectionDate"] = new_date

        except Exception as e:
            print(f"An error occurred: {e}")
            raise
        finally:
            # Always release the browser, including on failure.
            if driver:
                driver.quit()

        # Print the final data dictionary for debugging
        print("\nFinal data dictionary:")
        print(json.dumps(data, indent=2))

        return data
|
@@ -1,9 +1,11 @@
|
|
1
|
+
uk_bin_collection/Local_Authority_Boundaries.geojson,sha256=_j-hUiL0--t2ewd_s29-j7_AKRlhagRMmOhXyco-B6I,1175922
|
1
2
|
uk_bin_collection/README.rst,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
+
uk_bin_collection/map.html,sha256=bBqCQkT4DjBeAL8hrjW5n8UHNxVUqc6XScpGMF60Vzw,3979
|
2
4
|
uk_bin_collection/tests/check_selenium_url_in_input.json.py,sha256=Iecdja0I3XIiY76qmwPgcBqNgYv7n1-b5mg85JpMjg8,7817
|
3
5
|
uk_bin_collection/tests/council_feature_input_parity.py,sha256=DO6Mk4ImYgM5ZCZ-cutwz5RoYYWZRLYx2tr6zIs_9Rc,3843
|
4
6
|
uk_bin_collection/tests/features/environment.py,sha256=VQZjJdJI_kZn08M0j5cUgvKT4k3iTw8icJge1DGOkoA,127
|
5
7
|
uk_bin_collection/tests/features/validate_council_outputs.feature,sha256=SJK-Vc737hrf03tssxxbeg_JIvAH-ddB8f6gU1LTbuQ,251
|
6
|
-
uk_bin_collection/tests/input.json,sha256=
|
8
|
+
uk_bin_collection/tests/input.json,sha256=fU6qt6mRnMu08OcFwdWhYoRbUImiqIUPE8RAYuEQsRA,132316
|
7
9
|
uk_bin_collection/tests/output.schema,sha256=ZwKQBwYyTDEM4G2hJwfLUVM-5v1vKRvRK9W9SS1sd18,1086
|
8
10
|
uk_bin_collection/tests/step_defs/step_helpers/file_handler.py,sha256=Ygzi4V0S1MIHqbdstUlIqtRIwnynvhu4UtpweJ6-5N8,1474
|
9
11
|
uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=VZ0a81sioJULD7syAYHjvK_-nT_Rd36tUyzPetSA0gk,3475
|
@@ -50,7 +52,7 @@ uk_bin_collection/uk_bin_collection/councils/BraintreeDistrictCouncil.py,sha256=
|
|
50
52
|
uk_bin_collection/uk_bin_collection/councils/BrecklandCouncil.py,sha256=PX6A_pDvaN109aSNWmEhm88GFKfkClIkmbwGURWvsks,1744
|
51
53
|
uk_bin_collection/uk_bin_collection/councils/BrentCouncil.py,sha256=ucwokxvASYi_KiOYSOVdaGfC1kfUbII0r6Zl2NE1hnU,4208
|
52
54
|
uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py,sha256=k6qt4cds-Ejd97Z-__pw2BYvGVbFdc9SUfF73PPrTNA,5823
|
53
|
-
uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py,sha256=
|
55
|
+
uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py,sha256=nQeRBKrDcZE2m_EzjUBr9dJ5tcUdGcUuA5FcnLkbLr4,5575
|
54
56
|
uk_bin_collection/uk_bin_collection/councils/BroadlandDistrictCouncil.py,sha256=aelqhh503dx6O2EEmC3AT5tnY39Dc53qcouH8T-mek8,7613
|
55
57
|
uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py,sha256=dii85JLmYU1uMidCEsWVo3stTcq_QqyC65DxG8u1UmE,4302
|
56
58
|
uk_bin_collection/uk_bin_collection/councils/BromsgroveDistrictCouncil.py,sha256=PUfxP8j5Oh9wFHkdjbrJzQli9UzMHZzwrZ2hkThrvhI,1781
|
@@ -146,8 +148,8 @@ uk_bin_collection/uk_bin_collection/councils/HerefordshireCouncil.py,sha256=JpQh
|
|
146
148
|
uk_bin_collection/uk_bin_collection/councils/HertsmereBoroughCouncil.py,sha256=ZbSsmqHStd2JtTMAq1Bhcvsj1BYp6ijELyOjZFX2GSw,6435
|
147
149
|
uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py,sha256=x7dfy8mdt2iGl8qJxHb-uBh4u0knmi9MJ6irOJw9WYA,4805
|
148
150
|
uk_bin_collection/uk_bin_collection/councils/HighlandCouncil.py,sha256=GNxDU65QuZHV5va2IrKtcJ6TQoDdwmV03JvkVqOauP4,3291
|
151
|
+
uk_bin_collection/uk_bin_collection/councils/Hillingdon.py,sha256=R1enDv5gjwCUT3HKgj8C87xWrwvrutAN6XLu5P7tef8,10532
|
149
152
|
uk_bin_collection/uk_bin_collection/councils/HinckleyandBosworthBoroughCouncil.py,sha256=51vXTKrstfJhb7cLCcrsvA9qKCsptyNMZvy7ML9DasM,2344
|
150
|
-
uk_bin_collection/uk_bin_collection/councils/HounslowCouncil.py,sha256=LXhJ47rujx7k3naz0tFiTT1l5k6gAYcVdekJN1t_HLY,4564
|
151
153
|
uk_bin_collection/uk_bin_collection/councils/HullCityCouncil.py,sha256=UHcesBoctFVcXDYuwfag43KbcJcopkEDzJ-54NxtK0Q,1851
|
152
154
|
uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py,sha256=dGyhhG6HRjQ2SPeiRwUPTGlk9dPIslagV2k0GjEOn1s,1587
|
153
155
|
uk_bin_collection/uk_bin_collection/councils/IpswichBoroughCouncil.py,sha256=57lmDl_FprG68gUhKQYpOa1M2pudyb1utfoMhUXNwzs,2802
|
@@ -330,8 +332,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
|
|
330
332
|
uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=EQWRhZ2pEejlvm0fPyOTsOHKvUZmPnxEYO_OWRGKTjs,1158
|
331
333
|
uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
|
332
334
|
uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=YvmHfZqanwrJ8ToGch34x-L-7yPe31nB_x77_Mgl_vo,4545
|
333
|
-
uk_bin_collection-0.144.
|
334
|
-
uk_bin_collection-0.144.
|
335
|
-
uk_bin_collection-0.144.
|
336
|
-
uk_bin_collection-0.144.
|
337
|
-
uk_bin_collection-0.144.
|
335
|
+
uk_bin_collection-0.144.4.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
|
336
|
+
uk_bin_collection-0.144.4.dist-info/METADATA,sha256=C4p4EDGWkZ2ZkKT4U_iTeb7BH5tO15Mtfcqpo4LiHG8,19858
|
337
|
+
uk_bin_collection-0.144.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
338
|
+
uk_bin_collection-0.144.4.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
|
339
|
+
uk_bin_collection-0.144.4.dist-info/RECORD,,
|
@@ -1,122 +0,0 @@
|
|
1
|
-
import time
|
2
|
-
from datetime import datetime
|
3
|
-
|
4
|
-
from bs4 import BeautifulSoup
|
5
|
-
from selenium.webdriver.common.by import By
|
6
|
-
from selenium.webdriver.support import expected_conditions as EC
|
7
|
-
from selenium.webdriver.support.ui import Select
|
8
|
-
from selenium.webdriver.support.wait import WebDriverWait
|
9
|
-
from selenium.webdriver.common.keys import Keys
|
10
|
-
|
11
|
-
from uk_bin_collection.uk_bin_collection.common import *
|
12
|
-
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
13
|
-
|
14
|
-
|
15
|
-
# import the wonderful Beautiful Soup and the URL grabber
|
16
|
-
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Hounslow's collection day finder.

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_date(self, date_str):
        """Convert a collection heading into a ``dd/mm/YYYY`` date string.

        Args:
            date_str: Heading text such as ``"This Monday - 02 Jun 2025"``,
                ``"Next Monday - 09 Jun 2025"`` or ``"Monday 02 Jun 2025"``.

        Returns:
            The date formatted as ``dd/mm/YYYY``.

        Raises:
            ValueError: If ``date_str`` matches none of the known layouts.
        """
        # %b (not %B) because the headings use abbreviated month names.
        known_layouts = (
            "This %A - %d %b %Y",
            "Next %A - %d %b %Y",
            "%A %d %b %Y",
        )
        for layout in known_layouts:
            try:
                return datetime.strptime(date_str, layout).strftime("%d/%m/%Y")
            except ValueError:
                continue
        raise ValueError(f"Date format not recognized: {date_str}")

    def parse_data(self, page: str, **kwargs) -> dict:
        """Drive the collection day finder and return upcoming bin collections.

        Args:
            page: Ignored; the finder URL is hard-coded below.
            **kwargs: Reads ``postcode``, ``uprn``, ``web_driver`` and
                ``headless``.

        Returns:
            ``{"bins": [...]}`` where every entry has a ``type`` and a
            ``collectionDate`` formatted ``dd/mm/YYYY``.

        Raises:
            Exception: Any error is printed and re-raised.
        """
        driver = None
        try:
            page = "https://www.hounslow.gov.uk/info/20272/recycling_and_waste_collection_day_finder"

            user_postcode = kwargs.get("postcode")
            user_uprn = kwargs.get("uprn")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")

            driver = create_webdriver(web_driver, headless, None, __name__)
            driver.get(page)

            wait = WebDriverWait(driver, 60)

            postcode_input = wait.until(
                EC.element_to_be_clickable((By.ID, "Postcode"))
            )
            postcode_input.send_keys(user_postcode)

            find_address_button = wait.until(
                EC.element_to_be_clickable((By.ID, "findAddress"))
            )
            find_address_button.click()

            # Wait until the address dropdown is both clickable and visible
            wait.until(EC.element_to_be_clickable((By.ID, "UPRN")))
            select_element = wait.until(
                EC.visibility_of_element_located((By.ID, "UPRN"))
            )

            # Loop through options to find the one that starts with the UPRN
            uprn_prefix = f"{user_uprn}|"
            for option in Select(select_element).options:
                # An option may lack a value attribute; treat it as no match.
                if (option.get_attribute("value") or "").startswith(uprn_prefix):
                    option.click()  # Select the matching option
                    break

            # Block until the results wrapper has rendered
            wait.until(
                EC.element_to_be_clickable((By.CLASS_NAME, "bin_day_main_wrapper"))
            )

            soup = BeautifulSoup(driver.page_source, features="html.parser")

            # Each <h4> holds a collection date; the <ul> that follows lists
            # the bins collected on that date.
            bins_data = []
            for header in soup.find_all("h4"):
                collection_date = self.parse_date(header.get_text(strip=True))

                bin_list = header.find_next_sibling("ul")
                if bin_list:
                    for item in bin_list.find_all("li", class_="list-group-item"):
                        bins_data.append(
                            {
                                "type": item.get_text(strip=True),
                                "collectionDate": collection_date,
                            }
                        )

            # Construct the final JSON object
            json_data = {"bins": bins_data}

        except Exception as e:
            print(f"An error occurred: {e}")
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return json_data
|
File without changes
|
File without changes
|
{uk_bin_collection-0.144.2.dist-info → uk_bin_collection-0.144.4.dist-info}/entry_points.txt
RENAMED
File without changes
|