uk_bin_collection-0.154.0-py3-none-any.whl → uk_bin_collection-0.158.0-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- uk_bin_collection/tests/input.json +21 -10
- uk_bin_collection/uk_bin_collection/councils/AberdeenCityCouncil.py +0 -1
- uk_bin_collection/uk_bin_collection/councils/DacorumBoroughCouncil.py +22 -13
- uk_bin_collection/uk_bin_collection/councils/EastDunbartonshireCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/IslingtonCouncil.py +8 -5
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +23 -10
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughSutton.py +60 -49
- uk_bin_collection/uk_bin_collection/councils/MidSuffolkDistrictCouncil.py +70 -92
- uk_bin_collection/uk_bin_collection/councils/NewForestCouncil.py +104 -47
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +138 -21
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +182 -3
- uk_bin_collection/uk_bin_collection/councils/OxfordCityCouncil.py +1 -0
- uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +170 -13
- uk_bin_collection/uk_bin_collection/councils/RotherhamCouncil.py +70 -38
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +136 -21
- uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +18 -22
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +138 -21
- {uk_bin_collection-0.154.0.dist-info → uk_bin_collection-0.158.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.154.0.dist-info → uk_bin_collection-0.158.0.dist-info}/RECORD +22 -21
- {uk_bin_collection-0.154.0.dist-info → uk_bin_collection-0.158.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.154.0.dist-info → uk_bin_collection-0.158.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.154.0.dist-info → uk_bin_collection-0.158.0.dist-info}/entry_points.txt +0 -0
```diff
@@ -1,16 +1,14 @@
 import logging
-import pickle
 import time
 
-import requests
 from bs4 import BeautifulSoup
 from selenium import webdriver
+from selenium.common.exceptions import NoSuchElementException
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import Select
 from selenium.webdriver.support.wait import WebDriverWait
-from uk_bin_collection.uk_bin_collection.common import *
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
```
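These import changes track the rework below: `pickle` and `requests` drop out along with the old requests-based cookie bootstrap, a duplicated wildcard import of the `common` module is removed, and `NoSuchElementException` is added to drive the new legacy-page fallback. The `forms.newforest.gov.uk` URL later in these hunks places them in `NewForestCouncil.py` from the file list above.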
```diff
@@ -23,17 +21,64 @@ logging.basicConfig(
 
 class CouncilClass(AbstractGetBinDataClass):
 
+    def get_legacy_bins(self, page: str) -> []:
+
+        logging.info("Extracting legacy bin collection data")
+        soup = BeautifulSoup(page, features="html.parser")
+        legacy_bins = []
+
+        # Rubbish and recycling
+        rubbish_recycling = soup.find(
+            "span", class_="CTID-77-_ eb-77-Override-textControl"
+        )
+        if rubbish_recycling:
+            match = re.search(r"collected weekly on (\w+)", rubbish_recycling.text)
+            if match:
+                day_name = match.group(1)
+                next_collection = get_next_day_of_week(day_name)
+                legacy_bins.append(
+                    {
+                        "type": "Rubbish and recycling",
+                        "collectionDate": next_collection,
+                    }
+                )
+                logging.info(f"Rubbish and Recycling: {str(next_collection)}")
+
+        # Glass collection
+        glass_collection = soup.find("span", class_="CTID-78-_ eb-78-textControl")
+        if glass_collection:
+            match = re.search(
+                r"next collection is\s+(\d{2}/\d{2}/\d{4})", glass_collection.text
+            )
+            if match:
+                legacy_bins.append(
+                    {"type": "Glass collection", "collectionDate": match.group(1)}
+                )
+                logging.info(f"Glass: {str(match.group(1))}")
+
+        # Garden waste
+        garden_waste = soup.find("div", class_="eb-2HIpCnWC-Override-EditorInput")
+        if garden_waste:
+            match = re.search(r"(\d{2}/\d{2}/\d{4})", garden_waste.text)
+            if match:
+                legacy_bins.append(
+                    {"type": "Garden waste", "collectionDate": match.group(1)}
+                )
+                logging.info(f"Garden: {str(match.group(1))}")
+
+        # return bins
+        return legacy_bins
+
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
         try:
-
-            collections = []
+            bins = []
             user_uprn = kwargs.get("uprn")
             user_postcode = kwargs.get("postcode")
             web_driver = kwargs.get("web_driver")
             headless = kwargs.get("headless")
             check_postcode(user_postcode)
-            url = "https://forms.newforest.gov.uk/ufs/
+            url = "https://forms.newforest.gov.uk/ufs/FIND_MY_BIN_BAR.eb"
 
             # Get session cookies using requests
 
```
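`get_next_day_of_week` arrives via the wildcard import from the package's `common` module. As a rough sketch of the behaviour `get_legacy_bins` appears to rely on (an assumption for illustration, not the library's actual source), it resolves a weekday name to the next date that weekday falls on:

```python
from datetime import datetime, timedelta

# Hedged sketch only: the real helper lives in
# uk_bin_collection.uk_bin_collection.common and may differ in detail.
def get_next_day_of_week(day_name: str, date_format: str = "%d/%m/%Y") -> str:
    days = ["Monday", "Tuesday", "Wednesday", "Thursday",
            "Friday", "Saturday", "Sunday"]
    today = datetime.now()
    # 0 means the named day is today; otherwise count forward to it.
    offset = (days.index(day_name.capitalize()) - today.weekday()) % 7
    return (today + timedelta(days=offset)).strftime(date_format)
```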
```diff
@@ -52,10 +97,20 @@ class CouncilClass(AbstractGetBinDataClass):
 
             logging.info("Entering postcode")
             input_element_postcode = wait.until(
-                EC.
+                EC.element_to_be_clickable(
+                    (By.XPATH, '//input[@id="CTID-JmLqCKl2-_-A"]')
+                )
+            )
+
+            driver.execute_script(
+                "arguments[0].scrollIntoView();", input_element_postcode
             )
 
-
+            logging.info(f"Entering postcode '{str(user_postcode)}'")
+            # Force the value through the DOM cos send_keys just don't work for some reason :(
+            driver.execute_script(
+                f"arguments[0].value='{str(user_postcode)}'", input_element_postcode
+            )
 
             logging.info("Searching for postcode")
             input_element_postcode_btn = wait.until(
```
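The "force the value through the DOM" workaround sets the field's `value` property directly because `send_keys` proved unreliable here. One caveat: page scripts that listen for `input` or `change` events will not see a bare property assignment, so a common companion fix (shown only as a sketch, not part of this release) is to dispatch the event in the same script call:

```python
# Sketch: set the value and notify any JS listeners in one go.
driver.execute_script(
    "arguments[0].value = arguments[1];"
    "arguments[0].dispatchEvent(new Event('input', { bubbles: true }));",
    input_element_postcode,
    user_postcode,
)
```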
```diff
@@ -66,7 +121,9 @@ class CouncilClass(AbstractGetBinDataClass):
 
             logging.info("Waiting for address dropdown")
             input_element_postcode_dropdown = wait.until(
-                EC.
+                EC.element_to_be_clickable(
+                    (By.XPATH, '//select[@id="CTID-KOeKcmrC-_-A"]')
+                )
             )
 
             logging.info("Selecting address")
```
```diff
@@ -86,51 +143,51 @@ class CouncilClass(AbstractGetBinDataClass):
 
             input_element_address_btn.click()
 
-
-
-
-
+            # Be patient, clicks take time!
+            time.sleep(2)
+            # logging.info(driver.page_source)
+
+            try:
+                link_element = driver.find_element(
+                    By.XPATH,
+                    '//a[contains(text(),"Find your current bin collection day")]',
+                )
+                logging.info(
+                    "Found override panel span, search for link and use old logic"
                 )
-            )
 
-
-            soup = BeautifulSoup(driver.page_source, features="html.parser")
-            bins = []
+                link_element.click()
 
-
-            rubbish_recycling = soup.find(
-                "span", class_="CTID-77-_ eb-77-Override-textControl"
-            )
-            if rubbish_recycling:
-                match = re.search(r"collected weekly on (\w+)", rubbish_recycling.text)
-                if match:
-                    day_name = match.group(1)
-                    next_collection = get_next_day_of_week(day_name)
-                    bins.append(
-                        {
-                            "type": "Rubbish and recycling",
-                            "collectionDate": next_collection,
-                        }
-                    )
+                # Be patient, clicks take time!
+                time.sleep(2)
 
-
-
-
-
-
-
-
-
-
+                bins = self.get_legacy_bins(driver.page_source)
+
+            except NoSuchElementException:
+                logging.info("Waiting for bin collection table")
+                collections_table = wait.until(
+                    EC.presence_of_element_located(
+                        (
+                            By.XPATH,
+                            '//table[contains(@class,"eb-1j4UaesZ-tableContent")]',
+                        )
                     )
+                )
 
-
-
-
-
-
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+            rows = soup.find_all(class_="eb-1j4UaesZ-tableRow")
+
+            for row in rows:
+                cols = row.find_all("td")
+                date_string = cols[0].findChild("div").findChild("div").get_text()
+                bin_type = cols[1].findChild("div").findChild("div").get_text()
+
+                col_date = datetime.strptime(date_string, "%A %B %d, %Y")
                 bins.append(
-                        {
+                    {
+                        "type": bin_type,
+                        "collectionDate": datetime.strftime(col_date, date_format),
+                    }
                 )
 
             return {"bins": bins}
```
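The legacy path clicks through to the old form and reuses `get_legacy_bins`; otherwise the code waits for the new-style results table, whose dates the `"%A %B %d, %Y"` format implies look like "Friday June 13, 2025" (the sample string below is illustrative):

```python
from datetime import datetime

# Illustrative only: shows the date shape "%A %B %d, %Y" expects.
col_date = datetime.strptime("Friday June 13, 2025", "%A %B %d, %Y")
print(col_date.date())  # 2025-06-13
```

The next set of hunks belongs to a different module from the file list; the `iportal.itouchvision.com` endpoint further down points at one of the iTouchVision-backed councils (the +138/-21 counts match `NewportCityCouncil.py` and `TestValleyBoroughCouncil.py`), though the diff itself does not name the file.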
```diff
@@ -1,4 +1,9 @@
+import datetime
+
 from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.wait import WebDriverWait
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
```
```diff
@@ -13,6 +18,7 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
+<<<<<<< HEAD
         user_postcode = kwargs.get("postcode")
         check_postcode(user_postcode)
         user_uprn = kwargs.get("uprn")
```
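Note that this hunk adds a literal `<<<<<<< HEAD` merge-conflict marker to the published module; its `=======` and `>>>>>>> master` counterparts appear in the `@@ -187,18 +205,117 @@` hunk below. Conflict markers are not valid Python, so the module raises a `SyntaxError` on import as released.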
```diff
@@ -43,10 +49,16 @@ class CouncilClass(AbstractGetBinDataClass):
                 i["data-for"]: i.get("value", "")
                 for i in soup.select("input[data-for]")
             }
-
-
-
-            )
+
+            # Check if required form elements exist
+            salt_element = soup.select_one('input[id="pSalt"]')
+            protected_element = soup.select_one('input[id="pPageItemsProtected"]')
+
+            if not salt_element or not protected_element:
+                raise Exception("Required form elements not found. The council website may have changed or be unavailable.")
+
+            payload_salt = salt_element.get("value")
+            payload_protected = protected_element.get("value")
 
             # Add the PostCode and 'SEARCH' to the payload
             payload["p_request"] = "SEARCH"
```
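The guard matters because BeautifulSoup's `select_one` returns `None` when nothing matches, so the previous chained `.get("value")` would fail with an opaque `AttributeError`. The same check could be factored into a helper, sketched here with a hypothetical name (`require_input_value` is not part of the package):

```python
from bs4 import BeautifulSoup

# Hypothetical helper illustrating the guard pattern introduced above.
def require_input_value(soup: BeautifulSoup, element_id: str) -> str:
    element = soup.select_one(f'input[id="{element_id}"]')
    if element is None:
        raise Exception(
            f"Required form element '{element_id}' not found. "
            "The council website may have changed or be unavailable."
        )
    return element.get("value")

# payload_salt = require_input_value(soup, "pSalt")
# payload_protected = require_input_value(soup, "pPageItemsProtected")
```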
```diff
@@ -123,10 +135,16 @@ class CouncilClass(AbstractGetBinDataClass):
                 i["data-for"]: i.get("value", "")
                 for i in soup.select("input[data-for]")
             }
-
-
-
-            )
+
+            # Check if required form elements exist
+            salt_element = soup.select_one('input[id="pSalt"]')
+            protected_element = soup.select_one('input[id="pPageItemsProtected"]')
+
+            if not salt_element or not protected_element:
+                raise Exception("Required form elements not found. The council website may have changed or be unavailable.")
+
+            payload_salt = salt_element.get("value")
+            payload_protected = protected_element.get("value")
 
             # Add the UPRN and 'SUBMIT' to the payload
             payload["p_request"] = "SUBMIT"
```
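The identical guard is duplicated verbatim at this second lookup (the UPRN `SUBMIT` stage); a shared helper along the lines of the `require_input_value` sketch above would cover both call sites.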
```diff
@@ -187,18 +205,117 @@ class CouncilClass(AbstractGetBinDataClass):
 
         # Create a BeautifulSoup object from the page's HTML
         soup = BeautifulSoup(resource.text, "html.parser")
+=======
+        driver = None
+        try:
+>>>>>>> master
         data = {"bins": []}
+            url = kwargs.get("url")
+            user_paon = kwargs.get("paon")
+            user_postcode = kwargs.get("postcode")
+            web_driver = kwargs.get("web_driver")
+            headless = kwargs.get("headless")
+            check_paon(user_paon)
+            check_postcode(user_postcode)
+
+            # Use a realistic user agent to help bypass Cloudflare
+            user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+            driver = create_webdriver(web_driver, headless, user_agent, __name__)
+            driver.get(
+                "https://iportal.itouchvision.com/icollectionday/collection-day/?uuid=6CDD2A34C912312074D8E2410531401A8C00EFF7&lang=en"
+            )
+
+            # Wait for the postcode field to appear then populate it
+            inputElement_postcode = WebDriverWait(driver, 30).until(
+                EC.presence_of_element_located((By.ID, "postcodeSearch"))
+            )
+            inputElement_postcode.send_keys(user_postcode)
+
+            # Click search button
+            findAddress = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.CLASS_NAME, "govuk-button"))
+            )
+            findAddress.click()
+
+            # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
+            WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable(
+                    (
+                        By.XPATH,
+                        "//select[@id='addressSelect']//option[contains(., '"
+                        + user_paon
+                        + "')]",
+                    )
+                )
+            ).click()
+
+            # Wait for the collections table to appear
+            WebDriverWait(driver, 20).until(
+                EC.presence_of_element_located(
+                    (
+                        By.XPATH,
+                        "//h2[contains(@class,'mt-4') and contains(@class,'govuk-heading-s') and normalize-space(.)='Your next collections']",
+                    )
+                )
+            )
+
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+
+            collections = soup.find_all("div", {"class": "p-2"})
+
+            for collection in collections:
+                bin_type = collection.find("h3").get_text()
+
+                next_collection = soup.find("div", {"class": "fw-bold"}).get_text()
+
+                following_collection = soup.find(
+                    lambda t: (
+                        t.name == "div"
+                        and t.get_text(strip=True).lower().startswith("followed by")
+                    )
+                ).get_text()
+
+                next_collection_date = datetime.strptime(next_collection, "%A %d %B")
+
+                following_collection_date = datetime.strptime(
+                    following_collection, "followed by %A %d %B"
+                )
+
+                current_date = datetime.now()
+                next_collection_date = next_collection_date.replace(
+                    year=current_date.year
+                )
+                following_collection_date = following_collection_date.replace(
+                    year=current_date.year
+                )
+
+                next_collection_date = get_next_occurrence_from_day_month(
+                    next_collection_date
+                )
+
+                following_collection_date = get_next_occurrence_from_day_month(
+                    following_collection_date
+                )
+
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": next_collection_date.strftime(date_format),
+                }
+                data["bins"].append(dict_data)
+
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": following_collection_date.strftime(date_format),
+                }
+                data["bins"].append(dict_data)
 
-
-
-
-
-
-
-
-
-
-
-            data["bins"].append(dict_data)
-
-        return data
+        except Exception as e:
+            # Here you can log the exception if needed
+            print(f"An error occurred: {e}")
+            # Optionally, re-raise the exception if you want it to propagate
+            raise
+        finally:
+            # This block ensures that the driver is closed regardless of an exception
+            if driver:
+                driver.quit()
+        return data
```
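`"%A %d %B"` carries no year, so `strptime` defaults to 1900; the code stamps in the current year and then defers to `get_next_occurrence_from_day_month` from the `common` module for the year-end rollover. A minimal sketch of the behaviour the code appears to assume (not the helper's actual source):

```python
from datetime import datetime

# Hedged sketch: if the day/month has already passed this year,
# the next occurrence must fall in the following year.
def get_next_occurrence_from_day_month(candidate: datetime) -> datetime:
    today = datetime.now()
    if candidate.date() < today.date():
        candidate = candidate.replace(year=candidate.year + 1)
    return candidate
```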
```diff
@@ -30,7 +30,12 @@ class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
        try:
+<<<<<<< HEAD
+            # Use the new URL as mentioned in the issue
+            page = "http://bincollection.northumberland.gov.uk"
+=======
             page = "https://bincollection.northumberland.gov.uk/postcode"
+>>>>>>> master
 
             data = {"bins": []}
 
```
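The `bincollection.northumberland.gov.uk` URLs identify this as `NorthumberlandCouncil.py` (+182/-3 in the file list). As with the previous module, both sides of an unresolved merge conflict ship in the release (the HEAD side below carries a large heuristic fallback scraper), leaving the file syntactically invalid as published.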
```diff
@@ -50,6 +55,182 @@ class CouncilClass(AbstractGetBinDataClass):
             # Create wait object
             wait = WebDriverWait(driver, 20)
 
+<<<<<<< HEAD
+            # The new site may have different structure, so we'll need to adapt
+            # Try to find postcode and house number inputs
+            try:
+                # Look for postcode input field
+                postcode_input = wait.until(
+                    EC.presence_of_element_located(
+                        (By.XPATH, "//input[contains(@name, 'postcode') or contains(@id, 'postcode') or contains(@placeholder, 'postcode')]")
+                    )
+                )
+
+                # Look for house number input field
+                house_input = wait.until(
+                    EC.presence_of_element_located(
+                        (By.XPATH, "//input[contains(@name, 'house') or contains(@id, 'house') or contains(@name, 'number') or contains(@placeholder, 'house')]")
+                    )
+                )
+
+                # Enter details
+                postcode_input.send_keys(user_postcode)
+                house_input.send_keys(user_paon)
+
+                # Look for submit button
+                submit_button = wait.until(
+                    EC.element_to_be_clickable(
+                        (By.XPATH, "//button[@type='submit'] | //input[@type='submit'] | //button[contains(text(), 'Search')] | //input[contains(@value, 'Search')]")
+                    )
+                )
+                submit_button.click()
+
+                # Wait for results to load
+                time.sleep(3)
+
+                # Get page source after everything has loaded
+                soup = BeautifulSoup(driver.page_source, features="html.parser")
+
+                # Look for collection dates and bin types in the results
+                # This is a generic approach that looks for common patterns
+                import re
+                from datetime import datetime
+
+                # Look for date patterns in the page
+                date_pattern = r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\d{1,2}\s+(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*\s+\d{2,4}\b'
+                page_text = soup.get_text()
+                dates = re.findall(date_pattern, page_text, re.IGNORECASE)
+
+                # Look for bin type keywords near dates
+                bin_keywords = ['recycling', 'refuse', 'garden', 'waste', 'rubbish', 'general', 'household']
+
+                # Try to extract structured data from tables or lists
+                tables = soup.find_all('table')
+                for table in tables:
+                    rows = table.find_all('tr')
+                    for row in rows:
+                        cells = row.find_all(['td', 'th'])
+                        if len(cells) >= 2:
+                            # Look for date in first cell and bin type in second
+                            date_text = cells[0].get_text().strip()
+                            type_text = cells[1].get_text().strip()
+
+                            # Try to parse date
+                            try:
+                                if re.match(r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}', date_text):
+                                    date_obj = datetime.strptime(date_text, '%d/%m/%Y')
+                                elif re.match(r'\d{1,2}\s+\w+\s+\d{4}', date_text):
+                                    date_obj = datetime.strptime(date_text, '%d %B %Y')
+                                else:
+                                    continue
+
+                                if any(keyword in type_text.lower() for keyword in bin_keywords):
+                                    data["bins"].append({
+                                        "type": type_text,
+                                        "collectionDate": date_obj.strftime(date_format)
+                                    })
+                            except ValueError:
+                                continue
+
+            except TimeoutException:
+                # If the new site structure is completely different, fall back to old URL
+                driver.get("https://www.northumberland.gov.uk/Waste/Household-waste/Household-bin-collections/Bin-Calendars.aspx")
+
+                # Wait for and click cookie button if present
+                try:
+                    cookie_button = wait.until(
+                        EC.element_to_be_clickable((By.ID, "ccc-notify-accept"))
+                    )
+                    cookie_button.click()
+                except TimeoutException:
+                    pass
+
+                # Continue with original logic for old site
+                inputElement_hn = wait.until(
+                    EC.presence_of_element_located(
+                        (
+                            By.ID,
+                            "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtHouse",
+                        )
+                    )
+                )
+
+                inputElement_pc = wait.until(
+                    EC.presence_of_element_located(
+                        (
+                            By.ID,
+                            "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtPostcode",
+                        )
+                    )
+                )
+
+                inputElement_pc.send_keys(user_postcode)
+                inputElement_hn.send_keys(user_paon)
+
+                lookup_button = wait.until(
+                    EC.element_to_be_clickable(
+                        (
+                            By.ID,
+                            "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_butLookup",
+                        )
+                    )
+                )
+                lookup_button.click()
+
+                route_summary = wait.until(
+                    EC.presence_of_element_located(
+                        (
+                            By.ID,
+                            "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
+                        )
+                    )
+                )
+
+                soup = BeautifulSoup(driver.page_source, features="html.parser")
+
+                bins_collected = list(
+                    map(
+                        str.strip,
+                        soup.find(
+                            "span",
+                            id="p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
+                        )
+                        .text.replace("Routes found: ", "")
+                        .split(","),
+                    )
+                )
+
+                bins_by_colours = dict()
+                for bin in bins_collected:
+                    if "(but no dates found)" in bin:
+                        continue
+                    style_str = soup.find("span", string=bin)["style"]
+                    bin_colour = self.extract_styles(style_str)["background-color"].upper()
+                    bins_by_colours[bin_colour] = bin
+
+                calander_tables = soup.find_all("table", title="Calendar")
+                for table in calander_tables:
+                    rows = table.find_all("tr")
+                    month_and_year = (
+                        rows[0].find("table", class_="calCtrlTitle").find("td").string
+                    )
+                    bin_days = table.find_all("td", class_="calCtrlDay")
+                    for day in bin_days:
+                        day_styles = self.extract_styles(day["style"])
+                        if "background-color" in day_styles:
+                            colour = day_styles["background-color"].upper()
+                            date = time.strptime(
+                                f"{day.string} {month_and_year}", "%d %B %Y"
+                            )
+
+                            data["bins"].append(
+                                {
+                                    "type": bins_by_colours[colour],
+                                    "collectionDate": time.strftime(date_format, date),
+                                }
+                            )
+
+=======
             # Wait for and click cookie button
             cookie_button = wait.until(
                 EC.element_to_be_clickable(
```
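One inconsistency in the HEAD-side heuristic above: the date regex admits `-` separators and two-digit years (e.g. "13-06-25"), but the follow-up `strptime` calls only try `%d/%m/%Y` and `%d %B %Y`, so several shapes the regex matches can never parse and are silently skipped. A tolerant loop over candidate formats is one way to reconcile the two (a sketch, not package code):

```python
from datetime import datetime
from typing import Optional

# Sketch: try each format the regex can produce until one parses.
def parse_flexible_date(text: str) -> Optional[datetime]:
    formats = ("%d/%m/%Y", "%d-%m-%Y", "%d/%m/%y", "%d-%m-%y",
               "%d %B %Y", "%d %b %Y")
    for fmt in formats:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None
```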
```diff
@@ -133,13 +314,11 @@ class CouncilClass(AbstractGetBinDataClass):
                         "collectionDate": time.strftime(date_format, collection_date),
                     }
                 )
+>>>>>>> master
         except Exception as e:
-            # Here you can log the exception if needed
             print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-            # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
             return data
```
```diff
@@ -25,6 +25,7 @@ class CouncilClass(AbstractGetBinDataClass):
         URI = "https://www.oxford.gov.uk/xfp/form/142#q6ad4e3bf432c83230a0347a6eea6c805c672efeb_0"
 
         session = requests.Session()
+        session.headers.update({'User-Agent': 'HomeAssistant UK Bin Collection integration'})
         token_response = session.get(session_uri)
         soup = BeautifulSoup(token_response.text, "html.parser")
         token = soup.find("input", {"name": "__token"}).attrs["value"]
```