uk_bin_collection 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +34 -25
- uk_bin_collection/uk_bin_collection/councils/AberdeenCityCouncil.py +0 -1
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +45 -120
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +4 -1
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +15 -36
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +55 -24
- uk_bin_collection/uk_bin_collection/councils/DacorumBoroughCouncil.py +22 -13
- uk_bin_collection/uk_bin_collection/councils/EastDunbartonshireCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +32 -34
- uk_bin_collection/uk_bin_collection/councils/FarehamBoroughCouncil.py +5 -2
- uk_bin_collection/uk_bin_collection/councils/FolkstoneandHytheDistrictCouncil.py +22 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/HartlepoolBoroughCouncil.py +3 -1
- uk_bin_collection/uk_bin_collection/councils/IslingtonCouncil.py +8 -5
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +23 -10
- uk_bin_collection/uk_bin_collection/councils/MidSuffolkDistrictCouncil.py +70 -92
- uk_bin_collection/uk_bin_collection/councils/NewForestCouncil.py +104 -47
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +138 -21
- uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py +26 -128
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +245 -82
- uk_bin_collection/uk_bin_collection/councils/OxfordCityCouncil.py +1 -0
- uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +170 -13
- uk_bin_collection/uk_bin_collection/councils/RotherhamCouncil.py +70 -38
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +4 -2
- uk_bin_collection/uk_bin_collection/councils/SandwellBoroughCouncil.py +4 -11
- uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py +39 -21
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +136 -21
- uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +18 -22
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +138 -21
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +16 -13
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/RECORD +35 -34
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,9 @@
|
|
1
|
+
import datetime
|
2
|
+
|
1
3
|
from bs4 import BeautifulSoup
|
4
|
+
from selenium.webdriver.common.by import By
|
5
|
+
from selenium.webdriver.support import expected_conditions as EC
|
6
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
2
7
|
|
3
8
|
from uk_bin_collection.uk_bin_collection.common import *
|
4
9
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
@@ -13,6 +18,7 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
13
18
|
"""
|
14
19
|
|
15
20
|
def parse_data(self, page: str, **kwargs) -> dict:
|
21
|
+
<<<<<<< HEAD
|
16
22
|
user_postcode = kwargs.get("postcode")
|
17
23
|
check_postcode(user_postcode)
|
18
24
|
user_uprn = kwargs.get("uprn")
|
@@ -43,10 +49,16 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
43
49
|
i["data-for"]: i.get("value", "")
|
44
50
|
for i in soup.select("input[data-for]")
|
45
51
|
}
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
)
|
52
|
+
|
53
|
+
# Check if required form elements exist
|
54
|
+
salt_element = soup.select_one('input[id="pSalt"]')
|
55
|
+
protected_element = soup.select_one('input[id="pPageItemsProtected"]')
|
56
|
+
|
57
|
+
if not salt_element or not protected_element:
|
58
|
+
raise Exception("Required form elements not found. The council website may have changed or be unavailable.")
|
59
|
+
|
60
|
+
payload_salt = salt_element.get("value")
|
61
|
+
payload_protected = protected_element.get("value")
|
50
62
|
|
51
63
|
# Add the PostCode and 'SEARCH' to the payload
|
52
64
|
payload["p_request"] = "SEARCH"
|
@@ -123,10 +135,16 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
123
135
|
i["data-for"]: i.get("value", "")
|
124
136
|
for i in soup.select("input[data-for]")
|
125
137
|
}
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
)
|
138
|
+
|
139
|
+
# Check if required form elements exist
|
140
|
+
salt_element = soup.select_one('input[id="pSalt"]')
|
141
|
+
protected_element = soup.select_one('input[id="pPageItemsProtected"]')
|
142
|
+
|
143
|
+
if not salt_element or not protected_element:
|
144
|
+
raise Exception("Required form elements not found. The council website may have changed or be unavailable.")
|
145
|
+
|
146
|
+
payload_salt = salt_element.get("value")
|
147
|
+
payload_protected = protected_element.get("value")
|
130
148
|
|
131
149
|
# Add the UPRN and 'SUBMIT' to the payload
|
132
150
|
payload["p_request"] = "SUBMIT"
|
@@ -187,18 +205,117 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
187
205
|
|
188
206
|
# Create a BeautifulSoup object from the page's HTML
|
189
207
|
soup = BeautifulSoup(resource.text, "html.parser")
|
208
|
+
=======
|
209
|
+
driver = None
|
210
|
+
try:
|
211
|
+
>>>>>>> master
|
190
212
|
data = {"bins": []}
|
213
|
+
url = kwargs.get("url")
|
214
|
+
user_paon = kwargs.get("paon")
|
215
|
+
user_postcode = kwargs.get("postcode")
|
216
|
+
web_driver = kwargs.get("web_driver")
|
217
|
+
headless = kwargs.get("headless")
|
218
|
+
check_paon(user_paon)
|
219
|
+
check_postcode(user_postcode)
|
220
|
+
|
221
|
+
# Use a realistic user agent to help bypass Cloudflare
|
222
|
+
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
223
|
+
driver = create_webdriver(web_driver, headless, user_agent, __name__)
|
224
|
+
driver.get(
|
225
|
+
"https://iportal.itouchvision.com/icollectionday/collection-day/?uuid=6CDD2A34C912312074D8E2410531401A8C00EFF7&lang=en"
|
226
|
+
)
|
227
|
+
|
228
|
+
# Wait for the postcode field to appear then populate it
|
229
|
+
inputElement_postcode = WebDriverWait(driver, 30).until(
|
230
|
+
EC.presence_of_element_located((By.ID, "postcodeSearch"))
|
231
|
+
)
|
232
|
+
inputElement_postcode.send_keys(user_postcode)
|
233
|
+
|
234
|
+
# Click search button
|
235
|
+
findAddress = WebDriverWait(driver, 10).until(
|
236
|
+
EC.presence_of_element_located((By.CLASS_NAME, "govuk-button"))
|
237
|
+
)
|
238
|
+
findAddress.click()
|
239
|
+
|
240
|
+
# Wait for the 'Select address' dropdown to appear and select option matching the house name/number
|
241
|
+
WebDriverWait(driver, 10).until(
|
242
|
+
EC.element_to_be_clickable(
|
243
|
+
(
|
244
|
+
By.XPATH,
|
245
|
+
"//select[@id='addressSelect']//option[contains(., '"
|
246
|
+
+ user_paon
|
247
|
+
+ "')]",
|
248
|
+
)
|
249
|
+
)
|
250
|
+
).click()
|
251
|
+
|
252
|
+
# Wait for the collections table to appear
|
253
|
+
WebDriverWait(driver, 20).until(
|
254
|
+
EC.presence_of_element_located(
|
255
|
+
(
|
256
|
+
By.XPATH,
|
257
|
+
"//h2[contains(@class,'mt-4') and contains(@class,'govuk-heading-s') and normalize-space(.)='Your next collections']",
|
258
|
+
)
|
259
|
+
)
|
260
|
+
)
|
261
|
+
|
262
|
+
soup = BeautifulSoup(driver.page_source, features="html.parser")
|
263
|
+
|
264
|
+
collections = soup.find_all("div", {"class": "p-2"})
|
265
|
+
|
266
|
+
for collection in collections:
|
267
|
+
bin_type = collection.find("h3").get_text()
|
268
|
+
|
269
|
+
next_collection = soup.find("div", {"class": "fw-bold"}).get_text()
|
270
|
+
|
271
|
+
following_collection = soup.find(
|
272
|
+
lambda t: (
|
273
|
+
t.name == "div"
|
274
|
+
and t.get_text(strip=True).lower().startswith("followed by")
|
275
|
+
)
|
276
|
+
).get_text()
|
277
|
+
|
278
|
+
next_collection_date = datetime.strptime(next_collection, "%A %d %B")
|
279
|
+
|
280
|
+
following_collection_date = datetime.strptime(
|
281
|
+
following_collection, "followed by %A %d %B"
|
282
|
+
)
|
283
|
+
|
284
|
+
current_date = datetime.now()
|
285
|
+
next_collection_date = next_collection_date.replace(
|
286
|
+
year=current_date.year
|
287
|
+
)
|
288
|
+
following_collection_date = following_collection_date.replace(
|
289
|
+
year=current_date.year
|
290
|
+
)
|
291
|
+
|
292
|
+
next_collection_date = get_next_occurrence_from_day_month(
|
293
|
+
next_collection_date
|
294
|
+
)
|
295
|
+
|
296
|
+
following_collection_date = get_next_occurrence_from_day_month(
|
297
|
+
following_collection_date
|
298
|
+
)
|
299
|
+
|
300
|
+
dict_data = {
|
301
|
+
"type": bin_type,
|
302
|
+
"collectionDate": next_collection_date.strftime(date_format),
|
303
|
+
}
|
304
|
+
data["bins"].append(dict_data)
|
305
|
+
|
306
|
+
dict_data = {
|
307
|
+
"type": bin_type,
|
308
|
+
"collectionDate": following_collection_date.strftime(date_format),
|
309
|
+
}
|
310
|
+
data["bins"].append(dict_data)
|
191
311
|
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
data["bins"].append(dict_data)
|
203
|
-
|
204
|
-
return data
|
312
|
+
except Exception as e:
|
313
|
+
# Here you can log the exception if needed
|
314
|
+
print(f"An error occurred: {e}")
|
315
|
+
# Optionally, re-raise the exception if you want it to propagate
|
316
|
+
raise
|
317
|
+
finally:
|
318
|
+
# This block ensures that the driver is closed regardless of an exception
|
319
|
+
if driver:
|
320
|
+
driver.quit()
|
321
|
+
return data
|
@@ -125,23 +125,6 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
125
125
|
# Wait for the page to load - giving it extra time
|
126
126
|
time.sleep(5)
|
127
127
|
|
128
|
-
# Use only the selector that we know works
|
129
|
-
# print("Looking for bin type elements...")
|
130
|
-
try:
|
131
|
-
bin_type_selector = (
|
132
|
-
By.CSS_SELECTOR,
|
133
|
-
"div.formatting_bold.formatting_size_bigger.formatting span.value-as-text",
|
134
|
-
)
|
135
|
-
WebDriverWait(driver, 15).until(
|
136
|
-
EC.presence_of_element_located(bin_type_selector)
|
137
|
-
)
|
138
|
-
# print(f"Found bin type elements with selector: {bin_type_selector}")
|
139
|
-
except TimeoutException:
|
140
|
-
# print("Could not find bin type elements. Taking screenshot for debugging...")
|
141
|
-
screenshot_path = f"bin_type_error_{int(time.time())}.png"
|
142
|
-
driver.save_screenshot(screenshot_path)
|
143
|
-
# print(f"Screenshot saved to {screenshot_path}")
|
144
|
-
|
145
128
|
# Create BS4 object from driver's page source
|
146
129
|
# print("Parsing page with BeautifulSoup...")
|
147
130
|
soup = BeautifulSoup(driver.page_source, features="html.parser")
|
@@ -149,122 +132,37 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
149
132
|
# Initialize data dictionary
|
150
133
|
data = {"bins": []}
|
151
134
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
# Look specifically for date elements with the exact structure
|
159
|
-
date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
|
160
|
-
hidden_dates = soup.select(
|
161
|
-
"div.col-sm-12.font-xs-3xl input[type='hidden'][value*='/']"
|
162
|
-
)
|
163
|
-
|
164
|
-
# print(f"Found {len(bin_type_elements)} bin types and {len(date_elements)} date elements")
|
165
|
-
|
166
|
-
# We need a smarter way to match bin types with their dates
|
167
|
-
bin_count = 0
|
135
|
+
for row in soup.select(".listing_template_row"):
|
136
|
+
# Title (waste stream) is the first <p> in the section
|
137
|
+
first_p = row.find("p")
|
138
|
+
if not first_p:
|
139
|
+
continue
|
140
|
+
stream = first_p.get_text(" ", strip=True)
|
168
141
|
|
169
|
-
|
170
|
-
|
142
|
+
for p in row.find_all("p"):
|
143
|
+
t = p.get_text("\n", strip=True)
|
171
144
|
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
)
|
145
|
+
if re.search(r"\bNext collection\b", t, flags=re.I):
|
146
|
+
# Expect format: "Next collection\nTuesday 16th September 2025"
|
147
|
+
parts = [x.strip() for x in t.split("\n") if x.strip()]
|
148
|
+
if len(parts) >= 2:
|
149
|
+
next_collection_display = parts[-1] # last line
|
178
150
|
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
# Find hidden date inputs with values in DD/MM/YYYY format
|
186
|
-
hidden_date_values = []
|
187
|
-
for hidden in hidden_dates:
|
188
|
-
value = hidden.get("value", "")
|
189
|
-
if re.match(r"\d{1,2}/\d{1,2}/\d{4}", value):
|
190
|
-
hidden_date_values.append(value)
|
191
|
-
# print(f"Found hidden date value: {value}")
|
192
|
-
|
193
|
-
# When filtering date elements
|
194
|
-
date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
|
195
|
-
valid_date_elements = []
|
196
|
-
|
197
|
-
for element in date_elements:
|
198
|
-
text = element.get_text(strip=True)
|
199
|
-
if contains_date(text):
|
200
|
-
valid_date_elements.append(element)
|
201
|
-
# print(f"Found valid date element: {text}")
|
202
|
-
else:
|
203
|
-
pass
|
204
|
-
# print(f"Skipping non-date element: {text}")
|
205
|
-
|
206
|
-
# print(f"Found {len(bin_type_elements)} bin types and {len(valid_date_elements)} valid date elements")
|
207
|
-
|
208
|
-
# When processing each bin type
|
209
|
-
for i, bin_type_elem in enumerate(bin_type_elements):
|
210
|
-
bin_type = bin_type_elem.get_text(strip=True)
|
211
|
-
|
212
|
-
# Try to find a date for this bin type
|
213
|
-
date_text = None
|
214
|
-
|
215
|
-
# Look for a valid date element
|
216
|
-
if i < len(valid_date_elements):
|
217
|
-
date_elem = valid_date_elements[i]
|
218
|
-
date_text = date_elem.get_text(strip=True)
|
219
|
-
|
220
|
-
# If we don't have a valid date yet, try using the hidden input
|
221
|
-
if not date_text or not contains_date(date_text):
|
222
|
-
if i < len(hidden_dates):
|
223
|
-
date_value = hidden_dates[i].get("value")
|
224
|
-
if contains_date(date_value):
|
225
|
-
date_text = date_value
|
226
|
-
|
227
|
-
# Skip if we don't have a valid date
|
228
|
-
if not date_text or not contains_date(date_text):
|
229
|
-
# print(f"No valid date found for bin type: {bin_type}")
|
230
|
-
continue
|
151
|
+
# Build record
|
152
|
+
next_date = datetime.strptime(
|
153
|
+
remove_ordinal_indicator_from_date_string(next_collection_display),
|
154
|
+
"%A %d %B %Y",
|
155
|
+
)
|
231
156
|
|
232
|
-
#
|
157
|
+
# Create bin entry
|
158
|
+
bin_entry = {
|
159
|
+
"type": stream,
|
160
|
+
"collectionDate": next_date.strftime(date_format),
|
161
|
+
}
|
233
162
|
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
# Try to parse the date
|
239
|
-
try:
|
240
|
-
collection_date = datetime.strptime(
|
241
|
-
date_text, "%A %d %B %Y"
|
242
|
-
).date()
|
243
|
-
except ValueError:
|
244
|
-
try:
|
245
|
-
collection_date = datetime.strptime(
|
246
|
-
date_text, "%d/%m/%Y"
|
247
|
-
).date()
|
248
|
-
except ValueError:
|
249
|
-
# Last resort
|
250
|
-
collection_date = parse(date_text).date()
|
251
|
-
|
252
|
-
# Create bin entry
|
253
|
-
bin_entry = {
|
254
|
-
"type": bin_type,
|
255
|
-
"collectionDate": collection_date.strftime(date_format),
|
256
|
-
}
|
257
|
-
|
258
|
-
# Add to data
|
259
|
-
data["bins"].append(bin_entry)
|
260
|
-
bin_count += 1
|
261
|
-
# print(f"Added bin entry: {bin_entry}")
|
262
|
-
|
263
|
-
except Exception as e:
|
264
|
-
pass
|
265
|
-
# print(f"Error parsing date '{date_text}': {str(e)}")
|
266
|
-
|
267
|
-
# print(f"Successfully parsed {bin_count} bin collections")
|
163
|
+
# Add to data
|
164
|
+
data["bins"].append(bin_entry)
|
165
|
+
# print(f"Added bin entry: {bin_entry}")
|
268
166
|
|
269
167
|
if not data["bins"]:
|
270
168
|
# print("No bin data found. Saving page for debugging...")
|