uk_bin_collection 0.139.0__py3-none-any.whl → 0.141.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +54 -36
- uk_bin_collection/uk_bin_collection/councils/BroadlandDistrictCouncil.py +185 -0
- uk_bin_collection/uk_bin_collection/councils/PeterboroughCityCouncil.py +167 -0
- {uk_bin_collection-0.139.0.dist-info → uk_bin_collection-0.141.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.139.0.dist-info → uk_bin_collection-0.141.0.dist-info}/RECORD +8 -6
- {uk_bin_collection-0.139.0.dist-info → uk_bin_collection-0.141.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.139.0.dist-info → uk_bin_collection-0.141.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.139.0.dist-info → uk_bin_collection-0.141.0.dist-info}/entry_points.txt +0 -0
@@ -19,8 +19,8 @@
|
|
19
19
|
"wiki_note": "Replace XXXXXXXX with your UPRN. You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find it."
|
20
20
|
},
|
21
21
|
"AmberValleyBoroughCouncil": {
|
22
|
-
"url": "https://ambervalley.gov.uk",
|
23
22
|
"uprn": "100030026621",
|
23
|
+
"url": "https://ambervalley.gov.uk",
|
24
24
|
"wiki_name": "Amber Valley Borough Council",
|
25
25
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
|
26
26
|
},
|
@@ -71,8 +71,8 @@
|
|
71
71
|
"AshfordBoroughCouncil": {
|
72
72
|
"postcode": "TN23 7SP",
|
73
73
|
"uprn": "100060777899",
|
74
|
-
"web_driver": "http://selenium:4444",
|
75
74
|
"url": "https://ashford.gov.uk",
|
75
|
+
"web_driver": "http://selenium:4444",
|
76
76
|
"wiki_command_url_override": "https://ashford.gov.uk",
|
77
77
|
"wiki_name": "Ashford Borough Council",
|
78
78
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
|
@@ -203,8 +203,8 @@
|
|
203
203
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
|
204
204
|
},
|
205
205
|
"BolsoverCouncil": {
|
206
|
-
"url": "https://bolsover.gov.uk",
|
207
206
|
"uprn": "100030066827",
|
207
|
+
"url": "https://bolsover.gov.uk",
|
208
208
|
"wiki_name": "Bolsover Council",
|
209
209
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
|
210
210
|
},
|
@@ -282,6 +282,15 @@
|
|
282
282
|
"wiki_name": "Bristol City Council",
|
283
283
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
|
284
284
|
},
|
285
|
+
"BroadlandDistrictCouncil": {
|
286
|
+
"skip_get_url": true,
|
287
|
+
"house_number": "1 Park View, Horsford, Norfolk, NR10 3FD",
|
288
|
+
"postcode": "NR10 3FD",
|
289
|
+
"url": "https://area.southnorfolkandbroadland.gov.uk/FindAddress",
|
290
|
+
"web_driver": "http://selenium:4444",
|
291
|
+
"wiki_name": "Broadland District Council",
|
292
|
+
"wiki_note": "Use the full address as it appears on the drop-down on the site when you search by postcode."
|
293
|
+
},
|
285
294
|
"BromleyBoroughCouncil": {
|
286
295
|
"url": "https://recyclingservices.bromley.gov.uk/waste/6087017",
|
287
296
|
"web_driver": "http://selenium:4444",
|
@@ -386,13 +395,13 @@
|
|
386
395
|
"wiki_name": "Castlepoint District Council",
|
387
396
|
"wiki_note": "For this council, 'uprn' is actually a 4-digit code for your street. Go [here](https://apps.castlepoint.gov.uk/cpapps/index.cfm?fa=wastecalendar) and inspect the source of the dropdown box to find the 4-digit number for your street."
|
388
397
|
},
|
389
|
-
"CeredigionCountyCouncil":{
|
390
|
-
"house_number":"BLAEN CWMMAGWR, TRISANT, CEREDIGION, SY23 4RQ",
|
391
|
-
"postcode":"SY23 4RQ",
|
392
|
-
"url":"https://www.ceredigion.gov.uk/resident/bins-recycling/",
|
393
|
-
"web_driver":"http://selenium:4444",
|
394
|
-
"wiki_name":"Ceredigion County Council",
|
395
|
-
"wiki_note":"House Number is the full address as it appears on the drop-down on the site when you search by postcode. This parser requires a Selenium webdriver."
|
398
|
+
"CeredigionCountyCouncil": {
|
399
|
+
"house_number": "BLAEN CWMMAGWR, TRISANT, CEREDIGION, SY23 4RQ",
|
400
|
+
"postcode": "SY23 4RQ",
|
401
|
+
"url": "https://www.ceredigion.gov.uk/resident/bins-recycling/",
|
402
|
+
"web_driver": "http://selenium:4444",
|
403
|
+
"wiki_name": "Ceredigion County Council",
|
404
|
+
"wiki_note": "House Number is the full address as it appears on the drop-down on the site when you search by postcode. This parser requires a Selenium webdriver."
|
396
405
|
},
|
397
406
|
"CharnwoodBoroughCouncil": {
|
398
407
|
"url": "https://my.charnwood.gov.uk/location?put=cbc10070067259&rememberme=0&redirect=%2F",
|
@@ -409,9 +418,9 @@
|
|
409
418
|
"wiki_note": "Follow the instructions [here](https://www.chelmsford.gov.uk/myhome/) until you get the page listing your address, then copy the entire address text and use that in the house number field."
|
410
419
|
},
|
411
420
|
"CheltenhamBoroughCouncil": {
|
412
|
-
"uprn": "100120372027",
|
413
421
|
"postcode": "GL51 3NA",
|
414
422
|
"skip_get_url": true,
|
423
|
+
"uprn": "100120372027",
|
415
424
|
"url": "https://www.cheltenham.gov.uk",
|
416
425
|
"wiki_name": "Cheltenham Borough Council",
|
417
426
|
"wiki_note": "Pass the UPRN. You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search)."
|
@@ -639,8 +648,8 @@
|
|
639
648
|
"house_number": "1",
|
640
649
|
"postcode": "CM20 2FZ",
|
641
650
|
"skip_get_url": true,
|
642
|
-
"web_driver": "http://selenium:4444",
|
643
651
|
"url": "https://www.eastherts.gov.uk",
|
652
|
+
"web_driver": "http://selenium:4444",
|
644
653
|
"wiki_name": "East Herts Council",
|
645
654
|
"wiki_note": "Pass the house number and postcode in their respective parameters."
|
646
655
|
},
|
@@ -733,8 +742,8 @@
|
|
733
742
|
},
|
734
743
|
"EppingForestDistrictCouncil": {
|
735
744
|
"postcode": "IG9 6EP",
|
736
|
-
"web_driver": "http://selenium:4444",
|
737
745
|
"url": "https://eppingforestdc.maps.arcgis.com/apps/instant/lookup/index.html?appid=bfca32b46e2a47cd9c0a84f2d8cdde17&find=IG9%206EP",
|
746
|
+
"web_driver": "http://selenium:4444",
|
738
747
|
"wiki_name": "Epping Forest District Council",
|
739
748
|
"wiki_note": "Replace the postcode in the URL with your own."
|
740
749
|
},
|
@@ -848,12 +857,12 @@
|
|
848
857
|
"wiki_note": "Pass the house number, postcode, and UPRN in their respective parameters. This parser requires a Selenium webdriver."
|
849
858
|
},
|
850
859
|
"GooglePublicCalendarCouncil": {
|
851
|
-
"wiki_name": "Google Calendar (Public)",
|
852
|
-
"url": "https://calendar.google.com/calendar/ical/c_bca6c975545213622b3958bddab567e1e7398709c63e377b6fe9a9140bd1caf8%40group.calendar.google.com/public/basic.ics",
|
853
860
|
"skip_get_url": true,
|
861
|
+
"url": "https://calendar.google.com/calendar/ical/c_bca6c975545213622b3958bddab567e1e7398709c63e377b6fe9a9140bd1caf8%40group.calendar.google.com/public/basic.ics",
|
862
|
+
"wiki_name": "Google Calendar (Public)",
|
854
863
|
"wiki_note": "The URL should be the public ics file URL for the public Google calendar. See https://support.google.com/calendar/answer/37083?sjid=7202815583021446882-EU. Councils that currently need this are Trafford."
|
855
864
|
},
|
856
|
-
|
865
|
+
"GraveshamBoroughCouncil": {
|
857
866
|
"skip_get_url": true,
|
858
867
|
"uprn": "100060927046",
|
859
868
|
"url": "https://www.gravesham.gov.uk",
|
@@ -1134,8 +1143,8 @@
|
|
1134
1143
|
"LondonBoroughOfRichmondUponThames": {
|
1135
1144
|
"house_number": "March Road",
|
1136
1145
|
"skip_get_url": true,
|
1137
|
-
"web_driver": "http://selenium:4444",
|
1138
1146
|
"url": "https://www.richmond.gov.uk/services/waste_and_recycling/collection_days/",
|
1147
|
+
"web_driver": "http://selenium:4444",
|
1139
1148
|
"wiki_name": "London Borough Of Richmond Upon Thames",
|
1140
1149
|
"wiki_note": "Pass the name of the street ONLY in the house number parameter; unfortunately, postcodes are not allowed."
|
1141
1150
|
},
|
@@ -1217,14 +1226,6 @@
|
|
1217
1226
|
"wiki_name": "Mid Devon Council",
|
1218
1227
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
|
1219
1228
|
},
|
1220
|
-
"MiddlesbroughCouncil": {
|
1221
|
-
"house_number": "12 Constantine Court Park Road North, Middlesbrough",
|
1222
|
-
"skip_get_url": true,
|
1223
|
-
"url": "https://www.midsussex.gov.uk/waste-recycling/bin-collection/",
|
1224
|
-
"web_driver": "http://selenium:4444",
|
1225
|
-
"wiki_name": "Middlesbrough Council",
|
1226
|
-
"wiki_note": "Pass the entire address without postcode as it appears when you type it on the website. This parser requires a Selenium webdriver."
|
1227
|
-
},
|
1228
1229
|
"MidSuffolkDistrictCouncil": {
|
1229
1230
|
"house_number": "Monday",
|
1230
1231
|
"postcode": "Week 2",
|
@@ -1243,6 +1244,14 @@
|
|
1243
1244
|
"wiki_name": "Mid Sussex District Council",
|
1244
1245
|
"wiki_note": "Pass the name of the street with the house number parameter, wrapped in double quotes. This parser requires a Selenium webdriver."
|
1245
1246
|
},
|
1247
|
+
"MiddlesbroughCouncil": {
|
1248
|
+
"house_number": "12 Constantine Court Park Road North, Middlesbrough",
|
1249
|
+
"skip_get_url": true,
|
1250
|
+
"url": "https://www.midsussex.gov.uk/waste-recycling/bin-collection/",
|
1251
|
+
"web_driver": "http://selenium:4444",
|
1252
|
+
"wiki_name": "Middlesbrough Council",
|
1253
|
+
"wiki_note": "Pass the entire address without postcode as it appears when you type it on the website. This parser requires a Selenium webdriver."
|
1254
|
+
},
|
1246
1255
|
"MidlothianCouncil": {
|
1247
1256
|
"house_number": "52",
|
1248
1257
|
"postcode": "EH19 2EB",
|
@@ -1335,11 +1344,11 @@
|
|
1335
1344
|
"wiki_name": "North Ayrshire Council",
|
1336
1345
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
|
1337
1346
|
},
|
1338
|
-
"NorthDevonCountyCouncil":{
|
1347
|
+
"NorthDevonCountyCouncil": {
|
1339
1348
|
"house_number": "1",
|
1340
|
-
"uprn": "100040249471",
|
1341
1349
|
"postcode": "EX31 2LE",
|
1342
1350
|
"skip_get_url": true,
|
1351
|
+
"uprn": "100040249471",
|
1343
1352
|
"url": "https://my.northdevon.gov.uk/service/WasteRecyclingCollectionCalendar",
|
1344
1353
|
"web_driver": "http://selenium:4444",
|
1345
1354
|
"wiki_name": "North Devon County Council",
|
@@ -1484,6 +1493,15 @@
|
|
1484
1493
|
"wiki_name": "Oxford City Council",
|
1485
1494
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
|
1486
1495
|
},
|
1496
|
+
"PeterboroughCityCouncil": {
|
1497
|
+
"house_number": "7 Arundel Road, Peterborough, PE4 6JJ",
|
1498
|
+
"postcode": "PE4 6JJ",
|
1499
|
+
"skip_get_url": true,
|
1500
|
+
"url": "https://report.peterborough.gov.uk/waste",
|
1501
|
+
"web_driver": "http://selenium:4444",
|
1502
|
+
"wiki_name": "Peterborough City Council",
|
1503
|
+
"wiki_note": "Pass the full address as it appears on the Peterborough website and postcode in their respective parameters. This parser requires a Selenium webdriver."
|
1504
|
+
},
|
1487
1505
|
"PerthAndKinrossCouncil": {
|
1488
1506
|
"uprn": "124032322",
|
1489
1507
|
"url": "https://www.pkc.gov.uk",
|
@@ -1690,13 +1708,6 @@
|
|
1690
1708
|
"wiki_name": "Somerset Council",
|
1691
1709
|
"wiki_note": "Provide your UPRN and postcode. Find your UPRN using [FindMyAddress](https://www.findmyaddress.co.uk/search)."
|
1692
1710
|
},
|
1693
|
-
"SouthamptonCityCouncil": {
|
1694
|
-
"skip_get_url": true,
|
1695
|
-
"uprn": "100060731893",
|
1696
|
-
"url": "https://www.southampton.gov.uk",
|
1697
|
-
"wiki_name": "Southampton City Council",
|
1698
|
-
"wiki_note": "Pass the UPRN. You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search)."
|
1699
|
-
},
|
1700
1711
|
"SouthAyrshireCouncil": {
|
1701
1712
|
"postcode": "KA19 7BN",
|
1702
1713
|
"skip_get_url": true,
|
@@ -1736,8 +1747,8 @@
|
|
1736
1747
|
"SouthHollandDistrictCouncil": {
|
1737
1748
|
"house_number": "1",
|
1738
1749
|
"postcode": "PE6 0HE",
|
1739
|
-
"uprn": "100030872493",
|
1740
1750
|
"skip_get_url": true,
|
1751
|
+
"uprn": "100030872493",
|
1741
1752
|
"url": "https://www.sholland.gov.uk/mycollections",
|
1742
1753
|
"web_driver": "http://selenium:4444",
|
1743
1754
|
"wiki_name": "South Holland District Council",
|
@@ -1793,6 +1804,13 @@
|
|
1793
1804
|
"wiki_name": "South Tyneside Council",
|
1794
1805
|
"wiki_note": "Provide your house number in the `house_number` parameter and postcode in the `postcode` parameter."
|
1795
1806
|
},
|
1807
|
+
"SouthamptonCityCouncil": {
|
1808
|
+
"skip_get_url": true,
|
1809
|
+
"uprn": "100060731893",
|
1810
|
+
"url": "https://www.southampton.gov.uk",
|
1811
|
+
"wiki_name": "Southampton City Council",
|
1812
|
+
"wiki_note": "Pass the UPRN. You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search)."
|
1813
|
+
},
|
1796
1814
|
"SouthwarkCouncil": {
|
1797
1815
|
"uprn": "200003469271",
|
1798
1816
|
"url": "https://services.southwark.gov.uk/bins/lookup/",
|
@@ -2289,4 +2307,4 @@
|
|
2289
2307
|
"wiki_name": "York Council",
|
2290
2308
|
"wiki_note": "Provide your UPRN."
|
2291
2309
|
}
|
2292
|
-
}
|
2310
|
+
}
|
@@ -0,0 +1,185 @@
|
|
1
|
+
# This script pulls (in one hit) the data from Broadland District Council Bins Data
|
2
|
+
# Working command line:
|
3
|
+
# python collect_data.py BroadlandDistrictCouncil "https://area.southnorfolkandbroadland.gov.uk/FindAddress" -p "NR10 3FD" -n "1 Park View, Horsford, Norfolk, NR10 3FD"
|
4
|
+
|
5
|
+
import re
|
6
|
+
import time
|
7
|
+
from datetime import datetime
|
8
|
+
|
9
|
+
from bs4 import BeautifulSoup
|
10
|
+
from selenium.webdriver.common.by import By
|
11
|
+
from selenium.webdriver.common.keys import Keys
|
12
|
+
from selenium.webdriver.support import expected_conditions as EC
|
13
|
+
from selenium.webdriver.support.ui import Select
|
14
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
15
|
+
|
16
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
17
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
18
|
+
|
19
|
+
|
20
|
+
class CouncilClass(AbstractGetBinDataClass):
    """Collect upcoming bin collections for Broadland District Council.

    The council site is driven through Selenium: the postcode is searched,
    the requested address is chosen from the address drop-down, and the
    "Your next collections" card of the results page is parsed with
    BeautifulSoup.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Return the next collection date for each bin type.

        Args:
            page: Not used; the page is loaded through the webdriver.
            **kwargs: Reads ``postcode``, ``paon`` (the full address, matched
                against the visible text of the site's address drop-down),
                ``url``, ``web_driver`` and ``headless``.

        Returns:
            ``{"bins": [{"type": ..., "collectionDate": "dd/mm/YYYY"}, ...]}``.
            The list is empty when no "Your next collections" heading (or no
            bin entry below it) is found in the page source.

        Raises:
            ValueError: If ``postcode`` or ``paon`` is missing.
            Exception: Any webdriver or date-parsing error is printed and
                re-raised after the webdriver has been closed.
        """
        data = {"bins": []}
        driver = None
        try:
            user_paon = kwargs.get("paon")
            postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            url = kwargs.get("url")

            # Fail fast on missing inputs instead of failing inside Selenium
            # after a browser session has already been started.
            if not postcode:
                raise ValueError("No postcode provided.")
            if not user_paon:
                raise ValueError("No house number/address provided.")

            print(
                f"Starting parse_data with parameters: postcode={postcode}, paon={user_paon}"
            )
            print(
                f"Creating webdriver with: web_driver={web_driver}, headless={headless}"
            )

            driver = create_webdriver(web_driver, headless, None, __name__)
            print(f"Navigating to URL: {url}")
            driver.get(url)
            print("Successfully loaded the page")

            # The cookie dialog is optional, so this step is best-effort.
            # Catch Exception (not a bare except) so that KeyboardInterrupt
            # and SystemExit still propagate.
            try:
                accept_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable(
                        (By.ID, "CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll")
                    )
                )
                accept_button.click()
                print("Cookie banner clicked.")
            except Exception:
                print("No cookie banner appeared or selector failed.")

            wait = WebDriverWait(driver, 60)
            post_code_search = wait.until(
                EC.presence_of_element_located((By.ID, "Postcode"))
            )
            post_code_search.send_keys(postcode)

            # Click the Find address button
            print("Looking for 'Find address' button...")
            find_address_btn = wait.until(
                EC.presence_of_element_located(
                    (
                        By.XPATH,
                        "//input[@type='submit' and @class='button button--secondary']",
                    )
                )
            )
            print("Clicking button...")
            find_address_btn.send_keys(Keys.ENTER)

            # Wait for the address dropdown to appear
            print("Waiting for address dropdown to appear...")
            address_dropdown = wait.until(
                EC.presence_of_element_located((By.ID, "UprnAddress"))
            )
            print("Found address dropdown")

            # The address must match an option's visible text exactly.
            print(f"Looking for address: {user_paon}")
            Select(address_dropdown).select_by_visible_text(user_paon)
            print(f"Selected address: {user_paon}")

            print("Looking for submit button after address selection...")
            confirm_btn = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//input[@type='submit']"))
            )
            print("Clicking button...")
            confirm_btn.send_keys(Keys.ENTER)

            # Block until the results card is present before reading the
            # page source; the returned element itself is not needed.
            print("Waiting for collection details to appear...")
            wait.until(
                EC.presence_of_element_located(
                    (
                        By.XPATH,
                        "//div[contains(@class, 'card-body')]//h4[contains(text(), 'Your next collections')]",
                    )
                )
            )

            # Make a BS4 object
            print("Parsing page with BeautifulSoup...")
            soup = BeautifulSoup(driver.page_source, features="html.parser")

            print("Looking for collection details in the page...")

            # Mirror the XPath used for the wait above: look in every
            # card-body div for an h4 whose text contains the heading, so
            # surrounding whitespace or an earlier unrelated card does not
            # silently produce an empty result.
            heading_text = "Your next collections"
            next_collections_heading = None
            for card_body in soup.find_all("div", class_="card-body"):
                next_collections_heading = next(
                    (
                        h4
                        for h4 in card_body.find_all("h4")
                        if heading_text in h4.get_text()
                    ),
                    None,
                )
                if next_collections_heading is not None:
                    break

            bin_divs = []
            if next_collections_heading is not None:
                # Each bin entry is a sibling div with class "my-2".
                bin_divs = next_collections_heading.find_next_siblings(
                    "div", class_="my-2"
                )
                print(f"Found {len(bin_divs)} bin collection divs")

            if bin_divs:
                # Imported once here rather than on every loop iteration.
                from dateutil.parser import parse as parse_date

            for bin_div in bin_divs:
                # The bin type is held in a <strong> tag.
                bin_type_elem = bin_div.find("strong")
                if bin_type_elem is None:
                    continue

                bin_type = bin_type_elem.text.strip().replace(
                    " (if applicable)", ""
                )

                # The parent element holds both the bin type and the date.
                text_container = bin_type_elem.parent
                if text_container is None:
                    continue

                # Strip the bin type out of the full text to leave the date.
                full_text = text_container.get_text(strip=True)
                date_text = full_text.replace(bin_type, "").strip()
                print(f"Unparsed collection date: {date_text}")

                # Remove ordinal indicators (1st, 2nd, 3rd, ...) and let
                # dateutil skip any remaining non-date words.
                cleaned_date_text = remove_ordinal_indicator_from_date_string(
                    date_text
                )
                parsed_date = parse_date(cleaned_date_text, fuzzy=True)
                bin_date = parsed_date.strftime("%d/%m/%Y")

                # Only record entries that have both a type and a date.
                if bin_type and bin_date:
                    dict_data = {
                        "type": bin_type,
                        "collectionDate": bin_date,
                    }
                    data["bins"].append(dict_data)
                    print(f"Added bin data: {dict_data}")
        except Exception as e:
            print(f"An error occurred: {e}")
            raise
        finally:
            print("Cleaning up webdriver...")
            if driver:
                driver.quit()

        return data
|
@@ -0,0 +1,167 @@
|
|
1
|
+
import time
|
2
|
+
|
3
|
+
from bs4 import BeautifulSoup
|
4
|
+
from selenium.webdriver.common.by import By
|
5
|
+
from selenium.webdriver.support import expected_conditions as EC
|
6
|
+
from selenium.webdriver.support.ui import Select, WebDriverWait
|
7
|
+
|
8
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
9
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
10
|
+
|
11
|
+
|
12
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
13
|
+
class CouncilClass(AbstractGetBinDataClass):
    """Collect upcoming bin collections for Peterborough City Council.

    The council's waste page is driven through Selenium: the postcode is
    searched, the requested address is chosen from the suggestion list, and
    the "Next collection" row of each service panel is parsed with
    BeautifulSoup.
    """

    @staticmethod
    def _xpath_literal(text: str) -> str:
        """Return *text* as an XPath 1.0 string literal.

        XPath 1.0 has no escape character, so a value holding both quote
        kinds has to be assembled with ``concat()``.
        """
        if "'" not in text:
            return f"'{text}'"
        if '"' not in text:
            return f'"{text}"'
        pieces = ", \"'\", ".join(f"'{part}'" for part in text.split("'"))
        return f"concat({pieces})"

    def parse_data(self, page: str, **kwargs) -> dict:
        """Return the next collection date for each bin type, soonest first.

        Args:
            page: Not used; the council waste URL is fixed inside the method.
            **kwargs: Reads ``postcode``, ``paon`` (address text matched
                against the entries of the site's address list),
                ``web_driver`` and ``headless``.

        Returns:
            ``{"bins": [{"type": ..., "collectionDate": "dd/mm/YYYY"}, ...]}``
            sorted by collection date. Panels that fail to parse are
            reported with ``print`` and skipped.

        Raises:
            ValueError: If ``postcode`` or ``paon`` is missing.
            Exception: Any other error is printed and re-raised after the
                webdriver has been closed.
        """
        # Explicit function-scope imports: this file does not import these
        # names itself, so do not depend on the star import providing them.
        import re
        from datetime import datetime

        driver = None
        try:
            user_paon = kwargs.get("paon")
            user_postcode = kwargs.get("postcode")
            if not user_postcode:
                raise ValueError("No postcode provided.")
            # Without an address the lookup below would search for the text
            # 'None' and only fail after the full wait timeout.
            if not user_paon:
                raise ValueError("No house number/address provided.")
            check_postcode(user_postcode)

            headless = kwargs.get("headless")
            web_driver = kwargs.get("web_driver")
            driver = create_webdriver(web_driver, headless, None, __name__)
            page = "https://report.peterborough.gov.uk/waste"

            driver.get(page)

            wait = WebDriverWait(driver, 30)

            # The cookie banner is optional, so this step is best-effort.
            # Catch Exception (not a bare except) so that KeyboardInterrupt
            # and SystemExit still propagate.
            try:
                accept_cookies_button = wait.until(
                    EC.element_to_be_clickable(
                        (
                            By.XPATH,
                            "//button/span[contains(text(), 'I Accept Cookies')]",
                        )
                    )
                )
                accept_cookies_button.click()
            except Exception:
                print(
                    "Accept cookies banner not found or clickable within the specified time."
                )

            postcode_input = wait.until(
                EC.presence_of_element_located((By.XPATH, '//input[@id="postcode"]'))
            )
            postcode_input.send_keys(user_postcode)

            postcode_go_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, '//input[@id="go"]'))
            )
            postcode_go_button.click()

            # Wait for the select address drop down to be present
            select_address_input = wait.until(
                EC.presence_of_element_located((By.XPATH, '//input[@id="address"]'))
            )
            select_address_input.click()
            time.sleep(2)

            # Quote the address safely so an apostrophe in it (for example
            # "St John's Road") cannot produce an invalid XPath expression.
            address_literal = self._xpath_literal(user_paon)
            select_address_input_item = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, f"//li[contains(text(), {address_literal})]")
                )
            )
            select_address_input_item.click()

            address_continue_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, '//input[@value="Continue"]'))
            )
            address_continue_button.click()

            # Block until the results heading is present; the element itself
            # is not needed.
            wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, "//h2[contains(text(), 'Your collections')]")
                )
            )

            results_page = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, "//div[@class='waste__collections']")
                )
            )

            soup = BeautifulSoup(results_page.get_attribute("innerHTML"), "html.parser")

            data = {"bins": []}
            output_date_format = "%d/%m/%Y"
            input_date_format = "%A, %d %B %Y"  # Expect: Thursday, 17 April 2025
            # Matches a day number followed by st/nd/rd/th (e.g. 17th -> 17).
            ordinal_suffix = re.compile(r"(\d{1,2})(st|nd|rd|th)")

            # Each bin section is within a waste-service-wrapper div
            collection_panels = soup.find_all("div", class_="waste-service-wrapper")

            for panel in collection_panels:
                # Reset per panel so an error message never names the bin
                # type left over from the previous panel.
                bin_type = "unknown"
                try:
                    bin_type_tag = panel.find("h3", class_="waste-service-name")
                    if not bin_type_tag:
                        continue
                    bin_type = bin_type_tag.get_text(strip=True)

                    # Get the 'Next collection' date from the summary rows.
                    next_collection = None
                    for row in panel.find_all("div", class_="govuk-summary-list__row"):
                        key = row.find("dt", class_="govuk-summary-list__key")
                        value = row.find("dd", class_="govuk-summary-list__value")
                        if key and value and "Next collection" in key.get_text():
                            # Collapse runs of whitespace, then drop the
                            # ordinal suffix so strptime can parse the day.
                            raw_date = " ".join(value.get_text().split())
                            next_collection = ordinal_suffix.sub(r"\1", raw_date)
                            break

                    if not next_collection:
                        continue

                    print(f"Found next collection for {bin_type}: '{next_collection}'")

                    parsed_date = datetime.strptime(next_collection, input_date_format)
                    formatted_date = parsed_date.strftime(output_date_format)

                    data["bins"].append(
                        {
                            "type": bin_type,
                            "collectionDate": formatted_date,
                        }
                    )

                except Exception as e:
                    # One malformed panel must not discard the other bins.
                    print(f"Error processing panel for bin '{bin_type}': {e}")

            # Sort the data
            data["bins"].sort(
                key=lambda x: datetime.strptime(x["collectionDate"], output_date_format)
            )
        except Exception as e:
            print(f"An error occurred: {e}")
            raise
        finally:
            # Close the driver whether or not an exception was raised.
            if driver:
                driver.quit()
        return data
|
@@ -3,7 +3,7 @@ uk_bin_collection/tests/check_selenium_url_in_input.json.py,sha256=Iecdja0I3XIiY
|
|
3
3
|
uk_bin_collection/tests/council_feature_input_parity.py,sha256=DO6Mk4ImYgM5ZCZ-cutwz5RoYYWZRLYx2tr6zIs_9Rc,3843
|
4
4
|
uk_bin_collection/tests/features/environment.py,sha256=VQZjJdJI_kZn08M0j5cUgvKT4k3iTw8icJge1DGOkoA,127
|
5
5
|
uk_bin_collection/tests/features/validate_council_outputs.feature,sha256=SJK-Vc737hrf03tssxxbeg_JIvAH-ddB8f6gU1LTbuQ,251
|
6
|
-
uk_bin_collection/tests/input.json,sha256=
|
6
|
+
uk_bin_collection/tests/input.json,sha256=uvi5_CrjVy26H4gkWdoRXCJ1wsJPgntzJB26hXwC5jI,120556
|
7
7
|
uk_bin_collection/tests/output.schema,sha256=ZwKQBwYyTDEM4G2hJwfLUVM-5v1vKRvRK9W9SS1sd18,1086
|
8
8
|
uk_bin_collection/tests/step_defs/step_helpers/file_handler.py,sha256=Ygzi4V0S1MIHqbdstUlIqtRIwnynvhu4UtpweJ6-5N8,1474
|
9
9
|
uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=VZ0a81sioJULD7syAYHjvK_-nT_Rd36tUyzPetSA0gk,3475
|
@@ -50,6 +50,7 @@ uk_bin_collection/uk_bin_collection/councils/BrecklandCouncil.py,sha256=PX6A_pDv
|
|
50
50
|
uk_bin_collection/uk_bin_collection/councils/BrentCouncil.py,sha256=ucwokxvASYi_KiOYSOVdaGfC1kfUbII0r6Zl2NE1hnU,4208
|
51
51
|
uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py,sha256=k6qt4cds-Ejd97Z-__pw2BYvGVbFdc9SUfF73PPrTNA,5823
|
52
52
|
uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py,sha256=kJmmDJz_kQ45DHmG7ocrUpNJonEn0kuXYEDQyZaf9ks,5576
|
53
|
+
uk_bin_collection/uk_bin_collection/councils/BroadlandDistrictCouncil.py,sha256=aelqhh503dx6O2EEmC3AT5tnY39Dc53qcouH8T-mek8,7613
|
53
54
|
uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py,sha256=dii85JLmYU1uMidCEsWVo3stTcq_QqyC65DxG8u1UmE,4302
|
54
55
|
uk_bin_collection/uk_bin_collection/councils/BromsgroveDistrictCouncil.py,sha256=PUfxP8j5Oh9wFHkdjbrJzQli9UzMHZzwrZ2hkThrvhI,1781
|
55
56
|
uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py,sha256=mcRtkFc9g3YNN17OQfhzYJtNYeWrZPP_e7m7goEhz5I,3012
|
@@ -215,6 +216,7 @@ uk_bin_collection/uk_bin_collection/councils/OadbyAndWigstonBoroughCouncil.py,sh
|
|
215
216
|
uk_bin_collection/uk_bin_collection/councils/OldhamCouncil.py,sha256=9dlesCxNoVXlmQaqZj7QFh00smnJbm1Gnjkr_Uvzurs,1771
|
216
217
|
uk_bin_collection/uk_bin_collection/councils/OxfordCityCouncil.py,sha256=d_bY0cXRDH4kSoWGGCTNN61MNErapSOf2WSTYDJr2r8,2318
|
217
218
|
uk_bin_collection/uk_bin_collection/councils/PerthAndKinrossCouncil.py,sha256=Kos5GzN2co3Ij3tSHOXB9S71Yt78RROCfVRtnh7M1VU,3657
|
219
|
+
uk_bin_collection/uk_bin_collection/councils/PeterboroughCityCouncil.py,sha256=lOrDD4jfJ-_C5UwCGqRcQ1G-U1F5X6rf255ypzYEBcg,6300
|
218
220
|
uk_bin_collection/uk_bin_collection/councils/PlymouthCouncil.py,sha256=FJqpJ0GJhpjYeyZ9ioZPkKGl-zrqMD3y5iKa07e_i30,3202
|
219
221
|
uk_bin_collection/uk_bin_collection/councils/PortsmouthCityCouncil.py,sha256=xogNgVvwM5FljCziiNLgZ_wzkOnrQkifi1dkPMDRMtg,5588
|
220
222
|
uk_bin_collection/uk_bin_collection/councils/PowysCouncil.py,sha256=db3Y5FJz-LFDqmVZqPdzcBxh0Q26OFPrbUxlQ7r4vsQ,5896
|
@@ -324,8 +326,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
|
|
324
326
|
uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=EQWRhZ2pEejlvm0fPyOTsOHKvUZmPnxEYO_OWRGKTjs,1158
|
325
327
|
uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
|
326
328
|
uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=YvmHfZqanwrJ8ToGch34x-L-7yPe31nB_x77_Mgl_vo,4545
|
327
|
-
uk_bin_collection-0.
|
328
|
-
uk_bin_collection-0.
|
329
|
-
uk_bin_collection-0.
|
330
|
-
uk_bin_collection-0.
|
331
|
-
uk_bin_collection-0.
|
329
|
+
uk_bin_collection-0.141.0.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
|
330
|
+
uk_bin_collection-0.141.0.dist-info/METADATA,sha256=wkZzAu4PUwGpBcnep3fvKUVjYSPYKVf4_wlShHwzfXM,19851
|
331
|
+
uk_bin_collection-0.141.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
332
|
+
uk_bin_collection-0.141.0.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
|
333
|
+
uk_bin_collection-0.141.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
{uk_bin_collection-0.139.0.dist-info → uk_bin_collection-0.141.0.dist-info}/entry_points.txt
RENAMED
File without changes
|