uk_bin_collection 0.138.1__py3-none-any.whl → 0.140.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +63 -26
- uk_bin_collection/uk_bin_collection/councils/AberdeenCityCouncil.py +2 -1
- uk_bin_collection/uk_bin_collection/councils/AberdeenshireCouncil.py +1 -0
- uk_bin_collection/uk_bin_collection/councils/ArdsAndNorthDownCouncil.py +1 -0
- uk_bin_collection/uk_bin_collection/councils/BarnsleyMBCouncil.py +1 -0
- uk_bin_collection/uk_bin_collection/councils/BroadlandDistrictCouncil.py +185 -0
- uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py +7 -3
- uk_bin_collection/uk_bin_collection/councils/CeredigionCountyCouncil.py +157 -0
- uk_bin_collection/uk_bin_collection/councils/CheltenhamBoroughCouncil.py +95 -61
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +1 -0
- uk_bin_collection/uk_bin_collection/councils/CoventryCityCouncil.py +4 -1
- uk_bin_collection/uk_bin_collection/councils/ForestOfDeanDistrictCouncil.py +52 -41
- uk_bin_collection/uk_bin_collection/councils/GooglePublicCalendarCouncil.py +3 -4
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughOfRichmondUponThames.py +11 -9
- uk_bin_collection/uk_bin_collection/councils/MiddlesbroughCouncil.py +13 -4
- uk_bin_collection/uk_bin_collection/councils/MonmouthshireCountyCouncil.py +5 -1
- uk_bin_collection/uk_bin_collection/councils/NewForestCouncil.py +1 -3
- uk_bin_collection/uk_bin_collection/councils/NorthDevonCountyCouncil.py +159 -0
- uk_bin_collection/uk_bin_collection/councils/NorwichCityCouncil.py +15 -3
- uk_bin_collection/uk_bin_collection/councils/NuneatonBedworthBoroughCouncil.py +873 -871
- uk_bin_collection/uk_bin_collection/councils/RugbyBoroughCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py +3 -6
- uk_bin_collection/uk_bin_collection/councils/SouthHollandDistrictCouncil.py +136 -0
- uk_bin_collection/uk_bin_collection/councils/WalsallCouncil.py +6 -2
- uk_bin_collection/uk_bin_collection/councils/WalthamForest.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/WestLindseyDistrictCouncil.py +6 -3
- uk_bin_collection/uk_bin_collection/councils/WychavonDistrictCouncil.py +1 -0
- {uk_bin_collection-0.138.1.dist-info → uk_bin_collection-0.140.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.138.1.dist-info → uk_bin_collection-0.140.0.dist-info}/RECORD +32 -28
- {uk_bin_collection-0.138.1.dist-info → uk_bin_collection-0.140.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.138.1.dist-info → uk_bin_collection-0.140.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.138.1.dist-info → uk_bin_collection-0.140.0.dist-info}/entry_points.txt +0 -0
@@ -19,8 +19,8 @@
|
|
19
19
|
"wiki_note": "Replace XXXXXXXX with your UPRN. You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find it."
|
20
20
|
},
|
21
21
|
"AmberValleyBoroughCouncil": {
|
22
|
-
"url": "https://ambervalley.gov.uk",
|
23
22
|
"uprn": "100030026621",
|
23
|
+
"url": "https://ambervalley.gov.uk",
|
24
24
|
"wiki_name": "Amber Valley Borough Council",
|
25
25
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
|
26
26
|
},
|
@@ -71,8 +71,8 @@
|
|
71
71
|
"AshfordBoroughCouncil": {
|
72
72
|
"postcode": "TN23 7SP",
|
73
73
|
"uprn": "100060777899",
|
74
|
-
"web_driver": "http://selenium:4444",
|
75
74
|
"url": "https://ashford.gov.uk",
|
75
|
+
"web_driver": "http://selenium:4444",
|
76
76
|
"wiki_command_url_override": "https://ashford.gov.uk",
|
77
77
|
"wiki_name": "Ashford Borough Council",
|
78
78
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
|
@@ -203,8 +203,8 @@
|
|
203
203
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
|
204
204
|
},
|
205
205
|
"BolsoverCouncil": {
|
206
|
-
"url": "https://bolsover.gov.uk",
|
207
206
|
"uprn": "100030066827",
|
207
|
+
"url": "https://bolsover.gov.uk",
|
208
208
|
"wiki_name": "Bolsover Council",
|
209
209
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
|
210
210
|
},
|
@@ -282,6 +282,15 @@
|
|
282
282
|
"wiki_name": "Bristol City Council",
|
283
283
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
|
284
284
|
},
|
285
|
+
"BroadlandDistrictCouncil": {
|
286
|
+
"skip_get_url": true,
|
287
|
+
"house_number": "1 Park View, Horsford, Norfolk, NR10 3FD",
|
288
|
+
"postcode": "NR10 3FD",
|
289
|
+
"url": "https://area.southnorfolkandbroadland.gov.uk/FindAddress",
|
290
|
+
"web_driver": "http://selenium:4444",
|
291
|
+
"wiki_name": "Broadland District Council",
|
292
|
+
"wiki_note": "Use the full address as it appears on the drop-down on the site when you search by postcode."
|
293
|
+
},
|
285
294
|
"BromleyBoroughCouncil": {
|
286
295
|
"url": "https://recyclingservices.bromley.gov.uk/waste/6087017",
|
287
296
|
"web_driver": "http://selenium:4444",
|
@@ -386,6 +395,14 @@
|
|
386
395
|
"wiki_name": "Castlepoint District Council",
|
387
396
|
"wiki_note": "For this council, 'uprn' is actually a 4-digit code for your street. Go [here](https://apps.castlepoint.gov.uk/cpapps/index.cfm?fa=wastecalendar) and inspect the source of the dropdown box to find the 4-digit number for your street."
|
388
397
|
},
|
398
|
+
"CeredigionCountyCouncil": {
|
399
|
+
"house_number": "BLAEN CWMMAGWR, TRISANT, CEREDIGION, SY23 4RQ",
|
400
|
+
"postcode": "SY23 4RQ",
|
401
|
+
"url": "https://www.ceredigion.gov.uk/resident/bins-recycling/",
|
402
|
+
"web_driver": "http://selenium:4444",
|
403
|
+
"wiki_name": "Ceredigion County Council",
|
404
|
+
"wiki_note": "House Number is the full address as it appears on the drop-down on the site when you search by postcode. This parser requires a Selenium webdriver."
|
405
|
+
},
|
389
406
|
"CharnwoodBoroughCouncil": {
|
390
407
|
"url": "https://my.charnwood.gov.uk/location?put=cbc10070067259&rememberme=0&redirect=%2F",
|
391
408
|
"wiki_command_url_override": "https://my.charnwood.gov.uk/location?put=cbcXXXXXXXX&rememberme=0&redirect=%2F",
|
@@ -401,9 +418,9 @@
|
|
401
418
|
"wiki_note": "Follow the instructions [here](https://www.chelmsford.gov.uk/myhome/) until you get the page listing your address, then copy the entire address text and use that in the house number field."
|
402
419
|
},
|
403
420
|
"CheltenhamBoroughCouncil": {
|
404
|
-
"uprn": "100120372027",
|
405
421
|
"postcode": "GL51 3NA",
|
406
422
|
"skip_get_url": true,
|
423
|
+
"uprn": "100120372027",
|
407
424
|
"url": "https://www.cheltenham.gov.uk",
|
408
425
|
"wiki_name": "Cheltenham Borough Council",
|
409
426
|
"wiki_note": "Pass the UPRN. You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search)."
|
@@ -631,8 +648,8 @@
|
|
631
648
|
"house_number": "1",
|
632
649
|
"postcode": "CM20 2FZ",
|
633
650
|
"skip_get_url": true,
|
634
|
-
"web_driver": "http://selenium:4444",
|
635
651
|
"url": "https://www.eastherts.gov.uk",
|
652
|
+
"web_driver": "http://selenium:4444",
|
636
653
|
"wiki_name": "East Herts Council",
|
637
654
|
"wiki_note": "Pass the house number and postcode in their respective parameters."
|
638
655
|
},
|
@@ -725,8 +742,8 @@
|
|
725
742
|
},
|
726
743
|
"EppingForestDistrictCouncil": {
|
727
744
|
"postcode": "IG9 6EP",
|
728
|
-
"web_driver": "http://selenium:4444",
|
729
745
|
"url": "https://eppingforestdc.maps.arcgis.com/apps/instant/lookup/index.html?appid=bfca32b46e2a47cd9c0a84f2d8cdde17&find=IG9%206EP",
|
746
|
+
"web_driver": "http://selenium:4444",
|
730
747
|
"wiki_name": "Epping Forest District Council",
|
731
748
|
"wiki_note": "Replace the postcode in the URL with your own."
|
732
749
|
},
|
@@ -840,12 +857,12 @@
|
|
840
857
|
"wiki_note": "Pass the house number, postcode, and UPRN in their respective parameters. This parser requires a Selenium webdriver."
|
841
858
|
},
|
842
859
|
"GooglePublicCalendarCouncil": {
|
843
|
-
"wiki_name": "Google Calendar (Public)",
|
844
|
-
"url": "https://calendar.google.com/calendar/ical/c_bca6c975545213622b3958bddab567e1e7398709c63e377b6fe9a9140bd1caf8%40group.calendar.google.com/public/basic.ics",
|
845
860
|
"skip_get_url": true,
|
861
|
+
"url": "https://calendar.google.com/calendar/ical/c_bca6c975545213622b3958bddab567e1e7398709c63e377b6fe9a9140bd1caf8%40group.calendar.google.com/public/basic.ics",
|
862
|
+
"wiki_name": "Google Calendar (Public)",
|
846
863
|
"wiki_note": "The URL should be the public ics file URL for the public Google calendar. See https://support.google.com/calendar/answer/37083?sjid=7202815583021446882-EU. Councils that currently need this are Trafford."
|
847
864
|
},
|
848
|
-
|
865
|
+
"GraveshamBoroughCouncil": {
|
849
866
|
"skip_get_url": true,
|
850
867
|
"uprn": "100060927046",
|
851
868
|
"url": "https://www.gravesham.gov.uk",
|
@@ -1126,8 +1143,8 @@
|
|
1126
1143
|
"LondonBoroughOfRichmondUponThames": {
|
1127
1144
|
"house_number": "March Road",
|
1128
1145
|
"skip_get_url": true,
|
1129
|
-
"web_driver": "http://selenium:4444",
|
1130
1146
|
"url": "https://www.richmond.gov.uk/services/waste_and_recycling/collection_days/",
|
1147
|
+
"web_driver": "http://selenium:4444",
|
1131
1148
|
"wiki_name": "London Borough Of Richmond Upon Thames",
|
1132
1149
|
"wiki_note": "Pass the name of the street ONLY in the house number parameter, unfortunately post code's are not allowed. "
|
1133
1150
|
},
|
@@ -1209,14 +1226,6 @@
|
|
1209
1226
|
"wiki_name": "Mid Devon Council",
|
1210
1227
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
|
1211
1228
|
},
|
1212
|
-
"MiddlesbroughCouncil": {
|
1213
|
-
"house_number": "12 Constantine Court Park Road North, Middlesbrough",
|
1214
|
-
"skip_get_url": true,
|
1215
|
-
"url": "https://www.midsussex.gov.uk/waste-recycling/bin-collection/",
|
1216
|
-
"web_driver": "http://selenium:4444",
|
1217
|
-
"wiki_name": "Middlesbrough Council",
|
1218
|
-
"wiki_note": "Pass the entire address without postcode as it appears when you type it on the website. This parser requires a Selenium webdriver."
|
1219
|
-
},
|
1220
1229
|
"MidSuffolkDistrictCouncil": {
|
1221
1230
|
"house_number": "Monday",
|
1222
1231
|
"postcode": "Week 2",
|
@@ -1235,6 +1244,14 @@
|
|
1235
1244
|
"wiki_name": "Mid Sussex District Council",
|
1236
1245
|
"wiki_note": "Pass the name of the street with the house number parameter, wrapped in double quotes. This parser requires a Selenium webdriver."
|
1237
1246
|
},
|
1247
|
+
"MiddlesbroughCouncil": {
|
1248
|
+
"house_number": "12 Constantine Court Park Road North, Middlesbrough",
|
1249
|
+
"skip_get_url": true,
|
1250
|
+
"url": "https://www.midsussex.gov.uk/waste-recycling/bin-collection/",
|
1251
|
+
"web_driver": "http://selenium:4444",
|
1252
|
+
"wiki_name": "Middlesbrough Council",
|
1253
|
+
"wiki_note": "Pass the entire address without postcode as it appears when you type it on the website. This parser requires a Selenium webdriver."
|
1254
|
+
},
|
1238
1255
|
"MidlothianCouncil": {
|
1239
1256
|
"house_number": "52",
|
1240
1257
|
"postcode": "EH19 2EB",
|
@@ -1327,6 +1344,16 @@
|
|
1327
1344
|
"wiki_name": "North Ayrshire Council",
|
1328
1345
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
|
1329
1346
|
},
|
1347
|
+
"NorthDevonCountyCouncil": {
|
1348
|
+
"house_number": "1",
|
1349
|
+
"postcode": "EX31 2LE",
|
1350
|
+
"skip_get_url": true,
|
1351
|
+
"uprn": "100040249471",
|
1352
|
+
"url": "https://my.northdevon.gov.uk/service/WasteRecyclingCollectionCalendar",
|
1353
|
+
"web_driver": "http://selenium:4444",
|
1354
|
+
"wiki_name": "North Devon County Council",
|
1355
|
+
"wiki_note": "Pass the house number and postcode in their respective parameters. This parser requires a Selenium webdriver."
|
1356
|
+
},
|
1330
1357
|
"NorthEastDerbyshireDistrictCouncil": {
|
1331
1358
|
"postcode": "S42 5RB",
|
1332
1359
|
"skip_get_url": true,
|
@@ -1672,13 +1699,6 @@
|
|
1672
1699
|
"wiki_name": "Somerset Council",
|
1673
1700
|
"wiki_note": "Provide your UPRN and postcode. Find your UPRN using [FindMyAddress](https://www.findmyaddress.co.uk/search)."
|
1674
1701
|
},
|
1675
|
-
"SouthamptonCityCouncil": {
|
1676
|
-
"skip_get_url": true,
|
1677
|
-
"uprn": "100060731893",
|
1678
|
-
"url": "https://www.southampton.gov.uk",
|
1679
|
-
"wiki_name": "Southampton City Council",
|
1680
|
-
"wiki_note": "Pass the UPRN. You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search)."
|
1681
|
-
},
|
1682
1702
|
"SouthAyrshireCouncil": {
|
1683
1703
|
"postcode": "KA19 7BN",
|
1684
1704
|
"skip_get_url": true,
|
@@ -1715,6 +1735,16 @@
|
|
1715
1735
|
"wiki_name": "South Hams District Council",
|
1716
1736
|
"wiki_note": "Use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN."
|
1717
1737
|
},
|
1738
|
+
"SouthHollandDistrictCouncil": {
|
1739
|
+
"house_number": "1",
|
1740
|
+
"postcode": "PE6 0HE",
|
1741
|
+
"skip_get_url": true,
|
1742
|
+
"uprn": "100030872493",
|
1743
|
+
"url": "https://www.sholland.gov.uk/mycollections",
|
1744
|
+
"web_driver": "http://selenium:4444",
|
1745
|
+
"wiki_name": "South Holland District Council",
|
1746
|
+
"wiki_note": "Pass the UPRN and postcode in their respective parameters. This parser requires a Selenium webdriver."
|
1747
|
+
},
|
1718
1748
|
"SouthKestevenDistrictCouncil": {
|
1719
1749
|
"house_number": "2 Althorpe Close, Market Deeping, PE6 8BL",
|
1720
1750
|
"postcode": "PE68BL",
|
@@ -1765,6 +1795,13 @@
|
|
1765
1795
|
"wiki_name": "South Tyneside Council",
|
1766
1796
|
"wiki_note": "Provide your house number in the `house_number` parameter and postcode in the `postcode` parameter."
|
1767
1797
|
},
|
1798
|
+
"SouthamptonCityCouncil": {
|
1799
|
+
"skip_get_url": true,
|
1800
|
+
"uprn": "100060731893",
|
1801
|
+
"url": "https://www.southampton.gov.uk",
|
1802
|
+
"wiki_name": "Southampton City Council",
|
1803
|
+
"wiki_note": "Pass the UPRN. You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search)."
|
1804
|
+
},
|
1768
1805
|
"SouthwarkCouncil": {
|
1769
1806
|
"uprn": "200003469271",
|
1770
1807
|
"url": "https://services.southwark.gov.uk/bins/lookup/",
|
@@ -2261,4 +2298,4 @@
|
|
2261
2298
|
"wiki_name": "York Council",
|
2262
2299
|
"wiki_note": "Provide your UPRN."
|
2263
2300
|
}
|
2264
|
-
}
|
2301
|
+
}
|
@@ -5,6 +5,7 @@ import requests
|
|
5
5
|
from uk_bin_collection.uk_bin_collection.common import *
|
6
6
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
7
7
|
|
8
|
+
|
8
9
|
# import the wonderful Beautiful Soup and the URL grabber
|
9
10
|
class CouncilClass(AbstractGetBinDataClass):
|
10
11
|
"""
|
@@ -12,7 +13,7 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
12
13
|
base class. They can also override some operations with a default
|
13
14
|
implementation.
|
14
15
|
"""
|
15
|
-
|
16
|
+
|
16
17
|
def parse_data(self, page: str, **kwargs) -> dict:
|
17
18
|
|
18
19
|
user_uprn = kwargs.get("uprn")
|
@@ -4,6 +4,7 @@ from bs4 import BeautifulSoup
|
|
4
4
|
from uk_bin_collection.uk_bin_collection.common import *
|
5
5
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
6
6
|
|
7
|
+
|
7
8
|
# import the wonderful Beautiful Soup and the URL grabber
|
8
9
|
class CouncilClass(AbstractGetBinDataClass):
|
9
10
|
"""
|
@@ -6,6 +6,7 @@ from bs4 import BeautifulSoup
|
|
6
6
|
from uk_bin_collection.uk_bin_collection.common import *
|
7
7
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
8
|
|
9
|
+
|
9
10
|
class CouncilClass(AbstractGetBinDataClass):
|
10
11
|
"""
|
11
12
|
Concrete classes have to implement all abstract operations of the
|
@@ -0,0 +1,185 @@
|
|
1
|
+
# This script pulls (in one hit) the data from Broadland District Council Bins Data
|
2
|
+
# Working command line:
|
3
|
+
# python collect_data.py BroadlandDistrictCouncil "https://area.southnorfolkandbroadland.gov.uk/FindAddress" -p "NR10 3FD" -n "1 Park View, Horsford, Norfolk, NR10 3FD"
|
4
|
+
|
5
|
+
import re
|
6
|
+
import time
|
7
|
+
from datetime import datetime
|
8
|
+
|
9
|
+
from bs4 import BeautifulSoup
|
10
|
+
from selenium.webdriver.common.by import By
|
11
|
+
from selenium.webdriver.common.keys import Keys
|
12
|
+
from selenium.webdriver.support import expected_conditions as EC
|
13
|
+
from selenium.webdriver.support.ui import Select
|
14
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
15
|
+
|
16
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
17
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
18
|
+
|
19
|
+
|
20
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Broadland District Council.

    Drives the council's address finder with Selenium: enters the postcode,
    picks the address from the drop-down by its visible text, submits, and
    then parses the "Your next collections" card with BeautifulSoup.
    """

    # Address-finder page used when the caller does not pass a ``url``.
    _DEFAULT_URL = "https://area.southnorfolkandbroadland.gov.uk/FindAddress"

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Look up the upcoming collections for one address.

        Args:
            page: Unused; the page is loaded through the webdriver instead.
            **kwargs: Reads ``paon`` (the full address exactly as it appears
                in the site's drop-down), ``postcode``, ``web_driver``,
                ``headless`` and, optionally, ``url``.

        Returns:
            ``{"bins": [{"type": ..., "collectionDate": "dd/mm/YYYY"}, ...]}``

        Raises:
            ValueError: If ``paon`` or ``postcode`` is missing.
            Exception: Any Selenium or parsing error is printed and re-raised.
        """
        driver = None
        data = {"bins": []}
        try:
            user_paon = kwargs.get("paon")
            postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            url = kwargs.get("url") or self._DEFAULT_URL

            print(
                f"Starting parse_data with parameters: postcode={postcode}, paon={user_paon}"
            )

            # Fail fast, before a browser session is started, instead of
            # failing later with an obscure Selenium error.
            if not user_paon or not postcode:
                raise ValueError(
                    "Both the full address (house number / paon) and the postcode are required."
                )

            print(
                f"Creating webdriver with: web_driver={web_driver}, headless={headless}"
            )
            driver = create_webdriver(web_driver, headless, None, __name__)
            print(f"Navigating to URL: {url}")
            driver.get(url)
            print("Successfully loaded the page")

            # The cookie dialog is optional: carry on if it never shows up.
            # ``Exception`` (not a bare ``except``) so Ctrl-C still works.
            try:
                accept_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable(
                        (By.ID, "CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll")
                    )
                )
                accept_button.click()
                print("Cookie banner clicked.")
            except Exception:
                print("No cookie banner appeared or selector failed.")

            wait = WebDriverWait(driver, 60)
            post_code_search = wait.until(
                EC.presence_of_element_located((By.ID, "Postcode"))
            )
            post_code_search.send_keys(postcode)

            # Click the Find address button
            print("Looking for 'Find address' button...")
            submit_btn = wait.until(
                EC.presence_of_element_located(
                    (
                        By.XPATH,
                        "//input[@type='submit' and @class='button button--secondary']",
                    )
                )
            )
            print("Clicking button...")
            submit_btn.send_keys(Keys.ENTER)

            # Wait for the address dropdown to appear
            print("Waiting for address dropdown to appear...")
            address_dropdown = wait.until(
                EC.presence_of_element_located((By.ID, "UprnAddress"))
            )
            print("Found address dropdown")

            # Select the address by its exact visible text
            print(f"Looking for address: {user_paon}")
            Select(address_dropdown).select_by_visible_text(user_paon)
            print(f"Selected address: {user_paon}")

            print("Looking for submit button after address selection...")
            submit_btn = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//input[@type='submit']"))
            )
            print("Clicking button...")
            submit_btn.send_keys(Keys.ENTER)

            # Block until the results card is rendered; the element itself is
            # not needed because the page source is parsed below.
            print("Waiting for collection details to appear...")
            wait.until(
                EC.presence_of_element_located(
                    (
                        By.XPATH,
                        "//div[contains(@class, 'card-body')]//h4[contains(text(), 'Your next collections')]",
                    )
                )
            )

            print("Parsing page with BeautifulSoup...")
            soup = BeautifulSoup(driver.page_source, features="html.parser")

            print("Looking for collection details in the page...")
            data["bins"] = self._extract_bins(soup)
        except Exception as e:
            print(f"An error occurred: {e}")
            raise
        finally:
            print("Cleaning up webdriver...")
            if driver:
                driver.quit()

        return data

    @staticmethod
    def _extract_bins(soup) -> list:
        """Return the ``{"type", "collectionDate"}`` entries found in the results page."""
        # Imported once per call rather than once per bin entry.
        from dateutil.parser import parse as parse_date

        # Substring match, mirroring the ``contains(text(), ...)`` XPath used
        # while waiting, so surrounding whitespace does not hide the heading.
        heading_pattern = re.compile("Your next collections")
        heading = None
        for card_body in soup.find_all("div", class_="card-body"):
            heading = card_body.find("h4", string=heading_pattern)
            if heading:
                break
        if heading is None:
            return []

        # Each upcoming collection is a sibling div with class "my-2"
        bin_divs = heading.find_next_siblings("div", class_="my-2")
        print(f"Found {len(bin_divs)} bin collection divs")

        bins = []
        for bin_div in bin_divs:
            # The bin type is held in a <strong> tag
            bin_type_elem = bin_div.find("strong")
            if bin_type_elem is None or bin_type_elem.parent is None:
                continue

            bin_type = bin_type_elem.text.strip().replace(" (if applicable)", "")
            if not bin_type:
                continue

            # The parent holds both the bin type and the date; strip the type
            # (and any leftover "(if applicable)" suffix) to leave the date.
            full_text = bin_type_elem.parent.get_text(strip=True)
            date_text = (
                full_text.replace(bin_type, "").replace("(if applicable)", "").strip()
            )
            print(f"Unparsed collection date: {date_text}")

            # Remove ordinal indicators (1st, 2nd, 3rd, ...) before parsing
            cleaned_date_text = remove_ordinal_indicator_from_date_string(date_text)
            parsed_date = parse_date(cleaned_date_text, fuzzy=True)

            dict_data = {
                "type": bin_type,
                "collectionDate": parsed_date.strftime("%d/%m/%Y"),
            }
            bins.append(dict_data)
            print(f"Added bin data: {dict_data}")

        return bins
|
@@ -43,7 +43,7 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
43
43
|
rows = table.find_all("tr")
|
44
44
|
|
45
45
|
current_year = datetime.now().year
|
46
|
-
current_month = datetime.now().month
|
46
|
+
current_month = datetime.now().month
|
47
47
|
|
48
48
|
# Process each row into a list of dictionaries
|
49
49
|
for row in rows[1:]: # Skip the header row
|
@@ -56,13 +56,17 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
56
56
|
# Safely try to parse collection date
|
57
57
|
if collection_date_text:
|
58
58
|
try:
|
59
|
-
collection_date = datetime.strptime(
|
59
|
+
collection_date = datetime.strptime(
|
60
|
+
collection_date_text, "%a %d %b"
|
61
|
+
)
|
60
62
|
if collection_date.month == 1 and current_month != 1:
|
61
63
|
collection_date = collection_date.replace(year=current_year + 1)
|
62
64
|
else:
|
63
65
|
collection_date = collection_date.replace(year=current_year)
|
64
66
|
|
65
|
-
formatted_collection_date = collection_date.strftime(
|
67
|
+
formatted_collection_date = collection_date.strftime(
|
68
|
+
"%d/%m/%Y"
|
69
|
+
) # Use your desired date format
|
66
70
|
dict_data = {
|
67
71
|
"type": service,
|
68
72
|
"collectionDate": formatted_collection_date,
|
@@ -0,0 +1,157 @@
|
|
1
|
+
from time import sleep
|
2
|
+
|
3
|
+
from bs4 import BeautifulSoup
|
4
|
+
from selenium.webdriver.common.by import By
|
5
|
+
from selenium.webdriver.support import expected_conditions as EC
|
6
|
+
from selenium.webdriver.support.ui import Select, WebDriverWait
|
7
|
+
|
8
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
9
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
10
|
+
|
11
|
+
|
12
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
13
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Ceredigion County Council.

    Drives the council's "Postcode Search" form with Selenium, selects the
    address by its visible text and parses the resulting "Next collection:"
    panels with BeautifulSoup.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Look up the upcoming collections for one address.

        Args:
            page: Unused; the page is loaded through the webdriver instead.
            **kwargs: Reads ``paon`` (the full address as shown in the site's
                drop-down), ``postcode``, ``web_driver`` and ``headless``.

        Returns:
            ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}`` sorted
            by collection date, with dates formatted using ``date_format``.

        Raises:
            Exception: Any validation, Selenium or parsing error is printed
                and re-raised.
        """
        driver = None
        data = {"bins": []}
        try:
            house_number = kwargs.get("paon")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            check_paon(house_number)
            check_postcode(user_postcode)

            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless, None, __name__)
            driver.get("https://www.ceredigion.gov.uk/resident/bins-recycling/")

            # The cookie banner is optional: carry on if it never shows up.
            # ``Exception`` (not a bare ``except``) so Ctrl-C still works.
            try:
                reject_cookies = WebDriverWait(driver, timeout=10).until(
                    EC.element_to_be_clickable(
                        (By.XPATH, "//button[@id='ccc-reject-settings']")
                    )
                )
                reject_cookies.click()
            except Exception:
                print(
                    "Accept cookies banner not found or clickable within the specified time."
                )

            # Open the postcode search section
            postcode_search = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH, "//a[contains(text(), 'Postcode Search')]")
                )
            )
            driver.execute_script("arguments[0].scrollIntoView(true);", postcode_search)

            sleep(2)  # Wait for the element to be in view

            postcode_search.click()

            postcode_entry_box = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH, "//input[@data-ebv-desc='Postcode']")
                )
            )

            # Enter postcode
            postcode_entry_box.send_keys(user_postcode)

            postcode_button = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH, "//input[@value='Find Address']")
                )
            )
            postcode_button.click()

            address_dropdown = Select(
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located(
                        (By.XPATH, "//select[@data-ebv-desc='Select Address']")
                    )
                )
            )
            address_dropdown.select_by_visible_text(house_number)

            address_next_button = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, "//input[@value='Next']"))
            )
            address_next_button.click()

            result = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH, "//form[contains(., 'Next collection:')]")
                )
            )

            # Make a BS4 object from the results form only
            soup = BeautifulSoup(
                result.get_attribute("innerHTML"), features="html.parser"
            )

            data["bins"] = self._extract_bins(soup)
            data["bins"].sort(
                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
            )
        except Exception as e:
            # Report the failure, then let it propagate to the caller
            print(f"An error occurred: {e}")
            raise
        finally:
            # Ensure the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data

    @staticmethod
    def _resolve_collection_date(day: str, month: str, today):
        """
        Turn a day and month name (no year on the page) into a datetime.

        The current year is assumed unless that would put the date more than
        31 days in the past, which cannot be a *next* collection; that only
        happens across the December/January boundary, so the following year
        is used instead.
        """
        candidate = datetime.strptime(f"{day} {month} {today.year}", "%d %B %Y")
        if (today - candidate).days > 31:
            candidate = datetime.strptime(
                f"{day} {month} {today.year + 1}", "%d %B %Y"
            )
        return candidate

    @staticmethod
    def _extract_bins(soup) -> list:
        """Return the ``{"type", "collectionDate"}`` entries found in the results form."""
        # Compiled once here rather than once per panel.
        next_collection_re = re.compile(
            r"Next collection:\s*(\w+day)\s+(\d{1,2})(?:st|nd|rd|th)?\s+(\w+)"
        )
        today = datetime.now()
        bins = []

        # Find all panels containing collection info
        for panel in soup.find_all("div", class_="eb-OL2RoeVH-panel"):
            # Best effort: one malformed panel must not discard the others.
            try:
                spans = panel.find_all("span")
                if not spans:
                    continue

                # The 'Next collection' text is taken from the panel's last span
                match = next_collection_re.search(spans[-1].text.strip())
                if not match:
                    continue

                _, day, month = match.groups()
                collection_date = CouncilClass._resolve_collection_date(
                    day, month, today
                ).strftime(date_format)

                # Bin types are labelled in the sibling image blocks.
                # NOTE(review): this takes *every* following "waste_image"
                # sibling; confirm against the live markup that these cannot
                # belong to a later panel.
                for block in panel.find_next_siblings("div", class_="waste_image"):
                    label = block.find("span")
                    if label:
                        bins.append(
                            {
                                "type": label.text.strip(),
                                "collectionDate": collection_date,
                            }
                        )
            except Exception as e:
                print(f"Skipping one panel due to: {e}")

        return bins
|