uk_bin_collection 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl
This diff covers two publicly available versions of the package as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in that public registry.
- uk_bin_collection/tests/input.json +34 -25
- uk_bin_collection/uk_bin_collection/councils/AberdeenCityCouncil.py +0 -1
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +45 -120
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +4 -1
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +15 -36
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +55 -24
- uk_bin_collection/uk_bin_collection/councils/DacorumBoroughCouncil.py +22 -13
- uk_bin_collection/uk_bin_collection/councils/EastDunbartonshireCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +32 -34
- uk_bin_collection/uk_bin_collection/councils/FarehamBoroughCouncil.py +5 -2
- uk_bin_collection/uk_bin_collection/councils/FolkstoneandHytheDistrictCouncil.py +22 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/HartlepoolBoroughCouncil.py +3 -1
- uk_bin_collection/uk_bin_collection/councils/IslingtonCouncil.py +8 -5
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +23 -10
- uk_bin_collection/uk_bin_collection/councils/MidSuffolkDistrictCouncil.py +70 -92
- uk_bin_collection/uk_bin_collection/councils/NewForestCouncil.py +104 -47
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +138 -21
- uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py +26 -128
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +245 -82
- uk_bin_collection/uk_bin_collection/councils/OxfordCityCouncil.py +1 -0
- uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +170 -13
- uk_bin_collection/uk_bin_collection/councils/RotherhamCouncil.py +70 -38
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +4 -2
- uk_bin_collection/uk_bin_collection/councils/SandwellBoroughCouncil.py +4 -11
- uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py +39 -21
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +136 -21
- uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +18 -22
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +138 -21
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +16 -13
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/RECORD +35 -34
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/entry_points.txt +0 -0
uk_bin_collection/tests/input.json

@@ -102,12 +102,9 @@
     },
     "BCPCouncil": {
         "LAD24CD": "E06000058",
-        "house_number": "3 HARBOUR VIEW ROAD, POOLE, BH14 0PD",
-        "postcode": "BH14 0PD",
-        "web_driver": "http://selenium:4444",
         "skip_get_url": true,
         "uprn": "100040810214",
-        "url": "https://
+        "url": "https://bcpportal.bcpcouncil.gov.uk/checkyourbincollection",
         "wiki_name": "Bournemouth, Christchurch and Poole",
         "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
     },
@@ -763,6 +760,13 @@
         "wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search).",
         "LAD24CD": "E07000040"
     },
+    "EastDunbartonshireCouncil": {
+        "uprn": "132027197",
+        "url": "https://www.eastdunbarton.gov.uk/",
+        "wiki_name": "East Dunbartonshire",
+        "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.",
+        "LAD24CD": "S12000045"
+    },
     "EastHertsCouncil": {
         "LAD24CD": "E07000097",
         "skip_get_url": true,
@@ -888,7 +892,7 @@
     "ErewashBoroughCouncil": {
         "skip_get_url": true,
         "uprn": "10003582028",
-        "url": "https://
+        "url": "https://www.erewash.gov.uk",
         "wiki_name": "Erewash",
         "wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search).",
         "LAD24CD": "E07000036"
@@ -995,7 +999,7 @@
     },
     "GlasgowCityCouncil": {
         "uprn": "906700034497",
-        "url": "https://onlineservices.glasgow.gov.uk/forms/
+        "url": "https://onlineservices.glasgow.gov.uk/forms/refuseandrecyclingcalendar/AddressSearch.aspx",
         "skip_get_url": true,
         "wiki_name": "Glasgow City",
         "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.",
@@ -1611,7 +1615,7 @@
         "postcode": "SO41 0GJ",
         "skip_get_url": true,
         "uprn": "100060482345",
-        "url": "https://forms.newforest.gov.uk/
+        "url": "https://forms.newforest.gov.uk/ufs/FIND_MY_BIN_BAR.eb",
         "web_driver": "http://selenium:4444",
         "wiki_name": "New Forest",
         "wiki_note": "Pass the postcode and UPRN. This parser requires a Selenium webdriver.",
@@ -1650,10 +1654,11 @@
     "NewportCityCouncil": {
         "postcode": "NP20 4HE",
         "skip_get_url": true,
-        "
+        "house_number": "6",
         "url": "https://www.newport.gov.uk/",
+        "web_driver": "http://selenium:4444",
         "wiki_name": "Newport",
-        "wiki_note": "Pass the postcode and
+        "wiki_note": "Pass the postcode and house number in their respective arguments, both wrapped in quotes.",
         "LAD24CD": "W06000022"
     },
     "NorthAyrshireCouncil": {
@@ -1779,21 +1784,22 @@
         "LAD24CD": "E06000065"
     },
     "NorthumberlandCouncil": {
-        "
-        "postcode": "
+        "uprn": "010096302588",
+        "postcode": "NE65 0ZP",
         "skip_get_url": true,
-        "url": "https://
+        "url": "https://bincollection.northumberland.gov.uk/postcode",
         "web_driver": "http://selenium:4444",
         "wiki_name": "Northumberland",
-        "wiki_note": "Pass the
+        "wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search).",
         "LAD24CD": "E06000057"
     },
     "NorwichCityCouncil": {
-        "
-        "
-        "
+        "house_number": "2",
+        "postcode": "NR2 3TT",
+        "url": "https://bnr-wrp.whitespacews.com",
+        "wiki_command_url_override": "hhttps://bnr-wrp.whitespacews.com",
         "wiki_name": "Norwich",
-        "wiki_note": "
+        "wiki_note": "Pass the house number and postcode in their respective parameters.",
         "LAD24CD": "E07000148"
     },
     "NottinghamCityCouncil": {
@@ -2089,10 +2095,11 @@
     "SomersetCouncil": {
         "postcode": "TA6 4AA",
         "skip_get_url": true,
-        "
+        "house_number": "5",
         "url": "https://www.somerset.gov.uk/",
+        "web_driver": "http://selenium:4444",
         "wiki_name": "Somerset",
-        "wiki_note": "Provide your
+        "wiki_note": "Provide your house number and postcode",
         "LAD24CD": "E06000066"
     },
     "SouthAyrshireCouncil": {
@@ -2124,7 +2131,7 @@
     "SouthGloucestershireCouncil": {
         "skip_get_url": true,
         "uprn": "566419",
-        "url": "https://
+        "url": "https://api.southglos.gov.uk/wastecomp/GetCollectionDetails",
         "wiki_name": "South Gloucestershire",
         "wiki_note": "Provide your UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search).",
         "LAD24CD": "E06000025"
@@ -2417,12 +2424,13 @@
         "LAD24CD": "E07000076"
     },
     "TestValleyBoroughCouncil": {
-        "postcode": "SO51
+        "postcode": "SO51 0BY",
         "skip_get_url": true,
-        "
-        "url": "https://testvalley.gov.uk/wasteandrecycling/when-are-my-bins-collected",
+        "house_number": "2",
+        "url": "https://testvalley.gov.uk/wasteandrecycling/when-are-my-bins-collected/when-are-my-bins-collected",
+        "web_driver": "http://selenium:4444",
         "wiki_name": "Test Valley",
-        "wiki_note": "Provide your
+        "wiki_note": "Provide your house number and postcode",
         "LAD24CD": "E07000093"
     },
     "ThanetDistrictCouncil": {
@@ -2465,6 +2473,7 @@
         "skip_get_url": true,
         "uprn": "10000016984",
         "postcode": "TQ1 1AG",
+        "web_driver": "http://selenium:4444",
         "url": "https://www.torbay.gov.uk/recycling/bin-collections/",
         "wiki_name": "Torbay",
         "wiki_note": "Provide your UPRN. Use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find it.",
@@ -2804,4 +2813,4 @@
         "wiki_note": "Provide your UPRN.",
         "LAD24CD": "E06000014"
     }
-}
+}
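Each entry in input.json is a test fixture: the key is the council module name and the values are the keyword arguments its parser is exercised with (uprn, postcode, house_number, web_driver for Selenium-backed parsers, and so on), plus wiki metadata. A minimal sketch of reading the new entry added in this release — the path and key names come from the hunks above; the loading code itself is illustrative:

```python
import json

# Fixture file path as listed at the top of this diff.
with open("uk_bin_collection/tests/input.json") as f:
    councils = json.load(f)

# New in 0.157.0: keys mirror the kwargs the parser expects.
entry = councils["EastDunbartonshireCouncil"]
print(entry["uprn"], entry["wiki_name"])  # 132027197 East Dunbartonshire

# Selenium-backed parsers carry a "web_driver" key; plain requests/API parsers do not.
selenium_backed = sorted(name for name, cfg in councils.items() if "web_driver" in cfg)
```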
uk_bin_collection/uk_bin_collection/councils/AberdeenCityCouncil.py

@@ -5,7 +5,6 @@ import requests
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-
 # import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py

@@ -1,15 +1,13 @@
-import json
 import time
-
-
-from 
-
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.common.keys import Keys
+
+import requests
+from dateutil.relativedelta import relativedelta
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
+# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -18,116 +16,43 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
[old lines 21-60 removed; content not captured in this rendering]
-            # Find and select the address containing the house number
-            address_option = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable((By.XPATH, f"//option[contains(text(), 'HARBOUR VIEW ROAD')]"))
-            )
-            address_option.click()
-
-            # Wait for bin collection results to load
-            WebDriverWait(driver, 15).until(
-                EC.presence_of_element_located((By.XPATH, "//td[contains(text(), 'collection')] | //th[contains(text(), 'collection')]"))
-            )
-
-            # Find the table containing collection data by looking for a cell with 'collection' text
-            collection_table = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.XPATH, "//td[contains(text(), 'collection')]/ancestor::table | //th[contains(text(), 'collection')]/ancestor::table"))
-            )
-
-            # Parse the table data
-            soup = BeautifulSoup(driver.page_source, 'html.parser')
-            data = {"bins": []}
-
-            # Find the table containing collection information
-            collection_cell = soup.find(['td', 'th'], string=lambda text: text and 'collection' in text.lower())
-            if collection_cell:
-                table = collection_cell.find_parent('table')
-                if table:
-                    rows = table.find_all('tr')
-                    for row in rows[1:]:  # Skip header row
-                        cells = row.find_all(['td', 'th'])
-                        if len(cells) >= 2:  # At least bin type and one collection date
-                            bin_type = cells[0].get_text(strip=True)
-                            next_collection = cells[1].get_text(strip=True) if len(cells) > 1 else ""
-                            following_collection = cells[2].get_text(strip=True) if len(cells) > 2 else ""
-
-
-                            # Process next collection date
-                            if bin_type and next_collection and "No collection" not in next_collection:
-                                try:
-                                    # Try multiple date formats
-                                    for date_fmt in ["%A, %d %B %Y", "%A %d %B %Y", "%d/%m/%Y", "%d-%m-%Y", "%Y-%m-%d"]:
-                                        try:
-                                            parsed_date = datetime.strptime(next_collection, date_fmt)
-                                            data["bins"].append({
-                                                "type": bin_type,
-                                                "collectionDate": parsed_date.strftime(date_format)
-                                            })
-                                            break
-                                        except ValueError:
-                                            continue
-                                except:
-                                    continue
-
-                            # Process following collection date
-                            if bin_type and following_collection and "No collection" not in following_collection and "download PDF" not in following_collection:
-                                try:
-                                    # Clean up the following collection text (remove PDF link text)
-                                    following_collection = following_collection.replace("download PDF", "").strip()
-                                    for date_fmt in ["%A, %d %B %Y", "%A %d %B %Y", "%d/%m/%Y", "%d-%m-%Y", "%Y-%m-%d"]:
-                                        try:
-                                            parsed_date = datetime.strptime(following_collection, date_fmt)
-                                            data["bins"].append({
-                                                "type": bin_type,
-                                                "collectionDate": parsed_date.strftime(date_format)
-                                            })
-                                            break
-                                        except ValueError:
-                                            continue
-                                except:
-                                    continue
-
-            return data
-
-        finally:
-            driver.quit()
+        # Make a BS4 object
+        uprn = kwargs.get("uprn")
+        # usrn = kwargs.get("paon")
+        check_uprn(uprn)
+        # check_usrn(usrn)
+        bindata = {"bins": []}
+
+        # uprn = uprn.zfill(12)
+
+        API_URL = "https://prod-17.uksouth.logic.azure.com/workflows/58253d7b7d754447acf9fe5fcf76f493/triggers/manual/paths/invoke?api-version=2016-06-01&sp=%2Ftriggers%2Fmanual%2Frun&sv=1.0&sig=TAvYIUFj6dzaP90XQCm2ElY6Cd34ze05I3ba7LKTiBs"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "*/*",
+            "User-Agent": "Mozilla/5.0",
+            "Referer": "https://bcpportal.bcpcouncil.gov.uk/",
+        }
+        s = requests.session()
+        data = {
+            "uprn": uprn,
+        }
+
+        r = s.post(API_URL, json=data, headers=headers)
+        r.raise_for_status()
+
+        data = r.json()
+        rows_data = data["data"]
+        for row in rows_data:
+            bin_type = row["wasteContainerUsageTypeDescription"]
+            collections = row["scheduleDateRange"]
+            for collection in collections:
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": datetime.strptime(
+                        collection, "%Y-%m-%d"
+                    ).strftime(date_format),
+                }
+                bindata["bins"].append(dict_data)
+
+        return bindata
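The rewritten BCPCouncil parser drops Selenium entirely and posts the UPRN to the council's Logic Apps endpoint, expecting JSON with a top-level `data` array whose rows carry `wasteContainerUsageTypeDescription` and a `scheduleDateRange` list of ISO dates. The mapping step in isolation, against an illustrative payload (not captured from the live service):

```python
from datetime import datetime

date_format = "%d/%m/%Y"  # output format used by the project's common module

# Illustrative response shape mirroring the fields the new parser reads.
payload = {
    "data": [
        {
            "wasteContainerUsageTypeDescription": "Recycling",
            "scheduleDateRange": ["2025-08-04", "2025-08-18"],
        }
    ]
}

bins = []
for row in payload["data"]:
    for iso_date in row["scheduleDateRange"]:
        bins.append({
            "type": row["wasteContainerUsageTypeDescription"],
            "collectionDate": datetime.strptime(iso_date, "%Y-%m-%d").strftime(date_format),
        })

print(bins)  # [{'type': 'Recycling', 'collectionDate': '04/08/2025'}, ...]
```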
uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py

@@ -1,6 +1,8 @@
-from bs4 import BeautifulSoup
 from datetime import datetime
+
 import requests
+from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -10,6 +12,7 @@ COLLECTION_KINDS = {
     "glass": "rteelem_ctl03_pnlCollections_Glass",
     # Garden waste data is only returned if the property is subscribed to the Garden Waste service
     "garden": "rteelem_ctl03_pnlCollections_GardenWaste",
+    "food": "rteelem_ctl03_pnlCollections_Food",
 }
 
 
uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py

@@ -72,7 +72,9 @@ class CouncilClass(AbstractGetBinDataClass):
                     break
 
             if not found:
-                raise Exception(
+                raise Exception(
+                    f"Address containing '{user_paon}' not found in dropdown options"
+                )
 
             submit_btn = wait.until(
                 EC.presence_of_element_located(
@@ -84,7 +86,7 @@ class CouncilClass(AbstractGetBinDataClass):
 
             results = wait.until(
                 EC.presence_of_element_located(
-                    (By.XPATH, f'//
+                    (By.XPATH, f'//div[contains(@class,"mx-name-listView1")]')
                 )
             )
 
@@ -96,44 +98,21 @@ class CouncilClass(AbstractGetBinDataClass):
             current_date = datetime.now()
 
             # Find all elements with class starting with 'mx-name-index-'
-
+            bin_view = soup.find(class_="mx-name-listView1")
+            bins = bin_view.find_all(
+                class_=lambda x: x and x.startswith("mx-name-index-")
+            )
 
             for bin_item in bins:
-                bin_type = bin_item.find(class_="
[old lines 103-108 removed; content not captured in this rendering]
-                ):  # Avoid taking the bin type as the date
-                    next_sibling = elem.find_next_sibling()
-                    if next_sibling:
-                        bin_date_str = next_sibling.text.strip()
-                        try:
-                            # Try parsing the date string in the format 'dd Month' (e.g., '30 Dec', '5 January')
-                            bin_date = datetime.strptime(bin_date_str, "%d %b")
-                        except ValueError:
-                            try:
-                                # If the above format fails, try 'dd MonthName' (e.g., '30 December', '5 January')
-                                bin_date = datetime.strptime(bin_date_str, "%d %B")
-                            except ValueError:
-                                pass
-
-                        if bin_date:
-                            # Set the year based on the logic provided
-                            if bin_date.month < current_date.month:
-                                bin_date = bin_date.replace(
-                                    year=current_date.year + 1
-                                )
-                            else:
-                                bin_date = bin_date.replace(year=current_date.year)
-                            # Format the date to the desired format
-                            bin_date = bin_date.strftime("%d/%m/%Y")
-                            break
+                bin_type = bin_item.find(class_="mx-name-text31").text.strip()
+
+                bin_date_str = bin_item.find(class_="mx-name-text29").text.strip()
+
+                bin_date = datetime.strptime(bin_date_str, "%d %B %Y")
+                bin_date = bin_date.strftime(date_format)
+
                 dict_data = {"type": bin_type, "collectionDate": bin_date}
                 data["bins"].append(dict_data)
-            print(data)
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py

@@ -26,7 +26,9 @@ class CouncilClass(AbstractGetBinDataClass):
         uprn = kwargs.get("uprn")
         check_uprn(uprn)
 
-
+        base_url = "https://apps.castlepoint.gov.uk/cpapps/"
+
+        post_url = f"{base_url}index.cfm?fa=myStreet.displayDetails"
         post_header_str = (
             "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,"
             "image/apng,"
@@ -51,31 +53,60 @@ class CouncilClass(AbstractGetBinDataClass):
         soup = BeautifulSoup(post_response.text, features="html.parser")
         soup.prettify()
 
+        calMonthNext = f"{base_url}{soup.select_one("div.calMonthNext a")["href"]}"
+        nextmonth_response = requests.post(
+            calMonthNext, headers=post_headers, data=form_data, verify=False
+        )
+        soup_nextmonth = BeautifulSoup(nextmonth_response.text, features="html.parser")
+        soup_nextmonth.prettify()
+
         data = {"bins": []}
-        collection_tuple = []
 
[old lines 57-78 removed; content not captured in this rendering]
+        def parse_calendar_month(soup_one_month):
+            out = []
+
+            calendar = soup_one_month.find("table", class_="calendar")
+            if not calendar:
+                return out  # be robust
+
+            # e.g. "[Aug]"
+            month_txt = soup_one_month.find("div", class_="calMonthCurrent").get_text(
+                strip=True
+            )
+            month = datetime.strptime(month_txt, "[%b]").strftime("%m")
+
+            # e.g. "About my Street - August 2025"
+            year_txt = soup_one_month.find("h1").get_text(strip=True)
+            year = datetime.strptime(year_txt, "About my Street - %B %Y").strftime("%Y")
+
+            pink_days = [
+                td.get_text(strip=True) for td in calendar.find_all("td", class_="pink")
+            ]
+            black_days = [
+                td.get_text(strip=True)
+                for td in calendar.find_all("td", class_="normal")
+            ]
+
+            for day in pink_days:
+                out.append(
+                    (
+                        "Pink collection",
+                        datetime(year=int(year), month=int(month), day=int(day)),
+                    )
+                )
+            for day in black_days:
+                out.append(
+                    (
+                        "Normal collection",
+                        datetime(year=int(year), month=int(month), day=int(day)),
+                    )
+                )
+
+            return out
+
+        collection_tuple = []
+        for s in (soup, soup_nextmonth):
+            collection_tuple.extend(parse_calendar_month(s))
 
         ordered_data = sorted(collection_tuple, key=lambda x: x[1])
 
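The new Castle Point helper also fetches the following month's calendar and derives month and year from two page fragments — a "[Aug]"-style label in div.calMonthCurrent and the "About my Street - August 2025" heading — before turning pink/normal day cells into dated tuples. The date handling in isolation (sample strings taken from the comments in the hunk above):

```python
from datetime import datetime

month_txt = "[Aug]"                          # text of div.calMonthCurrent
year_txt = "About my Street - August 2025"   # text of the page <h1>

month = datetime.strptime(month_txt, "[%b]").strftime("%m")                    # "08"
year = datetime.strptime(year_txt, "About my Street - %B %Y").strftime("%Y")   # "2025"

# A day cell's text is just the day number; class "pink" vs "normal" picks the label.
collection = ("Pink collection", datetime(year=int(year), month=int(month), day=14))
print(collection[1].strftime("%d/%m/%Y"))  # 14/08/2025
```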
uk_bin_collection/uk_bin_collection/councils/DacorumBoroughCouncil.py

@@ -76,19 +76,28 @@ class CouncilClass(AbstractGetBinDataClass):
             )
 
             for Collection in NextCollections:
[old lines 79-91 removed; content not captured in this rendering]
+                strong_element = Collection.find("strong")
+                if strong_element:
+                    BinType = strong_element.text.strip()
+                    # Skip if this is not a bin type (e.g., informational text)
+                    if BinType and not any(skip_text in BinType.lower() for skip_text in
+                                           ["please note", "we may collect", "bank holiday", "different day"]):
+                        date_cells = Collection.find_all("div", {"style": "display:table-cell;"})
+                        if len(date_cells) > 1:
+                            date_text = date_cells[1].get_text().strip()
+                            if date_text:
+                                try:
+                                    CollectionDate = datetime.strptime(date_text, "%a, %d %b %Y")
+                                    dict_data = {
+                                        "type": BinType,
+                                        "collectionDate": CollectionDate.strftime("%d/%m/%Y"),
+                                    }
+                                    # Check for duplicates before adding
+                                    if dict_data not in data["bins"]:
+                                        data["bins"].append(dict_data)
+                                except ValueError:
+                                    # Skip if date parsing fails
+                                    continue
 
         except Exception as e:
             # Here you can log the exception if needed
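The reworked Dacorum loop keys the bin type off each row's strong element, skips informational rows ("please note", "bank holiday", and so on), reads the date from the second display:table-cell div, and de-duplicates before appending. A quick check of the date shape implied by that format string (the sample value is illustrative):

```python
from datetime import datetime

date_text = "Mon, 18 Aug 2025"  # illustrative; shape implied by "%a, %d %b %Y" above
print(datetime.strptime(date_text, "%a, %d %b %Y").strftime("%d/%m/%Y"))  # 18/08/2025
```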
uk_bin_collection/uk_bin_collection/councils/EastDunbartonshireCouncil.py (new file)

@@ -0,0 +1,52 @@
+import requests
+from bs4 import BeautifulSoup, Tag
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        URI = f"https://www.eastdunbarton.gov.uk/services/a-z-of-services/bins-waste-and-recycling/bins-and-recycling/collections/?uprn={user_uprn}"
+
+        # Make the GET request
+        response = requests.get(URI)
+
+        soup = BeautifulSoup(response.text, "html.parser")
+
+        table = soup.find("table", {"class": "bin-table"})
+
+        tbody = table.find("tbody")
+
+        trs = tbody.find_all("tr")
+
+        for tr in trs:
+            tds = tr.find_all("td")
+            bin_type = tds[0].get_text()
+            collection_date_str = tds[1].find("span").get_text()
+
+            collection_date = datetime.strptime(collection_date_str, "%A, %d %B %Y")
+
+            dict_data = {
+                "type": bin_type,
+                "collectionDate": collection_date.strftime(date_format),
+            }
+            bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+        )
+
+        return bindata
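The new East Dunbartonshire module ignores the page argument and fetches the collections page itself by UPRN, parsing the bin-table rows into the usual bins list. A hedged usage sketch, driving the class directly with the UPRN from the input.json fixture above (import path taken from the file list; the direct call is illustrative, not the project's documented CLI):

```python
# Assumes the package from this wheel is installed.
from uk_bin_collection.uk_bin_collection.councils.EastDunbartonshireCouncil import (
    CouncilClass,
)

parser = CouncilClass()
# parse_data ignores the page argument here and performs its own GET by UPRN.
result = parser.parse_data("", uprn="132027197")
print(result["bins"][:2])  # e.g. [{'type': '...', 'collectionDate': '04/08/2025'}, ...]
```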