uk_bin_collection 0.152.0__py3-none-any.whl → 0.152.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +92 -58
- uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +69 -24
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +24 -47
- uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +14 -3
- uk_bin_collection/uk_bin_collection/councils/CheltenhamBoroughCouncil.py +12 -12
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +24 -3
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +4 -0
- uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +114 -261
- uk_bin_collection/uk_bin_collection/councils/DartfordBoroughCouncil.py +13 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +17 -2
- uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +14 -1
- uk_bin_collection/uk_bin_collection/councils/EastbourneBoroughCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +59 -45
- uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +2 -0
- uk_bin_collection/uk_bin_collection/councils/EppingForestDistrictCouncil.py +47 -15
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +13 -1
- uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +2 -3
- uk_bin_collection/uk_bin_collection/councils/HerefordshireCouncil.py +13 -2
- uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +18 -4
- uk_bin_collection/uk_bin_collection/councils/LewesDistrictCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +16 -4
- uk_bin_collection/uk_bin_collection/councils/MaidstoneBoroughCouncil.py +42 -47
- uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +13 -6
- uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py +2 -1
- uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +14 -9
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +2 -2
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +50 -14
- uk_bin_collection/uk_bin_collection/councils/SouthRibbleCouncil.py +115 -65
- uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +10 -5
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +1 -3
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +3 -0
- {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/METADATA +179 -1
- {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/RECORD +36 -34
- {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/entry_points.txt +0 -0
uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py:

@@ -1,18 +1,39 @@
-from typing import Any, Dict
-
+from typing import Any, Dict, Optional
+
+from bs4 import BeautifulSoup, NavigableString, Tag
+
+from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 """
 This module provides bin collection data for Cheshire East Council.
 """
 
-
 class CouncilClass(AbstractGetBinDataClass):
     """
     A class to fetch and parse bin collection data for Cheshire East Council.
     """
 
     def parse_data(self, page: Any, **kwargs: Any) -> Dict[str, Any]:
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://online.cheshireeast.gov.uk/MyCollectionDay/SearchByAjax/GetBartecJobList?uprn={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
+        # Add warning suppression for the insecure request
+        import urllib3
+
+        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+        # Make request with SSL verification disabled
+        page = requests.get(url, verify=False)
+
         soup = BeautifulSoup(page.text, features="html.parser")
 
         bin_data_dict: Dict[str, Any] = {"bins": []}
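Most of the requests-based hunks in this release repeat the same identifier-resolution shape: prefer a `uprn` kwarg, falling back to a stored `url` for configs created under the old system. A minimal standalone sketch of that flow, with a stand-in `check_uprn` (the real helper lives in `uk_bin_collection.uk_bin_collection.common`) and a hypothetical endpoint:

```python
from typing import Any


def check_uprn(uprn: str) -> None:
    # Stand-in for common.check_uprn, assumed to raise on a missing/invalid UPRN.
    if not uprn or not str(uprn).isdigit():
        raise ValueError(f"Invalid UPRN: {uprn!r}")


def resolve_url(**kwargs: Any) -> str:
    try:
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)
        url = f"https://example.invalid/collections?uprn={user_uprn}"  # hypothetical endpoint
        if not user_uprn:
            # Fallback: older configs stored a full URL rather than a UPRN.
            url = kwargs.get("url")
    except Exception as e:
        raise ValueError(f"Error getting identifier: {str(e)}")
    return url


print(resolve_url(uprn="100012345678"))
```

Note that, as written, the `if not user_uprn:` branch is only reachable when `check_uprn` tolerates an empty value; if it raises instead, the `except` clause converts the fallback case into a `ValueError`.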
uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py:

@@ -41,6 +41,10 @@ class CouncilClass(AbstractGetBinDataClass):
         check_uprn(user_uprn)
         check_postcode(user_postcode)
 
+        # Ensure UPRN starts with "UPRN"
+        if not user_uprn.startswith("UPRN"):
+            user_uprn = f"UPRN{user_uprn}"
+
         # Create Selenium webdriver
         user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
         driver = create_webdriver(web_driver, headless, user_agent, __name__)
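The `startswith` guard makes the prefixing idempotent, so values already stored in the prefixed form pass through untouched. A quick self-contained check of that behaviour (the function name is illustrative, not from the package):

```python
def normalise_uprn(user_uprn: str) -> str:
    # Ensure UPRN starts with "UPRN", without ever double-prefixing.
    if not user_uprn.startswith("UPRN"):
        user_uprn = f"UPRN{user_uprn}"
    return user_uprn


assert normalise_uprn("100012345678") == "UPRN100012345678"
assert normalise_uprn("UPRN100012345678") == "UPRN100012345678"  # unchanged
```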
uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py:

@@ -1,237 +1,16 @@
 import time
 
 from bs4 import BeautifulSoup
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import Select, WebDriverWait
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
-def get_headers(base_url: str, method: str) -> dict[str, str]:
-    """
-    Gets request headers
-    :rtype: dict[str, str]
-    :param base_url: Base URL to use
-    :param method: Method to use
-    :return: Request headers
-    """
-    headers = {
-        "Accept-Encoding": "gzip, deflate, br",
-        "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8",
-        "Cache-Control": "max-age=0",
-        "Connection": "keep-alive",
-        "Host": "service.croydon.gov.uk",
-        "Origin": base_url,
-        "sec-ch-ua": '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"',
-        "sec-ch-ua-mobile": "?0",
-        "sec-ch-ua-platform": "Windows",
-        "Sec-Fetch-Dest": "document",
-        "Sec-Fetch-User": "?1",
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
-        " Chrome/109.0.0.0 Safari/537.36",
-    }
-    if method.lower() == "post":
-        headers["Accept"] = "application/json, text/javascript, */*; q=0.01"
-        headers["Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8"
-        headers["Sec-Fetch-Mode"] = "cors"
-        headers["Sec-Fetch-Mode"] = "same-origin"
-        headers["X-Requested-With"] = "XMLHttpRequest"
-    else:
-        headers["Accept"] = (
-            "text/html,application/xhtml+xml,application/xml;"
-            "q=0.9,image/avif,image/webp,image/apng,*/*;"
-            "q=0.8,application/signed-exchange;v=b3;q=0.9"
-        )
-        headers["Sec-Fetch-Mode"] = "navigate"
-        headers["Sec-Fetch-Mode"] = "none"
-    return headers
-
-
-def get_session_storage_global() -> object:
-    """
-    Gets session storage global object
-    :rtype: object
-    :return: Session storage global object
-    """
-    return {
-        "destination_stack": [
-            "w/webpage/bin-day-enter-address",
-            "w/webpage/your-bin-collection-details?context_record_id=86086077"
-            "&webpage_token=5c047b2c10b4aad66bef2054aac6bea52ad7a5e185ffdf7090b01f8ddc96728f",
-            "w/webpage/bin-day-enter-address",
-            "w/webpage/your-bin-collection-details?context_record_id=86085229"
-            "&webpage_token=cf1b8fd6213f4823277d98c1dd8a992e6ebef1fabc7d892714e5d9dade448c37",
-            "w/webpage/bin-day-enter-address",
-            "w/webpage/your-bin-collection-details?context_record_id=86084221"
-            "&webpage_token=7f52fb51019bf0e6bfe9647b1b31000124bd92a9d95781f1557f58b3ed40da52",
-            "w/webpage/bin-day-enter-address",
-            "w/webpage/your-bin-collection-details?context_record_id=86083209"
-            "&webpage_token=de50c265da927336f526d9d9a44947595c3aa38965aa8c495ac2fb73d272ece8",
-            "w/webpage/bin-day-enter-address",
-        ],
-        "last_context_record_id": "86086077",
-    }
-
-
-def get_csrf_token(s: requests.session, base_url: str) -> str:
-    """
-    Gets a CSRF token
-    :rtype: str
-    :param s: requests.Session() to use
-    :param base_url: Base URL to use
-    :return: CSRF token
-    """
-    csrf_token = ""
-    response = s.get(
-        base_url + "/wasteservices/w/webpage/bin-day-enter-address",
-        headers=get_headers(base_url, "GET"),
-    )
-    if response.status_code == 200:
-        soup = BeautifulSoup(response.text, features="html.parser")
-        soup.prettify()
-        app_body = soup.find("div", {"class": "app-body"})
-        script = app_body.find("script", {"type": "text/javascript"}).string
-        p = re.compile("var CSRF = ('|\")(.*?)('|\");")
-        m = p.search(script)
-        csrf_token = m.groups()[1]
-    else:
-        raise ValueError(
-            "Code 1: Failed to get a CSRF token. Please ensure the council website is online first,"
-            " then open an issue on GitHub."
-        )
-    return csrf_token
-
-
-def get_address_id(
-    s: requests.session, base_url: str, csrf_token: str, postcode: str, paon: str
-) -> str:
-    """
-    Gets the address ID
-    :rtype: str
-    :param s: requests.Session() to use
-    :param base_url: Base URL to use
-    :param csrf_token: CSRF token to use
-    :param postcode: Postcode to use
-    :param paon: House number/address to find
-    :return: address ID
-    """
-    address_id = "0"
-    # Get the addresses for the postcode
-    form_data = {
-        "code_action": "search",
-        "code_params": '{"search_item":"' + postcode + '","is_ss":true}',
-        "fragment_action": "handle_event",
-        "fragment_id": "PCF0020408EECEC1",
-        "fragment_collection_class": "formtable",
-        "fragment_collection_editable_values": '{"PCF0021449EECEC1":"1"}',
-        "_session_storage": json.dumps(
-            {
-                "/wasteservices/w/webpage/bin-day-enter-address": {},
-                "_global": get_session_storage_global(),
-            }
-        ),
-        "action_cell_id": "PCL0005629EECEC1",
-        "action_page_id": "PAG0000898EECEC1",
-        "form_check_ajax": csrf_token,
-    }
-    response = s.post(
-        base_url
-        + "/wasteservices/w/webpage/bin-day-enter-address?webpage_subpage_id=PAG0000898EECEC1"
-        "&webpage_token=faab02e1f62a58f7bad4c2ae5b8622e19846b97dde2a76f546c4bb1230cee044"
-        "&widget_action=fragment_action",
-        headers=get_headers(base_url, "POST"),
-        data=form_data,
-    )
-    if response.status_code == 200:
-        json_response = json.loads(response.text)
-        addresses = json_response["response"]["items"]
-        # Find the matching address id for the paon
-        for address in addresses:
-            # Check for full matches first
-            if address.get("dropdown_display_field") == paon:
-                address_id = address.get("id")
-                break
-        # Check for matching start if no full match found
-        if address_id == "0":
-            for address in addresses:
-                if address.get("dropdown_display_field").split()[0] == paon.strip():
-                    address_id = address.get("id")
-                    break
-        # Check match was found
-        if address_id == "0":
-            raise ValueError(
-                "Code 2: No matching address for house number/full address found."
-            )
-    else:
-        raise ValueError("Code 3: No addresses found for provided postcode.")
-    return address_id
-
-
-def get_collection_data(
-    s: requests.session, base_url: str, csrf_token: str, address_id: str
-) -> str:
-    """
-    Gets the collection data
-    :rtype: str
-    :param s: requests.Session() to use
-    :param base_url: Base URL to use
-    :param csrf_token: CSRF token to use
-    :param address_id: Address id to use
-    :param retries: Retries count
-    :return: Collection data
-    """
-    collection_data = ""
-    if address_id != "0":
-        form_data = {
-            "form_check": csrf_token,
-            "submitted_page_id": "PAG0000898EECEC1",
-            "submitted_widget_group_id": "PWG0002644EECEC1",
-            "submitted_widget_group_type": "modify",
-            "submission_token": "63e9126bacd815.12997577",
-            "payload[PAG0000898EECEC1][PWG0002644EECEC1][PCL0005629EECEC1][formtable]"
-            "[C_63e9126bacfb3][PCF0020408EECEC1]": address_id,
-            "payload[PAG0000898EECEC1][PWG0002644EECEC1][PCL0005629EECEC1][formtable]"
-            "[C_63e9126bacfb3][PCF0021449EECEC1]": "1",
-            "payload[PAG0000898EECEC1][PWG0002644EECEC1][PCL0005629EECEC1][formtable]"
-            "[C_63e9126bacfb3][PCF0020072EECEC1]": "Next",
-            "submit_fragment_id": "PCF0020072EECEC1",
-            "_session_storage": json.dumps({"_global": get_session_storage_global()}),
-            "_update_page_content_request": 1,
-            "form_check_ajax": csrf_token,
-        }
-        response = s.post(
-            base_url
-            + "/wasteservices/w/webpage/bin-day-enter-address?webpage_subpage_id=PAG0000898EECEC1"
-            "&webpage_token=faab02e1f62a58f7bad4c2ae5b8622e19846b97dde2a76f546c4bb1230cee044",
-            headers=get_headers(base_url, "POST"),
-            data=form_data,
-        )
-        if response.status_code == 200 and len(response.text) > 0:
-            json_response = json.loads(response.text)
-            form_data = {
-                "_dummy": 1,
-                "_session_storage": json.dumps(
-                    {"_global": get_session_storage_global()}
-                ),
-                "_update_page_content_request": 1,
-                "form_check_ajax": csrf_token,
-            }
-            response = s.post(
-                base_url + json_response["redirect_url"],
-                headers=get_headers(base_url, "POST"),
-                data=form_data,
-            )
-            if response.status_code == 200 and len(response.text) > 0:
-                json_response = json.loads(response.text)
-                collection_data = json_response["data"]
-            else:
-                raise ValueError("Code 4: Failed to get bin data.")
-        else:
-            raise ValueError(
-                "Code 5: Failed to get bin data. Too many requests. Please wait a few minutes before trying again."
-            )
-    return collection_data
-
-
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -240,47 +19,121 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-
-
-
-
-
-
-
+        driver = None
+        try:
+            user_postcode = kwargs.get("postcode")
+            if not user_postcode:
+                raise ValueError("No postcode provided.")
+            check_postcode(user_postcode)
+
+            user_paon = kwargs.get("paon")
+            check_paon(user_paon)
+            headless = kwargs.get("headless")
+            web_driver = kwargs.get("web_driver")
+            driver = create_webdriver(web_driver, headless, None, __name__)
+            page = "https://service.croydon.gov.uk/wasteservices/w/webpage/bin-day-enter-address"
+
+            driver.maximize_window()
+
+            driver.get(page)
+
+            postcode_input = WebDriverWait(driver, 60).until(
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, 'input[data-ts_identifier="postcode_input"]')
+                )
+            )
 
-
-        csrf_token = get_csrf_token(s, base_url)
-        # Next, get the address_id
-        address_id = get_address_id(s, base_url, csrf_token, postcode, paon)
-        # Finally, use the address_id to get the collection data
-        collection_data = get_collection_data(s, base_url, csrf_token, address_id)
-        if collection_data != "":
-            soup = BeautifulSoup(collection_data, features="html.parser")
-            soup.prettify()
+            postcode_input.send_keys(user_postcode + Keys.ENTER)
 
-
-
-
+            time.sleep(5)
+            # Wait for address box to be visible
+            select_address_input = WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable(
+                    (By.CSS_SELECTOR, 'select[data-ts_identifier="address_selection"]')
+                )
             )
 
-            #
-
+            # Select address based on house number (paon)
+            select = Select(select_address_input)
+            paon = str(user_paon)  # Ensure paon is a string for comparison
+            address_found = False
 
-            for
-
-
-
-
-
-                    collection_date, "%A %d %B %Y"
-                ).strftime(date_format),
-            }
-            data["bins"].append(dict_data)
+            for option in select.options:
+                # Look for house number pattern with surrounding spaces to avoid partial matches
+                if f" {paon} " in f" {option.text} ":
+                    select.select_by_value(option.get_attribute("value"))
+                    address_found = True
+                    break
 
-            if
+            if not address_found:
                 raise ValueError(
-                    "
-
+                    f"Address with house number {paon} not found in the dropdown."
+                )
+
+            # Click the "Next" button
+            next_button = WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable(
+                    (By.CSS_SELECTOR, 'input[type="submit"][value="Next"]')
+                )
+            )
+            next_button.click()
+
+            # Wait for the bin collection content to load
+            collection_content = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located(
+                    (
+                        By.XPATH,
+                        '//*[@id="mats_content_wrapper"]/div[2]/div[2]/div[2]/div/div[1]/div/div[3]/div/div/div/div',
+                    )
                 )
+            )
 
-
+            soup = BeautifulSoup(driver.page_source, "html.parser")
+
+            bin_data = {"bins": []}
+
+            # Find all bin collection sections
+            bin_sections = soup.find_all("div", {"class": "listing_template_record"})
+
+            for section in bin_sections:
+                # Get bin type from h2 tag
+                bin_type_elem = section.find("h2")
+                if bin_type_elem:
+                    bin_type = bin_type_elem.text.strip()
+
+                    # Find collection date span
+                    date_span = section.find("span", {"class": "value-as-text"})
+                    if date_span:
+                        collection_date_string = date_span.text.strip()
+
+                        # Convert date string to required format
+                        try:
+                            # Parse the date string (e.g., "Sunday 1 June 2025")
+                            parsed_date = datetime.strptime(
+                                collection_date_string, "%A %d %B %Y"
+                            )
+                            # Format as dd/mm/yyyy
+                            formatted_date = parsed_date.strftime("%d/%m/%Y")
+
+                            # Create bin entry
+                            bin_info = {
+                                "type": bin_type,
+                                "collectionDate": formatted_date,
+                            }
+                            bin_data["bins"].append(bin_info)
+                        except ValueError as e:
+                            print(f"Error parsing date '{collection_date_string}': {e}")
+
+            if not bin_data["bins"]:
+                raise ValueError("No bin collection data found")
+
+        except Exception as e:
+            # Here you can log the exception if needed
+            print(f"An error occurred: {e}")
+            # Optionally, re-raise the exception if you want it to propagate
+            raise
+        finally:
+            # This block ensures that the driver is closed regardless of an exception
+            if driver:
+                driver.quit()
+            return bin_data
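The rewritten Croydon scraper drops the hand-rolled POSTs (headers, CSRF token, hard-coded session-storage payloads) in favour of a browser driven through explicit waits. A trimmed sketch of the wait-then-select idiom it now relies on, using real Selenium APIs but a hypothetical page and selectors:

```python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait

driver = webdriver.Chrome()
try:
    driver.get("https://example.invalid/bin-day-enter-address")  # hypothetical page

    # Block until the dropdown is interactable rather than sleeping a fixed time.
    dropdown = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "select#address"))  # hypothetical selector
    )

    select = Select(dropdown)
    house_number = "12"
    for option in select.options:
        # Pad with spaces, as the hunk above does, so "12" cannot match "112".
        if f" {house_number} " in f" {option.text} ":
            select.select_by_value(option.get_attribute("value"))
            break
finally:
    driver.quit()
```

Explicit waits poll until the condition holds (or the timeout raises), which is why the new code tolerates the council site's slow page transitions without the fixed retry logic the old request-based flow needed.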
uk_bin_collection/uk_bin_collection/councils/DartfordBoroughCouncil.py:

@@ -1,4 +1,5 @@
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -12,7 +13,19 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://windmz.dartford.gov.uk/ufs/WS_CHECK_COLLECTIONS.eb?UPRN={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
         # Make a BS4 object
+        page = requests.get(url)
         soup = BeautifulSoup(page.text, features="html.parser")
         soup.prettify()
 
uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py:

@@ -1,12 +1,27 @@
-from bs4 import BeautifulSoup
-from datetime import datetime
 import re
+from datetime import datetime
+
+from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *  # Consider specific imports
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://collections.dover.gov.uk/property/{user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
+        # Make a BS4 object
+        page = requests.get(url)
         soup = BeautifulSoup(page.text, "html.parser")
 
         bins_data = {"bins": []}
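Dartford, Dover, and East Devon all gain the same two-step body once the URL is resolved: fetch with `requests`, parse with BeautifulSoup, and accumulate results into a `{"bins": [...]}` dict. A bare-bones skeleton of that shape, with a hypothetical URL and markup:

```python
import requests
from bs4 import BeautifulSoup

page = requests.get("https://example.invalid/property/100012345678")  # hypothetical URL
soup = BeautifulSoup(page.text, "html.parser")

bins_data = {"bins": []}
for row in soup.select("div.bin-collection"):  # hypothetical selector
    bins_data["bins"].append(
        {
            "type": row.select_one("h3").get_text(strip=True),
            "collectionDate": row.select_one("span.date").get_text(strip=True),
        }
    )
print(bins_data)
```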
uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py:

@@ -3,7 +3,8 @@ from datetime import datetime
 
 import pandas as pd
 from bs4 import BeautifulSoup
-
+
+from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
@@ -15,7 +16,19 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://eastdevon.gov.uk/recycling-and-waste/recycling-waste-information/when-is-my-bin-collected/future-collections-calendar/?UPRN={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
         # Make a BS4 object
+        page = requests.get(url)
         soup = BeautifulSoup(page.text, features="html.parser")
         soup.prettify()
 
uk_bin_collection/uk_bin_collection/councils/EastbourneBoroughCouncil.py (new file):

@@ -0,0 +1,76 @@
+# Lewes Borough Council uses the same script.
+
+from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://environmentfirst.co.uk/house.php?uprn={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
+        # Make a BS4 object
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, features="html.parser")
+        soup.prettify()
+
+        # Get the paragraph lines from the page
+        data = {"bins": []}
+        page_text = soup.find("div", {"class": "collect"}).find_all("p")
+
+        # Parse the correct lines (find them, remove the ordinal indicator and make them the correct format date) and
+        # then add them to the dictionary
+        rubbish_day = datetime.strptime(
+            remove_ordinal_indicator_from_date_string(
+                page_text[2].find_next("strong").text
+            ),
+            "%d %B %Y",
+        ).strftime(date_format)
+        dict_data = {
+            "type": "Rubbish",
+            "collectionDate": rubbish_day,
+        }
+        data["bins"].append(dict_data)
+        recycling_day = datetime.strptime(
+            remove_ordinal_indicator_from_date_string(
+                page_text[4].find_next("strong").text
+            ),
+            "%d %B %Y",
+        ).strftime(date_format)
+        dict_data = {
+            "type": "Recycling",
+            "collectionDate": recycling_day,
+        }
+        data["bins"].append(dict_data)
+
+        if len(page_text) > 5:
+            garden_day = datetime.strptime(
+                remove_ordinal_indicator_from_date_string(
+                    page_text[6].find_next("strong").text
+                ),
+                "%d %B %Y",
+            ).strftime(date_format)
+            dict_data = {
+                "type": "Garden",
+                "collectionDate": garden_day,
+            }
+            data["bins"].append(dict_data)
+
+        return data
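The Eastbourne/Lewes script leans on `remove_ordinal_indicator_from_date_string` and `date_format` from `common`. A self-contained approximation of that date handling (the regex and the output format here are assumptions, not the helper's actual source):

```python
import re
from datetime import datetime

date_format = "%d/%m/%Y"  # assumed; the real constant lives in common.py


def remove_ordinal_indicator_from_date_string(date_string: str) -> str:
    # Strip st/nd/rd/th only when attached to a digit: "1st June" -> "1 June".
    return re.sub(r"(?<=\d)(st|nd|rd|th)", "", date_string)


rubbish_day = datetime.strptime(
    remove_ordinal_indicator_from_date_string("1st June 2025"), "%d %B %Y"
).strftime(date_format)
print(rubbish_day)  # 01/06/2025
```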