uk_bin_collection 0.154.0__py3-none-any.whl → 0.158.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +21 -10
- uk_bin_collection/uk_bin_collection/councils/AberdeenCityCouncil.py +0 -1
- uk_bin_collection/uk_bin_collection/councils/DacorumBoroughCouncil.py +22 -13
- uk_bin_collection/uk_bin_collection/councils/EastDunbartonshireCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/IslingtonCouncil.py +8 -5
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +23 -10
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughSutton.py +60 -49
- uk_bin_collection/uk_bin_collection/councils/MidSuffolkDistrictCouncil.py +70 -92
- uk_bin_collection/uk_bin_collection/councils/NewForestCouncil.py +104 -47
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +138 -21
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +182 -3
- uk_bin_collection/uk_bin_collection/councils/OxfordCityCouncil.py +1 -0
- uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +170 -13
- uk_bin_collection/uk_bin_collection/councils/RotherhamCouncil.py +70 -38
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +136 -21
- uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +18 -22
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +138 -21
- {uk_bin_collection-0.154.0.dist-info → uk_bin_collection-0.158.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.154.0.dist-info → uk_bin_collection-0.158.0.dist-info}/RECORD +22 -21
- {uk_bin_collection-0.154.0.dist-info → uk_bin_collection-0.158.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.154.0.dist-info → uk_bin_collection-0.158.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.154.0.dist-info → uk_bin_collection-0.158.0.dist-info}/entry_points.txt +0 -0
@@ -1,3 +1,4 @@
+import time
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
@@ -27,14 +28,26 @@ class CouncilClass(AbstractGetBinDataClass):
         check_paon(user_paon)
         check_postcode(user_postcode)

-        # Create Selenium webdriver
-
+        # Create Selenium webdriver with user agent to bypass Cloudflare
+        user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+        driver = create_webdriver(web_driver, headless, user_agent, __name__)
         driver.get("https://www.renfrewshire.gov.uk/bin-day")

-
-
+        # Wait for initial page load and Cloudflare bypass
+        WebDriverWait(driver, 30).until(
+            lambda d: "Just a moment" not in d.title and d.title != ""
         )
-
+        time.sleep(3)
+
+        # Try to accept cookies if the banner appears
+        try:
+            accept_button = WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable((By.ID, "ccc-notify-accept"))
+            )
+            accept_button.click()
+            time.sleep(2)
+        except:
+            pass

         # Wait for the postcode field to appear then populate it
         inputElement_postcode = WebDriverWait(driver, 30).until(
@@ -64,23 +77,167 @@ class CouncilClass(AbstractGetBinDataClass):
             )
         ).click()

-        #
-
-
-
+        # Handle Cloudflare challenge that appears after address selection
+        # Wait for page to potentially show Cloudflare challenge
+        time.sleep(3)
+
+        # Check if we hit a Cloudflare challenge
+        if "Just a moment" in driver.page_source or "Verify you are human" in driver.page_source:
+            print("Cloudflare challenge detected, trying to bypass...")
+
+            # If we hit Cloudflare, try recreating driver with JS enabled
+            driver.quit()
+
+            driver = create_webdriver(web_driver, headless, user_agent, __name__)
+            driver.get("https://www.renfrewshire.gov.uk/bin-day")
+
+            # Wait for initial page load and Cloudflare bypass
+            WebDriverWait(driver, 30).until(
+                lambda d: "Just a moment" not in d.title and d.title != ""
             )
-
+            time.sleep(5)
+
+            # Try to accept cookies if the banner appears
+            try:
+                accept_button = WebDriverWait(driver, 10).until(
+                    EC.element_to_be_clickable((By.ID, "ccc-notify-accept"))
+                )
+                accept_button.click()
+                time.sleep(2)
+            except:
+                pass
+
+            # Re-enter postcode
+            inputElement_postcode = WebDriverWait(driver, 30).until(
+                EC.presence_of_element_located(
+                    (By.ID, "RENFREWSHIREBINCOLLECTIONS_PAGE1_ADDRESSLOOKUPPOSTCODE")
+                )
+            )
+            inputElement_postcode.send_keys(user_postcode)
+
+            # Click search button
+            findAddress = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located(
+                    (By.ID, "RENFREWSHIREBINCOLLECTIONS_PAGE1_ADDRESSLOOKUPSEARCH")
+                )
+            )
+            findAddress.click()
+
+            # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
+            WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable(
+                    (
+                        By.XPATH,
+                        "//select[@id='RENFREWSHIREBINCOLLECTIONS_PAGE1_ADDRESSLOOKUPADDRESS']//option[contains(., '"
+                        + user_paon
+                        + "')]",
+                    )
+                )
+            ).click()
+
+        # Handle potential second Cloudflare challenge
+        time.sleep(3)
+        if "Just a moment" in driver.page_source or "Verify you are human" in driver.page_source:
+            print("Second Cloudflare challenge detected, waiting...")
+
+            # Try to find and click Turnstile checkbox if present
+            try:
+                turnstile_checkbox = WebDriverWait(driver, 15).until(
+                    EC.element_to_be_clickable((By.CSS_SELECTOR, "input[type='checkbox']"))
+                )
+                turnstile_checkbox.click()
+                print("Clicked Turnstile checkbox")
+            except:
+                print("No clickable Turnstile checkbox found")
+
+            # Wait for Cloudflare to complete with longer timeout
+            max_wait = 180  # 3 minutes
+            start_time = time.time()
+            while time.time() - start_time < max_wait:
+                current_source = driver.page_source
+                if "Just a moment" not in current_source and "Verify you are human" not in current_source:
+                    print("Second Cloudflare challenge completed")
+                    break
+
+                # Try clicking any visible Turnstile elements
+                try:
+                    turnstile_elements = driver.find_elements(By.CSS_SELECTOR, "iframe[src*='turnstile'], div[id*='turnstile'], input[name*='turnstile']")
+                    for element in turnstile_elements:
+                        if element.is_displayed():
+                            element.click()
+                            print("Clicked Turnstile element")
+                            break
+                except:
+                    pass
+
+                time.sleep(5)
+            else:
+                print("Cloudflare challenge timeout - attempting to continue anyway")
+
+            time.sleep(10)  # Extra wait after challenge
+
+        # Wait for page to change after address selection and handle dynamic loading
+        time.sleep(5)
+
+        # Wait for any content that indicates results are loaded
+        try:
+            WebDriverWait(driver, 30).until(
+                EC.presence_of_element_located((By.ID, "RENFREWSHIREBINCOLLECTIONS_PAGE1_COLLECTIONDETAILS"))
+            )
+            print("Collection details found")
+        except:
+            print("Collection details not found, checking for any collection content")
+            # If collection details not found, wait for page to stabilize and check for any collection content
+            time.sleep(10)
+            try:
+                WebDriverWait(driver, 20).until(
+                    EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'collection') or contains(text(), 'Collection') or contains(text(), 'bin') or contains(text(), 'Bin')]"))
+                )
+                print("Found some collection-related content")
+            except:
+                print("No collection content found, proceeding anyway")

         soup = BeautifulSoup(driver.page_source, features="html.parser")

+        # Save page source for debugging
+        with open("debug_renfrewshire.html", "w", encoding="utf-8") as f:
+            f.write(driver.page_source)
+        print(f"Page title: {driver.title}")
+        print(f"Current URL: {driver.current_url}")
+
         next_collection_div = soup.find(
             "div", {"class": "collection collection--next"}
         )

+        if not next_collection_div:
+            # Check if we're still on Cloudflare page
+            if "Just a moment" in driver.page_source or "Verify you are human" in driver.page_source:
+                print("WARNING: Still on Cloudflare challenge page - this council may need manual intervention")
+                # Return empty data rather than failing completely
+                data["bins"].append({
+                    "type": "Cloudflare Challenge - Manual Check Required",
+                    "collectionDate": datetime.now().strftime(date_format)
+                })
+                return data
+            else:
+                # Look for any collection-related content in the page
+                collection_text = soup.find_all(text=lambda text: text and any(word in text.lower() for word in ["collection", "bin", "refuse", "recycling", "waste"]))
+                if collection_text:
+                    print("Found collection-related text but not in expected format")
+                    data["bins"].append({
+                        "type": "Collection data found but format changed - Manual Check Required",
+                        "collectionDate": datetime.now().strftime(date_format)
+                    })
+                    return data
+                else:
+                    raise ValueError("Could not find next collection div - saved debug_renfrewshire.html")
+
+        next_collection_date_elem = next_collection_div.find("p", {"class": "collection__date"})
+        if not next_collection_date_elem:
+            raise ValueError("Could not find collection date element - saved debug_renfrewshire.html")
+
         next_collection_date = datetime.strptime(
-
-            .get_text()
-            .strip(),
+            next_collection_date_elem.get_text().strip(),
             "%A %d %B %Y",
         )

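
The added Renfrewshire flow above leans on one recurring pattern: poll the page title and source for Cloudflare's interstitial text before touching the form. Below is a minimal standalone sketch of that wait, assuming a Selenium WebDriver supplied by the caller; the helper name is illustrative and not part of the package.

```python
# Hedged sketch: wait for a Cloudflare "Just a moment..." interstitial to clear.
# Assumes a Selenium WebDriver instance is created elsewhere (e.g. by the
# project's create_webdriver helper); wait_for_cloudflare is an invented name.
import time

from selenium.webdriver.support.wait import WebDriverWait


def wait_for_cloudflare(driver, timeout: int = 30, settle: int = 3) -> bool:
    """Return True once the page title no longer looks like a Cloudflare challenge."""
    try:
        WebDriverWait(driver, timeout).until(
            lambda d: d.title != "" and "Just a moment" not in d.title
        )
    except Exception:
        return False  # challenge did not clear within the timeout
    time.sleep(settle)  # give the real page a moment to finish rendering
    return "Verify you are human" not in driver.page_source
```

The diff applies this check twice, once after the initial page load and again after the address is selected, and falls back to a placeholder "Cloudflare Challenge - Manual Check Required" entry when the challenge never clears.
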
@@ -1,57 +1,89 @@
-from bs4 import BeautifulSoup
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+import requests
+from datetime import datetime


-# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
-
-
-
+    Rotherham collections via the public JSON API.
+    Returns the same shape as before:
+      {"bins": [{"type": "Black Bin", "collectionDate": "Tuesday, 29 September 2025"}, ...]}
+    Accepts kwargs['premisesid'] (recommended) or a numeric kwargs['uprn'].
     """

     def parse_data(self, page: str, **kwargs) -> dict:
-
+        # prefer explicit premisesid, fallback to uprn (if numeric)
+        premises = kwargs.get("premisesid")
+        uprn = kwargs.get("uprn")

-
+        if uprn:
+            # preserve original behaviour where check_uprn exists for validation,
+            # but don't fail if uprn is intended as a simple premises id number.
+            try:
+                check_uprn(uprn)
+            except Exception:
+                # silently continue — user may have passed a numeric premises id as uprn
+                pass
+
+        if not premises and str(uprn).strip().isdigit():
+            premises = str(uprn).strip()
+
+        if not premises:
+            raise ValueError("No premises ID supplied. Pass 'premisesid' in kwargs or a numeric 'uprn'.")
+
+        api_url = "https://bins.azurewebsites.net/api/getcollections"
+        params = {
+            "premisesid": str(premises),
+            "localauthority": kwargs.get("localauthority", "Rotherham"),
+        }
         headers = {
-            "
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
+            "User-Agent": "UKBinCollectionData/1.0 (+https://github.com/robbrad/UKBinCollectionData)"
         }
-        response = requests.post(
-            "https://www.rotherham.gov.uk/bin-collections?address={}&submit=Submit".format(
-                user_uprn
-            ),
-            headers=headers
-        )
-        # Make a BS4 object
-        soup = BeautifulSoup(response.text, features="html.parser")
-        soup.prettify()

-
+        try:
+            resp = requests.get(api_url, params=params, headers=headers, timeout=10)
+        except Exception as exc:
+            print(f"Error contacting Rotherham API: {exc}")
+            return {"bins": []}
+
+        if resp.status_code != 200:
+            print(f"Rotherham API request failed ({resp.status_code}). URL: {resp.url}")
+            return {"bins": []}

-
+        try:
+            collections = resp.json()
+        except ValueError:
+            print("Rotherham API returned non-JSON response.")
+            return {"bins": []}
+
+        data = {"bins": []}
+        seen = set()  # dedupe identical (type, date) pairs
+        for item in collections:
+            bin_type = item.get("BinType") or item.get("bintype") or "Unknown"
+            date_str = item.get("CollectionDate") or item.get("collectionDate")
+            if not date_str:
+                continue

-
-
+            # API gives ISO date like '2025-09-29' (or possibly '2025-09-29T00:00:00').
+            try:
+                iso_date = date_str.split("T")[0]
+                parsed = datetime.strptime(iso_date, "%Y-%m-%d")
+                formatted = parsed.strftime(date_format)
+            except Exception:
+                # skip malformed dates
+                continue

-
-
-
-
-                bin_collection = bin_info_cell[1]
+            key = (bin_type.strip().lower(), formatted)
+            if key in seen:
+                continue
+            seen.add(key)

-
-
-                    "type": bin_type.title(),
-                    "collectionDate": datetime.strptime(
-                        bin_collection.get_text(strip=True), "%A, %d %B %Y"
-                    ).strftime(date_format),
-                }
+            dict_data = {"type": bin_type.title(), "collectionDate": formatted}
+            data["bins"].append(dict_data)

-
-
-                print("
+        if not data["bins"]:
+            # helpful debugging note
+            print(f"Rotherham API returned no collection entries for premisesid={premises}")

-        return data
+        return data
@@ -1,4 +1,9 @@
+import datetime
+
 from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.wait import WebDriverWait

 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
@@ -13,6 +18,7 @@ class CouncilClass(AbstractGetBinDataClass):
     """

     def parse_data(self, page: str, **kwargs) -> dict:
+<<<<<<< HEAD
         user_postcode = kwargs.get("postcode")
         check_postcode(user_postcode)
         user_uprn = kwargs.get("uprn")
@@ -43,10 +49,16 @@ class CouncilClass(AbstractGetBinDataClass):
             i["data-for"]: i.get("value", "")
             for i in soup.select("input[data-for]")
         }
-
-
-
-        )
+
+        # Check if required form elements exist
+        salt_element = soup.select_one('input[id="pSalt"]')
+        protected_element = soup.select_one('input[id="pPageItemsProtected"]')
+
+        if not salt_element or not protected_element:
+            raise Exception("Required form elements not found. The council website may have changed or be unavailable.")
+
+        payload_salt = salt_element.get("value")
+        payload_protected = protected_element.get("value")

         # Add the PostCode and 'SEARCH' to the payload
         payload["p_request"] = "SEARCH"
@@ -123,10 +135,16 @@ class CouncilClass(AbstractGetBinDataClass):
             i["data-for"]: i.get("value", "")
             for i in soup.select("input[data-for]")
         }
-
-
-
-        )
+
+        # Check if required form elements exist
+        salt_element = soup.select_one('input[id="pSalt"]')
+        protected_element = soup.select_one('input[id="pPageItemsProtected"]')
+
+        if not salt_element or not protected_element:
+            raise Exception("Required form elements not found. The council website may have changed or be unavailable.")
+
+        payload_salt = salt_element.get("value")
+        payload_protected = protected_element.get("value")

         # Add the UPRN and 'SUBMIT' to the payload
         payload["p_request"] = "SUBMIT"
@@ -187,18 +205,115 @@ class CouncilClass(AbstractGetBinDataClass):

         # Create a BeautifulSoup object from the page's HTML
         soup = BeautifulSoup(resource.text, "html.parser")
+=======
+        driver = None
+        try:
+>>>>>>> master
         data = {"bins": []}
+            url = kwargs.get("url")
+            user_paon = kwargs.get("paon")
+            user_postcode = kwargs.get("postcode")
+            web_driver = kwargs.get("web_driver")
+            headless = kwargs.get("headless")
+            check_paon(user_paon)
+            check_postcode(user_postcode)
+
+            # Use a realistic user agent to help bypass Cloudflare
+            user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+            driver = create_webdriver(web_driver, headless, user_agent, __name__)
+            driver.get("https://www.somerset.gov.uk/collection-days")
+
+            # Wait for the postcode field to appear then populate it
+            inputElement_postcode = WebDriverWait(driver, 30).until(
+                EC.presence_of_element_located((By.ID, "postcodeSearch"))
+            )
+            inputElement_postcode.send_keys(user_postcode)
+
+            # Click search button
+            findAddress = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.CLASS_NAME, "govuk-button"))
+            )
+            findAddress.click()
+
+            # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
+            WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable(
+                    (
+                        By.XPATH,
+                        "//select[@id='addressSelect']//option[contains(., '"
+                        + user_paon
+                        + "')]",
+                    )
+                )
+            ).click()
+
+            # Wait for the collections table to appear
+            WebDriverWait(driver, 20).until(
+                EC.presence_of_element_located(
+                    (
+                        By.XPATH,
+                        "//h2[contains(@class,'mt-4') and contains(@class,'govuk-heading-s') and normalize-space(.)='Your next collections']",
+                    )
+                )
+            )
+
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+
+            collections = soup.find_all("div", {"class": "p-2"})
+
+            for collection in collections:
+                bin_type = collection.find("h3").get_text()
+
+                next_collection = soup.find("div", {"class": "fw-bold"}).get_text()
+
+                following_collection = soup.find(
+                    lambda t: (
+                        t.name == "div"
+                        and t.get_text(strip=True).lower().startswith("followed by")
+                    )
+                ).get_text()
+
+                next_collection_date = datetime.strptime(next_collection, "%A %d %B")
+
+                following_collection_date = datetime.strptime(
+                    following_collection, "followed by %A %d %B"
+                )
+
+                current_date = datetime.now()
+                next_collection_date = next_collection_date.replace(
+                    year=current_date.year
+                )
+                following_collection_date = following_collection_date.replace(
+                    year=current_date.year
+                )
+
+                next_collection_date = get_next_occurrence_from_day_month(
+                    next_collection_date
+                )
+
+                following_collection_date = get_next_occurrence_from_day_month(
+                    following_collection_date
+                )
+
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": next_collection_date.strftime(date_format),
+                }
+                data["bins"].append(dict_data)
+
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": following_collection_date.strftime(date_format),
+                }
+                data["bins"].append(dict_data)

-
-
-
-
-
-
-
-
-
-
-            data["bins"].append(dict_data)
-
-        return data
+        except Exception as e:
+            # Here you can log the exception if needed
+            print(f"An error occurred: {e}")
+            # Optionally, re-raise the exception if you want it to propagate
+            raise
+        finally:
+            # This block ensures that the driver is closed regardless of an exception
+            if driver:
+                driver.quit()
+        return data
@@ -6,17 +6,16 @@ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataC

 def format_bin_data(key: str, date: datetime):
     formatted_date = date.strftime(date_format)
-
-
-
-        return [
-
-
-
-    elif re.match(r"^G\d+$", key) is not None:
+    servicename = key.get("hso_servicename")
+    print(servicename)
+    if re.match(r"^Recycl", servicename) is not None:
+        return [ ("Recycling", formatted_date) ]
+    elif re.match(r"^Refuse", servicename) is not None:
+        return [("General Waste (Black Bin)", formatted_date)]
+    elif re.match(r"^Garden", servicename) is not None:
         return [("Garden Waste (Green Bin)", formatted_date)]
-    elif re.match(r"^
-        return [("
+    elif re.match(r"^Food", servicename) is not None:
+        return [("Food Waste", formatted_date)]
     else:
         return None

@@ -27,37 +26,34 @@ class CouncilClass(AbstractGetBinDataClass):
         check_uprn(uprn)

         api_url = (
-            f"https://
-            f"
+            f"https://api.southglos.gov.uk/wastecomp/GetCollectionDetails"
+            f"?uprn={uprn}"
         )

         headers = {"content-type": "application/json"}

         response = requests.get(api_url, headers=headers)

-        json_response = json
+        json_response = response.json()
         if not json_response:
             raise ValueError("No collection data found for provided UPRN.")

-        collection_data = json_response
+        collection_data = json_response.get('value')

         today = datetime.today()
         eight_weeks = datetime.today() + timedelta(days=8 * 7)
         data = {"bins": []}
         collection_tuple = []
-
-
-
-                continue
-
-            item = collection_data[key]
+        for collection in collection_data:
+            print(collection)
+            item = collection.get('hso_nextcollection')

             if item == "":
                 continue

-            collection_date = datetime.
+            collection_date = datetime.fromisoformat(item)
             if today.date() <= collection_date.date() <= eight_weeks.date():
-                bin_data = format_bin_data(
+                bin_data = format_bin_data(collection, collection_date)
                 if bin_data is not None:
                     for bin_date in bin_data:
                         collection_tuple.append(bin_date)
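
The reworked South Gloucestershire parser above maps each record's hso_servicename prefix to a bin label and keeps only collections due within the next eight weeks. Below is a compact standalone sketch of that mapping and window filter; the field names come from the diff, while the overall response shape and the date_format default are assumptions.

```python
# Hedged sketch: map API service names to bin labels and keep only collections
# due within the next eight weeks. Field names (hso_servicename,
# hso_nextcollection) come from the diff; everything else is illustrative.
import re
from datetime import datetime, timedelta

LABELS = [
    (r"^Recycl", "Recycling"),
    (r"^Refuse", "General Waste (Black Bin)"),
    (r"^Garden", "Garden Waste (Green Bin)"),
    (r"^Food", "Food Waste"),
]


def upcoming_bins(collections: list, date_format: str = "%d/%m/%Y") -> list:
    today = datetime.today()
    cutoff = today + timedelta(weeks=8)
    results = []
    for record in collections:
        raw = record.get("hso_nextcollection") or ""
        if not raw:
            continue
        when = datetime.fromisoformat(raw)
        if not (today.date() <= when.date() <= cutoff.date()):
            continue
        for pattern, label in LABELS:
            if re.match(pattern, record.get("hso_servicename", "")):
                results.append((label, when.strftime(date_format)))
                break
    return results
```

Feeding it the json_response.get('value') list from the diff would yield (label, date) tuples roughly equivalent to the collection_tuple the council class builds.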