uk_bin_collection 0.153.0__py3-none-any.whl → 0.154.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +13 -15
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +45 -120
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +4 -1
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +15 -36
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +55 -24
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +32 -34
- uk_bin_collection/uk_bin_collection/councils/FarehamBoroughCouncil.py +5 -2
- uk_bin_collection/uk_bin_collection/councils/FolkstoneandHytheDistrictCouncil.py +22 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/HartlepoolBoroughCouncil.py +3 -1
- uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py +26 -128
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +63 -79
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +4 -2
- uk_bin_collection/uk_bin_collection/councils/SandwellBoroughCouncil.py +4 -11
- uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py +39 -21
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +16 -13
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.154.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.154.0.dist-info}/RECORD +21 -21
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.154.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.154.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.154.0.dist-info}/entry_points.txt +0 -0
@@ -125,23 +125,6 @@ class CouncilClass(AbstractGetBinDataClass):
             # Wait for the page to load - giving it extra time
             time.sleep(5)
 
-            # Use only the selector that we know works
-            # print("Looking for bin type elements...")
-            try:
-                bin_type_selector = (
-                    By.CSS_SELECTOR,
-                    "div.formatting_bold.formatting_size_bigger.formatting span.value-as-text",
-                )
-                WebDriverWait(driver, 15).until(
-                    EC.presence_of_element_located(bin_type_selector)
-                )
-                # print(f"Found bin type elements with selector: {bin_type_selector}")
-            except TimeoutException:
-                # print("Could not find bin type elements. Taking screenshot for debugging...")
-                screenshot_path = f"bin_type_error_{int(time.time())}.png"
-                driver.save_screenshot(screenshot_path)
-                # print(f"Screenshot saved to {screenshot_path}")
-
             # Create BS4 object from driver's page source
             # print("Parsing page with BeautifulSoup...")
             soup = BeautifulSoup(driver.page_source, features="html.parser")
@@ -149,122 +132,37 @@ class CouncilClass(AbstractGetBinDataClass):
             # Initialize data dictionary
             data = {"bins": []}
 
-
-
-
-
-
-
-            # Look specifically for date elements with the exact structure
-            date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
-            hidden_dates = soup.select(
-                "div.col-sm-12.font-xs-3xl input[type='hidden'][value*='/']"
-            )
-
-            # print(f"Found {len(bin_type_elements)} bin types and {len(date_elements)} date elements")
-
-            # We need a smarter way to match bin types with their dates
-            bin_count = 0
+            for row in soup.select(".listing_template_row"):
+                # Title (waste stream) is the first <p> in the section
+                first_p = row.find("p")
+                if not first_p:
+                    continue
+                stream = first_p.get_text(" ", strip=True)
 
-
-
+                for p in row.find_all("p"):
+                    t = p.get_text("\n", strip=True)
 
-
-
-
-
-
-            )
+                    if re.search(r"\bNext collection\b", t, flags=re.I):
+                        # Expect format: "Next collection\nTuesday 16th September 2025"
+                        parts = [x.strip() for x in t.split("\n") if x.strip()]
+                        if len(parts) >= 2:
+                            next_collection_display = parts[-1]  # last line
 
-
-
-
-
-
-
-            # Find hidden date inputs with values in DD/MM/YYYY format
-            hidden_date_values = []
-            for hidden in hidden_dates:
-                value = hidden.get("value", "")
-                if re.match(r"\d{1,2}/\d{1,2}/\d{4}", value):
-                    hidden_date_values.append(value)
-                    # print(f"Found hidden date value: {value}")
-
-            # When filtering date elements
-            date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
-            valid_date_elements = []
-
-            for element in date_elements:
-                text = element.get_text(strip=True)
-                if contains_date(text):
-                    valid_date_elements.append(element)
-                    # print(f"Found valid date element: {text}")
-                else:
-                    pass
-                    # print(f"Skipping non-date element: {text}")
-
-            # print(f"Found {len(bin_type_elements)} bin types and {len(valid_date_elements)} valid date elements")
-
-            # When processing each bin type
-            for i, bin_type_elem in enumerate(bin_type_elements):
-                bin_type = bin_type_elem.get_text(strip=True)
-
-                # Try to find a date for this bin type
-                date_text = None
-
-                # Look for a valid date element
-                if i < len(valid_date_elements):
-                    date_elem = valid_date_elements[i]
-                    date_text = date_elem.get_text(strip=True)
-
-                # If we don't have a valid date yet, try using the hidden input
-                if not date_text or not contains_date(date_text):
-                    if i < len(hidden_dates):
-                        date_value = hidden_dates[i].get("value")
-                        if contains_date(date_value):
-                            date_text = date_value
-
-                # Skip if we don't have a valid date
-                if not date_text or not contains_date(date_text):
-                    # print(f"No valid date found for bin type: {bin_type}")
-                    continue
+                            # Build record
+                            next_date = datetime.strptime(
+                                remove_ordinal_indicator_from_date_string(next_collection_display),
+                                "%A %d %B %Y",
+                            )
 
-                #
+                            # Create bin entry
+                            bin_entry = {
+                                "type": stream,
+                                "collectionDate": next_date.strftime(date_format),
+                            }
 
-
-
-
-
-                # Try to parse the date
-                try:
-                    collection_date = datetime.strptime(
-                        date_text, "%A %d %B %Y"
-                    ).date()
-                except ValueError:
-                    try:
-                        collection_date = datetime.strptime(
-                            date_text, "%d/%m/%Y"
-                        ).date()
-                    except ValueError:
-                        # Last resort
-                        collection_date = parse(date_text).date()
-
-                # Create bin entry
-                bin_entry = {
-                    "type": bin_type,
-                    "collectionDate": collection_date.strftime(date_format),
-                }
-
-                # Add to data
-                data["bins"].append(bin_entry)
-                bin_count += 1
-                # print(f"Added bin entry: {bin_entry}")
-
-            except Exception as e:
-                pass
-                # print(f"Error parsing date '{date_text}': {str(e)}")
-
-            # print(f"Successfully parsed {bin_count} bin collections")
+                            # Add to data
+                            data["bins"].append(bin_entry)
+                            # print(f"Added bin entry: {bin_entry}")
 
             if not data["bins"]:
                 # print("No bin data found. Saving page for debugging...")
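The rewrite above drops the brittle index-based pairing of bin-type elements with date elements in favour of per-row parsing: each `.listing_template_row` names its waste stream in the first `<p>`, and the collection date is the line after a "Next collection" label. A minimal, self-contained sketch of that approach against a static snippet — the local `remove_ordinal_indicator()` stands in for the library's `remove_ordinal_indicator_from_date_string()`, and the library's `date_format` is written out literally as `"%d/%m/%Y"`:

```python
import re
from datetime import datetime

from bs4 import BeautifulSoup

HTML = """
<div class="listing_template_row">
  <p>Rubbish</p>
  <p>Next collection
Tuesday 16th September 2025</p>
</div>
"""


def remove_ordinal_indicator(text: str) -> str:
    # Local stand-in: "16th September" -> "16 September"
    return re.sub(r"(?<=\d)(st|nd|rd|th)", "", text)


soup = BeautifulSoup(HTML, features="html.parser")
bins = []
for row in soup.select(".listing_template_row"):
    first_p = row.find("p")
    if not first_p:
        continue
    stream = first_p.get_text(" ", strip=True)  # waste stream name, e.g. "Rubbish"

    for p in row.find_all("p"):
        t = p.get_text("\n", strip=True)
        if re.search(r"\bNext collection\b", t, flags=re.I):
            parts = [x.strip() for x in t.split("\n") if x.strip()]
            if len(parts) >= 2:
                # Last line carries the date, e.g. "Tuesday 16th September 2025"
                next_date = datetime.strptime(
                    remove_ordinal_indicator(parts[-1]), "%A %d %B %Y"
                )
                bins.append(
                    {"type": stream, "collectionDate": next_date.strftime("%d/%m/%Y")}
                )

print(bins)  # [{'type': 'Rubbish', 'collectionDate': '16/09/2025'}]
```

Keying each date to the row it appears in removes the failure mode the old code worked around, where the i-th date element did not necessarily belong to the i-th bin type.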
@@ -1,17 +1,17 @@
 import time
+import datetime
 
+from datetime import datetime
 from bs4 import BeautifulSoup
 from selenium.common.exceptions import TimeoutException
 from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support.ui import Select, WebDriverWait
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-# import the wonderful Beautiful Soup and the URL grabber
-
-
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -30,16 +30,18 @@ class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
         try:
-            page = "https://
+            page = "https://bincollection.northumberland.gov.uk/postcode"
 
             data = {"bins": []}
 
-            user_paon = kwargs.get("paon")
             user_postcode = kwargs.get("postcode")
+            user_uprn = kwargs.get("uprn")
+
+            check_postcode(user_postcode)
+            check_uprn(user_uprn)
+
             web_driver = kwargs.get("web_driver")
             headless = kwargs.get("headless")
-            check_paon(user_paon)
-            check_postcode(user_postcode)
 
             # Create Selenium webdriver
             driver = create_webdriver(web_driver, headless, None, __name__)
@@ -50,105 +52,87 @@ class CouncilClass(AbstractGetBinDataClass):
 
             # Wait for and click cookie button
             cookie_button = wait.until(
-                EC.element_to_be_clickable(
+                EC.element_to_be_clickable(
+                    (By.CLASS_NAME, "accept-all")
+                )
             )
             cookie_button.click()
 
-            # Wait for and find
-
+            # Wait for and find postcode input
+            inputElement_pc = wait.until(
                 EC.presence_of_element_located(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtHouse",
-                    )
+                    (By.ID, "postcode")
                 )
             )
 
-            #
-            inputElement_pc
+            # Enter postcode and submit
+            inputElement_pc.send_keys(user_postcode)
+            inputElement_pc.send_keys(Keys.ENTER)
+
+            # Wait for and find house number input
+            selectElement_address = wait.until(
                 EC.presence_of_element_located(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtPostcode",
-                    )
+                    (By.ID, "address")
                 )
             )
 
-
-
-            inputElement_hn.send_keys(user_paon)
+            dropdown = Select(selectElement_address)
+            dropdown.select_by_value(user_uprn)
 
-            # Click
-
+            # Click submit button and wait for results
+            submit_button = wait.until(
                 EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_butLookup",
-                    )
+                    (By.CLASS_NAME, "govuk-button")
                 )
             )
-
+            submit_button.click()
 
             # Wait for results to load
             route_summary = wait.until(
                 EC.presence_of_element_located(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
-                    )
+                    (By.CLASS_NAME, "govuk-table")
                 )
             )
 
+            now = datetime.now()
+            current_month = now.month
+            current_year = now.year
+
             # Get page source after everything has loaded
             soup = BeautifulSoup(driver.page_source, features="html.parser")
 
-            #
-
-
-
-
-
-
-
-
-
+            # From the table, find all rows:
+            # - cell 1 is the date in format eg. 9 September (so no year value 🥲)
+            # - cell 2 is the day name, not useful
+            # - cell 3 is the bin type eg. "General waste", "Recycling", "Garden waste"
+            rows = soup.find("tbody", class_="govuk-table__body").find_all("tr", class_="govuk-table__row")
+
+            for row in rows:
+                bin_type=row.find_all("td")[-1].text.strip()
+
+                collection_date_string = row.find('th').text.strip()
+
+                # sometimes but not always the day is written "22nd" instead of 22 so make sure we get a proper int
+                collection_date_day = "".join([i for i in list(collection_date_string.split(" ")[0]) if i.isdigit()])
+                collection_date_month_name = collection_date_string.split(" ")[1]
+
+                # if we are currently in Oct, Nov, or Dec and the collection month is Jan, Feb, or Mar, let's assume its next year
+                if (current_month >= 10) and (collection_date_month_name in ["January", "February", "March"]):
+                    collection_date_year = current_year + 1
+                else:
+                    collection_date_year = current_year
+
+                collection_date = time.strptime(
+                    f"{collection_date_day} {collection_date_month_name} {collection_date_year}", "%d %B %Y"
                 )
-            )
 
-
-
-
-
-
-
-                bin_colour = self.extract_styles(style_str)["background-color"].upper()
-                bins_by_colours[bin_colour] = bin
-
-            # Work through the tables gathering the dates, if the cell has a background colour - match it to the bin type.
-            calander_tables = soup.find_all("table", title="Calendar")
-            for table in calander_tables:
-                # Get month and year
-                # First row in table is the header
-                rows = table.find_all("tr")
-                month_and_year = (
-                    rows[0].find("table", class_="calCtrlTitle").find("td").string
+                # Add it to the data
+                data["bins"].append(
+                    {
+                        "type": bin_type,
+                        "collectionDate": time.strftime(date_format, collection_date),
+                    }
                 )
-                bin_days = table.find_all("td", class_="calCtrlDay")
-                for day in bin_days:
-                    day_styles = self.extract_styles(day["style"])
-                    if "background-color" in day_styles:
-                        colour = day_styles["background-color"].upper()
-                        date = time.strptime(
-                            f"{day.string} {month_and_year}", "%d %B %Y"
-                        )
-
-                        # Add it to the data
-                        data["bins"].append(
-                            {
-                                "type": bins_by_colours[colour],
-                                "collectionDate": time.strftime(date_format, date),
-                            }
-                        )
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
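The rewritten scraper above has to guess the year, because the results table only gives dates like "9 September". Its rule: for lookups made in October through December, a January–March date is assumed to belong to the following year. A standalone sketch of that rule:

```python
import time
from datetime import datetime


def parse_collection_date(cell_text: str, now: datetime) -> time.struct_time:
    """Attach a year to a day-and-month table cell like '9 September' or '22nd March'."""
    day_token, month_name = cell_text.split(" ")[0], cell_text.split(" ")[1]
    day = "".join(ch for ch in day_token if ch.isdigit())  # "22nd" -> "22"
    # Oct-Dec lookups that return a Jan-Mar date are assumed to mean next year
    if now.month >= 10 and month_name in ["January", "February", "March"]:
        year = now.year + 1
    else:
        year = now.year
    return time.strptime(f"{day} {month_name} {year}", "%d %B %Y")


# Seen in November 2025, "2 January" must mean January 2026
parsed = parse_collection_date("2 January", datetime(2025, 11, 14))
print(time.strftime("%d/%m/%Y", parsed))  # 02/01/2026
```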
@@ -1,7 +1,8 @@
 from bs4 import BeautifulSoup
+from lxml import etree
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
-from lxml import etree
 
 
 # import the wonderful Beautiful Soup and the URL grabber
@@ -20,7 +21,8 @@ class CouncilClass(AbstractGetBinDataClass):
         collections = []
 
         # Convert the XML to JSON and load the next collection data
-        result = soup.find("p").contents[0]
+        result = soup.find("p").contents[0]
+
         json_data = json.loads(result)["NextCollection"]
 
         # Get general waste data
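For context on the hunk above: the endpoint returns its payload as JSON embedded in markup, so the text of the first `<p>` is pulled out and fed to `json.loads()`. A minimal illustration with an inline sample page — the key layout under `NextCollection` is illustrative only:

```python
import json

from bs4 import BeautifulSoup

# Sample payload; the real key layout under "NextCollection" will differ
page = '<html><body><p>{"NextCollection": {"GeneralWaste": "2025-09-16"}}</p></body></html>'

soup = BeautifulSoup(page, "html.parser")
result = soup.find("p").contents[0]  # the raw JSON text inside the first <p>
json_data = json.loads(result)["NextCollection"]
print(json_data)  # {'GeneralWaste': '2025-09-16'}
```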
@@ -28,17 +28,10 @@ class CouncilClass(AbstractGetBinDataClass):
         "Referer": "https://my.sandwell.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=",
     }
     LOOKUPS = [
-        (
-
-
-
-            "Recycling (Blue)",
-            "Household Waste (Grey)",
-            "Food Waste (Brown)",
-            "Batteries",
-        ],
-        ),
-        ("56b1cdaf6bb43", "GWDate", ["Garden Waste (Green)"]),
+        ("686295a88a750", "GWDate", ["Garden Waste (Green)"]),
+        ("686294de50729", "DWDate", ["Household Waste (Grey)"]),
+        ("6863a78a1dd8e", "FWDate", ["Food Waste (Brown)"]),
+        ("68629dd642423", "MDRDate", ["Recycling (Blue)"]),
     ]
 
     def parse_data(self, page: str, **kwargs) -> dict:
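The `LOOKUPS` rewrite above replaces one grouped entry (with stale field ids) by one entry per waste stream. Each tuple appears to pair a form/field identifier with the JSON key holding that stream's next date, plus the label(s) to emit; the consumer sketched below is an assumption for illustration, not the shipped `parse_data()`:

```python
from datetime import datetime

LOOKUPS = [
    ("686295a88a750", "GWDate", ["Garden Waste (Green)"]),
    ("686294de50729", "DWDate", ["Household Waste (Grey)"]),
    ("6863a78a1dd8e", "FWDate", ["Food Waste (Brown)"]),
    ("68629dd642423", "MDRDate", ["Recycling (Blue)"]),
]

# Illustrative response shape: one DD/MM/YYYY date per lookup key
json_data = {
    "GWDate": "16/09/2025",
    "DWDate": "17/09/2025",
    "FWDate": "17/09/2025",
    "MDRDate": "23/09/2025",
}

bins = []
for _field_id, date_key, labels in LOOKUPS:
    raw = json_data.get(date_key)
    if not raw:
        continue
    collection_date = datetime.strptime(raw, "%d/%m/%Y")  # validate before emitting
    for label in labels:
        bins.append(
            {"type": label, "collectionDate": collection_date.strftime("%d/%m/%Y")}
        )

print(bins)
```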
@@ -1,15 +1,18 @@
-import time
 import re
-import
+import time
 from datetime import datetime
+
+import requests
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
+
 def get_street_from_postcode(postcode: str, api_key: str) -> str:
     url = "https://maps.googleapis.com/maps/api/geocode/json"
     params = {"address": postcode, "key": api_key}
@@ -25,6 +28,7 @@ def get_street_from_postcode(postcode: str, api_key: str) -> str:
 
     raise ValueError("No street (route) found in the response.")
 
+
 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
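`get_street_from_postcode()` above resolves a postcode to a street name via the Google Geocoding API and is used to seed the site's address search. A hedged sketch of how such a helper works, matching the signature, endpoint, parameters, and error message shown in the diff — the response handling in the middle is an assumption, and a valid API key is required:

```python
import requests


def get_street_from_postcode(postcode: str, api_key: str) -> str:
    url = "https://maps.googleapis.com/maps/api/geocode/json"
    params = {"address": postcode, "key": api_key}
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()
    # Scan the geocoder results for a "route" (street name) component
    for result in response.json().get("results", []):
        for component in result.get("address_components", []):
            if "route" in component.get("types", []):
                return component["long_name"]
    raise ValueError("No street (route) found in the response.")
```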
@@ -37,10 +41,10 @@ class CouncilClass(AbstractGetBinDataClass):
 
             headless = kwargs.get("headless")
             web_driver = kwargs.get("web_driver")
-
+            UserAgent = "Mozilla/5.0"
+            driver = create_webdriver(web_driver, headless, UserAgent, __name__)
             page = "https://www.slough.gov.uk/bin-collections"
             driver.get(page)
-
             # Accept cookies
             WebDriverWait(driver, 10).until(
                 EC.element_to_be_clickable((By.ID, "ccc-recommended-settings"))
@@ -50,14 +54,20 @@ class CouncilClass(AbstractGetBinDataClass):
             address_input = WebDriverWait(driver, 10).until(
                 EC.presence_of_element_located((By.ID, "keyword_directory25"))
             )
-            user_address = get_street_from_postcode(
+            user_address = get_street_from_postcode(
+                user_postcode, "AIzaSyBDLULT7EIlNtHerswPtfmL15Tt3Oc0bV8"
+            )
             address_input.send_keys(user_address + Keys.ENTER)
 
             # Wait for address results to load
             WebDriverWait(driver, 10).until(
-                EC.presence_of_all_elements_located(
+                EC.presence_of_all_elements_located(
+                    (By.CSS_SELECTOR, "span.list__link-text")
+                )
+            )
+            span_elements = driver.find_elements(
+                By.CSS_SELECTOR, "span.list__link-text"
             )
-            span_elements = driver.find_elements(By.CSS_SELECTOR, "span.list__link-text")
 
             for span in span_elements:
                 if user_address.lower() in span.text.lower():
@@ -68,7 +78,9 @@ class CouncilClass(AbstractGetBinDataClass):
 
             # Wait for address detail page
             WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, "section.site-content")
+                )
             )
             soup = BeautifulSoup(driver.page_source, "html.parser")
 
@@ -86,28 +98,33 @@ class CouncilClass(AbstractGetBinDataClass):
                     bin_url = "https://www.slough.gov.uk" + bin_url
 
                 # Visit the child page
-                print(f"Navigating to {bin_url}")
+                # print(f"Navigating to {bin_url}")
                 driver.get(bin_url)
                 WebDriverWait(driver, 10).until(
-                    EC.presence_of_element_located(
+                    EC.presence_of_element_located(
+                        (By.CSS_SELECTOR, "div.page-content")
+                    )
                 )
                 child_soup = BeautifulSoup(driver.page_source, "html.parser")
 
                 editor_div = child_soup.find("div", class_="editor")
                 if not editor_div:
-                    print("No editor div found on bin detail page.")
+                    # print("No editor div found on bin detail page.")
                     continue
 
                 ul = editor_div.find("ul")
                 if not ul:
-                    print("No <ul> with dates found in editor div.")
+                    # print("No <ul> with dates found in editor div.")
                     continue
 
                 for li in ul.find_all("li"):
                     raw_text = li.get_text(strip=True).replace(".", "")
 
-                    if
-
+                    if (
+                        "no collection" in raw_text.lower()
+                        or "no collections" in raw_text.lower()
+                    ):
+                        # print(f"Ignoring non-collection note: {raw_text}")
                         continue
 
                     raw_date = raw_text
@@ -117,19 +134,20 @@ class CouncilClass(AbstractGetBinDataClass):
                     except ValueError:
                         raw_date_cleaned = raw_date.split("(")[0].strip()
                         try:
-                            parsed_date = datetime.strptime(
+                            parsed_date = datetime.strptime(
+                                raw_date_cleaned, "%d %B %Y"
+                            )
                         except Exception:
                             print(f"Could not parse date: {raw_text}")
                             continue
 
                     formatted_date = parsed_date.strftime("%d/%m/%Y")
                     contains_date(formatted_date)
-                    bin_data["bins"].append(
-                        "type": bin_type,
-
-                    })
+                    bin_data["bins"].append(
+                        {"type": bin_type, "collectionDate": formatted_date}
+                    )
 
-                    print(f"Type: {bin_type}, Date: {formatted_date}")
+                    # print(f"Type: {bin_type}, Date: {formatted_date}")
 
         except Exception as e:
             print(f"An error occurred: {e}")
@@ -137,4 +155,4 @@ class CouncilClass(AbstractGetBinDataClass):
         finally:
             if driver:
                 driver.quit()
-        return bin_data
+        return bin_data
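The reworked date handling above keeps a fallback path: when the first parse attempt raises `ValueError`, any parenthesised note on the list item is stripped before retrying with `"%d %B %Y"`. The same logic in isolation (the first-attempt format is not visible in this extract):

```python
from datetime import datetime


def parse_fallback(raw_date: str) -> str:
    # The first parse attempt has already failed: drop any "(...)" note and retry
    cleaned = raw_date.split("(")[0].strip()
    parsed = datetime.strptime(cleaned, "%d %B %Y")
    return parsed.strftime("%d/%m/%Y")


print(parse_fallback("25 December 2025 (no collection this week)"))  # 25/12/2025
```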
@@ -38,7 +38,7 @@ class CouncilClass(AbstractGetBinDataClass):
             # Wait for the postcode field to appear then populate it
             inputElement_postcode = WebDriverWait(driver, 30).until(
                 EC.presence_of_element_located(
-                    (By.ID, "
+                    (By.ID, "FINDYOURBINDAYS3WEEKLY_ADDRESSLOOKUPPOSTCODE")
                 )
             )
             inputElement_postcode.send_keys(user_postcode)
@@ -46,7 +46,7 @@ class CouncilClass(AbstractGetBinDataClass):
             # Click search button
             findAddress = WebDriverWait(driver, 10).until(
                 EC.presence_of_element_located(
-                    (By.ID, "
+                    (By.ID, "FINDYOURBINDAYS3WEEKLY_ADDRESSLOOKUPSEARCH")
                 )
             )
             findAddress.click()
@@ -56,7 +56,7 @@ class CouncilClass(AbstractGetBinDataClass):
                 (
                     By.XPATH,
                     ""
-                    "//*[@id='
+                    "//*[@id='FINDYOURBINDAYS3WEEKLY_ADDRESSLOOKUPADDRESS']//option[contains(., '"
                     + user_paon
                     + "')]",
                 )
@@ -66,7 +66,10 @@ class CouncilClass(AbstractGetBinDataClass):
             # Wait for the submit button to appear, then click it to get the collection dates
             WebDriverWait(driver, 30).until(
                 EC.presence_of_element_located(
-                    (
+                    (
+                        By.XPATH,
+                        '//*[@id="FINDYOURBINDAYS3WEEKLY_RUBBISHRECYCLEFOODDATE"]/div',
+                    )
                 )
             )
             time.sleep(2)
@@ -74,10 +77,10 @@ class CouncilClass(AbstractGetBinDataClass):
             soup = BeautifulSoup(driver.page_source, features="html.parser")
             soup.prettify()
 
-            rubbish_div = soup.find(
-
+            rubbish_div = soup.find("div", {"class": "rubbish_collection_difs_black"})
+            rubbish_date = rubbish_div.find(
+                "div", {"class": "rubbish_date_container_left_datetext"}
             )
-            rubbish_date = rubbish_div.find_all("div")[2]
             if rubbish_date.text == "Today":
                 rubbish_date = datetime.now()
             else:
@@ -86,10 +89,10 @@ class CouncilClass(AbstractGetBinDataClass):
                     "%A %d %B",
                 ).replace(year=datetime.now().year)
 
-            recycling_div = soup.find(
-
+            recycling_div = soup.find("div", {"class": "rubbish_collection_difs_green"})
+            recycling_date = recycling_div.find(
+                "div", {"class": "rubbish_date_container_left_datetext"}
             )
-            recycling_date = recycling_div.find_all("div")[2]
             if recycling_date.text == "Today":
                 recycling_date = datetime.now()
             else:
@@ -98,10 +101,10 @@ class CouncilClass(AbstractGetBinDataClass):
                     "%A %d %B",
                 ).replace(year=datetime.now().year)
 
-            food_div = soup.find(
-
+            food_div = soup.find("div", {"class": "rubbish_collection_difs_purple"})
+            food_date = food_div.find(
+                "div", {"class": "rubbish_date_container_left_datetext"}
             )
-            food_date = food_div.find_all("div")[2]
            if food_date.text == "Today":
                 food_date = datetime.now()
             else:
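All three panels parsed above (black/rubbish, green/recycling, purple/food) follow the same pattern: the date cell holds either the literal word "Today" or a weekday-and-date string with no year, which is re-anchored to the current year. A compact sketch of that handling:

```python
from datetime import datetime


def resolve_panel_date(text: str) -> datetime:
    if text == "Today":
        return datetime.now()
    # e.g. "Friday 14 November" -> 14 November of the current year
    return datetime.strptime(text, "%A %d %B").replace(year=datetime.now().year)


print(resolve_panel_date("Friday 14 November").strftime("%d/%m/%Y"))  # 14/11/<current year>
```

One caveat of `replace(year=...)`: a late-December lookup that returns an early-January collection gets stamped with the wrong year; the month-based year inference shown earlier in this diff is one way around that.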