uk_bin_collection 0.148.3__py3-none-any.whl → 0.148.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +315 -314
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +222 -42
- {uk_bin_collection-0.148.3.dist-info → uk_bin_collection-0.148.5.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.148.3.dist-info → uk_bin_collection-0.148.5.dist-info}/RECORD +7 -7
- {uk_bin_collection-0.148.3.dist-info → uk_bin_collection-0.148.5.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.148.3.dist-info → uk_bin_collection-0.148.5.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.148.3.dist-info → uk_bin_collection-0.148.5.dist-info}/entry_points.txt +0 -0
@@ -1,51 +1,231 @@
|
|
1
|
+
# This script pulls bin collection data from Torbay Council
|
2
|
+
# Example URL (carried over from the Barking and Dagenham scraper - TODO replace with the Torbay lookup page): https://www.lbbd.gov.uk/rubbish-recycling/household-bin-collection/check-your-bin-collection-days
|
3
|
+
import time
|
4
|
+
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
from dateutil.parser import parse
|
7
|
+
from selenium.common.exceptions import NoSuchElementException, TimeoutException
|
8
|
+
from selenium.webdriver.common.by import By
|
9
|
+
from selenium.webdriver.common.keys import Keys
|
10
|
+
from selenium.webdriver.support import expected_conditions as EC
|
11
|
+
from selenium.webdriver.support.ui import Select
|
12
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
13
|
+
|
1
14
|
from uk_bin_collection.uk_bin_collection.common import *
|
2
15
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
3
16
|
|
4
17
|
|
5
18
|
class CouncilClass(AbstractGetBinDataClass):
    """Selenium-driven scraper for a council's online bin collection lookup.

    ``parse_data`` opens the council page, follows the link to the lookup
    form (which opens in a second browser window), searches by postcode,
    picks the address whose option value contains the requested UPRN,
    submits the form and reads the collection rows from the result page.
    """

    # Element locators used on the landing page and on the lookup form.
    _COOKIE_BUTTON_XPATH = "/html/body/div[1]/div/div[2]/button[1]"
    _LOOKUP_LINK_XPATH = (
        "/html/body/main/div[4]/div/div[1]/div/div/div/div/div[2]/div/div/div/p/a"
    )
    _POSTCODE_INPUT_ID = "FF1168-text"
    _ADDRESS_LIST_ID = "FF1168-list"
    _COLLECTION_DETAILS_ID = "resiCollectionDetails"

    # Timeouts in seconds. The cookie banner is optional, so it gets its own
    # short timeout: sharing the long page timeout would stall every run
    # without a banner for a full minute before carrying on.
    _PAGE_TIMEOUT = 60
    _COOKIE_TIMEOUT = 10

    def parse_data(self, page: str, **kwargs) -> dict:
        """Drive the lookup form in a browser and return the collections.

        Args:
            page: Not used by this implementation; the page is fetched
                through the webdriver instead.
            **kwargs: Reads ``uprn``, ``postcode``, ``web_driver``,
                ``headless`` and ``url``.

        Returns:
            ``{"bins": [...]}`` where each entry has a ``type`` and a
            ``collectionDate`` formatted as ``dd/mm/YYYY``. The list is empty
            when no address option matches the UPRN.

        Raises:
            Exception: Anything raised while driving the browser is printed
                and re-raised unchanged. The webdriver is always quit.
        """
        driver = None
        try:
            data = {"bins": []}

            user_uprn = kwargs.get("uprn")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            url = kwargs.get("url")

            # Project helper; presumably rejects a missing/invalid postcode.
            check_postcode(user_postcode)

            print(
                f"Starting parse_data with parameters: postcode={user_postcode}, uprn={user_uprn}"
            )
            print(
                f"Creating webdriver with: web_driver={web_driver}, headless={headless}"
            )

            driver = create_webdriver(web_driver, headless, None, __name__)
            print(f"Navigating to URL: {url}")
            driver.get(url)
            print("Successfully loaded the page")

            driver.maximize_window()

            WebDriverWait(driver, 10).until(
                lambda d: d.execute_script("return document.readyState") == "complete"
            )

            wait = WebDriverWait(driver, self._PAGE_TIMEOUT)

            self._dismiss_cookie_banner(driver)
            self._open_lookup_form(driver, wait)
            self._search_postcode(wait, user_postcode)

            if not self._select_address(wait, user_uprn):
                print(f"No matching address found for UPRN: {user_uprn}")
                return data

            self._submit_address(driver)
            self._wait_for_collection_details(driver)

            # Make a BS4 object
            print("Parsing page with BeautifulSoup...")
            soup = BeautifulSoup(driver.page_source, features="html.parser")

            print("Looking for collection details in the page...")
            data["bins"].extend(self._parse_collection_rows(soup))

            # Debug: print the complete result
            print("\nFinal bin collection data:")
            print(data)

        except Exception as e:
            print(f"An error occurred: {e}")
            raise
        finally:
            print("Cleaning up webdriver...")
            if driver:
                driver.quit()

        return data

    def _dismiss_cookie_banner(self, driver) -> None:
        """Click the cookie banner button if it shows up; otherwise carry on."""
        try:
            cookie_button = WebDriverWait(driver, self._COOKIE_TIMEOUT).until(
                EC.element_to_be_clickable((By.XPATH, self._COOKIE_BUTTON_XPATH)),
                message="Cookie banner not found",
            )
            cookie_button.click()
            print("Cookie banner clicked.")
            time.sleep(1)  # Brief pause to let banner disappear
        except (TimeoutException, NoSuchElementException):
            print("No cookie banner appeared or selector failed.")

    def _open_lookup_form(self, driver, wait) -> None:
        """Click the lookup link and switch to the window it opens."""
        bin_collection_button = wait.until(
            EC.element_to_be_clickable((By.XPATH, self._LOOKUP_LINK_XPATH)),
        )

        # Remember the current window *before* the click opens a second one,
        # so the new handle can be told apart from it.
        original_window = driver.current_window_handle
        bin_collection_button.click()

        # Wait for the new window or tab
        WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))

        for window_handle in driver.window_handles:
            if window_handle != original_window:
                driver.switch_to.window(window_handle)
                break

    def _search_postcode(self, wait, user_postcode) -> None:
        """Type the postcode into the form and trigger the address search."""
        print("Looking for postcode input...")
        # A clickable element is necessarily present, so a single wait is
        # enough here.
        post_code_input = wait.until(
            EC.element_to_be_clickable((By.ID, self._POSTCODE_INPUT_ID)),
            message="Postcode input not clickable",
        )
        post_code_input.clear()
        post_code_input.send_keys(user_postcode)
        print(f"Entered postcode: {user_postcode}")

        # TAB moves focus to the Search button, ENTER activates it.
        post_code_input.send_keys(Keys.TAB + Keys.ENTER)
        print("Pressed ENTER on Search button")

    def _select_address(self, wait, user_uprn) -> bool:
        """Pick the address option matching the UPRN; return whether one did."""
        address_select = wait.until(
            EC.element_to_be_clickable((By.ID, self._ADDRESS_LIST_ID)),
            message="Address dropdown not found",
        )

        # Click to focus the dropdown
        address_select.click()
        time.sleep(0.5)  # Brief pause to let the dropdown open

        options = address_select.find_elements(By.TAG_NAME, "option")
        print(f"Found {len(options)} options in dropdown")

        # Read each option's value once: every get_attribute call is a round
        # trip to the browser, and the values are needed both for the debug
        # listing and for the match below.
        option_values = [(option, option.get_attribute("value")) for option in options]

        # Print all options first for debugging
        print("\nAvailable options:")
        for option, value in option_values:
            print(f"Value: '{value}', Text: '{option.text}'")

        # Option values are matched on the "U<uprn>|" fragment.
        target_uprn = f"U{user_uprn}|"
        print(f"\nLooking for UPRN pattern: {target_uprn}")

        for option, value in option_values:
            if value and target_uprn in value:
                print(f"Found matching address with value: {value}")
                option.click()
                return True
        return False

    def _submit_address(self, driver) -> None:
        """Wait for the address selection to register, then submit the form."""
        print("Address selected successfully")
        time.sleep(1)  # Give time for the selection to take effect

        print("Waiting for address selection confirmation...")
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "esbAddressSelected"))
        )
        print("Address selection confirmed")

        print("Clicking Submit button...")
        submit_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, "submit-button"))
        )
        submit_button.click()
        time.sleep(1)  # Brief pause to let the navigation start

    def _wait_for_collection_details(self, driver) -> None:
        """Wait for the results container, refreshing once if it times out."""
        details_present = EC.presence_of_element_located(
            (By.ID, self._COLLECTION_DETAILS_ID)
        )
        print("Waiting for collection details to load...")
        try:
            WebDriverWait(driver, 20).until(details_present)
            print("Collection details loaded successfully")
        except TimeoutException:
            print(
                "Timeout waiting for collection details - checking if page needs refresh"
            )
            driver.refresh()
            time.sleep(2)
            # A second timeout propagates to the caller.
            WebDriverWait(driver, 20).until(details_present)
            print("Collection details loaded after refresh")

    @staticmethod
    def _parse_collection_rows(soup) -> list:
        """Turn the result rows of the parsed page into bin dictionaries."""
        collection_rows = soup.select("#resiCollectionDetails .row.fs-4")
        print(f"\nProcessing {len(collection_rows)} collection rows...")

        bins = []
        for row in collection_rows:
            # Service type (e.g. "Domestic Collection Service") and its date.
            service_cell = row.select_one("div.col:nth-child(3)")
            date_cell = row.select_one("div[style*='width:360px']")
            if service_cell is None or date_cell is None:
                # Rows without both cells are skipped rather than failing
                # the whole lookup.
                print(
                    "Error processing collection row: service type or date cell not found"
                )
                continue

            try:
                parsed_date = parse(date_cell.text.strip(), fuzzy=True)
            except (ValueError, OverflowError) as e:
                # dateutil signals unparseable text with ValueError
                # (ParserError) or OverflowError; skip just this row.
                print(f"Error processing collection row: {e}")
                continue

            # Keep just the service name without " Collection Service".
            bin_type = service_cell.text.strip().replace(" Collection Service", "")
            if not bin_type:
                continue

            dict_data = {
                "type": bin_type,
                "collectionDate": parsed_date.strftime("%d/%m/%Y"),
            }
            bins.append(dict_data)
            print(f"Successfully added collection: {dict_data}")

        return bins
|
@@ -7,7 +7,7 @@ uk_bin_collection/tests/council_feature_input_parity.py,sha256=DO6Mk4ImYgM5ZCZ-c
|
|
7
7
|
uk_bin_collection/tests/features/environment.py,sha256=VQZjJdJI_kZn08M0j5cUgvKT4k3iTw8icJge1DGOkoA,127
|
8
8
|
uk_bin_collection/tests/features/validate_council_outputs.feature,sha256=SJK-Vc737hrf03tssxxbeg_JIvAH-ddB8f6gU1LTbuQ,251
|
9
9
|
uk_bin_collection/tests/generate_map_test_results.py,sha256=CKnGK2ZgiSXomRGkomX90DitgMP-X7wkHhyKORDcL2E,1144
|
10
|
-
uk_bin_collection/tests/input.json,sha256=
|
10
|
+
uk_bin_collection/tests/input.json,sha256=Zjpi_QnZFlIojCGBJfBkQ8Tn9O2zbi48w_yKr0wbSXU,131194
|
11
11
|
uk_bin_collection/tests/output.schema,sha256=ZwKQBwYyTDEM4G2hJwfLUVM-5v1vKRvRK9W9SS1sd18,1086
|
12
12
|
uk_bin_collection/tests/step_defs/step_helpers/file_handler.py,sha256=Ygzi4V0S1MIHqbdstUlIqtRIwnynvhu4UtpweJ6-5N8,1474
|
13
13
|
uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=VZ0a81sioJULD7syAYHjvK_-nT_Rd36tUyzPetSA0gk,3475
|
@@ -295,7 +295,7 @@ uk_bin_collection/uk_bin_collection/councils/ThanetDistrictCouncil.py,sha256=-op
|
|
295
295
|
uk_bin_collection/uk_bin_collection/councils/ThreeRiversDistrictCouncil.py,sha256=RHt3e9oeKzwxjjY-M8aC0nk-ZXhHIoyC81JzxkPVxsE,5531
|
296
296
|
uk_bin_collection/uk_bin_collection/councils/ThurrockCouncil.py,sha256=vAZMm6mcsdEcOkP15xwxWy9gdXpmLYQFH7qRifurNoY,2935
|
297
297
|
uk_bin_collection/uk_bin_collection/councils/TonbridgeAndMallingBC.py,sha256=UlgnHDoi8ecav2H5-HqKNDpqW1J3RN-c___5c08_Q7I,4859
|
298
|
-
uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py,sha256=
|
298
|
+
uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py,sha256=fpyr5f5x8JSsusdMaEH248HINPl6yQv14McD1-xLnqk,9269
|
299
299
|
uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py,sha256=qyxzMFaQp4ymDsnvoySqmJonQ_MLo4jYBbKmREECur4,4777
|
300
300
|
uk_bin_collection/uk_bin_collection/councils/TunbridgeWellsCouncil.py,sha256=s8Nm9Ef-4561mEXPa0ylYHrXyYIulgCcNV2uAnrXyZk,2846
|
301
301
|
uk_bin_collection/uk_bin_collection/councils/UttlesfordDistrictCouncil.py,sha256=GSELWbSn5jtznv6FSLIMxK6CyQ27MW9FoY_m5jhTEBA,4175
|
@@ -337,8 +337,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
|
|
337
337
|
uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=QD4v4xpsEE0QheR_fGaNOIRMc2FatcUfKkkhAhseyVU,1159
|
338
338
|
uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
|
339
339
|
uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=YvmHfZqanwrJ8ToGch34x-L-7yPe31nB_x77_Mgl_vo,4545
|
340
|
-
uk_bin_collection-0.148.
|
341
|
-
uk_bin_collection-0.148.
|
342
|
-
uk_bin_collection-0.148.
|
343
|
-
uk_bin_collection-0.148.
|
344
|
-
uk_bin_collection-0.148.
|
340
|
+
uk_bin_collection-0.148.5.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
|
341
|
+
uk_bin_collection-0.148.5.dist-info/METADATA,sha256=QFI4MOygjxoWUqezFNR-eFxcJM105HMe1volyN_IQjk,20914
|
342
|
+
uk_bin_collection-0.148.5.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
343
|
+
uk_bin_collection-0.148.5.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
|
344
|
+
uk_bin_collection-0.148.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
{uk_bin_collection-0.148.3.dist-info → uk_bin_collection-0.148.5.dist-info}/entry_points.txt
RENAMED
File without changes
|