uk_bin_collection 0.152.2-py3-none-any.whl → 0.152.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +6 -4
- uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +47 -30
- uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py +18 -0
- uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +7 -2
- uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py +257 -63
- uk_bin_collection/uk_bin_collection/councils/SouthRibbleCouncil.py +42 -99
- {uk_bin_collection-0.152.2.dist-info → uk_bin_collection-0.152.4.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.152.2.dist-info → uk_bin_collection-0.152.4.dist-info}/RECORD +11 -11
- {uk_bin_collection-0.152.2.dist-info → uk_bin_collection-0.152.4.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.152.2.dist-info → uk_bin_collection-0.152.4.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.152.2.dist-info → uk_bin_collection-0.152.4.dist-info}/entry_points.txt +0 -0
uk_bin_collection/tests/input.json

```diff
@@ -1693,9 +1693,10 @@
         "LAD24CD": "E06000012"
     },
     "NorthHertfordshireDistrictCouncil": {
-        "house_number": "
+        "house_number": "22",
         "postcode": "SG6 4BJ",
         "url": "https://www.north-herts.gov.uk",
+        "web_driver": "http://selenium:4444",
         "wiki_name": "North Hertfordshire",
         "wiki_note": "Pass the house number and postcode in their respective parameters.",
         "LAD24CD": "E07000099"
@@ -2179,9 +2180,10 @@
         "LAD24CD": "E07000179"
     },
     "SouthRibbleCouncil": {
-        "uprn": "
-        "
-        "
+        "uprn": "10013243496",
+        "postcode": "PR26 7RZ",
+        "url": "https://forms.chorleysouthribble.gov.uk/xfp/form/70",
+        "wiki_command_url_override": "https://forms.chorleysouthribble.gov.uk/xfp/form/70",
         "wiki_name": "South Ribble",
         "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN.",
         "LAD24CD": "E07000126"
```
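These entries feed the project's per-council integration tests. As a quick illustration only (this is not the project's actual harness code), the new South Ribble parameters can be read straight back out of input.json:

```python
import json

# Illustrative loader: reads the council parameters shown in the diff above.
with open("uk_bin_collection/tests/input.json") as f:
    councils = json.load(f)

entry = councils["SouthRibbleCouncil"]
print(entry["uprn"])      # 10013243496
print(entry["postcode"])  # PR26 7RZ
print(entry["url"])       # https://forms.chorleysouthribble.gov.uk/xfp/form/70
```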
uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py

```diff
@@ -37,38 +37,55 @@ class CouncilClass(AbstractGetBinDataClass):
            paragraphs = bin_row.find_all("p")

            for p in paragraphs:
-
-
-
-
-
-
-
+                # Check for both singular and plural "Next collection(s):"
+                if p.get_text() and (
+                    "Next collection:" in p.get_text()
+                    or "Next collections:" in p.get_text()
+                ):
+                    # Extract collection dates
+                    date_text = (
+                        p.get_text()
+                        .replace("Next collection:", "")
+                        .replace("Next collections:", "")
+                        .strip()
                    )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    # Split multiple dates if comma-separated
+                    date_strings = [date.strip() for date in date_text.split(",")]
+
+                    for date_str in date_strings:
+                        try:
+                            # Extract day number from date string (e.g. "27" from "Tuesday 27th May")
+                            day_number = int("".join(filter(str.isdigit, date_str)))
+                            # Replace ordinal in date string with plain number
+                            date_str = date_str.replace(
+                                get_date_with_ordinal(day_number), str(day_number)
+                            )
+
+                            # Parse date with full format
+                            bin_date = datetime.strptime(date_str, "%A %d %B")
+
+                            # Add current year since it's not in the date string
+                            current_year = datetime.now().year
+                            bin_date = bin_date.replace(year=current_year)
+
+                            # If the date is in the past, it's probably for next year
+                            if bin_date < datetime.now():
+                                bin_date = bin_date.replace(year=current_year + 1)
+
+                            collections.append((bin_type, bin_date))
+                            print(
+                                f"Successfully parsed date for {bin_type}: {bin_date}"
+                            )
+
+                        except ValueError as e:
+                            print(
+                                f"Failed to parse date '{date_str}' for {bin_type}: {e}"
+                            )
+                            continue
+
+                    # Found and processed the collection dates, so break the loop
+                    break

        except Exception as e:
            print(f"Error processing bin row: {e}")
```
uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py

```diff
@@ -130,4 +130,22 @@ class CouncilClass(AbstractGetBinDataClass):
            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
        )

+        data["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+        )
+
+        # Deduplicate the bins based on type and collection date
+        # Feels a bit hacky, but fixes
+        # https://github.com/robbrad/UKBinCollectionData/issues/1436
+        unique_bins = []
+        seen = set()
+        for bin_item in data["bins"]:
+            # Create a unique identifier for each bin entry
+            bin_key = (bin_item["type"], bin_item["collectionDate"])
+            if bin_key not in seen:
+                seen.add(bin_key)
+                unique_bins.append(bin_item)
+
+        data["bins"] = unique_bins
+
        return data
```
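The added block sorts the results and then drops duplicate entries, keeping the first occurrence of each (type, collectionDate) pair. The same seen-set pattern in isolation:

```python
# Standalone sketch of the dedup pattern above: keep the first occurrence
# of each (type, collectionDate) pair while preserving the sorted order.
bins = [
    {"type": "Recycling", "collectionDate": "02/06/2025"},
    {"type": "Recycling", "collectionDate": "02/06/2025"},  # duplicate
    {"type": "General Waste", "collectionDate": "09/06/2025"},
]

seen = set()
unique_bins = []
for bin_item in bins:
    key = (bin_item["type"], bin_item["collectionDate"])
    if key not in seen:
        seen.add(key)
        unique_bins.append(bin_item)

assert len(unique_bins) == 2
```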
uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py

```diff
@@ -77,8 +77,13 @@ class CouncilClass(AbstractGetBinDataClass):
            return data

        except Exception as e:
-
-
+            import traceback
+
+            error_message = f"Error fetching/parsing data for Eastleigh: {str(e)}\n{traceback.format_exc()}"
+            print(error_message)
+            # Use the correct date format for the error fallback
+            today = datetime.now().strftime("%d/%m/%Y")
+            return {"bins": [{"type": "Error", "collectionDate": today}]}
        finally:
            if "driver" in locals():
                driver.quit()
```
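Rather than letting a scrape failure propagate, the handler now logs the full traceback and returns a schema-valid payload containing a sentinel "Error" entry dated today. A minimal sketch of that fallback shape (the helper names here are illustrative, not from the package):

```python
import traceback
from datetime import datetime

def fetch_bins_or_error(fetch):
    """On any failure, log the traceback and return a schema-valid payload."""
    try:
        return fetch()
    except Exception as e:
        print(f"Error fetching/parsing data: {e}\n{traceback.format_exc()}")
        today = datetime.now().strftime("%d/%m/%Y")
        return {"bins": [{"type": "Error", "collectionDate": today}]}

def broken_fetch():
    raise RuntimeError("boom")

# A failing fetch still yields output that downstream consumers can read.
print(fetch_bins_or_error(broken_fetch))
```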
uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py

```diff
@@ -1,93 +1,287 @@
-
+# direct URL works, but includes a token, so I'm using Selenium
+# https://waste.nc.north-herts.gov.uk/w/webpage/find-bin-collection-day-show-details?webpage_token=c7c7c3cbc2f0478735fc746ca985b8f4221dea31c24dde99e39fb1c556b07788&auth=YTc5YTAwZmUyMGQ3&id=1421457
+
+import re
+import time
+from datetime import datetime
+
 from bs4 import BeautifulSoup
+from dateutil.parser import parse
+from selenium.common.exceptions import NoSuchElementException, TimeoutException
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import Select
+from selenium.webdriver.support.wait import WebDriverWait

 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


-# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
-    """
-    Concrete classes have to implement all abstract operations of the
-    base class. They can also override some operations with a default
-    implementation.
-    """

     def parse_data(self, page: str, **kwargs) -> dict:
+        driver = None
+        try:
+            data = {"bins": []}
+
+            user_paon = kwargs.get("paon")
+            postcode = kwargs.get("postcode")
+            web_driver = kwargs.get("web_driver")
+            headless = kwargs.get("headless")
+            url = "https://waste.nc.north-herts.gov.uk/w/webpage/find-bin-collection-day-input-address"
+
+            driver = create_webdriver(web_driver, headless, None, __name__)
+            driver.get(url)
+
+            WebDriverWait(driver, 10).until(
+                lambda d: d.execute_script("return document.readyState") == "complete"
+            )
+
+            # Define the wait variable
+            wait = WebDriverWait(
+                driver, 20
+            )  # Create the wait object with a 20-second timeout
+
+            # Enter postcode - try different approaches for reliability
+            # print("Looking for postcode input...")
+
+            postcode_input = wait.until(
+                EC.element_to_be_clickable(
+                    (
+                        By.CSS_SELECTOR,
+                        "input.relation_path_type_ahead_search.form-control",
+                    )
+                ),
+                message="Postcode input not found by class",
+            )
+            postcode_input.clear()
+            postcode_input.send_keys(postcode)
+            # print(f"Entered postcode: {postcode}")
+
+            # Wait for the dropdown to load
+            # print("Waiting for address list to populate...")
+            try:
+                # Wait for the results to appear
+                wait.until(
+                    EC.presence_of_element_located(
+                        (By.CSS_SELECTOR, ".relation_path_type_ahead_results_holder")
+                    ),
+                    message="Address results container not found",
+                )
+
+                # Wait for list items to appear
+                wait.until(
+                    EC.presence_of_all_elements_located(
+                        (By.CSS_SELECTOR, ".relation_path_type_ahead_results_holder li")
+                    ),
+                    message="No address items found in the list",
+                )
+                # print("Address list populated successfully")
+
+                # Search for user_paon in the address list using aria-label attribute
+                try:
+                    # Use XPath to look for aria-label containing user_paon
+                    address_xpath = (
+                        f"//li[@aria-label and contains(@aria-label, '{user_paon}')]"
+                    )
+                    matching_address = wait.until(
+                        EC.element_to_be_clickable((By.XPATH, address_xpath)),
+                        message=f"No address containing '{user_paon}' found in aria-label attributes",
+                    )
+                    # print(f"Found matching address: {matching_address.get_attribute('aria-label')}")
+                    matching_address.click()
+                    # print("Clicked on matching address")
+
+                    # Allow time for the selection to take effect
+                    time.sleep(2)
+
+                    # Find and click the "Select address and continue" button
+                    continue_button = wait.until(
+                        EC.element_to_be_clickable(
+                            (
+                                By.CSS_SELECTOR,
+                                "input.btn.bg-green[value='Select address and continue']",
+                            )
+                        ),
+                        message="Could not find 'Select address and continue' button",
+                    )
+                    # print("Found 'Select address and continue' button, clicking it...")
+                    continue_button.click()
+                    # print("Clicked on 'Select address and continue' button")
+
+                    # Allow time for the page to load after clicking the button
+                    time.sleep(3)
+                except TimeoutException as e:
+                    # print(f"Error finding address: {e}")
+                    raise
+            except TimeoutException as e:
+                # print(f"Error loading address list: {e}")
+                raise
+
+            # After pressing Next button and waiting for page to load
+            # print("Looking for schedule list...")
+
+            # Wait for the page to load - giving it extra time
+            time.sleep(5)
+
+            # Use only the selector that we know works
+            # print("Looking for bin type elements...")
+            try:
+                bin_type_selector = (
+                    By.CSS_SELECTOR,
+                    "div.formatting_bold.formatting_size_bigger.formatting span.value-as-text",
+                )
+                WebDriverWait(driver, 15).until(
+                    EC.presence_of_element_located(bin_type_selector)
+                )
+                # print(f"Found bin type elements with selector: {bin_type_selector}")
+            except TimeoutException:
+                # print("Could not find bin type elements. Taking screenshot for debugging...")
+                screenshot_path = f"bin_type_error_{int(time.time())}.png"
+                driver.save_screenshot(screenshot_path)
+                # print(f"Screenshot saved to {screenshot_path}")
+
+            # Create BS4 object from driver's page source
+            # print("Parsing page with BeautifulSoup...")
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+
+            # Initialize data dictionary
+            data = {"bins": []}
+
+            # Looking for bin types in the exact HTML structure
+            bin_type_elements = soup.select(
+                "div.formatting_bold.formatting_size_bigger.formatting span.value-as-text"
+            )
+            # print(f"Found {len(bin_type_elements)} bin type elements")
+
+            # Look specifically for date elements with the exact structure
+            date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
+            hidden_dates = soup.select(
+                "div.col-sm-12.font-xs-3xl input[type='hidden'][value*='/']"
+            )
+
+            # print(f"Found {len(bin_type_elements)} bin types and {len(date_elements)} date elements")
+
+            # We need a smarter way to match bin types with their dates
+            bin_count = 0
+
+            # Map of bin types to their collection dates
+            bin_date_map = {}
+
+            # Extract all date strings that look like actual dates
+            date_texts = []
+            date_pattern = re.compile(
+                r"(?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)\s+\d+(?:st|nd|rd|th)?\s+\w+\s+\d{4}",
+                re.IGNORECASE,
+            )

-
-
-
-
-
+            for element in date_elements:
+                text = element.get_text(strip=True)
+                if date_pattern.search(text):
+                    date_texts.append(text)
+                    # print(f"Found valid date text: {text}")

-
+            # Find hidden date inputs with values in DD/MM/YYYY format
+            hidden_date_values = []
+            for hidden in hidden_dates:
+                value = hidden.get("value", "")
+                if re.match(r"\d{1,2}/\d{1,2}/\d{4}", value):
+                    hidden_date_values.append(value)
+                    # print(f"Found hidden date value: {value}")

-
+            # When filtering date elements
+            date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
+            valid_date_elements = []

-
-
-
-
-
-
+            for element in date_elements:
+                text = element.get_text(strip=True)
+                if contains_date(text):
+                    valid_date_elements.append(element)
+                    # print(f"Found valid date element: {text}")
+                else:
+                    pass
+                    # print(f"Skipping non-date element: {text}")

-
+            # print(f"Found {len(bin_type_elements)} bin types and {len(valid_date_elements)} valid date elements")

-
-
+            # When processing each bin type
+            for i, bin_type_elem in enumerate(bin_type_elements):
+                bin_type = bin_type_elem.get_text(strip=True)

-
-
+                # Try to find a date for this bin type
+                date_text = None

-
-
-
-
+                # Look for a valid date element
+                if i < len(valid_date_elements):
+                    date_elem = valid_date_elements[i]
+                    date_text = date_elem.get_text(strip=True)

-
-
-
+                # If we don't have a valid date yet, try using the hidden input
+                if not date_text or not contains_date(date_text):
+                    if i < len(hidden_dates):
+                        date_value = hidden_dates[i].get("value")
+                        if contains_date(date_value):
+                            date_text = date_value

-
+                # Skip if we don't have a valid date
+                if not date_text or not contains_date(date_text):
+                    # print(f"No valid date found for bin type: {bin_type}")
+                    continue

-
-        alink = soup.find("div", id="property_list").find("a")
+                # print(f"Found bin type: {bin_type} with date: {date_text}")

-
-
+                try:
+                    # Clean up the date text
+                    date_text = remove_ordinal_indicator_from_date_string(date_text)

-
+                    # Try to parse the date
+                    try:
+                        collection_date = datetime.strptime(
+                            date_text, "%A %d %B %Y"
+                        ).date()
+                    except ValueError:
+                        try:
+                            collection_date = datetime.strptime(
+                                date_text, "%d/%m/%Y"
+                            ).date()
+                        except ValueError:
+                            # Last resort
+                            collection_date = parse(date_text).date()

-
-
-
-
-
-        soup = BeautifulSoup(r.text, features="html.parser")
+                    # Create bin entry
+                    bin_entry = {
+                        "type": bin_type,
+                        "collectionDate": collection_date.strftime(date_format),
+                    }

-
-
+                    # Add to data
+                    data["bins"].append(bin_entry)
+                    bin_count += 1
+                    # print(f"Added bin entry: {bin_entry}")

-
+                except Exception as e:
+                    pass
+                    # print(f"Error parsing date '{date_text}': {str(e)}")

-
-        lis = u1.find_all("li", recursive=False)
+                # print(f"Successfully parsed {bin_count} bin collections")

-
-
+            if not data["bins"]:
+                # print("No bin data found. Saving page for debugging...")
+                with open(f"debug_page_{int(time.time())}.html", "w") as f:
+                    f.write(driver.page_source)
+                driver.save_screenshot(f"final_error_screenshot_{int(time.time())}.png")
+                raise ValueError(
+                    "No bin collection data could be extracted from the page"
+                )

-
-
-
-
-            "%d/%m/%Y",
-        ).strftime(date_format),
-    }
-    bindata["bins"].append(dict_data)
+            # Sort the bin collections by date
+            data["bins"].sort(
+                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+            )

-
-        key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
-    )
+            return data

-
+        except Exception as e:
+            # print(f"Error parsing bin collection data: {e}")
+            raise
```
uk_bin_collection/uk_bin_collection/councils/SouthRibbleCouncil.py

```diff
@@ -1,29 +1,11 @@
-from typing import Dict, List, Any, Optional
-from bs4 import BeautifulSoup
-from dateutil.relativedelta import relativedelta
 import requests
-import
-import re
-from datetime import datetime
-from uk_bin_collection.uk_bin_collection.common import *
-from dateutil.parser import parse
+from bs4 import BeautifulSoup

-from uk_bin_collection.uk_bin_collection.common import
+from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


-
-    """
-    Get a __token to include in the form data
-    :param page: Page html
-    :return: Form __token
-    """
-    soup = BeautifulSoup(page.text, features="html.parser")
-    soup.prettify()
-    token = soup.find("input", {"name": "__token"}).get("value")
-    return token
-
-
+# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -31,69 +13,38 @@ class CouncilClass(AbstractGetBinDataClass):
     implementation.
     """

-    def
-        """This method makes the request to the council
-
-        Keyword arguments:
-        url -- the url to get the data from
-        """
-        # Set a user agent so we look like a browser ;-)
-        user_agent = (
-            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
-            "Chrome/108.0.0.0 Safari/537.36"
-        )
-        headers = {"User-Agent": user_agent}
-        requests.packages.urllib3.disable_warnings()
+    def parse_data(self, page: str, **kwargs) -> dict:

-
-
-
-
-
-            return full_page
-        except requests.exceptions.HTTPError as errh:
-            logging.error(f"Http Error: {errh}")
-            raise
-        except requests.exceptions.ConnectionError as errc:
-            logging.error(f"Error Connecting: {errc}")
-            raise
-        except requests.exceptions.Timeout as errt:
-            logging.error(f"Timeout Error: {errt}")
-            raise
-        except requests.exceptions.RequestException as err:
-            logging.error(f"Oops: Something Else {err}")
-            raise
+        user_uprn = kwargs.get("uprn")
+        user_postcode = kwargs.get("postcode")
+        check_uprn(user_uprn)
+        check_postcode(user_postcode)
+        bindata = {"bins": []}

-
-
-        postcode: Optional[str] = kwargs.get("postcode")
+        session_uri = "https://forms.chorleysouthribble.gov.uk/xfp/form/70"
+        URI = "https://forms.chorleysouthribble.gov.uk/xfp/form/70#qc576c657112a8277ba6f954ebc0490c946168363_0"

-
-
-
-
+        session = requests.Session()
+        token_response = session.get(session_uri)
+        soup = BeautifulSoup(token_response.text, "html.parser")
+        token = soup.find("input", {"name": "__token"}).attrs["value"]

-
-
-
-        values = {
-            "__token": get_token(page),
-            "page": "491",
+        form_data = {
+            "__token": token,
+            "page": "196",
             "locale": "en_GB",
-            "
-            "
+            "qc576c657112a8277ba6f954ebc0490c946168363_0_0": user_postcode,
+            "qc576c657112a8277ba6f954ebc0490c946168363_1_0": user_uprn,
             "next": "Next",
         }
-        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"}
-        requests.packages.urllib3.disable_warnings()
-        response = requests.request(
-            "POST",
-            "https://forms.chorleysouthribble.gov.uk/xfp/form/70",
-            headers=headers,
-            data=values,
-        )

-
+        collection_response = session.post(URI, data=form_data)
+
+        #collection_soup = BeautifulSoup(collection_response.text, "html.parser")
+
+
+        soup = BeautifulSoup(collection_response.text, "html.parser")
+        #print(soup)

         rows = soup.find("table").find_all("tr")

@@ -103,31 +54,23 @@ class CouncilClass(AbstractGetBinDataClass):
         # Loops the Rows
         for row in rows:
             cells = row.find_all("td")
+
             if cells:
                 bin_type = cells[0].get_text(strip=True)
                 collection_next = cells[1].get_text(strip=True)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                # Make each Bin element in the JSON
-                dict_data = {
-                    "type": bin_type,
-                    "collectionDate": collection_date_obj.strftime(date_format),
-                }
-
-                # Add data to the main JSON Wrapper
-                data["bins"].append(dict_data)
-
+                if len(collection_next) != 1:
+                    collection_date_obj = datetime.strptime(collection_next, "%d/%m/%y").date()
+                    # since we only have the next collection day, if the parsed date is in the past,
+                    # assume the day is instead next month
+                    if collection_date_obj < datetime.now().date():
+                        collection_date_obj += relativedelta(months=1)
+                    # Make each Bin element in the JSON
+                    dict_data = {
+                        "type": bin_type,
+                        "collectionDate": collection_date_obj.strftime("%d/%m/%Y"),
+                    }
+                    # Add data to the main JSON Wrapper
+                    data["bins"].append(dict_data)
+                    continue
         return data
```
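The rewrite replaces the old one-shot POST with a requests.Session that first scrapes the hidden `__token` field from the form page and then submits it along with the postcode and UPRN. A condensed sketch of that round-trip (page and question ids are copied from the diff above and are specific to this form; sample values are the test parameters from input.json):

```python
import requests
from bs4 import BeautifulSoup

FORM_URL = "https://forms.chorleysouthribble.gov.uk/xfp/form/70"

# The same session must be reused between GET and POST so the token stays valid.
session = requests.Session()
token_page = session.get(FORM_URL)
soup = BeautifulSoup(token_page.text, "html.parser")
token = soup.find("input", {"name": "__token"}).attrs["value"]

form_data = {
    "__token": token,  # anti-forgery token echoed back to the server
    "page": "196",
    "locale": "en_GB",
    "qc576c657112a8277ba6f954ebc0490c946168363_0_0": "PR26 7RZ",     # postcode
    "qc576c657112a8277ba6f954ebc0490c946168363_1_0": "10013243496",  # UPRN
    "next": "Next",
}
response = session.post(FORM_URL, data=form_data)

# The response page carries the collection schedule as a table of rows.
rows = BeautifulSoup(response.text, "html.parser").find("table").find_all("tr")
print(len(rows))
```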
{uk_bin_collection-0.152.2.dist-info → uk_bin_collection-0.152.4.dist-info}/RECORD

```diff
@@ -7,7 +7,7 @@ uk_bin_collection/tests/council_feature_input_parity.py,sha256=DO6Mk4ImYgM5ZCZ-c
 uk_bin_collection/tests/features/environment.py,sha256=VQZjJdJI_kZn08M0j5cUgvKT4k3iTw8icJge1DGOkoA,127
 uk_bin_collection/tests/features/validate_council_outputs.feature,sha256=SJK-Vc737hrf03tssxxbeg_JIvAH-ddB8f6gU1LTbuQ,251
 uk_bin_collection/tests/generate_map_test_results.py,sha256=CKnGK2ZgiSXomRGkomX90DitgMP-X7wkHhyKORDcL2E,1144
-uk_bin_collection/tests/input.json,sha256=
+uk_bin_collection/tests/input.json,sha256=hy8tlgmuKQWiq3PxNE16kaYHk6XxesuFkBvtpk-9oIw,132590
 uk_bin_collection/tests/output.schema,sha256=ZwKQBwYyTDEM4G2hJwfLUVM-5v1vKRvRK9W9SS1sd18,1086
 uk_bin_collection/tests/step_defs/step_helpers/file_handler.py,sha256=Ygzi4V0S1MIHqbdstUlIqtRIwnynvhu4UtpweJ6-5N8,1474
 uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=VZ0a81sioJULD7syAYHjvK_-nT_Rd36tUyzPetSA0gk,3475
@@ -19,7 +19,7 @@ uk_bin_collection/uk_bin_collection/collect_data.py,sha256=dB7wWXsJX4fm5bIf84lex
 uk_bin_collection/uk_bin_collection/common.py,sha256=izotgwavB08pUWisNL3wqcBrE9E1-bdrq-v6YKyriDE,11034
 uk_bin_collection/uk_bin_collection/councils/AberdeenCityCouncil.py,sha256=Je8VwVLK9KnYl9vqf2gWJ7ZYDgUq3A7caDiIzk5Xof8,4194
 uk_bin_collection/uk_bin_collection/councils/AberdeenshireCouncil.py,sha256=aO1CSdyqa8oAD0fB79y1Q9bikAWCP_JFa7CsyTa2j9s,1655
-uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py,sha256=
+uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py,sha256=N0BcdTxRlCiCh6SPluPK3wMlNDli8_wJgUOOwe4hgSE,4250
 uk_bin_collection/uk_bin_collection/councils/AmberValleyBoroughCouncil.py,sha256=mTeluIIEcuxLxhfDQ95A1fp8RM6AkJT5tRGZPUbYGdk,1853
 uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py,sha256=YlhAnxkRAAvrwbUvleNKUuLROcwMTps2eMHElpuctm4,5894
 uk_bin_collection/uk_bin_collection/councils/AntrimAndNewtonabbeyCouncil.py,sha256=Hp5pteaC5RjL5ZqPZ564S9WQ6ZTKLMO6Dl_fxip2TUc,1653
@@ -50,7 +50,7 @@ uk_bin_collection/uk_bin_collection/councils/BolsoverCouncil.py,sha256=_NZuSvSbl
 uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py,sha256=WI68r8jB0IHPUT4CgmZMtng899AAMFTxkyTdPg9yLF8,4117
 uk_bin_collection/uk_bin_collection/councils/BostonBoroughCouncil.py,sha256=8xv6FMNj8Qgwn5K0nMdB5X8hkcNFzhcJ48DMordflJY,5631
 uk_bin_collection/uk_bin_collection/councils/BracknellForestCouncil.py,sha256=Llo1rULaAZ8rChVYZqXFFLo7CN6vbT0ULUJD6ActouY,9015
-uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py,sha256=
+uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py,sha256=qtCGHIwKDJQw0SNvQr0EZub21PFPDsdcxABOPv_MC6s,6114
 uk_bin_collection/uk_bin_collection/councils/BraintreeDistrictCouncil.py,sha256=2vYHilpI8mSwC2Ykdr1gxYAN3excDWqF6AwtGbkwbTw,2441
 uk_bin_collection/uk_bin_collection/councils/BrecklandCouncil.py,sha256=PX6A_pDvaN109aSNWmEhm88GFKfkClIkmbwGURWvsks,1744
 uk_bin_collection/uk_bin_collection/councils/BrentCouncil.py,sha256=BsP7V0vezteX0WAxcxqMf3g6ro-J78W6hubefALRMyg,5222
@@ -114,7 +114,7 @@ uk_bin_collection/uk_bin_collection/councils/EastRidingCouncil.py,sha256=oL-Nqri
 uk_bin_collection/uk_bin_collection/councils/EastStaffordshireBoroughCouncil.py,sha256=s13zlAN9Rac-RVHNFLIjIY0X8C6sPTNS37EL2t6vXw8,3692
 uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py,sha256=qQ0oOfGd0sWcczse_B22YoeL9uj3og8v3UJLt_Sx29c,4353
 uk_bin_collection/uk_bin_collection/councils/EastbourneBoroughCouncil.py,sha256=t2-Ri58feN4BHZ-yZx83QjmWuxlCkF7iu9UvXJ2rVp8,2669
-uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py,sha256=
+uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py,sha256=ymHYdRVlTNNYIhZigvnwsEZUpJIecjxV0HrZm7lEdpY,3397
 uk_bin_collection/uk_bin_collection/councils/EdinburghCityCouncil.py,sha256=YRjNgevnCxfaAIU8BV9dkqG17NiT6S-hp7l-1rdLVgQ,3150
 uk_bin_collection/uk_bin_collection/councils/ElmbridgeBoroughCouncil.py,sha256=TgBOaReHWBbm0avV7HqRf0x7cxDe9cacTUcP9TFFprs,3005
 uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py,sha256=2yR5p-kdApOm1gHiynNECP0jQDvaYHOiT6MAQJAvunE,6144
@@ -213,7 +213,7 @@ uk_bin_collection/uk_bin_collection/councils/NorthAyrshireCouncil.py,sha256=o8zv
 uk_bin_collection/uk_bin_collection/councils/NorthDevonCountyCouncil.py,sha256=tgJKIvu7nnCAHu_HImfG5SQABD6ygKFqrZU-ZoC6ObY,6260
 uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py,sha256=BfNpYcjG3z0Yz8OYN6NkfzvZ5k1FI-80D-rv211kPPU,5449
 uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py,sha256=fYf438VZIaOaqPSwdTTWVjFTdrI0jGfFsxVzOc-QdkA,1817
-uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py,sha256=
+uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py,sha256=Zps52bZroibL1h0h7WKD1aKJzQD7-OLwd9tHXDQkDFg,12263
 uk_bin_collection/uk_bin_collection/councils/NorthKestevenDistrictCouncil.py,sha256=vYOCerJXr9LTP6F2wm4vpYNYbQaWNZ6yfHEQ33N_hTw,1681
 uk_bin_collection/uk_bin_collection/councils/NorthLanarkshireCouncil.py,sha256=npK1V8D3SLNTSSKkfEpEPvVgXDFyhH_tAsuGogsVKQY,1763
 uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py,sha256=MPzrfdo9YQFVlqBUOM-jDQkacz2DXnygLILQ_ojZeJo,2543
@@ -271,7 +271,7 @@ uk_bin_collection/uk_bin_collection/councils/SouthKestevenDistrictCouncil.py,sha
 uk_bin_collection/uk_bin_collection/councils/SouthLanarkshireCouncil.py,sha256=fj-eZI0yrvQVCv8GvhcovZ3b9bV6Xv_ws3IunWjnv4U,3126
 uk_bin_collection/uk_bin_collection/councils/SouthNorfolkCouncil.py,sha256=C2qIZjjbl9JnuukX9OH2RbfP0hSdp3uX76APGY33qKs,4622
 uk_bin_collection/uk_bin_collection/councils/SouthOxfordshireCouncil.py,sha256=f9d2YDGv5hnN7Ul-u_I63h_BbpBU7CJFdgv-lOviRGc,4031
-uk_bin_collection/uk_bin_collection/councils/SouthRibbleCouncil.py,sha256=
+uk_bin_collection/uk_bin_collection/councils/SouthRibbleCouncil.py,sha256=5HSTmrPc-gN3ZzLq6n2VDs_NhtCgXhnGqJkEejDmSHI,2900
 uk_bin_collection/uk_bin_collection/councils/SouthStaffordshireDistrictCouncil.py,sha256=ACQMHWyamnj1ag3gNF-8Jhp-DKUok1GhFdnzH4nCzwU,3201
 uk_bin_collection/uk_bin_collection/councils/SouthTynesideCouncil.py,sha256=dxXGrJfg_fn2IPTBgq6Duwy0WY8GYLafMuisaCjOnbs,3426
 uk_bin_collection/uk_bin_collection/councils/SouthamptonCityCouncil.py,sha256=exNoI-Vun_C5FowCYhZ_600MBUe_OPR7MdGZEMNLL0I,1542
@@ -346,8 +346,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
 uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=QD4v4xpsEE0QheR_fGaNOIRMc2FatcUfKkkhAhseyVU,1159
 uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
 uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=YvmHfZqanwrJ8ToGch34x-L-7yPe31nB_x77_Mgl_vo,4545
-uk_bin_collection-0.152.
-uk_bin_collection-0.152.
-uk_bin_collection-0.152.
-uk_bin_collection-0.152.
-uk_bin_collection-0.152.
+uk_bin_collection-0.152.4.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
+uk_bin_collection-0.152.4.dist-info/METADATA,sha256=wK9bVaoTAyW9e1hieARhWnIUnlDdpL6b_h0VNvGP4zw,26688
+uk_bin_collection-0.152.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+uk_bin_collection-0.152.4.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
+uk_bin_collection-0.152.4.dist-info/RECORD,,
```