uk_bin_collection 0.152.8__py3-none-any.whl → 0.152.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +14 -20
- uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py +69 -46
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +119 -37
- uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py +158 -115
- uk_bin_collection/uk_bin_collection/councils/BasildonCouncil.py +87 -66
- uk_bin_collection/uk_bin_collection/councils/BlabyDistrictCouncil.py +5 -1
- uk_bin_collection/uk_bin_collection/councils/BlaenauGwentCountyBoroughCouncil.py +91 -66
- uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py +103 -67
- uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +67 -56
- uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +63 -95
- uk_bin_collection/uk_bin_collection/councils/CherwellDistrictCouncil.py +39 -18
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +106 -97
- uk_bin_collection/uk_bin_collection/councils/CopelandBoroughCouncil.py +80 -75
- uk_bin_collection/uk_bin_collection/councils/CotswoldDistrictCouncil.py +191 -67
- uk_bin_collection/uk_bin_collection/councils/CoventryCityCouncil.py +6 -2
- uk_bin_collection/uk_bin_collection/councils/EastHertsCouncil.py +27 -116
- uk_bin_collection/uk_bin_collection/councils/EastLothianCouncil.py +27 -39
- uk_bin_collection/uk_bin_collection/councils/EastRenfrewshireCouncil.py +61 -56
- uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py +80 -10
- uk_bin_collection/uk_bin_collection/councils/FermanaghOmaghDistrictCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py +112 -36
- uk_bin_collection/uk_bin_collection/councils/SouthwarkCouncil.py +23 -1
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +4 -1
- uk_bin_collection/uk_bin_collection/get_bin_data.py +1 -1
- {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.10.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.10.dist-info}/RECORD +29 -30
- uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py +0 -69
- {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.10.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.10.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.10.dist-info}/entry_points.txt +0 -0
@@ -1,93 +1,98 @@
-
-
+import requests
 from bs4 import BeautifulSoup

 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


+# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
-
-
+    base class. They can also override some operations with a default
+    implementation.
     """

     def parse_data(self, page: str, **kwargs) -> dict:
-        uprn = kwargs.get("uprn")
-        check_uprn(uprn)
-        council = "CPL"

-
+        user_uprn = kwargs.get("uprn")
+        postcode = kwargs.get("postcode")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        URI = "https://waste.cumberland.gov.uk/renderform?t=25&k=E43CEB1FB59F859833EF2D52B16F3F4EBE1CAB6A"
+
+        s = requests.Session()
+
+        # Make the GET request
+        response = s.get(URI)
+
+        # Make a BS4 object
+        soup = BeautifulSoup(response.content, features="html.parser")
+
+        # print(soup)
+
+        token = (soup.find("input", {"name": "__RequestVerificationToken"})).get(
+            "value"
+        )
+
+        formguid = (soup.find("input", {"name": "FormGuid"})).get("value")
+
+        # print(token)
+        # print(formguid)
+
         headers = {
-            "Content-Type": "
-            "
-            "
+            "Content-Type": "application/x-www-form-urlencoded",
+            "Origin": "https://waste.cumberland.gov.uk",
+            "Referer": "https://waste.cumberland.gov.uk/renderform?t=25&k=E43CEB1FB59F859833EF2D52B16F3F4EBE1CAB6A",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 OPR/98.0.0.0",
+            "X-Requested-With": "XMLHttpRequest",
         }
-
-
-
-
-
-            "
-            "
-
-
-            "
+
+        payload = {
+            "__RequestVerificationToken": token,
+            "FormGuid": formguid,
+            "ObjectTemplateID": "25",
+            "Trigger": "submit",
+            "CurrentSectionID": "33",
+            "TriggerCtl": "",
+            "FF265": f"U{user_uprn}",
+            "FF265lbltxt": "Please select your address",
+            "FF265-text": postcode
+        }
+
+        # print(payload)
+
+        response = s.post(
+            "https://waste.cumberland.gov.uk/renderform/Form",
             headers=headers,
-            data=
+            data=payload,
+        )
+
+        soup = BeautifulSoup(response.content, features="html.parser")
+        for row in soup.find_all("div", class_="resirow"):
+            # Extract the type of collection (e.g., Recycling, Refuse)
+            collection_type_div = row.find("div", class_="col")
+            collection_type = (
+                collection_type_div.get("class")[1]
+                if collection_type_div
+                else "Unknown"
+            )
+
+            # Extract the collection date
+            date_div = row.find("div", style="width:360px;")
+            collection_date = date_div.text.strip() if date_div else "Unknown"
+
+            dict_data = {
+                "type": collection_type,
+                "collectionDate": datetime.strptime(
+                    collection_date, "%A %d %B %Y"
+                ).strftime(date_format),
+            }
+            bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
         )

-
-        raise ValueError("No bin data found for provided UPRN.")
-
-        # Get HTML from SOAP response
-        xmltree = ElementTree.fromstring(response.text)
-        html = xmltree.find(
-            ".//{http://webaspx-collections.azurewebsites.net/}getRoundCalendarForUPRNResult"
-        ).text
-        # Parse with BS4
-        soup = BeautifulSoup(html, features="html.parser")
-        soup.prettify()
-
-        data = {"bins": []}
-        for bin_type in ["Refuse", "Recycling", "Garden"]:
-            bin_el = soup.find("b", string=bin_type)
-            if bin_el:
-                bin_info = bin_el.next_sibling.split(": ")[1]
-                collection_date = ""
-                results = re.search("([A-Za-z]+ \\d\\d? [A-Za-z]+) then", bin_info)
-                if results:
-                    if results[1] == "Today":
-                        date = datetime.now()
-                    elif results[1] == "Tomorrow":
-                        date = datetime.now() + timedelta(days=1)
-                    else:
-                        date = get_next_occurrence_from_day_month(
-                            datetime.strptime(
-                                results[1] + " " + datetime.now().strftime("%Y"),
-                                "%a %d %b %Y",
-                            )
-                        )
-                    if date:
-                        collection_date = date.strftime(date_format)
-                else:
-                    results2 = re.search("([A-Za-z]+) then", bin_info)
-                    if results2:
-                        if results2[1] == "Today":
-                            collection_date = datetime.now().strftime(date_format)
-                        elif results2[1] == "Tomorrow":
-                            collection_date = (
-                                datetime.now() + timedelta(days=1)
-                            ).strftime(date_format)
-                        else:
-                            collection_date = results2[1]
-
-                if collection_date != "":
-                    dict_data = {
-                        "type": bin_type,
-                        "collectionDate": collection_date,
-                    }
-                    data["bins"].append(dict_data)
-
-        return data
+        return bindata
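
The rewrite above drops the old webaspx SOAP calendar lookup in favour of scraping Cumberland's renderform service directly: one GET to harvest the __RequestVerificationToken and FormGuid hidden inputs, then a POST that submits the UPRN through form field FF265. A minimal standalone sketch of that token-then-POST flow; the UPRN and postcode values here are placeholders, everything else is taken from the diff:

import requests
from bs4 import BeautifulSoup

URI = "https://waste.cumberland.gov.uk/renderform?t=25&k=E43CEB1FB59F859833EF2D52B16F3F4EBE1CAB6A"

s = requests.Session()
soup = BeautifulSoup(s.get(URI).content, "html.parser")

payload = {
    # Anti-forgery values harvested from the rendered form
    "__RequestVerificationToken": soup.find("input", {"name": "__RequestVerificationToken"})["value"],
    "FormGuid": soup.find("input", {"name": "FormGuid"})["value"],
    "ObjectTemplateID": "25",
    "Trigger": "submit",
    "CurrentSectionID": "33",
    "TriggerCtl": "",
    "FF265": "U100110696239",  # placeholder UPRN, prefixed with "U" as in the diff
    "FF265lbltxt": "Please select your address",
    "FF265-text": "CA28 7XY",  # placeholder postcode
}

response = s.post(
    "https://waste.cumberland.gov.uk/renderform/Form",
    headers={
        "Content-Type": "application/x-www-form-urlencoded",
        "X-Requested-With": "XMLHttpRequest",
    },
    data=payload,
)
print(response.status_code)  # the .resirow divs in this response carry the dates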
@@ -1,5 +1,6 @@
 import time
-
+import re
+from datetime import datetime, timedelta

 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
@@ -11,8 +12,6 @@ from selenium.webdriver.support.wait import WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

-# import the wonderful Beautiful Soup and the URL grabber
-

 class CouncilClass(AbstractGetBinDataClass):
     """
@@ -30,7 +29,8 @@ class CouncilClass(AbstractGetBinDataClass):

         house_number = kwargs.get("paon")
         postcode = kwargs.get("postcode")
-
+        # Use house_number as full address since it contains the complete address
+        full_address = house_number if house_number else f"{house_number}, {postcode}"
         web_driver = kwargs.get("web_driver")
         headless = kwargs.get("headless")

@@ -38,81 +38,205 @@
             driver = create_webdriver(web_driver, headless, None, __name__)
             driver.get(page)

-            #
+            # Wait for page to load completely
             wait = WebDriverWait(driver, 60)
-
-
-            )
-
-
-
-
-                EC.
-
-
-
-
-
-
-
-
-            )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+            # Wait for the Salesforce Lightning page to be fully loaded
+            print("Waiting for Salesforce Lightning components to load...")
+            time.sleep(10)
+
+            # Wait for the address input field to be present
+            try:
+                wait.until(EC.presence_of_element_located((By.XPATH, "//label[contains(text(), 'Enter your address')]")))
+                print("Address label found")
+                time.sleep(5)  # Additional wait for the input field to be ready
+            except Exception as e:
+                print(f"Address label not found: {e}")
+
+            # Find the address input field using the label
+            try:
+                address_entry_field = driver.find_element(By.XPATH, "//label[contains(text(), 'Enter your address')]/following-sibling::*//input")
+                print("Found address input field using label xpath")
+            except Exception as e:
+                print(f"Could not find address input field: {e}")
+                raise Exception("Could not find address input field")
+
+            # Clear any existing text and enter the address
+            try:
+                address_entry_field.clear()
+                address_entry_field.send_keys(str(full_address))
+                print(f"Entered address: {full_address}")
+            except Exception as e:
+                print(f"Error entering address: {e}")
+                raise
+
+            # Click the input field again to trigger the dropdown
+            try:
+                address_entry_field.click()
+                print("Clicked input field to trigger dropdown")
+                time.sleep(3)  # Wait for dropdown to appear
+            except Exception as e:
+                print(f"Error clicking input field: {e}")
+
+            # Wait for and click the dropdown option
+            try:
+                dropdown_wait = WebDriverWait(driver, 10)
+                dropdown_option = dropdown_wait.until(EC.element_to_be_clickable((By.XPATH, "//li[@role='presentation']")))
+                dropdown_option.click()
+                print("Clicked dropdown option")
+                time.sleep(2)
+            except Exception as e:
+                print(f"Error clicking dropdown option: {e}")
+                raise
+
+            # Find and click the Next button
+            try:
+                next_wait = WebDriverWait(driver, 10)
+                next_button = next_wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Next')]")))
+                next_button.click()
+                print("Clicked Next button")
+                time.sleep(5)  # Wait for the bin collection data to load
+            except Exception as e:
+                print(f"Error clicking Next button: {e}")
+                raise
+
+            # Wait for the bin collection data table to load
+            try:
+                table_wait = WebDriverWait(driver, 15)
+                table_wait.until(EC.presence_of_element_located((By.XPATH, "//span[contains(text(), 'Collection Day')]")))
+                print("Bin collection data table loaded")
+                time.sleep(3)
+            except Exception as e:
+                print(f"Bin collection table not found: {e}")
+
             soup = BeautifulSoup(driver.page_source, features="html.parser")
-
-            rows = soup.find_all("tr", class_="slds-hint-parent")
             current_year = datetime.now().year

+            # Try multiple approaches to find bin collection data
+            rows = []
+
+            # Try different table row selectors
+            table_selectors = [
+                "tr.slds-hint-parent",
+                "tr[class*='slds']",
+                "table tr",
+                ".slds-table tr",
+                "tbody tr"
+            ]
+
+            for selector in table_selectors:
+                rows = soup.select(selector)
+                if rows:
+                    break
+
+            # If no table rows found, try to find any elements containing collection info
+            if not rows:
+                # Look for any elements that might contain bin collection information
+                collection_elements = soup.find_all(text=re.compile(r'(bin|collection|waste|recycling)', re.I))
+                if collection_elements:
+                    # Try to extract information from the surrounding elements
+                    for element in collection_elements[:10]:  # Limit to first 10 matches
+                        parent = element.parent
+                        if parent:
+                            text = parent.get_text().strip()
+                            if text and len(text) > 10:  # Only consider substantial text
+                                # Try to extract date patterns
+                                date_patterns = re.findall(r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\d{1,2}\s+\w+\s+\d{4}\b', text)
+                                if date_patterns:
+                                    data["bins"].append({
+                                        "type": "General Collection",
+                                        "collectionDate": date_patterns[0]
+                                    })
+                                    break
+
+            # Process table rows if found
             for row in rows:
-
-
-
-
-
-
-
-
-                )
-
-
-
-
+                try:
+                    columns = row.find_all(["td", "th"])
+                    if len(columns) >= 2:
+                        # Try to identify container type and date
+                        container_type = "Unknown"
+                        collection_date = ""
+
+                        # Look for header cell (th) for container type
+                        th_element = row.find("th")
+                        if th_element:
+                            container_type = th_element.get_text().strip()
+                        elif columns:
+                            # If no th, use first column as type
+                            container_type = columns[0].get_text().strip()
+
+                        # Look for date in subsequent columns
+                        for col in columns[1:] if th_element else columns[1:]:
+                            col_text = col.get_text().strip()
+                            if col_text:
+                                if col_text.lower() == "today":
+                                    collection_date = datetime.now().strftime("%d/%m/%Y")
+                                    break
+                                elif col_text.lower() == "tomorrow":
+                                    collection_date = (datetime.now() + timedelta(days=1)).strftime("%d/%m/%Y")
+                                    break
+                                else:
+                                    # Try to parse various date formats
+                                    try:
+                                        # Clean the text
+                                        clean_text = re.sub(r"[^a-zA-Z0-9,\s/-]", "", col_text).strip()
+
+                                        # Try different date parsing approaches
+                                        date_formats = [
+                                            "%a, %d %B",
+                                            "%d %B %Y",
+                                            "%d/%m/%Y",
+                                            "%d-%m-%Y",
+                                            "%B %d, %Y"
+                                        ]
+
+                                        for fmt in date_formats:
+                                            try:
+                                                parsed_date = datetime.strptime(clean_text, fmt)
+                                                if fmt == "%a, %d %B":  # Add year if missing
+                                                    if parsed_date.replace(year=current_year) < datetime.now():
+                                                        parsed_date = parsed_date.replace(year=current_year + 1)
+                                                    else:
+                                                        parsed_date = parsed_date.replace(year=current_year)
+                                                collection_date = parsed_date.strftime("%d/%m/%Y")
+                                                break
+                                            except ValueError:
+                                                continue
+
+                                        if collection_date:
+                                            break
+                                    except Exception:
+                                        continue
+
+                        # Add to data if we have both type and date
+                        if container_type and collection_date and container_type.lower() != "unknown":
+                            data["bins"].append({
+                                "type": container_type,
+                                "collectionDate": collection_date
+                            })
+                except Exception as e:
+                    print(f"Error processing row: {e}")
+                    continue
+
+            # If no data found, add a debug entry
+            if not data["bins"]:
+                print("No bin collection data found. Page source:")
+                print(driver.page_source[:1000])  # Print first 1000 chars for debugging

-                # Parse the date from the string
-                parsed_date = datetime.strptime(collection_day, "%a, %d %B")
-                if parsed_date < datetime(
-                    parsed_date.year, parsed_date.month, parsed_date.day
-                ):
-                    parsed_date = parsed_date.replace(year=current_year + 1)
-                else:
-                    parsed_date = parsed_date.replace(year=current_year)
-                # Format the date as %d/%m/%Y
-                formatted_date = parsed_date.strftime("%d/%m/%Y")
-
-                # Add the bin type and collection date to the 'data' dictionary
-                data["bins"].append(
-                    {"type": container_type, "collectionDate": formatted_date}
-                )
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
+            print(f"Full address used: {full_address}")
+            print(f"Page URL: {page}")
+            # Add some debug information
+            if driver:
+                print(f"Current page title: {driver.title}")
+                print(f"Current URL: {driver.current_url}")
             # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
             # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
-        return data
+        return data
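
Most of the added bulk above is one explicit-wait pattern repeated per step, since the Salesforce Lightning form renders asynchronously. Reduced to a standalone sketch; the URL, selector, and address here are placeholders, not the council's real values:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

driver = webdriver.Chrome()
try:
    driver.get("https://example.org/bin-day")  # placeholder URL
    wait = WebDriverWait(driver, 60)
    # Block until the input is actually interactable instead of sleeping blindly
    field = wait.until(
        EC.element_to_be_clickable(
            (By.XPATH, "//label[contains(text(), 'Enter your address')]/following-sibling::*//input")
        )
    )
    field.send_keys("1 Example Street")  # placeholder address
finally:
    driver.quit()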
@@ -18,6 +18,10 @@ class CouncilClass(AbstractGetBinDataClass):

         bindata = {"bins": []}
         curr_date = datetime.today()
+
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        }

         soup = BeautifulSoup(page.content, features="html.parser")
         button = soup.find(
@@ -25,10 +29,10 @@ class CouncilClass(AbstractGetBinDataClass):
             text="Find out which bin will be collected when and sign up for a free email reminder.",
         )

-        if button:
+        if button and button.get("href"):
             URI = button["href"]
             # Make the GET request
-            response = requests.get(URI)
+            response = requests.get(URI, headers=headers)
             soup = BeautifulSoup(response.content, features="html.parser")
             divs = soup.find_all("div", {"class": "editor"})
             for div in divs:
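
Besides the safer href guard, this change threads the new browser-style User-Agent into the follow-up GET, presumably because the council site rejects or blocks the default python-requests agent. In isolation the pattern is just the following; the URL is a placeholder:

import requests

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
# Without the header some council servers return an error or a block page
response = requests.get("https://example.org/bin-collection", headers=headers)
response.raise_for_status()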
@@ -1,7 +1,6 @@
-
-
-from
-from selenium.webdriver.support.wait import WebDriverWait
+import json
+import requests
+from datetime import datetime

 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
@@ -15,116 +14,28 @@ class CouncilClass(AbstractGetBinDataClass):
     """

     def parse_data(self, page: str, **kwargs) -> dict:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                EC.presence_of_element_located((By.ID, "main-content"))
-            )
-
-            # Wait for the property number field to appear then populate it
-            inputElement_number = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "address_name_number",
-                    )
-                )
-            )
-            inputElement_number.send_keys(user_paon)
-
-            # Wait for the postcode field to appear then populate it
-            inputElement_postcode = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "address_postcode",
-                    )
-                )
-            )
-            inputElement_postcode.send_keys(user_postcode)
-
-            # Click search button
-            continueButton = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "Submit",
-                    )
-                )
-            )
-            continueButton.click()
-
-            # Wait for the 'Search Results' to appear and select the first result
-            property = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.CSS_SELECTOR,
-                        "li.app-subnav__section-item a",
-                        # "app-subnav__link govuk-link clicker colordarkblue fontfamilyArial fontsize12rem",
-                        # "//a[starts-with(@aria-label, '{user_paon}')]",
-                    )
-                )
-            )
-            property.click()
-
-            upcoming_scheduled_collections = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
-                    (By.ID, "upcoming-scheduled-collections")
-                )
-            )
-
-            soup = BeautifulSoup(driver.page_source, features="html.parser")
-
-            collections = []
-            for collection in soup.find_all(
-                "u1",
-                class_="displayinlineblock justifycontentleft alignitemscenter margin0 padding0",
-            ):
-                date = collection.find(
-                    "p", string=lambda text: text and "/" in text
-                ).text.strip()  # Extract date
-                service = collection.find(
-                    "p", string=lambda text: text and "Collection Service" in text
-                ).text.strip()  # Extract service type
-                collections.append({"date": date, "service": service})
-
-            # Print the parsed data
-            for item in collections:
-
-                dict_data = {
-                    "type": item["service"],
-                    "collectionDate": item["date"],
-                }
-                bindata["bins"].append(dict_data)
-
-        except Exception as e:
-            # Here you can log the exception if needed
-            print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
-            raise
-        finally:
-            # This block ensures that the driver is closed regardless of an exception
-            if driver:
-                driver.quit()
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        # Make API request
+        api_url = f"https://east-herts.co.uk/api/services/{user_uprn}"
+        response = requests.get(api_url)
+        response.raise_for_status()
+
+        data = response.json()
+        today = datetime.now().date()
+
+        for service in data.get("services", []):
+            collection_date_str = service.get("collectionDate")
+            if collection_date_str:
+                collection_date = datetime.strptime(collection_date_str, "%Y-%m-%d").date()
+                # Only include future dates
+                if collection_date >= today:
+                    dict_data = {
+                        "type": service.get("binType", ""),
+                        "collectionDate": collection_date.strftime("%d/%m/%Y"),
+                    }
+                    bindata["bins"].append(dict_data)
+
         return bindata
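
Net effect of the two hunks above: the Selenium journey is gone and East Herts is now a single JSON call per UPRN, filtered to future dates. A standalone sketch of the same logic; the UPRN is a placeholder, and the services/binType/collectionDate response shape is as read from the diff:

from datetime import datetime

import requests

uprn = "10033104539"  # placeholder UPRN
data = requests.get(f"https://east-herts.co.uk/api/services/{uprn}").json()

today = datetime.now().date()
for service in data.get("services", []):
    date_str = service.get("collectionDate")
    if not date_str:
        continue
    when = datetime.strptime(date_str, "%Y-%m-%d").date()  # API dates are ISO
    if when >= today:
        print(service.get("binType", ""), when.strftime("%d/%m/%Y"))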