uk_bin_collection 0.151.0__py3-none-any.whl → 0.152.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
- uk_bin_collection/tests/input.json +137 -66
- uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +69 -24
- uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py +149 -0
- uk_bin_collection/uk_bin_collection/councils/BarkingDagenham.py +11 -2
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +24 -47
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +11 -2
- uk_bin_collection/uk_bin_collection/councils/BroadlandDistrictCouncil.py +21 -6
- uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +14 -3
- uk_bin_collection/uk_bin_collection/councils/CheltenhamBoroughCouncil.py +12 -12
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +24 -2
- uk_bin_collection/uk_bin_collection/councils/ChichesterDistrictCouncil.py +105 -53
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +4 -0
- uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +114 -261
- uk_bin_collection/uk_bin_collection/councils/DartfordBoroughCouncil.py +13 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +17 -2
- uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +14 -1
- uk_bin_collection/uk_bin_collection/councils/EastbourneBoroughCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +59 -45
- uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +2 -0
- uk_bin_collection/uk_bin_collection/councils/EppingForestDistrictCouncil.py +47 -15
- uk_bin_collection/uk_bin_collection/councils/FermanaghOmaghDistrictCouncil.py +102 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +13 -1
- uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +2 -3
- uk_bin_collection/uk_bin_collection/councils/HerefordshireCouncil.py +13 -2
- uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +18 -4
- uk_bin_collection/uk_bin_collection/councils/LewesDistrictCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +16 -4
- uk_bin_collection/uk_bin_collection/councils/MaidstoneBoroughCouncil.py +42 -47
- uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +13 -6
- uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py +61 -39
- uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +14 -9
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +2 -2
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +50 -14
- uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py +140 -0
- uk_bin_collection/uk_bin_collection/councils/SouthRibbleCouncil.py +115 -65
- uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +10 -5
- uk_bin_collection/uk_bin_collection/councils/TewkesburyBoroughCouncil.py +40 -0
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +1 -3
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +3 -0
- {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/RECORD +44 -38
- {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/entry_points.txt +0 -0
```diff
@@ -1,7 +1,8 @@
 from bs4 import BeautifulSoup
+from dateutil.relativedelta import relativedelta
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
-from dateutil.relativedelta import relativedelta
 
 
 # import the wonderful Beautiful Soup and the URL grabber
```
```diff
@@ -18,9 +19,20 @@ class CouncilClass(AbstractGetBinDataClass):
         collections = []
         curr_date = datetime.today()
 
-
-
-
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://liverpool.gov.uk/Bins/BinDatesTable?UPRN={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
+        # Make a BS4 object
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, "html.parser")
+        soup.prettify
 
         # Get all table rows on the page - enumerate gives us an index, which is handy for to keep a row count.
         # In this case, the first (0th) row is headings, so we can skip it, then parse the other data.
```
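The new Liverpool lookup prefers a UPRN and keeps a stored URL as a backwards-compatible fallback. A minimal sketch of that pattern in isolation (the function name and error handling below are illustrative, not part of the package):

```python
import requests
from bs4 import BeautifulSoup


def fetch_bin_page(uprn: str | None, stored_url: str | None) -> BeautifulSoup:
    """Build the UPRN URL when possible, else fall back to a stored URL."""
    if uprn:
        url = f"https://liverpool.gov.uk/Bins/BinDatesTable?UPRN={uprn}"
    elif stored_url:
        url = stored_url
    else:
        raise ValueError("Neither a UPRN nor a fallback URL was provided")
    page = requests.get(url, timeout=30)
    page.raise_for_status()
    return BeautifulSoup(page.text, "html.parser")
```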
```diff
@@ -3,6 +3,7 @@ from datetime import datetime
 
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import Select
 from selenium.webdriver.support.wait import WebDriverWait
```
```diff
@@ -10,8 +11,6 @@ from selenium.webdriver.support.wait import WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-# import the wonderful Beautiful Soup and the URL grabber
-
 
 class CouncilClass(AbstractGetBinDataClass):
     """
```
```diff
@@ -30,7 +29,7 @@ class CouncilClass(AbstractGetBinDataClass):
         web_driver = kwargs.get("web_driver")
         headless = kwargs.get("headless")
         check_postcode(user_postcode)
-
+
         # Create Selenium webdriver
         driver = create_webdriver(web_driver, headless, None, __name__)
         driver.get(page)
```
```diff
@@ -41,68 +40,64 @@
             driver.switch_to.frame(iframe_presense)
 
             wait = WebDriverWait(driver, 60)
-
+
             # Postal code input
             inputElement_postcodesearch = wait.until(
                 EC.element_to_be_clickable((By.NAME, "postcode"))
             )
             inputElement_postcodesearch.send_keys(user_postcode)
-
-
-
-
-
-            dropdown_select.click()
-
+
+            time.sleep(5)
+
+            inputElement_postcodesearch.send_keys(Keys.TAB + Keys.DOWN)
+
             dropdown = wait.until(
-                EC.element_to_be_clickable(
+                EC.element_to_be_clickable(
+                    (By.XPATH, f"//div[contains(text(), ' {user_paon}')]")
+                )
             )
             dropdown.click()
 
-            #
-            WebDriverWait(driver,
+            # This website is horrible!
+            WebDriverWait(driver, 20).until(
                 EC.text_to_be_present_in_element(
-                    (By.CSS_SELECTOR, "
-                )
-            )
-
-            # Wait for 'Searching for...' to be removed from page
-            WebDriverWait(driver, timeout=15).until(
-                EC.none_of(
-                    EC.text_to_be_present_in_element(
-                        (By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
-                    )
+                    (By.CSS_SELECTOR, "div.col-collection-panel"), "Next collection"
                 )
             )
 
             # Even then it can still be adding data to the page...
             time.sleep(5)
 
-
-
-
-            def is_a_collection_date(t):
-                return any("Next collection" in c for c in t.children)
-
-            for next_collection in soup.find_all(is_a_collection_date):
-                bin_info = list(
-                    next_collection.parent.select_one("div:nth-child(1)").children
-                )
-                if not bin_info:
-                    continue
-                bin = bin_info[0].get_text()
-                date = next_collection.select_one("strong").get_text(strip=True)
-                bin_date = datetime.strptime(date, "%d %b %Y")
-                dict_data = {
-                    "type": bin,
-                    "collectionDate": bin_date.strftime(date_format),
-                }
-                bin_data["bins"].append(dict_data)
-
-            bin_data["bins"].sort(
-                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+            # Scraping via Selenium rather than BeautifulSoup, to ensure eveything's loaded
+            collection_panels = driver.find_elements(
+                By.CSS_SELECTOR, "div.col-collection-panel"
             )
 
+            for panel in collection_panels:
+                try:
+                    # Get bin type (e.g., General waste, Food waste)
+                    bin_type = panel.find_element(
+                        By.CSS_SELECTOR, "h3.collectionDataHeader"
+                    ).text.strip()
+                    # Get next collection date
+                    lines = panel.find_elements(By.CSS_SELECTOR, "ul li")
+                    for line in lines:
+                        if "Next collection" in line.text:
+                            date_str = (
+                                line.text.split("Next collection")[1]
+                                .strip(": ")
+                                .strip()
+                            )
+                            bin_date = datetime.strptime(date_str, "%d/%m/%Y")
+                            bin_data["bins"].append(
+                                {
+                                    "type": bin_type,
+                                    "collectionDate": bin_date.strftime(date_format),
+                                }
+                            )
+                except Exception as inner_e:
+                    print(f"Skipping one panel due to error: {inner_e}")
+
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
```
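This rewrite stops parsing a stale BeautifulSoup snapshot and instead queries the live DOM through Selenium, gated on an explicit wait for the panel text. The core wait-then-query pattern, reduced to a sketch using the same selectors as the diff:

```python
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait


def wait_for_collection_panels(driver, timeout: int = 20):
    # Block until at least one panel contains the expected text, so the
    # subsequent find_elements call sees fully rendered content.
    WebDriverWait(driver, timeout).until(
        EC.text_to_be_present_in_element(
            (By.CSS_SELECTOR, "div.col-collection-panel"), "Next collection"
        )
    )
    return driver.find_elements(By.CSS_SELECTOR, "div.col-collection-panel")
```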
```diff
@@ -7,14 +7,21 @@ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataC
 
 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
-
-
-
-
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://bincollection.newham.gov.uk/Details/Index/{user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
 
         # Make a BS4 object
-
-        soup.
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, "html.parser")
+        soup.prettify
 
         # Form a JSON wrapper
         data = {"bins": []}
```
```diff
@@ -1,4 +1,5 @@
 from datetime import datetime
+from time import sleep
 
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
```
```diff
@@ -9,8 +10,6 @@ from selenium.webdriver.support.wait import WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-# import the wonderful Beautiful Soup and the URL grabber
-
 
 
 class CouncilClass(AbstractGetBinDataClass):
     """
```
```diff
@@ -34,82 +33,105 @@
             headless = kwargs.get("headless")
             check_uprn(user_uprn)
             check_postcode(user_postcode)
-
+
             driver = create_webdriver(web_driver, headless, None, __name__)
             driver.get(page)
 
-            # If you bang in the house number (or property name) and postcode in the box it should find your property
-
             iframe_presense = WebDriverWait(driver, 30).until(
                 EC.presence_of_element_located((By.ID, "fillform-frame-1"))
             )
 
             driver.switch_to.frame(iframe_presense)
             wait = WebDriverWait(driver, 60)
+
             inputElement_postcodesearch = wait.until(
                 EC.element_to_be_clickable((By.NAME, "postcode_search"))
             )
-
             inputElement_postcodesearch.send_keys(str(user_postcode))
 
-            # Wait for the 'Select your property' dropdown to appear and select the first result
             dropdown = wait.until(EC.element_to_be_clickable((By.NAME, "selAddress")))
-
             dropdown_options = wait.until(
                 EC.presence_of_element_located((By.CLASS_NAME, "lookup-option"))
             )
 
-            # Create a 'Select' for it, then select the first address in the list
-            # (Index 0 is "Make a selection from the list")
             drop_down_values = Select(dropdown)
             option_element = wait.until(
                 EC.presence_of_element_located(
                     (By.CSS_SELECTOR, f'option.lookup-option[value="{str(user_uprn)}"]')
                 )
             )
-
             drop_down_values.select_by_value(str(user_uprn))
 
-            # Wait for the 'View more' link to appear, then click it to get the full set of dates
             h3_element = wait.until(
                 EC.presence_of_element_located(
                     (By.XPATH, "//th[contains(text(), 'Waste Collection')]")
                 )
             )
 
+            sleep(10)
+
             soup = BeautifulSoup(driver.page_source, features="html.parser")
+            print("Parsing HTML content...")
+
+            collection_rows = soup.find_all("tr")
+
+            for row in collection_rows:
+                cells = row.find_all("td")
+                if len(cells) == 3:  # Date, Image, Bin Type
+                    # Extract date carefully
+                    date_labels = cells[0].find_all("label")
+                    collection_date = None
+                    for label in date_labels:
+                        label_text = label.get_text().strip()
+                        if contains_date(label_text):
+                            collection_date = label_text
+                            break
+
+                    # Extract bin type
+                    bin_label = cells[2].find("label")
+                    bin_types = bin_label.get_text().strip() if bin_label else None
+
+                    if collection_date and bin_types:
+                        print(f"Found collection: {collection_date} - {bin_types}")
+
+                        # Handle combined collections
+                        if "&" in bin_types:
+                            if "Burgundy" in bin_types:
+                                data["bins"].append(
+                                    {
+                                        "type": "Burgundy Bin",
+                                        "collectionDate": datetime.strptime(
+                                            collection_date, "%d/%m/%Y"
+                                        ).strftime(date_format),
+                                    }
+                                )
+                            if "Green" in bin_types:
+                                data["bins"].append(
+                                    {
+                                        "type": "Green Bin",
+                                        "collectionDate": datetime.strptime(
+                                            collection_date, "%d/%m/%Y"
+                                        ).strftime(date_format),
+                                    }
+                                )
+                        else:
+                            if "Black" in bin_types:
+                                data["bins"].append(
+                                    {
+                                        "type": "Black Bin",
+                                        "collectionDate": datetime.strptime(
+                                            collection_date, "%d/%m/%Y"
+                                        ).strftime(date_format),
+                                    }
+                                )
+
+            print(f"Found {len(data['bins'])} collections")
+            print(f"Final data: {data}")
 
-            target_h3 = soup.find("h3", string="Collection Details")
-            tables_after_h3 = target_h3.parent.parent.find_next("table")
-
-            table_rows = tables_after_h3.find_all("tr")
-            for row in table_rows:
-                rowdata = row.find_all("td")
-                if len(rowdata) == 3:
-                    labels = rowdata[0].find_all("label")
-                    # Strip the day (i.e., Monday) out of the collection date string for parsing
-                    if len(labels) >= 2:
-                        date_label = labels[1]
-                        datestring = date_label.text.strip()
-
-                        # Add the bin type and collection date to the 'data' dictionary
-                        data["bins"].append(
-                            {
-                                "type": rowdata[2].text.strip(),
-                                "collectionDate": datetime.strptime(
-                                    datestring, "%d/%m/%Y"
-                                ).strftime(
-                                    date_format
-                                ),  # Format the date as needed
-                            }
-                        )
         except Exception as e:
-            # Here you can log the exception if needed
             print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-            # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
         return data
```
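The new parser fans a combined entry such as "Burgundy & Green" out into one record per bin colour. A simplified equivalent of that branching (colour names taken from the diff; the real code special-cases Black for single entries):

```python
from datetime import datetime

DATE_FORMAT = "%d/%m/%Y"  # assumed output format for this sketch


def split_bin_types(bin_types: str, collection_date: str) -> list[dict]:
    """Turn "Burgundy & Green" into two records, "Black" into one."""
    parsed = datetime.strptime(collection_date, "%d/%m/%Y")
    return [
        {"type": f"{colour} Bin", "collectionDate": parsed.strftime(DATE_FORMAT)}
        for colour in ("Burgundy", "Green", "Black")
        if colour in bin_types
    ]
```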
```diff
@@ -43,9 +43,13 @@
         collectionDate = (
             cells[1].get_text(strip=True) + " " + datetime.now().strftime("%Y")
         )
-
-
-
+
+        if len(cells) > 2:
+            nextCollectionDate = (
+                cells[2].get_text(strip=True) + " " + datetime.now().strftime("%Y")
+            )
+        else:
+            nextCollectionDate = ""
 
         # Make each Bin element in the JSON
         dict_data = {
```
```diff
@@ -59,12 +63,13 @@
         data["bins"].append(dict_data)
 
         # Make each next Bin element in the JSON
-
-
-
-
-
-
+        if nextCollectionDate != "":
+            dict_data = {
+                "type": binType,
+                "collectionDate": get_next_occurrence_from_day_month(
+                    datetime.strptime(nextCollectionDate, "%A %d %B %Y")
+                ).strftime(date_format),
+            }
 
         # Add data to the main JSON Wrapper
         data["bins"].append(dict_data)
```
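`get_next_occurrence_from_day_month` comes from the package's common module and its implementation is not shown in this diff; from the call site it evidently rolls a parsed day-and-month forward to its next future occurrence. A hypothetical stand-in with that behaviour:

```python
from datetime import datetime


def next_occurrence_from_day_month(parsed: datetime) -> datetime:
    # Hypothetical: the real helper lives in uk_bin_collection's common
    # module and may differ. Roll the day/month forward to the next
    # occurrence that is not in the past (leap days ignored for brevity).
    today = datetime.now()
    candidate = parsed.replace(year=today.year)
    if candidate.date() < today.date():
        candidate = candidate.replace(year=today.year + 1)
    return candidate
```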
```diff
@@ -81,9 +81,9 @@
         # The regular calendar only shows until end of March 2026, work out how many weeks that is
         weeks_total = math.floor((datetime(2026, 4, 1) - datetime.now()).days / 7)
 
-        # The garden calendar only shows until end of November
+        # The garden calendar only shows until end of November 2025, work out how many weeks that is
         garden_weeks_total = math.floor(
-            (datetime(
+            (datetime(2025, 12, 1) - datetime.now()).days / 7
         )
 
         regular_collections, garden_collections, special_collections = [], [], []
```
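Both horizons reduce to the same arithmetic: the number of whole weeks between now and a fixed cut-off date. Worked in isolation:

```python
import math
from datetime import datetime

# Whole weeks until the regular calendar's horizon (1 April 2026)...
weeks_total = math.floor((datetime(2026, 4, 1) - datetime.now()).days / 7)

# ...and until the garden calendar's horizon (1 December 2025).
garden_weeks_total = math.floor((datetime(2025, 12, 1) - datetime.now()).days / 7)

print(weeks_total, garden_weeks_total)
```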
```diff
@@ -1,7 +1,10 @@
 import time
 
 from bs4 import BeautifulSoup
+from selenium.common.exceptions import TimeoutException
 from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import WebDriverWait
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
```
```diff
@@ -42,28 +45,61 @@
             driver = create_webdriver(web_driver, headless, None, __name__)
             driver.get(page)
 
-
+            # Create wait object
+            wait = WebDriverWait(driver, 20)
 
-            #
-
-
-
-
-
+            # Wait for and click cookie button
+            cookie_button = wait.until(
+                EC.element_to_be_clickable((By.ID, "ccc-notify-accept"))
+            )
+            cookie_button.click()
+
+            # Wait for and find house number input
+            inputElement_hn = wait.until(
+                EC.presence_of_element_located(
+                    (
+                        By.ID,
+                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtHouse",
+                    )
+                )
             )
-
-
-
+
+            # Wait for and find postcode input
+            inputElement_pc = wait.until(
+                EC.presence_of_element_located(
+                    (
+                        By.ID,
+                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtPostcode",
+                    )
+                )
             )
 
+            # Enter details
             inputElement_pc.send_keys(user_postcode)
             inputElement_hn.send_keys(user_paon)
 
-
-
-
-
+            # Click lookup button and wait for results
+            lookup_button = wait.until(
+                EC.element_to_be_clickable(
+                    (
+                        By.ID,
+                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_butLookup",
+                    )
+                )
+            )
+            lookup_button.click()
+
+            # Wait for results to load
+            route_summary = wait.until(
+                EC.presence_of_element_located(
+                    (
+                        By.ID,
+                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
+                    )
+                )
+            )
 
+            # Get page source after everything has loaded
             soup = BeautifulSoup(driver.page_source, features="html.parser")
 
             # Work out which bins can be collected for this address. Glass bins are only on some houses due to pilot programme.
```
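The page here is ASP.NET WebForms, hence the long auto-generated control IDs. If those prefixes ever change, matching on a stable ID suffix is a common defensive alternative (illustrative only; the scraper itself uses the full IDs):

```python
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def find_by_id_suffix(driver, suffix: str, timeout: int = 20):
    # e.g. find_by_id_suffix(driver, "NCCAddressLookup_txtHouse") matches the
    # element regardless of the WebForms naming-container prefix.
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, f"[id$='{suffix}']"))
    )
```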
```diff
@@ -0,0 +1,140 @@
+import time
+import re
+import requests
+from datetime import datetime
+from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import WebDriverWait
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+def get_street_from_postcode(postcode: str, api_key: str) -> str:
+    url = "https://maps.googleapis.com/maps/api/geocode/json"
+    params = {"address": postcode, "key": api_key}
+    response = requests.get(url, params=params)
+    data = response.json()
+
+    if data["status"] != "OK":
+        raise ValueError(f"API error: {data['status']}")
+
+    for component in data["results"][0]["address_components"]:
+        if "route" in component["types"]:
+            return component["long_name"]
+
+    raise ValueError("No street (route) found in the response.")
+
+class CouncilClass(AbstractGetBinDataClass):
+    def parse_data(self, page: str, **kwargs) -> dict:
+        driver = None
+        bin_data = {"bins": []}
+        try:
+            user_postcode = kwargs.get("postcode")
+            if not user_postcode:
+                raise ValueError("No postcode provided.")
+            check_postcode(user_postcode)
+
+            headless = kwargs.get("headless")
+            web_driver = kwargs.get("web_driver")
+            driver = create_webdriver(web_driver, headless, None, __name__)
+            page = "https://www.slough.gov.uk/bin-collections"
+            driver.get(page)
+
+            # Accept cookies
+            WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable((By.ID, "ccc-recommended-settings"))
+            ).click()
+
+            # Enter the street name into the address search
+            address_input = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.ID, "keyword_directory25"))
+            )
+            user_address = get_street_from_postcode(user_postcode, "AIzaSyBDLULT7EIlNtHerswPtfmL15Tt3Oc0bV8")
+            address_input.send_keys(user_address + Keys.ENTER)
+
+            # Wait for address results to load
+            WebDriverWait(driver, 10).until(
+                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "span.list__link-text"))
+            )
+            span_elements = driver.find_elements(By.CSS_SELECTOR, "span.list__link-text")
+
+            for span in span_elements:
+                if user_address.lower() in span.text.lower():
+                    span.click()
+                    break
+            else:
+                raise Exception(f"No link found containing address: {user_address}")
+
+            # Wait for address detail page
+            WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.CSS_SELECTOR, "section.site-content"))
+            )
+            soup = BeautifulSoup(driver.page_source, "html.parser")
+
+            # Extract each bin link and type
+            for heading in soup.select("dt.definition__heading"):
+                heading_text = heading.get_text(strip=True)
+                if "bin day details" in heading_text.lower():
+                    bin_type = heading_text.split()[0].capitalize() + " bin"
+                    dd = heading.find_next_sibling("dd")
+                    link = dd.find("a", href=True)
+
+                    if link:
+                        bin_url = link["href"]
+                        if not bin_url.startswith("http"):
+                            bin_url = "https://www.slough.gov.uk" + bin_url
+
+                        # Visit the child page
+                        print(f"Navigating to {bin_url}")
+                        driver.get(bin_url)
+                        WebDriverWait(driver, 10).until(
+                            EC.presence_of_element_located((By.CSS_SELECTOR, "div.page-content"))
+                        )
+                        child_soup = BeautifulSoup(driver.page_source, "html.parser")
+
+                        editor_div = child_soup.find("div", class_="editor")
+                        if not editor_div:
+                            print("No editor div found on bin detail page.")
+                            continue
+
+                        ul = editor_div.find("ul")
+                        if not ul:
+                            print("No <ul> with dates found in editor div.")
+                            continue
+
+                        for li in ul.find_all("li"):
+                            raw_text = li.get_text(strip=True).replace(".", "")
+
+                            if "no collection" in raw_text.lower() or "no collections" in raw_text.lower():
+                                print(f"Ignoring non-collection note: {raw_text}")
+                                continue
+
+                            raw_date = raw_text
+
+                            try:
+                                parsed_date = datetime.strptime(raw_date, "%d %B %Y")
+                            except ValueError:
+                                raw_date_cleaned = raw_date.split("(")[0].strip()
+                                try:
+                                    parsed_date = datetime.strptime(raw_date_cleaned, "%d %B %Y")
+                                except Exception:
+                                    print(f"Could not parse date: {raw_text}")
+                                    continue
+
+                            formatted_date = parsed_date.strftime("%d/%m/%Y")
+                            contains_date(formatted_date)
+                            bin_data["bins"].append({
+                                "type": bin_type,
+                                "collectionDate": formatted_date
+                            })
+
+                            print(f"Type: {bin_type}, Date: {formatted_date}")
+
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            raise
+        finally:
+            if driver:
+                driver.quit()
+        return bin_data
```
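The new Slough scraper first resolves the postcode to a street name through the Google Geocoding API (the module-level `get_street_from_postcode` above), then drives the site's directory search with it. Hypothetical usage, with a placeholder postcode and key:

```python
# Assumes a valid Google Geocoding API key; the postcode is a placeholder.
street = get_street_from_postcode("SL1 1XX", "YOUR_GOOGLE_API_KEY")
print(street)  # e.g. "High Street", the value typed into keyword_directory25
```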