uk_bin_collection 0.150.1__py3-none-any.whl → 0.152.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +46 -8
- uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py +149 -0
- uk_bin_collection/uk_bin_collection/councils/ArgyllandButeCouncil.py +0 -2
- uk_bin_collection/uk_bin_collection/councils/BarkingDagenham.py +11 -2
- uk_bin_collection/uk_bin_collection/councils/BrentCouncil.py +47 -33
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +11 -2
- uk_bin_collection/uk_bin_collection/councils/BroadlandDistrictCouncil.py +21 -6
- uk_bin_collection/uk_bin_collection/councils/ChichesterDistrictCouncil.py +105 -53
- uk_bin_collection/uk_bin_collection/councils/FermanaghOmaghDistrictCouncil.py +102 -0
- uk_bin_collection/uk_bin_collection/councils/MaidstoneBoroughCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py +61 -39
- uk_bin_collection/uk_bin_collection/councils/RugbyBoroughCouncil.py +128 -71
- uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py +140 -0
- uk_bin_collection/uk_bin_collection/councils/TewkesburyBoroughCouncil.py +40 -0
- {uk_bin_collection-0.150.1.dist-info → uk_bin_collection-0.152.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.150.1.dist-info → uk_bin_collection-0.152.0.dist-info}/RECORD +19 -14
- {uk_bin_collection-0.150.1.dist-info → uk_bin_collection-0.152.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.150.1.dist-info → uk_bin_collection-0.152.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.150.1.dist-info → uk_bin_collection-0.152.0.dist-info}/entry_points.txt +0 -0
@@ -1,110 +1,162 @@
|
|
1
1
|
import time
|
2
2
|
from datetime import datetime
|
3
3
|
|
4
|
-
from selenium.webdriver.support.ui import Select
|
5
4
|
from bs4 import BeautifulSoup
|
6
5
|
from selenium.webdriver.common.by import By
|
7
|
-
from selenium.webdriver.support import expected_conditions as EC
|
8
|
-
from selenium.webdriver.support.ui import Select
|
9
|
-
from selenium.webdriver.support.wait import WebDriverWait
|
10
6
|
from selenium.webdriver.common.keys import Keys
|
7
|
+
from selenium.webdriver.support.ui import WebDriverWait, Select
|
8
|
+
from selenium.webdriver.support import expected_conditions as EC
|
9
|
+
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
|
11
10
|
|
12
11
|
from uk_bin_collection.uk_bin_collection.common import *
|
13
12
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
14
13
|
|
14
|
+
date_format = "%d/%m/%Y"
|
15
15
|
|
16
|
-
# import the wonderful Beautiful Soup and the URL grabber
|
17
16
|
class CouncilClass(AbstractGetBinDataClass):
|
18
|
-
"""
|
19
|
-
Concrete classes have to implement all abstract operations of the
|
20
|
-
base class. They can also override some operations with a default
|
21
|
-
implementation.
|
22
|
-
"""
|
23
|
-
|
24
17
|
def parse_data(self, page: str, **kwargs) -> dict:
|
25
18
|
driver = None
|
26
19
|
try:
|
27
|
-
# Make a BS4 object
|
28
|
-
|
29
20
|
page = "https://www.chichester.gov.uk/checkyourbinday"
|
30
21
|
|
31
22
|
user_postcode = kwargs.get("postcode")
|
32
|
-
|
23
|
+
house_number = kwargs.get("paon")
|
33
24
|
web_driver = kwargs.get("web_driver")
|
34
25
|
headless = kwargs.get("headless")
|
35
|
-
house_number = kwargs.get("paon")
|
36
26
|
|
37
27
|
driver = create_webdriver(web_driver, headless, None, __name__)
|
38
28
|
driver.get(page)
|
39
29
|
|
40
30
|
wait = WebDriverWait(driver, 60)
|
41
31
|
|
42
|
-
|
32
|
+
input_postcode = wait.until(
|
43
33
|
EC.visibility_of_element_located(
|
44
34
|
(By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPPOSTCODE")
|
45
35
|
)
|
46
36
|
)
|
37
|
+
input_postcode.send_keys(user_postcode)
|
47
38
|
|
48
|
-
|
49
|
-
|
50
|
-
inputElement_postcodesearch_btn = wait.until(
|
51
|
-
EC.visibility_of_element_located(
|
52
|
-
(By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPSEARCH")
|
53
|
-
)
|
54
|
-
)
|
55
|
-
inputElement_postcodesearch_btn.send_keys(Keys.ENTER)
|
56
|
-
|
57
|
-
inputElement_select_address = wait.until(
|
39
|
+
search_button = wait.until(
|
58
40
|
EC.element_to_be_clickable(
|
59
|
-
(By.ID, "
|
41
|
+
(By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPSEARCH")
|
60
42
|
)
|
61
43
|
)
|
62
|
-
|
63
|
-
By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPADDRESS"
|
64
|
-
)
|
44
|
+
search_button.send_keys(Keys.ENTER)
|
65
45
|
|
66
|
-
|
67
|
-
dropdown = Select(dropdown_element)
|
46
|
+
self.smart_select_address(driver, house_number)
|
68
47
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
results = wait.until(
|
73
|
-
EC.element_to_be_clickable(
|
48
|
+
wait.until(
|
49
|
+
EC.presence_of_element_located(
|
74
50
|
(By.CLASS_NAME, "bin-collection-dates-container")
|
75
51
|
)
|
76
52
|
)
|
77
53
|
|
78
54
|
soup = BeautifulSoup(driver.page_source, features="html.parser")
|
79
|
-
soup.
|
55
|
+
table = soup.find("table", class_="defaultgeneral bin-collection-dates")
|
56
|
+
rows = table.find_all("tr") if table else []
|
80
57
|
|
81
|
-
# Extract data from the table
|
82
58
|
bin_collection_data = []
|
83
|
-
rows = soup.find(
|
84
|
-
"table", class_="defaultgeneral bin-collection-dates"
|
85
|
-
).find_all("tr")
|
86
59
|
for row in rows:
|
87
60
|
cells = row.find_all("td")
|
88
61
|
if cells:
|
89
62
|
date_str = cells[0].text.strip()
|
90
63
|
bin_type = cells[1].text.strip()
|
91
|
-
# Convert date string to the required format DD/MM/YYYY
|
92
64
|
date_obj = datetime.strptime(date_str, "%d %B %Y")
|
93
|
-
|
94
|
-
bin_collection_data.append(
|
95
|
-
|
96
|
-
|
65
|
+
formatted_date = date_obj.strftime(date_format)
|
66
|
+
bin_collection_data.append({
|
67
|
+
"collectionDate": formatted_date,
|
68
|
+
"type": bin_type
|
69
|
+
})
|
97
70
|
|
98
|
-
|
99
|
-
|
71
|
+
print(bin_collection_data)
|
72
|
+
|
73
|
+
return {"bins": bin_collection_data}
|
100
74
|
|
101
75
|
except Exception as e:
|
102
|
-
# Here you can log the exception if needed
|
103
76
|
print(f"An error occurred: {e}")
|
104
|
-
# Optionally, re-raise the exception if you want it to propagate
|
105
77
|
raise
|
106
78
|
finally:
|
107
|
-
# This block ensures that the driver is closed regardless of an exception
|
108
79
|
if driver:
|
109
80
|
driver.quit()
|
110
|
-
|
81
|
+
|
82
|
+
def smart_select_address(self, driver, house_number: str):
|
83
|
+
dropdown_id = "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPADDRESS"
|
84
|
+
|
85
|
+
print("Waiting for address dropdown...")
|
86
|
+
|
87
|
+
def dropdown_has_addresses(d):
|
88
|
+
try:
|
89
|
+
dropdown_el = d.find_element(By.ID, dropdown_id)
|
90
|
+
select = Select(dropdown_el)
|
91
|
+
return len(select.options) > 1
|
92
|
+
except StaleElementReferenceException:
|
93
|
+
return False
|
94
|
+
|
95
|
+
WebDriverWait(driver, 30).until(dropdown_has_addresses)
|
96
|
+
|
97
|
+
dropdown_el = driver.find_element(By.ID, dropdown_id)
|
98
|
+
dropdown = Select(dropdown_el)
|
99
|
+
|
100
|
+
print("Address dropdown options:")
|
101
|
+
for opt in dropdown.options:
|
102
|
+
print(f"- {opt.text.strip()}")
|
103
|
+
|
104
|
+
user_input_clean = house_number.lower().strip()
|
105
|
+
found = False
|
106
|
+
|
107
|
+
for option in dropdown.options:
|
108
|
+
option_text_clean = option.text.lower().strip()
|
109
|
+
print(f"Comparing: {repr(option_text_clean)} == {repr(user_input_clean)}")
|
110
|
+
|
111
|
+
if (
|
112
|
+
option_text_clean == user_input_clean
|
113
|
+
or option_text_clean.startswith(f"{user_input_clean},")
|
114
|
+
):
|
115
|
+
try:
|
116
|
+
option.click()
|
117
|
+
found = True
|
118
|
+
print(f"Strict match clicked: {option.text.strip()}")
|
119
|
+
break
|
120
|
+
except StaleElementReferenceException:
|
121
|
+
print("Stale during click, retrying...")
|
122
|
+
dropdown_el = driver.find_element(By.ID, dropdown_id)
|
123
|
+
dropdown = Select(dropdown_el)
|
124
|
+
for fresh_option in dropdown.options:
|
125
|
+
if fresh_option.text.lower().strip() == option_text_clean:
|
126
|
+
fresh_option.click()
|
127
|
+
found = True
|
128
|
+
print(f"Strict match clicked after refresh: {fresh_option.text.strip()}")
|
129
|
+
break
|
130
|
+
|
131
|
+
if found:
|
132
|
+
break
|
133
|
+
|
134
|
+
if not found:
|
135
|
+
print("No strict match found, trying fuzzy match...")
|
136
|
+
for option in dropdown.options:
|
137
|
+
option_text_clean = option.text.lower().strip()
|
138
|
+
if user_input_clean in option_text_clean:
|
139
|
+
try:
|
140
|
+
option.click()
|
141
|
+
found = True
|
142
|
+
print(f"Fuzzy match clicked: {option.text.strip()}")
|
143
|
+
break
|
144
|
+
except StaleElementReferenceException:
|
145
|
+
print("Stale during fuzzy click, retrying...")
|
146
|
+
dropdown_el = driver.find_element(By.ID, dropdown_id)
|
147
|
+
dropdown = Select(dropdown_el)
|
148
|
+
for fresh_option in dropdown.options:
|
149
|
+
if fresh_option.text.lower().strip() == option_text_clean:
|
150
|
+
fresh_option.click()
|
151
|
+
found = True
|
152
|
+
print(f"Fuzzy match clicked after refresh: {fresh_option.text.strip()}")
|
153
|
+
break
|
154
|
+
|
155
|
+
if found:
|
156
|
+
break
|
157
|
+
|
158
|
+
if not found:
|
159
|
+
all_opts = [opt.text.strip() for opt in dropdown.options]
|
160
|
+
raise Exception(
|
161
|
+
f"Could not find address '{house_number}' in options: {all_opts}"
|
162
|
+
)
|
@@ -0,0 +1,102 @@
|
|
1
|
+
import difflib
|
2
|
+
from datetime import date, datetime
|
3
|
+
|
4
|
+
import requests
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
|
7
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
8
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
9
|
+
|
10
|
+
|
11
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
12
|
+
class CouncilClass(AbstractGetBinDataClass):
|
13
|
+
"""
|
14
|
+
Concrete classes have to implement all abstract operations of the
|
15
|
+
base class. They can also override some operations with a default
|
16
|
+
implementation.
|
17
|
+
"""
|
18
|
+
|
19
|
+
base_url = "https://fermanaghomagh.isl-fusion.com/"
|
20
|
+
|
21
|
+
def parse_data(self, page: str, **kwargs) -> dict:
|
22
|
+
"""
|
23
|
+
This function will make a request to the search endpoint with the postcode, extract the
|
24
|
+
house numbers from the responses, then retrieve the ID of the entry with the house number that matches,
|
25
|
+
to then retrieve the bin schedule.
|
26
|
+
|
27
|
+
The API here is a weird combination of HTML in json responses.
|
28
|
+
"""
|
29
|
+
postcode = kwargs.get("postcode")
|
30
|
+
paon = kwargs.get("paon")
|
31
|
+
|
32
|
+
if not postcode:
|
33
|
+
raise ValueError("Must provide a postcode")
|
34
|
+
|
35
|
+
if not paon:
|
36
|
+
raise ValueError("Must provide a house number")
|
37
|
+
|
38
|
+
search_url = f"{self.base_url}/address/{postcode}"
|
39
|
+
|
40
|
+
requests.packages.urllib3.disable_warnings()
|
41
|
+
s = requests.Session()
|
42
|
+
response = s.get(search_url)
|
43
|
+
response.raise_for_status()
|
44
|
+
|
45
|
+
address_data = response.json()
|
46
|
+
|
47
|
+
address_list = address_data["html"]
|
48
|
+
|
49
|
+
soup = BeautifulSoup(address_list, features="html.parser")
|
50
|
+
|
51
|
+
address_by_id = {}
|
52
|
+
|
53
|
+
for li in soup.find_all("li"):
|
54
|
+
link = li.find_all("a")[0]
|
55
|
+
address_id = link.attrs["href"]
|
56
|
+
address = link.text
|
57
|
+
|
58
|
+
address_by_id[address_id] = address
|
59
|
+
|
60
|
+
addresses = list(address_by_id.values())
|
61
|
+
|
62
|
+
common = difflib.SequenceMatcher(
|
63
|
+
a=addresses[0], b=addresses[1]
|
64
|
+
).find_longest_match()
|
65
|
+
extra_bit = addresses[0][common.a : common.a + common.size]
|
66
|
+
|
67
|
+
ids_by_paon = {
|
68
|
+
a.replace(extra_bit, ""): a_id.replace("/view/", "").replace("/", "")
|
69
|
+
for a_id, a in address_by_id.items()
|
70
|
+
}
|
71
|
+
|
72
|
+
property_id = ids_by_paon.get(paon)
|
73
|
+
if not property_id:
|
74
|
+
raise ValueError(
|
75
|
+
f"Invalid house number, valid values are {', '.join(ids_by_paon.keys())}"
|
76
|
+
)
|
77
|
+
|
78
|
+
today = date.today()
|
79
|
+
calendar_url = (
|
80
|
+
f"{self.base_url}/calendar/{property_id}/{today.strftime('%Y-%m-%d')}"
|
81
|
+
)
|
82
|
+
response = s.get(calendar_url)
|
83
|
+
response.raise_for_status()
|
84
|
+
calendar_data = response.json()
|
85
|
+
next_collections = calendar_data["nextCollections"]
|
86
|
+
|
87
|
+
collections = list(next_collections["collections"].values())
|
88
|
+
|
89
|
+
data = {"bins": []}
|
90
|
+
|
91
|
+
for collection in collections:
|
92
|
+
collection_date = datetime.strptime(collection["date"], "%Y-%m-%d")
|
93
|
+
bins = [c["name"] for c in collection["collections"].values()]
|
94
|
+
|
95
|
+
for bin in bins:
|
96
|
+
data["bins"].append(
|
97
|
+
{
|
98
|
+
"type": bin,
|
99
|
+
"collectionDate": collection_date.strftime(date_format),
|
100
|
+
}
|
101
|
+
)
|
102
|
+
return data
|
@@ -0,0 +1,115 @@
|
|
1
|
+
import time
|
2
|
+
from datetime import datetime
|
3
|
+
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
from selenium.webdriver.common.by import By
|
6
|
+
from selenium.webdriver.support import expected_conditions as EC
|
7
|
+
from selenium.webdriver.support.ui import Select
|
8
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
9
|
+
|
10
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
11
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
12
|
+
|
13
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
14
|
+
|
15
|
+
|
16
|
+
class CouncilClass(AbstractGetBinDataClass):
|
17
|
+
"""
|
18
|
+
Concrete classes have to implement all abstract operations of the
|
19
|
+
base class. They can also override some operations with a default
|
20
|
+
implementation.
|
21
|
+
"""
|
22
|
+
|
23
|
+
def parse_data(self, page: str, **kwargs) -> dict:
|
24
|
+
driver = None
|
25
|
+
try:
|
26
|
+
page = "https://my.maidstone.gov.uk/service/Find-your-bin-day"
|
27
|
+
bin_data = {"bins": []}
|
28
|
+
user_paon = kwargs.get("paon")
|
29
|
+
user_postcode = kwargs.get("postcode")
|
30
|
+
web_driver = kwargs.get("web_driver")
|
31
|
+
headless = kwargs.get("headless")
|
32
|
+
check_postcode(user_postcode)
|
33
|
+
|
34
|
+
# Create Selenium webdriver
|
35
|
+
driver = create_webdriver(web_driver, headless, None, __name__)
|
36
|
+
driver.get(page)
|
37
|
+
|
38
|
+
iframe_presense = WebDriverWait(driver, 30).until(
|
39
|
+
EC.presence_of_element_located((By.ID, "fillform-frame-1"))
|
40
|
+
)
|
41
|
+
driver.switch_to.frame(iframe_presense)
|
42
|
+
|
43
|
+
wait = WebDriverWait(driver, 60)
|
44
|
+
|
45
|
+
# Postal code input
|
46
|
+
inputElement_postcodesearch = wait.until(
|
47
|
+
EC.element_to_be_clickable((By.NAME, "postcode"))
|
48
|
+
)
|
49
|
+
inputElement_postcodesearch.send_keys(user_postcode)
|
50
|
+
|
51
|
+
# Wait for the 'Select address' dropdown to be updated
|
52
|
+
dropdown_select = wait.until(
|
53
|
+
EC.presence_of_element_located((By.XPATH, "//span[contains(text(), 'Select...')]"))
|
54
|
+
)
|
55
|
+
dropdown_select.click()
|
56
|
+
|
57
|
+
dropdown = wait.until(
|
58
|
+
EC.element_to_be_clickable((By.XPATH, f"//div[contains(text(), ' {user_paon}')]"))
|
59
|
+
)
|
60
|
+
dropdown.click()
|
61
|
+
|
62
|
+
# Wait for 'Searching for...' to be added to page
|
63
|
+
WebDriverWait(driver, timeout=15).until(
|
64
|
+
EC.text_to_be_present_in_element(
|
65
|
+
(By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
|
66
|
+
)
|
67
|
+
)
|
68
|
+
|
69
|
+
# Wait for 'Searching for...' to be removed from page
|
70
|
+
WebDriverWait(driver, timeout=15).until(
|
71
|
+
EC.none_of(
|
72
|
+
EC.text_to_be_present_in_element(
|
73
|
+
(By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
|
74
|
+
)
|
75
|
+
)
|
76
|
+
)
|
77
|
+
|
78
|
+
# Even then it can still be adding data to the page...
|
79
|
+
time.sleep(5)
|
80
|
+
|
81
|
+
soup = BeautifulSoup(driver.page_source, features="html.parser")
|
82
|
+
|
83
|
+
# This is ugly but there is literally no consistency to the HTML
|
84
|
+
def is_a_collection_date(t):
|
85
|
+
return any("Next collection" in c for c in t.children)
|
86
|
+
|
87
|
+
for next_collection in soup.find_all(is_a_collection_date):
|
88
|
+
bin_info = list(
|
89
|
+
next_collection.parent.select_one("div:nth-child(1)").children
|
90
|
+
)
|
91
|
+
if not bin_info:
|
92
|
+
continue
|
93
|
+
bin = bin_info[0].get_text()
|
94
|
+
date = next_collection.select_one("strong").get_text(strip=True)
|
95
|
+
bin_date = datetime.strptime(date, "%d %b %Y")
|
96
|
+
dict_data = {
|
97
|
+
"type": bin,
|
98
|
+
"collectionDate": bin_date.strftime(date_format),
|
99
|
+
}
|
100
|
+
bin_data["bins"].append(dict_data)
|
101
|
+
|
102
|
+
bin_data["bins"].sort(
|
103
|
+
key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
|
104
|
+
)
|
105
|
+
|
106
|
+
except Exception as e:
|
107
|
+
# Here you can log the exception if needed
|
108
|
+
print(f"An error occurred: {e}")
|
109
|
+
# Optionally, re-raise the exception if you want it to propagate
|
110
|
+
raise
|
111
|
+
finally:
|
112
|
+
# This block ensures that the driver is closed regardless of an exception
|
113
|
+
if driver:
|
114
|
+
driver.quit()
|
115
|
+
return bin_data
|
@@ -1,4 +1,5 @@
|
|
1
1
|
from datetime import datetime
|
2
|
+
from time import sleep
|
2
3
|
|
3
4
|
from bs4 import BeautifulSoup
|
4
5
|
from selenium.webdriver.common.by import By
|
@@ -9,8 +10,6 @@ from selenium.webdriver.support.wait import WebDriverWait
|
|
9
10
|
from uk_bin_collection.uk_bin_collection.common import *
|
10
11
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
11
12
|
|
12
|
-
# import the wonderful Beautiful Soup and the URL grabber
|
13
|
-
|
14
13
|
|
15
14
|
class CouncilClass(AbstractGetBinDataClass):
|
16
15
|
"""
|
@@ -34,82 +33,105 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
34
33
|
headless = kwargs.get("headless")
|
35
34
|
check_uprn(user_uprn)
|
36
35
|
check_postcode(user_postcode)
|
37
|
-
|
36
|
+
|
38
37
|
driver = create_webdriver(web_driver, headless, None, __name__)
|
39
38
|
driver.get(page)
|
40
39
|
|
41
|
-
# If you bang in the house number (or property name) and postcode in the box it should find your property
|
42
|
-
|
43
40
|
iframe_presense = WebDriverWait(driver, 30).until(
|
44
41
|
EC.presence_of_element_located((By.ID, "fillform-frame-1"))
|
45
42
|
)
|
46
43
|
|
47
44
|
driver.switch_to.frame(iframe_presense)
|
48
45
|
wait = WebDriverWait(driver, 60)
|
46
|
+
|
49
47
|
inputElement_postcodesearch = wait.until(
|
50
48
|
EC.element_to_be_clickable((By.NAME, "postcode_search"))
|
51
49
|
)
|
52
|
-
|
53
50
|
inputElement_postcodesearch.send_keys(str(user_postcode))
|
54
51
|
|
55
|
-
# Wait for the 'Select your property' dropdown to appear and select the first result
|
56
52
|
dropdown = wait.until(EC.element_to_be_clickable((By.NAME, "selAddress")))
|
57
|
-
|
58
53
|
dropdown_options = wait.until(
|
59
54
|
EC.presence_of_element_located((By.CLASS_NAME, "lookup-option"))
|
60
55
|
)
|
61
56
|
|
62
|
-
# Create a 'Select' for it, then select the first address in the list
|
63
|
-
# (Index 0 is "Make a selection from the list")
|
64
57
|
drop_down_values = Select(dropdown)
|
65
58
|
option_element = wait.until(
|
66
59
|
EC.presence_of_element_located(
|
67
60
|
(By.CSS_SELECTOR, f'option.lookup-option[value="{str(user_uprn)}"]')
|
68
61
|
)
|
69
62
|
)
|
70
|
-
|
71
63
|
drop_down_values.select_by_value(str(user_uprn))
|
72
64
|
|
73
|
-
# Wait for the 'View more' link to appear, then click it to get the full set of dates
|
74
65
|
h3_element = wait.until(
|
75
66
|
EC.presence_of_element_located(
|
76
67
|
(By.XPATH, "//th[contains(text(), 'Waste Collection')]")
|
77
68
|
)
|
78
69
|
)
|
79
70
|
|
71
|
+
sleep(10)
|
72
|
+
|
80
73
|
soup = BeautifulSoup(driver.page_source, features="html.parser")
|
74
|
+
print("Parsing HTML content...")
|
75
|
+
|
76
|
+
collection_rows = soup.find_all("tr")
|
77
|
+
|
78
|
+
for row in collection_rows:
|
79
|
+
cells = row.find_all("td")
|
80
|
+
if len(cells) == 3: # Date, Image, Bin Type
|
81
|
+
# Extract date carefully
|
82
|
+
date_labels = cells[0].find_all("label")
|
83
|
+
collection_date = None
|
84
|
+
for label in date_labels:
|
85
|
+
label_text = label.get_text().strip()
|
86
|
+
if contains_date(label_text):
|
87
|
+
collection_date = label_text
|
88
|
+
break
|
89
|
+
|
90
|
+
# Extract bin type
|
91
|
+
bin_label = cells[2].find("label")
|
92
|
+
bin_types = bin_label.get_text().strip() if bin_label else None
|
93
|
+
|
94
|
+
if collection_date and bin_types:
|
95
|
+
print(f"Found collection: {collection_date} - {bin_types}")
|
96
|
+
|
97
|
+
# Handle combined collections
|
98
|
+
if "&" in bin_types:
|
99
|
+
if "Burgundy" in bin_types:
|
100
|
+
data["bins"].append(
|
101
|
+
{
|
102
|
+
"type": "Burgundy Bin",
|
103
|
+
"collectionDate": datetime.strptime(
|
104
|
+
collection_date, "%d/%m/%Y"
|
105
|
+
).strftime(date_format),
|
106
|
+
}
|
107
|
+
)
|
108
|
+
if "Green" in bin_types:
|
109
|
+
data["bins"].append(
|
110
|
+
{
|
111
|
+
"type": "Green Bin",
|
112
|
+
"collectionDate": datetime.strptime(
|
113
|
+
collection_date, "%d/%m/%Y"
|
114
|
+
).strftime(date_format),
|
115
|
+
}
|
116
|
+
)
|
117
|
+
else:
|
118
|
+
if "Black" in bin_types:
|
119
|
+
data["bins"].append(
|
120
|
+
{
|
121
|
+
"type": "Black Bin",
|
122
|
+
"collectionDate": datetime.strptime(
|
123
|
+
collection_date, "%d/%m/%Y"
|
124
|
+
).strftime(date_format),
|
125
|
+
}
|
126
|
+
)
|
127
|
+
|
128
|
+
print(f"Found {len(data['bins'])} collections")
|
129
|
+
print(f"Final data: {data}")
|
81
130
|
|
82
|
-
target_h3 = soup.find("h3", string="Collection Details")
|
83
|
-
tables_after_h3 = target_h3.parent.parent.find_next("table")
|
84
|
-
|
85
|
-
table_rows = tables_after_h3.find_all("tr")
|
86
|
-
for row in table_rows:
|
87
|
-
rowdata = row.find_all("td")
|
88
|
-
if len(rowdata) == 3:
|
89
|
-
labels = rowdata[0].find_all("label")
|
90
|
-
# Strip the day (i.e., Monday) out of the collection date string for parsing
|
91
|
-
if len(labels) >= 2:
|
92
|
-
date_label = labels[1]
|
93
|
-
datestring = date_label.text.strip()
|
94
|
-
|
95
|
-
# Add the bin type and collection date to the 'data' dictionary
|
96
|
-
data["bins"].append(
|
97
|
-
{
|
98
|
-
"type": rowdata[2].text.strip(),
|
99
|
-
"collectionDate": datetime.strptime(
|
100
|
-
datestring, "%d/%m/%Y"
|
101
|
-
).strftime(
|
102
|
-
date_format
|
103
|
-
), # Format the date as needed
|
104
|
-
}
|
105
|
-
)
|
106
131
|
except Exception as e:
|
107
|
-
# Here you can log the exception if needed
|
108
132
|
print(f"An error occurred: {e}")
|
109
|
-
# Optionally, re-raise the exception if you want it to propagate
|
110
133
|
raise
|
111
134
|
finally:
|
112
|
-
# This block ensures that the driver is closed regardless of an exception
|
113
135
|
if driver:
|
114
136
|
driver.quit()
|
115
137
|
return data
|