uk_bin_collection 0.78.0__py3-none-any.whl → 0.79.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/council_feature_input_parity.py +38 -57
- uk_bin_collection/tests/features/validate_council_outputs.feature +5 -775
- uk_bin_collection/tests/input.json +65 -2
- uk_bin_collection/tests/step_defs/test_validate_council.py +44 -16
- uk_bin_collection/tests/test_common_functions.py +4 -2
- uk_bin_collection/uk_bin_collection/common.py +4 -1
- uk_bin_collection/uk_bin_collection/councils/ChichesterDistrictCouncil.py +110 -0
- uk_bin_collection/uk_bin_collection/councils/CrawleyBoroughCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/DartfordBoroughCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +11 -6
- uk_bin_collection/uk_bin_collection/councils/EppingForestDistrictCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/FarehamBoroughCouncil.py +20 -21
- uk_bin_collection/uk_bin_collection/councils/HounslowCouncil.py +122 -0
- uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py +3 -1
- uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py +13 -10
- uk_bin_collection/uk_bin_collection/councils/SouthKestevenDistrictCouncil.py +151 -0
- uk_bin_collection/uk_bin_collection/councils/StroudDistrictCouncil.py +94 -0
- uk_bin_collection/uk_bin_collection/councils/TendringDistrictCouncil.py +110 -0
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +3 -1
- uk_bin_collection/uk_bin_collection/councils/WalthamForest.py +127 -0
- uk_bin_collection/uk_bin_collection/create_new_council.py +51 -0
- {uk_bin_collection-0.78.0.dist-info → uk_bin_collection-0.79.1.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.78.0.dist-info → uk_bin_collection-0.79.1.dist-info}/RECORD +26 -17
- {uk_bin_collection-0.78.0.dist-info → uk_bin_collection-0.79.1.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.78.0.dist-info → uk_bin_collection-0.79.1.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.78.0.dist-info → uk_bin_collection-0.79.1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,122 @@
|
|
1
|
+
import time
|
2
|
+
from datetime import datetime
|
3
|
+
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
from selenium.webdriver.common.by import By
|
6
|
+
from selenium.webdriver.support import expected_conditions as EC
|
7
|
+
from selenium.webdriver.support.ui import Select
|
8
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
9
|
+
from selenium.webdriver.common.keys import Keys
|
10
|
+
|
11
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
12
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
13
|
+
|
14
|
+
|
15
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
16
|
+
class CouncilClass(AbstractGetBinDataClass):
|
17
|
+
"""
|
18
|
+
Concrete classes have to implement all abstract operations of the
|
19
|
+
base class. They can also override some operations with a default
|
20
|
+
implementation.
|
21
|
+
"""
|
22
|
+
|
23
|
+
def parse_date(self, date_str):
    """Convert a scraped collection heading into a ``dd/mm/YYYY`` string.

    Three layouts are tried in order, each using a weekday name, a day
    number, an abbreviated month name and a four-digit year:
    ``"This Monday - 03 Jun 2024"``, ``"Next Monday - 10 Jun 2024"`` and
    ``"Monday 03 Jun 2024"``.

    Args:
        date_str: Heading text containing the collection date.

    Returns:
        The date formatted with ``%d/%m/%Y``.

    Raises:
        ValueError: If ``date_str`` matches none of the layouts.
    """
    # strptime fails on stray leading/trailing whitespace, so trim it first.
    cleaned = date_str.strip()
    layouts = (
        "This %A - %d %b %Y",
        "Next %A - %d %b %Y",
        "%A %d %b %Y",
    )
    for layout in layouts:
        try:
            return datetime.strptime(cleaned, layout).strftime("%d/%m/%Y")
        except ValueError:
            # Not this layout; fall through to the next candidate.
            continue
    raise ValueError(f"Date format not recognized: {date_str}")
|
35
|
+
|
36
|
+
def parse_data(self, page: str, **kwargs) -> dict:
    """Drive the Hounslow collection day finder and scrape the bin dates.

    Args:
        page: Not used as passed; it is replaced by the fixed finder URL.
        **kwargs: Reads ``postcode``, ``uprn``, ``web_driver`` and
            ``headless``.

    Returns:
        ``{"bins": [...]}`` where each entry has a ``type`` (list item
        text) and a ``collectionDate`` (``dd/mm/YYYY``).

    Raises:
        ValueError: If no address option matches the UPRN, or an h4
            heading cannot be parsed by ``parse_date``.
        Exception: Any other failure is printed and re-raised; the
            webdriver is always quit first.
    """
    driver = None
    try:
        page = "https://www.hounslow.gov.uk/info/20272/recycling_and_waste_collection_day_finder"

        user_postcode = kwargs.get("postcode")
        user_uprn = kwargs.get("uprn")
        web_driver = kwargs.get("web_driver")
        headless = kwargs.get("headless")

        driver = create_webdriver(web_driver, headless, None, __name__)
        driver.get(page)

        wait = WebDriverWait(driver, 60)

        postcode_box = wait.until(EC.element_to_be_clickable((By.ID, "Postcode")))
        postcode_box.send_keys(user_postcode)

        wait.until(EC.element_to_be_clickable((By.ID, "findAddress"))).click()

        # Wait for the address dropdown to be both clickable and visible
        # before wrapping it in a Select.
        wait.until(EC.element_to_be_clickable((By.ID, "UPRN")))
        address_select = Select(
            wait.until(EC.visibility_of_element_located((By.ID, "UPRN")))
        )

        # Pick the option whose value begins with the UPRN followed by a pipe.
        for option in address_select.options:
            if option.get_attribute("value").startswith(f"{user_uprn}|"):
                option.click()
                break
        else:
            # Fail fast with a clear message instead of timing out later
            # while waiting for results that will never be rendered.
            raise ValueError(
                f"No address found for UPRN {user_uprn} at postcode {user_postcode}"
            )

        wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, "bin_day_main_wrapper"))
        )

        soup = BeautifulSoup(driver.page_source, features="html.parser")

        # Each h4 heading carries a collection date; the ul that follows it
        # (when present) lists the bins collected on that date.
        bins_data = []
        for header in soup.find_all("h4"):
            collection_date = self.parse_date(header.get_text(strip=True))
            bin_list = header.find_next_sibling("ul")
            if bin_list:
                for item in bin_list.find_all("li", class_="list-group-item"):
                    bins_data.append(
                        {
                            "type": item.get_text(strip=True),
                            "collectionDate": collection_date,
                        }
                    )

        json_data = {"bins": bins_data}

    except Exception as e:
        print(f"An error occurred: {e}")
        raise
    finally:
        # Always release the browser, whether or not scraping succeeded.
        if driver:
            driver.quit()
    return json_data
|
@@ -61,7 +61,9 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
61
61
|
user_postcode = kwargs["postcode"]
|
62
62
|
|
63
63
|
self._driver = driver = create_webdriver(
|
64
|
-
web_driver=kwargs["web_driver"],
|
64
|
+
web_driver=kwargs["web_driver"],
|
65
|
+
headless=kwargs.get("headless", True),
|
66
|
+
session_name=__name__,
|
65
67
|
)
|
66
68
|
driver.implicitly_wait(1)
|
67
69
|
|
@@ -1,7 +1,9 @@
|
|
1
|
-
from bs4 import BeautifulSoup
|
2
|
-
from datetime import datetime
|
3
1
|
import re
|
2
|
+
from datetime import datetime
|
3
|
+
|
4
4
|
import requests
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
|
5
7
|
from uk_bin_collection.uk_bin_collection.common import *
|
6
8
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
7
9
|
|
@@ -19,35 +21,36 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
19
21
|
user_postcode = kwargs.get("postcode")
|
20
22
|
check_postcode(user_postcode)
|
21
23
|
|
22
|
-
root_url = "https://molevalley.
|
24
|
+
root_url = "https://myproperty.molevalley.gov.uk/molevalley/api/live_addresses/{}?format=json".format(
|
23
25
|
user_postcode
|
24
26
|
)
|
25
|
-
|
27
|
+
requests.packages.urllib3.disable_warnings()
|
28
|
+
response = requests.get(root_url, verify=False)
|
26
29
|
|
27
30
|
if not response.ok:
|
28
31
|
raise ValueError("Invalid server response code retreiving data.")
|
29
32
|
|
30
33
|
jsonData = response.json()
|
31
34
|
|
32
|
-
if len(jsonData["results"]) == 0:
|
35
|
+
if len(jsonData["results"]["features"]) == 0:
|
33
36
|
raise ValueError("No collection data found for postcode provided.")
|
34
37
|
|
35
|
-
properties_found = jsonData["results"][
|
38
|
+
properties_found = jsonData["results"]["features"]
|
36
39
|
|
37
40
|
# If UPRN is provided, we can check a specific address.
|
38
41
|
html_data = None
|
39
42
|
uprn = kwargs.get("uprn")
|
40
43
|
if uprn:
|
41
44
|
check_uprn(uprn)
|
42
|
-
for
|
43
|
-
if uprn == str(int(item["
|
44
|
-
html_data =
|
45
|
+
for item in properties_found:
|
46
|
+
if uprn == str(int(item["properties"]["blpu_uprn"])):
|
47
|
+
html_data = item["properties"]["three_column_layout_html"]
|
45
48
|
break
|
46
49
|
if html_data is None:
|
47
50
|
raise ValueError("No collection data found for UPRN provided.")
|
48
51
|
else:
|
49
52
|
# If UPRN not provided, just use the first result
|
50
|
-
html_data = properties_found[0]["
|
53
|
+
html_data = properties_found[0]["properties"]["three_column_layout_html"]
|
51
54
|
|
52
55
|
soup = BeautifulSoup(html_data, features="html.parser")
|
53
56
|
soup.prettify()
|
@@ -0,0 +1,151 @@
|
|
1
|
+
import time
|
2
|
+
from datetime import datetime
|
3
|
+
|
4
|
+
from selenium.webdriver.support.ui import Select
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
from selenium.webdriver.common.by import By
|
7
|
+
from selenium.webdriver.support import expected_conditions as EC
|
8
|
+
from selenium.webdriver.support.ui import Select
|
9
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
10
|
+
from selenium.webdriver.common.keys import Keys
|
11
|
+
|
12
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
13
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
14
|
+
|
15
|
+
|
16
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
17
|
+
class CouncilClass(AbstractGetBinDataClass):
|
18
|
+
"""
|
19
|
+
Concrete classes have to implement all abstract operations of the
|
20
|
+
base class. They can also override some operations with a default
|
21
|
+
implementation.
|
22
|
+
"""
|
23
|
+
|
24
|
+
# Extract data from the table
|
25
|
+
def format_date(self, date_str):
    """Find a ``<day> <month> <year>`` date in text and return ``dd/mm/YYYY``.

    The month may be a full English name ("June") or its three-letter
    abbreviation ("Jun"), in any letter case, so both "Fri 31 May 2024"
    and "Mon 3 Jun 2024" are handled.

    Args:
        date_str: Text containing a date such as "Fri 31 May 2024".

    Returns:
        The zero-padded date, or the literal ``"Unknown Date"`` when no
        ``<1-2 digits> <word> <4 digits>`` sequence is present.

    Raises:
        KeyError: If the word in the month position is not a recognised
            English month name or abbreviation.
    """
    month_names = (
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    )
    # Map both the full name and its three-letter form to a two-digit number.
    month_numbers = {}
    for number, name in enumerate(month_names, start=1):
        month_numbers[name.lower()] = f"{number:02d}"
        month_numbers[name[:3].lower()] = f"{number:02d}"

    found = re.search(r"(\d{1,2}) (\w+) (\d{4})", date_str)
    if not found:
        return "Unknown Date"

    day, month_name, year = found.groups()
    try:
        month = month_numbers[month_name.lower()]
    except KeyError:
        # Report the offending word exactly as it appeared in the input.
        raise KeyError(month_name) from None
    return f"{day.zfill(2)}/{month}/{year}"
|
51
|
+
|
52
|
+
def extract_bin_data(self, article):
    """Build one bin entry from a parsed ``binday`` article element.

    Args:
        article: Parsed element exposing ``find`` and ``get``; it must
            contain a ``div`` with class ``binday__cell--day`` and carry
            at least two CSS classes.

    Returns:
        ``{"type": "black" | "silver", "collectionDate": ...}`` where the
        date is whatever ``format_date`` returns for the day cell text.
    """
    day_cell = article.find("div", class_="binday__cell--day")
    raw_date = day_cell.text.strip()

    # The second CSS class is assumed to name the bin colour; anything
    # that does not mention "black" is reported as "silver".
    colour_class = article.get("class")[1]
    if "black" in colour_class:
        bin_type = "black"
    else:
        bin_type = "silver"

    return {"type": bin_type, "collectionDate": self.format_date(raw_date)}
|
60
|
+
|
61
|
+
def parse_data(self, page: str, **kwargs) -> dict:
    """Drive the South Kesteven bin search form and collect the bin days.

    Args:
        page: Not used as passed; it is replaced by the fixed search URL.
        **kwargs: Reads ``postcode``, ``paon`` (used as the visible text of
            the address option to select), ``web_driver`` and ``headless``.

    Returns:
        ``{"bins": [...]}``: an optional entry typed ``"purple"`` taken
        from the first ``alert`` aside, followed by one entry per
        ``binday`` article.

    Raises:
        Exception: Any failure is printed and re-raised; the webdriver is
            always quit first.
    """
    driver = None
    try:
        page = "https://pre.southkesteven.gov.uk/BinSearch.aspx"

        postcode = kwargs.get("postcode")
        address_text = kwargs.get("paon")

        driver = create_webdriver(
            kwargs.get("web_driver"), kwargs.get("headless"), None, __name__
        )
        driver.get(page)

        wait = WebDriverWait(driver, 60)

        # Step 1: enter the postcode and run the search.
        postcode_box = wait.until(
            EC.visibility_of_element_located((By.ID, "title"))
        )
        postcode_box.clear()
        postcode_box.send_keys(postcode)

        search_button = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//button/span[text()='Search']"))
        )
        search_button.click()

        # Step 2: choose the address whose visible text equals ``paon``.
        address_element = wait.until(
            EC.element_to_be_clickable((By.ID, "address"))
        )
        Select(address_element).select_by_visible_text(address_text)

        view_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, "//button[text()='View your bin days']")
            )
        )
        view_button.click()

        # Step 3: wait for the results paragraph before reading the page.
        wait.until(
            EC.presence_of_element_located(
                (
                    By.XPATH,
                    "//p[contains(text(), 'Your next bin collection date is ')]",
                )
            )
        )

        soup = BeautifulSoup(driver.page_source, features="html.parser")
        soup.prettify()

        collected = []

        # The first alert aside holds the next collection date; the bin type
        # is hard-coded as "purple" here rather than read from the page.
        alert_box = soup.find("aside", class_="alert")
        if alert_box:
            heading_text = alert_box.find(
                "span", class_="alert__heading alpha"
            ).text.strip()
            collected.append(
                {
                    "type": "purple",
                    "collectionDate": self.format_date(heading_text),
                }
            )

        # Every binday article contributes one further entry.
        for article in soup.find_all("article", class_="binday"):
            collected.append(self.extract_bin_data(article))

        result = {"bins": collected}

    except Exception as e:
        print(f"An error occurred: {e}")
        raise
    finally:
        # Always release the browser, whether or not scraping succeeded.
        if driver:
            driver.quit()
    return result
|
@@ -0,0 +1,94 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
from datetime import datetime, timedelta
|
5
|
+
from typing import Dict, Any
|
6
|
+
|
7
|
+
|
8
|
+
class CouncilClass(AbstractGetBinDataClass):
|
9
|
+
"""
|
10
|
+
Concrete classes have to implement all abstract operations of the
|
11
|
+
base class. They can also override some operations with a default
|
12
|
+
implementation.
|
13
|
+
"""
|
14
|
+
|
15
|
+
def get_next_weekday(self, day_name: str) -> str:
    """Return the next date that falls on ``day_name`` as ``dd/mm/YYYY``.

    The result is always strictly after today: if today is already the
    requested weekday, the date one week ahead is returned.

    Args:
        day_name: English weekday name with an initial capital, e.g.
            ``"Monday"``.

    Raises:
        ValueError: If ``day_name`` is not one of the seven weekday names.
    """
    now = datetime.today()
    weekday_names = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]
    wanted = weekday_names.index(day_name)
    # A zero offset means "today", which is pushed out a full week.
    offset = (wanted - now.weekday()) % 7 or 7
    return (now + timedelta(days=offset)).strftime("%d/%m/%Y")
|
32
|
+
|
33
|
+
def parse_data(self, page: Any, **kwargs: Any) -> Dict[str, Any]:
    """Extract bin collection dates from a fetched Stroud property page.

    Args:
        page: Response-like object whose ``text`` attribute holds the HTML.
        **kwargs: Accepted for interface compatibility; not read here.

    Returns:
        ``{"bins": [...]}`` with one ``type`` / ``collectionDate`` entry
        per collection found, or an empty list when the
        ``rubbish-header`` section is missing.

    Raises:
        ValueError: If a value is neither an "Every <weekday>" phrase nor
            a date like "Tuesday 28 May 2024".
    """
    soup = BeautifulSoup(page.text, features="html.parser")
    soup.prettify()

    section_heading = soup.find("h2", id="rubbish-header")
    if not section_heading:
        print("Section not found")
        return {"bins": []}

    # Page headings mapped to the bin names used in the output.
    display_names = {
        "Next rubbish collection date": "Rubbish",
        "Next recycling collection date": "Recycling",
        "Food waste collection": "Food Waste",
        "Garden waste collection": "Garden Waste",
    }

    entries = section_heading.find_next("ul", class_="list-group").find_all("li")

    collected = {}
    for entry in entries:
        title = entry.find("h3")
        if title:
            label = title.text.strip()
            when = entry.find("p").strong.text.strip()
            collected[label] = when
        elif "Garden waste collection" in entry.text:
            # This item has no h3; its date is read from the second
            # strong element instead.
            label = "Garden waste collection"
            when = entry.find_all("strong")[1].text.strip()
            collected[label] = when

    print("Extracted data:", collected)

    bins = []
    for label, when in collected.items():
        if when.startswith("Every"):
            # "Every <weekday>": use the next occurrence of that weekday.
            collection_date = self.get_next_weekday(when.split()[1])
        else:
            # e.g. "Tuesday 28 May 2024" becomes "28/05/2024".
            collection_date = datetime.strptime(when, "%A %d %B %Y").strftime(
                "%d/%m/%Y"
            )
        bins.append(
            {
                "type": display_names.get(label, label),
                "collectionDate": collection_date,
            }
        )

    return {"bins": bins}
|
@@ -0,0 +1,110 @@
|
|
1
|
+
import time
|
2
|
+
from datetime import datetime
|
3
|
+
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
from selenium.webdriver.common.by import By
|
6
|
+
from selenium.webdriver.support import expected_conditions as EC
|
7
|
+
from selenium.webdriver.support.ui import Select
|
8
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
9
|
+
|
10
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
11
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
12
|
+
|
13
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
14
|
+
|
15
|
+
|
16
|
+
class CouncilClass(AbstractGetBinDataClass):
|
17
|
+
"""
|
18
|
+
Concrete classes have to implement all abstract operations of the
|
19
|
+
base class. They can also override some operations with a default
|
20
|
+
implementation.
|
21
|
+
"""
|
22
|
+
|
23
|
+
def parse_data(self, page: str, **kwargs) -> dict:
    """Drive the Tendring self-service form and read the waste table.

    Args:
        page: Not used as passed; it is replaced by the fixed service URL.
        **kwargs: Reads ``uprn``, ``postcode``, ``web_driver`` and
            ``headless``. The UPRN and postcode are validated with
            ``check_uprn`` / ``check_postcode`` before the browser starts.

    Returns:
        ``{"bins": [...]}`` sorted by collection date; each entry has a
        ``type`` (bracketed text removed) and a ``collectionDate``.

    Raises:
        Exception: Any failure is printed and re-raised; the webdriver is
            always quit first.
    """
    driver = None
    try:
        page = "https://tendring-self.achieveservice.com/en/service/Rubbish_and_recycling_collection_days"

        bin_data = {"bins": []}

        uprn = kwargs.get("uprn")
        postcode = kwargs.get("postcode")
        check_uprn(uprn)
        check_postcode(postcode)

        driver = create_webdriver(
            kwargs.get("web_driver"), kwargs.get("headless"), None, __name__
        )
        driver.get(page)

        # Dismiss the cookie banner, then continue without an account.
        WebDriverWait(driver, timeout=15).until(
            EC.presence_of_element_located((By.ID, "close-cookie-message"))
        ).click()

        WebDriverWait(driver, timeout=15).until(
            EC.presence_of_element_located(
                (By.LINK_TEXT, "or, continue without an account")
            )
        ).click()

        # The form is rendered inside an iframe.
        form_frame = WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.ID, "fillform-frame-1"))
        )
        driver.switch_to.frame(form_frame)

        wait = WebDriverWait(driver, 60)
        postcode_box = wait.until(
            EC.element_to_be_clickable((By.NAME, "postcode_search"))
        )
        postcode_box.send_keys(postcode)

        # Wait for the 'Select address' dropdown to be updated
        time.sleep(1)

        address_dropdown = wait.until(
            EC.element_to_be_clickable((By.NAME, "selectAddress"))
        )
        # Select the address whose option value equals the UPRN.
        Select(address_dropdown).select_by_value(str(uprn))

        # Wait for 'wasteTable' to be shown
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, "wasteTable")))

        soup = BeautifulSoup(driver.page_source, features="html.parser")
        rows = soup.find("table", {"class": "wasteTable"}).find("tbody").find_all("tr")

        for row in rows:
            cells = row.find_all("td")
            if not cells:
                continue
            date_text = cells[1].get_text(strip=True)
            if date_text == "":
                # Rows without a date are left out of the result.
                continue
            collected_on = datetime.strptime(date_text, "%d/%m/%Y")
            bin_data["bins"].append(
                {
                    "type": re.sub(r"\([^)]*\)", "", cells[0].get_text(strip=True)),
                    "collectionDate": collected_on.strftime(date_format),
                }
            )

        bin_data["bins"].sort(
            key=lambda entry: datetime.strptime(
                entry.get("collectionDate"), "%d/%m/%Y"
            )
        )

    except Exception as e:
        print(f"An error occurred: {e}")
        raise
    finally:
        # Always release the browser, whether or not scraping succeeded.
        if driver:
            driver.quit()
    return bin_data
|
@@ -17,7 +17,9 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
17
17
|
try:
|
18
18
|
# Create Selenium webdriver
|
19
19
|
headless = kwargs.get("headless")
|
20
|
-
driver = create_webdriver(
|
20
|
+
driver = create_webdriver(
|
21
|
+
kwargs.get("web_driver"), headless, None, __name__
|
22
|
+
)
|
21
23
|
driver.get(kwargs.get("url"))
|
22
24
|
|
23
25
|
# Make a BS4 object
|
@@ -0,0 +1,127 @@
|
|
1
|
+
import time
|
2
|
+
from datetime import datetime
|
3
|
+
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
from selenium.webdriver.common.by import By
|
6
|
+
from selenium.webdriver.support import expected_conditions as EC
|
7
|
+
from selenium.webdriver.support.ui import Select
|
8
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
9
|
+
from selenium.webdriver.common.keys import Keys
|
10
|
+
|
11
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
12
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
13
|
+
|
14
|
+
|
15
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
16
|
+
class CouncilClass(AbstractGetBinDataClass):
|
17
|
+
"""
|
18
|
+
Concrete classes have to implement all abstract operations of the
|
19
|
+
base class. They can also override some operations with a default
|
20
|
+
implementation.
|
21
|
+
"""
|
22
|
+
|
23
|
+
def parse_data(self, page: str, **kwargs) -> dict:
    """Drive the Waltham Forest AchieveForms lookup and scrape collections.

    Args:
        page: Not used as passed; it is replaced by the fixed form URL.
        **kwargs: Reads ``postcode``, ``paon`` (typed into the address
            search box), ``web_driver`` and ``headless``.

    Returns:
        ``{"bins": [...]}`` with one ``type`` / ``collectionDate`` entry
        for every centred collection block whose text contains a
        ``<day> <month>`` date. Blocks without a parsable date are left
        out instead of being recorded with a missing or stale date.

    Raises:
        Exception: Any failure is printed and re-raised; the webdriver is
            always quit first.
    """
    driver = None
    try:
        page = "https://portal.walthamforest.gov.uk/AchieveForms/?mode=fill&consentMessage=yes&form_uri=sandbox-publish://AF-Process-d62ccdd2-3de9-48eb-a229-8e20cbdd6393/AF-Stage-8bf39bf9-5391-4c24-857f-0dc2025c67f4/definition.json&process=1&process_uri=sandbox-processes://AF-Process-d62ccdd2-3de9-48eb-a229-8e20cbdd6393&process_id=AF-Process-d62ccdd2-3de9-48eb-a229-8e20cbdd6393"

        user_postcode = kwargs.get("postcode")
        user_paon = kwargs.get("paon")
        web_driver = kwargs.get("web_driver")
        headless = kwargs.get("headless")

        driver = create_webdriver(web_driver, headless, None, __name__)
        driver.get(page)

        # The form is rendered inside an iframe.
        form_frame = WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.ID, "fillform-frame-1"))
        )
        driver.switch_to.frame(form_frame)

        wait = WebDriverWait(driver, 60)
        postcode_box = wait.until(
            EC.element_to_be_clickable((By.NAME, "postcode_search"))
        )
        postcode_box.send_keys(user_postcode)

        find_address_button = wait.until(
            EC.element_to_be_clickable((By.ID, "lookupPostcode"))
        )
        find_address_button.send_keys(Keys.RETURN)

        dropdown = wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, "select2-choice"))
        )
        # NOTE(review): presumably lets the select2 widget finish rendering
        # before it is clicked - confirm whether an explicit wait could
        # replace this fixed pause.
        time.sleep(1)
        dropdown.click()

        dropdown_search = wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, "select2-input"))
        )
        dropdown_search.click()
        dropdown_search.send_keys(user_paon)
        dropdown_search.send_keys(Keys.RETURN)

        confirm_button = wait.until(
            EC.element_to_be_clickable((By.ID, "confirmSearchUPRN"))
        )
        confirm_button.send_keys(Keys.RETURN)

        # Wait for the results heading and a field container to be present
        # before taking the page snapshot.
        wait.until(
            EC.presence_of_element_located(
                (By.XPATH, "//h4[contains(text(), 'Your Collections')]")
            )
        )
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (
                    By.XPATH,
                    '//div[contains(@class, "fieldContent")]',
                )
            )
        )

        soup = BeautifulSoup(driver.page_source, features="html.parser")

        data = {"bins": []}

        for collection_div in soup.find_all("div", {"style": "text-align: center;"}):
            h5_tag = collection_div.find("h5")
            p_tag = collection_div.find("p")
            if not (h5_tag and p_tag):
                continue

            bold_tag = p_tag.find("b")
            if bold_tag is None:
                # No bold date text in this block; nothing to record.
                continue

            date_match = re.search(r"(\d+ \w+)", bold_tag.get_text(strip=True))
            if not date_match:
                # Skip rather than reuse a date left over from an earlier
                # block or reference a date that was never assigned.
                continue

            # The page omits the year, so the current year is assumed.
            # NOTE(review): get_next_occurrence_from_day_month presumably
            # rolls dates already past into next year - confirm in common.py.
            date_obj = datetime.strptime(
                date_match.group(1) + " " + str(datetime.today().year), "%d %B %Y"
            )
            collection_date = get_next_occurrence_from_day_month(date_obj).strftime(
                date_format
            )

            data["bins"].append(
                {
                    "type": h5_tag.get_text(strip=True),
                    "collectionDate": collection_date,
                }
            )

    except Exception as e:
        print(f"An error occurred: {e}")
        raise
    finally:
        # Always release the browser, whether or not scraping succeeded.
        if driver:
            driver.quit()
    return data
|