uk_bin_collection 0.78.0__py3-none-any.whl → 0.79.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
Files changed (25)
  1. uk_bin_collection/tests/council_feature_input_parity.py +38 -57
  2. uk_bin_collection/tests/features/validate_council_outputs.feature +5 -775
  3. uk_bin_collection/tests/input.json +65 -2
  4. uk_bin_collection/tests/step_defs/test_validate_council.py +44 -16
  5. uk_bin_collection/tests/test_common_functions.py +4 -2
  6. uk_bin_collection/uk_bin_collection/common.py +4 -1
  7. uk_bin_collection/uk_bin_collection/councils/ChichesterDistrictCouncil.py +110 -0
  8. uk_bin_collection/uk_bin_collection/councils/DartfordBoroughCouncil.py +44 -0
  9. uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +11 -6
  10. uk_bin_collection/uk_bin_collection/councils/EppingForestDistrictCouncil.py +51 -0
  11. uk_bin_collection/uk_bin_collection/councils/FarehamBoroughCouncil.py +20 -21
  12. uk_bin_collection/uk_bin_collection/councils/HounslowCouncil.py +122 -0
  13. uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py +3 -1
  14. uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py +13 -10
  15. uk_bin_collection/uk_bin_collection/councils/SouthKestevenDistrictCouncil.py +151 -0
  16. uk_bin_collection/uk_bin_collection/councils/StroudDistrictCouncil.py +94 -0
  17. uk_bin_collection/uk_bin_collection/councils/TendringDistrictCouncil.py +110 -0
  18. uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +3 -1
  19. uk_bin_collection/uk_bin_collection/councils/WalthamForest.py +127 -0
  20. uk_bin_collection/uk_bin_collection/create_new_council.py +51 -0
  21. {uk_bin_collection-0.78.0.dist-info → uk_bin_collection-0.79.0.dist-info}/METADATA +1 -1
  22. {uk_bin_collection-0.78.0.dist-info → uk_bin_collection-0.79.0.dist-info}/RECORD +25 -16
  23. {uk_bin_collection-0.78.0.dist-info → uk_bin_collection-0.79.0.dist-info}/LICENSE +0 -0
  24. {uk_bin_collection-0.78.0.dist-info → uk_bin_collection-0.79.0.dist-info}/WHEEL +0 -0
  25. {uk_bin_collection-0.78.0.dist-info → uk_bin_collection-0.79.0.dist-info}/entry_points.txt +0 -0
uk_bin_collection/uk_bin_collection/councils/HounslowCouncil.py
@@ -0,0 +1,122 @@
+ import time
+ from datetime import datetime
+
+ from bs4 import BeautifulSoup
+ from selenium.webdriver.common.by import By
+ from selenium.webdriver.support import expected_conditions as EC
+ from selenium.webdriver.support.ui import Select
+ from selenium.webdriver.support.wait import WebDriverWait
+ from selenium.webdriver.common.keys import Keys
+
+ from uk_bin_collection.uk_bin_collection.common import *
+ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+ # import the wonderful Beautiful Soup and the URL grabber
+ class CouncilClass(AbstractGetBinDataClass):
+     """
+     Concrete classes have to implement all abstract operations of the
+     base class. They can also override some operations with a default
+     implementation.
+     """
+
+     def parse_date(self, date_str):
+         date_formats = [
+             "This %A - %d %b %Y",  # Changed %B to %b to accommodate abbreviated month names
+             "Next %A - %d %b %Y",  # Same change here
+             "%A %d %b %Y",  # And here
+         ]
+         for format in date_formats:
+             try:
+                 return datetime.strptime(date_str, format).strftime("%d/%m/%Y")
+             except ValueError:
+                 continue
+         raise ValueError(f"Date format not recognized: {date_str}")
+
+     def parse_data(self, page: str, **kwargs) -> dict:
+         driver = None
+         try:
+             # Make a BS4 object
+
+             page = "https://www.hounslow.gov.uk/info/20272/recycling_and_waste_collection_day_finder"
+
+             user_postcode = kwargs.get("postcode")
+             user_uprn = kwargs.get("uprn")
+             user_paon = kwargs.get("paon")
+             web_driver = kwargs.get("web_driver")
+             headless = kwargs.get("headless")
+
+             driver = create_webdriver(web_driver, headless, None, __name__)
+             driver.get(page)
+
+             wait = WebDriverWait(driver, 60)
+
+             inputElement_postcodesearch = wait.until(
+                 EC.element_to_be_clickable((By.ID, "Postcode"))
+             )
+
+             inputElement_postcodesearch.send_keys(user_postcode)
+
+             inputElement_postcodesearch_btn = wait.until(
+                 EC.element_to_be_clickable((By.ID, "findAddress"))
+             )
+             inputElement_postcodesearch_btn.click()
+
+             inputElement_select_address = wait.until(
+                 EC.element_to_be_clickable((By.ID, "UPRN"))
+             )
+
+             select_element = wait.until(
+                 EC.visibility_of_element_located((By.ID, "UPRN"))
+             )  # Adjust this ID to your element's ID
+
+             # Create a Select object
+             select = Select(select_element)
+
+             # Fetch all options
+             options = select.options
+
+             # Loop through options to find the one that starts with the UPRN
+             for option in options:
+                 if option.get_attribute("value").startswith(f"{user_uprn}|"):
+                     option.click()  # Select the matching option
+                     break
+
+             results = wait.until(
+                 EC.element_to_be_clickable((By.CLASS_NAME, "bin_day_main_wrapper"))
+             )
+
+             soup = BeautifulSoup(driver.page_source, features="html.parser")
+             soup.prettify()
+
+             # Find all headers which include collection dates
+             collection_headers = soup.find_all("h4")
+             bins_data = []
+
+             # Process each collection date and corresponding bins
+             for header in collection_headers:
+                 date_text = header.get_text(strip=True)
+                 collection_date = self.parse_date(date_text)
+
+                 # Get next sibling which should be the list of bins
+                 bin_list = header.find_next_sibling("ul")
+                 if bin_list:
+                     for item in bin_list.find_all("li", class_="list-group-item"):
+                         bin_type = item.get_text(strip=True)
+                         bins_data.append(
+                             {"type": bin_type, "collectionDate": collection_date}
+                         )
+
+             # Construct the final JSON object
+             json_data = {"bins": bins_data}
+
+         except Exception as e:
+             # Here you can log the exception if needed
+             print(f"An error occurred: {e}")
+             # Optionally, re-raise the exception if you want it to propagate
+             raise
+         finally:
+             # This block ensures that the driver is closed regardless of an exception
+             if driver:
+                 driver.quit()
+         return json_data
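The parse_date helper above simply tries each accepted format in turn and falls through on ValueError. A standalone sketch of that fallback, with an assumed sample string matching the page's "This <weekday> - <date>" wording (not captured output):

from datetime import datetime

def parse_date(date_str: str) -> str:
    # Try each known page wording in turn; %b also accepts full month
    # names like "May" that happen to match the abbreviation.
    date_formats = [
        "This %A - %d %b %Y",
        "Next %A - %d %b %Y",
        "%A %d %b %Y",
    ]
    for fmt in date_formats:
        try:
            return datetime.strptime(date_str, fmt).strftime("%d/%m/%Y")
        except ValueError:
            continue
    raise ValueError(f"Date format not recognized: {date_str}")

print(parse_date("This Friday - 31 May 2024"))  # -> 31/05/2024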
uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py
@@ -61,7 +61,9 @@ class CouncilClass(AbstractGetBinDataClass):
          user_postcode = kwargs["postcode"]
 
          self._driver = driver = create_webdriver(
-             web_driver=kwargs["web_driver"], headless=kwargs.get("headless", True), session_name=__name__
+             web_driver=kwargs["web_driver"],
+             headless=kwargs.get("headless", True),
+             session_name=__name__,
          )
          driver.implicitly_wait(1)
 
uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py
@@ -1,7 +1,9 @@
- from bs4 import BeautifulSoup
- from datetime import datetime
  import re
+ from datetime import datetime
+
  import requests
+ from bs4 import BeautifulSoup
+
  from uk_bin_collection.uk_bin_collection.common import *
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -19,35 +21,36 @@ class CouncilClass(AbstractGetBinDataClass):
          user_postcode = kwargs.get("postcode")
          check_postcode(user_postcode)
 
-         root_url = "https://molevalley.cloudmappin.com/my-mv-address-search/search/{}/0".format(
+         root_url = "https://myproperty.molevalley.gov.uk/molevalley/api/live_addresses/{}?format=json".format(
              user_postcode
          )
-         response = requests.get(root_url)
+         requests.packages.urllib3.disable_warnings()
+         response = requests.get(root_url, verify=False)
 
          if not response.ok:
              raise ValueError("Invalid server response code retreiving data.")
 
          jsonData = response.json()
 
-         if len(jsonData["results"]) == 0:
+         if len(jsonData["results"]["features"]) == 0:
              raise ValueError("No collection data found for postcode provided.")
 
-         properties_found = jsonData["results"][0]["items"]
+         properties_found = jsonData["results"]["features"]
 
          # If UPRN is provided, we can check a specific address.
          html_data = None
          uprn = kwargs.get("uprn")
          if uprn:
              check_uprn(uprn)
-             for n, item in enumerate(properties_found):
-                 if uprn == str(int(item["info"][0][1]["value"])):
-                     html_data = properties_found[n]["info"][2][1]["value"]
+             for item in properties_found:
+                 if uprn == str(int(item["properties"]["blpu_uprn"])):
+                     html_data = item["properties"]["three_column_layout_html"]
                      break
              if html_data is None:
                  raise ValueError("No collection data found for UPRN provided.")
          else:
              # If UPRN not provided, just use the first result
-             html_data = properties_found[0]["info"][2][1]["value"]
+             html_data = properties_found[0]["properties"]["three_column_layout_html"]
 
          soup = BeautifulSoup(html_data, features="html.parser")
          soup.prettify()
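The rewritten lookup walks a GeoJSON-style payload: each address is a feature whose properties carry blpu_uprn and a pre-rendered three_column_layout_html blob. A minimal sketch of the matching logic, using an invented sample payload rather than real API output:

def find_html_for_uprn(payload: dict, uprn: str):
    # Mirror the loop above: normalise the stored UPRN to a string
    # before comparing against the user-supplied one.
    for feature in payload["results"]["features"]:
        if uprn == str(int(feature["properties"]["blpu_uprn"])):
            return feature["properties"]["three_column_layout_html"]
    return None

sample = {
    "results": {
        "features": [
            {
                "properties": {
                    "blpu_uprn": 200000111111,  # hypothetical UPRN
                    "three_column_layout_html": "<div>...</div>",
                }
            }
        ]
    }
}
print(find_html_for_uprn(sample, "200000111111"))  # -> "<div>...</div>"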
uk_bin_collection/uk_bin_collection/councils/SouthKestevenDistrictCouncil.py
@@ -0,0 +1,151 @@
+ import time
+ from datetime import datetime
+
+ from selenium.webdriver.support.ui import Select
+ from bs4 import BeautifulSoup
+ from selenium.webdriver.common.by import By
+ from selenium.webdriver.support import expected_conditions as EC
+ from selenium.webdriver.support.ui import Select
+ from selenium.webdriver.support.wait import WebDriverWait
+ from selenium.webdriver.common.keys import Keys
+
+ from uk_bin_collection.uk_bin_collection.common import *
+ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+ # import the wonderful Beautiful Soup and the URL grabber
+ class CouncilClass(AbstractGetBinDataClass):
+     """
+     Concrete classes have to implement all abstract operations of the
+     base class. They can also override some operations with a default
+     implementation.
+     """
+
+     # Extract data from the table
+     def format_date(self, date_str):
+         # Convert date format from "Fri 31 May 2024" to "31/05/2024"
+         date_match = re.search(r"\d{1,2} \w+ \d{4}", date_str)
+         if date_match:
+             date_obj = re.search(r"(\d{1,2}) (\w+) (\d{4})", date_match.group(0))
+             day = date_obj.group(1).zfill(2)
+             month_name = date_obj.group(2)
+             month = {
+                 "January": "01",
+                 "February": "02",
+                 "March": "03",
+                 "April": "04",
+                 "May": "05",
+                 "June": "06",
+                 "July": "07",
+                 "August": "08",
+                 "September": "09",
+                 "October": "10",
+                 "November": "11",
+                 "December": "12",
+             }[month_name]
+             year = date_obj.group(3)
+             formatted_date = f"{day}/{month}/{year}"
+         else:
+             formatted_date = "Unknown Date"
+         return formatted_date
+
+     def extract_bin_data(self, article):
+         date = article.find("div", class_="binday__cell--day").text.strip()
+         bin_type_class = article.get("class")[
+             1
+         ]  # Assuming the second class indicates the bin type
+         bin_type = "black" if "black" in bin_type_class else "silver"
+         formatted_date = self.format_date(date)
+         return {"type": bin_type, "collectionDate": formatted_date}
+
+     def parse_data(self, page: str, **kwargs) -> dict:
+         driver = None
+         try:
+             # Make a BS4 object
+
+             page = "https://pre.southkesteven.gov.uk/BinSearch.aspx"
+
+             user_postcode = kwargs.get("postcode")
+             user_uprn = kwargs.get("uprn")
+             web_driver = kwargs.get("web_driver")
+             headless = kwargs.get("headless")
+             house_number = kwargs.get("paon")
+
+             driver = create_webdriver(web_driver, headless, None, __name__)
+             driver.get(page)
+
+             wait = WebDriverWait(driver, 60)
+
+             inputElement_postcodesearch = wait.until(
+                 EC.visibility_of_element_located((By.ID, "title"))
+             )
+             inputElement_postcodesearch.clear()
+
+             inputElement_postcodesearch.send_keys(user_postcode)
+
+             inputElement_postcodesearch_btn = wait.until(
+                 EC.element_to_be_clickable((By.XPATH, "//button/span[text()='Search']"))
+             )
+             inputElement_postcodesearch_btn.click()
+
+             inputElement_select_address = wait.until(
+                 EC.element_to_be_clickable((By.ID, "address"))
+             )
+
+             # Now create a Select object based on the found element
+             dropdown = Select(inputElement_select_address)
+
+             # Select the option by visible text
+             dropdown.select_by_visible_text(house_number)
+
+             inputElement_results_btn = wait.until(
+                 EC.element_to_be_clickable(
+                     (By.XPATH, "//button[text()='View your bin days']")
+                 )
+             )
+             inputElement_results_btn.click()
+
+             p_element = wait.until(
+                 EC.presence_of_element_located(
+                     (
+                         By.XPATH,
+                         "//p[contains(text(), 'Your next bin collection date is ')]",
+                     )
+                 )
+             )
+
+             soup = BeautifulSoup(driver.page_source, features="html.parser")
+             soup.prettify()
+
+             bin_data = []
+
+             # Extract data from the first aside element
+             first_aside = soup.find("aside", class_="alert")
+             if first_aside:
+                 next_collection_date = first_aside.find(
+                     "span", class_="alert__heading alpha"
+                 ).text.strip()
+                 bin_info = {
+                     "type": "purple",  # Based on the provided information in the HTML, assuming it's a purple bin day.
+                     "collectionDate": self.format_date(next_collection_date),
+                 }
+                 bin_data.append(bin_info)
+
+             # Extract data from articles
+             articles = soup.find_all("article", class_="binday")
+             for article in articles:
+                 bin_info = self.extract_bin_data(article)
+                 bin_data.append(bin_info)
+
+             result = {"bins": bin_data}
+
+         except Exception as e:
+             # Here you can log the exception if needed
+             print(f"An error occurred: {e}")
+             # Optionally, re-raise the exception if you want it to propagate
+             raise
+         finally:
+             # This block ensures that the driver is closed regardless of an exception
+             if driver:
+                 driver.quit()
+         return result
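format_date above maps month names by hand; the same conversion can be had from strptime's %B directive. A hedged alternative sketch, not the code shipped in the release:

import re
from datetime import datetime

def format_date(date_str: str) -> str:
    # "Fri 31 May 2024" -> "31/05/2024"; unmatched input keeps the
    # "Unknown Date" fallback used above.
    date_match = re.search(r"\d{1,2} \w+ \d{4}", date_str)
    if not date_match:
        return "Unknown Date"
    return datetime.strptime(date_match.group(0), "%d %B %Y").strftime("%d/%m/%Y")

print(format_date("Fri 31 May 2024"))  # -> 31/05/2024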
uk_bin_collection/uk_bin_collection/councils/StroudDistrictCouncil.py
@@ -0,0 +1,94 @@
+ from bs4 import BeautifulSoup
+ from uk_bin_collection.uk_bin_collection.common import *
+ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+ from datetime import datetime, timedelta
+ from typing import Dict, Any
+
+
+ class CouncilClass(AbstractGetBinDataClass):
+     """
+     Concrete classes have to implement all abstract operations of the
+     base class. They can also override some operations with a default
+     implementation.
+     """
+
+     def get_next_weekday(self, day_name: str) -> str:
+         days_of_week = [
+             "Monday",
+             "Tuesday",
+             "Wednesday",
+             "Thursday",
+             "Friday",
+             "Saturday",
+             "Sunday",
+         ]
+         today = datetime.today()
+         target_day = days_of_week.index(day_name)
+         days_until_target = (target_day - today.weekday() + 7) % 7
+         if days_until_target == 0:
+             days_until_target = 7  # Next occurrence should be next week
+         next_weekday = today + timedelta(days=days_until_target)
+         return next_weekday.strftime("%d/%m/%Y")
+
+     def parse_data(self, page: Any, **kwargs: Any) -> Dict[str, Any]:
+         # Make a BS4 object
+         soup = BeautifulSoup(page.text, features="html.parser")
+         soup.prettify()
+
+         data = {"bins": []}
+         # Find the section with the title "Bins, rubbish & recycling"
+         h2_header = soup.find("h2", id="rubbish-header")
+
+         # Mapping original titles to new titles
+         title_mapping = {
+             "Next rubbish collection date": "Rubbish",
+             "Next recycling collection date": "Recycling",
+             "Food waste collection": "Food Waste",
+             "Garden waste collection": "Garden Waste",
+         }
+
+         # Extract the list items following this section
+         if h2_header:
+             list_items = h2_header.find_next("ul", class_="list-group").find_all("li")
+
+             extracted_data = {}
+             for item in list_items:
+                 header = item.find("h3")
+                 if header:
+                     key = header.text.strip()
+                     date = item.find("p").strong.text.strip()
+                     extracted_data[key] = date
+                 else:
+                     # Special handling for garden waste collection
+                     if "Garden waste collection" in item.text:
+                         key = "Garden waste collection"
+                         date = item.find_all("strong")[1].text.strip()
+                         extracted_data[key] = date
+
+             print("Extracted data:", extracted_data)
+
+             # Transform the data to the required schema
+             bin_data = {"bins": []}
+
+             for key, value in extracted_data.items():
+                 if value.startswith("Every"):
+                     # Extract the day name
+                     day_name = value.split()[1]
+                     # Convert to the next occurrence of that day
+                     formatted_date = self.get_next_weekday(day_name)
+                 else:
+                     # Convert date format from "Tuesday 28 May 2024" to "28/05/2024"
+                     date_obj = datetime.strptime(value, "%A %d %B %Y")
+                     formatted_date = date_obj.strftime("%d/%m/%Y")
+
+                 bin_entry = {
+                     "type": title_mapping.get(key, key),
+                     "collectionDate": formatted_date,
+                 }
+
+                 bin_data["bins"].append(bin_entry)
+
+             return bin_data
+         else:
+             print("Section not found")
+             return data
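The modular arithmetic in get_next_weekday is worth a worked example: (target_day - today.weekday() + 7) % 7 gives the number of days until the next occurrence of the target weekday, and a result of 0 (today is that weekday) is bumped to 7 so "Every Monday" on a Monday resolves to next week. A standalone sketch:

def days_until(target_day: int, today_weekday: int) -> int:
    # Python convention: Monday == 0 ... Sunday == 6.
    days = (target_day - today_weekday + 7) % 7
    return 7 if days == 0 else days

assert days_until(0, 2) == 5  # from a Wednesday, the coming Monday is 5 days off
assert days_until(2, 2) == 7  # "Every Wednesday" on a Wednesday means next week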
uk_bin_collection/uk_bin_collection/councils/TendringDistrictCouncil.py
@@ -0,0 +1,110 @@
+ import time
+ from datetime import datetime
+
+ from bs4 import BeautifulSoup
+ from selenium.webdriver.common.by import By
+ from selenium.webdriver.support import expected_conditions as EC
+ from selenium.webdriver.support.ui import Select
+ from selenium.webdriver.support.wait import WebDriverWait
+
+ from uk_bin_collection.uk_bin_collection.common import *
+ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+ # import the wonderful Beautiful Soup and the URL grabber
+
+
+ class CouncilClass(AbstractGetBinDataClass):
+     """
+     Concrete classes have to implement all abstract operations of the
+     base class. They can also override some operations with a default
+     implementation.
+     """
+
+     def parse_data(self, page: str, **kwargs) -> dict:
+         driver = None
+         try:
+             page = "https://tendring-self.achieveservice.com/en/service/Rubbish_and_recycling_collection_days"
+
+             bin_data = {"bins": []}
+
+             user_uprn = kwargs.get("uprn")
+             user_postcode = kwargs.get("postcode")
+             web_driver = kwargs.get("web_driver")
+             headless = kwargs.get("headless")
+             check_uprn(user_uprn)
+             check_postcode(user_postcode)
+             # Create Selenium webdriver
+             driver = create_webdriver(web_driver, headless, None, __name__)
+             driver.get(page)
+
+             cookies_button = WebDriverWait(driver, timeout=15).until(
+                 EC.presence_of_element_located((By.ID, "close-cookie-message"))
+             )
+             cookies_button.click()
+
+             without_login_button = WebDriverWait(driver, timeout=15).until(
+                 EC.presence_of_element_located(
+                     (By.LINK_TEXT, "or, continue without an account")
+                 )
+             )
+             without_login_button.click()
+
+             iframe_presense = WebDriverWait(driver, 30).until(
+                 EC.presence_of_element_located((By.ID, "fillform-frame-1"))
+             )
+
+             driver.switch_to.frame(iframe_presense)
+             wait = WebDriverWait(driver, 60)
+             inputElement_postcodesearch = wait.until(
+                 EC.element_to_be_clickable((By.NAME, "postcode_search"))
+             )
+
+             inputElement_postcodesearch.send_keys(user_postcode)
+
+             # Wait for the 'Select address' dropdown to be updated
+             time.sleep(1)
+
+             dropdown = wait.until(
+                 EC.element_to_be_clickable((By.NAME, "selectAddress"))
+             )
+             # Create a 'Select' for it, then select the first address in the list
+             # (Index 0 is "Select...")
+             dropdownSelect = Select(dropdown)
+             dropdownSelect.select_by_value(str(user_uprn))
+
+             # Wait for 'wasteTable' to be shown
+             wait.until(EC.presence_of_element_located((By.CLASS_NAME, "wasteTable")))
+
+             soup = BeautifulSoup(driver.page_source, features="html.parser")
+             bins = (
+                 soup.find("table", {"class": "wasteTable"}).find("tbody").find_all("tr")
+             )
+             for bin_row in bins:
+                 bin = bin_row.find_all("td")
+                 if bin:
+                     if bin[1].get_text(strip=True) != "":
+                         bin_date = datetime.strptime(
+                             bin[1].get_text(strip=True), "%d/%m/%Y"
+                         )
+                         dict_data = {
+                             "type": re.sub(
+                                 r"\([^)]*\)", "", bin[0].get_text(strip=True)
+                             ),
+                             "collectionDate": bin_date.strftime(date_format),
+                         }
+                         bin_data["bins"].append(dict_data)
+
+             bin_data["bins"].sort(
+                 key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+             )
+
+         except Exception as e:
+             # Here you can log the exception if needed
+             print(f"An error occurred: {e}")
+             # Optionally, re-raise the exception if you want it to propagate
+             raise
+         finally:
+             # This block ensures that the driver is closed regardless of an exception
+             if driver:
+                 driver.quit()
+         return bin_data
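Because collection dates are kept as "DD/MM/YYYY" strings, the final sort must parse them back into datetimes in the key function; comparing the strings directly would order by day-of-month rather than chronologically. A small illustration with made-up rows:

from datetime import datetime

bins = [
    {"type": "Garden waste", "collectionDate": "01/07/2024"},
    {"type": "Household waste", "collectionDate": "24/06/2024"},
]
bins.sort(key=lambda x: datetime.strptime(x["collectionDate"], "%d/%m/%Y"))
print([b["collectionDate"] for b in bins])  # -> ['24/06/2024', '01/07/2024']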
uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py
@@ -17,7 +17,9 @@ class CouncilClass(AbstractGetBinDataClass):
          try:
              # Create Selenium webdriver
              headless = kwargs.get("headless")
-             driver = create_webdriver(kwargs.get("web_driver"), headless, None, __name__)
+             driver = create_webdriver(
+                 kwargs.get("web_driver"), headless, None, __name__
+             )
              driver.get(kwargs.get("url"))
 
              # Make a BS4 object
uk_bin_collection/uk_bin_collection/councils/WalthamForest.py
@@ -0,0 +1,127 @@
+ import time
+ from datetime import datetime
+
+ from bs4 import BeautifulSoup
+ from selenium.webdriver.common.by import By
+ from selenium.webdriver.support import expected_conditions as EC
+ from selenium.webdriver.support.ui import Select
+ from selenium.webdriver.support.wait import WebDriverWait
+ from selenium.webdriver.common.keys import Keys
+
+ from uk_bin_collection.uk_bin_collection.common import *
+ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+ # import the wonderful Beautiful Soup and the URL grabber
+ class CouncilClass(AbstractGetBinDataClass):
+     """
+     Concrete classes have to implement all abstract operations of the
+     base class. They can also override some operations with a default
+     implementation.
+     """
+
+     def parse_data(self, page: str, **kwargs) -> dict:
+         driver = None
+         try:
+             page = "https://portal.walthamforest.gov.uk/AchieveForms/?mode=fill&consentMessage=yes&form_uri=sandbox-publish://AF-Process-d62ccdd2-3de9-48eb-a229-8e20cbdd6393/AF-Stage-8bf39bf9-5391-4c24-857f-0dc2025c67f4/definition.json&process=1&process_uri=sandbox-processes://AF-Process-d62ccdd2-3de9-48eb-a229-8e20cbdd6393&process_id=AF-Process-d62ccdd2-3de9-48eb-a229-8e20cbdd6393"
+
+             user_postcode = kwargs.get("postcode")
+             user_uprn = kwargs.get("uprn")
+             user_paon = kwargs.get("paon")
+             web_driver = kwargs.get("web_driver")
+             headless = kwargs.get("headless")
+
+             driver = create_webdriver(web_driver, headless, None, __name__)
+             driver.get(page)
+
+             iframe_presense = WebDriverWait(driver, 30).until(
+                 EC.presence_of_element_located((By.ID, "fillform-frame-1"))
+             )
+
+             driver.switch_to.frame(iframe_presense)
+             wait = WebDriverWait(driver, 60)
+             inputElement_postcodesearch = wait.until(
+                 EC.element_to_be_clickable((By.NAME, "postcode_search"))
+             )
+
+             inputElement_postcodesearch.send_keys(user_postcode)
+             find_address_button = wait.until(
+                 EC.element_to_be_clickable((By.ID, "lookupPostcode"))
+             )
+
+             find_address_button.send_keys(Keys.RETURN)
+
+             dropdown = wait.until(
+                 EC.element_to_be_clickable((By.CLASS_NAME, "select2-choice"))
+             )
+
+             time.sleep(1)
+             dropdown.click()
+
+             dropdown_search = wait.until(
+                 EC.element_to_be_clickable((By.CLASS_NAME, "select2-input"))
+             )
+             dropdown_search.click()
+
+             dropdown_search.send_keys(user_paon)
+             dropdown_search.send_keys(Keys.RETURN)
+
+             find_ac_button = wait.until(
+                 EC.element_to_be_clickable((By.ID, "confirmSearchUPRN"))
+             )
+
+             find_ac_button.send_keys(Keys.RETURN)
+             h4_element = wait.until(
+                 EC.presence_of_element_located(
+                     (By.XPATH, "//h4[contains(text(), 'Your Collections')]")
+                 )
+             )
+
+             data_table = WebDriverWait(driver, 10).until(
+                 EC.presence_of_element_located(
+                     (
+                         By.XPATH,
+                         '//div[contains(@class, "fieldContent")]',
+                     )
+                 )
+             )
+             # Make a BS4 object
+             soup = BeautifulSoup(driver.page_source, features="html.parser")
+
+             data = {"bins": []}
+
+             collection_divs = soup.find_all("div", {"style": "text-align: center;"})
+
+             for collection_div in collection_divs:
+                 h5_tag = collection_div.find("h5")
+                 p_tag = collection_div.find("p")
+
+                 if h5_tag and p_tag:
+                     bin_type = h5_tag.get_text(strip=True)
+                     collection_date_text = p_tag.find("b").get_text(strip=True)
+
+                     # Extract and format the date
+                     date_match = re.search(r"(\d+ \w+)", collection_date_text)
+                     if date_match:
+                         date_str = date_match.group(1)
+                         date_obj = datetime.strptime(
+                             date_str + " " + str(datetime.today().year), "%d %B %Y"
+                         )
+                         collection_date = get_next_occurrence_from_day_month(
+                             date_obj
+                         ).strftime(date_format)
+
+                         data["bins"].append(
+                             {"type": bin_type, "collectionDate": collection_date}
+                         )
+
+         except Exception as e:
+             # Here you can log the exception if needed
+             print(f"An error occurred: {e}")
+             # Optionally, re-raise the exception if you want it to propagate
+             raise
+         finally:
+             # This block ensures that the driver is closed regardless of an exception
+             if driver:
+                 driver.quit()
+         return data
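The page shows only day and month (e.g. "26 June"), so the code appends the current year and hands the result to get_next_occurrence_from_day_month from common.py. Assuming that helper rolls past dates into the following year, its behaviour might look like this illustrative stand-in (not the actual implementation):

from datetime import datetime

def next_occurrence(day_month: datetime, today=None) -> datetime:
    # Hypothetical stand-in: keep this year's date if it is still ahead,
    # otherwise assume the collection falls in the next year.
    today = today or datetime.now()
    candidate = day_month.replace(year=today.year)
    if candidate.date() < today.date():
        candidate = candidate.replace(year=today.year + 1)
    return candidate

parsed = datetime.strptime("26 June 2024", "%d %B %Y")
print(next_occurrence(parsed, datetime(2024, 12, 1)))  # -> 2025-06-26 00:00:00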