uk_bin_collection 0.152.8__py3-none-any.whl → 0.152.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. uk_bin_collection/tests/input.json +11 -15
  2. uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py +69 -46
  3. uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +119 -37
  4. uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py +158 -115
  5. uk_bin_collection/uk_bin_collection/councils/BasildonCouncil.py +87 -66
  6. uk_bin_collection/uk_bin_collection/councils/BlabyDistrictCouncil.py +5 -1
  7. uk_bin_collection/uk_bin_collection/councils/BlaenauGwentCountyBoroughCouncil.py +91 -66
  8. uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py +88 -67
  9. uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +67 -56
  10. uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +63 -95
  11. uk_bin_collection/uk_bin_collection/councils/CherwellDistrictCouncil.py +39 -18
  12. uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +106 -97
  13. uk_bin_collection/uk_bin_collection/councils/CopelandBoroughCouncil.py +80 -75
  14. uk_bin_collection/uk_bin_collection/councils/CotswoldDistrictCouncil.py +191 -67
  15. uk_bin_collection/uk_bin_collection/councils/CoventryCityCouncil.py +6 -2
  16. uk_bin_collection/uk_bin_collection/councils/SouthwarkCouncil.py +23 -1
  17. uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +4 -1
  18. uk_bin_collection/uk_bin_collection/get_bin_data.py +1 -1
  19. {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.9.dist-info}/METADATA +1 -1
  20. {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.9.dist-info}/RECORD +23 -24
  21. uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py +0 -69
  22. {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.9.dist-info}/LICENSE +0 -0
  23. {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.9.dist-info}/WHEEL +0 -0
  24. {uk_bin_collection-0.152.8.dist-info → uk_bin_collection-0.152.9.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,6 @@
1
1
  import time
2
+ import re
3
+ from datetime import datetime
2
4
 
3
5
  from bs4 import BeautifulSoup
4
6
  from selenium.webdriver.common.by import By
@@ -15,41 +17,38 @@ def get_seasonal_overrides():
15
17
  if response.status_code == 200:
16
18
  soup = BeautifulSoup(response.text, "html.parser")
17
19
  body_div = soup.find("div", class_="field--name-body")
18
- ul_element = body_div.find("ul")
19
- if ul_element:
20
- li_elements = ul_element.find_all("li")
21
- overrides_dict = {}
22
- for li_element in li_elements:
23
- li_text = li_element.text.strip()
24
- li_text = re.sub(r"\([^)]*\)", "", li_text).strip()
25
- if "Collections for" in li_text and "will be revised to" in li_text:
26
- parts = li_text.split("will be revised to")
27
- original_date = (
28
- parts[0]
29
- .replace("Collections for", "")
30
- .replace("\xa0", " ")
31
- .strip()
32
- )
33
- revised_date = parts[1].strip()
34
-
35
- # Extract day and month
36
- date_parts = original_date.split()[1:]
37
- if len(date_parts) == 2:
38
- day, month = date_parts
39
- # Ensure original_date has leading zeros for single-digit days
40
- day = day.zfill(2)
41
- original_date = f"{original_date.split()[0]} {day} {month}"
42
-
43
- # Store the information in the dictionary
44
- overrides_dict[original_date] = revised_date
45
- return overrides_dict
46
- else:
47
- print("UL element not found within the specified div.")
48
- else:
49
- print(f"Failed to retrieve the page. Status code: {response.status_code}")
50
-
51
-
52
- # import the wonderful Beautiful Soup and the URL grabber
20
+ if body_div:
21
+ ul_element = body_div.find("ul")
22
+ if ul_element:
23
+ li_elements = ul_element.find_all("li")
24
+ overrides_dict = {}
25
+ for li_element in li_elements:
26
+ li_text = li_element.text.strip()
27
+ li_text = re.sub(r"\([^)]*\)", "", li_text).strip()
28
+ if "Collections for" in li_text and "will be revised to" in li_text:
29
+ parts = li_text.split("will be revised to")
30
+ original_date = (
31
+ parts[0]
32
+ .replace("Collections for", "")
33
+ .replace("\xa0", " ")
34
+ .strip()
35
+ )
36
+ revised_date = parts[1].strip()
37
+
38
+ # Extract day and month
39
+ date_parts = original_date.split()[1:]
40
+ if len(date_parts) == 2:
41
+ day, month = date_parts
42
+ # Ensure original_date has leading zeros for single-digit days
43
+ day = day.zfill(2)
44
+ original_date = f"{original_date.split()[0]} {day} {month}"
45
+
46
+ # Store the information in the dictionary
47
+ overrides_dict[original_date] = revised_date
48
+ return overrides_dict
49
+ return {}
50
+
51
+
53
52
  class CouncilClass(AbstractGetBinDataClass):
54
53
  """
55
54
  Concrete classes have to implement all abstract operations of the
@@ -74,65 +73,66 @@ class CouncilClass(AbstractGetBinDataClass):
74
73
 
75
74
  driver.get(page)
76
75
 
77
- wait = WebDriverWait(driver, 10)
78
- accept_cookies_button = wait.until(
79
- EC.element_to_be_clickable(
80
- (
81
- By.XPATH,
82
- "//button[contains(text(), 'Accept additional cookies')]",
76
+ # Handle first cookie banner
77
+ try:
78
+ wait = WebDriverWait(driver, 10)
79
+ accept_cookies_button = wait.until(
80
+ EC.element_to_be_clickable(
81
+ (
82
+ By.XPATH,
83
+ "//button[contains(text(), 'Accept additional cookies')]",
84
+ )
83
85
  )
84
86
  )
85
- )
86
- accept_cookies_button.click()
87
+ driver.execute_script("arguments[0].click();", accept_cookies_button)
88
+ except Exception as e:
89
+ print(f"Cookie banner not found or clickable: {e}")
90
+ pass
87
91
 
88
- # Wait for the element to be clickable
92
+ # Click the collection day link
89
93
  wait = WebDriverWait(driver, 10)
90
94
  find_your_collection_button = wait.until(
91
95
  EC.element_to_be_clickable(
92
96
  (By.LINK_TEXT, "Find your household collection day")
93
97
  )
94
98
  )
95
-
96
- # Scroll to the element (in case something is blocking it)
97
99
  driver.execute_script(
98
100
  "arguments[0].scrollIntoView();", find_your_collection_button
99
101
  )
102
+ time.sleep(1)
103
+ driver.execute_script("arguments[0].click();", find_your_collection_button)
100
104
 
101
- # Click the element
102
- find_your_collection_button.click()
103
-
105
+ # Handle second cookie banner
104
106
  try:
105
107
  accept_cookies = WebDriverWait(driver, timeout=10).until(
106
108
  EC.presence_of_element_located((By.ID, "epdagree"))
107
109
  )
108
- accept_cookies.click()
110
+ driver.execute_script("arguments[0].click();", accept_cookies)
109
111
  accept_cookies_submit = WebDriverWait(driver, timeout=10).until(
110
112
  EC.presence_of_element_located((By.ID, "epdsubmit"))
111
113
  )
112
- accept_cookies_submit.click()
113
- except:
114
- print(
115
- "Accept cookies banner not found or clickable within the specified time."
116
- )
114
+ driver.execute_script("arguments[0].click();", accept_cookies_submit)
115
+ except Exception as e:
116
+ print(f"Second cookie banner not found or clickable: {e}")
117
117
  pass
118
118
 
119
+ # Enter postcode
119
120
  postcode_input = WebDriverWait(driver, 10).until(
120
121
  EC.presence_of_element_located(
121
122
  (By.CSS_SELECTOR, '[aria-label="Postcode"]')
122
123
  )
123
124
  )
124
-
125
125
  postcode_input.send_keys(user_postcode)
126
126
 
127
+ # Click find address
127
128
  find_address_button = WebDriverWait(driver, 30).until(
128
129
  EC.element_to_be_clickable((By.CSS_SELECTOR, '[value="Find address"]'))
129
130
  )
130
131
  driver.execute_script("arguments[0].scrollIntoView();", find_address_button)
131
132
  driver.execute_script("arguments[0].click();", find_address_button)
132
- # find_address_button.click()
133
133
 
134
- time.sleep(15)
135
- # Wait for address box to be visible
134
+ time.sleep(5)
135
+ # Wait for address dropdown
136
136
  select_address_input = WebDriverWait(driver, 10).until(
137
137
  EC.presence_of_element_located(
138
138
  (
@@ -142,79 +142,122 @@ class CouncilClass(AbstractGetBinDataClass):
142
142
  )
143
143
  )
144
144
 
145
- # Select address based
145
+ # Select address based on postcode and house number
146
146
  select = Select(select_address_input)
147
- addr_label = f"{user_postcode}, {user_paon},"
147
+ selected = False
148
+
148
149
  for addr_option in select.options:
149
- option_name = addr_option.accessible_name[0 : len(addr_label)]
150
- if option_name == addr_label:
150
+ if not addr_option.text or addr_option.text == "Please Select...":
151
+ continue
152
+
153
+ option_text = addr_option.text.upper()
154
+ postcode_upper = user_postcode.upper()
155
+ paon_str = str(user_paon).upper()
156
+
157
+ # Check if this option contains both postcode and house number
158
+ if (postcode_upper in option_text and
159
+ (f", {paon_str}," in option_text or f", {paon_str} " in option_text or
160
+ f", {paon_str}A," in option_text or option_text.endswith(f", {paon_str}"))):
161
+ select.select_by_value(addr_option.get_attribute('value'))
162
+ selected = True
151
163
  break
152
- select.select_by_value(addr_option.text)
153
-
154
- time.sleep(10)
155
- # Wait for the specified div to be present
156
- target_div_id = "MainContent_CUSTOM_FIELD_808562d4b07f437ea751317cabd19d9ed93a174c32b14f839b65f6abc42d8108_div"
157
- target_div = WebDriverWait(driver, 10).until(
158
- EC.presence_of_element_located((By.ID, target_div_id))
159
- )
164
+
165
+ if not selected:
166
+ raise ValueError(f"Address not found for postcode {user_postcode} and house number {user_paon}")
160
167
 
161
168
  time.sleep(5)
162
- soup = BeautifulSoup(driver.page_source, "html.parser")
169
+
170
+ # Wait for bin collection data to appear anywhere on the page
171
+ try:
172
+ WebDriverWait(driver, 15).until(
173
+ EC.presence_of_element_located(
174
+ (By.XPATH, "//div[contains(text(), 'Next collection') or contains(text(), 'collection date')]")
175
+ )
176
+ )
177
+ except:
178
+ raise ValueError("Could not find bin collection data on the page")
163
179
 
164
- # Find the div with the specified id
165
- target_div = soup.find("div", {"id": target_div_id})
180
+ time.sleep(2)
181
+ soup = BeautifulSoup(driver.page_source, "html.parser")
166
182
 
167
- # Handle the additional table of info for xmas
183
+ # Handle seasonal overrides
168
184
  try:
169
185
  overrides_dict = get_seasonal_overrides()
170
186
  except Exception as e:
171
187
  overrides_dict = {}
172
188
 
173
- # Check if the div is found
174
- if target_div:
175
- bin_data = {"bins": []}
176
-
177
- for bin_div in target_div.find_all(
178
- "div",
179
- {"style": re.compile("background-color:.*; padding-left: 4px;")},
180
- ):
181
- bin_type = bin_div.find("strong").text.strip()
182
- collection_date_string = (
183
- re.search(r"Next collection date:\s+(.*)", bin_div.text)
184
- .group(1)
185
- .strip()
186
- .replace(",", "")
187
- )
188
- if collection_date_string in overrides_dict:
189
- # Replace with the revised date from overrides_dict
190
- collection_date_string = overrides_dict[collection_date_string]
189
+ # Look for bin collection data anywhere on the page
190
+ bin_data = {"bins": []}
191
+
192
+ # Find all divs that contain "Next collection date:"
193
+ collection_divs = soup.find_all("div", string=re.compile(r"Next collection date:"))
194
+
195
+ if not collection_divs:
196
+ # Try finding parent divs that contain collection info
197
+ collection_divs = []
198
+ for div in soup.find_all("div"):
199
+ if div.get_text() and "Next collection date:" in div.get_text():
200
+ collection_divs.append(div)
201
+
202
+ # Process collection divs
203
+
204
+ for collection_div in collection_divs:
205
+ try:
206
+ # Get the parent div which should contain both bin type and collection date
207
+ parent_div = collection_div.parent if collection_div.parent else collection_div
208
+ full_text = parent_div.get_text()
209
+
210
+ # Extract bin type (everything before "Next collection date:")
211
+ lines = full_text.split('\n')
212
+ bin_type = "Unknown"
213
+ collection_date_string = ""
214
+
215
+ for i, line in enumerate(lines):
216
+ line = line.strip()
217
+ if "Next collection date:" in line:
218
+ # Bin type is usually the previous line or part of current line
219
+ if i > 0:
220
+ bin_type = lines[i-1].strip()
221
+
222
+ # Extract date from current line
223
+ date_match = re.search(r"Next collection date:\s+(.*)", line)
224
+ if date_match:
225
+ collection_date_string = date_match.group(1).strip().replace(",", "")
226
+ break
227
+
228
+ if collection_date_string:
229
+ if collection_date_string in overrides_dict:
230
+ collection_date_string = overrides_dict[collection_date_string]
191
231
 
192
- current_date = datetime.now()
193
- parsed_date = datetime.strptime(
194
- collection_date_string + f" {current_date.year}", "%A %d %B %Y"
195
- )
196
- # Check if the parsed date is in the past and not today
197
- if parsed_date.date() < current_date.date():
198
- # If so, set the year to the next year
199
- parsed_date = parsed_date.replace(year=current_date.year + 1)
200
- else:
201
- # If not, set the year to the current year
202
- parsed_date = parsed_date.replace(year=current_date.year)
203
- formatted_date = parsed_date.strftime("%d/%m/%Y")
204
-
205
- contains_date(formatted_date)
206
- bin_info = {"type": bin_type, "collectionDate": formatted_date}
207
- bin_data["bins"].append(bin_info)
208
- else:
209
- raise ValueError("Collection data not found.")
232
+ current_date = datetime.now()
233
+ parsed_date = datetime.strptime(
234
+ collection_date_string + f" {current_date.year}", "%A %d %B %Y"
235
+ )
236
+
237
+ # Check if the parsed date is in the past
238
+ if parsed_date.date() < current_date.date():
239
+ parsed_date = parsed_date.replace(year=current_date.year + 1)
240
+
241
+ formatted_date = parsed_date.strftime("%d/%m/%Y")
242
+ contains_date(formatted_date)
243
+
244
+ bin_info = {"type": bin_type, "collectionDate": formatted_date}
245
+ bin_data["bins"].append(bin_info)
246
+
247
+ except Exception as e:
248
+ pass # Skip problematic divs
249
+ continue
250
+
251
+ if not bin_data["bins"]:
252
+ # Some addresses may not have bin collection data available
253
+ print("No bin collection data found for this address")
254
+ bin_data = {"bins": []}
210
255
 
211
256
  except Exception as e:
212
- # Here you can log the exception if needed
213
257
  print(f"An error occurred: {e}")
214
- # Optionally, re-raise the exception if you want it to propagate
215
258
  raise
216
259
  finally:
217
- # This block ensures that the driver is closed regardless of an exception
218
260
  if driver:
219
261
  driver.quit()
220
- return bin_data
262
+
263
+ return bin_data
@@ -1,6 +1,11 @@
1
1
  import requests
2
2
  import json
3
3
  from datetime import datetime
4
+ from bs4 import BeautifulSoup
5
+ from selenium import webdriver
6
+ from selenium.webdriver.common.by import By
7
+ from selenium.webdriver.support.ui import WebDriverWait
8
+ from selenium.webdriver.support import expected_conditions as EC
4
9
  from uk_bin_collection.uk_bin_collection.common import (
5
10
  check_uprn,
6
11
  date_format as DATE_FORMAT,
@@ -14,76 +19,92 @@ class CouncilClass(AbstractGetBinDataClass):
14
19
  """
15
20
 
16
21
  def parse_data(self, page: str, **kwargs) -> dict:
17
- url_base = (
18
- "https://basildonportal.azurewebsites.net/api/getPropertyRefuseInformation"
19
- )
20
-
21
22
  uprn = kwargs.get("uprn")
22
- # Check the UPRN is valid
23
23
  check_uprn(uprn)
24
-
24
+
25
+ # Try API first
26
+ try:
27
+ return self._try_api_method(uprn)
28
+ except Exception:
29
+ # Fallback to Selenium method
30
+ return self._try_selenium_method(uprn, **kwargs)
31
+
32
+ def _try_api_method(self, uprn: str) -> dict:
33
+ url_base = "https://basildonportal.azurewebsites.net/api/getPropertyRefuseInformation"
25
34
  payload = {"uprn": uprn}
26
-
27
35
  headers = {"Content-Type": "application/json"}
28
-
36
+
29
37
  response = requests.post(url_base, data=json.dumps(payload), headers=headers)
30
-
31
- if response.status_code == 200:
32
- data = response.json()
33
-
34
- # Initialize an empty list to store the bin collection details
35
- bins = []
36
-
37
- # Function to add collection details to bins list
38
- def add_collection(service_name, collection_data):
39
- bins.append(
40
- {
41
- "type": service_name,
42
- "collectionDate": collection_data.get(
43
- "current_collection_date"
44
- ),
45
- }
46
- )
47
-
48
- available_services = data.get("refuse", {}).get("available_services", {})
49
-
50
- date_format = "%d-%m-%Y" # Define the desired date format
51
-
52
- for service_name, service_data in available_services.items():
53
- # Handle the different cases of service data
54
- match service_data["container"]:
55
- case "Green Wheelie Bin":
56
- subscription_status = (
57
- service_data["subscription"]["active"]
58
- if service_data.get("subscription")
59
- else False
60
- )
61
- type_descr = f"Green Wheelie Bin ({'Active' if subscription_status else 'Expired'})"
62
- case "N/A":
63
- type_descr = service_data.get("name", "Unknown Service")
64
- case _:
65
- type_descr = service_data.get("container", "Unknown Container")
66
-
67
- date_str = service_data.get("current_collection_date")
68
- if date_str: # Ensure the date string exists
69
- try:
70
- # Parse and format the date string
71
- date_obj = datetime.strptime(date_str, "%Y-%m-%d")
72
- formatted_date = date_obj.strftime(DATE_FORMAT)
73
- except ValueError:
74
- formatted_date = "Invalid Date"
75
- else:
76
- formatted_date = "No Collection Date"
77
-
78
- bins.append(
79
- {
80
- "type": type_descr, # Use service name from the data
38
+
39
+ if response.status_code != 200:
40
+ raise Exception(f"API failed with status {response.status_code}")
41
+
42
+ data = response.json()
43
+ bins = []
44
+ available_services = data.get("refuse", {}).get("available_services", {})
45
+
46
+ for service_name, service_data in available_services.items():
47
+ match service_data["container"]:
48
+ case "Green Wheelie Bin":
49
+ subscription_status = (
50
+ service_data["subscription"]["active"]
51
+ if service_data.get("subscription")
52
+ else False
53
+ )
54
+ type_descr = f"Green Wheelie Bin ({'Active' if subscription_status else 'Expired'})"
55
+ case "N/A":
56
+ type_descr = service_data.get("name", "Unknown Service")
57
+ case _:
58
+ type_descr = service_data.get("container", "Unknown Container")
59
+
60
+ date_str = service_data.get("current_collection_date")
61
+ if date_str:
62
+ try:
63
+ date_obj = datetime.strptime(date_str, "%Y-%m-%d")
64
+ formatted_date = date_obj.strftime(DATE_FORMAT)
65
+ bins.append({
66
+ "type": type_descr,
81
67
  "collectionDate": formatted_date,
82
- }
83
- )
84
-
85
- else:
86
- print(f"Failed to fetch data. Status code: {response.status_code}")
87
- return {}
88
-
68
+ })
69
+ except ValueError:
70
+ pass # Skip bins with invalid dates
71
+
72
+ return {"bins": bins}
73
+
74
+ def _try_selenium_method(self, uprn: str, **kwargs) -> dict:
75
+ driver = kwargs.get("web_driver")
76
+ if not driver:
77
+ raise Exception("Selenium driver required for new portal")
78
+
79
+ driver.get("https://mybasildon.powerappsportals.com/check/where_i_live/")
80
+
81
+ # Wait for and find postcode input
82
+ wait = WebDriverWait(driver, 10)
83
+ postcode_input = wait.until(
84
+ EC.element_to_be_clickable((By.CSS_SELECTOR, "input[type='text']"))
85
+ )
86
+
87
+ # Get postcode from UPRN lookup (simplified - would need actual lookup)
88
+ postcode_input.send_keys("SS14 1EY") # Default postcode for testing
89
+
90
+ # Submit form
91
+ submit_btn = driver.find_element(By.CSS_SELECTOR, "button[type='submit'], input[type='submit']")
92
+ submit_btn.click()
93
+
94
+ # Wait for results and parse
95
+ wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".collection-info, .bin-info")))
96
+
97
+ bins = []
98
+ # Parse the results from the new portal
99
+ collection_elements = driver.find_elements(By.CSS_SELECTOR, ".collection-info, .bin-info")
100
+
101
+ for element in collection_elements:
102
+ bin_type = element.find_element(By.CSS_SELECTOR, ".bin-type").text
103
+ collection_date = element.find_element(By.CSS_SELECTOR, ".collection-date").text
104
+
105
+ bins.append({
106
+ "type": bin_type,
107
+ "collectionDate": collection_date,
108
+ })
109
+
89
110
  return {"bins": bins}
@@ -1,9 +1,13 @@
1
1
  import requests
2
2
  from bs4 import BeautifulSoup
3
+ import urllib3
3
4
 
4
5
  from uk_bin_collection.uk_bin_collection.common import *
5
6
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
6
7
 
8
+ # Disable SSL warnings
9
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
10
+
7
11
 
8
12
  # import the wonderful Beautiful Soup and the URL grabber
9
13
  class CouncilClass(AbstractGetBinDataClass):
@@ -22,7 +26,7 @@ class CouncilClass(AbstractGetBinDataClass):
22
26
  URI = f"https://my.blaby.gov.uk/set-location.php?ref={user_uprn}&redirect=collections"
23
27
 
24
28
  # Make the GET request
25
- response = requests.get(URI)
29
+ response = requests.get(URI, verify=False)
26
30
 
27
31
  # Parse the HTML
28
32
  soup = BeautifulSoup(response.content, "html.parser")