uk_bin_collection 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. uk_bin_collection/tests/input.json +34 -25
  2. uk_bin_collection/uk_bin_collection/councils/AberdeenCityCouncil.py +0 -1
  3. uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +45 -120
  4. uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +4 -1
  5. uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +15 -36
  6. uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +55 -24
  7. uk_bin_collection/uk_bin_collection/councils/DacorumBoroughCouncil.py +22 -13
  8. uk_bin_collection/uk_bin_collection/councils/EastDunbartonshireCouncil.py +52 -0
  9. uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +32 -34
  10. uk_bin_collection/uk_bin_collection/councils/FarehamBoroughCouncil.py +5 -2
  11. uk_bin_collection/uk_bin_collection/councils/FolkstoneandHytheDistrictCouncil.py +22 -0
  12. uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +1 -1
  13. uk_bin_collection/uk_bin_collection/councils/HartlepoolBoroughCouncil.py +3 -1
  14. uk_bin_collection/uk_bin_collection/councils/IslingtonCouncil.py +8 -5
  15. uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +23 -10
  16. uk_bin_collection/uk_bin_collection/councils/MidSuffolkDistrictCouncil.py +70 -92
  17. uk_bin_collection/uk_bin_collection/councils/NewForestCouncil.py +104 -47
  18. uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +138 -21
  19. uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py +26 -128
  20. uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +245 -82
  21. uk_bin_collection/uk_bin_collection/councils/OxfordCityCouncil.py +1 -0
  22. uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +170 -13
  23. uk_bin_collection/uk_bin_collection/councils/RotherhamCouncil.py +70 -38
  24. uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +4 -2
  25. uk_bin_collection/uk_bin_collection/councils/SandwellBoroughCouncil.py +4 -11
  26. uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py +39 -21
  27. uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +136 -21
  28. uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +18 -22
  29. uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +138 -21
  30. uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +16 -13
  31. {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/METADATA +1 -1
  32. {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/RECORD +35 -34
  33. {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/LICENSE +0 -0
  34. {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/WHEEL +0 -0
  35. {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,9 @@
1
+ import datetime
2
+
1
3
  from bs4 import BeautifulSoup
4
+ from selenium.webdriver.common.by import By
5
+ from selenium.webdriver.support import expected_conditions as EC
6
+ from selenium.webdriver.support.wait import WebDriverWait
2
7
 
3
8
  from uk_bin_collection.uk_bin_collection.common import *
4
9
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
@@ -13,6 +18,7 @@ class CouncilClass(AbstractGetBinDataClass):
13
18
  """
14
19
 
15
20
  def parse_data(self, page: str, **kwargs) -> dict:
21
+ <<<<<<< HEAD
16
22
  user_postcode = kwargs.get("postcode")
17
23
  check_postcode(user_postcode)
18
24
  user_uprn = kwargs.get("uprn")
@@ -43,10 +49,16 @@ class CouncilClass(AbstractGetBinDataClass):
43
49
  i["data-for"]: i.get("value", "")
44
50
  for i in soup.select("input[data-for]")
45
51
  }
46
- payload_salt = soup.select_one('input[id="pSalt"]').get("value")
47
- payload_protected = soup.select_one('input[id="pPageItemsProtected"]').get(
48
- "value"
49
- )
52
+
53
+ # Check if required form elements exist
54
+ salt_element = soup.select_one('input[id="pSalt"]')
55
+ protected_element = soup.select_one('input[id="pPageItemsProtected"]')
56
+
57
+ if not salt_element or not protected_element:
58
+ raise Exception("Required form elements not found. The council website may have changed or be unavailable.")
59
+
60
+ payload_salt = salt_element.get("value")
61
+ payload_protected = protected_element.get("value")
50
62
 
51
63
  # Add the PostCode and 'SEARCH' to the payload
52
64
  payload["p_request"] = "SEARCH"
@@ -123,10 +135,16 @@ class CouncilClass(AbstractGetBinDataClass):
123
135
  i["data-for"]: i.get("value", "")
124
136
  for i in soup.select("input[data-for]")
125
137
  }
126
- payload_salt = soup.select_one('input[id="pSalt"]').get("value")
127
- payload_protected = soup.select_one('input[id="pPageItemsProtected"]').get(
128
- "value"
129
- )
138
+
139
+ # Check if required form elements exist
140
+ salt_element = soup.select_one('input[id="pSalt"]')
141
+ protected_element = soup.select_one('input[id="pPageItemsProtected"]')
142
+
143
+ if not salt_element or not protected_element:
144
+ raise Exception("Required form elements not found. The council website may have changed or be unavailable.")
145
+
146
+ payload_salt = salt_element.get("value")
147
+ payload_protected = protected_element.get("value")
130
148
 
131
149
  # Add the UPRN and 'SUBMIT' to the payload
132
150
  payload["p_request"] = "SUBMIT"
@@ -187,18 +205,117 @@ class CouncilClass(AbstractGetBinDataClass):
187
205
 
188
206
  # Create a BeautifulSoup object from the page's HTML
189
207
  soup = BeautifulSoup(resource.text, "html.parser")
208
+ =======
209
+ driver = None
210
+ try:
211
+ >>>>>>> master
190
212
  data = {"bins": []}
213
+ url = kwargs.get("url")
214
+ user_paon = kwargs.get("paon")
215
+ user_postcode = kwargs.get("postcode")
216
+ web_driver = kwargs.get("web_driver")
217
+ headless = kwargs.get("headless")
218
+ check_paon(user_paon)
219
+ check_postcode(user_postcode)
220
+
221
+ # Use a realistic user agent to help bypass Cloudflare
222
+ user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
223
+ driver = create_webdriver(web_driver, headless, user_agent, __name__)
224
+ driver.get(
225
+ "https://iportal.itouchvision.com/icollectionday/collection-day/?uuid=6CDD2A34C912312074D8E2410531401A8C00EFF7&lang=en"
226
+ )
227
+
228
+ # Wait for the postcode field to appear then populate it
229
+ inputElement_postcode = WebDriverWait(driver, 30).until(
230
+ EC.presence_of_element_located((By.ID, "postcodeSearch"))
231
+ )
232
+ inputElement_postcode.send_keys(user_postcode)
233
+
234
+ # Click search button
235
+ findAddress = WebDriverWait(driver, 10).until(
236
+ EC.presence_of_element_located((By.CLASS_NAME, "govuk-button"))
237
+ )
238
+ findAddress.click()
239
+
240
+ # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
241
+ WebDriverWait(driver, 10).until(
242
+ EC.element_to_be_clickable(
243
+ (
244
+ By.XPATH,
245
+ "//select[@id='addressSelect']//option[contains(., '"
246
+ + user_paon
247
+ + "')]",
248
+ )
249
+ )
250
+ ).click()
251
+
252
+ # Wait for the collections table to appear
253
+ WebDriverWait(driver, 20).until(
254
+ EC.presence_of_element_located(
255
+ (
256
+ By.XPATH,
257
+ "//h2[contains(@class,'mt-4') and contains(@class,'govuk-heading-s') and normalize-space(.)='Your next collections']",
258
+ )
259
+ )
260
+ )
261
+
262
+ soup = BeautifulSoup(driver.page_source, features="html.parser")
263
+
264
+ collections = soup.find_all("div", {"class": "p-2"})
265
+
266
+ for collection in collections:
267
+ bin_type = collection.find("h3").get_text()
268
+
269
+ next_collection = soup.find("div", {"class": "fw-bold"}).get_text()
270
+
271
+ following_collection = soup.find(
272
+ lambda t: (
273
+ t.name == "div"
274
+ and t.get_text(strip=True).lower().startswith("followed by")
275
+ )
276
+ ).get_text()
277
+
278
+ next_collection_date = datetime.strptime(next_collection, "%A %d %B")
279
+
280
+ following_collection_date = datetime.strptime(
281
+ following_collection, "followed by %A %d %B"
282
+ )
283
+
284
+ current_date = datetime.now()
285
+ next_collection_date = next_collection_date.replace(
286
+ year=current_date.year
287
+ )
288
+ following_collection_date = following_collection_date.replace(
289
+ year=current_date.year
290
+ )
291
+
292
+ next_collection_date = get_next_occurrence_from_day_month(
293
+ next_collection_date
294
+ )
295
+
296
+ following_collection_date = get_next_occurrence_from_day_month(
297
+ following_collection_date
298
+ )
299
+
300
+ dict_data = {
301
+ "type": bin_type,
302
+ "collectionDate": next_collection_date.strftime(date_format),
303
+ }
304
+ data["bins"].append(dict_data)
305
+
306
+ dict_data = {
307
+ "type": bin_type,
308
+ "collectionDate": following_collection_date.strftime(date_format),
309
+ }
310
+ data["bins"].append(dict_data)
191
311
 
192
- # Loop through the items on the page and build a JSON object for ingestion
193
- for item in soup.select(".t-MediaList-item"):
194
- for value in item.select(".t-MediaList-body"):
195
- dict_data = {
196
- "type": value.select("span")[1].get_text(strip=True).title(),
197
- "collectionDate": datetime.strptime(
198
- value.select(".t-MediaList-desc")[0].get_text(strip=True),
199
- "%A, %d %B, %Y",
200
- ).strftime(date_format),
201
- }
202
- data["bins"].append(dict_data)
203
-
204
- return data
312
+ except Exception as e:
313
+ # Here you can log the exception if needed
314
+ print(f"An error occurred: {e}")
315
+ # Optionally, re-raise the exception if you want it to propagate
316
+ raise
317
+ finally:
318
+ # This block ensures that the driver is closed regardless of an exception
319
+ if driver:
320
+ driver.quit()
321
+ return data
@@ -125,23 +125,6 @@ class CouncilClass(AbstractGetBinDataClass):
125
125
  # Wait for the page to load - giving it extra time
126
126
  time.sleep(5)
127
127
 
128
- # Use only the selector that we know works
129
- # print("Looking for bin type elements...")
130
- try:
131
- bin_type_selector = (
132
- By.CSS_SELECTOR,
133
- "div.formatting_bold.formatting_size_bigger.formatting span.value-as-text",
134
- )
135
- WebDriverWait(driver, 15).until(
136
- EC.presence_of_element_located(bin_type_selector)
137
- )
138
- # print(f"Found bin type elements with selector: {bin_type_selector}")
139
- except TimeoutException:
140
- # print("Could not find bin type elements. Taking screenshot for debugging...")
141
- screenshot_path = f"bin_type_error_{int(time.time())}.png"
142
- driver.save_screenshot(screenshot_path)
143
- # print(f"Screenshot saved to {screenshot_path}")
144
-
145
128
  # Create BS4 object from driver's page source
146
129
  # print("Parsing page with BeautifulSoup...")
147
130
  soup = BeautifulSoup(driver.page_source, features="html.parser")
@@ -149,122 +132,37 @@ class CouncilClass(AbstractGetBinDataClass):
149
132
  # Initialize data dictionary
150
133
  data = {"bins": []}
151
134
 
152
- # Looking for bin types in the exact HTML structure
153
- bin_type_elements = soup.select(
154
- "div.page_cell.contains_widget:first-of-type div.formatting_bold.formatting_size_bigger.formatting span.value-as-text"
155
- )
156
- # print(f"Found {len(bin_type_elements)} bin type elements")
157
-
158
- # Look specifically for date elements with the exact structure
159
- date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
160
- hidden_dates = soup.select(
161
- "div.col-sm-12.font-xs-3xl input[type='hidden'][value*='/']"
162
- )
163
-
164
- # print(f"Found {len(bin_type_elements)} bin types and {len(date_elements)} date elements")
165
-
166
- # We need a smarter way to match bin types with their dates
167
- bin_count = 0
135
+ for row in soup.select(".listing_template_row"):
136
+ # Title (waste stream) is the first <p> in the section
137
+ first_p = row.find("p")
138
+ if not first_p:
139
+ continue
140
+ stream = first_p.get_text(" ", strip=True)
168
141
 
169
- # Map of bin types to their collection dates
170
- bin_date_map = {}
142
+ for p in row.find_all("p"):
143
+ t = p.get_text("\n", strip=True)
171
144
 
172
- # Extract all date strings that look like actual dates
173
- date_texts = []
174
- date_pattern = re.compile(
175
- r"(?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)\s+\d+(?:st|nd|rd|th)?\s+\w+\s+\d{4}",
176
- re.IGNORECASE,
177
- )
145
+ if re.search(r"\bNext collection\b", t, flags=re.I):
146
+ # Expect format: "Next collection\nTuesday 16th September 2025"
147
+ parts = [x.strip() for x in t.split("\n") if x.strip()]
148
+ if len(parts) >= 2:
149
+ next_collection_display = parts[-1] # last line
178
150
 
179
- for element in date_elements:
180
- text = element.get_text(strip=True)
181
- if date_pattern.search(text):
182
- date_texts.append(text)
183
- # print(f"Found valid date text: {text}")
184
-
185
- # Find hidden date inputs with values in DD/MM/YYYY format
186
- hidden_date_values = []
187
- for hidden in hidden_dates:
188
- value = hidden.get("value", "")
189
- if re.match(r"\d{1,2}/\d{1,2}/\d{4}", value):
190
- hidden_date_values.append(value)
191
- # print(f"Found hidden date value: {value}")
192
-
193
- # When filtering date elements
194
- date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
195
- valid_date_elements = []
196
-
197
- for element in date_elements:
198
- text = element.get_text(strip=True)
199
- if contains_date(text):
200
- valid_date_elements.append(element)
201
- # print(f"Found valid date element: {text}")
202
- else:
203
- pass
204
- # print(f"Skipping non-date element: {text}")
205
-
206
- # print(f"Found {len(bin_type_elements)} bin types and {len(valid_date_elements)} valid date elements")
207
-
208
- # When processing each bin type
209
- for i, bin_type_elem in enumerate(bin_type_elements):
210
- bin_type = bin_type_elem.get_text(strip=True)
211
-
212
- # Try to find a date for this bin type
213
- date_text = None
214
-
215
- # Look for a valid date element
216
- if i < len(valid_date_elements):
217
- date_elem = valid_date_elements[i]
218
- date_text = date_elem.get_text(strip=True)
219
-
220
- # If we don't have a valid date yet, try using the hidden input
221
- if not date_text or not contains_date(date_text):
222
- if i < len(hidden_dates):
223
- date_value = hidden_dates[i].get("value")
224
- if contains_date(date_value):
225
- date_text = date_value
226
-
227
- # Skip if we don't have a valid date
228
- if not date_text or not contains_date(date_text):
229
- # print(f"No valid date found for bin type: {bin_type}")
230
- continue
151
+ # Build record
152
+ next_date = datetime.strptime(
153
+ remove_ordinal_indicator_from_date_string(next_collection_display),
154
+ "%A %d %B %Y",
155
+ )
231
156
 
232
- # print(f"Found bin type: {bin_type} with date: {date_text}")
157
+ # Create bin entry
158
+ bin_entry = {
159
+ "type": stream,
160
+ "collectionDate": next_date.strftime(date_format),
161
+ }
233
162
 
234
- try:
235
- # Clean up the date text
236
- date_text = remove_ordinal_indicator_from_date_string(date_text)
237
-
238
- # Try to parse the date
239
- try:
240
- collection_date = datetime.strptime(
241
- date_text, "%A %d %B %Y"
242
- ).date()
243
- except ValueError:
244
- try:
245
- collection_date = datetime.strptime(
246
- date_text, "%d/%m/%Y"
247
- ).date()
248
- except ValueError:
249
- # Last resort
250
- collection_date = parse(date_text).date()
251
-
252
- # Create bin entry
253
- bin_entry = {
254
- "type": bin_type,
255
- "collectionDate": collection_date.strftime(date_format),
256
- }
257
-
258
- # Add to data
259
- data["bins"].append(bin_entry)
260
- bin_count += 1
261
- # print(f"Added bin entry: {bin_entry}")
262
-
263
- except Exception as e:
264
- pass
265
- # print(f"Error parsing date '{date_text}': {str(e)}")
266
-
267
- # print(f"Successfully parsed {bin_count} bin collections")
163
+ # Add to data
164
+ data["bins"].append(bin_entry)
165
+ # print(f"Added bin entry: {bin_entry}")
268
166
 
269
167
  if not data["bins"]:
270
168
  # print("No bin data found. Saving page for debugging...")