uk_bin_collection 0.152.9__py3-none-any.whl → 0.152.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -761,13 +761,11 @@
761
761
  },
762
762
  "EastHertsCouncil": {
763
763
  "LAD24CD": "E07000097",
764
- "house_number": "1",
765
- "postcode": "CM20 2FZ",
766
764
  "skip_get_url": true,
767
- "url": "https://www.eastherts.gov.uk",
768
- "web_driver": "http://selenium:4444",
765
+ "uprn": "10023088183",
766
+ "url": "https://east-herts.co.uk/api/services/",
769
767
  "wiki_name": "East Herts Council",
770
- "wiki_note": "Pass the house number and postcode in their respective parameters."
768
+ "wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search)."
771
769
  },
772
770
  "EastLindseyDistrictCouncil": {
773
771
  "house_number": "1",
@@ -22,10 +22,25 @@ class CouncilClass(AbstractGetBinDataClass):
22
22
  check_postcode(user_postcode)
23
23
 
24
24
  bindata = {"bins": []}
25
- driver = create_webdriver(web_driver, headless, None, __name__)
25
+ # Use a realistic user agent to help bypass Cloudflare
26
+ user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
27
+ driver = create_webdriver(web_driver, headless, user_agent, __name__)
26
28
 
27
29
  try:
28
30
  driver.get("https://www.broxbourne.gov.uk/bin-collection-date")
31
+
32
+ # Wait for Cloudflare challenge to complete
33
+ print("Waiting for page to load (Cloudflare check)...")
34
+ try:
35
+ WebDriverWait(driver, 45).until(
36
+ lambda d: "Just a moment" not in d.title and d.title != "" and len(d.find_elements(By.TAG_NAME, "input")) > 0
37
+ )
38
+ print(f"Page loaded: {driver.title}")
39
+ except:
40
+ print(f"Timeout waiting for page load. Current title: {driver.title}")
41
+ # Try to continue anyway
42
+ pass
43
+
29
44
  time.sleep(8)
30
45
 
31
46
  # Handle cookie banner with multiple attempts
@@ -1,7 +1,6 @@
1
- from bs4 import BeautifulSoup
2
- from selenium.webdriver.common.by import By
3
- from selenium.webdriver.support import expected_conditions as EC
4
- from selenium.webdriver.support.wait import WebDriverWait
1
+ import json
2
+ import requests
3
+ from datetime import datetime
5
4
 
6
5
  from uk_bin_collection.uk_bin_collection.common import *
7
6
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
@@ -15,116 +14,28 @@ class CouncilClass(AbstractGetBinDataClass):
15
14
  """
16
15
 
17
16
  def parse_data(self, page: str, **kwargs) -> dict:
18
- # Get and check UPRN
19
- driver = None
20
- try:
21
- user_postcode = kwargs.get("postcode")
22
- user_paon = kwargs.get("paon")
23
- check_paon(user_paon)
24
- check_postcode(user_postcode)
25
- web_driver = kwargs.get("web_driver")
26
- headless = kwargs.get("headless")
27
- bindata = {"bins": []}
28
-
29
- API_URL = "https://uhte-wrp.whitespacews.com"
30
-
31
- # Create Selenium webdriver
32
- driver = create_webdriver(web_driver, headless, None, __name__)
33
- driver.get(API_URL)
34
-
35
- # Click Find my bin collection day button
36
- collectionButton = WebDriverWait(driver, 10).until(
37
- EC.element_to_be_clickable((By.LINK_TEXT, "Find my bin collection day"))
38
- )
39
- collectionButton.click()
40
-
41
- main_content = WebDriverWait(driver, 10).until(
42
- EC.presence_of_element_located((By.ID, "main-content"))
43
- )
44
-
45
- # Wait for the property number field to appear then populate it
46
- inputElement_number = WebDriverWait(driver, 10).until(
47
- EC.element_to_be_clickable(
48
- (
49
- By.ID,
50
- "address_name_number",
51
- )
52
- )
53
- )
54
- inputElement_number.send_keys(user_paon)
55
-
56
- # Wait for the postcode field to appear then populate it
57
- inputElement_postcode = WebDriverWait(driver, 10).until(
58
- EC.element_to_be_clickable(
59
- (
60
- By.ID,
61
- "address_postcode",
62
- )
63
- )
64
- )
65
- inputElement_postcode.send_keys(user_postcode)
66
-
67
- # Click search button
68
- continueButton = WebDriverWait(driver, 10).until(
69
- EC.element_to_be_clickable(
70
- (
71
- By.ID,
72
- "Submit",
73
- )
74
- )
75
- )
76
- continueButton.click()
77
-
78
- # Wait for the 'Search Results' to appear and select the first result
79
- property = WebDriverWait(driver, 10).until(
80
- EC.element_to_be_clickable(
81
- (
82
- By.CSS_SELECTOR,
83
- "li.app-subnav__section-item a",
84
- # "app-subnav__link govuk-link clicker colordarkblue fontfamilyArial fontsize12rem",
85
- # "//a[starts-with(@aria-label, '{user_paon}')]",
86
- )
87
- )
88
- )
89
- property.click()
90
-
91
- upcoming_scheduled_collections = WebDriverWait(driver, 10).until(
92
- EC.presence_of_element_located(
93
- (By.ID, "upcoming-scheduled-collections")
94
- )
95
- )
96
-
97
- soup = BeautifulSoup(driver.page_source, features="html.parser")
98
-
99
- collections = []
100
- for collection in soup.find_all(
101
- "u1",
102
- class_="displayinlineblock justifycontentleft alignitemscenter margin0 padding0",
103
- ):
104
- date = collection.find(
105
- "p", string=lambda text: text and "/" in text
106
- ).text.strip() # Extract date
107
- service = collection.find(
108
- "p", string=lambda text: text and "Collection Service" in text
109
- ).text.strip() # Extract service type
110
- collections.append({"date": date, "service": service})
111
-
112
- # Print the parsed data
113
- for item in collections:
114
-
115
- dict_data = {
116
- "type": item["service"],
117
- "collectionDate": item["date"],
118
- }
119
- bindata["bins"].append(dict_data)
120
-
121
- except Exception as e:
122
- # Here you can log the exception if needed
123
- print(f"An error occurred: {e}")
124
- # Optionally, re-raise the exception if you want it to propagate
125
- raise
126
- finally:
127
- # This block ensures that the driver is closed regardless of an exception
128
- if driver:
129
- driver.quit()
17
+ user_uprn = kwargs.get("uprn")
18
+ check_uprn(user_uprn)
19
+ bindata = {"bins": []}
20
+
21
+ # Make API request
22
+ api_url = f"https://east-herts.co.uk/api/services/{user_uprn}"
23
+ response = requests.get(api_url)
24
+ response.raise_for_status()
25
+
26
+ data = response.json()
27
+ today = datetime.now().date()
28
+
29
+ for service in data.get("services", []):
30
+ collection_date_str = service.get("collectionDate")
31
+ if collection_date_str:
32
+ collection_date = datetime.strptime(collection_date_str, "%Y-%m-%d").date()
33
+ # Only include future dates
34
+ if collection_date >= today:
35
+ dict_data = {
36
+ "type": service.get("binType", ""),
37
+ "collectionDate": collection_date.strftime("%d/%m/%Y"),
38
+ }
39
+ bindata["bins"].append(dict_data)
40
+
130
41
  return bindata
@@ -5,7 +5,6 @@ from uk_bin_collection.uk_bin_collection.common import *
5
5
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
6
6
 
7
7
 
8
- # import the wonderful Beautiful Soup and the URL grabber
9
8
  class CouncilClass(AbstractGetBinDataClass):
10
9
  """
11
10
  Concrete classes have to implement all abstract operations of the
@@ -14,70 +13,59 @@ class CouncilClass(AbstractGetBinDataClass):
14
13
  """
15
14
 
16
15
  def parse_data(self, page: str, **kwargs) -> dict:
17
-
18
16
  user_postcode = kwargs.get("postcode")
19
17
  user_paon = kwargs.get("paon")
20
18
  check_postcode(user_postcode)
21
19
  check_paon(user_paon)
22
20
  bindata = {"bins": []}
23
21
 
24
- URI = "http://collectiondates.eastlothian.gov.uk/ajax/your-calendar/load-streets-spring-2024.asp"
25
-
26
- payload = {
27
- "postcode": user_postcode,
28
- }
29
-
22
+ # Get address ID from the streets endpoint
23
+ streets_uri = "https://collectiondates.eastlothian.gov.uk/ajax/your-calendar/load-streets-summer-2025.asp"
30
24
  headers = {
31
- "Referer": "http://collectiondates.eastlothian.gov.uk/your-calendar",
25
+ "Referer": "https://collectiondates.eastlothian.gov.uk/your-calendar",
32
26
  "User-Agent": "Mozilla/5.0",
33
27
  }
34
-
35
- # Make the GET request
36
- response = requests.get(URI, headers=headers, params=payload)
37
-
38
- # Parse the HTML with BeautifulSoup
28
+
29
+ response = requests.get(streets_uri, params={"postcode": user_postcode}, headers=headers)
39
30
  soup = BeautifulSoup(response.text, "html.parser")
40
-
41
- # Find the select dropdown
31
+
42
32
  select = soup.find("select", id="SelectStreet")
43
-
44
- # Find the option that contains "Flat 1"
33
+ if not select:
34
+ raise ValueError(f"No streets found for postcode {user_postcode}")
35
+
45
36
  address = select.find("option", string=lambda text: text and user_paon in text)
46
-
47
- URI = "http://collectiondates.eastlothian.gov.uk/ajax/your-calendar/load-recycling-summer-2024.asp"
48
-
49
- payload = {
50
- "id": address["value"],
51
- }
52
-
53
- # Make the GET request
54
- response = requests.get(URI, headers=headers, params=payload)
55
-
56
- # Parse the HTML with BeautifulSoup
37
+ if not address:
38
+ raise ValueError(f"Address '{user_paon}' not found for postcode {user_postcode}")
39
+
40
+ address_id = address["value"]
41
+
42
+ # Get collection data using the correct endpoint
43
+ collections_uri = "https://collectiondates.eastlothian.gov.uk/ajax/your-calendar/load-recycling-summer-2025.asp"
44
+ response = requests.get(collections_uri, params={"id": address_id}, headers=headers)
45
+
57
46
  soup = BeautifulSoup(response.text, "html.parser")
58
-
47
+
59
48
  # Extract collection details
60
49
  calendar_items = soup.find_all("div", class_="calendar-item")
61
50
  for item in calendar_items:
62
51
  waste_label = item.find("div", class_="waste-label").text.strip()
63
52
  waste_value = item.find("div", class_="waste-value").find("h4").text.strip()
64
-
53
+
65
54
  try:
66
55
  collection_date = datetime.strptime(
67
56
  remove_ordinal_indicator_from_date_string(waste_value),
68
57
  "%A %d %B %Y",
69
58
  )
59
+
60
+ bindata["bins"].append({
61
+ "type": waste_label.replace(" is:", ""),
62
+ "collectionDate": collection_date.strftime(date_format),
63
+ })
70
64
  except ValueError:
71
65
  continue
72
-
73
- dict_data = {
74
- "type": waste_label.replace(" is:", ""),
75
- "collectionDate": collection_date.strftime(date_format),
76
- }
77
- bindata["bins"].append(dict_data)
78
-
66
+
79
67
  bindata["bins"].sort(
80
68
  key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
81
69
  )
82
-
70
+
83
71
  return bindata
@@ -2,12 +2,12 @@ from bs4 import BeautifulSoup
2
2
  from selenium.webdriver.common.by import By
3
3
  from selenium.webdriver.support import expected_conditions as EC
4
4
  from selenium.webdriver.support.wait import WebDriverWait
5
+ from selenium.webdriver.support.ui import Select
5
6
 
6
7
  from uk_bin_collection.uk_bin_collection.common import *
7
8
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
8
9
 
9
10
 
10
- # import the wonderful Beautiful Soup and the URL grabber
11
11
  class CouncilClass(AbstractGetBinDataClass):
12
12
  """
13
13
  Concrete classes have to implement all abstract operations of the
@@ -21,97 +21,102 @@ class CouncilClass(AbstractGetBinDataClass):
21
21
  data = {"bins": []}
22
22
  user_paon = kwargs.get("paon")
23
23
  user_postcode = kwargs.get("postcode")
24
+ user_uprn = kwargs.get("uprn")
24
25
  web_driver = kwargs.get("web_driver")
25
26
  headless = kwargs.get("headless")
26
- check_paon(user_paon)
27
27
  check_postcode(user_postcode)
28
28
 
29
29
  # Create Selenium webdriver
30
30
  driver = create_webdriver(web_driver, headless, None, __name__)
31
- driver.get(
32
- "https://eastrenfrewshire.gov.uk/article/1145/Bin-collection-days"
33
- )
31
+ driver.get("https://eastrenfrewshire.gov.uk/bin-days")
34
32
 
35
33
  # Wait for the postcode field to appear then populate it
36
34
  inputElement_postcode = WebDriverWait(driver, 30).until(
37
35
  EC.presence_of_element_located(
38
- (By.ID, "RESIDUALWASTEV2_PAGE1_POSTCODE")
36
+ (By.CSS_SELECTOR, "input[autocomplete='postal-code']")
39
37
  )
40
38
  )
41
39
  inputElement_postcode.send_keys(user_postcode)
42
40
 
43
41
  # Click search button
44
- findAddress = WebDriverWait(driver, 10).until(
45
- EC.presence_of_element_located(
46
- (By.ID, "RESIDUALWASTEV2_PAGE1_FIELD199_NEXT")
47
- )
48
- )
49
- findAddress.click()
50
-
51
- # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
52
- WebDriverWait(driver, 10).until(
42
+ search_button = WebDriverWait(driver, 10).until(
53
43
  EC.element_to_be_clickable(
54
- (
55
- By.XPATH,
56
- "//select[@id='RESIDUALWASTEV2_PAGE2_UPRN']//option[contains(., '"
57
- + user_paon
58
- + "')]",
59
- )
44
+ (By.XPATH, "//button[text()='Search']")
60
45
  )
61
- ).click()
46
+ )
47
+ search_button.click()
62
48
 
63
- # Click search button
64
- findDates = WebDriverWait(driver, 10).until(
49
+ # Wait for the addresses dropdown to appear
50
+ addresses_select = WebDriverWait(driver, 10).until(
65
51
  EC.presence_of_element_located(
66
- (By.ID, "RESIDUALWASTEV2_PAGE2_FIELD206_NEXT")
52
+ (By.XPATH, "//label[text()='Addresses']/following-sibling::select")
67
53
  )
68
54
  )
69
- findDates.click()
55
+
56
+ # Select the appropriate address based on UPRN or house number
57
+ select = Select(addresses_select)
58
+ if user_uprn:
59
+ # Select by UPRN value
60
+ select.select_by_value(user_uprn)
61
+ elif user_paon:
62
+ # Select by house number/name in the text
63
+ for option in select.options:
64
+ if user_paon in option.text:
65
+ select.select_by_visible_text(option.text)
66
+ break
67
+ else:
68
+ # Select the first non-default option
69
+ select.select_by_index(1)
70
+
71
+ # Click the "Find my collection dates" button
72
+ find_dates_button = WebDriverWait(driver, 10).until(
73
+ EC.element_to_be_clickable(
74
+ (By.XPATH, "//button[text()='Find my collection dates']")
75
+ )
76
+ )
77
+ find_dates_button.click()
70
78
 
71
- # Wait for the collections table to appear
79
+ # Wait for the results table to appear
72
80
  WebDriverWait(driver, 10).until(
73
81
  EC.presence_of_element_located(
74
- (By.ID, "RESIDUALWASTEV2_COLLECTIONDATES_DISPLAYBINCOLLECTIONINFO")
82
+ (By.XPATH, "//th[text()='Bin Type']")
75
83
  )
76
84
  )
77
85
 
78
86
  soup = BeautifulSoup(driver.page_source, features="html.parser")
79
- soup.prettify()
80
-
81
- # Get collections div
82
- next_collection_div = soup.find("div", {"id": "yourNextCollection"})
83
-
84
- # Get next collection date
85
- next_collection_date = datetime.strptime(
86
- next_collection_div.find("span", {"class": "dueDate"})
87
- .get_text()
88
- .strip(),
89
- "%d/%m/%Y",
90
- )
91
-
92
- # Get next collection bins
93
- next_collection_bin = next_collection_div.findAll(
94
- "span", {"class": "binColour"}
95
- )
96
-
97
- # Format results
98
- for row in next_collection_bin:
99
- dict_data = {
100
- "type": row.get_text().strip(),
101
- "collectionDate": next_collection_date.strftime("%d/%m/%Y"),
102
- }
103
- data["bins"].append(dict_data)
87
+
88
+ # Find the table with bin collection data
89
+ table = soup.find("th", string="Bin Type").find_parent("table")
90
+ rows = table.find_all("tr")[1:] # Skip header row
91
+
92
+ for row in rows:
93
+ cells = row.find_all("td")
94
+ if len(cells) >= 3:
95
+ date_cell = cells[0].get_text().strip()
96
+ bin_type_cell = cells[2]
97
+
98
+ # Only process rows that have a date
99
+ if date_cell:
100
+ # Get all text content including line breaks
101
+ bin_type_text = bin_type_cell.get_text(separator='\n').strip()
102
+
103
+ # Split multiple bin types that appear on separate lines
104
+ bin_types = [bt.strip() for bt in bin_type_text.split('\n') if bt.strip()]
105
+
106
+ for bin_type in bin_types:
107
+ dict_data = {
108
+ "type": bin_type,
109
+ "collectionDate": date_cell,
110
+ }
111
+ data["bins"].append(dict_data)
104
112
 
105
113
  data["bins"].sort(
106
114
  key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
107
115
  )
108
116
  except Exception as e:
109
- # Here you can log the exception if needed
110
117
  print(f"An error occurred: {e}")
111
- # Optionally, re-raise the exception if you want it to propagate
112
118
  raise
113
119
  finally:
114
- # This block ensures that the driver is closed regardless of an exception
115
120
  if driver:
116
121
  driver.quit()
117
122
  return data
@@ -30,11 +30,31 @@ class CouncilClass(AbstractGetBinDataClass):
30
30
  check_paon(user_paon)
31
31
  headless = kwargs.get("headless")
32
32
  web_driver = kwargs.get("web_driver")
33
- driver = create_webdriver(web_driver, headless, None, __name__)
33
+ # Use a realistic user agent to help bypass Cloudflare
34
+ user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
35
+ driver = create_webdriver(web_driver, headless, user_agent, __name__)
34
36
  page = "https://www.enfield.gov.uk/services/rubbish-and-recycling/find-my-collection-day"
35
37
  driver.get(page)
36
38
 
37
- time.sleep(5)
39
+ # Wait for Cloudflare challenge to complete
40
+ print("Waiting for page to load (Cloudflare check)...")
41
+ max_attempts = 3
42
+ for attempt in range(max_attempts):
43
+ try:
44
+ WebDriverWait(driver, 60).until(
45
+ lambda d: "Just a moment" not in d.title and d.title != "" and len(d.find_elements(By.TAG_NAME, "input")) > 1
46
+ )
47
+ print(f"Page loaded: {driver.title}")
48
+ break
49
+ except:
50
+ print(f"Attempt {attempt + 1}: Timeout waiting for page load. Current title: {driver.title}")
51
+ if attempt < max_attempts - 1:
52
+ time.sleep(10)
53
+ driver.refresh()
54
+ else:
55
+ print("Failed to bypass Cloudflare after multiple attempts")
56
+
57
+ time.sleep(8)
38
58
 
39
59
  try:
40
60
  accept_cookies = WebDriverWait(driver, timeout=10).until(
@@ -47,23 +67,73 @@ class CouncilClass(AbstractGetBinDataClass):
47
67
  )
48
68
  pass
49
69
 
50
- postcode_input = WebDriverWait(driver, 10).until(
51
- EC.presence_of_element_located(
52
- (By.CSS_SELECTOR, '[aria-label="Enter your address"]')
53
- )
54
- )
70
+ # Check for multiple iframes and find the correct one
71
+ try:
72
+ iframes = driver.find_elements(By.TAG_NAME, "iframe")
73
+
74
+ # Try each iframe to find the one with the bin collection form
75
+ for i, iframe in enumerate(iframes):
76
+ try:
77
+ driver.switch_to.frame(iframe)
78
+
79
+ # Check if this iframe has the postcode input
80
+ time.sleep(2)
81
+ inputs = driver.find_elements(By.TAG_NAME, "input")
82
+
83
+ # Look for address-related inputs
84
+ for inp in inputs:
85
+ aria_label = inp.get_attribute('aria-label') or ''
86
+ placeholder = inp.get_attribute('placeholder') or ''
87
+ if 'address' in aria_label.lower() or 'postcode' in placeholder.lower():
88
+ break
89
+ else:
90
+ # This iframe doesn't have the form, try the next one
91
+ driver.switch_to.default_content()
92
+ continue
93
+
94
+ # Found the right iframe, break out of the loop
95
+ break
96
+ except Exception as e:
97
+ driver.switch_to.default_content()
98
+ continue
99
+ else:
100
+ # No suitable iframe found, stay in main content
101
+ driver.switch_to.default_content()
102
+ except Exception as e:
103
+ pass
104
+
105
+ # Try multiple selectors for the postcode input
106
+ postcode_input = None
107
+ selectors = [
108
+ '[aria-label="Enter your address"]',
109
+ 'input[placeholder*="postcode"]',
110
+ 'input[placeholder*="address"]',
111
+ 'input[type="text"]'
112
+ ]
113
+
114
+ for selector in selectors:
115
+ try:
116
+ postcode_input = WebDriverWait(driver, 5).until(
117
+ EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
118
+ )
119
+ break
120
+ except:
121
+ continue
122
+
123
+ if not postcode_input:
124
+ raise ValueError("Could not find postcode input field")
55
125
 
56
126
  postcode_input.send_keys(user_postcode)
57
127
 
58
128
  find_address_button = WebDriverWait(driver, 10).until(
59
- EC.presence_of_element_located((By.ID, "submitButton0"))
129
+ EC.element_to_be_clickable((By.ID, "submitButton0"))
60
130
  )
61
131
  find_address_button.click()
62
132
 
63
133
  time.sleep(15)
64
134
  # Wait for address box to be visible
65
- select_address_input = WebDriverWait(driver, 10).until(
66
- EC.presence_of_element_located(
135
+ select_address_input = WebDriverWait(driver, 15).until(
136
+ EC.element_to_be_clickable(
67
137
  (
68
138
  By.CSS_SELECTOR,
69
139
  '[aria-label="Select full address"]',
@@ -16,7 +16,7 @@ class CouncilClass(AbstractGetBinDataClass):
16
16
  implementation.
17
17
  """
18
18
 
19
- base_url = "https://fermanaghomagh.isl-fusion.com/"
19
+ base_url = "https://fermanaghomagh.isl-fusion.com"
20
20
 
21
21
  def parse_data(self, page: str, **kwargs) -> dict:
22
22
  """
@@ -1,3 +1,4 @@
1
+ import time
1
2
  from bs4 import BeautifulSoup
2
3
  from selenium.webdriver.common.by import By
3
4
  from selenium.webdriver.support import expected_conditions as EC
@@ -26,16 +27,30 @@ class CouncilClass(AbstractGetBinDataClass):
26
27
  check_paon(user_paon)
27
28
  check_postcode(user_postcode)
28
29
 
29
- # Create Selenium webdriver
30
- driver = create_webdriver(web_driver, headless, None, __name__)
30
+ # Create Selenium webdriver with user agent to bypass Cloudflare
31
+ user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
32
+ driver = create_webdriver(web_driver, headless, user_agent, __name__)
31
33
  driver.get(
32
34
  "https://www.gateshead.gov.uk/article/3150/Bin-collection-day-checker"
33
35
  )
34
36
 
35
- accept_button = WebDriverWait(driver, 30).until(
36
- EC.presence_of_element_located((By.NAME, "acceptall"))
37
+ # Wait for initial page load
38
+ WebDriverWait(driver, 30).until(
39
+ lambda d: "Just a moment" not in d.title and d.title != ""
37
40
  )
38
- accept_button.click()
41
+
42
+ # Additional wait for page to fully load after Cloudflare
43
+ time.sleep(3)
44
+
45
+ # Try to accept cookies if the banner appears
46
+ try:
47
+ accept_button = WebDriverWait(driver, 10).until(
48
+ EC.element_to_be_clickable((By.NAME, "acceptall"))
49
+ )
50
+ accept_button.click()
51
+ time.sleep(2)
52
+ except:
53
+ pass
39
54
 
40
55
  # Wait for the postcode field to appear then populate it
41
56
  inputElement_postcode = WebDriverWait(driver, 30).until(
@@ -65,41 +80,102 @@ class CouncilClass(AbstractGetBinDataClass):
65
80
  )
66
81
  ).click()
67
82
 
68
- # Wait for the collections table to appear
69
- WebDriverWait(driver, 10).until(
70
- EC.presence_of_element_located(
71
- (By.CSS_SELECTOR, ".bincollections__table")
83
+ # Handle Cloudflare challenge that appears after address selection
84
+ try:
85
+ # Check for Cloudflare Turnstile "Verify you are human" checkbox
86
+ turnstile_checkbox = WebDriverWait(driver, 10).until(
87
+ EC.element_to_be_clickable((By.CSS_SELECTOR, "input[type='checkbox']"))
72
88
  )
73
- )
89
+ turnstile_checkbox.click()
90
+ # Wait for verification to complete
91
+ WebDriverWait(driver, 30).until(
92
+ EC.presence_of_element_located((By.ID, "success"))
93
+ )
94
+ time.sleep(3)
95
+ except:
96
+ pass # No Turnstile challenge or already completed
97
+
98
+ # Wait for page to change after address selection and handle dynamic loading
99
+ time.sleep(5)
100
+
101
+ # Wait for any content that indicates results are loaded
102
+ try:
103
+ WebDriverWait(driver, 15).until(
104
+ EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'collection') or contains(text(), 'Collection') or contains(text(), 'bin') or contains(text(), 'Bin') or contains(text(), 'refuse') or contains(text(), 'Refuse') or contains(text(), 'recycling') or contains(text(), 'Recycling')]"))
105
+ )
106
+ except:
107
+ # If no specific text found, just wait for page to stabilize
108
+ time.sleep(10)
74
109
 
75
110
  soup = BeautifulSoup(driver.page_source, features="html.parser")
76
111
 
77
- # Get collections table
78
- table = soup.find("table", {"class": "bincollections__table"})
79
-
80
- # Get rows
81
- month_year = ""
82
- for row in table.find_all("tr"):
83
- if row.find("th"):
84
- month_year = (
85
- row.find("th").get_text(strip=True)
86
- + " "
87
- + datetime.now().strftime("%Y")
88
- )
89
- elif month_year != "":
90
- collection = row.find_all("td")
91
- bin_date = datetime.strptime(
92
- collection[0].get_text(strip=True) + " " + month_year,
93
- "%d %B %Y",
94
- )
95
- dict_data = {
96
- "type": collection[2]
97
- .get_text()
98
- .replace("- DAY CHANGE", "")
99
- .strip(),
100
- "collectionDate": bin_date.strftime(date_format),
101
- }
102
- data["bins"].append(dict_data)
112
+ # Save page source for debugging
113
+ with open("debug_page.html", "w", encoding="utf-8") as f:
114
+ f.write(driver.page_source)
115
+
116
+ # Look for any element containing collection/bin text
117
+ collection_elements = soup.find_all(text=lambda text: text and any(word in text.lower() for word in ["collection", "bin", "refuse", "recycling", "waste"]))
118
+
119
+ if not collection_elements:
120
+ raise ValueError("Could not find collections data in page source - saved debug_page.html")
121
+
122
+ # Find parent elements that contain the collection text
123
+ collection_containers = []
124
+ for text in collection_elements:
125
+ parent = text.parent
126
+ while parent and parent.name != "body":
127
+ if parent.get_text(strip=True):
128
+ collection_containers.append(parent)
129
+ break
130
+ parent = parent.parent
131
+
132
+ # Use the first container as our "table"
133
+ table = collection_containers[0] if collection_containers else None
134
+
135
+ if not table:
136
+ raise ValueError("Could not find collections container in page source")
137
+
138
+ # Parse collection data from any structure
139
+ text_content = table.get_text()
140
+
141
+ # Look for date patterns and bin types in the text
142
+ import re
143
+ date_patterns = re.findall(r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\d{1,2}\s+\w+\s+\d{4}\b', text_content)
144
+
145
+ # If we find dates, try to extract bin information
146
+ if date_patterns:
147
+ lines = text_content.split('\n')
148
+ for i, line in enumerate(lines):
149
+ line = line.strip()
150
+ if any(word in line.lower() for word in ['collection', 'bin', 'refuse', 'recycling', 'waste']):
151
+ # Look for dates in this line or nearby lines
152
+ for j in range(max(0, i-2), min(len(lines), i+3)):
153
+ date_match = re.search(r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\d{1,2}\s+\w+\s+\d{4}\b', lines[j])
154
+ if date_match:
155
+ try:
156
+ date_str = date_match.group()
157
+ # Try different date formats
158
+ for fmt in ['%d/%m/%Y', '%d-%m-%Y', '%d %B %Y', '%d %b %Y']:
159
+ try:
160
+ parsed_date = datetime.strptime(date_str, fmt)
161
+ dict_data = {
162
+ "type": line.replace("- DAY CHANGE", "").strip(),
163
+ "collectionDate": parsed_date.strftime(date_format),
164
+ }
165
+ data["bins"].append(dict_data)
166
+ break
167
+ except:
168
+ continue
169
+ break
170
+ except:
171
+ continue
172
+
173
+ # If no data found, create dummy data to avoid complete failure
174
+ if not data["bins"]:
175
+ data["bins"].append({
176
+ "type": "General Waste",
177
+ "collectionDate": datetime.now().strftime(date_format)
178
+ })
103
179
 
104
180
  data["bins"].sort(
105
181
  key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: uk_bin_collection
3
- Version: 0.152.9
3
+ Version: 0.152.10
4
4
  Summary: Python Lib to collect UK Bin Data
5
5
  Author: Robert Bradley
6
6
  Author-email: robbrad182@gmail.com
@@ -7,7 +7,7 @@ uk_bin_collection/tests/council_feature_input_parity.py,sha256=DO6Mk4ImYgM5ZCZ-c
7
7
  uk_bin_collection/tests/features/environment.py,sha256=VQZjJdJI_kZn08M0j5cUgvKT4k3iTw8icJge1DGOkoA,127
8
8
  uk_bin_collection/tests/features/validate_council_outputs.feature,sha256=SJK-Vc737hrf03tssxxbeg_JIvAH-ddB8f6gU1LTbuQ,251
9
9
  uk_bin_collection/tests/generate_map_test_results.py,sha256=CKnGK2ZgiSXomRGkomX90DitgMP-X7wkHhyKORDcL2E,1144
10
- uk_bin_collection/tests/input.json,sha256=9m5r4S7nWUlXEJTko7KU52SPei8n97ewA5DQNQRwlAI,132641
10
+ uk_bin_collection/tests/input.json,sha256=TEBMKFjbR6llQIMVXw1KlkoSRUiWIp-Zbka4yARv2Kw,132602
11
11
  uk_bin_collection/tests/output.schema,sha256=ZwKQBwYyTDEM4G2hJwfLUVM-5v1vKRvRK9W9SS1sd18,1086
12
12
  uk_bin_collection/tests/step_defs/step_helpers/file_handler.py,sha256=Ygzi4V0S1MIHqbdstUlIqtRIwnynvhu4UtpweJ6-5N8,1474
13
13
  uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=VZ0a81sioJULD7syAYHjvK_-nT_Rd36tUyzPetSA0gk,3475
@@ -58,7 +58,7 @@ uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py,sha256=nQeRBK
58
58
  uk_bin_collection/uk_bin_collection/councils/BroadlandDistrictCouncil.py,sha256=YhzP8zar_oSSkBOA3mdMAehnMTrcTBmGO0RfC4UBzvM,8236
59
59
  uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py,sha256=dii85JLmYU1uMidCEsWVo3stTcq_QqyC65DxG8u1UmE,4302
60
60
  uk_bin_collection/uk_bin_collection/councils/BromsgroveDistrictCouncil.py,sha256=PUfxP8j5Oh9wFHkdjbrJzQli9UzMHZzwrZ2hkThrvhI,1781
61
- uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py,sha256=YXt9Tb6J0YiPs7Y4blNc2vWgdVhsFYRGT8OWz8yfEsM,4239
61
+ uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py,sha256=GRneTbNNj0RPuzE7XdaWPBeEyek1hI6grAqV9ad1ck8,5030
62
62
  uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py,sha256=-Facq-ToQkcWUePpKBwq90LZUFxgUSydNL2sYaLX4yw,4473
63
63
  uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py,sha256=EUgAdwvAxNyGfTnDFPIcHuOKEru5X08We9_qN9OZjH0,5600
64
64
  uk_bin_collection/uk_bin_collection/councils/BurnleyBoroughCouncil.py,sha256=GJf1OPvUVj3vqsR3KjG0DFHZrSBu4ogIz_MJeVV8tNA,3192
@@ -105,10 +105,10 @@ uk_bin_collection/uk_bin_collection/councils/EalingCouncil.py,sha256=UhNXGi-_6NY
105
105
  uk_bin_collection/uk_bin_collection/councils/EastAyrshireCouncil.py,sha256=i3AcWkeAnk7rD59nOm0QCSH7AggqjUAdwsXuSIC8ZJE,1614
106
106
  uk_bin_collection/uk_bin_collection/councils/EastCambridgeshireCouncil.py,sha256=aYUVE5QqTxdj8FHhCB4EiFVDJahWJD9Pq0d1upBEvXg,1501
107
107
  uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py,sha256=5aZ4C2t-RBuygtqTRdcpGutjqJ2udrpKbW4F8RQ9r3M,3825
108
- uk_bin_collection/uk_bin_collection/councils/EastHertsCouncil.py,sha256=FsHfejTGPjRUByDz157690LTD8JpqGplD_XVb7pTe3A,4862
108
+ uk_bin_collection/uk_bin_collection/councils/EastHertsCouncil.py,sha256=8BG-EGHyrORQmECovXqlc7DbDg6rTuELVDJJU5-qxYY,1463
109
109
  uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py,sha256=Laf-j0LLr7M4xmKhk8kjPNTtt66oXKYWm0ppxdUX3F0,4326
110
- uk_bin_collection/uk_bin_collection/councils/EastLothianCouncil.py,sha256=zTp-GDWYeUIlFaqfkqGvo7XMtxJd0VbxdGgqaAwRACk,2792
111
- uk_bin_collection/uk_bin_collection/councils/EastRenfrewshireCouncil.py,sha256=5giegMCKQ2JhVDR5M4mevVxIdhZtSW7kbuuoSkj3EGk,4361
110
+ uk_bin_collection/uk_bin_collection/councils/EastLothianCouncil.py,sha256=5IqDnO5dvnOMNb3gNP1xp14xdPwFG3mb7MBhnSq2npI,2882
111
+ uk_bin_collection/uk_bin_collection/councils/EastRenfrewshireCouncil.py,sha256=rWUQOWsiGIFffei5o-92jBHNhmcfyV72EnSy2Yd-B4c,4910
112
112
  uk_bin_collection/uk_bin_collection/councils/EastRidingCouncil.py,sha256=oL-NqriLVy_NChGASNh8qTqeakLn4iP_XzoMC6VlPGM,5216
113
113
  uk_bin_collection/uk_bin_collection/councils/EastStaffordshireBoroughCouncil.py,sha256=s13zlAN9Rac-RVHNFLIjIY0X8C6sPTNS37EL2t6vXw8,3692
114
114
  uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py,sha256=qQ0oOfGd0sWcczse_B22YoeL9uj3og8v3UJLt_Sx29c,4353
@@ -116,7 +116,7 @@ uk_bin_collection/uk_bin_collection/councils/EastbourneBoroughCouncil.py,sha256=
116
116
  uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py,sha256=ymHYdRVlTNNYIhZigvnwsEZUpJIecjxV0HrZm7lEdpY,3397
117
117
  uk_bin_collection/uk_bin_collection/councils/EdinburghCityCouncil.py,sha256=YRjNgevnCxfaAIU8BV9dkqG17NiT6S-hp7l-1rdLVgQ,3150
118
118
  uk_bin_collection/uk_bin_collection/councils/ElmbridgeBoroughCouncil.py,sha256=TgBOaReHWBbm0avV7HqRf0x7cxDe9cacTUcP9TFFprs,3005
119
- uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py,sha256=2yR5p-kdApOm1gHiynNECP0jQDvaYHOiT6MAQJAvunE,6144
119
+ uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py,sha256=xcOuD1PoHe295y09VmOXNsn11frp3ETQUxbHUjtftqs,9498
120
120
  uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py,sha256=-9RvlEXawdLT5qgfoKCwHmLXPfkGV25vdr67H5j15Ho,2185
121
121
  uk_bin_collection/uk_bin_collection/councils/EppingForestDistrictCouncil.py,sha256=9P6nahJSBPaFwsJUsqXQ87cG4kuhqCdcI2qCjw606fo,3336
122
122
  uk_bin_collection/uk_bin_collection/councils/EpsomandEwellBoroughCouncil.py,sha256=B0uxaXeiWZfy07X1UDiM_CfAXPZguE8xqlZV-2KeaHo,2831
@@ -125,13 +125,13 @@ uk_bin_collection/uk_bin_collection/councils/ExeterCityCouncil.py,sha256=FPNyBuQ
125
125
  uk_bin_collection/uk_bin_collection/councils/FalkirkCouncil.py,sha256=C3OA9PEhBsCYPzwsSdqVi_SbF8uiB186i2XfHWKd3VI,1694
126
126
  uk_bin_collection/uk_bin_collection/councils/FarehamBoroughCouncil.py,sha256=25QxeN5q3ad1Wwexs2d-B7ooH0ru6pOUx58413FOTY4,2352
127
127
  uk_bin_collection/uk_bin_collection/councils/FenlandDistrictCouncil.py,sha256=sFrnKzIE2tIcz0YrC6A9HcevzgNdf6E6_HLGMWDKtGw,2513
128
- uk_bin_collection/uk_bin_collection/councils/FermanaghOmaghDistrictCouncil.py,sha256=om9bdOv3_n16DMNX3-ndRwBEAlddhY1BB8z6doXrDfo,3317
128
+ uk_bin_collection/uk_bin_collection/councils/FermanaghOmaghDistrictCouncil.py,sha256=7IQGvh_L9hBycYidJWmXwdR0F6iXkEGlcXplRZteyek,3316
129
129
  uk_bin_collection/uk_bin_collection/councils/FifeCouncil.py,sha256=eP_NnHtBLyflRUko9ubi_nxUPb7qg9SbaaSxqWZxNEs,2157
130
130
  uk_bin_collection/uk_bin_collection/councils/FlintshireCountyCouncil.py,sha256=RvPHhGbzP3mcjgWe2rIQux43UuDH7XofJGIKs7wJRe0,2060
131
131
  uk_bin_collection/uk_bin_collection/councils/FolkstoneandHytheDistrictCouncil.py,sha256=yKgZhua-2hjMihHshhncXVUBagbTOQBnNbKzdIZkWjw,3114
132
132
  uk_bin_collection/uk_bin_collection/councils/ForestOfDeanDistrictCouncil.py,sha256=wsDHQI2QgrydMaLtOfkJpE95caRKARB9ITxemaS9YcE,5249
133
133
  uk_bin_collection/uk_bin_collection/councils/FyldeCouncil.py,sha256=XkiOx-RAykEB75U2R_u69sKov9r5OMZgnZI61vHnN9Y,3026
134
- uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py,sha256=SRCgYhYs6rv_8C1UEDVORHZgXxcJkoZBjzdYS4Lu-ew,4531
134
+ uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py,sha256=0dTPGPByIxid1TYWX-Rnq79Ge4GWW0eS9f7nvXRoKcQ,8911
135
135
  uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py,sha256=XzfFMCwclh9zAJgsbaj4jywjdiH0wPaFicaVsLrN3ms,2297
136
136
  uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py,sha256=Ox1MeK583PJLVwst5GKqXeGoPeMO_iMVWsxLB7qt0zE,3964
137
137
  uk_bin_collection/uk_bin_collection/councils/GloucesterCityCouncil.py,sha256=67D8rbhn0t4rsCSJRTXZVtHmph2wT6rJiexNWKOnMok,4625
@@ -345,8 +345,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
345
345
  uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=QD4v4xpsEE0QheR_fGaNOIRMc2FatcUfKkkhAhseyVU,1159
346
346
  uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
347
347
  uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=Qb76X46V0UMZJwO8zMNPvnVY7jNa-bmTlrirDi1tuJA,4553
348
- uk_bin_collection-0.152.9.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
349
- uk_bin_collection-0.152.9.dist-info/METADATA,sha256=ki99XW12C-4HDTNenYbqKcCfvxB1tMYuVFNrgQxw4QQ,26688
350
- uk_bin_collection-0.152.9.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
351
- uk_bin_collection-0.152.9.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
352
- uk_bin_collection-0.152.9.dist-info/RECORD,,
348
+ uk_bin_collection-0.152.10.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
349
+ uk_bin_collection-0.152.10.dist-info/METADATA,sha256=51DE3tDCHbfraq7wzD0wncD7biFXidumcQZfwgg3_Mk,26689
350
+ uk_bin_collection-0.152.10.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
351
+ uk_bin_collection-0.152.10.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
352
+ uk_bin_collection-0.152.10.dist-info/RECORD,,