uk_bin_collection 0.151.0__py3-none-any.whl → 0.152.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. uk_bin_collection/tests/input.json +137 -66
  2. uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +69 -24
  3. uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py +149 -0
  4. uk_bin_collection/uk_bin_collection/councils/BarkingDagenham.py +11 -2
  5. uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +24 -47
  6. uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +11 -2
  7. uk_bin_collection/uk_bin_collection/councils/BroadlandDistrictCouncil.py +21 -6
  8. uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +14 -3
  9. uk_bin_collection/uk_bin_collection/councils/CheltenhamBoroughCouncil.py +12 -12
  10. uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +24 -2
  11. uk_bin_collection/uk_bin_collection/councils/ChichesterDistrictCouncil.py +105 -53
  12. uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +4 -0
  13. uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +114 -261
  14. uk_bin_collection/uk_bin_collection/councils/DartfordBoroughCouncil.py +13 -0
  15. uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +17 -2
  16. uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +14 -1
  17. uk_bin_collection/uk_bin_collection/councils/EastbourneBoroughCouncil.py +76 -0
  18. uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +59 -45
  19. uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +2 -0
  20. uk_bin_collection/uk_bin_collection/councils/EppingForestDistrictCouncil.py +47 -15
  21. uk_bin_collection/uk_bin_collection/councils/FermanaghOmaghDistrictCouncil.py +102 -0
  22. uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +13 -1
  23. uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +2 -3
  24. uk_bin_collection/uk_bin_collection/councils/HerefordshireCouncil.py +13 -2
  25. uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +18 -4
  26. uk_bin_collection/uk_bin_collection/councils/LewesDistrictCouncil.py +76 -0
  27. uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +16 -4
  28. uk_bin_collection/uk_bin_collection/councils/MaidstoneBoroughCouncil.py +42 -47
  29. uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +13 -6
  30. uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py +61 -39
  31. uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +14 -9
  32. uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +2 -2
  33. uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +50 -14
  34. uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py +140 -0
  35. uk_bin_collection/uk_bin_collection/councils/SouthRibbleCouncil.py +115 -65
  36. uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +10 -5
  37. uk_bin_collection/uk_bin_collection/councils/TewkesburyBoroughCouncil.py +40 -0
  38. uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +1 -3
  39. uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +3 -0
  40. {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/METADATA +1 -1
  41. {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/RECORD +44 -38
  42. {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/LICENSE +0 -0
  43. {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/WHEEL +0 -0
  44. {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/entry_points.txt +0 -0
@@ -1,7 +1,8 @@
 from bs4 import BeautifulSoup
+from dateutil.relativedelta import relativedelta
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
-from dateutil.relativedelta import relativedelta


 # import the wonderful Beautiful Soup and the URL grabber
@@ -18,9 +19,20 @@ class CouncilClass(AbstractGetBinDataClass):
         collections = []
         curr_date = datetime.today()

-        # Parse the page
-        soup = BeautifulSoup(page.text, features="html.parser")
-        soup.prettify()
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://liverpool.gov.uk/Bins/BinDatesTable?UPRN={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
+        # Make a BS4 object
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, "html.parser")
+        soup.prettify

         # Get all table rows on the page - enumerate gives us an index, which is handy for to keep a row count.
         # In this case, the first (0th) row is headings, so we can skip it, then parse the other data.
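
This hunk (the URL suggests LiverpoolCityCouncil.py) and the Newham hunk further down introduce the same pattern: build the request URL from a UPRN, keeping a URL stored by the old system as a backwards-compatible fallback. A minimal sketch of that pattern in isolation, with a simple digit test standing in for the package's check_uprn helper:

    import requests
    from bs4 import BeautifulSoup
    from typing import Optional

    def fetch_bin_page(uprn: Optional[str], stored_url: Optional[str]) -> BeautifulSoup:
        """Prefer a UPRN-built URL; fall back to a URL stored by the old system."""
        if uprn:
            if not uprn.isdigit():  # stand-in for the package's check_uprn()
                raise ValueError(f"Error getting identifier: invalid UPRN {uprn}")
            url = f"https://liverpool.gov.uk/Bins/BinDatesTable?UPRN={uprn}"
        elif stored_url:
            url = stored_url  # backwards compatibility with old configs
        else:
            raise ValueError("Error getting identifier: need a UPRN or a URL")
        page = requests.get(url, timeout=30)
        page.raise_for_status()
        return BeautifulSoup(page.text, "html.parser")

Validating only after the fallback check, as in this sketch, would also mean a config that carries just a URL is not rejected by the UPRN check.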
@@ -3,6 +3,7 @@ from datetime import datetime

 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import Select
 from selenium.webdriver.support.wait import WebDriverWait
@@ -10,8 +11,6 @@ from selenium.webdriver.support.wait import WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

-# import the wonderful Beautiful Soup and the URL grabber
-

 class CouncilClass(AbstractGetBinDataClass):
     """
@@ -30,7 +29,7 @@ class CouncilClass(AbstractGetBinDataClass):
             web_driver = kwargs.get("web_driver")
             headless = kwargs.get("headless")
             check_postcode(user_postcode)
-
+
             # Create Selenium webdriver
             driver = create_webdriver(web_driver, headless, None, __name__)
             driver.get(page)
@@ -41,68 +40,64 @@ class CouncilClass(AbstractGetBinDataClass):
             driver.switch_to.frame(iframe_presense)

             wait = WebDriverWait(driver, 60)
-
+
             # Postal code input
             inputElement_postcodesearch = wait.until(
                 EC.element_to_be_clickable((By.NAME, "postcode"))
             )
             inputElement_postcodesearch.send_keys(user_postcode)
-
-            # Wait for the 'Select address' dropdown to be updated
-            dropdown_select = wait.until(
-                EC.presence_of_element_located((By.XPATH, "//span[contains(text(), 'Select...')]"))
-            )
-            dropdown_select.click()
-
+
+            time.sleep(5)
+
+            inputElement_postcodesearch.send_keys(Keys.TAB + Keys.DOWN)
+
             dropdown = wait.until(
-                EC.element_to_be_clickable((By.XPATH, f"//div[contains(text(), ' {user_paon}')]"))
+                EC.element_to_be_clickable(
+                    (By.XPATH, f"//div[contains(text(), ' {user_paon}')]")
+                )
             )
             dropdown.click()

-            # Wait for 'Searching for...' to be added to page
-            WebDriverWait(driver, timeout=15).until(
+            # This website is horrible!
+            WebDriverWait(driver, 20).until(
                 EC.text_to_be_present_in_element(
-                    (By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
-                )
-            )
-
-            # Wait for 'Searching for...' to be removed from page
-            WebDriverWait(driver, timeout=15).until(
-                EC.none_of(
-                    EC.text_to_be_present_in_element(
-                        (By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
-                    )
+                    (By.CSS_SELECTOR, "div.col-collection-panel"), "Next collection"
                 )
             )

             # Even then it can still be adding data to the page...
             time.sleep(5)

-            soup = BeautifulSoup(driver.page_source, features="html.parser")
-
-            # This is ugly but there is literally no consistency to the HTML
-            def is_a_collection_date(t):
-                return any("Next collection" in c for c in t.children)
-
-            for next_collection in soup.find_all(is_a_collection_date):
-                bin_info = list(
-                    next_collection.parent.select_one("div:nth-child(1)").children
-                )
-                if not bin_info:
-                    continue
-                bin = bin_info[0].get_text()
-                date = next_collection.select_one("strong").get_text(strip=True)
-                bin_date = datetime.strptime(date, "%d %b %Y")
-                dict_data = {
-                    "type": bin,
-                    "collectionDate": bin_date.strftime(date_format),
-                }
-                bin_data["bins"].append(dict_data)
-
-            bin_data["bins"].sort(
-                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+            # Scraping via Selenium rather than BeautifulSoup, to ensure eveything's loaded
+            collection_panels = driver.find_elements(
+                By.CSS_SELECTOR, "div.col-collection-panel"
             )

+            for panel in collection_panels:
+                try:
+                    # Get bin type (e.g., General waste, Food waste)
+                    bin_type = panel.find_element(
+                        By.CSS_SELECTOR, "h3.collectionDataHeader"
+                    ).text.strip()
+                    # Get next collection date
+                    lines = panel.find_elements(By.CSS_SELECTOR, "ul li")
+                    for line in lines:
+                        if "Next collection" in line.text:
+                            date_str = (
+                                line.text.split("Next collection")[1]
+                                .strip(": ")
+                                .strip()
+                            )
+                            bin_date = datetime.strptime(date_str, "%d/%m/%Y")
+                            bin_data["bins"].append(
+                                {
+                                    "type": bin_type,
+                                    "collectionDate": bin_date.strftime(date_format),
+                                }
+                            )
+                except Exception as inner_e:
+                    print(f"Skipping one panel due to error: {inner_e}")
+
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
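
The rework above drops the two-phase "Searching…" wait in favour of waiting for the literal "Next collection" text, then reading the panels through Selenium itself. A condensed sketch of that wait-then-scrape shape, reusing the hunk's own selectors:

    from datetime import datetime

    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    def scrape_panels(driver, date_format: str = "%d/%m/%Y") -> list:
        """Wait until results text is rendered, then read each collection panel."""
        WebDriverWait(driver, 20).until(
            EC.text_to_be_present_in_element(
                (By.CSS_SELECTOR, "div.col-collection-panel"), "Next collection"
            )
        )
        bins = []
        for panel in driver.find_elements(By.CSS_SELECTOR, "div.col-collection-panel"):
            bin_type = panel.find_element(
                By.CSS_SELECTOR, "h3.collectionDataHeader"
            ).text.strip()
            for line in panel.find_elements(By.CSS_SELECTOR, "ul li"):
                if "Next collection" in line.text:
                    date_str = line.text.split("Next collection")[1].strip(": ").strip()
                    when = datetime.strptime(date_str, "%d/%m/%Y")
                    bins.append({"type": bin_type, "collectionDate": when.strftime(date_format)})
        return bins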
@@ -7,14 +7,21 @@ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataC

 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
-        # get the page data
-        http = urllib3.PoolManager()
-        response = http.request("GET", kwargs["url"])
-        page_data = response.data
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://bincollection.newham.gov.uk/Details/Index/{user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")

         # Make a BS4 object
-        soup = BeautifulSoup(page_data, features="html.parser")
-        soup.prettify()
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, "html.parser")
+        soup.prettify

         # Form a JSON wrapper
         data = {"bins": []}
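
A small aside on both rewritten fetchers: the new soup.prettify (no parentheses) merely references the method and does nothing, where the removed soup.prettify() at least returned a formatted string. Neither affects parsing, since BeautifulSoup works on the already-parsed tree. A quick check:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup("<p>bins</p>", "html.parser")
    print(type(soup.prettify))   # <class 'method'> - a bare reference, no effect
    print(soup.prettify()[:20])  # the call returns a formatted string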
@@ -1,4 +1,5 @@
 from datetime import datetime
+from time import sleep

 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
@@ -9,8 +10,6 @@ from selenium.webdriver.support.wait import WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

-# import the wonderful Beautiful Soup and the URL grabber
-

 class CouncilClass(AbstractGetBinDataClass):
     """
@@ -34,82 +33,105 @@ class CouncilClass(AbstractGetBinDataClass):
             headless = kwargs.get("headless")
             check_uprn(user_uprn)
             check_postcode(user_postcode)
-            # Create Selenium webdriver
+
             driver = create_webdriver(web_driver, headless, None, __name__)
             driver.get(page)

-            # If you bang in the house number (or property name) and postcode in the box it should find your property
-
             iframe_presense = WebDriverWait(driver, 30).until(
                 EC.presence_of_element_located((By.ID, "fillform-frame-1"))
             )

             driver.switch_to.frame(iframe_presense)
             wait = WebDriverWait(driver, 60)
+
             inputElement_postcodesearch = wait.until(
                 EC.element_to_be_clickable((By.NAME, "postcode_search"))
             )
-
             inputElement_postcodesearch.send_keys(str(user_postcode))

-            # Wait for the 'Select your property' dropdown to appear and select the first result
             dropdown = wait.until(EC.element_to_be_clickable((By.NAME, "selAddress")))
-
             dropdown_options = wait.until(
                 EC.presence_of_element_located((By.CLASS_NAME, "lookup-option"))
             )

-            # Create a 'Select' for it, then select the first address in the list
-            # (Index 0 is "Make a selection from the list")
             drop_down_values = Select(dropdown)
             option_element = wait.until(
                 EC.presence_of_element_located(
                     (By.CSS_SELECTOR, f'option.lookup-option[value="{str(user_uprn)}"]')
                 )
             )
-
             drop_down_values.select_by_value(str(user_uprn))

-            # Wait for the 'View more' link to appear, then click it to get the full set of dates
             h3_element = wait.until(
                 EC.presence_of_element_located(
                     (By.XPATH, "//th[contains(text(), 'Waste Collection')]")
                 )
             )

+            sleep(10)
+
             soup = BeautifulSoup(driver.page_source, features="html.parser")
+            print("Parsing HTML content...")
+
+            collection_rows = soup.find_all("tr")
+
+            for row in collection_rows:
+                cells = row.find_all("td")
+                if len(cells) == 3:  # Date, Image, Bin Type
+                    # Extract date carefully
+                    date_labels = cells[0].find_all("label")
+                    collection_date = None
+                    for label in date_labels:
+                        label_text = label.get_text().strip()
+                        if contains_date(label_text):
+                            collection_date = label_text
+                            break
+
+                    # Extract bin type
+                    bin_label = cells[2].find("label")
+                    bin_types = bin_label.get_text().strip() if bin_label else None
+
+                    if collection_date and bin_types:
+                        print(f"Found collection: {collection_date} - {bin_types}")
+
+                        # Handle combined collections
+                        if "&" in bin_types:
+                            if "Burgundy" in bin_types:
+                                data["bins"].append(
+                                    {
+                                        "type": "Burgundy Bin",
+                                        "collectionDate": datetime.strptime(
+                                            collection_date, "%d/%m/%Y"
+                                        ).strftime(date_format),
+                                    }
+                                )
+                            if "Green" in bin_types:
+                                data["bins"].append(
+                                    {
+                                        "type": "Green Bin",
+                                        "collectionDate": datetime.strptime(
+                                            collection_date, "%d/%m/%Y"
+                                        ).strftime(date_format),
+                                    }
+                                )
+                        else:
+                            if "Black" in bin_types:
+                                data["bins"].append(
+                                    {
+                                        "type": "Black Bin",
+                                        "collectionDate": datetime.strptime(
+                                            collection_date, "%d/%m/%Y"
+                                        ).strftime(date_format),
+                                    }
+                                )
+
+            print(f"Found {len(data['bins'])} collections")
+            print(f"Final data: {data}")

-            target_h3 = soup.find("h3", string="Collection Details")
-            tables_after_h3 = target_h3.parent.parent.find_next("table")
-
-            table_rows = tables_after_h3.find_all("tr")
-            for row in table_rows:
-                rowdata = row.find_all("td")
-                if len(rowdata) == 3:
-                    labels = rowdata[0].find_all("label")
-                    # Strip the day (i.e., Monday) out of the collection date string for parsing
-                    if len(labels) >= 2:
-                        date_label = labels[1]
-                        datestring = date_label.text.strip()
-
-                        # Add the bin type and collection date to the 'data' dictionary
-                        data["bins"].append(
-                            {
-                                "type": rowdata[2].text.strip(),
-                                "collectionDate": datetime.strptime(
-                                    datestring, "%d/%m/%Y"
-                                ).strftime(
-                                    date_format
-                                ),  # Format the date as needed
-                            }
-                        )
         except Exception as e:
-            # Here you can log the exception if needed
             print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-            # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
         return data
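
The colour branching above repeats an identical append per bin colour. A table-driven sketch (not what the package ships) that covers both the combined "&" case and the single-colour case in one path:

    from datetime import datetime

    KNOWN_BINS = ("Burgundy", "Green", "Black")

    def expand_bin_types(bin_types: str, collection_date: str, date_format: str = "%d/%m/%Y") -> list:
        """Map a cell like 'Burgundy & Green' to one entry per recognised colour."""
        stamp = datetime.strptime(collection_date, "%d/%m/%Y").strftime(date_format)
        return [
            {"type": f"{colour} Bin", "collectionDate": stamp}
            for colour in KNOWN_BINS
            if colour in bin_types
        ]

    print(expand_bin_types("Burgundy & Green", "01/08/2025"))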
@@ -43,9 +43,13 @@ class CouncilClass(AbstractGetBinDataClass):
             collectionDate = (
                 cells[1].get_text(strip=True) + " " + datetime.now().strftime("%Y")
             )
-            nextCollectionDate = (
-                cells[2].get_text(strip=True) + " " + datetime.now().strftime("%Y")
-            )
+
+            if len(cells) > 2:
+                nextCollectionDate = (
+                    cells[2].get_text(strip=True) + " " + datetime.now().strftime("%Y")
+                )
+            else:
+                nextCollectionDate = ""

             # Make each Bin element in the JSON
             dict_data = {
@@ -59,12 +63,13 @@ class CouncilClass(AbstractGetBinDataClass):
             data["bins"].append(dict_data)

             # Make each next Bin element in the JSON
-            dict_data = {
-                "type": binType,
-                "collectionDate": get_next_occurrence_from_day_month(
-                    datetime.strptime(nextCollectionDate, "%A %d %B %Y")
-                ).strftime(date_format),
-            }
+            if nextCollectionDate != "":
+                dict_data = {
+                    "type": binType,
+                    "collectionDate": get_next_occurrence_from_day_month(
+                        datetime.strptime(nextCollectionDate, "%A %d %B %Y")
+                    ).strftime(date_format),
+                }

             # Add data to the main JSON Wrapper
             data["bins"].append(dict_data)
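
One hazard worth noting in this hunk as reconstructed: the final append stays unconditional, so when nextCollectionDate is empty, dict_data still holds the previous entry and that entry is appended twice. A sketch with the append moved inside the guard, using the same helpers the file already pulls in via its common import:

    from datetime import datetime

    # get_next_occurrence_from_day_month and date_format appear to come from the
    # package's common module, as the surrounding file imports them.
    from uk_bin_collection.uk_bin_collection.common import *

    def append_next_collection(data: dict, bin_type: str, next_collection_date: str) -> None:
        """Build and append the 'next' entry only when a date was actually scraped."""
        if next_collection_date:
            data["bins"].append(
                {
                    "type": bin_type,
                    "collectionDate": get_next_occurrence_from_day_month(
                        datetime.strptime(next_collection_date, "%A %d %B %Y")
                    ).strftime(date_format),
                }
            )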
@@ -81,9 +81,9 @@ class CouncilClass(AbstractGetBinDataClass):
         # The regular calendar only shows until end of March 2026, work out how many weeks that is
         weeks_total = math.floor((datetime(2026, 4, 1) - datetime.now()).days / 7)

-        # The garden calendar only shows until end of November 2024, work out how many weeks that is
+        # The garden calendar only shows until end of November 2025, work out how many weeks that is
         garden_weeks_total = math.floor(
-            (datetime(2024, 12, 1) - datetime.now()).days / 7
+            (datetime(2025, 12, 1) - datetime.now()).days / 7
         )

         regular_collections, garden_collections, special_collections = [], [], []
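
The week counts here are floor-divided day deltas. For example, from 1 August 2025 the garden cut-off of 1 December 2025 is 122 days away, giving 17 whole weeks:

    import math
    from datetime import datetime

    now = datetime(2025, 8, 1)  # fixed "today" so the example is reproducible
    garden_weeks_total = math.floor((datetime(2025, 12, 1) - now).days / 7)
    print(garden_weeks_total)  # 122 days -> 17 whole weeks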
@@ -1,7 +1,10 @@
 import time

 from bs4 import BeautifulSoup
+from selenium.common.exceptions import TimeoutException
 from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import WebDriverWait

 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
@@ -42,28 +45,61 @@ class CouncilClass(AbstractGetBinDataClass):
             driver = create_webdriver(web_driver, headless, None, __name__)
             driver.get(page)

-            time.sleep(1)
+            # Create wait object
+            wait = WebDriverWait(driver, 20)

-            # Press the cookie accept - wait is to let the JS load it up
-            driver.find_element(By.ID, "ccc-notify-accept").click()
-
-            inputElement_hn = driver.find_element(
-                By.ID,
-                "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtHouse",
+            # Wait for and click cookie button
+            cookie_button = wait.until(
+                EC.element_to_be_clickable((By.ID, "ccc-notify-accept"))
+            )
+            cookie_button.click()
+
+            # Wait for and find house number input
+            inputElement_hn = wait.until(
+                EC.presence_of_element_located(
+                    (
+                        By.ID,
+                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtHouse",
+                    )
+                )
             )
-            inputElement_pc = driver.find_element(
-                By.ID,
-                "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtPostcode",
+
+            # Wait for and find postcode input
+            inputElement_pc = wait.until(
+                EC.presence_of_element_located(
+                    (
+                        By.ID,
+                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtPostcode",
+                    )
+                )
             )

+            # Enter details
             inputElement_pc.send_keys(user_postcode)
             inputElement_hn.send_keys(user_paon)

-            driver.find_element(
-                By.ID,
-                "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_butLookup",
-            ).click()
+            # Click lookup button and wait for results
+            lookup_button = wait.until(
+                EC.element_to_be_clickable(
+                    (
+                        By.ID,
+                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_butLookup",
+                    )
+                )
+            )
+            lookup_button.click()
+
+            # Wait for results to load
+            route_summary = wait.until(
+                EC.presence_of_element_located(
+                    (
+                        By.ID,
+                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
+                    )
+                )
+            )

+            # Get page source after everything has loaded
             soup = BeautifulSoup(driver.page_source, features="html.parser")

             # Work out which bins can be collected for this address. Glass bins are only on some houses due to pilot programme.
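
The wait-until-then-act shape now appears five times in this hunk (cookie banner, two inputs, lookup button, results span). A small helper would collapse the repetition; a sketch using the hunk's own element IDs, not code from the package:

    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    PREFIX = "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_"

    def wait_for(driver, element_id: str, clickable: bool = False, timeout: int = 20):
        """Wait for an element by ID; optionally require it to be clickable."""
        condition = EC.element_to_be_clickable if clickable else EC.presence_of_element_located
        return WebDriverWait(driver, timeout).until(condition((By.ID, element_id)))

    # Usage, mirroring the hunk:
    # wait_for(driver, "ccc-notify-accept", clickable=True).click()
    # house = wait_for(driver, PREFIX + "NCCAddressLookup_txtHouse")
    # postcode = wait_for(driver, PREFIX + "NCCAddressLookup_txtPostcode")
    # wait_for(driver, PREFIX + "NCCAddressLookup_butLookup", clickable=True).click()
    # wait_for(driver, PREFIX + "spanRouteSummary")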
@@ -0,0 +1,140 @@
+import time
+import re
+import requests
+from datetime import datetime
+from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import WebDriverWait
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+def get_street_from_postcode(postcode: str, api_key: str) -> str:
+    url = "https://maps.googleapis.com/maps/api/geocode/json"
+    params = {"address": postcode, "key": api_key}
+    response = requests.get(url, params=params)
+    data = response.json()
+
+    if data["status"] != "OK":
+        raise ValueError(f"API error: {data['status']}")
+
+    for component in data["results"][0]["address_components"]:
+        if "route" in component["types"]:
+            return component["long_name"]
+
+    raise ValueError("No street (route) found in the response.")
+
+class CouncilClass(AbstractGetBinDataClass):
+    def parse_data(self, page: str, **kwargs) -> dict:
+        driver = None
+        bin_data = {"bins": []}
+        try:
+            user_postcode = kwargs.get("postcode")
+            if not user_postcode:
+                raise ValueError("No postcode provided.")
+            check_postcode(user_postcode)
+
+            headless = kwargs.get("headless")
+            web_driver = kwargs.get("web_driver")
+            driver = create_webdriver(web_driver, headless, None, __name__)
+            page = "https://www.slough.gov.uk/bin-collections"
+            driver.get(page)
+
+            # Accept cookies
+            WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable((By.ID, "ccc-recommended-settings"))
+            ).click()
+
+            # Enter the street name into the address search
+            address_input = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.ID, "keyword_directory25"))
+            )
+            user_address = get_street_from_postcode(user_postcode, "AIzaSyBDLULT7EIlNtHerswPtfmL15Tt3Oc0bV8")
+            address_input.send_keys(user_address + Keys.ENTER)
+
+            # Wait for address results to load
+            WebDriverWait(driver, 10).until(
+                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "span.list__link-text"))
+            )
+            span_elements = driver.find_elements(By.CSS_SELECTOR, "span.list__link-text")
+
+            for span in span_elements:
+                if user_address.lower() in span.text.lower():
+                    span.click()
+                    break
+            else:
+                raise Exception(f"No link found containing address: {user_address}")
+
+            # Wait for address detail page
+            WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.CSS_SELECTOR, "section.site-content"))
+            )
+            soup = BeautifulSoup(driver.page_source, "html.parser")
+
+            # Extract each bin link and type
+            for heading in soup.select("dt.definition__heading"):
+                heading_text = heading.get_text(strip=True)
+                if "bin day details" in heading_text.lower():
+                    bin_type = heading_text.split()[0].capitalize() + " bin"
+                    dd = heading.find_next_sibling("dd")
+                    link = dd.find("a", href=True)
+
+                    if link:
+                        bin_url = link["href"]
+                        if not bin_url.startswith("http"):
+                            bin_url = "https://www.slough.gov.uk" + bin_url
+
+                        # Visit the child page
+                        print(f"Navigating to {bin_url}")
+                        driver.get(bin_url)
+                        WebDriverWait(driver, 10).until(
+                            EC.presence_of_element_located((By.CSS_SELECTOR, "div.page-content"))
+                        )
+                        child_soup = BeautifulSoup(driver.page_source, "html.parser")
+
+                        editor_div = child_soup.find("div", class_="editor")
+                        if not editor_div:
+                            print("No editor div found on bin detail page.")
+                            continue
+
+                        ul = editor_div.find("ul")
+                        if not ul:
+                            print("No <ul> with dates found in editor div.")
+                            continue
+
+                        for li in ul.find_all("li"):
+                            raw_text = li.get_text(strip=True).replace(".", "")
+
+                            if "no collection" in raw_text.lower() or "no collections" in raw_text.lower():
+                                print(f"Ignoring non-collection note: {raw_text}")
+                                continue
+
+                            raw_date = raw_text
+
+                            try:
+                                parsed_date = datetime.strptime(raw_date, "%d %B %Y")
+                            except ValueError:
+                                raw_date_cleaned = raw_date.split("(")[0].strip()
+                                try:
+                                    parsed_date = datetime.strptime(raw_date_cleaned, "%d %B %Y")
+                                except Exception:
+                                    print(f"Could not parse date: {raw_text}")
+                                    continue
+
+                            formatted_date = parsed_date.strftime("%d/%m/%Y")
+                            contains_date(formatted_date)
+                            bin_data["bins"].append({
+                                "type": bin_type,
+                                "collectionDate": formatted_date
+                            })
+
+                            print(f"Type: {bin_type}, Date: {formatted_date}")
+
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            raise
+        finally:
+            if driver:
+                driver.quit()
+        return bin_data
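
A minimal usage sketch for the geocoding helper this new module (SloughBoroughCouncil.py, per the file list) defines. The key and postcode below are placeholders; the released file embeds its own key inline:

    from uk_bin_collection.uk_bin_collection.councils.SloughBoroughCouncil import (
        get_street_from_postcode,
    )

    # Placeholder key and postcode - substitute a real Google Geocoding API key.
    street = get_street_from_postcode("SL1 2RR", "YOUR_GOOGLE_API_KEY")
    print(street)  # the "route" component, e.g. a street name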