uk_bin_collection 0.151.0__py3-none-any.whl → 0.152.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. uk_bin_collection/tests/input.json +137 -66
  2. uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +69 -24
  3. uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py +149 -0
  4. uk_bin_collection/uk_bin_collection/councils/BarkingDagenham.py +11 -2
  5. uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +24 -47
  6. uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +11 -2
  7. uk_bin_collection/uk_bin_collection/councils/BroadlandDistrictCouncil.py +21 -6
  8. uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +14 -3
  9. uk_bin_collection/uk_bin_collection/councils/CheltenhamBoroughCouncil.py +12 -12
  10. uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +24 -2
  11. uk_bin_collection/uk_bin_collection/councils/ChichesterDistrictCouncil.py +105 -53
  12. uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +4 -0
  13. uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +114 -261
  14. uk_bin_collection/uk_bin_collection/councils/DartfordBoroughCouncil.py +13 -0
  15. uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +17 -2
  16. uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +14 -1
  17. uk_bin_collection/uk_bin_collection/councils/EastbourneBoroughCouncil.py +76 -0
  18. uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +59 -45
  19. uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +2 -0
  20. uk_bin_collection/uk_bin_collection/councils/EppingForestDistrictCouncil.py +47 -15
  21. uk_bin_collection/uk_bin_collection/councils/FermanaghOmaghDistrictCouncil.py +102 -0
  22. uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +13 -1
  23. uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +2 -3
  24. uk_bin_collection/uk_bin_collection/councils/HerefordshireCouncil.py +13 -2
  25. uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +18 -4
  26. uk_bin_collection/uk_bin_collection/councils/LewesDistrictCouncil.py +76 -0
  27. uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +16 -4
  28. uk_bin_collection/uk_bin_collection/councils/MaidstoneBoroughCouncil.py +42 -47
  29. uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +13 -6
  30. uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py +61 -39
  31. uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +14 -9
  32. uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +2 -2
  33. uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +50 -14
  34. uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py +140 -0
  35. uk_bin_collection/uk_bin_collection/councils/SouthRibbleCouncil.py +115 -65
  36. uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +10 -5
  37. uk_bin_collection/uk_bin_collection/councils/TewkesburyBoroughCouncil.py +40 -0
  38. uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +1 -3
  39. uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +3 -0
  40. {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/METADATA +1 -1
  41. {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/RECORD +44 -38
  42. {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/LICENSE +0 -0
  43. {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/WHEEL +0 -0
  44. {uk_bin_collection-0.151.0.dist-info → uk_bin_collection-0.152.1.dist-info}/entry_points.txt +0 -0
@@ -1,110 +1,162 @@
1
1
  import time
2
2
  from datetime import datetime
3
3
 
4
- from selenium.webdriver.support.ui import Select
5
4
  from bs4 import BeautifulSoup
6
5
  from selenium.webdriver.common.by import By
7
- from selenium.webdriver.support import expected_conditions as EC
8
- from selenium.webdriver.support.ui import Select
9
- from selenium.webdriver.support.wait import WebDriverWait
10
6
  from selenium.webdriver.common.keys import Keys
7
+ from selenium.webdriver.support.ui import WebDriverWait, Select
8
+ from selenium.webdriver.support import expected_conditions as EC
9
+ from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
11
10
 
12
11
  from uk_bin_collection.uk_bin_collection.common import *
13
12
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
14
13
 
14
+ date_format = "%d/%m/%Y"
15
15
 
16
- # import the wonderful Beautiful Soup and the URL grabber
17
16
  class CouncilClass(AbstractGetBinDataClass):
18
- """
19
- Concrete classes have to implement all abstract operations of the
20
- base class. They can also override some operations with a default
21
- implementation.
22
- """
23
-
24
17
  def parse_data(self, page: str, **kwargs) -> dict:
25
18
  driver = None
26
19
  try:
27
- # Make a BS4 object
28
-
29
20
  page = "https://www.chichester.gov.uk/checkyourbinday"
30
21
 
31
22
  user_postcode = kwargs.get("postcode")
32
- user_uprn = kwargs.get("uprn")
23
+ house_number = kwargs.get("paon")
33
24
  web_driver = kwargs.get("web_driver")
34
25
  headless = kwargs.get("headless")
35
- house_number = kwargs.get("paon")
36
26
 
37
27
  driver = create_webdriver(web_driver, headless, None, __name__)
38
28
  driver.get(page)
39
29
 
40
30
  wait = WebDriverWait(driver, 60)
41
31
 
42
- inputElement_postcodesearch = wait.until(
32
+ input_postcode = wait.until(
43
33
  EC.visibility_of_element_located(
44
34
  (By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPPOSTCODE")
45
35
  )
46
36
  )
37
+ input_postcode.send_keys(user_postcode)
47
38
 
48
- inputElement_postcodesearch.send_keys(user_postcode)
49
-
50
- inputElement_postcodesearch_btn = wait.until(
51
- EC.visibility_of_element_located(
52
- (By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPSEARCH")
53
- )
54
- )
55
- inputElement_postcodesearch_btn.send_keys(Keys.ENTER)
56
-
57
- inputElement_select_address = wait.until(
39
+ search_button = wait.until(
58
40
  EC.element_to_be_clickable(
59
- (By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPADDRESS")
41
+ (By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPSEARCH")
60
42
  )
61
43
  )
62
- dropdown_element = driver.find_element(
63
- By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPADDRESS"
64
- )
44
+ search_button.send_keys(Keys.ENTER)
65
45
 
66
- # Now create a Select object based on the found element
67
- dropdown = Select(dropdown_element)
46
+ self.smart_select_address(driver, house_number)
68
47
 
69
- # Select the option by visible text
70
- dropdown.select_by_visible_text(house_number)
71
-
72
- results = wait.until(
73
- EC.element_to_be_clickable(
48
+ wait.until(
49
+ EC.presence_of_element_located(
74
50
  (By.CLASS_NAME, "bin-collection-dates-container")
75
51
  )
76
52
  )
77
53
 
78
54
  soup = BeautifulSoup(driver.page_source, features="html.parser")
79
- soup.prettify()
55
+ table = soup.find("table", class_="defaultgeneral bin-collection-dates")
56
+ rows = table.find_all("tr") if table else []
80
57
 
81
- # Extract data from the table
82
58
  bin_collection_data = []
83
- rows = soup.find(
84
- "table", class_="defaultgeneral bin-collection-dates"
85
- ).find_all("tr")
86
59
  for row in rows:
87
60
  cells = row.find_all("td")
88
61
  if cells:
89
62
  date_str = cells[0].text.strip()
90
63
  bin_type = cells[1].text.strip()
91
- # Convert date string to the required format DD/MM/YYYY
92
64
  date_obj = datetime.strptime(date_str, "%d %B %Y")
93
- date_formatted = date_obj.strftime(date_format)
94
- bin_collection_data.append(
95
- {"collectionDate": date_formatted, "type": bin_type}
96
- )
65
+ formatted_date = date_obj.strftime(date_format)
66
+ bin_collection_data.append({
67
+ "collectionDate": formatted_date,
68
+ "type": bin_type
69
+ })
97
70
 
98
- # Convert to JSON
99
- json_data = {"bins": bin_collection_data}
71
+ print(bin_collection_data)
72
+
73
+ return {"bins": bin_collection_data}
100
74
 
101
75
  except Exception as e:
102
- # Here you can log the exception if needed
103
76
  print(f"An error occurred: {e}")
104
- # Optionally, re-raise the exception if you want it to propagate
105
77
  raise
106
78
  finally:
107
- # This block ensures that the driver is closed regardless of an exception
108
79
  if driver:
109
80
  driver.quit()
110
- return json_data
81
+
82
+ def smart_select_address(self, driver, house_number: str):
83
+ dropdown_id = "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPADDRESS"
84
+
85
+ print("Waiting for address dropdown...")
86
+
87
+ def dropdown_has_addresses(d):
88
+ try:
89
+ dropdown_el = d.find_element(By.ID, dropdown_id)
90
+ select = Select(dropdown_el)
91
+ return len(select.options) > 1
92
+ except StaleElementReferenceException:
93
+ return False
94
+
95
+ WebDriverWait(driver, 30).until(dropdown_has_addresses)
96
+
97
+ dropdown_el = driver.find_element(By.ID, dropdown_id)
98
+ dropdown = Select(dropdown_el)
99
+
100
+ print("Address dropdown options:")
101
+ for opt in dropdown.options:
102
+ print(f"- {opt.text.strip()}")
103
+
104
+ user_input_clean = house_number.lower().strip()
105
+ found = False
106
+
107
+ for option in dropdown.options:
108
+ option_text_clean = option.text.lower().strip()
109
+ print(f"Comparing: {repr(option_text_clean)} == {repr(user_input_clean)}")
110
+
111
+ if (
112
+ option_text_clean == user_input_clean
113
+ or option_text_clean.startswith(f"{user_input_clean},")
114
+ ):
115
+ try:
116
+ option.click()
117
+ found = True
118
+ print(f"Strict match clicked: {option.text.strip()}")
119
+ break
120
+ except StaleElementReferenceException:
121
+ print("Stale during click, retrying...")
122
+ dropdown_el = driver.find_element(By.ID, dropdown_id)
123
+ dropdown = Select(dropdown_el)
124
+ for fresh_option in dropdown.options:
125
+ if fresh_option.text.lower().strip() == option_text_clean:
126
+ fresh_option.click()
127
+ found = True
128
+ print(f"Strict match clicked after refresh: {fresh_option.text.strip()}")
129
+ break
130
+
131
+ if found:
132
+ break
133
+
134
+ if not found:
135
+ print("No strict match found, trying fuzzy match...")
136
+ for option in dropdown.options:
137
+ option_text_clean = option.text.lower().strip()
138
+ if user_input_clean in option_text_clean:
139
+ try:
140
+ option.click()
141
+ found = True
142
+ print(f"Fuzzy match clicked: {option.text.strip()}")
143
+ break
144
+ except StaleElementReferenceException:
145
+ print("Stale during fuzzy click, retrying...")
146
+ dropdown_el = driver.find_element(By.ID, dropdown_id)
147
+ dropdown = Select(dropdown_el)
148
+ for fresh_option in dropdown.options:
149
+ if fresh_option.text.lower().strip() == option_text_clean:
150
+ fresh_option.click()
151
+ found = True
152
+ print(f"Fuzzy match clicked after refresh: {fresh_option.text.strip()}")
153
+ break
154
+
155
+ if found:
156
+ break
157
+
158
+ if not found:
159
+ all_opts = [opt.text.strip() for opt in dropdown.options]
160
+ raise Exception(
161
+ f"Could not find address '{house_number}' in options: {all_opts}"
162
+ )
@@ -41,6 +41,10 @@ class CouncilClass(AbstractGetBinDataClass):
41
41
  check_uprn(user_uprn)
42
42
  check_postcode(user_postcode)
43
43
 
44
+ # Ensure UPRN starts with "UPRN"
45
+ if not user_uprn.startswith("UPRN"):
46
+ user_uprn = f"UPRN{user_uprn}"
47
+
44
48
  # Create Selenium webdriver
45
49
  user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
46
50
  driver = create_webdriver(web_driver, headless, user_agent, __name__)
@@ -1,237 +1,16 @@
1
1
  import time
2
2
 
3
3
  from bs4 import BeautifulSoup
4
+ from selenium import webdriver
5
+ from selenium.webdriver.common.by import By
6
+ from selenium.webdriver.common.keys import Keys
7
+ from selenium.webdriver.support import expected_conditions as EC
8
+ from selenium.webdriver.support.ui import Select, WebDriverWait
4
9
 
5
10
  from uk_bin_collection.uk_bin_collection.common import *
6
11
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
7
12
 
8
13
 
9
- def get_headers(base_url: str, method: str) -> dict[str, str]:
10
- """
11
- Gets request headers
12
- :rtype: dict[str, str]
13
- :param base_url: Base URL to use
14
- :param method: Method to use
15
- :return: Request headers
16
- """
17
- headers = {
18
- "Accept-Encoding": "gzip, deflate, br",
19
- "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8",
20
- "Cache-Control": "max-age=0",
21
- "Connection": "keep-alive",
22
- "Host": "service.croydon.gov.uk",
23
- "Origin": base_url,
24
- "sec-ch-ua": '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"',
25
- "sec-ch-ua-mobile": "?0",
26
- "sec-ch-ua-platform": "Windows",
27
- "Sec-Fetch-Dest": "document",
28
- "Sec-Fetch-User": "?1",
29
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
30
- " Chrome/109.0.0.0 Safari/537.36",
31
- }
32
- if method.lower() == "post":
33
- headers["Accept"] = "application/json, text/javascript, */*; q=0.01"
34
- headers["Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8"
35
- headers["Sec-Fetch-Mode"] = "cors"
36
- headers["Sec-Fetch-Mode"] = "same-origin"
37
- headers["X-Requested-With"] = "XMLHttpRequest"
38
- else:
39
- headers["Accept"] = (
40
- "text/html,application/xhtml+xml,application/xml;"
41
- "q=0.9,image/avif,image/webp,image/apng,*/*;"
42
- "q=0.8,application/signed-exchange;v=b3;q=0.9"
43
- )
44
- headers["Sec-Fetch-Mode"] = "navigate"
45
- headers["Sec-Fetch-Mode"] = "none"
46
- return headers
47
-
48
-
49
- def get_session_storage_global() -> object:
50
- """
51
- Gets session storage global object
52
- :rtype: object
53
- :return: Session storage global object
54
- """
55
- return {
56
- "destination_stack": [
57
- "w/webpage/bin-day-enter-address",
58
- "w/webpage/your-bin-collection-details?context_record_id=86086077"
59
- "&webpage_token=5c047b2c10b4aad66bef2054aac6bea52ad7a5e185ffdf7090b01f8ddc96728f",
60
- "w/webpage/bin-day-enter-address",
61
- "w/webpage/your-bin-collection-details?context_record_id=86085229"
62
- "&webpage_token=cf1b8fd6213f4823277d98c1dd8a992e6ebef1fabc7d892714e5d9dade448c37",
63
- "w/webpage/bin-day-enter-address",
64
- "w/webpage/your-bin-collection-details?context_record_id=86084221"
65
- "&webpage_token=7f52fb51019bf0e6bfe9647b1b31000124bd92a9d95781f1557f58b3ed40da52",
66
- "w/webpage/bin-day-enter-address",
67
- "w/webpage/your-bin-collection-details?context_record_id=86083209"
68
- "&webpage_token=de50c265da927336f526d9d9a44947595c3aa38965aa8c495ac2fb73d272ece8",
69
- "w/webpage/bin-day-enter-address",
70
- ],
71
- "last_context_record_id": "86086077",
72
- }
73
-
74
-
75
- def get_csrf_token(s: requests.session, base_url: str) -> str:
76
- """
77
- Gets a CSRF token
78
- :rtype: str
79
- :param s: requests.Session() to use
80
- :param base_url: Base URL to use
81
- :return: CSRF token
82
- """
83
- csrf_token = ""
84
- response = s.get(
85
- base_url + "/wasteservices/w/webpage/bin-day-enter-address",
86
- headers=get_headers(base_url, "GET"),
87
- )
88
- if response.status_code == 200:
89
- soup = BeautifulSoup(response.text, features="html.parser")
90
- soup.prettify()
91
- app_body = soup.find("div", {"class": "app-body"})
92
- script = app_body.find("script", {"type": "text/javascript"}).string
93
- p = re.compile("var CSRF = ('|\")(.*?)('|\");")
94
- m = p.search(script)
95
- csrf_token = m.groups()[1]
96
- else:
97
- raise ValueError(
98
- "Code 1: Failed to get a CSRF token. Please ensure the council website is online first,"
99
- " then open an issue on GitHub."
100
- )
101
- return csrf_token
102
-
103
-
104
- def get_address_id(
105
- s: requests.session, base_url: str, csrf_token: str, postcode: str, paon: str
106
- ) -> str:
107
- """
108
- Gets the address ID
109
- :rtype: str
110
- :param s: requests.Session() to use
111
- :param base_url: Base URL to use
112
- :param csrf_token: CSRF token to use
113
- :param postcode: Postcode to use
114
- :param paon: House number/address to find
115
- :return: address ID
116
- """
117
- address_id = "0"
118
- # Get the addresses for the postcode
119
- form_data = {
120
- "code_action": "search",
121
- "code_params": '{"search_item":"' + postcode + '","is_ss":true}',
122
- "fragment_action": "handle_event",
123
- "fragment_id": "PCF0020408EECEC1",
124
- "fragment_collection_class": "formtable",
125
- "fragment_collection_editable_values": '{"PCF0021449EECEC1":"1"}',
126
- "_session_storage": json.dumps(
127
- {
128
- "/wasteservices/w/webpage/bin-day-enter-address": {},
129
- "_global": get_session_storage_global(),
130
- }
131
- ),
132
- "action_cell_id": "PCL0005629EECEC1",
133
- "action_page_id": "PAG0000898EECEC1",
134
- "form_check_ajax": csrf_token,
135
- }
136
- response = s.post(
137
- base_url
138
- + "/wasteservices/w/webpage/bin-day-enter-address?webpage_subpage_id=PAG0000898EECEC1"
139
- "&webpage_token=faab02e1f62a58f7bad4c2ae5b8622e19846b97dde2a76f546c4bb1230cee044"
140
- "&widget_action=fragment_action",
141
- headers=get_headers(base_url, "POST"),
142
- data=form_data,
143
- )
144
- if response.status_code == 200:
145
- json_response = json.loads(response.text)
146
- addresses = json_response["response"]["items"]
147
- # Find the matching address id for the paon
148
- for address in addresses:
149
- # Check for full matches first
150
- if address.get("dropdown_display_field") == paon:
151
- address_id = address.get("id")
152
- break
153
- # Check for matching start if no full match found
154
- if address_id == "0":
155
- for address in addresses:
156
- if address.get("dropdown_display_field").split()[0] == paon.strip():
157
- address_id = address.get("id")
158
- break
159
- # Check match was found
160
- if address_id == "0":
161
- raise ValueError(
162
- "Code 2: No matching address for house number/full address found."
163
- )
164
- else:
165
- raise ValueError("Code 3: No addresses found for provided postcode.")
166
- return address_id
167
-
168
-
169
- def get_collection_data(
170
- s: requests.session, base_url: str, csrf_token: str, address_id: str
171
- ) -> str:
172
- """
173
- Gets the collection data
174
- :rtype: str
175
- :param s: requests.Session() to use
176
- :param base_url: Base URL to use
177
- :param csrf_token: CSRF token to use
178
- :param address_id: Address id to use
179
- :param retries: Retries count
180
- :return: Collection data
181
- """
182
- collection_data = ""
183
- if address_id != "0":
184
- form_data = {
185
- "form_check": csrf_token,
186
- "submitted_page_id": "PAG0000898EECEC1",
187
- "submitted_widget_group_id": "PWG0002644EECEC1",
188
- "submitted_widget_group_type": "modify",
189
- "submission_token": "63e9126bacd815.12997577",
190
- "payload[PAG0000898EECEC1][PWG0002644EECEC1][PCL0005629EECEC1][formtable]"
191
- "[C_63e9126bacfb3][PCF0020408EECEC1]": address_id,
192
- "payload[PAG0000898EECEC1][PWG0002644EECEC1][PCL0005629EECEC1][formtable]"
193
- "[C_63e9126bacfb3][PCF0021449EECEC1]": "1",
194
- "payload[PAG0000898EECEC1][PWG0002644EECEC1][PCL0005629EECEC1][formtable]"
195
- "[C_63e9126bacfb3][PCF0020072EECEC1]": "Next",
196
- "submit_fragment_id": "PCF0020072EECEC1",
197
- "_session_storage": json.dumps({"_global": get_session_storage_global()}),
198
- "_update_page_content_request": 1,
199
- "form_check_ajax": csrf_token,
200
- }
201
- response = s.post(
202
- base_url
203
- + "/wasteservices/w/webpage/bin-day-enter-address?webpage_subpage_id=PAG0000898EECEC1"
204
- "&webpage_token=faab02e1f62a58f7bad4c2ae5b8622e19846b97dde2a76f546c4bb1230cee044",
205
- headers=get_headers(base_url, "POST"),
206
- data=form_data,
207
- )
208
- if response.status_code == 200 and len(response.text) > 0:
209
- json_response = json.loads(response.text)
210
- form_data = {
211
- "_dummy": 1,
212
- "_session_storage": json.dumps(
213
- {"_global": get_session_storage_global()}
214
- ),
215
- "_update_page_content_request": 1,
216
- "form_check_ajax": csrf_token,
217
- }
218
- response = s.post(
219
- base_url + json_response["redirect_url"],
220
- headers=get_headers(base_url, "POST"),
221
- data=form_data,
222
- )
223
- if response.status_code == 200 and len(response.text) > 0:
224
- json_response = json.loads(response.text)
225
- collection_data = json_response["data"]
226
- else:
227
- raise ValueError("Code 4: Failed to get bin data.")
228
- else:
229
- raise ValueError(
230
- "Code 5: Failed to get bin data. Too many requests. Please wait a few minutes before trying again."
231
- )
232
- return collection_data
233
-
234
-
235
14
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Scrape Croydon's waste-services site for bin collection dates.

        Keyword args:
            postcode: postcode to search.
            paon: house number used to pick the address dropdown entry.
            web_driver: optional remote webdriver URL.
            headless: whether to run the browser headless.

        Returns:
            {"bins": [{"type": ..., "collectionDate": "DD/MM/YYYY"}, ...]}

        Raises:
            ValueError: missing postcode, unmatched address, or no bin data.
        """
        driver = None
        try:
            user_postcode = kwargs.get("postcode")
            if not user_postcode:
                raise ValueError("No postcode provided.")
            check_postcode(user_postcode)

            user_paon = kwargs.get("paon")
            check_paon(user_paon)
            headless = kwargs.get("headless")
            web_driver = kwargs.get("web_driver")
            driver = create_webdriver(web_driver, headless, None, __name__)
            page = "https://service.croydon.gov.uk/wasteservices/w/webpage/bin-day-enter-address"

            driver.maximize_window()

            driver.get(page)

            postcode_input = WebDriverWait(driver, 60).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, 'input[data-ts_identifier="postcode_input"]')
                )
            )

            postcode_input.send_keys(user_postcode + Keys.ENTER)

            # Fixed settle time: the address lookup repopulates the select
            # in place, so waiting for clickability alone is not enough.
            time.sleep(5)
            select_address_input = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, 'select[data-ts_identifier="address_selection"]')
                )
            )

            self._select_address(select_address_input, user_paon)

            # Click the "Next" button to submit the chosen address.
            next_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, 'input[type="submit"][value="Next"]')
                )
            )
            next_button.click()

            # Wait for the bin collection content to load.
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (
                        By.XPATH,
                        '//*[@id="mats_content_wrapper"]/div[2]/div[2]/div[2]/div/div[1]/div/div[3]/div/div/div/div',
                    )
                )
            )

            soup = BeautifulSoup(driver.page_source, "html.parser")

            bin_data = {"bins": self._extract_bins(soup)}

            if not bin_data["bins"]:
                raise ValueError("No bin collection data found")

        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Optionally, re-raise the exception if you want it to propagate
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return bin_data

    def _select_address(self, select_element, user_paon) -> None:
        """Pick the dropdown option containing house number *user_paon*.

        Raises ValueError when no option matches.
        """
        select = Select(select_element)
        paon = str(user_paon)  # Ensure paon is a string for comparison
        for option in select.options:
            # Pad both sides with spaces so "1" does not match "10", "21", etc.
            if f" {paon} " in f" {option.text} ":
                select.select_by_value(option.get_attribute("value"))
                return
        raise ValueError(
            f"Address with house number {paon} not found in the dropdown."
        )

    def _extract_bins(self, soup) -> list:
        """Extract [{'type', 'collectionDate'}] records from the results page.

        Unparseable dates are logged and skipped (best-effort).
        """
        bins = []
        # Each collection is rendered as one "listing_template_record" div.
        for section in soup.find_all("div", {"class": "listing_template_record"}):
            bin_type_elem = section.find("h2")
            if not bin_type_elem:
                continue
            bin_type = bin_type_elem.text.strip()

            date_span = section.find("span", {"class": "value-as-text"})
            if not date_span:
                continue
            collection_date_string = date_span.text.strip()

            try:
                # Parse the date string (e.g., "Sunday 1 June 2025") and
                # format as dd/mm/yyyy.
                parsed_date = datetime.strptime(
                    collection_date_string, "%A %d %B %Y"
                )
                bins.append(
                    {
                        "type": bin_type,
                        "collectionDate": parsed_date.strftime("%d/%m/%Y"),
                    }
                )
            except ValueError as e:
                print(f"Error parsing date '{collection_date_string}': {e}")
        return bins
@@ -1,4 +1,5 @@
1
1
  from bs4 import BeautifulSoup
2
+
2
3
  from uk_bin_collection.uk_bin_collection.common import *
3
4
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
4
5
 
@@ -12,7 +13,19 @@ class CouncilClass(AbstractGetBinDataClass):
12
13
  """
13
14
 
14
15
  def parse_data(self, page: str, **kwargs) -> dict:
16
+
17
+ try:
18
+ user_uprn = kwargs.get("uprn")
19
+ check_uprn(user_uprn)
20
+ url = f"https://windmz.dartford.gov.uk/ufs/WS_CHECK_COLLECTIONS.eb?UPRN={user_uprn}"
21
+ if not user_uprn:
22
+ # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
23
+ url = kwargs.get("url")
24
+ except Exception as e:
25
+ raise ValueError(f"Error getting identifier: {str(e)}")
26
+
15
27
  # Make a BS4 object
28
+ page = requests.get(url)
16
29
  soup = BeautifulSoup(page.text, features="html.parser")
17
30
  soup.prettify()
18
31