uk_bin_collection 0.152.7__py3-none-any.whl → 0.152.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. uk_bin_collection/tests/input.json +11 -15
  2. uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py +69 -46
  3. uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +119 -37
  4. uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py +158 -115
  5. uk_bin_collection/uk_bin_collection/councils/BasildonCouncil.py +87 -66
  6. uk_bin_collection/uk_bin_collection/councils/BedfordshireCouncil.py +2 -0
  7. uk_bin_collection/uk_bin_collection/councils/BlabyDistrictCouncil.py +5 -1
  8. uk_bin_collection/uk_bin_collection/councils/BlaenauGwentCountyBoroughCouncil.py +91 -66
  9. uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py +88 -67
  10. uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +67 -56
  11. uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +63 -95
  12. uk_bin_collection/uk_bin_collection/councils/CherwellDistrictCouncil.py +39 -18
  13. uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +106 -97
  14. uk_bin_collection/uk_bin_collection/councils/CopelandBoroughCouncil.py +80 -75
  15. uk_bin_collection/uk_bin_collection/councils/CotswoldDistrictCouncil.py +191 -67
  16. uk_bin_collection/uk_bin_collection/councils/CoventryCityCouncil.py +6 -2
  17. uk_bin_collection/uk_bin_collection/councils/MidlothianCouncil.py +12 -3
  18. uk_bin_collection/uk_bin_collection/councils/NewcastleUnderLymeCouncil.py +2 -1
  19. uk_bin_collection/uk_bin_collection/councils/RoyalBoroughofGreenwich.py +8 -2
  20. uk_bin_collection/uk_bin_collection/councils/SouthwarkCouncil.py +23 -1
  21. uk_bin_collection/uk_bin_collection/councils/SwindonBoroughCouncil.py +2 -1
  22. uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +4 -1
  23. uk_bin_collection/uk_bin_collection/councils/WestOxfordshireDistrictCouncil.py +3 -3
  24. uk_bin_collection/uk_bin_collection/get_bin_data.py +1 -1
  25. {uk_bin_collection-0.152.7.dist-info → uk_bin_collection-0.152.9.dist-info}/METADATA +1 -1
  26. {uk_bin_collection-0.152.7.dist-info → uk_bin_collection-0.152.9.dist-info}/RECORD +29 -30
  27. uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py +0 -69
  28. {uk_bin_collection-0.152.7.dist-info → uk_bin_collection-0.152.9.dist-info}/LICENSE +0 -0
  29. {uk_bin_collection-0.152.7.dist-info → uk_bin_collection-0.152.9.dist-info}/WHEEL +0 -0
  30. {uk_bin_collection-0.152.7.dist-info → uk_bin_collection-0.152.9.dist-info}/entry_points.txt +0 -0
@@ -1,93 +1,98 @@
-from xml.etree import ElementTree
-
+import requests
 from bs4 import BeautifulSoup
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
+# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
-    baseclass. They can also override some
-    operations with a default implementation.
+    base class. They can also override some operations with a default
+    implementation.
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-        uprn = kwargs.get("uprn")
-        check_uprn(uprn)
-        council = "CPL"
 
-        # Make SOAP request
+        user_uprn = kwargs.get("uprn")
+        postcode = kwargs.get("postcode")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        URI = "https://waste.cumberland.gov.uk/renderform?t=25&k=E43CEB1FB59F859833EF2D52B16F3F4EBE1CAB6A"
+
+        s = requests.Session()
+
+        # Make the GET request
+        response = s.get(URI)
+
+        # Make a BS4 object
+        soup = BeautifulSoup(response.content, features="html.parser")
+
+        # print(soup)
+
+        token = (soup.find("input", {"name": "__RequestVerificationToken"})).get(
+            "value"
+        )
+
+        formguid = (soup.find("input", {"name": "FormGuid"})).get("value")
+
+        # print(token)
+        # print(formguid)
+
         headers = {
-            "Content-Type": "text/xml; charset=UTF-8",
-            "Referer": "https://collections-copeland.azurewebsites.net/calendar.html",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
+            "Content-Type": "application/x-www-form-urlencoded",
+            "Origin": "https://waste.cumberland.gov.uk",
+            "Referer": "https://waste.cumberland.gov.uk/renderform?t=25&k=E43CEB1FB59F859833EF2D52B16F3F4EBE1CAB6A",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 OPR/98.0.0.0",
+            "X-Requested-With": "XMLHttpRequest",
         }
-        requests.packages.urllib3.disable_warnings()
-        post_data = (
-            '<?xml version="1.0" encoding="utf-8"?>'
-            '<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">'
-            '<soap:Body><getRoundCalendarForUPRN xmlns="http://webaspx-collections.azurewebsites.net/">'
-            "<council>" + council + "</council><UPRN>" + uprn + "</UPRN>"
-            "<from>Chtml</from></getRoundCalendarForUPRN></soap:Body></soap:Envelope>"
-        )
-        response = requests.post(
-            "https://collections-copeland.azurewebsites.net/WSCollExternal.asmx",
+
+        payload = {
+            "__RequestVerificationToken": token,
+            "FormGuid": formguid,
+            "ObjectTemplateID": "25",
+            "Trigger": "submit",
+            "CurrentSectionID": "33",
+            "TriggerCtl": "",
+            "FF265": f"U{user_uprn}",
+            "FF265lbltxt": "Please select your address",
+            "FF265-text": postcode
+        }
+
+        # print(payload)
+
+        response = s.post(
+            "https://waste.cumberland.gov.uk/renderform/Form",
             headers=headers,
-            data=post_data,
+            data=payload,
+        )
+
+        soup = BeautifulSoup(response.content, features="html.parser")
+        for row in soup.find_all("div", class_="resirow"):
+            # Extract the type of collection (e.g., Recycling, Refuse)
+            collection_type_div = row.find("div", class_="col")
+            collection_type = (
+                collection_type_div.get("class")[1]
+                if collection_type_div
+                else "Unknown"
+            )
+
+            # Extract the collection date
+            date_div = row.find("div", style="width:360px;")
+            collection_date = date_div.text.strip() if date_div else "Unknown"
+
+            dict_data = {
+                "type": collection_type,
+                "collectionDate": datetime.strptime(
+                    collection_date, "%A %d %B %Y"
+                ).strftime(date_format),
+            }
+            bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
         )
 
-        if response.status_code != 200:
-            raise ValueError("No bin data found for provided UPRN.")
-
-        # Get HTML from SOAP response
-        xmltree = ElementTree.fromstring(response.text)
-        html = xmltree.find(
-            ".//{http://webaspx-collections.azurewebsites.net/}getRoundCalendarForUPRNResult"
-        ).text
-        # Parse with BS4
-        soup = BeautifulSoup(html, features="html.parser")
-        soup.prettify()
-
-        data = {"bins": []}
-        for bin_type in ["Refuse", "Recycling", "Garden"]:
-            bin_el = soup.find("b", string=bin_type)
-            if bin_el:
-                bin_info = bin_el.next_sibling.split(": ")[1]
-                collection_date = ""
-                results = re.search("([A-Za-z]+ \\d\\d? [A-Za-z]+) then", bin_info)
-                if results:
-                    if results[1] == "Today":
-                        date = datetime.now()
-                    elif results[1] == "Tomorrow":
-                        date = datetime.now() + timedelta(days=1)
-                    else:
-                        date = get_next_occurrence_from_day_month(
-                            datetime.strptime(
-                                results[1] + " " + datetime.now().strftime("%Y"),
-                                "%a %d %b %Y",
-                            )
-                        )
-                    if date:
-                        collection_date = date.strftime(date_format)
-                else:
-                    results2 = re.search("([A-Za-z]+) then", bin_info)
-                    if results2:
-                        if results2[1] == "Today":
-                            collection_date = datetime.now().strftime(date_format)
-                        elif results2[1] == "Tomorrow":
-                            collection_date = (
-                                datetime.now() + timedelta(days=1)
-                            ).strftime(date_format)
-                        else:
-                            collection_date = results2[1]
-
-                if collection_date != "":
-                    dict_data = {
-                        "type": bin_type,
-                        "collectionDate": collection_date,
-                    }
-                    data["bins"].append(dict_data)
-
-        return data
+        return bindata
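
Note: the replacement flow above boils down to three steps: GET the form to harvest the anti-forgery token and FormGuid, POST them back alongside the UPRN form field, then scrape the resirow results. A condensed, runnable sketch of that flow, using the endpoint and field names shown in the diff; the UPRN and postcode values are placeholders:

import requests
from bs4 import BeautifulSoup

FORM_URL = "https://waste.cumberland.gov.uk/renderform?t=25&k=E43CEB1FB59F859833EF2D52B16F3F4EBE1CAB6A"

s = requests.Session()
soup = BeautifulSoup(s.get(FORM_URL).content, "html.parser")

# Both hidden fields must be echoed back with the submission
payload = {
    "__RequestVerificationToken": soup.find("input", {"name": "__RequestVerificationToken"})["value"],
    "FormGuid": soup.find("input", {"name": "FormGuid"})["value"],
    "ObjectTemplateID": "25",
    "Trigger": "submit",
    "CurrentSectionID": "33",
    "TriggerCtl": "",
    "FF265": "U100110734",  # placeholder UPRN, prefixed with "U" as the form expects
    "FF265lbltxt": "Please select your address",
    "FF265-text": "CA1 1AA",  # placeholder postcode
}

result = BeautifulSoup(
    s.post("https://waste.cumberland.gov.uk/renderform/Form", data=payload).content,
    "html.parser",
)
for row in result.find_all("div", class_="resirow"):
    # each row holds one collection type and its date
    print(row.get_text(" ", strip=True))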
@@ -1,5 +1,6 @@
 import time
-from datetime import datetime
+import re
+from datetime import datetime, timedelta
 
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
@@ -11,8 +12,6 @@ from selenium.webdriver.support.wait import WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-# import the wonderful Beautiful Soup and the URL grabber
-
 
 class CouncilClass(AbstractGetBinDataClass):
     """
@@ -30,7 +29,8 @@ class CouncilClass(AbstractGetBinDataClass):
 
         house_number = kwargs.get("paon")
         postcode = kwargs.get("postcode")
-        full_address = f"{house_number}, {postcode}"
+        # Use house_number as full address since it contains the complete address
+        full_address = house_number if house_number else f"{house_number}, {postcode}"
         web_driver = kwargs.get("web_driver")
         headless = kwargs.get("headless")
 
@@ -38,81 +38,205 @@ class CouncilClass(AbstractGetBinDataClass):
             driver = create_webdriver(web_driver, headless, None, __name__)
             driver.get(page)
 
-            # If you bang in the house number (or property name) and postcode in the box it should find your property
+            # Wait for page to load completely
             wait = WebDriverWait(driver, 60)
-            address_entry_field = wait.until(
-                EC.element_to_be_clickable((By.XPATH, '//*[@id="combobox-input-22"]'))
-            )
-
-            address_entry_field.send_keys(str(full_address))
-
-            address_entry_field = wait.until(
-                EC.element_to_be_clickable((By.XPATH, '//*[@id="combobox-input-22"]'))
-            )
-            address_entry_field.click()
-            address_entry_field.send_keys(Keys.BACKSPACE)
-            address_entry_field.send_keys(str(full_address[len(full_address) - 1]))
-
-            first_found_address = wait.until(
-                EC.element_to_be_clickable(
-                    (By.XPATH, '//*[@id="dropdown-element-22"]/ul')
-                )
-            )
-
-            first_found_address.click()
-            # Wait for the 'Select your property' dropdown to appear and select the first result
-            next_btn = wait.until(
-                EC.element_to_be_clickable((By.XPATH, "//lightning-button/button"))
-            )
-            next_btn.click()
-            bin_data = wait.until(
-                EC.presence_of_element_located(
-                    (By.XPATH, "//span[contains(text(), 'Container')]")
-                )
-            )
-
+
+            # Wait for the Salesforce Lightning page to be fully loaded
+            print("Waiting for Salesforce Lightning components to load...")
+            time.sleep(10)
+
+            # Wait for the address input field to be present
+            try:
+                wait.until(EC.presence_of_element_located((By.XPATH, "//label[contains(text(), 'Enter your address')]")))
+                print("Address label found")
+                time.sleep(5)  # Additional wait for the input field to be ready
+            except Exception as e:
+                print(f"Address label not found: {e}")
+
+            # Find the address input field using the label
+            try:
+                address_entry_field = driver.find_element(By.XPATH, "//label[contains(text(), 'Enter your address')]/following-sibling::*//input")
+                print("Found address input field using label xpath")
+            except Exception as e:
+                print(f"Could not find address input field: {e}")
+                raise Exception("Could not find address input field")
+
+            # Clear any existing text and enter the address
+            try:
+                address_entry_field.clear()
+                address_entry_field.send_keys(str(full_address))
+                print(f"Entered address: {full_address}")
+            except Exception as e:
+                print(f"Error entering address: {e}")
+                raise
+
+            # Click the input field again to trigger the dropdown
+            try:
+                address_entry_field.click()
+                print("Clicked input field to trigger dropdown")
+                time.sleep(3)  # Wait for dropdown to appear
+            except Exception as e:
+                print(f"Error clicking input field: {e}")
+
+            # Wait for and click the dropdown option
+            try:
+                dropdown_wait = WebDriverWait(driver, 10)
+                dropdown_option = dropdown_wait.until(EC.element_to_be_clickable((By.XPATH, "//li[@role='presentation']")))
+                dropdown_option.click()
+                print("Clicked dropdown option")
+                time.sleep(2)
+            except Exception as e:
+                print(f"Error clicking dropdown option: {e}")
+                raise
+
+            # Find and click the Next button
+            try:
+                next_wait = WebDriverWait(driver, 10)
+                next_button = next_wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Next')]")))
+                next_button.click()
+                print("Clicked Next button")
+                time.sleep(5)  # Wait for the bin collection data to load
+            except Exception as e:
+                print(f"Error clicking Next button: {e}")
+                raise
+
+            # Wait for the bin collection data table to load
+            try:
+                table_wait = WebDriverWait(driver, 15)
+                table_wait.until(EC.presence_of_element_located((By.XPATH, "//span[contains(text(), 'Collection Day')]")))
+                print("Bin collection data table loaded")
+                time.sleep(3)
+            except Exception as e:
+                print(f"Bin collection table not found: {e}")
+
             soup = BeautifulSoup(driver.page_source, features="html.parser")
-
-            rows = soup.find_all("tr", class_="slds-hint-parent")
             current_year = datetime.now().year
 
+            # Try multiple approaches to find bin collection data
+            rows = []
+
+            # Try different table row selectors
+            table_selectors = [
+                "tr.slds-hint-parent",
+                "tr[class*='slds']",
+                "table tr",
+                ".slds-table tr",
+                "tbody tr"
+            ]
+
+            for selector in table_selectors:
+                rows = soup.select(selector)
+                if rows:
+                    break
+
+            # If no table rows found, try to find any elements containing collection info
+            if not rows:
+                # Look for any elements that might contain bin collection information
+                collection_elements = soup.find_all(text=re.compile(r'(bin|collection|waste|recycling)', re.I))
+                if collection_elements:
+                    # Try to extract information from the surrounding elements
+                    for element in collection_elements[:10]:  # Limit to first 10 matches
+                        parent = element.parent
+                        if parent:
+                            text = parent.get_text().strip()
+                            if text and len(text) > 10:  # Only consider substantial text
+                                # Try to extract date patterns
+                                date_patterns = re.findall(r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\d{1,2}\s+\w+\s+\d{4}\b', text)
+                                if date_patterns:
+                                    data["bins"].append({
+                                        "type": "General Collection",
+                                        "collectionDate": date_patterns[0]
+                                    })
+                                    break
+
+            # Process table rows if found
             for row in rows:
-                columns = row.find_all("td")
-                if columns:
-                    container_type = row.find("th").text.strip()
-                    if columns[0].get_text() == "Today":
-                        collection_day = datetime.now().strftime("%a, %d %B")
-                    elif columns[0].get_text() == "Tomorrow":
-                        collection_day = (datetime.now() + timedelta(days=1)).strftime(
-                            "%a, %d %B"
-                        )
-                    else:
-                        collection_day = re.sub(
-                            r"[^a-zA-Z0-9,\s]", "", columns[0].get_text()
-                        ).strip()
+                try:
+                    columns = row.find_all(["td", "th"])
+                    if len(columns) >= 2:
+                        # Try to identify container type and date
+                        container_type = "Unknown"
+                        collection_date = ""
+
+                        # Look for header cell (th) for container type
+                        th_element = row.find("th")
+                        if th_element:
+                            container_type = th_element.get_text().strip()
+                        elif columns:
+                            # If no th, use first column as type
+                            container_type = columns[0].get_text().strip()
+
+                        # Look for date in subsequent columns
+                        for col in columns[1:] if th_element else columns[1:]:
+                            col_text = col.get_text().strip()
+                            if col_text:
+                                if col_text.lower() == "today":
+                                    collection_date = datetime.now().strftime("%d/%m/%Y")
+                                    break
+                                elif col_text.lower() == "tomorrow":
+                                    collection_date = (datetime.now() + timedelta(days=1)).strftime("%d/%m/%Y")
+                                    break
+                                else:
+                                    # Try to parse various date formats
+                                    try:
+                                        # Clean the text
+                                        clean_text = re.sub(r"[^a-zA-Z0-9,\s/-]", "", col_text).strip()
+
+                                        # Try different date parsing approaches
+                                        date_formats = [
+                                            "%a, %d %B",
+                                            "%d %B %Y",
+                                            "%d/%m/%Y",
+                                            "%d-%m-%Y",
+                                            "%B %d, %Y"
+                                        ]
+
+                                        for fmt in date_formats:
+                                            try:
+                                                parsed_date = datetime.strptime(clean_text, fmt)
+                                                if fmt == "%a, %d %B":  # Add year if missing
+                                                    if parsed_date.replace(year=current_year) < datetime.now():
+                                                        parsed_date = parsed_date.replace(year=current_year + 1)
+                                                    else:
+                                                        parsed_date = parsed_date.replace(year=current_year)
+                                                collection_date = parsed_date.strftime("%d/%m/%Y")
+                                                break
+                                            except ValueError:
+                                                continue
+
+                                        if collection_date:
+                                            break
+                                    except Exception:
+                                        continue
+
+                        # Add to data if we have both type and date
+                        if container_type and collection_date and container_type.lower() != "unknown":
+                            data["bins"].append({
+                                "type": container_type,
+                                "collectionDate": collection_date
+                            })
+                except Exception as e:
+                    print(f"Error processing row: {e}")
+                    continue
+
+            # If no data found, add a debug entry
+            if not data["bins"]:
+                print("No bin collection data found. Page source:")
+                print(driver.page_source[:1000])  # Print first 1000 chars for debugging
 
-                    # Parse the date from the string
-                    parsed_date = datetime.strptime(collection_day, "%a, %d %B")
-                    if parsed_date < datetime(
-                        parsed_date.year, parsed_date.month, parsed_date.day
-                    ):
-                        parsed_date = parsed_date.replace(year=current_year + 1)
-                    else:
-                        parsed_date = parsed_date.replace(year=current_year)
-                    # Format the date as %d/%m/%Y
-                    formatted_date = parsed_date.strftime("%d/%m/%Y")
-
-                    # Add the bin type and collection date to the 'data' dictionary
-                    data["bins"].append(
-                        {"type": container_type, "collectionDate": formatted_date}
-                    )
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
+            print(f"Full address used: {full_address}")
+            print(f"Page URL: {page}")
+            # Add some debug information
+            if driver:
+                print(f"Current page title: {driver.title}")
+                print(f"Current URL: {driver.current_url}")
             # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
             # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
-        return data
+        return data
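
Note: the subtlest part of the new parsing loop is the year inference for "%a, %d %B" strings, which carry no year: the code pins them to the current year, then rolls dates that have already passed into next year. A standalone sketch of that logic:

from datetime import datetime

def infer_year(day_month: str, now: datetime = None) -> str:
    """Parse e.g. 'Wed, 25 June' (no year) and return dd/mm/YYYY.

    Dates already past in the current year are assumed to mean next year.
    """
    now = now or datetime.now()
    parsed = datetime.strptime(day_month, "%a, %d %B").replace(year=now.year)
    if parsed < now:
        parsed = parsed.replace(year=now.year + 1)
    return parsed.strftime("%d/%m/%Y")

# On 1 July 2025, a 'Wed, 25 June' collection resolves to 25/06/2026
print(infer_year("Wed, 25 June", now=datetime(2025, 7, 1)))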
@@ -18,6 +18,10 @@ class CouncilClass(AbstractGetBinDataClass):
 
         bindata = {"bins": []}
         curr_date = datetime.today()
+
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        }
 
         soup = BeautifulSoup(page.content, features="html.parser")
         button = soup.find(
@@ -25,10 +29,10 @@ class CouncilClass(AbstractGetBinDataClass):
             text="Find out which bin will be collected when and sign up for a free email reminder.",
         )
 
-        if button["href"]:
+        if button and button.get("href"):
             URI = button["href"]
             # Make the GET request
-            response = requests.get(URI)
+            response = requests.get(URI, headers=headers)
             soup = BeautifulSoup(response.content, features="html.parser")
             divs = soup.find_all("div", {"class": "editor"})
             for div in divs:
@@ -22,6 +22,15 @@ class CouncilClass(AbstractGetBinDataClass):
         "Next brown bin collection": "Brown Bin",
         "Next food bin collection": "Food Bin",
     }
+    HEADERS = {
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+        "Accept-Language": "en-GB,en;q=0.9",
+        "Connection": "keep-alive",
+        "Host": "www.midlothian.gov.uk",
+        "Referer": "https://www.midlothian.gov.uk/info/200284/bins_and_recycling",
+        "Upgrade-Insecure-Requests": "1",
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
+    }
 
     def parse_data(self, page: str, **kwargs) -> dict:
 
@@ -46,7 +55,7 @@ class CouncilClass(AbstractGetBinDataClass):
         search_url = self.DIRECTORY_URL.format(quote(postcode))
 
         try:
-            search_results_html = requests.get(search_url)
+            search_results_html = requests.get(search_url, headers=self.HEADERS)
             search_results_html.raise_for_status()
 
             soup = BeautifulSoup(search_results_html.text, "html.parser")
@@ -93,7 +102,7 @@ class CouncilClass(AbstractGetBinDataClass):
             next_page_url = next_page_link["href"]
 
             # Send a GET request to the next page
-            next_response = requests.get(next_page_url)
+            next_response = requests.get(next_page_url, headers=self.HEADERS)
             next_response.raise_for_status()  # Raise an exception for HTTP errors
 
             # Parse the HTML content of the next page
@@ -116,7 +125,7 @@ class CouncilClass(AbstractGetBinDataClass):
     def _fetch_bin_collection_data(self, url: str) -> list:
         """Fetch and parse bin collection data from the given URL."""
         try:
-            bin_collection_html = requests.get(url)
+            bin_collection_html = requests.get(url, headers=self.HEADERS)
             bin_collection_html.raise_for_status()
 
             soup = BeautifulSoup(bin_collection_html.text, "html.parser")
@@ -23,7 +23,8 @@ class CouncilClass(AbstractGetBinDataClass):
         URI = f"https://www.newcastle-staffs.gov.uk/homepage/97/check-your-bin-day?uprn={user_uprn}"
 
         # Make the GET request
-        response = requests.get(URI)
+        request_headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
+        response = requests.get(URI, headers=request_headers)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, features="html.parser")
         soup.prettify()
@@ -22,12 +22,18 @@ class CouncilClass(AbstractGetBinDataClass):
         check_paon(user_paon)
         bindata = {"bins": []}
 
+        headers = {
+            "Origin": "https://www.royalgreenwich.gov.uk/",
+            "Referer": "https://www.royalgreenwich.gov.uk/info/200171/recycling_and_rubbish/100/bin_collection_days",
+            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)",
+        }
+
         user_postcode = user_postcode.replace(" ", "+")
 
         URI = f"https://www.royalgreenwich.gov.uk/site/custom_scripts/apps/waste-collection/new2023/source.php?term={user_postcode}"
 
         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)
 
         for address in response.json():
             if user_paon in address:
@@ -38,7 +44,7 @@ class CouncilClass(AbstractGetBinDataClass):
 
         data = {"address": collection_address}
 
-        response = requests.post(URI, data=data)
+        response = requests.post(URI, data=data, headers=headers)
 
         response = response.json()
 
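Note: this change passes one shared headers dict to both the GET and the POST. An equivalent pattern, shown here only as an alternative sketch (not what the package does), is to set the headers once on a requests.Session so every call inherits them; the postcode in the URL is a placeholder:

import requests

session = requests.Session()
session.headers.update(
    {
        "Origin": "https://www.royalgreenwich.gov.uk/",
        "Referer": "https://www.royalgreenwich.gov.uk/info/200171/recycling_and_rubbish/100/bin_collection_days",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)",
    }
)

URI = "https://www.royalgreenwich.gov.uk/site/custom_scripts/apps/waste-collection/new2023/source.php?term=SE18+6HQ"

addresses = session.get(URI).json()  # every call now carries the headers
details = session.post(URI, data={"address": addresses[0]}).json()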
@@ -95,6 +95,28 @@ class CouncilClass(AbstractGetBinDataClass):
             }
             data["bins"].append(dict_data)
 
+        # Extract communal food waste collection information
+        comfood_section = soup.find(
+            "div", {"aria-labelledby": "communalFoodCollectionTitle"}
+        )
+        if comfood_section:
+            comfood_title = comfood_section.find(
+                "p", {"id": "communalFoodCollectionTitle"}
+            ).text
+            comfood_next_collection = (
+                comfood_section.find(text=lambda text: "Next collection" in text)
+                .strip()
+                .split(": ")[1]
+            )
+
+            dict_data = {
+                "type": comfood_title,
+                "collectionDate": datetime.strptime(
+                    comfood_next_collection, "%a, %d %B %Y"
+                ).strftime("%d/%m/%Y"),
+            }
+            data["bins"].append(dict_data)
+
         comrec_section = soup.find(
             "div", {"aria-labelledby": "recyclingCommunalCollectionTitle"}
         )
@@ -137,4 +159,4 @@ class CouncilClass(AbstractGetBinDataClass):
             }
             data["bins"].append(dict_data)
 
-        return data
+        return data
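
Note: the new communal food waste block assumes the section text contains a literal "Next collection: <date>" string in "%a, %d %B %Y" form. A self-contained demonstration of that extraction against a made-up fragment (the markup here is illustrative, not Southwark's exact page):

from datetime import datetime
from bs4 import BeautifulSoup

# Illustrative fragment only; the real page supplies this structure
html = """
<div aria-labelledby="communalFoodCollectionTitle">
  <p id="communalFoodCollectionTitle">Communal food waste</p>
  <p>Next collection: Wed, 25 June 2025</p>
</div>
"""

soup = BeautifulSoup(html, "html.parser")
section = soup.find("div", {"aria-labelledby": "communalFoodCollectionTitle"})
title = section.find("p", {"id": "communalFoodCollectionTitle"}).text
raw = section.find(string=lambda t: "Next collection" in t).strip().split(": ")[1]

print(title, datetime.strptime(raw, "%a, %d %B %Y").strftime("%d/%m/%Y"))
# Communal food waste 25/06/2025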
@@ -22,9 +22,10 @@ class CouncilClass(AbstractGetBinDataClass):
         bindata = {"bins": []}
 
         URI = f"https://www.swindon.gov.uk/info/20122/rubbish_and_recycling_collection_days?addressList={user_uprn}&uprnSubmit=Yes"
+        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"}
 
         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)
 
         # Parse the JSON response
         soup = BeautifulSoup(response.text, "html.parser")
@@ -36,7 +36,10 @@ class CouncilClass(AbstractGetBinDataClass):
             if s.get_text(strip=True).lower() == "bin collections":
                 rows = s.find_next_sibling(
                     "div", {"class": "c-content-section_body"}
-                ).find_all("div", class_="tablet:l-col-fb-4 u-mt-10")
+                ).find_all(
+                    "div",
+                    class_=lambda x: x and "tablet:l-col-fb-4" in x and "u-mt-10" in x
+                )
 
                 for row in rows:
                     title_elem = row.find("div", class_="u-mb-4")