uk_bin_collection 0.152.11__py3-none-any.whl → 0.154.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. uk_bin_collection/tests/input.json +16 -21
  2. uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +45 -120
  3. uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +4 -1
  4. uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +1 -1
  5. uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +15 -36
  6. uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +75 -100
  7. uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +55 -24
  8. uk_bin_collection/uk_bin_collection/councils/EastHertsCouncil.py +82 -24
  9. uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +32 -34
  10. uk_bin_collection/uk_bin_collection/councils/FarehamBoroughCouncil.py +5 -2
  11. uk_bin_collection/uk_bin_collection/councils/FolkstoneandHytheDistrictCouncil.py +22 -0
  12. uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +1 -1
  13. uk_bin_collection/uk_bin_collection/councils/HartlepoolBoroughCouncil.py +3 -1
  14. uk_bin_collection/uk_bin_collection/councils/HinckleyandBosworthBoroughCouncil.py +7 -1
  15. uk_bin_collection/uk_bin_collection/councils/IpswichBoroughCouncil.py +3 -1
  16. uk_bin_collection/uk_bin_collection/councils/LichfieldDistrictCouncil.py +7 -1
  17. uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py +17 -6
  18. uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py +26 -128
  19. uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +63 -79
  20. uk_bin_collection/uk_bin_collection/councils/NorwichCityCouncil.py +67 -66
  21. uk_bin_collection/uk_bin_collection/councils/NuneatonBedworthBoroughCouncil.py +19 -7
  22. uk_bin_collection/uk_bin_collection/councils/RunnymedeBoroughCouncil.py +7 -1
  23. uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +4 -2
  24. uk_bin_collection/uk_bin_collection/councils/SandwellBoroughCouncil.py +4 -11
  25. uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py +39 -21
  26. uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +4 -0
  27. uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +16 -13
  28. uk_bin_collection/uk_bin_collection/councils/WiltshireCouncil.py +47 -29
  29. {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/METADATA +1 -1
  30. {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/RECORD +33 -33
  31. {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/LICENSE +0 -0
  32. {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/WHEEL +0 -0
  33. {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/entry_points.txt +0 -0
uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py
@@ -26,7 +26,9 @@ class CouncilClass(AbstractGetBinDataClass):
         uprn = kwargs.get("uprn")
         check_uprn(uprn)
 
-        post_url = "https://apps.castlepoint.gov.uk/cpapps/index.cfm?fa=myStreet.displayDetails"
+        base_url = "https://apps.castlepoint.gov.uk/cpapps/"
+
+        post_url = f"{base_url}index.cfm?fa=myStreet.displayDetails"
         post_header_str = (
             "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,"
             "image/apng,"
@@ -51,31 +53,60 @@ class CouncilClass(AbstractGetBinDataClass):
         soup = BeautifulSoup(post_response.text, features="html.parser")
         soup.prettify()
 
+        calMonthNext = f"{base_url}{soup.select_one('div.calMonthNext a')['href']}"
+        nextmonth_response = requests.post(
+            calMonthNext, headers=post_headers, data=form_data, verify=False
+        )
+        soup_nextmonth = BeautifulSoup(nextmonth_response.text, features="html.parser")
+        soup_nextmonth.prettify()
+
         data = {"bins": []}
-        collection_tuple = []
 
-        calendar = soup.find("table", class_="calendar")
-        month = datetime.strptime(
-            soup.find("div", class_="calMonthCurrent").get_text(), "[%b]"
-        ).strftime("%m")
-        year = datetime.strptime(
-            soup.find("h1").get_text(), "About my Street - %B %Y"
-        ).strftime("%Y")
-
-        pink_days = [
-            day.get_text().strip() for day in calendar.find_all("td", class_="pink")
-        ]
-        black_days = [
-            day.get_text().strip() for day in calendar.find_all("td", class_="normal")
-        ]
-
-        for day in pink_days:
-            collection_date = datetime(year=int(year), month=int(month), day=int(day))
-            collection_tuple.append(("Pink collection", collection_date))
-
-        for day in black_days:
-            collection_date = datetime(year=int(year), month=int(month), day=int(day))
-            collection_tuple.append(("Normal collection", collection_date))
+        def parse_calendar_month(soup_one_month):
+            out = []
+
+            calendar = soup_one_month.find("table", class_="calendar")
+            if not calendar:
+                return out  # be robust
+
+            # e.g. "[Aug]"
+            month_txt = soup_one_month.find("div", class_="calMonthCurrent").get_text(
+                strip=True
+            )
+            month = datetime.strptime(month_txt, "[%b]").strftime("%m")
+
+            # e.g. "About my Street - August 2025"
+            year_txt = soup_one_month.find("h1").get_text(strip=True)
+            year = datetime.strptime(year_txt, "About my Street - %B %Y").strftime("%Y")
+
+            pink_days = [
+                td.get_text(strip=True) for td in calendar.find_all("td", class_="pink")
+            ]
+            black_days = [
+                td.get_text(strip=True)
+                for td in calendar.find_all("td", class_="normal")
+            ]
+
+            for day in pink_days:
+                out.append(
+                    (
+                        "Pink collection",
+                        datetime(year=int(year), month=int(month), day=int(day)),
+                    )
+                )
+            for day in black_days:
+                out.append(
+                    (
+                        "Normal collection",
+                        datetime(year=int(year), month=int(month), day=int(day)),
+                    )
+                )
+
+            return out
+
+        collection_tuple = []
+        for s in (soup, soup_nextmonth):
+            collection_tuple.extend(parse_calendar_month(s))
 
         ordered_data = sorted(collection_tuple, key=lambda x: x[1])
 
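
Note on the calendar parsing above: the month and year never appear together on the page, so the parser stitches them from two separate elements. A standalone sketch of that stitching, using invented sample HTML (the live markup may differ):

# Standalone sketch: how the "[%b]" marker and the "About my Street - %B %Y"
# heading combine into concrete dates. The HTML below is invented.
from datetime import datetime

from bs4 import BeautifulSoup

SAMPLE = """
<h1>About my Street - August 2025</h1>
<div class="calMonthCurrent">[Aug]</div>
<table class="calendar">
  <tr><td class="pink">4</td><td class="normal">7</td></tr>
</table>
"""

soup = BeautifulSoup(SAMPLE, "html.parser")
month = datetime.strptime(
    soup.find("div", class_="calMonthCurrent").get_text(strip=True), "[%b]"
).strftime("%m")
year = datetime.strptime(
    soup.find("h1").get_text(strip=True), "About my Street - %B %Y"
).strftime("%Y")
for td in soup.select("table.calendar td"):
    kind = "Pink" if "pink" in td["class"] else "Normal"
    print(kind, datetime(int(year), int(month), int(td.get_text(strip=True))))
# Pink 2025-08-04 00:00:00
# Normal 2025-08-07 00:00:00
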
uk_bin_collection/uk_bin_collection/councils/EastHertsCouncil.py
@@ -1,11 +1,13 @@
-import json
+import time
+
 import requests
-from datetime import datetime
+from dateutil.relativedelta import relativedelta
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
+# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -14,28 +16,84 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-        user_uprn = kwargs.get("uprn")
-        check_uprn(user_uprn)
+        # Make a BS4 object
+        uprn = kwargs.get("uprn")
+        # usrn = kwargs.get("paon")
+        check_uprn(uprn)
+        # check_usrn(usrn)
         bindata = {"bins": []}
-
-        # Make API request
-        api_url = f"https://east-herts.co.uk/api/services/{user_uprn}"
-        response = requests.get(api_url)
-        response.raise_for_status()
-
-        data = response.json()
-        today = datetime.now().date()
-
-        for service in data.get("services", []):
-            collection_date_str = service.get("collectionDate")
-            if collection_date_str:
-                collection_date = datetime.strptime(collection_date_str, "%Y-%m-%d").date()
-                # Only include future dates
-                if collection_date >= today:
-                    dict_data = {
-                        "type": service.get("binType", ""),
-                        "collectionDate": collection_date.strftime("%d/%m/%Y"),
+
+        # uprn = uprn.zfill(12)
+
+        SESSION_URL = "https://eastherts-self.achieveservice.com/authapi/isauthenticated?uri=https%253A%252F%252Feastherts-self.achieveservice.com%252FAchieveForms%252F%253Fmode%253Dfill%2526consentMessage%253Dyes%2526form_uri%253Dsandbox-publish%253A%252F%252FAF-Process-98782935-6101-4962-9a55-5923e76057b6%252FAF-Stage-dcd0ec18-dfb4-496a-a266-bd8fadaa28a7%252Fdefinition.json%2526process%253D1%2526process_uri%253Dsandbox-processes%253A%252F%252FAF-Process-98782935-6101-4962-9a55-5923e76057b6%2526process_id%253DAF-Process-98782935-6101-4962-9a55-5923e76057b6&hostname=eastherts-self.achieveservice.com&withCredentials=true"
+
+        API_URL = "https://eastherts-self.achieveservice.com/apibroker/runLookup"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "*/*",
+            "User-Agent": "Mozilla/5.0",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": "https://eastherts-self.achieveservice.com/fillform/?iframe_id=fillform-frame-1&db_id=",
+        }
+        s = requests.session()
+        r = s.get(SESSION_URL)
+        r.raise_for_status()
+        session_data = r.json()
+        sid = session_data["auth-session"]
+        params = {
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        params = {
+            "id": "683d9ff0e299d",
+            "repeat_against": "",
+            "noRetry": "true",
+            "getOnlyTokens": "undefined",
+            "log_id": "",
+            "app_name": "AF-Renderer::Self",
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        data = {
+            "formValues": {
+                "Collection Days": {
+                    "inputUPRN": {
+                        "value": uprn,
                     }
-                    bindata["bins"].append(dict_data)
-
+                },
+            }
+        }
+
+        r = s.post(API_URL, json=data, headers=headers, params=params)
+        r.raise_for_status()
+
+        data = r.json()
+        rows_data = data["integration"]["transformed"]["rows_data"]["0"]
+        if not isinstance(rows_data, dict):
+            raise ValueError("Invalid data returned from API")
+
+        # Extract each service's relevant details for the bin schedule
+        for key, value in rows_data.items():
+            if key.endswith("NextDate"):
+                BinType = key.replace("NextDate", "ServiceName")
+                for key2, value2 in rows_data.items():
+                    if key2 == BinType:
+                        BinType = value2
+                next_collection = datetime.strptime(
+                    remove_ordinal_indicator_from_date_string(value), "%A %d %B"
+                ).replace(year=datetime.now().year)
+                if datetime.now().month == 12 and next_collection.month == 1:
+                    next_collection = next_collection + relativedelta(years=1)
+
+                dict_data = {
+                    "type": BinType,
+                    "collectionDate": next_collection.strftime(date_format),
+                }
+                bindata["bins"].append(dict_data)
+
         return bindata
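
The runLookup response carries dates without a year ("%A %d %B"), so the code pins the current year and pushes January dates seen in December into the next year. A minimal sketch of that rule, with a simplified stand-in for remove_ordinal_indicator_from_date_string:

import re
from datetime import datetime

from dateutil.relativedelta import relativedelta


def strip_ordinal(s: str) -> str:
    # Simplified stand-in for remove_ordinal_indicator_from_date_string
    return re.sub(r"(\d{1,2})(st|nd|rd|th)", r"\1", s)


def resolve(display: str, today: datetime) -> datetime:
    # Dates arrive year-less, e.g. "Friday 2nd January"
    d = datetime.strptime(strip_ordinal(display), "%A %d %B").replace(year=today.year)
    if today.month == 12 and d.month == 1:
        d += relativedelta(years=1)  # a December run: January belongs to next year
    return d


print(resolve("Friday 2nd January", datetime(2025, 12, 20)))  # 2026-01-02 00:00:00
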
uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py
@@ -1,4 +1,7 @@
+import json
+
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -16,46 +19,41 @@ class CouncilClass(AbstractGetBinDataClass):
         uprn = kwargs.get("uprn")
         check_uprn(uprn)
 
+        label_map = {
+            "domestic-waste-collection-service": "Household Waste",
+            "recycling-collection-service": "Recycling",
+            "garden-waste-collection-service": "Garden Waste",
+        }
+
         requests.packages.urllib3.disable_warnings()
         response = requests.get(
-            f"https://map.erewash.gov.uk/isharelive.web/myerewash.aspx?action=SetAddress&UniqueId={uprn}",
+            f"https://www.erewash.gov.uk/bbd-whitespace/one-year-collection-dates-without-christmas?uprn={uprn}",
             headers={"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"},
         )
+        # Parse the JSON response
+        payload = response.json()
+        bin_collection = json.loads(payload) if isinstance(payload, str) else payload
 
-        soup = BeautifulSoup(response.text, features="html.parser")
-        collections = soup.find("div", {"aria-label": "Waste Collection"}).find_all(
-            "div", {"class": "atPanelContent"}
+        cd = next(
+            i["settings"]["collection_dates"]
+            for i in bin_collection
+            if i.get("command") == "settings"
         )
-        for c in collections:
-            bin_type = c.find("h4").get_text(strip=True)
-            if "my next" in bin_type.lower():
-                collection_info = c.find("div", {"class": "atPanelData"}).get_text(
-                    strip=True
-                )
-                results = re.search(
-                    "([A-Za-z]+ \\d+[A-Za-z]+ [A-Za-z]+ \\d*)", collection_info
+
+        for month in cd.values():
+            for e in month:
+                d = e["date"]  # "YYYY-MM-DD"
+                label = label_map.get(
+                    e.get("service-identifier"),
+                    e.get("service") or e.get("service-identifier"),
                 )
-                if results:
-                    collection_date = datetime.strptime(
-                        remove_ordinal_indicator_from_date_string(results[1]).strip(),
-                        "%A %d %B %Y",
-                    ).strftime(date_format)
-                    dict_data = {
-                        "type": bin_type.replace("My Next ", "").replace(
-                            " Collection", ""
-                        ),
-                        "collectionDate": collection_date,
-                    }
-                    data["bins"].append(dict_data)
-                    if "garden waste" in collection_info.lower():
-                        dict_data = {
-                            "type": "Garden Waste",
-                            "collectionDate": collection_date,
-                        }
-                        data["bins"].append(dict_data)
-
-        data["bins"].sort(
-            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
-        )
+
+                dict_data = {
+                    "type": label,
+                    "collectionDate": datetime.strptime(d, "%Y-%m-%d").strftime(
+                        date_format
+                    ),
+                }
+                data["bins"].append(dict_data)
 
         return data
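
The new Erewash endpoint appears to return a list of AJAX-style command objects rather than HTML; the parser pulls collection_dates out of the one whose command is "settings". A sketch against an invented payload of that assumed shape:

# Sketch only: the payload below is invented to illustrate the assumed
# response shape (a list of commands, one carrying "collection_dates").
import json
from datetime import datetime

SAMPLE = json.dumps(
    [
        {"command": "something-else"},
        {
            "command": "settings",
            "settings": {
                "collection_dates": {
                    "2025-09": [
                        {
                            "date": "2025-09-16",
                            "service-identifier": "recycling-collection-service",
                        }
                    ]
                }
            },
        },
    ]
)

label_map = {"recycling-collection-service": "Recycling"}

payload = json.loads(SAMPLE)
# Defensive double-decode, mirroring the council module above
bin_collection = json.loads(payload) if isinstance(payload, str) else payload
cd = next(
    i["settings"]["collection_dates"]
    for i in bin_collection
    if i.get("command") == "settings"
)
for month in cd.values():
    for e in month:
        label = label_map.get(e.get("service-identifier"), e.get("service"))
        print(label, datetime.strptime(e["date"], "%Y-%m-%d").strftime("%d/%m/%Y"))
# Recycling 16/09/2025
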
uk_bin_collection/uk_bin_collection/councils/FarehamBoroughCouncil.py
@@ -38,11 +38,14 @@ class CouncilClass(AbstractGetBinDataClass):
         if "rows" in bin_data:
             collection_str = bin_data["rows"][0]["DomesticBinDay"]
 
-            results = re.findall(r"(\d\d?\/\d\d?\/\d{4}) \((\w*)\)", collection_str)
+            results = re.findall(r"(\d{1,2}/\d{1,2}/\d{4}|today)\s*\(([^)]+)\)", collection_str)
 
             if results:
                 for result in results:
-                    collection_date = datetime.strptime(result[0], "%d/%m/%Y")
+                    if result[0] == "today":
+                        collection_date = datetime.today()
+                    else:
+                        collection_date = datetime.strptime(result[0], "%d/%m/%Y")
                     dict_data = {
                         "type": result[1],
                         "collectionDate": collection_date.strftime(date_format),
uk_bin_collection/uk_bin_collection/councils/FolkstoneandHytheDistrictCouncil.py
@@ -74,6 +74,28 @@ class CouncilClass(AbstractGetBinDataClass):
             }
             bindata["bins"].append(dict_data)
 
+        # Extract the Garden Waste schedule
+        garden_waste_section = soup.find(
+            "span", text=lambda x: x and "Garden Waste" in x
+        )
+        if garden_waste_section:
+            bin_types = garden_waste_section.text.replace("Garden Waste: ", "").split(
+                " / "
+            )
+            garden_waste_dates = garden_waste_section.find_next("ul").find_all("li")
+            for date in garden_waste_dates:
+                for bin_type in bin_types:
+                    dict_data = {
+                        "type": bin_type.strip(),
+                        "collectionDate": datetime.strptime(
+                            remove_ordinal_indicator_from_date_string(
+                                date.text.strip()
+                            ),
+                            "%A %d %B %Y",
+                        ).strftime("%d/%m/%Y"),
+                    }
+                    bindata["bins"].append(dict_data)
+
         bindata["bins"].sort(
             key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
         )
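
The Garden Waste block is located by its labelling span, with the first following <ul> supplying the dates. A sketch of that lookup on invented HTML (this sketch uses bs4's string= argument, equivalent to the text= form above):

# Sketch only: the HTML is invented to show the span-then-next-ul pattern.
from bs4 import BeautifulSoup

SAMPLE = """
<span>Garden Waste: Garden bin / Food caddy</span>
<ul><li>Friday 15th August 2025</li><li>Friday 29th August 2025</li></ul>
"""

soup = BeautifulSoup(SAMPLE, "html.parser")
section = soup.find("span", string=lambda x: x and "Garden Waste" in x)
bin_types = section.text.replace("Garden Waste: ", "").split(" / ")
for li in section.find_next("ul").find_all("li"):
    for bin_type in bin_types:
        print(bin_type.strip(), "->", li.text.strip())
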
uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py
@@ -18,7 +18,7 @@ class CouncilClass(AbstractGetBinDataClass):
         try:
             user_uprn = kwargs.get("uprn")
             check_uprn(user_uprn)
-            url = f"https://onlineservices.glasgow.gov.uk/forms/RefuseAndRecyclingWebApplication/CollectionsCalendar.aspx?UPRN={user_uprn}"
+            url = f"https://onlineservices.glasgow.gov.uk/forms/refuseandrecyclingcalendar/CollectionsCalendar.aspx?UPRN={user_uprn}"
             if not user_uprn:
                 # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
                 url = kwargs.get("url")
uk_bin_collection/uk_bin_collection/councils/HartlepoolBoroughCouncil.py
@@ -73,7 +73,9 @@ class CouncilClass(AbstractGetBinDataClass):
         for div in soup.find_all("div"):
             # Extract bin type and date from the span tag
             text = div.find("span").text.strip()
-            bin_type, date = text.split(" ", 1)
+            parts = text.split(" ")
+            date = parts[-1]  # assume the last token is the date
+            bin_type = " ".join(parts[:-1])
             dict_data = {
                 "type": bin_type,
                 "collectionDate": date,
uk_bin_collection/uk_bin_collection/councils/HinckleyandBosworthBoroughCouncil.py
@@ -20,10 +20,16 @@ class CouncilClass(AbstractGetBinDataClass):
         check_uprn(user_uprn)
         bindata = {"bins": []}
 
+        headers = {
+            "Origin": "https://www.hinckley-bosworth.gov.uk",
+            "Referer": "https://www.hinckley-bosworth.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         URI = f"https://www.hinckley-bosworth.gov.uk/set-location?id={user_uprn}&redirect=refuse&rememberloc="
 
         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)
 
         # Parse the HTML
         soup = BeautifulSoup(response.content, "html.parser")
uk_bin_collection/uk_bin_collection/councils/IpswichBoroughCouncil.py
@@ -31,7 +31,9 @@ class CouncilClass(AbstractGetBinDataClass):
     IBC_ENDPOINT = "https://app.ipswich.gov.uk/bin-collection/"
 
     def transform_date(self, date_str):
-        date_str = re.sub(r"(st|nd|rd|th)", "", date_str)  # Remove ordinal suffixes
+        date_str = re.sub(
+            r"(\d{1,2})(st|nd|rd|th)", r"\1", date_str
+        )  # Remove ordinal suffixes
         date_obj = datetime.strptime(date_str, "%A %d %B %Y")
         return date_obj.strftime(date_format)
 
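
The old substitution removed "st"/"nd"/"rd"/"th" anywhere in the string, which mangled month names; anchoring on a preceding digit confines it to ordinal suffixes:

import re

s = "Friday 1st August 2025"
print(re.sub(r"(st|nd|rd|th)", "", s))              # Friday 1 Augu 2025  (broken)
print(re.sub(r"(\d{1,2})(st|nd|rd|th)", r"\1", s))  # Friday 1 August 2025
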
uk_bin_collection/uk_bin_collection/councils/LichfieldDistrictCouncil.py
@@ -24,10 +24,16 @@ class CouncilClass(AbstractGetBinDataClass):
         def solve(s):
             return re.sub(r"(\d)(st|nd|rd|th)", r"\1", s)
 
+        headers = {
+            "Origin": "https://www.lichfielddc.gov.uk",
+            "Referer": "https://www.lichfielddc.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         URI = f"https://www.lichfielddc.gov.uk/homepage/6/bin-collection-dates?uprn={user_uprn}"
 
         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)
 
         soup = BeautifulSoup(response.text, "html.parser")
 
uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py
@@ -1,5 +1,7 @@
 import pandas as pd
+import requests
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import date_format
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -12,15 +14,26 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-        # Make a BS4 object
-        soup = BeautifulSoup(page.text, features="html.parser")
+        user_url = kwargs.get("url")
+
+        headers = {
+            "Origin": "https://www.nelincs.gov.uk",
+            "Referer": "https://www.nelincs.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
+        # Make the GET request
+        response = requests.get(user_url, headers=headers)
+
+        # Parse the HTML
+        soup = BeautifulSoup(response.content, "html.parser")
         soup.prettify()
 
         data = {"bins": []}
 
         # Get list items that can be seen on page
         for element in soup.find_all(
-            "li", {"class": "list-group-item p-0 p-3 bin-collection-item"}
+            "li", {"class": "border-0 list-group-item p-3 bg-light rounded p-2"}
         ):
             element_text = element.text.strip().split("\n\n")
             element_text = [x.strip() for x in element_text]
@@ -35,9 +48,7 @@ class CouncilClass(AbstractGetBinDataClass):
             data["bins"].append(dict_data)
 
         # Get hidden list items too
-        for element in soup.find_all(
-            "li", {"class": "list-group-item p-0 p-3 bin-collection-item d-none"}
-        ):
+        for element in soup.find_all("li", {"class": "border-0 list-group-item p-3"}):
             element_text = element.text.strip().split("\n\n")
             element_text = [x.strip() for x in element_text]
 
uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py
@@ -125,23 +125,6 @@ class CouncilClass(AbstractGetBinDataClass):
         # Wait for the page to load - giving it extra time
         time.sleep(5)
 
-        # Use only the selector that we know works
-        # print("Looking for bin type elements...")
-        try:
-            bin_type_selector = (
-                By.CSS_SELECTOR,
-                "div.formatting_bold.formatting_size_bigger.formatting span.value-as-text",
-            )
-            WebDriverWait(driver, 15).until(
-                EC.presence_of_element_located(bin_type_selector)
-            )
-            # print(f"Found bin type elements with selector: {bin_type_selector}")
-        except TimeoutException:
-            # print("Could not find bin type elements. Taking screenshot for debugging...")
-            screenshot_path = f"bin_type_error_{int(time.time())}.png"
-            driver.save_screenshot(screenshot_path)
-            # print(f"Screenshot saved to {screenshot_path}")
-
         # Create BS4 object from driver's page source
         # print("Parsing page with BeautifulSoup...")
         soup = BeautifulSoup(driver.page_source, features="html.parser")
@@ -149,122 +132,37 @@ class CouncilClass(AbstractGetBinDataClass):
         # Initialize data dictionary
         data = {"bins": []}
 
-        # Looking for bin types in the exact HTML structure
-        bin_type_elements = soup.select(
-            "div.page_cell.contains_widget:first-of-type div.formatting_bold.formatting_size_bigger.formatting span.value-as-text"
-        )
-        # print(f"Found {len(bin_type_elements)} bin type elements")
-
-        # Look specifically for date elements with the exact structure
-        date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
-        hidden_dates = soup.select(
-            "div.col-sm-12.font-xs-3xl input[type='hidden'][value*='/']"
-        )
-
-        # print(f"Found {len(bin_type_elements)} bin types and {len(date_elements)} date elements")
-
-        # We need a smarter way to match bin types with their dates
-        bin_count = 0
+        for row in soup.select(".listing_template_row"):
+            # Title (waste stream) is the first <p> in the section
+            first_p = row.find("p")
+            if not first_p:
+                continue
+            stream = first_p.get_text(" ", strip=True)
 
-        # Map of bin types to their collection dates
-        bin_date_map = {}
+            for p in row.find_all("p"):
+                t = p.get_text("\n", strip=True)
 
-        # Extract all date strings that look like actual dates
-        date_texts = []
-        date_pattern = re.compile(
-            r"(?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)\s+\d+(?:st|nd|rd|th)?\s+\w+\s+\d{4}",
-            re.IGNORECASE,
-        )
+                if re.search(r"\bNext collection\b", t, flags=re.I):
+                    # Expect format: "Next collection\nTuesday 16th September 2025"
+                    parts = [x.strip() for x in t.split("\n") if x.strip()]
+                    if len(parts) >= 2:
+                        next_collection_display = parts[-1]  # last line
 
-        for element in date_elements:
-            text = element.get_text(strip=True)
-            if date_pattern.search(text):
-                date_texts.append(text)
-                # print(f"Found valid date text: {text}")
-
-        # Find hidden date inputs with values in DD/MM/YYYY format
-        hidden_date_values = []
-        for hidden in hidden_dates:
-            value = hidden.get("value", "")
-            if re.match(r"\d{1,2}/\d{1,2}/\d{4}", value):
-                hidden_date_values.append(value)
-                # print(f"Found hidden date value: {value}")
-
-        # When filtering date elements
-        date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
-        valid_date_elements = []
-
-        for element in date_elements:
-            text = element.get_text(strip=True)
-            if contains_date(text):
-                valid_date_elements.append(element)
-                # print(f"Found valid date element: {text}")
-            else:
-                pass
-                # print(f"Skipping non-date element: {text}")
-
-        # print(f"Found {len(bin_type_elements)} bin types and {len(valid_date_elements)} valid date elements")
-
-        # When processing each bin type
-        for i, bin_type_elem in enumerate(bin_type_elements):
-            bin_type = bin_type_elem.get_text(strip=True)
-
-            # Try to find a date for this bin type
-            date_text = None
-
-            # Look for a valid date element
-            if i < len(valid_date_elements):
-                date_elem = valid_date_elements[i]
-                date_text = date_elem.get_text(strip=True)
-
-            # If we don't have a valid date yet, try using the hidden input
-            if not date_text or not contains_date(date_text):
-                if i < len(hidden_dates):
-                    date_value = hidden_dates[i].get("value")
-                    if contains_date(date_value):
-                        date_text = date_value
-
-            # Skip if we don't have a valid date
-            if not date_text or not contains_date(date_text):
-                # print(f"No valid date found for bin type: {bin_type}")
-                continue
+                        # Build record
+                        next_date = datetime.strptime(
+                            remove_ordinal_indicator_from_date_string(next_collection_display),
+                            "%A %d %B %Y",
+                        )
 
-            # print(f"Found bin type: {bin_type} with date: {date_text}")
+                        # Create bin entry
+                        bin_entry = {
+                            "type": stream,
+                            "collectionDate": next_date.strftime(date_format),
+                        }
 
-            try:
-                # Clean up the date text
-                date_text = remove_ordinal_indicator_from_date_string(date_text)
-
-                # Try to parse the date
-                try:
-                    collection_date = datetime.strptime(
-                        date_text, "%A %d %B %Y"
-                    ).date()
-                except ValueError:
-                    try:
-                        collection_date = datetime.strptime(
-                            date_text, "%d/%m/%Y"
-                        ).date()
-                    except ValueError:
-                        # Last resort
-                        collection_date = parse(date_text).date()
-
-                # Create bin entry
-                bin_entry = {
-                    "type": bin_type,
-                    "collectionDate": collection_date.strftime(date_format),
-                }
-
-                # Add to data
-                data["bins"].append(bin_entry)
-                bin_count += 1
-                # print(f"Added bin entry: {bin_entry}")
-
-            except Exception as e:
-                pass
-                # print(f"Error parsing date '{date_text}': {str(e)}")
-
-        # print(f"Successfully parsed {bin_count} bin collections")
+                        # Add to data
+                        data["bins"].append(bin_entry)
+                        # print(f"Added bin entry: {bin_entry}")
 
         if not data["bins"]:
             # print("No bin data found. Saving page for debugging...")