uk_bin_collection 0.158.1__py3-none-any.whl → 0.159.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
@@ -1757,12 +1757,12 @@
          "LAD24CD": "E06000024"
      },
      "NorthTynesideCouncil": {
-         "postcode": "NE26 2TG",
          "skip_get_url": true,
          "uprn": "47097627",
-         "url": "https://my.northtyneside.gov.uk/category/81/bin-collection-dates",
-         "wiki_name": "North Tyneside",
-         "wiki_note": "Pass the postcode and UPRN. You can find the UPRN using [FindMyAddress](https://www.findmyaddress.co.uk/search).",
+         "url": "https://www.northtyneside.gov.uk/waste-collection-schedule",
+         "wiki_command_url_override": "https://www.northtyneside.gov.uk/waste-collection-schedule/view/XXXXXXXX",
+         "wiki_name": "North Tyneside",
+         "wiki_note": "Pass only the UPRN (no postcode). You can find the UPRN using [FindMyAddress](https://www.findmyaddress.co.uk/search).",
          "LAD24CD": "E08000022"
      },
      "NorthWestLeicestershire": {
@@ -2155,13 +2155,11 @@
          "LAD24CD": "E07000140"
      },
      "SouthKestevenDistrictCouncil": {
-         "house_number": "2 Althorpe Close, Market Deeping, PE6 8BL",
          "postcode": "PE68BL",
          "skip_get_url": true,
-         "url": "https://pre.southkesteven.gov.uk/BinSearch.aspx",
-         "web_driver": "http://selenium:4444",
-         "wiki_name": "South Kesteven",
-         "wiki_note": "Provide your full address in the `house_number` parameter and your postcode in the `postcode` parameter.",
+         "url": "https://pre.southkesteven.gov.uk/skdcNext/tempforms/checkmybin.aspx",
+         "wiki_name": "South Kesteven District Council",
+         "wiki_note": "Provide your postcode in the `postcode` parameter. The scraper uses requests-based form submission and OCR to parse calendar images for accurate bin type determination and green bin collection patterns.",
          "LAD24CD": "E07000141"
      },
      "SouthLanarkshireCouncil": {
@@ -1,138 +1,115 @@
- import math
- from datetime import *
-
- import requests
+ import logging
+ from datetime import datetime
  from bs4 import BeautifulSoup
 
- from uk_bin_collection.uk_bin_collection.common import *
+ from uk_bin_collection.uk_bin_collection.common import check_uprn, date_format
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
+ logger = logging.getLogger(__name__)
+
 
  class CouncilClass(AbstractGetBinDataClass):
+     """
+     North Tyneside Council bin collection schedule parser
+     """
 
      def parse_data(self, page: str, **kwargs) -> dict:
+         """
+         Parse waste collection schedule data for a given UPRN.
+
+         Args:
+             page (str): Unused parameter (required by parent class interface).
+             **kwargs: Keyword arguments containing:
+                 - uprn (str): The Unique Property Reference Number for the property.
+
+         Returns:
+             dict: A dictionary containing:
+                 - bins (list): A list of dictionaries, each containing:
+                     - type (str): Bin type and colour in format "Type (Colour)"
+                       (e.g., "Recycling (Grey)")
+                     - collectionDate (str): Collection date in the format specified
+                       by date_format
+
+         Raises:
+             ValueError: If no waste collection schedule is found on the page, indicating
+                 the page structure may have changed.
+             requests.HTTPError: If the HTTP request to fetch the schedule fails.
+
+         Notes:
+             - The method handles bank holiday notifications that may appear in the
+               collection type field, extracting only the direct text content.
+             - Invalid or unparsable collection entries are logged and skipped.
+             - Results are sorted by collection date in ascending order.
+         """
+         # `page` is unused because we construct the view URL directly.
+         del page
 
          user_uprn = kwargs.get("uprn")
-         user_postcode = kwargs.get("postcode")
          check_uprn(user_uprn)
-         check_postcode(user_postcode)
 
-         requests.packages.urllib3.disable_warnings()
-         s = requests.Session()
+         # Fetch the schedule page (includes UA, verify=False, timeout)
+         view_url = f"https://www.northtyneside.gov.uk/waste-collection-schedule/view/{user_uprn}"
+         response = self.get_data(view_url)
 
-         # Get the first form
-         response = s.get(
-             "https://my.northtyneside.gov.uk/category/81/bin-collection-dates",
-             verify=False,
-         )
+         # Fail fast on HTTP errors
+         if getattr(response, "raise_for_status", None):
+             response.raise_for_status()
 
-         # Find the form ID and submit with a postcode
-         soup = BeautifulSoup(response.text, features="html.parser")
-         form_build_id = soup.find("input", {"name": "form_build_id"})["value"]
-         response = s.post(
-             "https://my.northtyneside.gov.uk/category/81/bin-collection-dates",
-             data={
-                 "postcode": user_postcode,
-                 "op": "Find",
-                 "form_build_id": form_build_id,
-                 "form_id": "ntc_address_wizard",
-             },
-             verify=False,
-         )
-
-         # Find the form ID and submit with the UPRN
-         soup = BeautifulSoup(response.text, features="html.parser")
-         form_build_id = soup.find("input", {"name": "form_build_id"})["value"]
-         response = s.post(
-             "https://my.northtyneside.gov.uk/category/81/bin-collection-dates",
-             data={
-                 "house_number": f"0000{user_uprn}",
-                 "op": "Use",
-                 "form_build_id": form_build_id,
-                 "form_id": "ntc_address_wizard",
-             },
-             verify=False,
-         )
 
          # Parse form page and get the day of week and week offsets
          soup = BeautifulSoup(response.text, features="html.parser")
-         info_section = soup.find("section", {"class": "block block-ntc-bins clearfix"})
-
-         regular_day, garden_day, special_day = None, None, None
-         # Get day of week and week label for refuse, garden and special collections.
-         # Week label is A or B. Convert that to an int to use as an offset.
-         for anchor in info_section.findAll("a"):
-             if anchor.text.startswith("Refuse and Recycling"):
-                 regular_day = anchor.text.strip().split()[-3]
-                 if anchor.text.strip().split()[-1] == "A":
-                     regular_week = 0
-                 else:
-                     regular_week = 1
-             elif anchor.text.startswith("Garden Waste"):
-                 garden_day = anchor.text.strip().split()[-3]
-                 if anchor.text.strip().split()[-1] == "A":
-                     garden_week = 0
-                 else:
-                     garden_week = 1
-         for para in info_section.findAll("p"):
-             if para.text.startswith("Your special collections day"):
-                 special_day = para.find("strong").text.strip()
-
-         # The regular calendar only shows until end of March 2026, work out how many weeks that is
-         weeks_total = math.floor((datetime(2026, 4, 1) - datetime.now()).days / 7)
-
-         # The garden calendar only shows until end of November 2025, work out how many weeks that is
-         garden_weeks_total = math.floor(
-             (datetime(2025, 12, 1) - datetime.now()).days / 7
-         )
-
-         regular_collections, garden_collections, special_collections = [], [], []
-         # Convert day text to series of dates using previous calculation
-         if regular_day is not None:
-             regular_collections = get_weekday_dates_in_period(
-                 datetime.today(),
-                 days_of_week.get(regular_day.capitalize()),
-                 amount=weeks_total,
-             )
-         if garden_day is not None:
-             garden_collections = get_weekday_dates_in_period(
-                 datetime.today(),
-                 days_of_week.get(garden_day.capitalize()),
-                 amount=garden_weeks_total,
-             )
-         if special_day is not None:
-             special_collections = get_weekday_dates_in_period(
-                 datetime.today(),
-                 days_of_week.get(special_day.capitalize()),
-                 amount=weeks_total,
-             )
+         schedule = soup.find("div", {"class": "waste-collection__schedule"})
+
+         if schedule is None:
+             raise ValueError("No waste-collection schedule found. The page structure may have changed.")
+
+
+         # Find days of form:
+         #
+         # <li class="waste-collection__day">
+         #     <span class="waste-collection__day--day"><time datetime="2025-11-13">13</time></span>
+         #     <span class="waste-collection__day--type">Recycling</span>
+         #     <span class="waste-collection__day--colour waste-collection__day--grey">Grey</span>
+         # </li>
+         #
+         #
+         # Note that on bank holidays the collection type is of form:
+         # ...
+         # <span class="waste-collection__day--type">Recycling
+         #     <span>
+         #         Public holiday - services may be affected. Check service updates on <a href="/household-rubbish-and-recycling/household-bin-collections/bank-holiday-bin-collections">our website</a>
+         #     </span>
+         # </span>
+         # ...
 
          collections = []
 
-         # Add regular collections, and differentiate between regular and recycling bins
-         for item in regular_collections:
-             item_as_date = datetime.strptime(item, date_format)
-             # Check if holiday (calendar only has one day that's a holiday, and it's moved to the next day)
-             if is_holiday(item_as_date, Region.ENG):
-                 item_as_date += timedelta(days=1)
-             # Use the isoweek number to separate collections based on week label.
-             if (item_as_date.date().isocalendar()[1] % 2) == regular_week:
-                 collections.append(("Refuse (green)", item_as_date))
-             else:
-                 collections.append(("Recycling (grey)", item_as_date))
-
-         # Add garden collections
-         for item in garden_collections:
-             item_as_date = datetime.strptime(item, date_format)
-             # Garden collections do not move for bank holidays
-             if (item_as_date.date().isocalendar()[1] % 2) == garden_week:
-                 collections.append(("Garden Waste (brown)", item_as_date))
-
-         # Add special collections
-         collections += [
-             ("Special Collection (bookable)", datetime.strptime(item, date_format))
-             for item in special_collections
-         ]
+         for day in schedule.find_all("li", {"class": "waste-collection__day"}):
+             try:
+                 time_el = day.find("time")
+                 if not time_el or not time_el.get("datetime"):
+                     logger.warning("Skipping day: missing time/datetime")
+                     continue
+                 collection_date = datetime.strptime(time_el["datetime"], "%Y-%m-%d")
+
+                 type_span = day.find("span", {"class": "waste-collection__day--type"})
+                 # Direct text only (exclude nested spans, e.g., bank-holiday note)
+                 bin_type_text = type_span.find(text=True, recursive=False) if type_span else None
+                 if not bin_type_text:
+                     logger.warning("Skipping day: missing type")
+                     continue
+                 bin_type = bin_type_text.strip()
+
+                 colour_span = day.find("span", {"class": "waste-collection__day--colour"})
+                 if not colour_span:
+                     logger.warning("Skipping day: missing colour")
+                     continue
+                 bin_colour = colour_span.get_text(strip=True)
+
+                 collections.append((f"{bin_type} ({bin_colour})", collection_date))
+             except (AttributeError, KeyError, TypeError, ValueError) as e:
+                 logger.warning(f"Skipping unparsable day node: {e}")
+                 continue
 
          return {
              "bins": [