uk_bin_collection 0.152.11__py3-none-any.whl → 0.154.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +16 -21
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +45 -120
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +4 -1
- uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +15 -36
- uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +75 -100
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +55 -24
- uk_bin_collection/uk_bin_collection/councils/EastHertsCouncil.py +82 -24
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +32 -34
- uk_bin_collection/uk_bin_collection/councils/FarehamBoroughCouncil.py +5 -2
- uk_bin_collection/uk_bin_collection/councils/FolkstoneandHytheDistrictCouncil.py +22 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/HartlepoolBoroughCouncil.py +3 -1
- uk_bin_collection/uk_bin_collection/councils/HinckleyandBosworthBoroughCouncil.py +7 -1
- uk_bin_collection/uk_bin_collection/councils/IpswichBoroughCouncil.py +3 -1
- uk_bin_collection/uk_bin_collection/councils/LichfieldDistrictCouncil.py +7 -1
- uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py +17 -6
- uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py +26 -128
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +63 -79
- uk_bin_collection/uk_bin_collection/councils/NorwichCityCouncil.py +67 -66
- uk_bin_collection/uk_bin_collection/councils/NuneatonBedworthBoroughCouncil.py +19 -7
- uk_bin_collection/uk_bin_collection/councils/RunnymedeBoroughCouncil.py +7 -1
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +4 -2
- uk_bin_collection/uk_bin_collection/councils/SandwellBoroughCouncil.py +4 -11
- uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py +39 -21
- uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +4 -0
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +16 -13
- uk_bin_collection/uk_bin_collection/councils/WiltshireCouncil.py +47 -29
- {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/RECORD +33 -33
- {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/entry_points.txt +0 -0
uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py
@@ -26,7 +26,9 @@ class CouncilClass(AbstractGetBinDataClass):
         uprn = kwargs.get("uprn")
         check_uprn(uprn)
 
-        [… not captured …]
+        base_url = "https://apps.castlepoint.gov.uk/cpapps/"
+
+        post_url = f"{base_url}index.cfm?fa=myStreet.displayDetails"
         post_header_str = (
             "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,"
             "image/apng,"
@@ -51,31 +53,60 @@ class CouncilClass(AbstractGetBinDataClass):
         soup = BeautifulSoup(post_response.text, features="html.parser")
         soup.prettify()
 
+        calMonthNext = f"{base_url}{soup.select_one("div.calMonthNext a")["href"]}"
+        nextmonth_response = requests.post(
+            calMonthNext, headers=post_headers, data=form_data, verify=False
+        )
+        soup_nextmonth = BeautifulSoup(nextmonth_response.text, features="html.parser")
+        soup_nextmonth.prettify()
+
         data = {"bins": []}
-        collection_tuple = []
 
-        [… 22 lines not captured in this view …]
+        def parse_calendar_month(soup_one_month):
+            out = []
+
+            calendar = soup_one_month.find("table", class_="calendar")
+            if not calendar:
+                return out  # be robust
+
+            # e.g. "[Aug]"
+            month_txt = soup_one_month.find("div", class_="calMonthCurrent").get_text(
+                strip=True
+            )
+            month = datetime.strptime(month_txt, "[%b]").strftime("%m")
+
+            # e.g. "About my Street - August 2025"
+            year_txt = soup_one_month.find("h1").get_text(strip=True)
+            year = datetime.strptime(year_txt, "About my Street - %B %Y").strftime("%Y")
+
+            pink_days = [
+                td.get_text(strip=True) for td in calendar.find_all("td", class_="pink")
+            ]
+            black_days = [
+                td.get_text(strip=True)
+                for td in calendar.find_all("td", class_="normal")
+            ]
+
+            for day in pink_days:
+                out.append(
+                    (
+                        "Pink collection",
+                        datetime(year=int(year), month=int(month), day=int(day)),
+                    )
+                )
+            for day in black_days:
+                out.append(
+                    (
+                        "Normal collection",
+                        datetime(year=int(year), month=int(month), day=int(day)),
+                    )
+                )
+
+            return out
+
+        collection_tuple = []
+        for s in (soup, soup_nextmonth):
+            collection_tuple.extend(parse_calendar_month(s))
 
         ordered_data = sorted(collection_tuple, key=lambda x: x[1])
 
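One caveat on the added `calMonthNext` line: it nests double quotes inside a double-quoted f-string, which only parses on Python 3.12+ (PEP 701); on older interpreters the `select_one` result would need to be pulled into a variable first.

The new Castle Point logic fetches the current month's calendar, follows the `div.calMonthNext` link for the following month, and parses both with the shared helper above. A self-contained sketch of the same table-parsing idea, run against invented sample markup (the real pages come from the POST calls shown in the diff):

```python
from datetime import datetime
from bs4 import BeautifulSoup

SAMPLE = """
<h1>About my Street - August 2025</h1>
<div class="calMonthCurrent">[Aug]</div>
<table class="calendar">
  <tr><td class="pink">4</td><td class="normal">7</td></tr>
</table>
"""

def parse_calendar_month(soup):
    # Month is printed like "[Aug]"; the year only appears in the page <h1>
    month = datetime.strptime(
        soup.find("div", class_="calMonthCurrent").get_text(strip=True), "[%b]"
    ).month
    year = datetime.strptime(
        soup.find("h1").get_text(strip=True), "About my Street - %B %Y"
    ).year
    out = []
    for label, css in (("Pink collection", "pink"), ("Normal collection", "normal")):
        for td in soup.find("table", class_="calendar").find_all("td", class_=css):
            out.append((label, datetime(year, month, int(td.get_text(strip=True)))))
    return out

print(parse_calendar_month(BeautifulSoup(SAMPLE, "html.parser")))
# [('Pink collection', datetime.datetime(2025, 8, 4, 0, 0)),
#  ('Normal collection', datetime.datetime(2025, 8, 7, 0, 0))]
```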
uk_bin_collection/uk_bin_collection/councils/EastHertsCouncil.py
@@ -1,11 +1,13 @@
-import …
+import time
+
 import requests
-from …
+from dateutil.relativedelta import relativedelta
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
+# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -14,28 +16,84 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-        [… 2 lines not captured in this view …]
+        # Make a BS4 object
+        uprn = kwargs.get("uprn")
+        # usrn = kwargs.get("paon")
+        check_uprn(uprn)
+        # check_usrn(usrn)
         bindata = {"bins": []}
-        [… not captured …]
-        #…
-        [… 16 lines not captured in this view …]
+
+        # uprn = uprn.zfill(12)
+
+        SESSION_URL = "https://eastherts-self.achieveservice.com/authapi/isauthenticated?uri=https%253A%252F%252Feastherts-self.achieveservice.com%252FAchieveForms%252F%253Fmode%253Dfill%2526consentMessage%253Dyes%2526form_uri%253Dsandbox-publish%253A%252F%252FAF-Process-98782935-6101-4962-9a55-5923e76057b6%252FAF-Stage-dcd0ec18-dfb4-496a-a266-bd8fadaa28a7%252Fdefinition.json%2526process%253D1%2526process_uri%253Dsandbox-processes%253A%252F%252FAF-Process-98782935-6101-4962-9a55-5923e76057b6%2526process_id%253DAF-Process-98782935-6101-4962-9a55-5923e76057b6&hostname=eastherts-self.achieveservice.com&withCredentials=true"
+
+        API_URL = "https://eastherts-self.achieveservice.com/apibroker/runLookup"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "*/*",
+            "User-Agent": "Mozilla/5.0",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": "https://eastherts-self.achieveservice.com/fillform/?iframe_id=fillform-frame-1&db_id=",
+        }
+        s = requests.session()
+        r = s.get(SESSION_URL)
+        r.raise_for_status()
+        session_data = r.json()
+        sid = session_data["auth-session"]
+        params = {
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        params = {
+            "id": "683d9ff0e299d",
+            "repeat_against": "",
+            "noRetry": "true",
+            "getOnlyTokens": "undefined",
+            "log_id": "",
+            "app_name": "AF-Renderer::Self",
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        data = {
+            "formValues": {
+                "Collection Days": {
+                    "inputUPRN": {
+                        "value": uprn,
                     }
-        [… 2 lines not captured in this view …]
+                },
+            }
+        }
+
+        r = s.post(API_URL, json=data, headers=headers, params=params)
+        r.raise_for_status()
+
+        data = r.json()
+        rows_data = data["integration"]["transformed"]["rows_data"]["0"]
+        if not isinstance(rows_data, dict):
+            raise ValueError("Invalid data returned from API")
+
+        # Extract each service's relevant details for the bin schedule
+        for key, value in rows_data.items():
+            if key.endswith("NextDate"):
+                BinType = key.replace("NextDate", "ServiceName")
+                for key2, value2 in rows_data.items():
+                    if key2 == BinType:
+                        BinType = value2
+                next_collection = datetime.strptime(
+                    remove_ordinal_indicator_from_date_string(value), "%A %d %B"
+                ).replace(year=datetime.now().year)
+                if datetime.now().month == 12 and next_collection.month == 1:
+                    next_collection = next_collection + relativedelta(years=1)
+
+                dict_data = {
+                    "type": BinType,
+                    "collectionDate": next_collection.strftime(date_format),
+                }
+                bindata["bins"].append(dict_data)
+
         return bindata
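The rewritten East Herts scraper goes through the council's AchieveService form backend: fetch a session token from the `isauthenticated` endpoint, then POST the UPRN to `apibroker/runLookup` and read `integration.transformed.rows_data` from the JSON reply. A condensed sketch of that flow (the lookup id and URLs are taken from the diff above; this is not the package's actual helper, and error handling is trimmed):

```python
import time
import requests

SESSION_URL = "https://eastherts-self.achieveservice.com/authapi/isauthenticated?uri=..."  # full query string as in the diff
API_URL = "https://eastherts-self.achieveservice.com/apibroker/runLookup"

def fetch_rows(uprn: str) -> dict:
    s = requests.session()
    sid = s.get(SESSION_URL).json()["auth-session"]  # session token
    params = {"id": "683d9ff0e299d", "_": str(int(time.time() * 1000)), "sid": sid}
    payload = {"formValues": {"Collection Days": {"inputUPRN": {"value": uprn}}}}
    r = s.post(API_URL, json=payload, params=params)
    r.raise_for_status()
    return r.json()["integration"]["transformed"]["rows_data"]["0"]
```

Because the feed returns dates without a year (`"%A %d %B"`), the parser pins the current year and, when scraping in December, rolls January dates forward with `relativedelta(years=1)`.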
uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py
@@ -1,4 +1,7 @@
+import json
+
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -16,46 +19,41 @@ class CouncilClass(AbstractGetBinDataClass):
         uprn = kwargs.get("uprn")
         check_uprn(uprn)
 
+        label_map = {
+            "domestic-waste-collection-service": "Household Waste",
+            "recycling-collection-service": "Recycling",
+            "garden-waste-collection-service": "Garden Waste",
+        }
+
         requests.packages.urllib3.disable_warnings()
         response = requests.get(
-            f"https://…
+            f"https://www.erewash.gov.uk/bbd-whitespace/one-year-collection-dates-without-christmas?uprn={uprn}",
             headers={"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"},
         )
+        # Parse the JSON response
+        payload = response.json()
+        bin_collection = json.loads(payload) if isinstance(payload, str) else payload
 
-        [… 3 lines not captured in this view …]
+        cd = next(
+            i["settings"]["collection_dates"]
+            for i in bin_collection
+            if i.get("command") == "settings"
         )
-        [… 7 lines not captured in this view …]
-                "([A-Za-z]+ \\d+[A-Za-z]+ [A-Za-z]+ \\d*)", collection_info
+
+        for month in cd.values():
+            for e in month:
+                d = e["date"]  # "YYYY-MM-DD"
+                label = label_map.get(
+                    e.get("service-identifier"),
+                    e.get("service") or e.get("service-identifier"),
                 )
-        [… 8 lines not captured in this view …]
-                ),
-                "collectionDate": collection_date,
-            }
-            data["bins"].append(dict_data)
-            if "garden waste" in collection_info.lower():
-                dict_data = {
-                    "type": "Garden Waste",
-                    "collectionDate": collection_date,
-                }
-                data["bins"].append(dict_data)
-
-        data["bins"].sort(
-            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
-        )
+
+                dict_data = {
+                    "type": label,
+                    "collectionDate": datetime.strptime(d, "%Y-%m-%d").strftime(
+                        date_format
+                    ),
+                }
+                data["bins"].append(dict_data)
 
         return data
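Erewash now consumes a JSON feed instead of regex-scraping HTML. The response is a list of Drupal-style AJAX commands; the entry whose `command` is `settings` carries a `collection_dates` mapping of months to collection entries. A sketch against a hand-made payload of that shape (field names follow the diff; the sample values are invented):

```python
# Invented sample mimicking the feed's shape
payload = [
    {"command": "something-else"},
    {
        "command": "settings",
        "settings": {
            "collection_dates": {
                "2025-09": [
                    {"date": "2025-09-02", "service-identifier": "recycling-collection-service"},
                    {"date": "2025-09-09", "service-identifier": "domestic-waste-collection-service"},
                ]
            }
        },
    },
]

label_map = {
    "domestic-waste-collection-service": "Household Waste",
    "recycling-collection-service": "Recycling",
}

cd = next(i["settings"]["collection_dates"] for i in payload if i.get("command") == "settings")
for month in cd.values():
    for e in month:
        print(label_map.get(e["service-identifier"]), e["date"])
# Recycling 2025-09-02
# Household Waste 2025-09-09
```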
uk_bin_collection/uk_bin_collection/councils/FarehamBoroughCouncil.py
@@ -38,11 +38,14 @@ class CouncilClass(AbstractGetBinDataClass):
         if "rows" in bin_data:
             collection_str = bin_data["rows"][0]["DomesticBinDay"]
 
-            results = re.findall(r…
+            results = re.findall(r'(\d{1,2}/\d{1,2}/\d{4}|today)\s*\(([^)]+)\)', collection_str)
 
             if results:
                 for result in results:
-                    [… not captured …]
+                    if (result[0] == "today"):
+                        collection_date = datetime.today()
+                    else:
+                        collection_date = datetime.strptime(result[0], "%d/%m/%Y")
                     dict_data = {
                         "type": result[1],
                         "collectionDate": collection_date.strftime(date_format),
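The updated Fareham regex accepts either a `DD/MM/YYYY` date or the literal word `today` before the parenthesised bin type, so a string like `today (Recycling)` no longer breaks the parser. A quick check of the pattern (sample strings invented):

```python
import re
from datetime import datetime

pattern = r'(\d{1,2}/\d{1,2}/\d{4}|today)\s*\(([^)]+)\)'
for s in ("16/09/2025 (Household)", "today (Recycling)"):
    token, bin_type = re.findall(pattern, s)[0]
    when = datetime.today() if token == "today" else datetime.strptime(token, "%d/%m/%Y")
    print(bin_type, when.date())
```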
uk_bin_collection/uk_bin_collection/councils/FolkstoneandHytheDistrictCouncil.py
@@ -74,6 +74,28 @@ class CouncilClass(AbstractGetBinDataClass):
             }
             bindata["bins"].append(dict_data)
 
+        # Extract the Garden Waste schedule
+        garden_waste_section = soup.find(
+            "span", text=lambda x: x and "Garden Waste" in x
+        )
+        if garden_waste_section:
+            bin_types = garden_waste_section.text.replace("Garden Waste: ", "").split(
+                " / "
+            )
+            garden_waste_dates = garden_waste_section.find_next("ul").find_all("li")
+            for date in garden_waste_dates:
+                for bin_type in bin_types:
+                    dict_data = {
+                        "type": bin_type.strip(),
+                        "collectionDate": datetime.strptime(
+                            remove_ordinal_indicator_from_date_string(
+                                date.text.strip()
+                            ),
+                            "%A %d %B %Y",
+                        ).strftime("%d/%m/%Y"),
+                    }
+                    bindata["bins"].append(dict_data)
+
         bindata["bins"].sort(
             key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
         )
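The Folkestone & Hythe addition locates the `Garden Waste` heading span and reads the dates from the `<ul>` that follows it. A self-contained sketch of that find-then-`find_next` pattern on illustrative markup (`string=` is the modern spelling of the deprecated `text=` argument used in the diff):

```python
from bs4 import BeautifulSoup

HTML = """
<span>Garden Waste: Brown bin / Sacks</span>
<ul><li>Monday 1st September 2025</li><li>Monday 15th September 2025</li></ul>
"""
soup = BeautifulSoup(HTML, "html.parser")
span = soup.find("span", string=lambda x: x and "Garden Waste" in x)
types = span.text.replace("Garden Waste: ", "").split(" / ")
dates = [li.text for li in span.find_next("ul").find_all("li")]
print(types, dates)
# ['Brown bin', 'Sacks'] ['Monday 1st September 2025', 'Monday 15th September 2025']
```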
uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py
@@ -18,7 +18,7 @@ class CouncilClass(AbstractGetBinDataClass):
         try:
             user_uprn = kwargs.get("uprn")
             check_uprn(user_uprn)
-            url = f"https://onlineservices.glasgow.gov.uk/forms/…
+            url = f"https://onlineservices.glasgow.gov.uk/forms/refuseandrecyclingcalendar/CollectionsCalendar.aspx?UPRN={user_uprn}"
             if not user_uprn:
                 # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
                 url = kwargs.get("url")
uk_bin_collection/uk_bin_collection/councils/HartlepoolBoroughCouncil.py
@@ -73,7 +73,9 @@ class CouncilClass(AbstractGetBinDataClass):
         for div in soup.find_all("div"):
             # Extract bin type and date from the span tag
             text = div.find("span").text.strip()
-            [… not captured …]
+            parts = text.split(" ")
+            date = parts[-1]  # assume the last token is the date
+            bin_type = " ".join(parts[:-1])
             dict_data = {
                 "type": bin_type,
                 "collectionDate": date,
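Hartlepool's span text mixes the bin type and the date in one string; the new code treats the final whitespace-separated token as the date and everything before it as the type (so it assumes the date itself never contains spaces). For example, with an invented sample string:

```python
text = "Refuse and garden waste 16/09/2025"
parts = text.split(" ")
date, bin_type = parts[-1], " ".join(parts[:-1])
print(bin_type, "->", date)  # Refuse and garden waste -> 16/09/2025
```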
uk_bin_collection/uk_bin_collection/councils/HinckleyandBosworthBoroughCouncil.py
@@ -20,10 +20,16 @@ class CouncilClass(AbstractGetBinDataClass):
         check_uprn(user_uprn)
         bindata = {"bins": []}
 
+        headers = {
+            "Origin": "https://www.hinckley-bosworth.gov.uk",
+            "Referer": "https://www.hinckley-bosworth.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         URI = f"https://www.hinckley-bosworth.gov.uk/set-location?id={user_uprn}&redirect=refuse&rememberloc="
 
         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)
 
         # Parse the HTML
         soup = BeautifulSoup(response.content, "html.parser")
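This release adds browser-like `Origin`, `Referer`, and `User-Agent` headers to several councils' requests; the same header trio appears in the Lichfield and North East Lincolnshire hunks further down, presumably because the default `python-requests` user agent was being rejected. The shared pattern as a reusable helper (hypothetical, not part of the package):

```python
import requests

def council_get(url: str, site: str) -> requests.Response:
    # Mimic an ordinary browser request; some council sites block the
    # default python-requests User-Agent.
    headers = {"Origin": site, "Referer": site, "User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response
```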
uk_bin_collection/uk_bin_collection/councils/IpswichBoroughCouncil.py
@@ -31,7 +31,9 @@ class CouncilClass(AbstractGetBinDataClass):
     IBC_ENDPOINT = "https://app.ipswich.gov.uk/bin-collection/"
 
     def transform_date(self, date_str):
-        date_str = re.sub(…
+        date_str = re.sub(
+            r"(\d{1,2})(st|nd|rd|th)", r"\1", date_str
+        )  # Remove ordinal suffixes
         date_obj = datetime.strptime(date_str, "%A %d %B %Y")
         return date_obj.strftime(date_format)
 
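Stripping ordinal suffixes (`st`/`nd`/`rd`/`th`) before `strptime` is a recurring trick in these scrapers, since `%d` cannot parse `16th`. In isolation:

```python
import re
from datetime import datetime

date_str = re.sub(r"(\d{1,2})(st|nd|rd|th)", r"\1", "Tuesday 16th September 2025")
print(datetime.strptime(date_str, "%A %d %B %Y").date())  # 2025-09-16
```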
uk_bin_collection/uk_bin_collection/councils/LichfieldDistrictCouncil.py
@@ -24,10 +24,16 @@ class CouncilClass(AbstractGetBinDataClass):
         def solve(s):
             return re.sub(r"(\d)(st|nd|rd|th)", r"\1", s)
 
+        headers = {
+            "Origin": "https://www.lichfielddc.gov.uk",
+            "Referer": "https://www.lichfielddc.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         URI = f"https://www.lichfielddc.gov.uk/homepage/6/bin-collection-dates?uprn={user_uprn}"
 
         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)
 
         soup = BeautifulSoup(response.text, "html.parser")
 
uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py
@@ -1,5 +1,7 @@
 import pandas as pd
+import requests
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import date_format
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -12,15 +14,26 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-        [… 2 lines not captured in this view …]
+        user_url = kwargs.get("url")
+
+        headers = {
+            "Origin": "https://www.nelincs.gov.uk",
+            "Referer": "https://www.nelincs.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
+        # Make the GET request
+        response = requests.get(user_url, headers=headers)
+
+        # Parse the HTML
+        soup = BeautifulSoup(response.content, "html.parser")
         soup.prettify()
 
         data = {"bins": []}
 
         # Get list items that can be seen on page
         for element in soup.find_all(
-            "li", {"class": "list-group-item p-…
+            "li", {"class": "border-0 list-group-item p-3 bg-light rounded p-2"}
         ):
             element_text = element.text.strip().split("\n\n")
             element_text = [x.strip() for x in element_text]
@@ -35,9 +48,7 @@ class CouncilClass(AbstractGetBinDataClass):
             data["bins"].append(dict_data)
 
         # Get hidden list items too
-        for element in soup.find_all(
-            "li", {"class": "list-group-item p-0 p-3 bin-collection-item d-none"}
-        ):
+        for element in soup.find_all("li", {"class": "border-0 list-group-item p-3"}):
             element_text = element.text.strip().split("\n\n")
             element_text = [x.strip() for x in element_text]
 
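Worth noting for the new North East Lincolnshire selectors: passing a multi-class string such as `"border-0 list-group-item p-3"` to `find_all` makes BeautifulSoup match the `class` attribute as one exact string, so the lookup silently fails if the site ever reorders its classes. A CSS `select` is order-independent; a sketch of the difference:

```python
from bs4 import BeautifulSoup

soup = BeautifulSoup('<li class="list-group-item border-0 p-3">x</li>', "html.parser")

# Exact-string match: finds nothing here because the attribute order differs
print(soup.find_all("li", {"class": "border-0 list-group-item p-3"}))  # []

# CSS selector: matches regardless of class order
print(soup.select("li.border-0.list-group-item.p-3"))  # [<li class="...">x</li>]
```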
uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py
@@ -125,23 +125,6 @@ class CouncilClass(AbstractGetBinDataClass):
         # Wait for the page to load - giving it extra time
         time.sleep(5)
 
-        # Use only the selector that we know works
-        # print("Looking for bin type elements...")
-        try:
-            bin_type_selector = (
-                By.CSS_SELECTOR,
-                "div.formatting_bold.formatting_size_bigger.formatting span.value-as-text",
-            )
-            WebDriverWait(driver, 15).until(
-                EC.presence_of_element_located(bin_type_selector)
-            )
-            # print(f"Found bin type elements with selector: {bin_type_selector}")
-        except TimeoutException:
-            # print("Could not find bin type elements. Taking screenshot for debugging...")
-            screenshot_path = f"bin_type_error_{int(time.time())}.png"
-            driver.save_screenshot(screenshot_path)
-            # print(f"Screenshot saved to {screenshot_path}")
-
         # Create BS4 object from driver's page source
         # print("Parsing page with BeautifulSoup...")
         soup = BeautifulSoup(driver.page_source, features="html.parser")
@@ -149,122 +132,37 @@ class CouncilClass(AbstractGetBinDataClass):
         # Initialize data dictionary
         data = {"bins": []}
 
-        [… 6 lines not captured in this view …]
-        # Look specifically for date elements with the exact structure
-        date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
-        hidden_dates = soup.select(
-            "div.col-sm-12.font-xs-3xl input[type='hidden'][value*='/']"
-        )
-
-        # print(f"Found {len(bin_type_elements)} bin types and {len(date_elements)} date elements")
-
-        # We need a smarter way to match bin types with their dates
-        bin_count = 0
+        for row in soup.select(".listing_template_row"):
+            # Title (waste stream) is the first <p> in the section
+            first_p = row.find("p")
+            if not first_p:
+                continue
+            stream = first_p.get_text(" ", strip=True)
 
-        [… 2 lines not captured in this view …]
+            for p in row.find_all("p"):
+                t = p.get_text("\n", strip=True)
 
-        [… 5 lines not captured in this view …]
-        )
+                if re.search(r"\bNext collection\b", t, flags=re.I):
+                    # Expect format: "Next collection\nTuesday 16th September 2025"
+                    parts = [x.strip() for x in t.split("\n") if x.strip()]
+                    if len(parts) >= 2:
+                        next_collection_display = parts[-1]  # last line
 
-        [… 6 lines not captured in this view …]
-        # Find hidden date inputs with values in DD/MM/YYYY format
-        hidden_date_values = []
-        for hidden in hidden_dates:
-            value = hidden.get("value", "")
-            if re.match(r"\d{1,2}/\d{1,2}/\d{4}", value):
-                hidden_date_values.append(value)
-                # print(f"Found hidden date value: {value}")
-
-        # When filtering date elements
-        date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
-        valid_date_elements = []
-
-        for element in date_elements:
-            text = element.get_text(strip=True)
-            if contains_date(text):
-                valid_date_elements.append(element)
-                # print(f"Found valid date element: {text}")
-            else:
-                pass
-                # print(f"Skipping non-date element: {text}")
-
-        # print(f"Found {len(bin_type_elements)} bin types and {len(valid_date_elements)} valid date elements")
-
-        # When processing each bin type
-        for i, bin_type_elem in enumerate(bin_type_elements):
-            bin_type = bin_type_elem.get_text(strip=True)
-
-            # Try to find a date for this bin type
-            date_text = None
-
-            # Look for a valid date element
-            if i < len(valid_date_elements):
-                date_elem = valid_date_elements[i]
-                date_text = date_elem.get_text(strip=True)
-
-            # If we don't have a valid date yet, try using the hidden input
-            if not date_text or not contains_date(date_text):
-                if i < len(hidden_dates):
-                    date_value = hidden_dates[i].get("value")
-                    if contains_date(date_value):
-                        date_text = date_value
-
-            # Skip if we don't have a valid date
-            if not date_text or not contains_date(date_text):
-                # print(f"No valid date found for bin type: {bin_type}")
-                continue
+                    # Build record
+                    next_date = datetime.strptime(
+                        remove_ordinal_indicator_from_date_string(next_collection_display),
+                        "%A %d %B %Y",
+                    )
 
-        #…
+                    # Create bin entry
+                    bin_entry = {
+                        "type": stream,
+                        "collectionDate": next_date.strftime(date_format),
+                    }
 
-        [… 4 lines not captured in this view …]
-            # Try to parse the date
-            try:
-                collection_date = datetime.strptime(
-                    date_text, "%A %d %B %Y"
-                ).date()
-            except ValueError:
-                try:
-                    collection_date = datetime.strptime(
-                        date_text, "%d/%m/%Y"
-                    ).date()
-                except ValueError:
-                    # Last resort
-                    collection_date = parse(date_text).date()
-
-            # Create bin entry
-            bin_entry = {
-                "type": bin_type,
-                "collectionDate": collection_date.strftime(date_format),
-            }
-
-            # Add to data
-            data["bins"].append(bin_entry)
-            bin_count += 1
-            # print(f"Added bin entry: {bin_entry}")
-
-        except Exception as e:
-            pass
-            # print(f"Error parsing date '{date_text}': {str(e)}")
-
-        # print(f"Successfully parsed {bin_count} bin collections")
+                    # Add to data
+                    data["bins"].append(bin_entry)
+                    # print(f"Added bin entry: {bin_entry}")
 
         if not data["bins"]:
             # print("No bin data found. Saving page for debugging...")