uk_bin_collection 0.102.0__py3-none-any.whl → 0.104.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +38 -1
- uk_bin_collection/uk_bin_collection/councils/AberdeenshireCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/BasildonCouncil.py +42 -39
- uk_bin_collection/uk_bin_collection/councils/BelfastCityCouncil.py +13 -8
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +24 -21
- uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/CanterburyCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +25 -10
- uk_bin_collection/uk_bin_collection/councils/CornwallCouncil.py +21 -20
- uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py +16 -18
- uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py +10 -4
- uk_bin_collection/uk_bin_collection/councils/IslingtonCouncil.py +6 -4
- uk_bin_collection/uk_bin_collection/councils/LutonBoroughCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py +37 -20
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +11 -9
- uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py +1 -2
- uk_bin_collection/uk_bin_collection/councils/RotherhamCouncil.py +8 -6
- uk_bin_collection/uk_bin_collection/councils/SwindonBoroughCouncil.py +56 -0
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +21 -11
- uk_bin_collection/uk_bin_collection/councils/WestOxfordshireDistrictCouncil.py +113 -0
- uk_bin_collection/uk_bin_collection/councils/WokinghamBoroughCouncil.py +1 -1
- {uk_bin_collection-0.102.0.dist-info → uk_bin_collection-0.104.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.102.0.dist-info → uk_bin_collection-0.104.0.dist-info}/RECORD +26 -21
- {uk_bin_collection-0.102.0.dist-info → uk_bin_collection-0.104.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.102.0.dist-info → uk_bin_collection-0.104.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.102.0.dist-info → uk_bin_collection-0.104.0.dist-info}/entry_points.txt +0 -0
uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py

```diff
@@ -56,9 +56,7 @@ class CouncilClass(AbstractGetBinDataClass):
         postcode_input.send_keys(user_postcode)
 
         find_address_button = WebDriverWait(driver, 10).until(
-            EC.presence_of_element_located(
-                (By.ID, 'submitButton0')
-            )
+            EC.presence_of_element_located((By.ID, "submitButton0"))
         )
         find_address_button.click()
 
```
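The collapsed call above is Selenium's standard explicit-wait idiom. As a self-contained sketch of what the scraper does at this step (the URL and driver setup are placeholders; only the `submitButton0` locator comes from the diff):

```python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

driver = webdriver.Chrome()
driver.get("https://example.gov.uk/find-my-bin-day")  # placeholder URL

# Poll the DOM for up to 10 seconds until the element exists, then click it.
find_address_button = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, "submitButton0"))
)
find_address_button.click()
```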
```diff
@@ -80,7 +78,7 @@ class CouncilClass(AbstractGetBinDataClass):
         template_parts = first_option.split(", ")
         template_parts[0] = user_paon  # Replace the first part with user_paon
 
-        addr_label =
+        addr_label = ", ".join(template_parts)
         for addr_option in select.options:
             option_name = addr_option.accessible_name[0 : len(addr_label)]
             if option_name == addr_label:
```
```diff
@@ -100,32 +98,27 @@ class CouncilClass(AbstractGetBinDataClass):
         # Find the div with the specified id
         target_div = soup.find("div", {"id": target_div_id})
 
-
         # Check if the div is found
         if target_div:
             bin_data = {"bins": []}
 
-            for bin_div in target_div.find_all(
-                "div"
-            ):
+            for bin_div in target_div.find_all("div"):
                 # Extract the collection date from the message
                 try:
                     bin_collection_message = bin_div.find("p").text.strip()
                     date_pattern = r"\b\d{2}/\d{2}/\d{4}\b"
 
                     collection_date_string = (
-
-
-
-
-
+                        re.search(date_pattern, bin_div.text)
+                        .group(0)
+                        .strip()
+                        .replace(",", "")
+                    )
                 except AttributeError:
                     continue
 
                 current_date = datetime.now()
-                parsed_date = datetime.strptime(
-                    collection_date_string, "%d/%m/%Y"
-                )
+                parsed_date = datetime.strptime(collection_date_string, "%d/%m/%Y")
                 # Check if the parsed date is in the past and not today
                 if parsed_date.date() < current_date.date():
                     # If so, set the year to the next year
```
```diff
@@ -137,9 +130,14 @@ class CouncilClass(AbstractGetBinDataClass):
                     contains_date(formatted_date)
 
                 # Extract the bin type from the message
-                bin_type_match = re.search(
+                bin_type_match = re.search(
+                    r"Your next (.*?) collection", bin_collection_message
+                )
                 if bin_type_match:
-                    bin_info = {
+                    bin_info = {
+                        "type": bin_type_match.group(1),
+                        "collectionDate": formatted_date,
+                    }
                     bin_data["bins"].append(bin_info)
         else:
             raise ValueError("Collection data not found.")
```
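A detail worth noting in the hunks above: a parsed `dd/mm/YYYY` date that already lies in the past is pushed into the following year, so a schedule page that has not rolled over yet still yields a future date. A minimal sketch of that guard (the helper name is illustrative, not from the package):

```python
from datetime import datetime

def roll_forward(date_string: str, fmt: str = "%d/%m/%Y") -> datetime:
    """Parse a scraped date and push past dates into the next year."""
    parsed = datetime.strptime(date_string, fmt)
    if parsed.date() < datetime.now().date():
        parsed = parsed.replace(year=parsed.year + 1)
    return parsed
```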
uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py

```diff
@@ -16,7 +16,7 @@ class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         data = {"bins": []}
         collections = []
-        selected_collections = kwargs.get("paon").split(
+        selected_collections = kwargs.get("paon").split(",")
        calendar_urls = []
         run_date = datetime.now().date()
 
```
```diff
@@ -25,9 +25,13 @@ class CouncilClass(AbstractGetBinDataClass):
         for item in selected_collections:
             item = item.strip().lower().replace(" ", "_")
             if has_numbers(item):
-                calendar_urls.append(
+                calendar_urls.append(
+                    f"https://www.gbcbincalendars.co.uk/json/gedling_borough_council_{item}_bin_schedule.json"
+                )
             else:
-                calendar_urls.append(
+                calendar_urls.append(
+                    f"https://www.gbcbincalendars.co.uk/json/gedling_borough_council_{item}_garden_bin_schedule.json"
+                )
 
         # Parse each URL and load future data
         for url in calendar_urls:
```
```diff
@@ -36,7 +40,9 @@ class CouncilClass(AbstractGetBinDataClass):
                 raise ConnectionError(f"Could not get response from: {url}")
             json_data = response.json()["collectionDates"]
             for col in json_data:
-                bin_date = datetime.strptime(
+                bin_date = datetime.strptime(
+                    col.get("collectionDate"), "%Y-%m-%d"
+                ).date()
                 if bin_date >= run_date:
                     collections.append((col.get("alternativeName"), bin_date))
 
```
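The two endpoints chosen above differ only in their suffix: entries containing a digit map to the refuse/recycling schedule, plain entries to the garden-waste one. A condensed sketch of that selection, with a local stand-in for the package's `has_numbers` helper (the example arguments are invented):

```python
def has_numbers(value: str) -> bool:
    # Stand-in for the helper imported from uk_bin_collection.common.
    return any(ch.isdigit() for ch in value)

def calendar_url(item: str) -> str:
    base = "https://www.gbcbincalendars.co.uk/json/gedling_borough_council_"
    suffix = "_bin_schedule.json" if has_numbers(item) else "_garden_bin_schedule.json"
    return f"{base}{item}{suffix}"

print(calendar_url("friday_g4"))  # refuse/recycling calendar
print(calendar_url("friday"))     # garden-waste calendar
```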
uk_bin_collection/uk_bin_collection/councils/IslingtonCouncil.py

```diff
@@ -29,9 +29,11 @@ class CouncilClass(AbstractGetBinDataClass):
             waste_type = row.find("th").text.strip()
             next_collection = parse(row.find("td").text.strip()).date()
 
-            data[
-
-
-
+            data["bins"].append(
+                {
+                    "type": waste_type,
+                    "collectionDate": next_collection.strftime(date_format),
+                }
+            )
 
         return data
```
uk_bin_collection/uk_bin_collection/councils/LutonBoroughCouncil.py (new file)

```diff
@@ -0,0 +1,81 @@
+import time
+
+import requests
+from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        SESSION_URL = "https://myforms.luton.gov.uk/authapi/isauthenticated?uri=https%253A%252F%252Fmyforms.luton.gov.uk%252Fservice%252FFind_my_bin_collection_date&hostname=myforms.luton.gov.uk&withCredentials=true"
+
+        API_URL = "https://myforms.luton.gov.uk/apibroker/runLookup"
+
+        data = {
+            "formValues": {
+                "Find my bin collection date": {
+                    "id": {
+                        "value": f"1-{user_uprn}",
+                    },
+                },
+            }
+        }
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+            "User-Agent": "Mozilla/5.0",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": "https://myforms.luton.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=",
+        }
+        s = requests.session()
+        r = s.get(SESSION_URL)
+        r.raise_for_status()
+        session_data = r.json()
+        sid = session_data["auth-session"]
+        params = {
+            "id": "65cb710f8d525",
+            "repeat_against": "",
+            "noRetry": "true",
+            "getOnlyTokens": "undefined",
+            "log_id": "",
+            "app_name": "AF-Renderer::Self",
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+        r = s.post(API_URL, json=data, headers=headers, params=params)
+        r.raise_for_status()
+        data = r.json()
+        rows_data = data["integration"]["transformed"]["rows_data"][f"{user_uprn}"]
+
+        soup = BeautifulSoup(rows_data["html"], features="html.parser")
+        soup.prettify()
+        for collection in soup.find_all("tr"):
+            tds = collection.find_all("td")
+            bin_type = tds[1].text
+            collection_date = datetime.strptime(
+                tds[0].text,
+                "%A %d %b %Y",
+            )
+            dict_data = {
+                "type": bin_type,
+                "collectionDate": collection_date.strftime(date_format),
+            }
+            bindata["bins"].append(dict_data)
+
+        return bindata
```
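The new Luton scraper follows a common self-service-portal pattern: an unauthenticated GET to the `isauthenticated` endpoint yields a session id (`auth-session`), which then authorises a `runLookup` POST carrying the UPRN. Condensed to its essentials (URLs and field names as in the file above; the full implementation also sends JSON headers and extra query params, which the endpoint may require):

```python
import time

import requests

SESSION_URL = "https://myforms.luton.gov.uk/authapi/isauthenticated?uri=https%253A%252F%252Fmyforms.luton.gov.uk%252Fservice%252FFind_my_bin_collection_date&hostname=myforms.luton.gov.uk&withCredentials=true"
API_URL = "https://myforms.luton.gov.uk/apibroker/runLookup"

def fetch_rows(uprn: str) -> dict:
    s = requests.session()
    sid = s.get(SESSION_URL).json()["auth-session"]  # session id gates the lookup
    params = {"id": "65cb710f8d525", "sid": sid, "_": str(int(time.time() * 1000))}
    payload = {
        "formValues": {"Find my bin collection date": {"id": {"value": f"1-{uprn}"}}}
    }
    r = s.post(API_URL, json=payload, params=params)
    r.raise_for_status()
    return r.json()["integration"]["transformed"]["rows_data"][uprn]
```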
uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py

```diff
@@ -7,6 +7,7 @@ from bs4 import BeautifulSoup
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
+
 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
 
```
```diff
@@ -14,7 +15,9 @@ class CouncilClass(AbstractGetBinDataClass):
         check_postcode(user_postcode)
 
         # Fetch the page content
-        root_url = "https://myproperty.molevalley.gov.uk/molevalley/api/live_addresses/{}?format=json".format(
+        root_url = "https://myproperty.molevalley.gov.uk/molevalley/api/live_addresses/{}?format=json".format(
+            user_postcode
+        )
         response = requests.get(root_url, verify=False)
 
         if not response.ok:
```
```diff
@@ -63,23 +66,27 @@ class CouncilClass(AbstractGetBinDataClass):
         if bins_panel:
             panel = bins_panel.find_parent("div", class_="panel")
             print("Found 'Bins and Recycling' panel.")
-
+
             # Extract bin collection info from the un-commented HTML
             for strong_tag in panel.find_all("strong"):
                 bin_type = strong_tag.text.strip()
                 collection_string = strong_tag.find_next("p").text.strip()
-
+
                 # Debugging output
                 print(f"Processing bin type: {bin_type}")
                 print(f"Collection string: {collection_string}")
-
+
                 match = regex_date.search(collection_string)
                 if match:
-                    collection_date = datetime.strptime(
-
-
-
-
+                    collection_date = datetime.strptime(
+                        match.group(1), "%d/%m/%Y"
+                    ).date()
+                    data["bins"].append(
+                        {
+                            "type": bin_type,
+                            "collectionDate": collection_date.strftime("%d/%m/%Y"),
+                        }
+                    )
                     all_collection_dates.append(collection_date)
                 else:
                     # Add a debug line to show which collections are missing dates
```
```diff
@@ -88,7 +95,7 @@ class CouncilClass(AbstractGetBinDataClass):
             # Search for additional collections like electrical and textiles
             for p in panel.find_all("p"):
                 additional_match = regex_additional_collection.match(p.text.strip())
-
+
                 # Debugging output for additional collections
                 if additional_match:
                     bin_type = additional_match.group(1)
```
```diff
@@ -96,23 +103,33 @@ class CouncilClass(AbstractGetBinDataClass):
                     if "each collection day" in additional_match.group(2):
                         if all_collection_dates:
                             collection_date = min(all_collection_dates)
-                            data["bins"].append(
-
-
-
+                            data["bins"].append(
+                                {
+                                    "type": bin_type,
+                                    "collectionDate": collection_date.strftime(
+                                        "%d/%m/%Y"
+                                    ),
+                                }
+                            )
                         else:
-                            print(
+                            print(
+                                "No collection dates available for additional collection."
+                            )
                             raise ValueError("No valid bin collection dates found.")
                 else:
-                    print(
+                    print(
+                        f"No additional collection found in paragraph: {p.text.strip()}"
+                    )
         else:
-            raise ValueError(
+            raise ValueError(
+                "Unable to find 'Bins and Recycling' panel in the HTML data."
+            )
 
         # Debugging to check collected data
         print(f"Collected bin data: {data}")
-
+
         # Handle the case where no collection dates were found
         if not all_collection_dates:
             raise ValueError("No valid collection dates were found in the data.")
-
-        return data
+
+        return data
```
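One side effect of the `verify=False` request earlier in this file: urllib3 emits an `InsecureRequestWarning` on every call. If that noise matters, it can be silenced explicitly; a sketch (the suppression and the example postcode are not part of the diff):

```python
import requests
import urllib3

# Acknowledge that certificate checks are deliberately skipped for this host.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

root_url = "https://myproperty.molevalley.gov.uk/molevalley/api/live_addresses/{}?format=json".format(
    "RH4 1SJ"  # example postcode
)
response = requests.get(root_url, verify=False)
```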
uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py

```diff
@@ -23,7 +23,7 @@ class CouncilClass(AbstractGetBinDataClass):
         # Get the first form
         response = s.get(
             "https://my.northtyneside.gov.uk/category/81/bin-collection-dates",
-            verify
+            verify=False,
         )
 
         # Find the form ID and submit with a postcode
```
```diff
@@ -31,13 +31,13 @@ class CouncilClass(AbstractGetBinDataClass):
         form_build_id = soup.find("input", {"name": "form_build_id"})["value"]
         response = s.post(
             "https://my.northtyneside.gov.uk/category/81/bin-collection-dates",
-            data
+            data={
                 "postcode": user_postcode,
                 "op": "Find",
                 "form_build_id": form_build_id,
                 "form_id": "ntc_address_wizard",
             },
-            verify
+            verify=False,
         )
 
         # Find the form ID and submit with the UPRN
```
```diff
@@ -45,18 +45,18 @@ class CouncilClass(AbstractGetBinDataClass):
         form_build_id = soup.find("input", {"name": "form_build_id"})["value"]
         response = s.post(
             "https://my.northtyneside.gov.uk/category/81/bin-collection-dates",
-            data
+            data={
                 "house_number": f"0000{user_uprn}",
                 "op": "Use",
                 "form_build_id": form_build_id,
                 "form_id": "ntc_address_wizard",
             },
-            verify
+            verify=False,
         )
 
         # Parse form page and get the day of week and week offsets
         soup = BeautifulSoup(response.text, features="html.parser")
-        info_section
+        info_section = soup.find("section", {"class": "block block-ntc-bins clearfix"})
 
         regular_day, garden_day, special_day = None, None, None
         # Get day of week and week label for refuse, garden and special collections.
```
```diff
@@ -82,7 +82,9 @@ class CouncilClass(AbstractGetBinDataClass):
         weeks_total = math.floor((datetime(2026, 4, 1) - datetime.now()).days / 7)
 
         # The garden calendar only shows until end of November 2024, work out how many weeks that is
-        garden_weeks_total = math.floor(
+        garden_weeks_total = math.floor(
+            (datetime(2024, 12, 1) - datetime.now()).days / 7
+        )
 
         regular_collections, garden_collections, special_collections = [], [], []
         # Convert day text to series of dates using previous calculation
```
```diff
@@ -134,10 +136,10 @@ class CouncilClass(AbstractGetBinDataClass):
 
         return {
             "bins": [
-
+                {
                     "type": item[0],
                     "collectionDate": item[1].strftime(date_format),
                 }
                 for item in sorted(collections, key=lambda x: x[1])
             ]
-        }
+        }
```
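The week arithmetic in the hunks above caps how many dates the scraper generates from a weekday name on the page. As a rough sketch of that expansion (names and the fortnightly option are illustrative; the real code also applies week-offset labels scraped from the form):

```python
import math
from datetime import datetime, timedelta

DAYS = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

def expand(day_name: str, until: datetime, every_n_weeks: int = 1) -> list:
    """Turn a weekday name into concrete dates up to a calendar cut-off."""
    now = datetime.now()
    # Next occurrence of the named weekday (today counts if it matches).
    first = now + timedelta(days=(DAYS.index(day_name) - now.weekday()) % 7)
    weeks_total = math.floor((until - now).days / 7)
    return [first + timedelta(weeks=w) for w in range(0, weeks_total, every_n_weeks)]

garden = expand("Tuesday", datetime(2024, 12, 1), every_n_weeks=2)
```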
uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py

```diff
@@ -36,8 +36,7 @@ class CouncilClass(AbstractGetBinDataClass):
         collection_date = datetime.strptime(
             remove_ordinal_indicator_from_date_string(
                 week_text[0].split(" - ")[0]
-            )
-            .strip(),
+            ).strip(),
             "%A %d %B",
         )
         next_collection = collection_date.replace(year=datetime.now().year)
```
uk_bin_collection/uk_bin_collection/councils/RotherhamCouncil.py

```diff
@@ -17,7 +17,9 @@ class CouncilClass(AbstractGetBinDataClass):
         check_uprn(user_uprn)
 
         response = requests.post(
-            "https://www.rotherham.gov.uk/bin-collections?address={}&submit=Submit".format(
+            "https://www.rotherham.gov.uk/bin-collections?address={}&submit=Submit".format(
+                user_uprn
+            )
         )
         # Make a BS4 object
         soup = BeautifulSoup(response.text, features="html.parser")
```
```diff
@@ -25,15 +27,15 @@ class CouncilClass(AbstractGetBinDataClass):
 
         data = {"bins": []}
 
-        table = soup.select(
+        table = soup.select("table")[0]
 
         if table:
-            rows = table.select(
+            rows = table.select("tr")
 
             for index, row in enumerate(rows):
-                bin_info_cell = row.select(
-                if bin_info_cell:
-                    bin_type = bin_info_cell[0].get_text(separator=
+                bin_info_cell = row.select("td")
+                if bin_info_cell:
+                    bin_type = bin_info_cell[0].get_text(separator=" ", strip=True)
                     bin_collection = bin_info_cell[1]
 
                     if bin_collection:
```
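The switch to `get_text(separator=" ", strip=True)` above matters when a cell contains nested tags: without a separator, BeautifulSoup concatenates the text fragments directly. A quick illustration:

```python
from bs4 import BeautifulSoup

cell = BeautifulSoup("<td>Green<br>bin</td>", "html.parser").td
print(cell.get_text(strip=True))                 # "Greenbin" - words run together
print(cell.get_text(separator=" ", strip=True))  # "Green bin"
```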
uk_bin_collection/uk_bin_collection/councils/SwindonBoroughCouncil.py (new file)

```diff
@@ -0,0 +1,56 @@
+import time
+
+import requests
+from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        URI = f"https://www.swindon.gov.uk/info/20122/rubbish_and_recycling_collection_days?addressList={user_uprn}&uprnSubmit=Yes"
+
+        # Make the GET request
+        response = requests.get(URI)
+
+        # Parse the JSON response
+        soup = BeautifulSoup(response.text, "html.parser")
+
+        bin_collection_content = soup.find_all(
+            "div", {"class": "bin-collection-content"}
+        )
+        for content in bin_collection_content:
+            content_left = content.find("div", {"class": "content-left"})
+            content_right = content.find("div", {"class": "content-right"})
+            if content_left and content_right:
+
+                bin_types = content_left.find("h3").text.split(" and ")
+                for bin_type in bin_types:
+
+                    collection_date = datetime.strptime(
+                        content_right.find(
+                            "span", {"class": "nextCollectionDate"}
+                        ).text,
+                        "%A, %d %B %Y",
+                    ).strftime(date_format)
+
+                    dict_data = {
+                        "type": bin_type,
+                        "collectionDate": collection_date,
+                    }
+                    bindata["bins"].append(dict_data)
+
+        return bindata
```
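One detail of the new parser worth calling out: a heading such as "Rubbish and Recycling" is split on `" and "`, so each bin type gets its own entry sharing the same `nextCollectionDate`. A toy run of that logic (the sample strings are invented):

```python
from datetime import datetime

heading = "Rubbish and Recycling"      # example h3 text
date_text = "Monday, 4 November 2024"  # example .nextCollectionDate text

collection_date = datetime.strptime(date_text, "%A, %d %B %Y").strftime("%d/%m/%Y")
bins = [{"type": t, "collectionDate": collection_date} for t in heading.split(" and ")]
# -> two entries, "Rubbish" and "Recycling", both dated 04/11/2024
```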
uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py

```diff
@@ -3,6 +3,7 @@ from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 from datetime import datetime
 
+
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete class to scrape bin collection data.
```
```diff
@@ -25,23 +26,23 @@ class CouncilClass(AbstractGetBinDataClass):
         data = {"bins": []}
         # Locate the section with bin collection data
         sections = soup.find_all("div", {"class": "wil_c-content-section_heading"})
-
+
         for s in sections:
             if s.get_text(strip=True).lower() == "bin collections":
                 rows = s.find_next_sibling(
                     "div", {"class": "c-content-section_body"}
                 ).find_all("div", class_="tablet:l-col-fb-4 u-mt-10")
-
+
                 for row in rows:
                     title_elem = row.find("div", class_="u-mb-4")
                     if title_elem:
                         title = title_elem.get_text(strip=True).capitalize()
-
+
                         # Find all collection info in the same section
                         collections = row.find_all("div", class_="u-mb-2")
                         for c in collections:
                             text = c.get_text(strip=True).lower()
-
+
                             if "next collection" in text:
                                 date_text = text.replace("next collection - ", "")
                                 try:
```
```diff
@@ -51,34 +52,43 @@ class CouncilClass(AbstractGetBinDataClass):
 
                                     dict_data = {
                                         "type": title,
-                                        "collectionDate": next_collection_date
+                                        "collectionDate": next_collection_date,
                                     }
                                     data["bins"].append(dict_data)
                                 except ValueError:
                                     # Skip if the date isn't a valid date
                                     print(f"Skipping invalid date: {date_text}")
-
+
                             # Get future collections
                             future_collections_section = row.find("ul", class_="u-mt-4")
                             if future_collections_section:
-                                future_collections =
+                                future_collections = (
+                                    future_collections_section.find_all("li")
+                                )
                                 for future_collection in future_collections:
-                                    future_date_text = future_collection.get_text(
+                                    future_date_text = future_collection.get_text(
+                                        strip=True
+                                    )
                                     try:
                                         future_collection_date = datetime.strptime(
                                             future_date_text, "%A, %d %B %Y"
                                         ).strftime(date_format)
 
                                         # Avoid duplicates of next collection date
-                                        if
+                                        if (
+                                            future_collection_date
+                                            != next_collection_date
+                                        ):
                                             dict_data = {
                                                 "type": title,
-                                                "collectionDate": future_collection_date
+                                                "collectionDate": future_collection_date,
                                             }
                                             data["bins"].append(dict_data)
                                     except ValueError:
                                         # Skip if the future collection date isn't valid
-                                        print(
+                                        print(
+                                            f"Skipping invalid future date: {future_date_text}"
+                                        )
 
         # Sort the collections by date
         data["bins"].sort(
```