uk_bin_collection 0.102.0__py3-none-any.whl → 0.104.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- uk_bin_collection/tests/input.json +38 -1
- uk_bin_collection/uk_bin_collection/councils/AberdeenshireCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/BasildonCouncil.py +42 -39
- uk_bin_collection/uk_bin_collection/councils/BelfastCityCouncil.py +13 -8
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +24 -21
- uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/CanterburyCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +25 -10
- uk_bin_collection/uk_bin_collection/councils/CornwallCouncil.py +21 -20
- uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py +16 -18
- uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py +10 -4
- uk_bin_collection/uk_bin_collection/councils/IslingtonCouncil.py +6 -4
- uk_bin_collection/uk_bin_collection/councils/LutonBoroughCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py +37 -20
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +11 -9
- uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py +1 -2
- uk_bin_collection/uk_bin_collection/councils/RotherhamCouncil.py +8 -6
- uk_bin_collection/uk_bin_collection/councils/SwindonBoroughCouncil.py +56 -0
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +21 -11
- uk_bin_collection/uk_bin_collection/councils/WestOxfordshireDistrictCouncil.py +113 -0
- uk_bin_collection/uk_bin_collection/councils/WokinghamBoroughCouncil.py +1 -1
- {uk_bin_collection-0.102.0.dist-info → uk_bin_collection-0.104.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.102.0.dist-info → uk_bin_collection-0.104.0.dist-info}/RECORD +26 -21
- {uk_bin_collection-0.102.0.dist-info → uk_bin_collection-0.104.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.102.0.dist-info → uk_bin_collection-0.104.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.102.0.dist-info → uk_bin_collection-0.104.0.dist-info}/entry_points.txt +0 -0
@@ -56,9 +56,7 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
56
56
|
postcode_input.send_keys(user_postcode)
|
57
57
|
|
58
58
|
find_address_button = WebDriverWait(driver, 10).until(
|
59
|
-
EC.presence_of_element_located(
|
60
|
-
(By.ID, 'submitButton0')
|
61
|
-
)
|
59
|
+
EC.presence_of_element_located((By.ID, "submitButton0"))
|
62
60
|
)
|
63
61
|
find_address_button.click()
|
64
62
|
|
@@ -80,7 +78,7 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
80
78
|
template_parts = first_option.split(", ")
|
81
79
|
template_parts[0] = user_paon # Replace the first part with user_paon
|
82
80
|
|
83
|
-
addr_label =
|
81
|
+
addr_label = ", ".join(template_parts)
|
84
82
|
for addr_option in select.options:
|
85
83
|
option_name = addr_option.accessible_name[0 : len(addr_label)]
|
86
84
|
if option_name == addr_label:
|
@@ -100,32 +98,27 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
100
98
|
# Find the div with the specified id
|
101
99
|
target_div = soup.find("div", {"id": target_div_id})
|
102
100
|
|
103
|
-
|
104
101
|
# Check if the div is found
|
105
102
|
if target_div:
|
106
103
|
bin_data = {"bins": []}
|
107
104
|
|
108
|
-
for bin_div in target_div.find_all(
|
109
|
-
"div"
|
110
|
-
):
|
105
|
+
for bin_div in target_div.find_all("div"):
|
111
106
|
# Extract the collection date from the message
|
112
107
|
try:
|
113
108
|
bin_collection_message = bin_div.find("p").text.strip()
|
114
109
|
date_pattern = r"\b\d{2}/\d{2}/\d{4}\b"
|
115
110
|
|
116
111
|
collection_date_string = (
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
112
|
+
re.search(date_pattern, bin_div.text)
|
113
|
+
.group(0)
|
114
|
+
.strip()
|
115
|
+
.replace(",", "")
|
116
|
+
)
|
122
117
|
except AttributeError:
|
123
118
|
continue
|
124
119
|
|
125
120
|
current_date = datetime.now()
|
126
|
-
parsed_date = datetime.strptime(
|
127
|
-
collection_date_string, "%d/%m/%Y"
|
128
|
-
)
|
121
|
+
parsed_date = datetime.strptime(collection_date_string, "%d/%m/%Y")
|
129
122
|
# Check if the parsed date is in the past and not today
|
130
123
|
if parsed_date.date() < current_date.date():
|
131
124
|
# If so, set the year to the next year
|
@@ -137,9 +130,14 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
137
130
|
contains_date(formatted_date)
|
138
131
|
|
139
132
|
# Extract the bin type from the message
|
140
|
-
bin_type_match = re.search(
|
133
|
+
bin_type_match = re.search(
|
134
|
+
r"Your next (.*?) collection", bin_collection_message
|
135
|
+
)
|
141
136
|
if bin_type_match:
|
142
|
-
bin_info = {
|
137
|
+
bin_info = {
|
138
|
+
"type": bin_type_match.group(1),
|
139
|
+
"collectionDate": formatted_date,
|
140
|
+
}
|
143
141
|
bin_data["bins"].append(bin_info)
|
144
142
|
else:
|
145
143
|
raise ValueError("Collection data not found.")
|
@@ -16,7 +16,7 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
16
16
|
def parse_data(self, page: str, **kwargs) -> dict:
|
17
17
|
data = {"bins": []}
|
18
18
|
collections = []
|
19
|
-
selected_collections = kwargs.get("paon").split(
|
19
|
+
selected_collections = kwargs.get("paon").split(",")
|
20
20
|
calendar_urls = []
|
21
21
|
run_date = datetime.now().date()
|
22
22
|
|
@@ -25,9 +25,13 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
25
25
|
for item in selected_collections:
|
26
26
|
item = item.strip().lower().replace(" ", "_")
|
27
27
|
if has_numbers(item):
|
28
|
-
calendar_urls.append(
|
28
|
+
calendar_urls.append(
|
29
|
+
f"https://www.gbcbincalendars.co.uk/json/gedling_borough_council_{item}_bin_schedule.json"
|
30
|
+
)
|
29
31
|
else:
|
30
|
-
calendar_urls.append(
|
32
|
+
calendar_urls.append(
|
33
|
+
f"https://www.gbcbincalendars.co.uk/json/gedling_borough_council_{item}_garden_bin_schedule.json"
|
34
|
+
)
|
31
35
|
|
32
36
|
# Parse each URL and load future data
|
33
37
|
for url in calendar_urls:
|
@@ -36,7 +40,9 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
36
40
|
raise ConnectionError(f"Could not get response from: {url}")
|
37
41
|
json_data = response.json()["collectionDates"]
|
38
42
|
for col in json_data:
|
39
|
-
bin_date = datetime.strptime(
|
43
|
+
bin_date = datetime.strptime(
|
44
|
+
col.get("collectionDate"), "%Y-%m-%d"
|
45
|
+
).date()
|
40
46
|
if bin_date >= run_date:
|
41
47
|
collections.append((col.get("alternativeName"), bin_date))
|
42
48
|
|
@@ -29,9 +29,11 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
29
29
|
waste_type = row.find("th").text.strip()
|
30
30
|
next_collection = parse(row.find("td").text.strip()).date()
|
31
31
|
|
32
|
-
data[
|
33
|
-
|
34
|
-
|
35
|
-
|
32
|
+
data["bins"].append(
|
33
|
+
{
|
34
|
+
"type": waste_type,
|
35
|
+
"collectionDate": next_collection.strftime(date_format),
|
36
|
+
}
|
37
|
+
)
|
36
38
|
|
37
39
|
return data
|
@@ -0,0 +1,81 @@
|
|
1
|
+
import time
|
2
|
+
|
3
|
+
import requests
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
|
6
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
|
+
|
9
|
+
|
10
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
11
|
+
class CouncilClass(AbstractGetBinDataClass):
|
12
|
+
"""
|
13
|
+
Concrete classes have to implement all abstract operations of the
|
14
|
+
base class. They can also override some operations with a default
|
15
|
+
implementation.
|
16
|
+
"""
|
17
|
+
|
18
|
+
def parse_data(self, page: str, **kwargs) -> dict:
|
19
|
+
|
20
|
+
user_uprn = kwargs.get("uprn")
|
21
|
+
check_uprn(user_uprn)
|
22
|
+
bindata = {"bins": []}
|
23
|
+
|
24
|
+
SESSION_URL = "https://myforms.luton.gov.uk/authapi/isauthenticated?uri=https%253A%252F%252Fmyforms.luton.gov.uk%252Fservice%252FFind_my_bin_collection_date&hostname=myforms.luton.gov.uk&withCredentials=true"
|
25
|
+
|
26
|
+
API_URL = "https://myforms.luton.gov.uk/apibroker/runLookup"
|
27
|
+
|
28
|
+
data = {
|
29
|
+
"formValues": {
|
30
|
+
"Find my bin collection date": {
|
31
|
+
"id": {
|
32
|
+
"value": f"1-{user_uprn}",
|
33
|
+
},
|
34
|
+
},
|
35
|
+
}
|
36
|
+
}
|
37
|
+
|
38
|
+
headers = {
|
39
|
+
"Content-Type": "application/json",
|
40
|
+
"Accept": "application/json",
|
41
|
+
"User-Agent": "Mozilla/5.0",
|
42
|
+
"X-Requested-With": "XMLHttpRequest",
|
43
|
+
"Referer": "https://myforms.luton.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=",
|
44
|
+
}
|
45
|
+
s = requests.session()
|
46
|
+
r = s.get(SESSION_URL)
|
47
|
+
r.raise_for_status()
|
48
|
+
session_data = r.json()
|
49
|
+
sid = session_data["auth-session"]
|
50
|
+
params = {
|
51
|
+
"id": "65cb710f8d525",
|
52
|
+
"repeat_against": "",
|
53
|
+
"noRetry": "true",
|
54
|
+
"getOnlyTokens": "undefined",
|
55
|
+
"log_id": "",
|
56
|
+
"app_name": "AF-Renderer::Self",
|
57
|
+
# current Unix timestamp in milliseconds
|
58
|
+
"_": str(int(time.time() * 1000)),
|
59
|
+
"sid": sid,
|
60
|
+
}
|
61
|
+
r = s.post(API_URL, json=data, headers=headers, params=params)
|
62
|
+
r.raise_for_status()
|
63
|
+
data = r.json()
|
64
|
+
rows_data = data["integration"]["transformed"]["rows_data"][f"{user_uprn}"]
|
65
|
+
|
66
|
+
soup = BeautifulSoup(rows_data["html"], features="html.parser")
|
67
|
+
soup.prettify()
|
68
|
+
for collection in soup.find_all("tr"):
|
69
|
+
tds = collection.find_all("td")
|
70
|
+
bin_type = tds[1].text
|
71
|
+
collection_date = datetime.strptime(
|
72
|
+
tds[0].text,
|
73
|
+
"%A %d %b %Y",
|
74
|
+
)
|
75
|
+
dict_data = {
|
76
|
+
"type": bin_type,
|
77
|
+
"collectionDate": collection_date.strftime(date_format),
|
78
|
+
}
|
79
|
+
bindata["bins"].append(dict_data)
|
80
|
+
|
81
|
+
return bindata
|
@@ -7,6 +7,7 @@ from bs4 import BeautifulSoup
|
|
7
7
|
from uk_bin_collection.uk_bin_collection.common import *
|
8
8
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
9
9
|
|
10
|
+
|
10
11
|
class CouncilClass(AbstractGetBinDataClass):
|
11
12
|
def parse_data(self, page: str, **kwargs) -> dict:
|
12
13
|
|
@@ -14,7 +15,9 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
14
15
|
check_postcode(user_postcode)
|
15
16
|
|
16
17
|
# Fetch the page content
|
17
|
-
root_url = "https://myproperty.molevalley.gov.uk/molevalley/api/live_addresses/{}?format=json".format(
|
18
|
+
root_url = "https://myproperty.molevalley.gov.uk/molevalley/api/live_addresses/{}?format=json".format(
|
19
|
+
user_postcode
|
20
|
+
)
|
18
21
|
response = requests.get(root_url, verify=False)
|
19
22
|
|
20
23
|
if not response.ok:
|
@@ -63,23 +66,27 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
63
66
|
if bins_panel:
|
64
67
|
panel = bins_panel.find_parent("div", class_="panel")
|
65
68
|
print("Found 'Bins and Recycling' panel.")
|
66
|
-
|
69
|
+
|
67
70
|
# Extract bin collection info from the un-commented HTML
|
68
71
|
for strong_tag in panel.find_all("strong"):
|
69
72
|
bin_type = strong_tag.text.strip()
|
70
73
|
collection_string = strong_tag.find_next("p").text.strip()
|
71
|
-
|
74
|
+
|
72
75
|
# Debugging output
|
73
76
|
print(f"Processing bin type: {bin_type}")
|
74
77
|
print(f"Collection string: {collection_string}")
|
75
|
-
|
78
|
+
|
76
79
|
match = regex_date.search(collection_string)
|
77
80
|
if match:
|
78
|
-
collection_date = datetime.strptime(
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
81
|
+
collection_date = datetime.strptime(
|
82
|
+
match.group(1), "%d/%m/%Y"
|
83
|
+
).date()
|
84
|
+
data["bins"].append(
|
85
|
+
{
|
86
|
+
"type": bin_type,
|
87
|
+
"collectionDate": collection_date.strftime("%d/%m/%Y"),
|
88
|
+
}
|
89
|
+
)
|
83
90
|
all_collection_dates.append(collection_date)
|
84
91
|
else:
|
85
92
|
# Add a debug line to show which collections are missing dates
|
@@ -88,7 +95,7 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
88
95
|
# Search for additional collections like electrical and textiles
|
89
96
|
for p in panel.find_all("p"):
|
90
97
|
additional_match = regex_additional_collection.match(p.text.strip())
|
91
|
-
|
98
|
+
|
92
99
|
# Debugging output for additional collections
|
93
100
|
if additional_match:
|
94
101
|
bin_type = additional_match.group(1)
|
@@ -96,23 +103,33 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
96
103
|
if "each collection day" in additional_match.group(2):
|
97
104
|
if all_collection_dates:
|
98
105
|
collection_date = min(all_collection_dates)
|
99
|
-
data["bins"].append(
|
100
|
-
|
101
|
-
|
102
|
-
|
106
|
+
data["bins"].append(
|
107
|
+
{
|
108
|
+
"type": bin_type,
|
109
|
+
"collectionDate": collection_date.strftime(
|
110
|
+
"%d/%m/%Y"
|
111
|
+
),
|
112
|
+
}
|
113
|
+
)
|
103
114
|
else:
|
104
|
-
print(
|
115
|
+
print(
|
116
|
+
"No collection dates available for additional collection."
|
117
|
+
)
|
105
118
|
raise ValueError("No valid bin collection dates found.")
|
106
119
|
else:
|
107
|
-
print(
|
120
|
+
print(
|
121
|
+
f"No additional collection found in paragraph: {p.text.strip()}"
|
122
|
+
)
|
108
123
|
else:
|
109
|
-
raise ValueError(
|
124
|
+
raise ValueError(
|
125
|
+
"Unable to find 'Bins and Recycling' panel in the HTML data."
|
126
|
+
)
|
110
127
|
|
111
128
|
# Debugging to check collected data
|
112
129
|
print(f"Collected bin data: {data}")
|
113
|
-
|
130
|
+
|
114
131
|
# Handle the case where no collection dates were found
|
115
132
|
if not all_collection_dates:
|
116
133
|
raise ValueError("No valid collection dates were found in the data.")
|
117
|
-
|
118
|
-
return data
|
134
|
+
|
135
|
+
return data
|
@@ -23,7 +23,7 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
23
23
|
# Get the first form
|
24
24
|
response = s.get(
|
25
25
|
"https://my.northtyneside.gov.uk/category/81/bin-collection-dates",
|
26
|
-
verify
|
26
|
+
verify=False,
|
27
27
|
)
|
28
28
|
|
29
29
|
# Find the form ID and submit with a postcode
|
@@ -31,13 +31,13 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
31
31
|
form_build_id = soup.find("input", {"name": "form_build_id"})["value"]
|
32
32
|
response = s.post(
|
33
33
|
"https://my.northtyneside.gov.uk/category/81/bin-collection-dates",
|
34
|
-
data
|
34
|
+
data={
|
35
35
|
"postcode": user_postcode,
|
36
36
|
"op": "Find",
|
37
37
|
"form_build_id": form_build_id,
|
38
38
|
"form_id": "ntc_address_wizard",
|
39
39
|
},
|
40
|
-
verify
|
40
|
+
verify=False,
|
41
41
|
)
|
42
42
|
|
43
43
|
# Find the form ID and submit with the UPRN
|
@@ -45,18 +45,18 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
45
45
|
form_build_id = soup.find("input", {"name": "form_build_id"})["value"]
|
46
46
|
response = s.post(
|
47
47
|
"https://my.northtyneside.gov.uk/category/81/bin-collection-dates",
|
48
|
-
data
|
48
|
+
data={
|
49
49
|
"house_number": f"0000{user_uprn}",
|
50
50
|
"op": "Use",
|
51
51
|
"form_build_id": form_build_id,
|
52
52
|
"form_id": "ntc_address_wizard",
|
53
53
|
},
|
54
|
-
verify
|
54
|
+
verify=False,
|
55
55
|
)
|
56
56
|
|
57
57
|
# Parse form page and get the day of week and week offsets
|
58
58
|
soup = BeautifulSoup(response.text, features="html.parser")
|
59
|
-
info_section
|
59
|
+
info_section = soup.find("section", {"class": "block block-ntc-bins clearfix"})
|
60
60
|
|
61
61
|
regular_day, garden_day, special_day = None, None, None
|
62
62
|
# Get day of week and week label for refuse, garden and special collections.
|
@@ -82,7 +82,9 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
82
82
|
weeks_total = math.floor((datetime(2026, 4, 1) - datetime.now()).days / 7)
|
83
83
|
|
84
84
|
# The garden calendar only runs until the end of November 2024, so work out how many weeks remain until then
|
85
|
-
garden_weeks_total = math.floor(
|
85
|
+
garden_weeks_total = math.floor(
|
86
|
+
(datetime(2024, 12, 1) - datetime.now()).days / 7
|
87
|
+
)
|
86
88
|
|
87
89
|
regular_collections, garden_collections, special_collections = [], [], []
|
88
90
|
# Convert day text to series of dates using previous calculation
|
@@ -134,10 +136,10 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
134
136
|
|
135
137
|
return {
|
136
138
|
"bins": [
|
137
|
-
|
139
|
+
{
|
138
140
|
"type": item[0],
|
139
141
|
"collectionDate": item[1].strftime(date_format),
|
140
142
|
}
|
141
143
|
for item in sorted(collections, key=lambda x: x[1])
|
142
144
|
]
|
143
|
-
}
|
145
|
+
}
|
@@ -36,8 +36,7 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
36
36
|
collection_date = datetime.strptime(
|
37
37
|
remove_ordinal_indicator_from_date_string(
|
38
38
|
week_text[0].split(" - ")[0]
|
39
|
-
)
|
40
|
-
.strip(),
|
39
|
+
).strip(),
|
41
40
|
"%A %d %B",
|
42
41
|
)
|
43
42
|
next_collection = collection_date.replace(year=datetime.now().year)
|
@@ -17,7 +17,9 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
17
17
|
check_uprn(user_uprn)
|
18
18
|
|
19
19
|
response = requests.post(
|
20
|
-
"https://www.rotherham.gov.uk/bin-collections?address={}&submit=Submit".format(
|
20
|
+
"https://www.rotherham.gov.uk/bin-collections?address={}&submit=Submit".format(
|
21
|
+
user_uprn
|
22
|
+
)
|
21
23
|
)
|
22
24
|
# Make a BS4 object
|
23
25
|
soup = BeautifulSoup(response.text, features="html.parser")
|
@@ -25,15 +27,15 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
25
27
|
|
26
28
|
data = {"bins": []}
|
27
29
|
|
28
|
-
table = soup.select(
|
30
|
+
table = soup.select("table")[0]
|
29
31
|
|
30
32
|
if table:
|
31
|
-
rows = table.select(
|
33
|
+
rows = table.select("tr")
|
32
34
|
|
33
35
|
for index, row in enumerate(rows):
|
34
|
-
bin_info_cell = row.select(
|
35
|
-
if bin_info_cell:
|
36
|
-
bin_type = bin_info_cell[0].get_text(separator=
|
36
|
+
bin_info_cell = row.select("td")
|
37
|
+
if bin_info_cell:
|
38
|
+
bin_type = bin_info_cell[0].get_text(separator=" ", strip=True)
|
37
39
|
bin_collection = bin_info_cell[1]
|
38
40
|
|
39
41
|
if bin_collection:
|
@@ -0,0 +1,56 @@
|
|
1
|
+
import time
|
2
|
+
|
3
|
+
import requests
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
|
6
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
|
+
|
9
|
+
|
10
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
11
|
+
class CouncilClass(AbstractGetBinDataClass):
|
12
|
+
"""
|
13
|
+
Concrete classes have to implement all abstract operations of the
|
14
|
+
base class. They can also override some operations with a default
|
15
|
+
implementation.
|
16
|
+
"""
|
17
|
+
|
18
|
+
def parse_data(self, page: str, **kwargs) -> dict:
|
19
|
+
|
20
|
+
user_uprn = kwargs.get("uprn")
|
21
|
+
check_uprn(user_uprn)
|
22
|
+
bindata = {"bins": []}
|
23
|
+
|
24
|
+
URI = f"https://www.swindon.gov.uk/info/20122/rubbish_and_recycling_collection_days?addressList={user_uprn}&uprnSubmit=Yes"
|
25
|
+
|
26
|
+
# Make the GET request
|
27
|
+
response = requests.get(URI)
|
28
|
+
|
29
|
+
# Parse the HTML response
|
30
|
+
soup = BeautifulSoup(response.text, "html.parser")
|
31
|
+
|
32
|
+
bin_collection_content = soup.find_all(
|
33
|
+
"div", {"class": "bin-collection-content"}
|
34
|
+
)
|
35
|
+
for content in bin_collection_content:
|
36
|
+
content_left = content.find("div", {"class": "content-left"})
|
37
|
+
content_right = content.find("div", {"class": "content-right"})
|
38
|
+
if content_left and content_right:
|
39
|
+
|
40
|
+
bin_types = content_left.find("h3").text.split(" and ")
|
41
|
+
for bin_type in bin_types:
|
42
|
+
|
43
|
+
collection_date = datetime.strptime(
|
44
|
+
content_right.find(
|
45
|
+
"span", {"class": "nextCollectionDate"}
|
46
|
+
).text,
|
47
|
+
"%A, %d %B %Y",
|
48
|
+
).strftime(date_format)
|
49
|
+
|
50
|
+
dict_data = {
|
51
|
+
"type": bin_type,
|
52
|
+
"collectionDate": collection_date,
|
53
|
+
}
|
54
|
+
bindata["bins"].append(dict_data)
|
55
|
+
|
56
|
+
return bindata
|
@@ -3,6 +3,7 @@ from uk_bin_collection.uk_bin_collection.common import *
|
|
3
3
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
4
|
from datetime import datetime
|
5
5
|
|
6
|
+
|
6
7
|
class CouncilClass(AbstractGetBinDataClass):
|
7
8
|
"""
|
8
9
|
Concrete class to scrape bin collection data.
|
@@ -25,23 +26,23 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
25
26
|
data = {"bins": []}
|
26
27
|
# Locate the section with bin collection data
|
27
28
|
sections = soup.find_all("div", {"class": "wil_c-content-section_heading"})
|
28
|
-
|
29
|
+
|
29
30
|
for s in sections:
|
30
31
|
if s.get_text(strip=True).lower() == "bin collections":
|
31
32
|
rows = s.find_next_sibling(
|
32
33
|
"div", {"class": "c-content-section_body"}
|
33
34
|
).find_all("div", class_="tablet:l-col-fb-4 u-mt-10")
|
34
|
-
|
35
|
+
|
35
36
|
for row in rows:
|
36
37
|
title_elem = row.find("div", class_="u-mb-4")
|
37
38
|
if title_elem:
|
38
39
|
title = title_elem.get_text(strip=True).capitalize()
|
39
|
-
|
40
|
+
|
40
41
|
# Find all collection info in the same section
|
41
42
|
collections = row.find_all("div", class_="u-mb-2")
|
42
43
|
for c in collections:
|
43
44
|
text = c.get_text(strip=True).lower()
|
44
|
-
|
45
|
+
|
45
46
|
if "next collection" in text:
|
46
47
|
date_text = text.replace("next collection - ", "")
|
47
48
|
try:
|
@@ -51,34 +52,43 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
51
52
|
|
52
53
|
dict_data = {
|
53
54
|
"type": title,
|
54
|
-
"collectionDate": next_collection_date
|
55
|
+
"collectionDate": next_collection_date,
|
55
56
|
}
|
56
57
|
data["bins"].append(dict_data)
|
57
58
|
except ValueError:
|
58
59
|
# Skip if the date isn't a valid date
|
59
60
|
print(f"Skipping invalid date: {date_text}")
|
60
|
-
|
61
|
+
|
61
62
|
# Get future collections
|
62
63
|
future_collections_section = row.find("ul", class_="u-mt-4")
|
63
64
|
if future_collections_section:
|
64
|
-
future_collections =
|
65
|
+
future_collections = (
|
66
|
+
future_collections_section.find_all("li")
|
67
|
+
)
|
65
68
|
for future_collection in future_collections:
|
66
|
-
future_date_text = future_collection.get_text(
|
69
|
+
future_date_text = future_collection.get_text(
|
70
|
+
strip=True
|
71
|
+
)
|
67
72
|
try:
|
68
73
|
future_collection_date = datetime.strptime(
|
69
74
|
future_date_text, "%A, %d %B %Y"
|
70
75
|
).strftime(date_format)
|
71
76
|
|
72
77
|
# Avoid duplicates of next collection date
|
73
|
-
if
|
78
|
+
if (
|
79
|
+
future_collection_date
|
80
|
+
!= next_collection_date
|
81
|
+
):
|
74
82
|
dict_data = {
|
75
83
|
"type": title,
|
76
|
-
"collectionDate": future_collection_date
|
84
|
+
"collectionDate": future_collection_date,
|
77
85
|
}
|
78
86
|
data["bins"].append(dict_data)
|
79
87
|
except ValueError:
|
80
88
|
# Skip if the future collection date isn't valid
|
81
|
-
print(
|
89
|
+
print(
|
90
|
+
f"Skipping invalid future date: {future_date_text}"
|
91
|
+
)
|
82
92
|
|
83
93
|
# Sort the collections by date
|
84
94
|
data["bins"].sort(
|