uk_bin_collection 0.153.0__py3-none-any.whl → 0.157.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +34 -25
- uk_bin_collection/uk_bin_collection/councils/AberdeenCityCouncil.py +0 -1
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +45 -120
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +4 -1
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +15 -36
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +55 -24
- uk_bin_collection/uk_bin_collection/councils/DacorumBoroughCouncil.py +22 -13
- uk_bin_collection/uk_bin_collection/councils/EastDunbartonshireCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +32 -34
- uk_bin_collection/uk_bin_collection/councils/FarehamBoroughCouncil.py +5 -2
- uk_bin_collection/uk_bin_collection/councils/FolkstoneandHytheDistrictCouncil.py +22 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/HartlepoolBoroughCouncil.py +3 -1
- uk_bin_collection/uk_bin_collection/councils/IslingtonCouncil.py +8 -5
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +23 -10
- uk_bin_collection/uk_bin_collection/councils/MidSuffolkDistrictCouncil.py +70 -92
- uk_bin_collection/uk_bin_collection/councils/NewForestCouncil.py +104 -47
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +138 -21
- uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py +26 -128
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +245 -82
- uk_bin_collection/uk_bin_collection/councils/OxfordCityCouncil.py +1 -0
- uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +170 -13
- uk_bin_collection/uk_bin_collection/councils/RotherhamCouncil.py +70 -38
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +4 -2
- uk_bin_collection/uk_bin_collection/councils/SandwellBoroughCouncil.py +4 -11
- uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py +39 -21
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +136 -21
- uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +18 -22
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +138 -21
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +16 -13
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/RECORD +35 -34
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.153.0.dist-info → uk_bin_collection-0.157.0.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,7 @@
|
|
1
|
+
import json
|
2
|
+
|
1
3
|
from bs4 import BeautifulSoup
|
4
|
+
|
2
5
|
from uk_bin_collection.uk_bin_collection.common import *
|
3
6
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
7
|
|
@@ -16,46 +19,41 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
16
19
|
uprn = kwargs.get("uprn")
|
17
20
|
check_uprn(uprn)
|
18
21
|
|
22
|
+
label_map = {
|
23
|
+
"domestic-waste-collection-service": "Household Waste",
|
24
|
+
"recycling-collection-service": "Recycling",
|
25
|
+
"garden-waste-collection-service": "Garden Waste",
|
26
|
+
}
|
27
|
+
|
19
28
|
requests.packages.urllib3.disable_warnings()
|
20
29
|
response = requests.get(
|
21
|
-
f"https://
|
30
|
+
f"https://www.erewash.gov.uk/bbd-whitespace/one-year-collection-dates-without-christmas?uprn={uprn}",
|
22
31
|
headers={"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"},
|
23
32
|
)
|
33
|
+
# Parse the JSON response
|
34
|
+
payload = response.json()
|
35
|
+
bin_collection = json.loads(payload) if isinstance(payload, str) else payload
|
24
36
|
|
25
|
-
|
26
|
-
|
27
|
-
|
37
|
+
cd = next(
|
38
|
+
i["settings"]["collection_dates"]
|
39
|
+
for i in bin_collection
|
40
|
+
if i.get("command") == "settings"
|
28
41
|
)
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
"([A-Za-z]+ \\d+[A-Za-z]+ [A-Za-z]+ \\d*)", collection_info
|
42
|
+
|
43
|
+
for month in cd.values():
|
44
|
+
for e in month:
|
45
|
+
d = e["date"] # "YYYY-MM-DD"
|
46
|
+
label = label_map.get(
|
47
|
+
e.get("service-identifier"),
|
48
|
+
e.get("service") or e.get("service-identifier"),
|
37
49
|
)
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
),
|
47
|
-
"collectionDate": collection_date,
|
48
|
-
}
|
49
|
-
data["bins"].append(dict_data)
|
50
|
-
if "garden waste" in collection_info.lower():
|
51
|
-
dict_data = {
|
52
|
-
"type": "Garden Waste",
|
53
|
-
"collectionDate": collection_date,
|
54
|
-
}
|
55
|
-
data["bins"].append(dict_data)
|
56
|
-
|
57
|
-
data["bins"].sort(
|
58
|
-
key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
|
59
|
-
)
|
50
|
+
|
51
|
+
dict_data = {
|
52
|
+
"type": label,
|
53
|
+
"collectionDate": datetime.strptime(d, "%Y-%m-%d").strftime(
|
54
|
+
date_format
|
55
|
+
),
|
56
|
+
}
|
57
|
+
data["bins"].append(dict_data)
|
60
58
|
|
61
59
|
return data
|
@@ -38,11 +38,14 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
38
38
|
if "rows" in bin_data:
|
39
39
|
collection_str = bin_data["rows"][0]["DomesticBinDay"]
|
40
40
|
|
41
|
-
results = re.findall(r
|
41
|
+
results = re.findall(r'(\d{1,2}/\d{1,2}/\d{4}|today)\s*\(([^)]+)\)', collection_str)
|
42
42
|
|
43
43
|
if results:
|
44
44
|
for result in results:
|
45
|
-
|
45
|
+
if (result[0] == "today"):
|
46
|
+
collection_date = datetime.today()
|
47
|
+
else:
|
48
|
+
collection_date = datetime.strptime(result[0], "%d/%m/%Y")
|
46
49
|
dict_data = {
|
47
50
|
"type": result[1],
|
48
51
|
"collectionDate": collection_date.strftime(date_format),
|
@@ -74,6 +74,28 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
74
74
|
}
|
75
75
|
bindata["bins"].append(dict_data)
|
76
76
|
|
77
|
+
# Extract the Garden Waste schedule
|
78
|
+
garden_waste_section = soup.find(
|
79
|
+
"span", text=lambda x: x and "Garden Waste" in x
|
80
|
+
)
|
81
|
+
if garden_waste_section:
|
82
|
+
bin_types = garden_waste_section.text.replace("Garden Waste: ", "").split(
|
83
|
+
" / "
|
84
|
+
)
|
85
|
+
garden_waste_dates = garden_waste_section.find_next("ul").find_all("li")
|
86
|
+
for date in garden_waste_dates:
|
87
|
+
for bin_type in bin_types:
|
88
|
+
dict_data = {
|
89
|
+
"type": bin_type.strip(),
|
90
|
+
"collectionDate": datetime.strptime(
|
91
|
+
remove_ordinal_indicator_from_date_string(
|
92
|
+
date.text.strip()
|
93
|
+
),
|
94
|
+
"%A %d %B %Y",
|
95
|
+
).strftime("%d/%m/%Y"),
|
96
|
+
}
|
97
|
+
bindata["bins"].append(dict_data)
|
98
|
+
|
77
99
|
bindata["bins"].sort(
|
78
100
|
key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
|
79
101
|
)
|
@@ -18,7 +18,7 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
18
18
|
try:
|
19
19
|
user_uprn = kwargs.get("uprn")
|
20
20
|
check_uprn(user_uprn)
|
21
|
-
url = f"https://onlineservices.glasgow.gov.uk/forms/
|
21
|
+
url = f"https://onlineservices.glasgow.gov.uk/forms/refuseandrecyclingcalendar/CollectionsCalendar.aspx?UPRN={user_uprn}"
|
22
22
|
if not user_uprn:
|
23
23
|
# This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
|
24
24
|
url = kwargs.get("url")
|
@@ -73,7 +73,9 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
73
73
|
for div in soup.find_all("div"):
|
74
74
|
# Extract bin type and date from the span tag
|
75
75
|
text = div.find("span").text.strip()
|
76
|
-
|
76
|
+
parts = text.split(" ")
|
77
|
+
date = parts[-1] # assume the last token is the date
|
78
|
+
bin_type = " ".join(parts[:-1])
|
77
79
|
dict_data = {
|
78
80
|
"type": bin_type,
|
79
81
|
"collectionDate": date,
|
@@ -17,11 +17,14 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
17
17
|
|
18
18
|
data = {"bins": []}
|
19
19
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
20
|
+
# Find the waste and recycling section with proper null checking
|
21
|
+
waste_section = soup.find(string="Waste and recycling collections")
|
22
|
+
waste_table = None
|
23
|
+
|
24
|
+
if waste_section:
|
25
|
+
toggle_content = waste_section.find_next("div", class_="m-toggle-content")
|
26
|
+
if toggle_content:
|
27
|
+
waste_table = toggle_content.find("table")
|
25
28
|
|
26
29
|
if waste_table:
|
27
30
|
rows = waste_table.find_all("tr")
|
@@ -57,17 +57,30 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
57
57
|
response = session.get(addr_link)
|
58
58
|
new_soup = BeautifulSoup(response.text, features="html.parser")
|
59
59
|
services = new_soup.find("section", {"id": "scheduled-collections"})
|
60
|
+
|
61
|
+
if services is None:
|
62
|
+
raise Exception("Could not find scheduled collections section on the page")
|
63
|
+
|
60
64
|
services_sub = services.find_all("li")
|
65
|
+
if not services_sub:
|
66
|
+
raise Exception("No collection services found")
|
67
|
+
|
61
68
|
for i in range(0, len(services_sub), 3):
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
69
|
+
if i + 2 < len(services_sub):
|
70
|
+
date_text = services_sub[i + 1].text.strip() if services_sub[i + 1] else None
|
71
|
+
if date_text:
|
72
|
+
try:
|
73
|
+
dt = datetime.strptime(date_text, "%d/%m/%Y").date()
|
74
|
+
bin_type_element = BeautifulSoup(services_sub[i + 2].text, features="lxml").find("p")
|
75
|
+
if bin_type_element and bin_type_element.text:
|
76
|
+
data["bins"].append(
|
77
|
+
{
|
78
|
+
"type": bin_type_element.text.strip().removesuffix(" Collection Service"),
|
79
|
+
"collectionDate": dt.strftime(date_format),
|
80
|
+
}
|
81
|
+
)
|
82
|
+
except (ValueError, AttributeError) as e:
|
83
|
+
# Skip invalid date or missing elements
|
84
|
+
continue
|
72
85
|
|
73
86
|
return data
|
@@ -1,6 +1,7 @@
|
|
1
1
|
import re
|
2
2
|
import time
|
3
3
|
|
4
|
+
import holidays
|
4
5
|
import requests
|
5
6
|
from bs4 import BeautifulSoup
|
6
7
|
from selenium.webdriver.common.by import By
|
@@ -50,58 +51,63 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
50
51
|
refuse_dates = get_dates_every_x_days(refusestartDate, 14, 28)
|
51
52
|
recycling_dates = get_dates_every_x_days(recyclingstartDate, 14, 28)
|
52
53
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
(
|
57
|
-
(
|
58
|
-
(
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
(
|
63
|
-
("
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
("09/05/2025", 1),
|
78
|
-
("26/05/2025", 1),
|
79
|
-
("27/05/2025", 1),
|
80
|
-
("28/05/2025", 1),
|
81
|
-
("29/05/2025", 1),
|
82
|
-
("30/05/2025", 1),
|
83
|
-
("25/08/2025", 1),
|
84
|
-
("26/08/2025", 1),
|
85
|
-
("27/08/2025", 1),
|
86
|
-
("28/08/2025", 1),
|
87
|
-
("29/08/2025", 1),
|
88
|
-
]
|
54
|
+
# Generate bank holidays dynamically using the holidays library
|
55
|
+
def get_bank_holidays_set():
|
56
|
+
"""Get set of bank holiday dates for quick lookup."""
|
57
|
+
current_year = datetime.now().year
|
58
|
+
uk_holidays = holidays.UK(years=range(current_year - 1, current_year + 3))
|
59
|
+
return set(uk_holidays.keys())
|
60
|
+
|
61
|
+
def find_next_collection_day(original_date):
|
62
|
+
"""Find the next valid collection day, avoiding weekends and bank holidays."""
|
63
|
+
bank_holiday_dates = get_bank_holidays_set()
|
64
|
+
check_date = datetime.strptime(original_date, "%d/%m/%Y")
|
65
|
+
|
66
|
+
# Safety limit to prevent infinite loops
|
67
|
+
max_attempts = 10
|
68
|
+
attempts = 0
|
69
|
+
|
70
|
+
# Keep moving forward until we find a valid collection day
|
71
|
+
while attempts < max_attempts:
|
72
|
+
attempts += 1
|
73
|
+
|
74
|
+
# Check if it's a weekend (Saturday=5, Sunday=6)
|
75
|
+
if check_date.weekday() >= 5:
|
76
|
+
check_date += timedelta(days=1)
|
77
|
+
continue
|
89
78
|
|
90
|
-
|
79
|
+
# Check if it's a bank holiday
|
80
|
+
if check_date.date() in bank_holiday_dates:
|
81
|
+
# Major holidays (Christmas/New Year) get bigger delays
|
82
|
+
holiday_name = str(holidays.UK().get(check_date.date(), ''))
|
83
|
+
is_major_holiday = (
|
84
|
+
'Christmas' in holiday_name or
|
85
|
+
'Boxing' in holiday_name or
|
86
|
+
'New Year' in holiday_name
|
87
|
+
)
|
88
|
+
delay_days = 2 if is_major_holiday else 1
|
89
|
+
check_date += timedelta(days=delay_days)
|
90
|
+
continue
|
91
|
+
|
92
|
+
# Found a valid collection day
|
93
|
+
break
|
94
|
+
|
95
|
+
# If we've exhausted attempts, return the original date as fallback
|
96
|
+
if attempts >= max_attempts:
|
97
|
+
return original_date
|
98
|
+
|
99
|
+
return check_date.strftime("%d/%m/%Y")
|
100
|
+
|
101
|
+
bank_holidays = [] # No longer needed - using smart date calculation
|
91
102
|
|
92
|
-
|
103
|
+
for refuseDate in refuse_dates:
|
104
|
+
# Calculate initial collection date
|
105
|
+
initial_date = (
|
93
106
|
datetime.strptime(refuseDate, "%d/%m/%Y") + timedelta(days=offset_days)
|
94
107
|
).strftime("%d/%m/%Y")
|
95
108
|
|
96
|
-
|
97
|
-
|
98
|
-
)
|
99
|
-
|
100
|
-
if holiday_offset > 0:
|
101
|
-
collection_date = (
|
102
|
-
datetime.strptime(collection_date, "%d/%m/%Y")
|
103
|
-
+ timedelta(days=holiday_offset)
|
104
|
-
).strftime("%d/%m/%Y")
|
109
|
+
# Find the next valid collection day (handles weekends + cascading holidays)
|
110
|
+
collection_date = find_next_collection_day(initial_date)
|
105
111
|
|
106
112
|
dict_data = {
|
107
113
|
"type": "Refuse Bin",
|
@@ -110,21 +116,14 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
110
116
|
bindata["bins"].append(dict_data)
|
111
117
|
|
112
118
|
for recyclingDate in recycling_dates:
|
113
|
-
|
114
|
-
|
119
|
+
# Calculate initial collection date
|
120
|
+
initial_date = (
|
115
121
|
datetime.strptime(recyclingDate, "%d/%m/%Y")
|
116
122
|
+ timedelta(days=offset_days)
|
117
123
|
).strftime("%d/%m/%Y")
|
118
124
|
|
119
|
-
|
120
|
-
|
121
|
-
)
|
122
|
-
|
123
|
-
if holiday_offset > 0:
|
124
|
-
collection_date = (
|
125
|
-
datetime.strptime(collection_date, "%d/%m/%Y")
|
126
|
-
+ timedelta(days=holiday_offset)
|
127
|
-
).strftime("%d/%m/%Y")
|
125
|
+
# Find the next valid collection day (handles weekends + cascading holidays)
|
126
|
+
collection_date = find_next_collection_day(initial_date)
|
128
127
|
|
129
128
|
dict_data = {
|
130
129
|
"type": "Recycling Bin",
|
@@ -140,48 +139,27 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
140
139
|
|
141
140
|
garden_dates = get_dates_every_x_days(gardenstartDate, 14, 28)
|
142
141
|
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
("31/12/2024", 1),
|
151
|
-
("01/01/2025", 1),
|
152
|
-
("02/01/2025", 1),
|
153
|
-
("03/01/2025", 1),
|
154
|
-
]
|
142
|
+
def is_christmas_period(date_obj):
|
143
|
+
"""Check if date is in Christmas/New Year skip period for garden collections."""
|
144
|
+
if date_obj.month == 12 and date_obj.day >= 23:
|
145
|
+
return True
|
146
|
+
if date_obj.month == 1 and date_obj.day <= 3:
|
147
|
+
return True
|
148
|
+
return False
|
155
149
|
|
156
150
|
for gardenDate in garden_dates:
|
157
|
-
|
158
|
-
|
151
|
+
# Calculate initial collection date
|
152
|
+
initial_date = (
|
159
153
|
datetime.strptime(gardenDate, "%d/%m/%Y")
|
160
154
|
+ timedelta(days=offset_days_garden)
|
161
|
-
).strftime("%d/%m/%Y")
|
162
|
-
|
163
|
-
garden_holiday = next(
|
164
|
-
(
|
165
|
-
value
|
166
|
-
for date, value in garden_bank_holidays
|
167
|
-
if date == collection_date
|
168
|
-
),
|
169
|
-
0,
|
170
155
|
)
|
171
156
|
|
172
|
-
|
157
|
+
# Skip garden collections during Christmas/New Year period
|
158
|
+
if is_christmas_period(initial_date):
|
173
159
|
continue
|
174
160
|
|
175
|
-
|
176
|
-
|
177
|
-
0,
|
178
|
-
)
|
179
|
-
|
180
|
-
if holiday_offset > 0:
|
181
|
-
collection_date = (
|
182
|
-
datetime.strptime(collection_date, "%d/%m/%Y")
|
183
|
-
+ timedelta(days=holiday_offset)
|
184
|
-
).strftime("%d/%m/%Y")
|
161
|
+
# Find the next valid collection day (handles weekends + holidays)
|
162
|
+
collection_date = find_next_collection_day(initial_date.strftime("%d/%m/%Y"))
|
185
163
|
|
186
164
|
dict_data = {
|
187
165
|
"type": "Garden Bin",
|
@@ -1,16 +1,14 @@
|
|
1
1
|
import logging
|
2
|
-
import pickle
|
3
2
|
import time
|
4
3
|
|
5
|
-
import requests
|
6
4
|
from bs4 import BeautifulSoup
|
7
5
|
from selenium import webdriver
|
6
|
+
from selenium.common.exceptions import NoSuchElementException
|
8
7
|
from selenium.webdriver.common.by import By
|
9
8
|
from selenium.webdriver.common.keys import Keys
|
10
9
|
from selenium.webdriver.support import expected_conditions as EC
|
11
10
|
from selenium.webdriver.support.ui import Select
|
12
11
|
from selenium.webdriver.support.wait import WebDriverWait
|
13
|
-
from uk_bin_collection.uk_bin_collection.common import *
|
14
12
|
|
15
13
|
from uk_bin_collection.uk_bin_collection.common import *
|
16
14
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
@@ -23,17 +21,64 @@ logging.basicConfig(
|
|
23
21
|
|
24
22
|
class CouncilClass(AbstractGetBinDataClass):
|
25
23
|
|
24
|
+
def get_legacy_bins(self, page: str) -> []:
|
25
|
+
|
26
|
+
logging.info("Extracting legacy bin collection data")
|
27
|
+
soup = BeautifulSoup(page, features="html.parser")
|
28
|
+
legacy_bins = []
|
29
|
+
|
30
|
+
# Rubbish and recycling
|
31
|
+
rubbish_recycling = soup.find(
|
32
|
+
"span", class_="CTID-77-_ eb-77-Override-textControl"
|
33
|
+
)
|
34
|
+
if rubbish_recycling:
|
35
|
+
match = re.search(r"collected weekly on (\w+)", rubbish_recycling.text)
|
36
|
+
if match:
|
37
|
+
day_name = match.group(1)
|
38
|
+
next_collection = get_next_day_of_week(day_name)
|
39
|
+
legacy_bins.append(
|
40
|
+
{
|
41
|
+
"type": "Rubbish and recycling",
|
42
|
+
"collectionDate": next_collection,
|
43
|
+
}
|
44
|
+
)
|
45
|
+
logging.info(f"Rubbish and Recycling: {str(next_collection)}")
|
46
|
+
|
47
|
+
# Glass collection
|
48
|
+
glass_collection = soup.find("span", class_="CTID-78-_ eb-78-textControl")
|
49
|
+
if glass_collection:
|
50
|
+
match = re.search(
|
51
|
+
r"next collection is\s+(\d{2}/\d{2}/\d{4})", glass_collection.text
|
52
|
+
)
|
53
|
+
if match:
|
54
|
+
legacy_bins.append(
|
55
|
+
{"type": "Glass collection", "collectionDate": match.group(1)}
|
56
|
+
)
|
57
|
+
logging.info(f"Glass: {str(match.group(1))}")
|
58
|
+
|
59
|
+
# Garden waste
|
60
|
+
garden_waste = soup.find("div", class_="eb-2HIpCnWC-Override-EditorInput")
|
61
|
+
if garden_waste:
|
62
|
+
match = re.search(r"(\d{2}/\d{2}/\d{4})", garden_waste.text)
|
63
|
+
if match:
|
64
|
+
legacy_bins.append(
|
65
|
+
{"type": "Garden waste", "collectionDate": match.group(1)}
|
66
|
+
)
|
67
|
+
logging.info(f"Garden: {str(match.group(1))}")
|
68
|
+
|
69
|
+
# return bins
|
70
|
+
return legacy_bins
|
71
|
+
|
26
72
|
def parse_data(self, page: str, **kwargs) -> dict:
|
27
73
|
driver = None
|
28
74
|
try:
|
29
|
-
|
30
|
-
collections = []
|
75
|
+
bins = []
|
31
76
|
user_uprn = kwargs.get("uprn")
|
32
77
|
user_postcode = kwargs.get("postcode")
|
33
78
|
web_driver = kwargs.get("web_driver")
|
34
79
|
headless = kwargs.get("headless")
|
35
80
|
check_postcode(user_postcode)
|
36
|
-
url = "https://forms.newforest.gov.uk/ufs/
|
81
|
+
url = "https://forms.newforest.gov.uk/ufs/FIND_MY_BIN_BAR.eb"
|
37
82
|
|
38
83
|
# Get session cookies using requests
|
39
84
|
|
@@ -52,10 +97,20 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
52
97
|
|
53
98
|
logging.info("Entering postcode")
|
54
99
|
input_element_postcode = wait.until(
|
55
|
-
EC.
|
100
|
+
EC.element_to_be_clickable(
|
101
|
+
(By.XPATH, '//input[@id="CTID-JmLqCKl2-_-A"]')
|
102
|
+
)
|
103
|
+
)
|
104
|
+
|
105
|
+
driver.execute_script(
|
106
|
+
"arguments[0].scrollIntoView();", input_element_postcode
|
56
107
|
)
|
57
108
|
|
58
|
-
|
109
|
+
logging.info(f"Entering postcode '{str(user_postcode)}'")
|
110
|
+
# Force the value through the DOM cos send_keys just don't work for some reason :(
|
111
|
+
driver.execute_script(
|
112
|
+
f"arguments[0].value='{str(user_postcode)}'", input_element_postcode
|
113
|
+
)
|
59
114
|
|
60
115
|
logging.info("Searching for postcode")
|
61
116
|
input_element_postcode_btn = wait.until(
|
@@ -66,7 +121,9 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
66
121
|
|
67
122
|
logging.info("Waiting for address dropdown")
|
68
123
|
input_element_postcode_dropdown = wait.until(
|
69
|
-
EC.
|
124
|
+
EC.element_to_be_clickable(
|
125
|
+
(By.XPATH, '//select[@id="CTID-KOeKcmrC-_-A"]')
|
126
|
+
)
|
70
127
|
)
|
71
128
|
|
72
129
|
logging.info("Selecting address")
|
@@ -86,51 +143,51 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
86
143
|
|
87
144
|
input_element_address_btn.click()
|
88
145
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
146
|
+
# Be patient, clicks take time!
|
147
|
+
time.sleep(2)
|
148
|
+
# logging.info(driver.page_source)
|
149
|
+
|
150
|
+
try:
|
151
|
+
link_element = driver.find_element(
|
152
|
+
By.XPATH,
|
153
|
+
'//a[contains(text(),"Find your current bin collection day")]',
|
154
|
+
)
|
155
|
+
logging.info(
|
156
|
+
"Found override panel span, search for link and use old logic"
|
93
157
|
)
|
94
|
-
)
|
95
158
|
|
96
|
-
|
97
|
-
soup = BeautifulSoup(driver.page_source, features="html.parser")
|
98
|
-
bins = []
|
159
|
+
link_element.click()
|
99
160
|
|
100
|
-
|
101
|
-
|
102
|
-
"span", class_="CTID-77-_ eb-77-Override-textControl"
|
103
|
-
)
|
104
|
-
if rubbish_recycling:
|
105
|
-
match = re.search(r"collected weekly on (\w+)", rubbish_recycling.text)
|
106
|
-
if match:
|
107
|
-
day_name = match.group(1)
|
108
|
-
next_collection = get_next_day_of_week(day_name)
|
109
|
-
bins.append(
|
110
|
-
{
|
111
|
-
"type": "Rubbish and recycling",
|
112
|
-
"collectionDate": next_collection,
|
113
|
-
}
|
114
|
-
)
|
161
|
+
# Be patient, clicks take time!
|
162
|
+
time.sleep(2)
|
115
163
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
164
|
+
bins = self.get_legacy_bins(driver.page_source)
|
165
|
+
|
166
|
+
except NoSuchElementException:
|
167
|
+
logging.info("Waiting for bin collection table")
|
168
|
+
collections_table = wait.until(
|
169
|
+
EC.presence_of_element_located(
|
170
|
+
(
|
171
|
+
By.XPATH,
|
172
|
+
'//table[contains(@class,"eb-1j4UaesZ-tableContent")]',
|
173
|
+
)
|
125
174
|
)
|
175
|
+
)
|
126
176
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
177
|
+
soup = BeautifulSoup(driver.page_source, features="html.parser")
|
178
|
+
rows = soup.find_all(class_="eb-1j4UaesZ-tableRow")
|
179
|
+
|
180
|
+
for row in rows:
|
181
|
+
cols = row.find_all("td")
|
182
|
+
date_string = cols[0].findChild("div").findChild("div").get_text()
|
183
|
+
bin_type = cols[1].findChild("div").findChild("div").get_text()
|
184
|
+
|
185
|
+
col_date = datetime.strptime(date_string, "%A %B %d, %Y")
|
132
186
|
bins.append(
|
133
|
-
{
|
187
|
+
{
|
188
|
+
"type": bin_type,
|
189
|
+
"collectionDate": datetime.strftime(col_date, date_format),
|
190
|
+
}
|
134
191
|
)
|
135
192
|
|
136
193
|
return {"bins": bins}
|