uk_bin_collection 0.152.11__py3-none-any.whl → 0.154.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +16 -21
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +45 -120
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +4 -1
- uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +15 -36
- uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +75 -100
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +55 -24
- uk_bin_collection/uk_bin_collection/councils/EastHertsCouncil.py +82 -24
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +32 -34
- uk_bin_collection/uk_bin_collection/councils/FarehamBoroughCouncil.py +5 -2
- uk_bin_collection/uk_bin_collection/councils/FolkstoneandHytheDistrictCouncil.py +22 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/HartlepoolBoroughCouncil.py +3 -1
- uk_bin_collection/uk_bin_collection/councils/HinckleyandBosworthBoroughCouncil.py +7 -1
- uk_bin_collection/uk_bin_collection/councils/IpswichBoroughCouncil.py +3 -1
- uk_bin_collection/uk_bin_collection/councils/LichfieldDistrictCouncil.py +7 -1
- uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py +17 -6
- uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py +26 -128
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +63 -79
- uk_bin_collection/uk_bin_collection/councils/NorwichCityCouncil.py +67 -66
- uk_bin_collection/uk_bin_collection/councils/NuneatonBedworthBoroughCouncil.py +19 -7
- uk_bin_collection/uk_bin_collection/councils/RunnymedeBoroughCouncil.py +7 -1
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +4 -2
- uk_bin_collection/uk_bin_collection/councils/SandwellBoroughCouncil.py +4 -11
- uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py +39 -21
- uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +4 -0
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +16 -13
- uk_bin_collection/uk_bin_collection/councils/WiltshireCouncil.py +47 -29
- {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/RECORD +33 -33
- {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.152.11.dist-info → uk_bin_collection-0.154.0.dist-info}/entry_points.txt +0 -0
uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py

@@ -1,17 +1,17 @@
 import time
+import datetime
 
+from datetime import datetime
 from bs4 import BeautifulSoup
 from selenium.common.exceptions import TimeoutException
 from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support.ui import Select, WebDriverWait
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-# import the wonderful Beautiful Soup and the URL grabber
-
-
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -30,16 +30,18 @@ class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
         try:
-            page = "https://
+            page = "https://bincollection.northumberland.gov.uk/postcode"
 
             data = {"bins": []}
 
-            user_paon = kwargs.get("paon")
             user_postcode = kwargs.get("postcode")
+            user_uprn = kwargs.get("uprn")
+
+            check_postcode(user_postcode)
+            check_uprn(user_uprn)
+
             web_driver = kwargs.get("web_driver")
             headless = kwargs.get("headless")
-            check_paon(user_paon)
-            check_postcode(user_postcode)
 
             # Create Selenium webdriver
             driver = create_webdriver(web_driver, headless, None, __name__)
@@ -50,105 +52,87 @@ class CouncilClass(AbstractGetBinDataClass):
 
             # Wait for and click cookie button
             cookie_button = wait.until(
-                EC.element_to_be_clickable(
+                EC.element_to_be_clickable(
+                    (By.CLASS_NAME, "accept-all")
+                )
             )
             cookie_button.click()
 
-            # Wait for and find
-
+            # Wait for and find postcode input
+            inputElement_pc = wait.until(
                 EC.presence_of_element_located(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtHouse",
-                    )
+                    (By.ID, "postcode")
                 )
             )
 
-            #
-            inputElement_pc
+            # Enter postcode and submit
+            inputElement_pc.send_keys(user_postcode)
+            inputElement_pc.send_keys(Keys.ENTER)
+
+            # Wait for and find house number input
+            selectElement_address = wait.until(
                 EC.presence_of_element_located(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtPostcode",
-                    )
+                    (By.ID, "address")
                 )
             )
 
-
-
-            inputElement_hn.send_keys(user_paon)
+            dropdown = Select(selectElement_address)
+            dropdown.select_by_value(user_uprn)
 
-            # Click
-
+            # Click submit button and wait for results
+            submit_button = wait.until(
                 EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_butLookup",
-                    )
+                    (By.CLASS_NAME, "govuk-button")
                 )
             )
-
+            submit_button.click()
 
             # Wait for results to load
             route_summary = wait.until(
                 EC.presence_of_element_located(
-                    (
-                        By.ID,
-                        "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
-                    )
+                    (By.CLASS_NAME, "govuk-table")
                 )
             )
 
+            now = datetime.now()
+            current_month = now.month
+            current_year = now.year
+
             # Get page source after everything has loaded
             soup = BeautifulSoup(driver.page_source, features="html.parser")
 
-            #
-
-
-
-
-
-
-
-
-
+            # From the table, find all rows:
+            # - cell 1 is the date in format eg. 9 September (so no year value 🥲)
+            # - cell 2 is the day name, not useful
+            # - cell 3 is the bin type eg. "General waste", "Recycling", "Garden waste"
+            rows = soup.find("tbody", class_="govuk-table__body").find_all("tr", class_="govuk-table__row")
+
+            for row in rows:
+                bin_type=row.find_all("td")[-1].text.strip()
+
+                collection_date_string = row.find('th').text.strip()
+
+                # sometimes but not always the day is written "22nd" instead of 22 so make sure we get a proper int
+                collection_date_day = "".join([i for i in list(collection_date_string.split(" ")[0]) if i.isdigit()])
+                collection_date_month_name = collection_date_string.split(" ")[1]
+
+                # if we are currently in Oct, Nov, or Dec and the collection month is Jan, Feb, or Mar, let's assume its next year
+                if (current_month >= 10) and (collection_date_month_name in ["January", "February", "March"]):
+                    collection_date_year = current_year + 1
+                else:
+                    collection_date_year = current_year
+
+                collection_date = time.strptime(
+                    f"{collection_date_day} {collection_date_month_name} {collection_date_year}", "%d %B %Y"
                 )
-            )
 
-
-
-
-
-
-
-            bin_colour = self.extract_styles(style_str)["background-color"].upper()
-            bins_by_colours[bin_colour] = bin
-
-            # Work through the tables gathering the dates, if the cell has a background colour - match it to the bin type.
-            calander_tables = soup.find_all("table", title="Calendar")
-            for table in calander_tables:
-                # Get month and year
-                # First row in table is the header
-                rows = table.find_all("tr")
-                month_and_year = (
-                    rows[0].find("table", class_="calCtrlTitle").find("td").string
+                # Add it to the data
+                data["bins"].append(
+                    {
+                        "type": bin_type,
+                        "collectionDate": time.strftime(date_format, collection_date),
+                    }
                 )
-                bin_days = table.find_all("td", class_="calCtrlDay")
-                for day in bin_days:
-                    day_styles = self.extract_styles(day["style"])
-                    if "background-color" in day_styles:
-                        colour = day_styles["background-color"].upper()
-                        date = time.strptime(
-                            f"{day.string} {month_and_year}", "%d %B %Y"
-                        )
-
-                        # Add it to the data
-                        data["bins"].append(
-                            {
-                                "type": bins_by_colours[colour],
-                                "collectionDate": time.strftime(date_format, date),
-                            }
-                        )
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
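The trickiest part of the rewritten Northumberland scraper is the date handling: the GOV.UK-styled results table lists days as "9 September" or "22nd September" with no year, so the code strips any ordinal suffix and infers the year from the current month. A standalone sketch of that normalisation (the helper name is illustrative, not part of the package):

```python
import time
from datetime import datetime


def normalise_collection_date(date_string: str, today: datetime) -> time.struct_time:
    """Sketch of the diff's rule: a Jan-Mar date seen in Oct-Dec rolls to next year."""
    day_part, month_name = date_string.split(" ")[:2]
    day = "".join(ch for ch in day_part if ch.isdigit())  # "22nd" -> "22"

    year = today.year
    if today.month >= 10 and month_name in ["January", "February", "March"]:
        year += 1  # collection listed late in the year but falling early next year

    return time.strptime(f"{day} {month_name} {year}", "%d %B %Y")


# e.g. scraping in November 2025: "22nd January" resolves to 22 January 2026
print(normalise_collection_date("22nd January", datetime(2025, 11, 5)))
```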
uk_bin_collection/uk_bin_collection/councils/NorwichCityCouncil.py

@@ -1,5 +1,3 @@
-import time
-
 import requests
 from bs4 import BeautifulSoup
 
@@ -17,76 +15,79 @@ class CouncilClass(AbstractGetBinDataClass):
 
     def parse_data(self, page: str, **kwargs) -> dict:
 
-
-
+        user_postcode = kwargs.get("postcode")
+        user_paon = kwargs.get("paon")
+        check_postcode(user_postcode)
+        check_paon(user_paon)
         bindata = {"bins": []}
 
-
-
-
-
-
-
-
-
-
-
-
-
+        URI = "https://bnr-wrp.whitespacews.com/"
+
+        session = requests.Session()
+
+        # get link from first page as has some kind of unique hash
+        r = session.get(
+            URI,
+        )
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        alink = soup.find("a", text="View my collections")
+
+        if alink is None:
+            raise Exception("Initial page did not load correctly")
+
+        # greplace 'seq' query string to skip next step
+        nextpageurl = alink["href"].replace("seq=1", "seq=2")
+
+        data = {
+            "address_name_number": user_paon,
+            "address_postcode": user_postcode,
         }
 
-
-
-
-
-        soup = BeautifulSoup(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        for bin in bin_types:
-            collections.append(
-                (
-                    bin.capitalize(),
-                    datetime.strptime(strong[1].text.strip(), date_format),
-                )
-            )
-            i += 2
-
-        if len(strong) > 3:
-            collections.append(
-                ("Garden", datetime.strptime(strong[4].text.strip(), date_format))
-            )
-
-        ordered_data = sorted(collections, key=lambda x: x[1])
-        for item in ordered_data:
+        # get list of addresses
+        r = session.post(nextpageurl, data)
+        r.raise_for_status()
+
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        # get first address (if you don't enter enough argument values this won't find the right address)
+        alink = soup.find("div", id="property_list").find("a")
+
+        if alink is None:
+            raise Exception("Address not found")
+
+        nextpageurl = URI + alink["href"]
+
+        # get collection page
+        r = session.get(
+            nextpageurl,
+        )
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        if soup.find("span", id="waste-hint"):
+            raise Exception("No scheduled services at this address")
+
+        u1s = soup.find("section", id="scheduled-collections").find_all("u1")
+
+        for u1 in u1s:
+            lis = u1.find_all("li", recursive=False)
+
+            date = lis[1].text.replace("\n", "")
+            bin_type = lis[2].text.replace("\n", "")
+
             dict_data = {
-                "type":
-                "collectionDate":
+                "type": bin_type,
+                "collectionDate": datetime.strptime(
+                    date,
+                    "%d/%m/%Y",
+                ).strftime(date_format),
             }
             bindata["bins"].append(dict_data)
 
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+        )
+
         return bindata
uk_bin_collection/uk_bin_collection/councils/NuneatonBedworthBoroughCouncil.py

@@ -1,23 +1,29 @@
+import re
+import urllib.parse
+
+import requests
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-from bs4 import BeautifulSoup
-import urllib.parse
-import requests
-import re
-
 
 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
 
         data = {"bins": []}
 
+        headers = {
+            "Origin": "https://www.nuneatonandbedworth.gov.uk/",
+            "Referer": "https://www.nuneatonandbedworth.gov.uk/",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         street = urllib.parse.quote_plus(kwargs.get("paon"))
         base_url = "https://www.nuneatonandbedworth.gov.uk/"
         search_query = f"directory/search?directoryID=3&showInMap=&keywords={street}&search=Search+directory"
 
-        search_response = requests.get(base_url + search_query)
+        search_response = requests.get(base_url + search_query, headers=headers)
 
         if search_response.status_code == 200:
             soup = BeautifulSoup(search_response.content, "html.parser")
@@ -56,7 +62,13 @@ class CouncilClass(AbstractGetBinDataClass):
 
     def get_bin_data(self, url) -> dict:
 
-
+        headers = {
+            "Origin": "https://www.nuneatonandbedworth.gov.uk/",
+            "Referer": "https://www.nuneatonandbedworth.gov.uk/",
+            "User-Agent": "Mozilla/5.0",
+        }
+
+        bin_day_response = requests.get(url, headers=headers)
 
         if bin_day_response.status_code == 200:
 
uk_bin_collection/uk_bin_collection/councils/RunnymedeBoroughCouncil.py

@@ -21,10 +21,16 @@ class CouncilClass(AbstractGetBinDataClass):
         check_uprn(user_uprn)
         bindata = {"bins": []}
 
+        headers = {
+            "Origin": "https://www.runnymede.gov.uk",
+            "Referer": "https://www.runnymede.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         URI = f"https://www.runnymede.gov.uk/homepage/150/check-your-bin-collection-day?address={user_uprn}"
 
         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)
 
         soup = BeautifulSoup(response.text, "html.parser")
 
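This is the same fix as the NuneatonBedworth change above: requests to both councils now carry minimal browser-like Origin, Referer, and User-Agent headers, presumably to get past filtering of the default python-requests User-Agent. The pattern in isolation (the UPRN is a placeholder):

```python
import requests

user_uprn = "100000000000"  # placeholder UPRN for illustration

headers = {
    "Origin": "https://www.runnymede.gov.uk",
    "Referer": "https://www.runnymede.gov.uk",
    "User-Agent": "Mozilla/5.0",  # the default "python-requests/x.y" UA is easy to block
}

URI = f"https://www.runnymede.gov.uk/homepage/150/check-your-bin-collection-day?address={user_uprn}"
response = requests.get(URI, headers=headers)
response.raise_for_status()
```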
uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py

@@ -1,7 +1,8 @@
 from bs4 import BeautifulSoup
+from lxml import etree
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
-from lxml import etree
 
 
 # import the wonderful Beautiful Soup and the URL grabber
@@ -20,7 +21,8 @@ class CouncilClass(AbstractGetBinDataClass):
         collections = []
 
         # Convert the XML to JSON and load the next collection data
-        result = soup.find("p").contents[0]
+        result = soup.find("p").contents[0]
+
         json_data = json.loads(result)["NextCollection"]
 
         # Get general waste data
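The Rushmoor change itself is only import ordering and spacing; the step described by the surviving comment is unchanged: the endpoint returns a <p> element whose text is JSON, and the scraper feeds that first text node to json.loads. A minimal sketch (the payload shape beyond the NextCollection key is invented):

```python
import json

from bs4 import BeautifulSoup

# invented stand-in for the real response body
page = '<p>{"NextCollection": {"RefuseDate": "2025-11-05"}}</p>'

soup = BeautifulSoup(page, "html.parser")
result = soup.find("p").contents[0]  # the <p>'s text node, a JSON string
json_data = json.loads(result)["NextCollection"]
print(json_data)
```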
uk_bin_collection/uk_bin_collection/councils/SandwellBoroughCouncil.py

@@ -28,17 +28,10 @@
         "Referer": "https://my.sandwell.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=",
     }
     LOOKUPS = [
-        (
-
-
-
-            "Recycling (Blue)",
-            "Household Waste (Grey)",
-            "Food Waste (Brown)",
-            "Batteries",
-        ],
-        ),
-        ("56b1cdaf6bb43", "GWDate", ["Garden Waste (Green)"]),
+        ("686295a88a750", "GWDate", ["Garden Waste (Green)"]),
+        ("686294de50729", "DWDate", ["Household Waste (Grey)"]),
+        ("6863a78a1dd8e", "FWDate", ["Food Waste (Brown)"]),
+        ("68629dd642423", "MDRDate", ["Recycling (Blue)"]),
     ]
 
     def parse_data(self, page: str, **kwargs) -> dict:
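The Sandwell fix swaps in a fresh set of form IDs and flattens LOOKUPS to one (form_id, date_field, bin_types) tuple per service. The parse_data that consumes the table sits outside this diff, but a lookup table like this is typically driven by a loop along these lines (hypothetical sketch, not the package's code):

```python
LOOKUPS = [
    ("686295a88a750", "GWDate", ["Garden Waste (Green)"]),
    ("686294de50729", "DWDate", ["Household Waste (Grey)"]),
    ("6863a78a1dd8e", "FWDate", ["Food Waste (Brown)"]),
    ("68629dd642423", "MDRDate", ["Recycling (Blue)"]),
]

# hypothetical response mapping each date field to its next collection date
response = {"GWDate": "05/11/2025", "DWDate": "06/11/2025"}

bins = []
for form_id, date_field, bin_types in LOOKUPS:
    date = response.get(date_field)
    if date:
        bins.extend({"type": t, "collectionDate": date} for t in bin_types)
print(bins)
```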
uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py

@@ -1,15 +1,18 @@
-import time
 import re
-import
+import time
 from datetime import datetime
+
+import requests
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
+
 def get_street_from_postcode(postcode: str, api_key: str) -> str:
     url = "https://maps.googleapis.com/maps/api/geocode/json"
     params = {"address": postcode, "key": api_key}
@@ -25,6 +28,7 @@ def get_street_from_postcode(postcode: str, api_key: str) -> str:
 
     raise ValueError("No street (route) found in the response.")
 
+
 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
@@ -37,10 +41,10 @@
 
         headless = kwargs.get("headless")
        web_driver = kwargs.get("web_driver")
-
+        UserAgent = "Mozilla/5.0"
+        driver = create_webdriver(web_driver, headless, UserAgent, __name__)
         page = "https://www.slough.gov.uk/bin-collections"
         driver.get(page)
-
         # Accept cookies
         WebDriverWait(driver, 10).until(
             EC.element_to_be_clickable((By.ID, "ccc-recommended-settings"))
@@ -50,14 +54,20 @@
         address_input = WebDriverWait(driver, 10).until(
             EC.presence_of_element_located((By.ID, "keyword_directory25"))
         )
-        user_address = get_street_from_postcode(
+        user_address = get_street_from_postcode(
+            user_postcode, "AIzaSyBDLULT7EIlNtHerswPtfmL15Tt3Oc0bV8"
+        )
         address_input.send_keys(user_address + Keys.ENTER)
 
         # Wait for address results to load
         WebDriverWait(driver, 10).until(
-            EC.presence_of_all_elements_located(
+            EC.presence_of_all_elements_located(
+                (By.CSS_SELECTOR, "span.list__link-text")
+            )
+        )
+        span_elements = driver.find_elements(
+            By.CSS_SELECTOR, "span.list__link-text"
         )
-        span_elements = driver.find_elements(By.CSS_SELECTOR, "span.list__link-text")
 
         for span in span_elements:
             if user_address.lower() in span.text.lower():
@@ -68,7 +78,9 @@
 
         # Wait for address detail page
         WebDriverWait(driver, 10).until(
-            EC.presence_of_element_located(
+            EC.presence_of_element_located(
+                (By.CSS_SELECTOR, "section.site-content")
+            )
         )
         soup = BeautifulSoup(driver.page_source, "html.parser")
 
@@ -86,28 +98,33 @@
             bin_url = "https://www.slough.gov.uk" + bin_url
 
             # Visit the child page
-            print(f"Navigating to {bin_url}")
+            # print(f"Navigating to {bin_url}")
             driver.get(bin_url)
             WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, "div.page-content")
+                )
             )
             child_soup = BeautifulSoup(driver.page_source, "html.parser")
 
             editor_div = child_soup.find("div", class_="editor")
             if not editor_div:
-                print("No editor div found on bin detail page.")
+                # print("No editor div found on bin detail page.")
                 continue
 
             ul = editor_div.find("ul")
             if not ul:
-                print("No <ul> with dates found in editor div.")
+                # print("No <ul> with dates found in editor div.")
                 continue
 
             for li in ul.find_all("li"):
                 raw_text = li.get_text(strip=True).replace(".", "")
 
-                if
-
+                if (
+                    "no collection" in raw_text.lower()
+                    or "no collections" in raw_text.lower()
+                ):
+                    # print(f"Ignoring non-collection note: {raw_text}")
                     continue
 
                 raw_date = raw_text
@@ -117,19 +134,20 @@
                 except ValueError:
                     raw_date_cleaned = raw_date.split("(")[0].strip()
                     try:
-                        parsed_date = datetime.strptime(
+                        parsed_date = datetime.strptime(
+                            raw_date_cleaned, "%d %B %Y"
+                        )
                     except Exception:
                         print(f"Could not parse date: {raw_text}")
                         continue
 
                 formatted_date = parsed_date.strftime("%d/%m/%Y")
                 contains_date(formatted_date)
-                bin_data["bins"].append(
-                    "type": bin_type,
-
-                })
+                bin_data["bins"].append(
+                    {"type": bin_type, "collectionDate": formatted_date}
+                )
 
-                print(f"Type: {bin_type}, Date: {formatted_date}")
+                # print(f"Type: {bin_type}, Date: {formatted_date}")
 
         except Exception as e:
             print(f"An error occurred: {e}")
@@ -137,4 +155,4 @@
         finally:
             if driver:
                 driver.quit()
-        return bin_data
+        return bin_data
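Beyond reflowed lines and commented-out debug prints, the Slough diff keeps its two-stage date parse: try the raw list-item text first and, on ValueError, strip a trailing parenthetical before retrying with "%d %B %Y". The fallback on its own (sample string invented; the first attempt is assumed to use the same format):

```python
from datetime import datetime


def parse_collection_date(raw_text: str) -> str:
    """Sketch of the fallback in the diff: drop '(...)' and retry."""
    try:
        parsed_date = datetime.strptime(raw_text, "%d %B %Y")
    except ValueError:
        raw_date_cleaned = raw_text.split("(")[0].strip()
        parsed_date = datetime.strptime(raw_date_cleaned, "%d %B %Y")  # may still raise
    return parsed_date.strftime("%d/%m/%Y")


print(parse_collection_date("22 August 2025 (Friday)"))  # -> 22/08/2025
```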
uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py

@@ -77,6 +77,10 @@
             )
             submit.click()
 
+            WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.CLASS_NAME, "bin-collection__month"))
+            )
+
             soup = BeautifulSoup(driver.page_source, features="html.parser")
 
             # Quit Selenium webdriver to release session
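The four added lines close a classic Selenium race: driver.page_source could previously be captured before the results finished rendering, so the scraper now blocks on an explicit wait for the first calendar month element before parsing. The same guard in isolation, as a helper (assumes a live webdriver already on the results page):

```python
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def wait_for_calendar(driver) -> BeautifulSoup:
    """Block until the results have rendered, then hand the HTML to BeautifulSoup."""
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "bin-collection__month"))
    )
    # page_source should now include the rendered collection calendar
    return BeautifulSoup(driver.page_source, features="html.parser")
```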
|