uk_bin_collection 0.152.0__py3-none-any.whl → 0.152.2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
- uk_bin_collection/tests/input.json +92 -58
- uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +69 -24
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +24 -47
- uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +14 -3
- uk_bin_collection/uk_bin_collection/councils/CheltenhamBoroughCouncil.py +12 -12
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +24 -3
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +4 -0
- uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +114 -261
- uk_bin_collection/uk_bin_collection/councils/DartfordBoroughCouncil.py +13 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +17 -2
- uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +14 -1
- uk_bin_collection/uk_bin_collection/councils/EastbourneBoroughCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +59 -45
- uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +2 -0
- uk_bin_collection/uk_bin_collection/councils/EppingForestDistrictCouncil.py +47 -15
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +13 -1
- uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +2 -3
- uk_bin_collection/uk_bin_collection/councils/HerefordshireCouncil.py +13 -2
- uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +18 -4
- uk_bin_collection/uk_bin_collection/councils/LewesDistrictCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +16 -4
- uk_bin_collection/uk_bin_collection/councils/MaidstoneBoroughCouncil.py +42 -47
- uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +13 -6
- uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py +2 -1
- uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +14 -9
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +2 -2
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +50 -14
- uk_bin_collection/uk_bin_collection/councils/SouthRibbleCouncil.py +115 -65
- uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +10 -5
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +1 -3
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +3 -0
- {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/METADATA +179 -1
- {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/RECORD +36 -34
- {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/entry_points.txt +0 -0
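
Most of the parser changes below share one new preamble: resolve a UPRN from kwargs, validate it, build the council URL from it, and fall back to a stored URL for users of the old configuration system (see the Glasgow, Herefordshire, Huntingdon, Lewes, Liverpool and Newham hunks). A minimal standalone sketch of that pattern follows; `check_uprn` here is a stand-in for the helper of the same name in the package's `common` module, and the URL template is illustrative. Note that if the real `check_uprn` raises on a missing UPRN, the exception path runs before the fallback line, so the backwards-compatibility branch depends on how the helper treats empty values.

```python
# Sketch of the recurring "UPRN with legacy-URL fallback" preamble.
# check_uprn() approximates the real helper from
# uk_bin_collection.uk_bin_collection.common; the URL is illustrative.
import re


def check_uprn(uprn) -> None:
    # UPRNs are numeric identifiers of up to 12 digits.
    if not uprn or not re.fullmatch(r"\d{1,12}", str(uprn)):
        raise ValueError(f"Invalid UPRN: {uprn}")


def resolve_url(**kwargs) -> str:
    try:
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)
        url = f"https://example-council.gov.uk/bins?uprn={user_uprn}"
        if not user_uprn:
            # Fallback for users who stored a URL under the old system.
            url = kwargs.get("url")
    except Exception as e:
        raise ValueError(f"Error getting identifier: {str(e)}")
    return url


print(resolve_url(uprn="100021234567"))
# https://example-council.gov.uk/bins?uprn=100021234567
```
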
uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py

@@ -1,4 +1,8 @@
 from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import Select, WebDriverWait
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 

@@ -12,59 +16,69 @@ class CouncilClass(AbstractGetBinDataClass):
     """

     def parse_data(self, page: str, **kwargs) -> dict:
-
-
-
+        try:
+            uprn = kwargs.get("uprn")
+            # Check the UPRN is valid
+            check_uprn(uprn)
+            headless = kwargs.get("headless")
+            web_driver = kwargs.get("web_driver")
+            url = f"https://www.eastleigh.gov.uk/waste-bins-and-recycling/collection-dates/your-waste-bin-and-recycling-collections?uprn={uprn}"
+            driver = create_webdriver(web_driver, headless, None, __name__)
+            driver.get(url)

-
-
+            wait = WebDriverWait(driver, 10)
+            bin_content = wait.until(
+                EC.presence_of_element_located((By.CLASS_NAME, "dl-horizontal"))
+            )

-
-
-        page = requests.get(url)
+            # Make a BS4 object from driver's page source
+            soup = BeautifulSoup(driver.page_source, features="html.parser")

-
-
-        soup.prettify()
+            # Data to return
+            data = {"bins": []}

-
-
+            # Valid bin types
+            binTypes = [
+                "Household Waste Bin",
+                "Recycling Bin",
+                "Food Waste Bin",
+                "Glass Box and Batteries",
+                "Garden Waste Bin",
+            ]

-
-
-            "Household Waste Bin",
-            "Recycling Bin",
-            "Food Waste Bin",
-            "Glass Box and Batteries",
-            "Garden Waste Bin",
-        ]
+            # Value to create dict for DL values
+            keys, values = [], []

-
-
+            # Loop though DT and DD for DL containing bins
+            dl = soup.find("dl", {"class": "dl-horizontal"})
+            for dt in dl.find_all("dt"):
+                keys.append(dt.text.strip())
+            for dd in dl.find_all("dd"):
+                values.append(dd.text.strip())

-
-
-        for dt in dl.find_all("dt"):
-            keys.append(dt.text.strip())
-        for dd in dl.find_all("dd"):
-            values.append(dd.text.strip())
+            # Create dict for bin name and string dates
+            binDict = dict(zip(keys, values))

-
-
+            # Process dict for valid bin types
+            for bin in list(binDict):
+                if bin in binTypes:
+                    if not binDict[bin].startswith("You haven't yet signed up for"):
+                        # Convert date
+                        date = datetime.strptime(binDict[bin], "%a, %d %b %Y")

-
-
-
-
-
+                        # Set bin data
+                        dict_data = {
+                            "type": bin,
+                            "collectionDate": date.strftime(date_format),
+                        }
+                        data["bins"].append(dict_data)

-
-
-            "type": bin,
-            "collectionDate": date.strftime(date_format),
-        }
-        data["bins"].append(dict_data)
+            # Return bin data
+            return data

-
-
+        except Exception as e:
+            print(f"Error fetching/parsing data: {str(e)}")
+            return {"bins": [{"type": "Error", "collectionDate": "2024-01-01"}]}
+        finally:
+            if "driver" in locals():
+                driver.quit()
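
The core of the new Eastleigh parser is reading the `dl-horizontal` definition list into a dict by zipping `<dt>` labels with `<dd>` values. A self-contained illustration with made-up HTML; the `%a, %d %b %Y` date format is the one the hunk parses.

```python
# Zip <dt> labels with <dd> values to build a bin -> date mapping,
# as the Eastleigh change does. The sample HTML is invented.
from datetime import datetime

from bs4 import BeautifulSoup

html = """
<dl class="dl-horizontal">
  <dt>Recycling Bin</dt><dd>Mon, 02 Jun 2025</dd>
  <dt>Food Waste Bin</dt><dd>Tue, 03 Jun 2025</dd>
</dl>
"""
soup = BeautifulSoup(html, "html.parser")
dl = soup.find("dl", {"class": "dl-horizontal"})
keys = [dt.text.strip() for dt in dl.find_all("dt")]
values = [dd.text.strip() for dd in dl.find_all("dd")]
bin_dict = dict(zip(keys, values))

for bin_type, date_str in bin_dict.items():
    date = datetime.strptime(date_str, "%a, %d %b %Y")
    print(bin_type, date.strftime("%d/%m/%Y"))
```
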
uk_bin_collection/uk_bin_collection/councils/EppingForestDistrictCouncil.py

@@ -1,13 +1,15 @@
+from datetime import datetime
+
 from bs4 import BeautifulSoup
-from uk_bin_collection.uk_bin_collection.common import *
-from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 from selenium import webdriver
-from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.common.by import By
-from selenium.webdriver.
+from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
-from
+from selenium.webdriver.support.ui import WebDriverWait
+
+from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.common import date_format
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


 class CouncilClass(AbstractGetBinDataClass):

@@ -15,27 +17,57 @@
         postcode = kwargs.get("postcode", "")
         web_driver = kwargs.get("web_driver")
         headless = kwargs.get("headless")
-
-        options = webdriver.ChromeOptions()
-        if headless:
-            options.add_argument("--headless")
-        driver = create_webdriver(web_driver, headless)
+        data = {"bins": []}

         try:
-
-
+            # Initialize webdriver with logging
+            print(f"Initializing webdriver with: {web_driver}, headless: {headless}")
+            driver = create_webdriver(web_driver, headless, None, __name__)
+
+            # Format and load URL
+            page_url = f"https://eppingforestdc.maps.arcgis.com/apps/instant/lookup/index.html?appid=bfca32b46e2a47cd9c0a84f2d8cdde17&find={postcode}"
+            print(f"Accessing URL: {page_url}")
+            driver.get(page_url)
+
+            # Wait for initial page load
+            wait = WebDriverWait(driver, 20)  # Reduced timeout to fail faster if issues
+
+            # First wait for any loading indicators to disappear
+            try:
+                print("Waiting for loading spinner to disappear...")
+                wait.until(
+                    EC.invisibility_of_element_located(
+                        (By.CSS_SELECTOR, ".esri-widget--loader-container")
+                    )
+                )
+            except Exception as e:
+                print(f"Loading spinner wait failed (may be normal): {str(e)}")
+
+            # Then wait for the content container
+            print("Waiting for content container...")
+            wait.until(
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, ".esri-feature-content")
+                )
             )
-
-
+
+            # Finally wait for actual content
+            print("Waiting for content to be visible...")
+            content = wait.until(
                 EC.visibility_of_element_located(
                     (By.CSS_SELECTOR, ".esri-feature-content")
                 )
             )
+
+            # Check if content is actually present
+            if not content:
+                raise ValueError("Content element found but empty")
+
+            print("Content found, getting page source...")
             html_content = driver.page_source

             soup = BeautifulSoup(html_content, "html.parser")
             bin_info_divs = soup.select(".esri-feature-content p")
-            data = {"bins": []}
             for div in bin_info_divs:
                 if "collection day is" in div.text:
                     bin_type, date_str = div.text.split(" collection day is ")
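
The rewritten Epping parser replaces a bare page load with three staged explicit waits: loading spinner invisible, result container present in the DOM, result container visible. A generic sketch of that sequence, assuming a locally available chromedriver and an invented postcode; the selectors are the ArcGIS ones from the hunk.

```python
# Staged explicit waits against an ArcGIS "instant lookup" app, as in the
# Epping change. Assumes chromedriver on PATH; the postcode is invented.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

driver = webdriver.Chrome()
try:
    driver.get(
        "https://eppingforestdc.maps.arcgis.com/apps/instant/lookup/index.html"
        "?appid=bfca32b46e2a47cd9c0a84f2d8cdde17&find=CM16 4BZ"
    )
    wait = WebDriverWait(driver, 20)
    # 1. Loader gone (also passes immediately if it never appeared).
    wait.until(
        EC.invisibility_of_element_located(
            (By.CSS_SELECTOR, ".esri-widget--loader-container")
        )
    )
    # 2. Result container attached to the DOM.
    wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, ".esri-feature-content"))
    )
    # 3. Result container actually rendered.
    content = wait.until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, ".esri-feature-content"))
    )
    print(content.text)
finally:
    driver.quit()
```
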
uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py

@@ -14,7 +14,19 @@
     """

     def parse_data(self, page: str, **kwargs) -> dict:
-
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://onlineservices.glasgow.gov.uk/forms/RefuseAndRecyclingWebApplication/CollectionsCalendar.aspx?UPRN={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
+        # Make a BS4 object
+        page = requests.get(url, verify=False)
         soup = BeautifulSoup(page.text, features="html.parser")
         soup.prettify()

uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py

@@ -27,9 +27,8 @@ class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
         try:
-            uprn = kwargs.get("uprn")
             postcode = kwargs.get("postcode")
-
+            house_number = kwargs.get("paon")

             url = "https://my.guildford.gov.uk/customers/s/view-bin-collections"


@@ -60,7 +59,7 @@ class CouncilClass(AbstractGetBinDataClass):
            EC.presence_of_element_located(
                (
                    By.XPATH,
-                    f"//lightning-base-formatted-text[contains(text(), '{
+                    f"//lightning-base-formatted-text[contains(text(), '{house_number}')]",
                )
            )
        )
uk_bin_collection/uk_bin_collection/councils/HerefordshireCouncil.py

@@ -15,9 +15,20 @@
     """

     def parse_data(self, page: str, **kwargs) -> dict:
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://www.herefordshire.gov.uk/rubbish-recycling/check-bin-collection-day?blpu_uprn={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
         # Make a BS4 object
-
-        soup.
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, "html.parser")
+        soup.prettify

         data = {"bins": []}

uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py

@@ -2,10 +2,12 @@

 # This script pulls (in one hit) the data from
 # Huntingdon District Council District Council Bins Data
+from datetime import datetime
+
 from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
-from uk_bin_collection.uk_bin_collection.common import date_format
-from datetime import datetime


 # import the wonderful Beautiful Soup and the URL grabber

@@ -17,9 +19,21 @@ class CouncilClass(AbstractGetBinDataClass):
     """

     def parse_data(self, page, **kwargs) -> None:
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"http://www.huntingdonshire.gov.uk/refuse-calendar/{user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
         # Make a BS4 object
-
-        soup.
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, "html.parser")
+        soup.prettify

         data = {"bins": []}

uk_bin_collection/uk_bin_collection/councils/LewesDistrictCouncil.py

@@ -0,0 +1,76 @@
+# Eastbourne uses the same script.
+
+from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://environmentfirst.co.uk/house.php?uprn={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
+        # Make a BS4 object
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, features="html.parser")
+        soup.prettify()
+
+        # Get the paragraph lines from the page
+        data = {"bins": []}
+        page_text = soup.find("div", {"class": "collect"}).find_all("p")
+
+        # Parse the correct lines (find them, remove the ordinal indicator and make them the correct format date) and
+        # then add them to the dictionary
+        rubbish_day = datetime.strptime(
+            remove_ordinal_indicator_from_date_string(
+                page_text[2].find_next("strong").text
+            ),
+            "%d %B %Y",
+        ).strftime(date_format)
+        dict_data = {
+            "type": "Rubbish",
+            "collectionDate": rubbish_day,
+        }
+        data["bins"].append(dict_data)
+        recycling_day = datetime.strptime(
+            remove_ordinal_indicator_from_date_string(
+                page_text[4].find_next("strong").text
+            ),
+            "%d %B %Y",
+        ).strftime(date_format)
+        dict_data = {
+            "type": "Recycling",
+            "collectionDate": recycling_day,
+        }
+        data["bins"].append(dict_data)
+
+        if len(page_text) > 5:
+            garden_day = datetime.strptime(
+                remove_ordinal_indicator_from_date_string(
+                    page_text[6].find_next("strong").text
+                ),
+                "%d %B %Y",
+            ).strftime(date_format)
+            dict_data = {
+                "type": "Garden",
+                "collectionDate": garden_day,
+            }
+            data["bins"].append(dict_data)
+
+        return data
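
`remove_ordinal_indicator_from_date_string` comes from the package's `common` module and is not shown in this diff. A regex approximation of what it has to do for the `strptime` calls above to succeed:

```python
# Stand-in for the package helper remove_ordinal_indicator_from_date_string:
# drop "st"/"nd"/"rd"/"th" after a day number so "%d %B %Y" can parse it.
import re
from datetime import datetime


def remove_ordinal_indicator_from_date_string(date_string: str) -> str:
    # The lookbehind keeps month names like "August" intact.
    return re.sub(r"(?<=\d)(st|nd|rd|th)", "", date_string)


cleaned = remove_ordinal_indicator_from_date_string("4th June 2025")
print(cleaned)  # 4 June 2025
print(datetime.strptime(cleaned, "%d %B %Y").strftime("%d/%m/%Y"))  # 04/06/2025
```
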
uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py

@@ -1,7 +1,8 @@
 from bs4 import BeautifulSoup
+from dateutil.relativedelta import relativedelta
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
-from dateutil.relativedelta import relativedelta


 # import the wonderful Beautiful Soup and the URL grabber

@@ -18,9 +19,20 @@ class CouncilClass(AbstractGetBinDataClass):
         collections = []
         curr_date = datetime.today()

-
-
-
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://liverpool.gov.uk/Bins/BinDatesTable?UPRN={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
+        # Make a BS4 object
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, "html.parser")
+        soup.prettify

         # Get all table rows on the page - enumerate gives us an index, which is handy for to keep a row count.
         # In this case, the first (0th) row is headings, so we can skip it, then parse the other data.
uk_bin_collection/uk_bin_collection/councils/MaidstoneBoroughCouncil.py

@@ -3,6 +3,7 @@ from datetime import datetime

 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import Select
 from selenium.webdriver.support.wait import WebDriverWait

@@ -10,8 +11,6 @@ from selenium.webdriver.support.wait import WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

-# import the wonderful Beautiful Soup and the URL grabber
-

 class CouncilClass(AbstractGetBinDataClass):
     """

@@ -30,7 +29,7 @@ class CouncilClass(AbstractGetBinDataClass):
         web_driver = kwargs.get("web_driver")
         headless = kwargs.get("headless")
         check_postcode(user_postcode)
-
+
         # Create Selenium webdriver
         driver = create_webdriver(web_driver, headless, None, __name__)
         driver.get(page)

@@ -41,68 +40,64 @@
             driver.switch_to.frame(iframe_presense)

             wait = WebDriverWait(driver, 60)
-
+
             # Postal code input
             inputElement_postcodesearch = wait.until(
                 EC.element_to_be_clickable((By.NAME, "postcode"))
             )
             inputElement_postcodesearch.send_keys(user_postcode)
-
-
-
-
-
-            dropdown_select.click()
-
+
+            time.sleep(5)
+
+            inputElement_postcodesearch.send_keys(Keys.TAB + Keys.DOWN)
+
             dropdown = wait.until(
-                EC.element_to_be_clickable(
+                EC.element_to_be_clickable(
+                    (By.XPATH, f"//div[contains(text(), ' {user_paon}')]")
+                )
             )
             dropdown.click()

-            #
-            WebDriverWait(driver,
+            # This website is horrible!
+            WebDriverWait(driver, 20).until(
                 EC.text_to_be_present_in_element(
-                    (By.CSS_SELECTOR, "
-                )
-            )
-
-            # Wait for 'Searching for...' to be removed from page
-            WebDriverWait(driver, timeout=15).until(
-                EC.none_of(
-                    EC.text_to_be_present_in_element(
-                        (By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
-                    )
+                    (By.CSS_SELECTOR, "div.col-collection-panel"), "Next collection"
                 )
             )

             # Even then it can still be adding data to the page...
             time.sleep(5)

-
-
-
-            def is_a_collection_date(t):
-                return any("Next collection" in c for c in t.children)
-
-            for next_collection in soup.find_all(is_a_collection_date):
-                bin_info = list(
-                    next_collection.parent.select_one("div:nth-child(1)").children
-                )
-                if not bin_info:
-                    continue
-                bin = bin_info[0].get_text()
-                date = next_collection.select_one("strong").get_text(strip=True)
-                bin_date = datetime.strptime(date, "%d %b %Y")
-                dict_data = {
-                    "type": bin,
-                    "collectionDate": bin_date.strftime(date_format),
-                }
-                bin_data["bins"].append(dict_data)
-
-            bin_data["bins"].sort(
-                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+            # Scraping via Selenium rather than BeautifulSoup, to ensure eveything's loaded
+            collection_panels = driver.find_elements(
+                By.CSS_SELECTOR, "div.col-collection-panel"
             )

+            for panel in collection_panels:
+                try:
+                    # Get bin type (e.g., General waste, Food waste)
+                    bin_type = panel.find_element(
+                        By.CSS_SELECTOR, "h3.collectionDataHeader"
+                    ).text.strip()
+                    # Get next collection date
+                    lines = panel.find_elements(By.CSS_SELECTOR, "ul li")
+                    for line in lines:
+                        if "Next collection" in line.text:
+                            date_str = (
+                                line.text.split("Next collection")[1]
+                                .strip(": ")
+                                .strip()
+                            )
+                            bin_date = datetime.strptime(date_str, "%d/%m/%Y")
+                            bin_data["bins"].append(
+                                {
+                                    "type": bin_type,
+                                    "collectionDate": bin_date.strftime(date_format),
+                                }
+                            )
+                except Exception as inner_e:
+                    print(f"Skipping one panel due to error: {inner_e}")
+
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
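
The new Maidstone panel loop pulls the date out of free text rather than a dedicated element. The string handling in isolation, with an assumed sample of the panel text:

```python
# Isolated view of the "Next collection" text handling from the Maidstone
# hunk; the line text is an assumed sample.
from datetime import datetime

line_text = "Next collection: 05/06/2025"
date_str = line_text.split("Next collection")[1].strip(": ").strip()
bin_date = datetime.strptime(date_str, "%d/%m/%Y")
print(bin_date.strftime("%d %B %Y"))  # 05 June 2025
```
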
uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py

@@ -7,14 +7,21 @@ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
-
-
-
-
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://bincollection.newham.gov.uk/Details/Index/{user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")

         # Make a BS4 object
-
-        soup.
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, "html.parser")
+        soup.prettify

         # Form a JSON wrapper
         data = {"bins": []}
uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py

@@ -46,7 +46,8 @@
                 "type": bin_type,
                 "collectionDate": get_next_occurrence_from_day_month(
                     datetime.strptime(
-
+                        remove_ordinal_indicator_from_date_string(
+                            c["BinCollectionDate"].replace(" (*)", "").strip())
                         + " "
                         + datetime.now().strftime("%Y"),
                         "%A %d %B %Y",
uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py

@@ -43,9 +43,13 @@
             collectionDate = (
                 cells[1].get_text(strip=True) + " " + datetime.now().strftime("%Y")
             )
-
-
-
+
+            if len(cells) > 2:
+                nextCollectionDate = (
+                    cells[2].get_text(strip=True) + " " + datetime.now().strftime("%Y")
+                )
+            else:
+                nextCollectionDate = ""

             # Make each Bin element in the JSON
             dict_data = {

@@ -59,12 +63,13 @@
             data["bins"].append(dict_data)

             # Make each next Bin element in the JSON
-
-
-
-
-
-
+            if nextCollectionDate != "":
+                dict_data = {
+                    "type": binType,
+                    "collectionDate": get_next_occurrence_from_day_month(
+                        datetime.strptime(nextCollectionDate, "%A %d %B %Y")
+                    ).strftime(date_format),
+                }

             # Add data to the main JSON Wrapper
             data["bins"].append(dict_data)
uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py

@@ -81,9 +81,9 @@
         # The regular calendar only shows until end of March 2026, work out how many weeks that is
         weeks_total = math.floor((datetime(2026, 4, 1) - datetime.now()).days / 7)

-        # The garden calendar only shows until end of November
+        # The garden calendar only shows until end of November 2025, work out how many weeks that is
         garden_weeks_total = math.floor(
-            (datetime(
+            (datetime(2025, 12, 1) - datetime.now()).days / 7
         )

         regular_collections, garden_collections, special_collections = [], [], []
|