uk_bin_collection 0.74.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/README.rst +0 -0
- uk_bin_collection/tests/council_feature_input_parity.py +79 -0
- uk_bin_collection/tests/features/environment.py +7 -0
- uk_bin_collection/tests/features/validate_council_outputs.feature +767 -0
- uk_bin_collection/tests/input.json +1077 -0
- uk_bin_collection/tests/output.schema +41 -0
- uk_bin_collection/tests/step_defs/step_helpers/file_handler.py +46 -0
- uk_bin_collection/tests/step_defs/test_validate_council.py +87 -0
- uk_bin_collection/tests/test_collect_data.py +104 -0
- uk_bin_collection/tests/test_common_functions.py +342 -0
- uk_bin_collection/uk_bin_collection/collect_data.py +133 -0
- uk_bin_collection/uk_bin_collection/common.py +292 -0
- uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/ArunCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py +180 -0
- uk_bin_collection/uk_bin_collection/councils/BarnsleyMBCouncil.py +109 -0
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/BathAndNorthEastSomersetCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordBoroughCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordshireCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +147 -0
- uk_bin_collection/uk_bin_collection/councils/BirminghamCityCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py +105 -0
- uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +104 -0
- uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py +141 -0
- uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py +107 -0
- uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +95 -0
- uk_bin_collection/uk_bin_collection/councils/BuryCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CalderdaleCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/CannockChaseDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CardiffCouncil.py +172 -0
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +96 -0
- uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +127 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +32 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireWestAndChesterCouncil.py +125 -0
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/ConwyCountyBorough.py +27 -0
- uk_bin_collection/uk_bin_collection/councils/CrawleyBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +291 -0
- uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/DoncasterCouncil.py +77 -0
- uk_bin_collection/uk_bin_collection/councils/DorsetCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +41 -0
- uk_bin_collection/uk_bin_collection/councils/DurhamCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/EastCambridgeshireCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/EastRidingCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/FenlandDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/ForestOfDeanDistrictCouncil.py +113 -0
- uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py +1580 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +55 -0
- uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +150 -0
- uk_bin_collection/uk_bin_collection/councils/HaltonBoroughCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/HaringeyCouncil.py +59 -0
- uk_bin_collection/uk_bin_collection/councils/HarrogateBoroughCouncil.py +63 -0
- uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/HullCityCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/KingstonUponThamesCouncil.py +84 -0
- uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py +130 -0
- uk_bin_collection/uk_bin_collection/councils/KnowsleyMBCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +71 -0
- uk_bin_collection/uk_bin_collection/councils/LeedsCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/LisburnCastlereaghCityCouncil.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughHounslow.py +82 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughRedbridge.py +161 -0
- uk_bin_collection/uk_bin_collection/councils/MaldonDistrictCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/MalvernHillsDC.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/ManchesterCityCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/MansfieldDistrictCouncil.py +38 -0
- uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py +128 -0
- uk_bin_collection/uk_bin_collection/councils/MidSussexDistrictCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/MiltonKeynesCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/NewarkAndSherwoodDC.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/NewcastleCityCouncil.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py +53 -0
- uk_bin_collection/uk_bin_collection/councils/NorthKestevenDistrictCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLanarkshireCouncil.py +46 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorthamptonshireCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +220 -0
- uk_bin_collection/uk_bin_collection/councils/NorthWestLeicestershire.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/NorthYorkshire.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/NottinghamCityCouncil.py +36 -0
- uk_bin_collection/uk_bin_collection/councils/OldhamCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/PortsmouthCityCouncil.py +131 -0
- uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/ReadingBoroughCouncil.py +30 -0
- uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/RhonddaCynonTaffCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/RochdaleCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/RugbyBoroughCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/SalfordCityCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/SevenoaksDistrictCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/SheffieldCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ShropshireCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/SolihullCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/SouthAyrshireCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/SouthCambridgeshireCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/SouthLanarkshireCouncil.py +78 -0
- uk_bin_collection/uk_bin_collection/councils/SouthNorfolkCouncil.py +91 -0
- uk_bin_collection/uk_bin_collection/councils/SouthOxfordshireCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/SouthTynesideCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/StAlbansCityAndDistrictCouncil.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/StHelensBC.py +56 -0
- uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/StockportBoroughCouncil.py +39 -0
- uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +79 -0
- uk_bin_collection/uk_bin_collection/councils/StratfordUponAvonCouncil.py +94 -0
- uk_bin_collection/uk_bin_collection/councils/SunderlandCityCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/SwaleBoroughCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/TamesideMBCouncil.py +62 -0
- uk_bin_collection/uk_bin_collection/councils/TandridgeDistrictCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/TelfordAndWrekinCouncil.py +50 -0
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/TonbridgeAndMallingBC.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py +154 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WaverleyBoroughCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/WealdenDistrictCouncil.py +86 -0
- uk_bin_collection/uk_bin_collection/councils/WelhatCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WestLindseyDistrictCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WestNorthamptonshireCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WestSuffolkCouncil.py +64 -0
- uk_bin_collection/uk_bin_collection/councils/WiganBoroughCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/WiltshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/WindsorAndMaidenheadCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WokingBoroughCouncil.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/WyreCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py +33 -0
- uk_bin_collection/uk_bin_collection/get_bin_data.py +165 -0
- uk_bin_collection-0.74.0.dist-info/LICENSE +21 -0
- uk_bin_collection-0.74.0.dist-info/METADATA +247 -0
- uk_bin_collection-0.74.0.dist-info/RECORD +171 -0
- uk_bin_collection-0.74.0.dist-info/WHEEL +4 -0
- uk_bin_collection-0.74.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
# This script pulls (in one hit) the data from
|
4
|
+
# Huntingdon District Council Bins Data
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
7
|
+
from uk_bin_collection.uk_bin_collection.common import date_format
|
8
|
+
from datetime import datetime
|
9
|
+
|
10
|
+
|
11
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
12
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection parser for the Huntingdon District Council results page.

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page, **kwargs) -> dict:
        """
        Extract the listed collections from an already-fetched results page.

        :param page: Response-like object whose ``text`` attribute holds the
            page HTML.
        :param kwargs: Accepted for interface compatibility; not read here.
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}`` with
            dates rendered using the shared ``date_format``.
        :raises AttributeError: if the ``ul.d-print-none`` list, or the
            ``<strong>`` date inside an item, is not present in the page.
        :raises ValueError: if a date does not match ``"%A %d %B %Y"``.
        """
        # Make a BS4 object
        soup = BeautifulSoup(page.text, features="html.parser")

        data = {"bins": []}

        no_garden_message = "Your property does not receive a garden waste collection"
        results = soup.find("ul", class_="d-print-none").find_all("li")

        for result in results:
            # Flatten the item once; the text is needed both for the skip
            # check and for the bin type.
            text = result.get_text(strip=True)

            # Items that only say there is no garden waste service carry no date.
            if no_garden_message in text:
                continue

            data["bins"].append(
                {
                    # The bin type is the 6th and 7th space-separated tokens
                    # of the flattened item text.
                    "type": " ".join(text.split(" ")[5:7]).capitalize(),
                    "collectionDate": datetime.strptime(
                        result.find("strong").get_text(strip=True), "%A %d %B %Y"
                    ).strftime(date_format),
                }
            )

        return data
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# alternative implementation for retrieving bin data from Kingston Upon Thames Council
|
2
|
+
# principal URL is https://waste-services.kingston.gov.uk/waste/[uprn]
|
3
|
+
# https://www.kingston.gov.uk/info/200287/bins_and_recycling/1113/check_your_bin_collection_day
|
4
|
+
|
5
|
+
# switched to using Selenium as the htmx elements are not rendered reliably with requests
|
6
|
+
|
7
|
+
from selenium import webdriver
|
8
|
+
from selenium.webdriver.common.by import By
|
9
|
+
from selenium.webdriver.support.ui import WebDriverWait
|
10
|
+
from selenium.webdriver.support import expected_conditions as EC
|
11
|
+
from bs4 import BeautifulSoup
|
12
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
13
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
14
|
+
import re
|
15
|
+
|
16
|
+
|
17
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
18
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Selenium-driven bin collection scraper for Kingston Upon Thames Council.

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Load the council page in a browser and read each service's next collection.

        :param page: Not read by this implementation; the page is loaded
            through the webdriver instead.
        :param kwargs: Reads ``url`` (page to open) plus ``web_driver`` and
            ``headless``, which are handed to ``create_webdriver``.
        :return: ``{"bins": [...]}`` sorted by ascending collection date.
        :raises Exception: any failure is printed and then re-raised unchanged.
        """
        driver = None
        try:
            driver = create_webdriver(kwargs.get("web_driver"), kwargs.get("headless"))
            driver.get(kwargs.get("url"))

            # Up to 15 seconds, polling every 2, for the first service heading
            # to be present in the DOM.
            heading_locator = (By.CLASS_NAME, "waste-service-name")
            WebDriverWait(driver, 15, 2).until(
                EC.presence_of_element_located(heading_locator)
            )

            found_bins = []
            soup = BeautifulSoup(driver.page_source, "html.parser")

            for heading in soup.find_all("h3", {"class": "waste-service-name"}):
                # The summary rows for a service sit in the grid row that
                # follows its heading.
                grid = heading.find_next_sibling("div", {"class": "govuk-grid-row"})
                summary_rows = grid.find_all(
                    "div", {"class": "govuk-summary-list__row"}
                )

                for summary_row in summary_rows:
                    label = summary_row.find("dt").get_text().strip().lower()
                    if label != "next collection":
                        continue

                    day_and_month = remove_ordinal_indicator_from_date_string(
                        summary_row.find("dd").get_text()
                    ).strip()
                    # strip out any text inside of the date string
                    day_and_month = re.sub(r"\n\s*\(this.*?\)", "", day_and_month)

                    # The page text carries no year, so the current one is
                    # appended before parsing.
                    current_year = datetime.now().strftime("%Y")
                    parsed = datetime.strptime(
                        day_and_month + " " + current_year, "%A, %d %B %Y"
                    )
                    next_date = get_next_occurrence_from_day_month(parsed)

                    found_bins.append(
                        {
                            "type": heading.get_text().strip().capitalize(),
                            "collectionDate": next_date.strftime(date_format),
                        }
                    )

            found_bins.sort(
                key=lambda entry: datetime.strptime(
                    entry.get("collectionDate"), date_format
                )
            )
            data = {"bins": found_bins}

        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Re-raise so the failure propagates to the caller
            raise
        finally:
            # The driver is closed whether or not an exception occurred
            if driver:
                driver.quit()

        return data
|
@@ -0,0 +1,130 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
from selenium.common import TimeoutException
|
5
|
+
from selenium.webdriver.common.by import By
|
6
|
+
from selenium.webdriver.common.keys import Keys
|
7
|
+
from selenium.webdriver.remote.webdriver import WebDriver
|
8
|
+
from selenium.webdriver.support import expected_conditions as EC
|
9
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
10
|
+
|
11
|
+
from uk_bin_collection.uk_bin_collection.common import create_webdriver
|
12
|
+
from uk_bin_collection.uk_bin_collection.common import date_format
|
13
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
14
|
+
|
15
|
+
|
16
|
+
def wait_for_element(driver, element_type, element: str, timeout: int = 5):
    """
    Block until an element matching the locator is present on the page.

    :param driver: Webdriver to poll.
    :param element_type: Locator strategy (a ``By`` value).
    :param element: Locator value to look for.
    :param timeout: Seconds to wait, forwarded to ``wait_for_element_conditions``.
    """
    locator = (element_type, element)
    wait_for_element_conditions(
        driver, EC.presence_of_element_located(locator), timeout=timeout
    )
|
19
|
+
|
20
|
+
|
21
|
+
def wait_for_element_conditions(driver, conditions, timeout: int = 5):
    """
    Block until ``conditions`` is satisfied, reporting a timeout before re-raising.

    :param driver: Webdriver to poll.
    :param conditions: Expected-condition callable passed to ``WebDriverWait.until``.
    :param timeout: Seconds to wait before giving up.
    :raises TimeoutException: re-raised after printing a notice.
    """
    waiter = WebDriverWait(driver, timeout)
    try:
        waiter.until(conditions)
    except TimeoutException:
        print("Timed out waiting for page to load")
        raise
|
27
|
+
|
28
|
+
|
29
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Selenium-driven bin collection scraper for the Kirklees council website.

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def __init__(self):
        # Driver of the scrape in progress. It lives on the instance so that
        # parse_data can shut it down even if _parse_data raises part-way.
        self._driver: Optional[WebDriver] = None

    def parse_data(self, *args, **kwargs) -> dict:
        """
        Run the scrape and always shut the browser down afterwards.

        Arguments are forwarded unchanged to ``_parse_data``.

        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
        """
        try:
            return self._parse_data(*args, **kwargs)
        finally:
            # Drop the reference before quitting. Otherwise a later call that
            # fails before creating a new driver would try to quit this
            # already-closed one again.
            driver, self._driver = self._driver, None
            if driver:
                driver.quit()

    def _parse_data(self, page: str, **kwargs) -> dict:
        """
        Process:

        - Use a house number and postcode that is known to be domestic and resolves to a
          single unique address, and submit the address search form with those details

        - Open the domestic bin accordion panel on the results page and follow
          its calendar link

        - Extract info from the 'alt' attribute of the images on that page

        :param page: Not read by this implementation.
        :param kwargs: Requires ``paon``, ``postcode`` and ``web_driver``;
            ``headless`` is optional and defaults to ``True``.
        :return: ``{"bins": [...]}`` in the order the images appear on the page.
        :raises KeyError: if a required keyword argument is missing.
        :raises TimeoutException: if an awaited element does not appear in time.
        :raises ValueError: if an image ``alt`` text does not have the expected shape.
        """
        bins = []

        user_paon = kwargs["paon"]
        user_postcode = kwargs["postcode"]

        self._driver = driver = create_webdriver(
            web_driver=kwargs["web_driver"], headless=kwargs.get("headless", True)
        )
        driver.implicitly_wait(1)

        driver.get(
            "https://www.kirklees.gov.uk/beta/your-property-bins-recycling/your-bins/default.aspx"
        )

        wait_for_element(
            driver, By.ID, "cphPageBody_cphContent_thisGeoSearch_txtGeoPremises"
        )

        house_input = driver.find_element(
            By.ID, "cphPageBody_cphContent_thisGeoSearch_txtGeoPremises"
        )
        house_input.send_keys(user_paon)

        postcode_input = driver.find_element(
            By.ID, "cphPageBody_cphContent_thisGeoSearch_txtGeoSearch"
        )
        postcode_input.send_keys(user_postcode)

        # submit address search
        driver.find_element(By.ID, "butGeoSearch").send_keys(Keys.RETURN)

        wait_for_element(
            driver,
            By.ID,
            "cphPageBody_cphContent_wtcDomestic240__lnkAccordionAnchor",
            # submitting can be slow
            timeout=30,
        )

        # Open the panel
        driver.find_element(
            By.ID, "cphPageBody_cphContent_wtcDomestic240__lnkAccordionAnchor"
        ).click()

        # Domestic waste calendar
        wait_for_element(
            driver, By.ID, "cphPageBody_cphContent_wtcDomestic240__LnkCalendar"
        )
        calendar_link = driver.find_element(
            By.ID, "cphPageBody_cphContent_wtcDomestic240__LnkCalendar"
        )
        # Clicked through JavaScript rather than WebElement.click()
        driver.execute_script("arguments[0].click();", calendar_link)

        # <img alt="Recycling collection date 14 March 2024"
        # <img alt="Domestic collection date 21 March 2024
        date_images = driver.find_elements(
            By.CSS_SELECTOR, 'img[alt*="collection date"]'
        )

        for image in date_images:
            # Layout: "<bin type> collection date <day> <month> <year>".
            # The last five tokens are fixed; whatever precedes them is the
            # bin type, so multi-word types are accepted too.
            alt_text = image.get_attribute("alt")
            *type_words, _, _, day, month, year = alt_text.split()
            if not type_words:
                raise ValueError(f"No bin type found in image alt text: {alt_text!r}")

            collection_date = datetime.strptime(
                f"{day} {month} {year}", "%d %B %Y"
            ).strftime(date_format)

            bins.append(
                {
                    "type": " ".join(type_words),
                    "collectionDate": collection_date,
                }
            )

        return {"bins": bins}
|
@@ -0,0 +1,139 @@
|
|
1
|
+
import time
|
2
|
+
|
3
|
+
from bs4 import BeautifulSoup
|
4
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
5
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
6
|
+
from selenium.webdriver.common.by import By
|
7
|
+
from selenium.webdriver.support import expected_conditions as EC
|
8
|
+
from selenium.webdriver.support.ui import Select
|
9
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
10
|
+
|
11
|
+
|
12
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
13
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Selenium-driven bin collection scraper for the Knowsley council Mendix form.

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Search by postcode, pick the matching address and read the three bin dates.

        :param page: Not read by this implementation; the form is loaded
            through the webdriver instead.
        :param kwargs: Reads ``paon`` and ``postcode`` (validated with
            ``check_paon`` / ``check_postcode``) plus ``web_driver`` and
            ``headless``, which are handed to ``create_webdriver``.
        :return: ``{"bins": [...]}`` sorted by ascending collection date.
        :raises ValueError: if no listed address starts with ``paon``, or a
            date does not match ``"%A %d/%m/%Y"``.
        :raises Exception: any failure is printed and then re-raised unchanged.
        """
        driver = None
        try:
            data = {"bins": []}
            collections = []
            user_paon = kwargs.get("paon")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            check_paon(user_paon)
            check_postcode(user_postcode)

            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless)
            driver.get(
                "https://knowsleytransaction.mendixcloud.com/link/youarebeingredirected?target=bincollectioninformation"
            )

            # Wait for the postcode field to appear then populate it
            inputElement_postcode = WebDriverWait(driver, 30).until(
                EC.visibility_of_element_located(
                    (
                        By.XPATH,
                        "/html/body/div[1]/div/div/div/div/div/div[2]/div/div/div/div/div/div[3]/div/div[1]/div/div[1]/div/div/input",
                    )
                )
            )
            inputElement_postcode.send_keys(user_postcode)

            # Wait for address search button, then click it
            addressSearch_button = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (
                        By.XPATH,
                        "/html/body/div[1]/div/div/div/div/div/div[2]/div/div/div/div/div/div[3]/div/div[1]/div/div[2]/div/button",
                    )
                )
            )
            addressSearch_button.click()

            # Wait until the address list has loaded
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located(
                    (
                        By.XPATH,
                        "/html/body/div[1]/div/div/div/div/div/div[2]/div/div/div/div/div/div[3]/div/div[1]/div/div[3]/div/div",
                    )
                )
            )

            # Select the first row whose text starts with the requested house
            # number/name. If nothing matches, fail loudly instead of falling
            # through to whichever row happened to be last.
            for row in driver.find_elements(By.CLASS_NAME, "row"):
                if row.text[0 : len(user_paon)] == user_paon:
                    break
            else:
                raise ValueError(
                    f"No address starting with '{user_paon}' was listed for postcode '{user_postcode}'"
                )
            row.find_element(By.LINK_TEXT, "Choose this address").click()

            # Wait for bin dates to load
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located(
                    (
                        By.XPATH,
                        "/html/body/div[1]/div/div/div/div/div/div[2]/div/div/div/div/div/div[3]/div/div[1]/div/div[4]/div/div",
                    )
                )
            )

            # Parse the HTML from the WebDriver
            soup = BeautifulSoup(driver.page_source, features="html.parser")

            # (bin label, class of the text box whose static value is its date)
            bin_fields = (
                ("Maroon bin", "mx-name-textBox3 mx-textbox form-group"),
                ("Grey bin", "mx-name-textBox4 mx-textbox form-group"),
                ("Blue bin", "mx-name-textBox5 mx-textbox form-group"),
            )
            for bin_label, box_class in bin_fields:
                date_text = (
                    soup.find("div", {"class": box_class})
                    .find_next("div", {"class": "form-control-static"})
                    .get_text(strip=True)
                )
                collections.append(
                    (bin_label, datetime.strptime(date_text, "%A %d/%m/%Y"))
                )

            # Earliest collection first
            for bin_label, collection_date in sorted(collections, key=lambda x: x[1]):
                data["bins"].append(
                    {
                        "type": bin_label.capitalize(),
                        "collectionDate": collection_date.strftime(date_format),
                    }
                )

        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Re-raise so the failure propagates to the caller
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()

        return data
|
@@ -0,0 +1,71 @@
|
|
1
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from bs4 import BeautifulSoup
|
4
|
+
from datetime import datetime
|
5
|
+
import requests
|
6
|
+
|
7
|
+
|
8
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for the Lancaster City Council Whitespace portal.

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Walk the portal's address search and read the scheduled collections.

        :param page: Not read by this implementation; the portal is always
            fetched from its fixed base URL.
        :param kwargs: Reads ``postcode`` and the house name/number. The
            latter is taken from ``number`` and, when that is absent, from
            ``paon`` (the key the sibling council classes read).
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
        """
        # data to return
        data = {"bins": []}

        # note: this ignores the given url
        base_url = "https://lcc-wrp.whitespacews.com"

        # Keep honouring the original "number" key, but fall back to "paon"
        # so callers using the same key as the other councils also work.
        house_number = kwargs.get("number")
        if house_number is None:
            house_number = kwargs.get("paon")

        # The context manager closes the session's connections, on errors too.
        with requests.session() as session:
            # start session
            response = session.get(base_url + "/#!")
            # Only anchors that actually carry an href are considered;
            # indexing ["href"] on a bare <a> would raise KeyError.
            links = [
                a["href"]
                for a in BeautifulSoup(response.text, features="html.parser").select(
                    "a[href]"
                )
            ]
            portal_link = ""
            for link in links:
                if "seq=1" in link:
                    portal_link = link

            # fill address form
            response = session.get(portal_link)
            form = BeautifulSoup(response.text, features="html.parser").find("form")
            form_url = dict(form.attrs).get("action")
            payload = {
                "address_name_number": house_number,
                "address_street": "",
                "address_postcode": kwargs.get("postcode"),
            }

            # get found address (the last "seq=3" link on the page wins)
            response = session.post(form_url, data=payload)
            links = [
                a["href"]
                for a in BeautifulSoup(response.text, features="html.parser").select(
                    "a[href]"
                )
            ]
            addr_link = ""
            for link in links:
                if "seq=3" in link:
                    addr_link = base_url + "/" + link

            # get the HTML page listing the collections for that address
            response = session.get(addr_link)

        new_soup = BeautifulSoup(response.text, features="html.parser")
        services = new_soup.find("section", {"id": "scheduled-collections"})
        services_sub = services.find_all("li")

        # The <li> items are consumed in groups of three: the second holds the
        # date and the third the service name.
        for i in range(0, len(services_sub), 3):
            dt = datetime.strptime(services_sub[i + 1].text.strip(), "%d/%m/%Y").date()
            bin_type = BeautifulSoup(services_sub[i + 2].text, features="lxml").find(
                "p"
            )
            data["bins"].append(
                {
                    "type": bin_type.text.strip().removesuffix(" Collection Service"),
                    "collectionDate": dt.strftime(date_format),
                }
            )

        return data
|
@@ -0,0 +1,137 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
from datetime import datetime
|
7
|
+
from selenium.webdriver.common.by import By
|
8
|
+
from selenium.webdriver.support import expected_conditions as EC
|
9
|
+
from selenium.webdriver.support.ui import Select
|
10
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
11
|
+
from selenium.webdriver.common.keys import Keys
|
12
|
+
|
13
|
+
import pandas as pd
|
14
|
+
import urllib.request
|
15
|
+
|
16
|
+
|
17
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Selenium-driven bin collection scraper for the Leeds "check your bin day" page.

    Concrete classes have to implement all abstract operations of the base
    class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Search by postcode, select the address by UPRN and read the listed dates.

        :param page: Not read by this implementation; a fixed council URL is
            loaded through the webdriver instead.
        :param kwargs: Reads ``uprn`` and ``postcode`` (validated with
            ``check_uprn`` / ``check_postcode``) plus ``web_driver`` and
            ``headless``, which are handed to ``create_webdriver``.
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
        :raises Exception: any failure is printed and then re-raised unchanged.
        """
        driver = None
        try:
            user_uprn = kwargs.get("uprn")
            user_postcode = kwargs.get("postcode")
            check_uprn(user_uprn)
            check_postcode(user_postcode)

            start_url = "https://www.leeds.gov.uk/residents/bins-and-recycling/check-your-bin-day"

            # Create Selenium webdriver
            driver = create_webdriver(kwargs.get("web_driver"), kwargs.get("headless"))
            driver.get(start_url)

            # One waiter with a 60 second timeout is shared by every step below
            wait = WebDriverWait(driver, 60)

            postcode_field_locator = (
                By.ID,
                "ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_txtPostCode",
            )
            wait.until(EC.element_to_be_clickable(postcode_field_locator)).send_keys(
                user_postcode
            )

            # The search button must first be present, then clickable
            search_button_by_id = (
                By.ID,
                "ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_btnSearchAddress",
            )
            search_button_by_xpath = (
                By.XPATH,
                '//*[@id="ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_btnSearchAddress"]',
            )
            wait.until(EC.presence_of_element_located(search_button_by_id))
            search_button = wait.until(
                EC.element_to_be_clickable(search_button_by_xpath)
            )
            search_button.send_keys(Keys.ENTER)

            # Wait for an <option> to exist in the address list, then for the
            # list itself to be clickable
            first_option_locator = (
                By.XPATH,
                '//*[@id="ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_ddlAddressList"]/option',
            )
            address_list_locator = (
                By.ID,
                "ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_ddlAddressList",
            )
            first_option = wait.until(
                EC.presence_of_element_located(first_option_locator)
            )
            address_list = wait.until(
                EC.element_to_be_clickable(address_list_locator)
            )

            first_option.click()

            # Options are selected by value, using the UPRN
            Select(address_list).select_by_value(str(user_uprn))

            results_locator = (
                By.ID,
                "ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_BinResultsDetails",
            )
            wait.until(EC.presence_of_element_located(results_locator))

            data = {"bins": []}  # dictionary for data
            soup = BeautifulSoup(driver.page_source, "html.parser")

            for date_list in soup.find_all("ul", class_="binCollectionTimesList"):
                date_items = date_list.find_all("li", class_="")
                if not date_items:
                    continue

                # Convert the collection dates to the desired format
                formatted_dates = [
                    datetime.strptime(item.text.strip(), "%A %d %b %Y").strftime(
                        date_format
                    )
                    for item in date_items
                ]

                # The bin type is the first word of the nearest preceding <h3>
                bin_type = date_list.find_previous("h3").text.split()[0]

                # One entry per date for this bin type
                data["bins"].extend(
                    {"type": bin_type, "collectionDate": formatted}
                    for formatted in formatted_dates
                )
        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Re-raise so the failure propagates to the caller
            raise
        finally:
            # The driver is closed whether or not an exception occurred
            if driver:
                driver.quit()
        return data
|