uk_bin_collection 0.74.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/README.rst +0 -0
- uk_bin_collection/tests/council_feature_input_parity.py +79 -0
- uk_bin_collection/tests/features/environment.py +7 -0
- uk_bin_collection/tests/features/validate_council_outputs.feature +767 -0
- uk_bin_collection/tests/input.json +1077 -0
- uk_bin_collection/tests/output.schema +41 -0
- uk_bin_collection/tests/step_defs/step_helpers/file_handler.py +46 -0
- uk_bin_collection/tests/step_defs/test_validate_council.py +87 -0
- uk_bin_collection/tests/test_collect_data.py +104 -0
- uk_bin_collection/tests/test_common_functions.py +342 -0
- uk_bin_collection/uk_bin_collection/collect_data.py +133 -0
- uk_bin_collection/uk_bin_collection/common.py +292 -0
- uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/ArunCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py +180 -0
- uk_bin_collection/uk_bin_collection/councils/BarnsleyMBCouncil.py +109 -0
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/BathAndNorthEastSomersetCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordBoroughCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordshireCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +147 -0
- uk_bin_collection/uk_bin_collection/councils/BirminghamCityCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py +105 -0
- uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +104 -0
- uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py +141 -0
- uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py +107 -0
- uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +95 -0
- uk_bin_collection/uk_bin_collection/councils/BuryCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CalderdaleCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/CannockChaseDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CardiffCouncil.py +172 -0
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +96 -0
- uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +127 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +32 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireWestAndChesterCouncil.py +125 -0
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/ConwyCountyBorough.py +27 -0
- uk_bin_collection/uk_bin_collection/councils/CrawleyBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +291 -0
- uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/DoncasterCouncil.py +77 -0
- uk_bin_collection/uk_bin_collection/councils/DorsetCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +41 -0
- uk_bin_collection/uk_bin_collection/councils/DurhamCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/EastCambridgeshireCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/EastRidingCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/FenlandDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/ForestOfDeanDistrictCouncil.py +113 -0
- uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py +1580 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +55 -0
- uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +150 -0
- uk_bin_collection/uk_bin_collection/councils/HaltonBoroughCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/HaringeyCouncil.py +59 -0
- uk_bin_collection/uk_bin_collection/councils/HarrogateBoroughCouncil.py +63 -0
- uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/HullCityCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/KingstonUponThamesCouncil.py +84 -0
- uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py +130 -0
- uk_bin_collection/uk_bin_collection/councils/KnowsleyMBCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +71 -0
- uk_bin_collection/uk_bin_collection/councils/LeedsCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/LisburnCastlereaghCityCouncil.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughHounslow.py +82 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughRedbridge.py +161 -0
- uk_bin_collection/uk_bin_collection/councils/MaldonDistrictCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/MalvernHillsDC.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/ManchesterCityCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/MansfieldDistrictCouncil.py +38 -0
- uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py +128 -0
- uk_bin_collection/uk_bin_collection/councils/MidSussexDistrictCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/MiltonKeynesCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/NewarkAndSherwoodDC.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/NewcastleCityCouncil.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py +53 -0
- uk_bin_collection/uk_bin_collection/councils/NorthKestevenDistrictCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLanarkshireCouncil.py +46 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorthamptonshireCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +220 -0
- uk_bin_collection/uk_bin_collection/councils/NorthWestLeicestershire.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/NorthYorkshire.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/NottinghamCityCouncil.py +36 -0
- uk_bin_collection/uk_bin_collection/councils/OldhamCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/PortsmouthCityCouncil.py +131 -0
- uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/ReadingBoroughCouncil.py +30 -0
- uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/RhonddaCynonTaffCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/RochdaleCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/RugbyBoroughCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/SalfordCityCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/SevenoaksDistrictCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/SheffieldCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ShropshireCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/SolihullCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/SouthAyrshireCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/SouthCambridgeshireCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/SouthLanarkshireCouncil.py +78 -0
- uk_bin_collection/uk_bin_collection/councils/SouthNorfolkCouncil.py +91 -0
- uk_bin_collection/uk_bin_collection/councils/SouthOxfordshireCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/SouthTynesideCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/StAlbansCityAndDistrictCouncil.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/StHelensBC.py +56 -0
- uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/StockportBoroughCouncil.py +39 -0
- uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +79 -0
- uk_bin_collection/uk_bin_collection/councils/StratfordUponAvonCouncil.py +94 -0
- uk_bin_collection/uk_bin_collection/councils/SunderlandCityCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/SwaleBoroughCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/TamesideMBCouncil.py +62 -0
- uk_bin_collection/uk_bin_collection/councils/TandridgeDistrictCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/TelfordAndWrekinCouncil.py +50 -0
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/TonbridgeAndMallingBC.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py +154 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WaverleyBoroughCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/WealdenDistrictCouncil.py +86 -0
- uk_bin_collection/uk_bin_collection/councils/WelhatCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WestLindseyDistrictCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WestNorthamptonshireCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WestSuffolkCouncil.py +64 -0
- uk_bin_collection/uk_bin_collection/councils/WiganBoroughCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/WiltshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/WindsorAndMaidenheadCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WokingBoroughCouncil.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/WyreCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py +33 -0
- uk_bin_collection/uk_bin_collection/get_bin_data.py +165 -0
- uk_bin_collection-0.74.0.dist-info/LICENSE +21 -0
- uk_bin_collection-0.74.0.dist-info/METADATA +247 -0
- uk_bin_collection-0.74.0.dist-info/RECORD +171 -0
- uk_bin_collection-0.74.0.dist-info/WHEEL +4 -0
- uk_bin_collection-0.74.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,123 @@
|
|
1
|
+
import requests
|
2
|
+
from bs4 import BeautifulSoup
|
3
|
+
|
4
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
5
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
6
|
+
|
7
|
+
# This script pulls (in one hit) the data from Bromley Council Bins Data
|
8
|
+
import datetime
|
9
|
+
from bs4 import BeautifulSoup
|
10
|
+
from datetime import datetime
|
11
|
+
from selenium.webdriver.common.by import By
|
12
|
+
from selenium.webdriver.support import expected_conditions as EC
|
13
|
+
from selenium.webdriver.support.ui import Select
|
14
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
15
|
+
from selenium.webdriver.common.keys import Keys
|
16
|
+
import time
|
17
|
+
|
18
|
+
|
19
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
20
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Drive the council's postcode/address lookup form with Selenium and
        scrape the resulting collections table.

        :param page: Unused; the page is fetched via the web driver instead.
        :param kwargs: Must contain "uprn", "postcode" and "url"; may contain
            "web_driver" (remote Selenium URL) and "headless".
        :return: Dict with a "bins" list of {"type", "collectionDate"} entries,
            sorted by collection date.
        :raises Exception: Re-raises anything that goes wrong during scraping
            after logging it; the driver is always shut down in ``finally``.
        """
        driver = None
        try:
            user_uprn = kwargs.get("uprn")
            user_postcode = kwargs.get("postcode")
            check_uprn(user_uprn)
            check_postcode(user_postcode)

            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")

            data = {"bins": []}

            # Get our initial session running
            driver = create_webdriver(web_driver, headless)
            driver.get(kwargs.get("url"))

            wait = WebDriverWait(driver, 30)
            postcode = wait.until(
                EC.presence_of_element_located((By.XPATH, '//*[@id="pPostcode"]'))
            )
            postcode.send_keys(user_postcode)

            postcode_search_btn = wait.until(
                EC.element_to_be_clickable((By.CLASS_NAME, "searchbox_submit"))
            )
            postcode_search_btn.send_keys(Keys.ENTER)

            # Wait for the 'Select your property' dropdown to appear, then
            # select the entry matching the requested UPRN
            # (index 0 is "Make a selection from the list")
            dropdown = wait.until(EC.element_to_be_clickable((By.ID, "uprn")))
            Select(dropdown).select_by_value(str(user_uprn))

            # The form requires the GDPR terms checkbox to be ticked
            checkbox = wait.until(EC.presence_of_element_located((By.ID, "gdprTerms")))
            checkbox.send_keys(Keys.SPACE)

            get_bin_data_btn = wait.until(
                EC.element_to_be_clickable((By.CLASS_NAME, "searchbox_submit"))
            )
            get_bin_data_btn.send_keys(Keys.ENTER)

            # Wait for the results table to render, then parse the page
            wait.until(EC.presence_of_element_located((By.ID, "collection")))
            soup = BeautifulSoup(driver.page_source, features="html.parser")

            # Get collections, skipping the header row
            for row in soup.find("table", {"id": "collection"}).find_all("tr")[1:]:
                bin_info = row.find_all("td")
                # Bin type lives in the first cell's <strong>
                bin_type = bin_info[0].find("strong").get_text(strip=True)

                # The date is embedded in prose in the third cell, e.g.
                # "Monday 1 January 2024 will be your next collection."
                collection_date = ""
                for p in bin_info[2].find_all("p"):
                    if "your next collection" in p.get_text(strip=True):
                        collection_date = datetime.strptime(
                            " ".join(
                                p.get_text(strip=True)
                                .replace("will be your next collection.", "")
                                .split()
                            ),
                            "%A %d %B %Y",
                        )

                # Only record rows where a date was actually found
                if collection_date != "":
                    data["bins"].append(
                        {
                            "type": bin_type,
                            "collectionDate": collection_date.strftime(date_format),
                        }
                    )

            data["bins"].sort(
                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
            )
        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Re-raise so the caller sees the failure
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
|
@@ -0,0 +1,65 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
|
3
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
4
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
5
|
+
|
6
|
+
|
7
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
8
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Fetch collection dates from the council's SOAP endpoint and parse
        the XML response.

        :param page: Unused; data is fetched directly from the endpoint.
        :param kwargs: Must contain "uprn" and "postcode".
        :return: Dict with a "bins" list of {"type", "collectionDate"} entries,
            sorted by collection date.
        :raises ValueError: If the service reports no results for the inputs.
        """
        user_uprn = kwargs.get("uprn")
        user_postcode = kwargs.get("postcode")
        check_uprn(user_uprn)
        check_postcode(user_postcode)

        # Make SOAP Request
        response = requests.post(
            "https://ccdc.opendata.onl/DynamicCall.dll",
            data="Method=CollectionDates&Postcode="
            + user_postcode
            + "&UPRN="
            + user_uprn,
            headers={
                "Content-Type": "application/x-www-form-urlencoded",
                "Referer": "https://ccdc.opendata.onl/CCDC_WasteCollection",
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
            },
        )

        # Parse the XML body
        soup = BeautifulSoup(response.text, "xml")

        # The service signals "no data" inside the XML payload rather than
        # via an HTTP status code, so check for it explicitly.
        error = soup.find("ErrorDescription")
        if error and error.get_text(strip=True) == "No results returned":
            raise ValueError("No collection data found for provided Postcode & UPRN.")

        data = {"bins": []}

        for collection in soup.find_all("Collection"):
            data["bins"].append(
                {
                    # Service text looks like "<type> Collection Service";
                    # keep only the bin type
                    "type": collection.Service.get_text()
                    .replace("Collection Service", "")
                    .strip(),
                    "collectionDate": datetime.strptime(
                        collection.Date.get_text(), "%d/%m/%Y %H:%M:%S"
                    ).strftime(date_format),
                }
            )

        data["bins"].sort(
            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
        )
        return data
|
@@ -0,0 +1,172 @@
|
|
1
|
+
# So this script is a little different to the others...
|
2
|
+
# Essentially, it uses Cardiff Council's waste collection API to return collections for a UPRN by pretending
|
3
|
+
# to be Google Chrome
|
4
|
+
|
5
|
+
import datetime
|
6
|
+
import json
|
7
|
+
from datetime import datetime
|
8
|
+
|
9
|
+
import requests
|
10
|
+
from requests import auth
|
11
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
12
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
13
|
+
|
14
|
+
|
15
|
+
# Taken from
|
16
|
+
# https://stackoverflow.com/questions/29931671/making-an-api-call-in-python-with-an-api-that-requires-a-bearer-token
|
17
|
+
class BearerAuth(requests.auth.AuthBase):
    """
    Requests auth helper that attaches a Bearer token to each request.

    Taken from
    https://stackoverflow.com/questions/29931671/making-an-api-call-in-python-with-an-api-that-requires-a-bearer-token
    """

    def __init__(self, token):
        # Token string to present on every request
        self.token = token

    def __call__(self, request):
        # requests invokes this hook just before sending; inject the header
        request.headers["authorization"] = "Bearer " + self.token
        return request
|
24
|
+
|
25
|
+
|
26
|
+
def parse_token(text: str) -> str:
    """
    Parses the response text to find the JWT token, which will always be the
    longest item in the list (I think).

    :param text: The response text from the server
    :return: Only the JWT token, as a string
    """
    # You'd have thought I'd use something like etree for this, but that
    # doesn't work so going for a hacky approach: split on double quotes
    # and keep the longest fragment, which is the token.
    return max(text.split('"'), key=len)
|
36
|
+
|
37
|
+
|
38
|
+
def get_jwt() -> str:
    """
    Gets a JSON web token from the authentication server.

    Mimics a Chrome browser by sending a CORS preflight (OPTIONS) followed
    by a SOAP POST; the header strings are pipe-delimited "Name: value"
    pairs decoded by parse_header().

    :return: A JWT token as a string
    """
    auth_url = (
        "https://authwebservice.cardiff.gov.uk/AuthenticationWebService.asmx?op=GetJWT"
    )
    # Headers for the CORS preflight request (exact bytes matter to the server)
    options_headers_str = (
        "Accept: */*|Accept-Encoding: gzip, "
        "deflate, br|Accept-Language: en-GB,en;q=0.9|Access-Control-Request-Headers: content-type"
        "|Access-Control-Request-Method: POST|Connection: keep-alive|Host: "
        "authwebservice.cardiff.gov.uk|Origin: https://www.cardiff.gov.uk|Referer: "
        "https://www.cardiff.gov.uk/|Sec-Fetch-Dest: empty"
        "|Sec-Fetch-Mode: cors|Sec-Fetch-Site: same-site|User-Agent: Mozilla/5.0 (Windows NT 10.0; "
        "Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36 "
    )

    # Headers for the actual SOAP POST request
    request_headers_str = (
        "Accept: */*|Accept-Encoding: gzip, deflate, br|Accept-Language: en-GB,en;q=0.9|Connection: "
        'keep-alive|Content-Length: 284|Content-Type: text/xml; charset="UTF-8"|Host: '
        "authwebservice.cardiff.gov.uk|Origin: https://www.cardiff.gov.uk|Referer: "
        "https://www.cardiff.gov.uk/|Sec-Fetch-Dest: empty|Sec-Fetch-Mode: cors|Sec-Fetch-Site: "
        "same-site|Sec-GPC: 1|User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36 "
    )

    # SOAP envelope invoking the GetJWT operation
    payload = (
        "<?xml version='1.0' encoding='utf-8'?><soap:Envelope "
        "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xsd='http://www.w3.org/2001/XMLSchema' "
        "xmlns:soap='http://schemas.xmlsoap.org/soap/envelope/'><soap:Body><GetJWT xmlns='http://tempuri.org/' "
        "/></soap:Body></soap:Envelope> "
    )

    options_headers = parse_header(options_headers_str)
    request_headers = parse_header(request_headers_str)
    try:
        # Suppress urllib3 TLS warnings that would otherwise spam stderr
        requests.packages.urllib3.disable_warnings()
        options = requests.options(auth_url, headers=options_headers)
        response = requests.post(auth_url, headers=request_headers, data=payload)
        if not options.ok or not response.ok:
            raise ValueError("Invalid server response code getting JWT!")

    except Exception as ex:
        # NOTE(review): exit(1) kills the whole process on any failure here,
        # which is harsh for library code — consider re-raising instead.
        print(f"Exception encountered: {ex}")
        exit(1)
    # Extract the token from the SOAP response body
    token = parse_token(response.text)
    options.close()
    response.close()

    return token
|
89
|
+
|
90
|
+
|
91
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the base
    class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Query Cardiff Council's waste collection API for a UPRN.

        Acquires a JWT, then sends a CORS preflight plus an authorized POST
        mimicking Chrome, and flattens the returned collection weeks into
        the standard {"bins": [...]} structure.

        :param page: Unused; data is fetched from the API directly.
        :param kwargs: Must contain "uprn".
        :return: Dict with a "bins" list of {"type", "collectionDate"} entries.
        """
        uprn = kwargs.get("uprn")
        check_uprn(uprn)

        data = {"bins": []}
        token = get_jwt()

        api_url = "https://api.cardiff.gov.uk/WasteManagement/api/WasteCollection"
        # Preflight (OPTIONS) headers, pipe-delimited for parse_header()
        options_header_str = (
            "Accept: */*|Accept-Encoding: gzip, deflate, br|Accept-Language: en-GB,"
            "en;q=0.9|Access-Control-Request-Headers: authorization,"
            "content-type|Access-Control-Request-Method: POST|Connection: keep-alive|Host: "
            "api.cardiff.gov.uk|Origin: https://www.cardiff.gov.uk|Referer: "
            "https://www.cardiff.gov.uk/|Sec-Fetch-Dest: empty|Sec-Fetch-Mode: cors|Sec-Fetch-Site: "
            "same-site|User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ("
            "KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36 "
        )
        # POST headers; the Authorization header carries the freshly minted JWT
        response_header_str = (
            "Accept: application/json, text/javascript, */*; q=0.01|Accept-Encoding: gzip, deflate, "
            f"br|Accept-Language: en-GB,en;q=0.9|Authorization: {token}|Connection: "
            "keep-alive|Content-Length: 62|Content-Type: application/json; charset=UTF-8|Host: "
            "api.cardiff.gov.uk|Origin: https://www.cardiff.gov.uk|Referer: "
            "https://www.cardiff.gov.uk/|Sec-Fetch-Dest: empty|Sec-Fetch-Mode: cors|Sec-Fetch-Site: "
            "same-site|Sec-GPC: 1|User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36 "
        )

        # JSON body selecting the address by UPRN
        payload = (
            '{ "systemReference": "web", "language": "eng", ' f'"uprn": {uprn} ' "}"
        )

        options_header = parse_header(options_header_str)
        response_header = parse_header(response_header_str)
        # Copy the request headers for options and post headers (replacing post auth with token variable) and post
        # payload, then add here
        try:
            requests.packages.urllib3.disable_warnings()
            options = requests.options(api_url, headers=options_header)
            response = requests.post(
                api_url, headers=response_header, auth=BearerAuth(token), data=payload
            )
            if not options.ok or not response.ok:
                raise ValueError("Invalid server response code finding UPRN!")

        except Exception as ex:
            # NOTE(review): exit(1) kills the process on any failure here;
            # consider re-raising instead for library use.
            print(f"Exception encountered: {ex}")
            exit(1)

        result = json.loads(response.text)

        options.close()
        response.close()

        collections = result["collectionWeeks"]
        for week in collections:
            # The week dict is accessed positionally: index 1 is assumed to
            # be the collection date and index 3 the list of bins — this
            # relies on the API's JSON key order. TODO(review): confirm the
            # actual field names and access them by key instead.
            collection = [(k, v) for k, v in week.items()]
            collection_date = collection[1][1]
            collection_date = datetime.strptime(
                collection_date, "%Y-%m-%dT%H:%M:%S"
            ).strftime(date_format)

            for bin in collection[3][1]:
                bin_type = bin.get("type")

                dict_data = {
                    "type": bin_type,
                    "collectionDate": collection_date,
                }
                data["bins"].append(dict_data)

        return data
|
@@ -0,0 +1,96 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
|
5
|
+
|
6
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
7
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Fetch the two-month waste calendar for a road and return its pink
        and normal collection days, sorted by date.
        """
        # Disable the SSL warnings that otherwise break everything
        requests.packages.urllib3.disable_warnings()
        try:
            requests.packages.urllib3.contrib.pyopenssl.util.ssl_.DEFAULT_CIPHERS += (
                ":HIGH:!DH:!aNULL"
            )
        except AttributeError:
            pass

        # The "uprn" argument actually carries the council's road ID here
        road_id = kwargs.get("uprn")
        check_uprn(road_id)

        post_url = "https://apps.castlepoint.gov.uk/cpapps/index.cfm?fa=wastecalendar.displayDetails"
        post_header_str = (
            "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,"
            "image/apng,"
            "*/*;q=0.8,application/signed-exchange;v=b3;q=0.9|Accept-Encoding: gzip, deflate, "
            "br|Accept-Language: en-GB;q=0.8|Cache-Control: max-age=0|Connection: "
            "keep-alive|Content-Length: "
            "11|Content-Type: application/x-www-form-urlencoded|Host: apps.castlepoint.gov.uk|Origin: "
            "https://apps.castlepoint.gov.uk|Referer: "
            "https://apps.castlepoint.gov.uk/cpapps/index.cfm?fa=wastecalendar|Sec-Fetch-Dest: "
            "document|Sec-Fetch-Mode: navigate|Sec-Fetch-Site: same-origin|Sec-Fetch-User: ?1|Sec-GPC: "
            "1|Upgrade-Insecure-Requests: 1|User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36 "
        )

        post_response = requests.post(
            post_url,
            headers=parse_header(post_header_str),
            data={"roadID": road_id},
            verify=False,
        )

        # Build a BS4 object from the returned HTML
        soup = BeautifulSoup(post_response.text, features="html.parser")
        soup.prettify()

        found = []

        # The page shows two month panels side by side
        for panel in (1, 2):
            calendar = soup.select(
                f"#wasteCalendarContainer > div:nth-child(2) > div:nth-child({panel}) > div"
            )[0]
            # Panel heading is e.g. "January 2024"; parse it once for both
            # the month and the year
            heading = datetime.strptime(calendar.find_next("h2").get_text(), "%B %Y")

            # Pink days first, then normal days — matches the page's classes
            for css_class, label in (
                ("pink", "Pink collection"),
                ("normal", "Normal collection"),
            ):
                for cell in calendar.find_all("td", class_=css_class):
                    day_number = int(cell.get_text().strip())
                    found.append(
                        (
                            label,
                            datetime(
                                year=heading.year,
                                month=heading.month,
                                day=day_number,
                            ),
                        )
                    )

        data = {"bins": []}
        for label, when in sorted(found, key=lambda entry: entry[1]):
            data["bins"].append(
                {
                    "type": label,
                    "collectionDate": when.strftime(date_format),
                }
            )

        return data
|
@@ -0,0 +1,54 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
|
5
|
+
from datetime import timedelta
|
6
|
+
from dateutil.relativedelta import relativedelta
|
7
|
+
|
8
|
+
|
9
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
10
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Parse the council's bin page into the standard bins dict.

        Collections are listed in ``<ul class="refuse">`` elements; each
        ``<li>`` carries a ``<strong class="date">`` whose text is either
        "Today", "Tomorrow", or a day string such as "Mon 1st Jan" (no
        year), plus an ``<a>`` naming the bin type.

        :param page: response object whose ``.text`` attribute holds the HTML
        :return: dict with a "bins" list of {"type", "collectionDate"} entries
        """
        # Make a BS4 object
        soup = BeautifulSoup(page.text, features="html.parser")

        data = {"bins": []}
        curr_date = datetime.today()

        for bins in soup.find_all("ul", {"class": "refuse"}):
            binCollection = bins.find_all("li")

            if binCollection:
                # NOTE: loop variable renamed from `bin` to avoid shadowing
                # the Python builtin.
                for bin_item in binCollection:
                    collection_date = (
                        bin_item.find("strong", {"class": "date"}).contents[0].strip()
                    )
                    if collection_date.lower() == "today":
                        collection_date = datetime.now()
                    elif collection_date.lower() == "tomorrow":
                        collection_date = datetime.now() + timedelta(days=1)
                    else:
                        # The site omits the year, so assume the current one...
                        collection_date += f" {curr_date.year}"
                        collection_date = datetime.strptime(
                            remove_ordinal_indicator_from_date_string(
                                collection_date
                            ).strip(),
                            "%a %d %b %Y",
                        )
                        # ...and bump January dates seen in December into the
                        # next year. Scoped to this branch only: the
                        # Today/Tomorrow branches already carry the right year,
                        # and bumping a "tomorrow" of Dec 31 would be wrong.
                        if curr_date.month == 12 and collection_date.month == 1:
                            collection_date = collection_date + relativedelta(years=1)
                    dict_data = {
                        "type": bin_item.find("a").contents[0],
                        "collectionDate": collection_date.strftime(date_format),
                    }

                    data["bins"].append(dict_data)

        return data
|
@@ -0,0 +1,127 @@
|
|
1
|
+
import re
|
2
|
+
import requests
|
3
|
+
from bs4 import BeautifulSoup
|
4
|
+
|
5
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
6
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
7
|
+
|
8
|
+
# This script pulls (in one hit) the data from Bromley Council Bins Data
|
9
|
+
import datetime
|
10
|
+
from datetime import datetime
|
11
|
+
from selenium.webdriver.common.by import By
|
12
|
+
from selenium.webdriver.support import expected_conditions as EC
|
13
|
+
from selenium.webdriver.support.ui import Select
|
14
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
15
|
+
from selenium.webdriver.common.keys import Keys
|
16
|
+
import time
|
17
|
+
|
18
|
+
|
19
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
20
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Drive the council site with Selenium to find the address page,
        then fetch and scrape the linked collection calendar.

        kwargs used: ``postcode``, ``paon``, ``web_driver``, ``headless``,
        ``url``. (``uprn`` was read by an earlier revision but is unused
        by this flow.)

        :return: dict with a "bins" list of {"type", "collectionDate"}
                 entries sorted by date; empty if no calendar link exists
        :raises Exception: re-raises any scraping/driver error after logging
        """
        driver = None
        try:
            data = {"bins": []}
            headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"}

            postcode = kwargs.get("postcode")
            user_paon = kwargs.get("paon")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            driver = create_webdriver(web_driver, headless)
            url = kwargs.get("url")

            driver.execute_script(f"window.location.href='{url}'")

            wait = WebDriverWait(driver, 120)
            post_code_search = wait.until(
                EC.presence_of_element_located((By.XPATH, '//input[@name="keyword"]'))
            )

            post_code_search.send_keys(postcode)

            submit_btn = wait.until(
                EC.presence_of_element_located((By.CLASS_NAME, "__submitButton"))
            )

            submit_btn.send_keys(Keys.ENTER)

            # Wait for the results table to render before looking for the
            # address link. (Return value unused; the wait only gates
            # progress — previously bound to an unused local.)
            wait.until(
                EC.presence_of_element_located((By.CLASS_NAME, "directories-table"))
            )
            address_link = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, f"//a[contains(text(), '{user_paon}')]")
                )
            )

            address_link.send_keys(Keys.ENTER)
            # Wait for the property page content to load.
            wait.until(
                EC.presence_of_element_located((By.CLASS_NAME, "usercontent"))
            )

            # Make a BS4 object from the rendered property page
            soup = BeautifulSoup(driver.page_source, features="html.parser")

            # Get collection calendar link, if the property page offers one
            calendar_urls = soup.find_all(
                "a", string=re.compile(r"view or download the collection calendar")
            )
            if len(calendar_urls) > 0:
                requests.packages.urllib3.disable_warnings()
                response = requests.get(calendar_urls[0].get("href"), headers=headers)

                # Make a BS4 object from the calendar page
                soup = BeautifulSoup(response.text, features="html.parser")

                # Loop the months; each month block carries a "Month Year"
                # heading (h2 or h3) followed by an <li> per collection.
                for month in soup.find_all("div", {"class": "usercontent"}):
                    year = ""
                    if month.find("h2") and "calendar" not in month.find("h2").get_text(
                        strip=True
                    ):
                        year = datetime.strptime(
                            month.find("h2").get_text(strip=True), "%B %Y"
                        ).strftime("%Y")
                    elif month.find("h3"):
                        year = datetime.strptime(
                            month.find("h3").get_text(strip=True), "%B %Y"
                        ).strftime("%Y")
                    if year != "":
                        for row in month.find_all("li"):
                            # e.g. "Monday 1 January: food waste and refuse"
                            results = re.search(
                                r"([A-Za-z]+ \d\d? [A-Za-z]+): (.+)",
                                row.get_text(strip=True),
                            )
                            if results:
                                dict_data = {
                                    "type": results.groups()[1].capitalize(),
                                    "collectionDate": datetime.strptime(
                                        results.groups()[0] + " " + year, "%A %d %B %Y"
                                    ).strftime(date_format),
                                }
                                data["bins"].append(dict_data)

            # Sort collections chronologically
            data["bins"].sort(
                key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
            )
        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Optionally, re-raise the exception if you want it to propagate
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
|
@@ -0,0 +1,32 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
3
|
+
|
4
|
+
|
5
|
+
class CouncilClass(AbstractGetBinDataClass):
    """Scrape a council results page whose collections are laid out in a
    ``<table class="job-details">`` of ``tr.data-row`` rows."""

    def parse_data(self, page: str, **kwargs) -> dict:
        """Extract bin type and collection date from each data row.

        Each row's first "visible-cell" <td> carries at least three
        <label> elements; index 1 is the date and index 2 the bin type.

        :param page: response object whose ``.text`` attribute holds the HTML
        :return: dict with a "bins" list of {"type", "collectionDate"} entries
        """
        document = BeautifulSoup(page.text, features="html.parser")

        bin_data_dict = {"bins": []}

        details_table = document.find("table", {"class": "job-details"})
        if not details_table:
            return bin_data_dict

        for data_row in details_table.find_all("tr", {"class": "data-row"}):
            visible_cells = data_row.find_all(
                "td", {"class": lambda cls: cls and cls.startswith("visible-cell")}
            )
            if not visible_cells:
                continue

            cell_labels = visible_cells[0].find_all("label")
            if len(cell_labels) < 3:
                continue

            bin_data_dict["bins"].append(
                {
                    "type": cell_labels[2].get_text(strip=True),
                    "collectionDate": cell_labels[1].get_text(strip=True),
                }
            )

        return bin_data_dict
|