uk_bin_collection 0.74.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/README.rst +0 -0
- uk_bin_collection/tests/council_feature_input_parity.py +79 -0
- uk_bin_collection/tests/features/environment.py +7 -0
- uk_bin_collection/tests/features/validate_council_outputs.feature +767 -0
- uk_bin_collection/tests/input.json +1077 -0
- uk_bin_collection/tests/output.schema +41 -0
- uk_bin_collection/tests/step_defs/step_helpers/file_handler.py +46 -0
- uk_bin_collection/tests/step_defs/test_validate_council.py +87 -0
- uk_bin_collection/tests/test_collect_data.py +104 -0
- uk_bin_collection/tests/test_common_functions.py +342 -0
- uk_bin_collection/uk_bin_collection/collect_data.py +133 -0
- uk_bin_collection/uk_bin_collection/common.py +292 -0
- uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/ArunCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py +180 -0
- uk_bin_collection/uk_bin_collection/councils/BarnsleyMBCouncil.py +109 -0
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/BathAndNorthEastSomersetCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordBoroughCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordshireCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +147 -0
- uk_bin_collection/uk_bin_collection/councils/BirminghamCityCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py +105 -0
- uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +104 -0
- uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py +141 -0
- uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py +107 -0
- uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +95 -0
- uk_bin_collection/uk_bin_collection/councils/BuryCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CalderdaleCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/CannockChaseDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CardiffCouncil.py +172 -0
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +96 -0
- uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +127 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +32 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireWestAndChesterCouncil.py +125 -0
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/ConwyCountyBorough.py +27 -0
- uk_bin_collection/uk_bin_collection/councils/CrawleyBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +291 -0
- uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/DoncasterCouncil.py +77 -0
- uk_bin_collection/uk_bin_collection/councils/DorsetCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +41 -0
- uk_bin_collection/uk_bin_collection/councils/DurhamCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/EastCambridgeshireCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/EastRidingCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/FenlandDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/ForestOfDeanDistrictCouncil.py +113 -0
- uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py +1580 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +55 -0
- uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +150 -0
- uk_bin_collection/uk_bin_collection/councils/HaltonBoroughCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/HaringeyCouncil.py +59 -0
- uk_bin_collection/uk_bin_collection/councils/HarrogateBoroughCouncil.py +63 -0
- uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/HullCityCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/KingstonUponThamesCouncil.py +84 -0
- uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py +130 -0
- uk_bin_collection/uk_bin_collection/councils/KnowsleyMBCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +71 -0
- uk_bin_collection/uk_bin_collection/councils/LeedsCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/LisburnCastlereaghCityCouncil.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughHounslow.py +82 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughRedbridge.py +161 -0
- uk_bin_collection/uk_bin_collection/councils/MaldonDistrictCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/MalvernHillsDC.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/ManchesterCityCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/MansfieldDistrictCouncil.py +38 -0
- uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py +128 -0
- uk_bin_collection/uk_bin_collection/councils/MidSussexDistrictCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/MiltonKeynesCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/NewarkAndSherwoodDC.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/NewcastleCityCouncil.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py +53 -0
- uk_bin_collection/uk_bin_collection/councils/NorthKestevenDistrictCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLanarkshireCouncil.py +46 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorthamptonshireCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +220 -0
- uk_bin_collection/uk_bin_collection/councils/NorthWestLeicestershire.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/NorthYorkshire.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/NottinghamCityCouncil.py +36 -0
- uk_bin_collection/uk_bin_collection/councils/OldhamCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/PortsmouthCityCouncil.py +131 -0
- uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/ReadingBoroughCouncil.py +30 -0
- uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/RhonddaCynonTaffCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/RochdaleCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/RugbyBoroughCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/SalfordCityCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/SevenoaksDistrictCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/SheffieldCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ShropshireCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/SolihullCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/SouthAyrshireCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/SouthCambridgeshireCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/SouthLanarkshireCouncil.py +78 -0
- uk_bin_collection/uk_bin_collection/councils/SouthNorfolkCouncil.py +91 -0
- uk_bin_collection/uk_bin_collection/councils/SouthOxfordshireCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/SouthTynesideCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/StAlbansCityAndDistrictCouncil.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/StHelensBC.py +56 -0
- uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/StockportBoroughCouncil.py +39 -0
- uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +79 -0
- uk_bin_collection/uk_bin_collection/councils/StratfordUponAvonCouncil.py +94 -0
- uk_bin_collection/uk_bin_collection/councils/SunderlandCityCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/SwaleBoroughCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/TamesideMBCouncil.py +62 -0
- uk_bin_collection/uk_bin_collection/councils/TandridgeDistrictCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/TelfordAndWrekinCouncil.py +50 -0
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/TonbridgeAndMallingBC.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py +154 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WaverleyBoroughCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/WealdenDistrictCouncil.py +86 -0
- uk_bin_collection/uk_bin_collection/councils/WelhatCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WestLindseyDistrictCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WestNorthamptonshireCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WestSuffolkCouncil.py +64 -0
- uk_bin_collection/uk_bin_collection/councils/WiganBoroughCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/WiltshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/WindsorAndMaidenheadCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WokingBoroughCouncil.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/WyreCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py +33 -0
- uk_bin_collection/uk_bin_collection/get_bin_data.py +165 -0
- uk_bin_collection-0.74.0.dist-info/LICENSE +21 -0
- uk_bin_collection-0.74.0.dist-info/METADATA +247 -0
- uk_bin_collection-0.74.0.dist-info/RECORD +171 -0
- uk_bin_collection-0.74.0.dist-info/WHEEL +4 -0
- uk_bin_collection-0.74.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,123 @@
|
|
1
|
+
import time
|
2
|
+
from bs4 import BeautifulSoup
|
3
|
+
from selenium.webdriver.common.by import By
|
4
|
+
|
5
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
6
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
7
|
+
|
8
|
+
|
9
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
10
|
+
|
11
|
+
|
12
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Northumberland County Council.

    Drives the council's "Bin Calendars" page with Selenium, looks the
    property up by house number and postcode, then reads the rendered
    calendar tables and matches every coloured day cell to a bin type
    using the colours shown next to the route names.
    """

    def extract_styles(self, style_str: str) -> dict:
        """
        Parse an inline CSS ``style`` attribute value into a dict.

        :param style_str: Raw attribute text, e.g. ``"color: red; width: 1px"``.
        :return: Mapping of stripped property name to stripped value. When a
            property is repeated, the last declaration wins.
        """
        styles = {}
        for declaration in style_str.split(";"):
            # partition() rather than split(): a value may itself contain ":"
            # (for example url(https://...)), which must stay in one piece.
            name, separator, value = declaration.partition(":")
            if not separator:
                # Empty or whitespace-only fragment (e.g. after a trailing
                # ";"), or a fragment with no ":" at all - nothing to record.
                continue
            styles[name.strip()] = value.strip()
        return styles

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Look the property up and collect its bin dates from the calendars.

        :param page: Ignored; it is overwritten with the Bin Calendars URL.
        :param kwargs: Reads ``paon``, ``postcode``, ``web_driver`` and
            ``headless``.
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
            with dates rendered using ``date_format``.
        :raises Exception: Any error raised while scraping is printed and
            re-raised; the webdriver is always shut down first.
        """
        driver = None
        try:
            page = "https://www.northumberland.gov.uk/Waste/Bins/Bin-Calendars.aspx"

            data = {"bins": []}

            user_paon = kwargs.get("paon")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            check_paon(user_paon)
            check_postcode(user_postcode)

            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless)
            driver.get(page)

            time.sleep(1)

            # Press the cookie accept - wait is to let the JS load it up
            driver.find_element(By.ID, "ccc-notify-accept").click()

            inputElement_hn = driver.find_element(
                By.ID,
                "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtHouse",
            )
            inputElement_pc = driver.find_element(
                By.ID,
                "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtPostcode",
            )

            inputElement_pc.send_keys(user_postcode)
            inputElement_hn.send_keys(user_paon)

            driver.find_element(
                By.ID,
                "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_butLookup",
            ).click()

            soup = BeautifulSoup(driver.page_source, features="html.parser")

            # Work out which bins can be collected for this address. Glass bins are only on some houses due to pilot programme.
            # The summary span holds "Routes found: " followed by a
            # comma-separated list of route names.
            route_summary = soup.find(
                "span",
                id="p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
            )
            bins_collected = [
                route.strip()
                for route in route_summary.string.replace(
                    "Routes found: ", ""
                ).split(",")
            ]

            # Get the background colour for each of them...
            bins_by_colours = {}
            for bin_name in bins_collected:
                style_str = soup.find("span", string=bin_name)["style"]
                bin_colour = self.extract_styles(style_str)["background-color"].upper()
                bins_by_colours[bin_colour] = bin_name

            # Work through the tables gathering the dates, if the cell has a background colour - match it to the bin type.
            calendar_tables = soup.find_all("table", title="Calendar")
            for table in calendar_tables:
                # Get month and year
                # First row in table is the header
                rows = table.find_all("tr")
                month_and_year = (
                    rows[0].find("table", class_="calCtrlTitle").find("td").string
                )
                bin_days = table.find_all("td", class_="calCtrlDay")
                for day in bin_days:
                    # A day cell without an inline style has no colour and
                    # therefore no collection; .get() avoids a KeyError there.
                    day_styles = self.extract_styles(day.get("style", ""))
                    if "background-color" in day_styles:
                        colour = day_styles["background-color"].upper()
                        date = time.strptime(
                            f"{day.string} {month_and_year}", "%d %B %Y"
                        )

                        # Add it to the data
                        data["bins"].append(
                            {
                                "type": bins_by_colours[colour],
                                "collectionDate": time.strftime(date_format, date),
                            }
                        )
        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Optionally, re-raise the exception if you want it to propagate
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
|
@@ -0,0 +1,36 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
|
5
|
+
|
6
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection lookup for Nottingham City Council.

    Queries the council's bin collections JSON endpoint (through its
    geoserver proxy handler) for a single UPRN.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Fetch and convert the next collections for a property.

        :param page: Unused; the request URL is built from the UPRN.
        :param kwargs: Reads ``uprn``.
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
            with dates rendered using ``date_format``.
        :raises requests.HTTPError: If the endpoint answers with an error
            status.
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)

        api_url = f"https://geoserver.nottinghamcity.gov.uk/myproperty/handler/proxy.ashx?https://geoserver.nottinghamcity.gov.uk/bincollections2/api/collection/{user_uprn}"

        requests.packages.urllib3.disable_warnings()
        # Without a timeout requests waits forever on an unresponsive server.
        response = requests.get(api_url, timeout=30)
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError
        # further down when the server returns an error page.
        response.raise_for_status()
        json_data = json.loads(response.text)

        next_collections = json_data["nextCollections"]

        # "collectionDate" is parsed with datetime.fromisoformat, i.e. it is
        # expected to be an ISO 8601 string.
        bins = [
            {
                "type": collection["collectionType"],
                "collectionDate": datetime.fromisoformat(
                    collection["collectionDate"]
                ).strftime(date_format),
            }
            for collection in next_collections
        ]

        return {"bins": bins}
|
@@ -0,0 +1,51 @@
|
|
1
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
2
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
3
|
+
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
|
6
|
+
|
7
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection parser for the Oldham Council results page.

    Works purely on the already-fetched page: every table with the class
    "data-table confirmation" is treated as one bin.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Extract one entry per table that has both a heading and a date.

        :param page: Response-like object; its ``.text`` is parsed as HTML.
        :param kwargs: Not read by this implementation.
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
            with dates rendered using ``date_format``.
        """
        data = {"bins": []}
        soup = BeautifulSoup(page.text, "html.parser")

        for table in soup.find_all("table", class_="data-table confirmation"):
            # The first <th> in the table names the bin.
            heading = table.find("th")
            bin_type = heading.text.strip() if heading else None

            # Scan the rows for a bold "Collection Date" label; the matching
            # value sits in the "coltwo" cell. The last such row wins.
            bin_collection = None
            for row in table.find_all("tr"):
                label = row.find("b")
                if not label:
                    continue
                label_text = label.text.strip()
                value_cell = row.find("td", class_="coltwo")
                if not value_cell:
                    continue
                value_text = value_cell.text.strip()
                if label_text == "Collection Date":
                    bin_collection = value_text

            # Tables missing either piece of information are skipped.
            if not (bin_type and bin_collection):
                continue

            parsed = datetime.strptime(bin_collection, "%d/%m/%Y")
            data["bins"].append(
                {
                    "type": bin_type,
                    "collectionDate": parsed.strftime(date_format),
                }
            )

        return data
|
@@ -0,0 +1,131 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from datetime import datetime
|
3
|
+
from selenium.webdriver.common.by import By
|
4
|
+
from selenium.webdriver.support import expected_conditions as EC
|
5
|
+
from selenium.webdriver.support.ui import Select
|
6
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
7
|
+
|
8
|
+
import time
|
9
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
10
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
11
|
+
|
12
|
+
|
13
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
14
|
+
|
15
|
+
|
16
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Portsmouth City Council.

    Drives the council's AchieveForms page with Selenium: searches by
    postcode, picks the property by UPRN from the address dropdown, then
    reads the "next 10 collection dates" sections from the rendered form.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Look the property up and collect the listed collection dates.

        :param page: Ignored; it is overwritten with the AchieveForms URL.
        :param kwargs: Reads ``uprn``, ``postcode``, ``web_driver`` and
            ``headless``.
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
            with dates rendered using ``date_format``.
        :raises Exception: Any error raised while scraping is printed and
            re-raised; the webdriver is always shut down first.
        """
        driver = None
        try:
            page = "https://my.portsmouth.gov.uk/en/AchieveForms/?form_uri=sandbox-publish://AF-Process-26e27e70-f771-47b1-a34d-af276075cede/AF-Stage-cd7cc291-2e59-42cc-8c3f-1f93e132a2c9/definition.json&redirectlink=%2F&cancelRedirectLink=%2F"

            data = {"bins": []}

            user_uprn = kwargs.get("uprn")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            check_uprn(user_uprn)
            check_postcode(user_postcode)
            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless)
            driver.get(page)

            # The form lives inside an iframe; wait for it and switch into it.
            iframe_presence = WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.ID, "fillform-frame-1"))
            )

            driver.switch_to.frame(iframe_presence)
            wait = WebDriverWait(driver, 60)
            inputElement_postcodesearch = wait.until(
                EC.element_to_be_clickable((By.NAME, "postcode_search"))
            )

            inputElement_postcodesearch.send_keys(user_postcode)
            lookupAddress_btn = wait.until(
                EC.element_to_be_clickable((By.ID, "lookupAddress"))
            )

            lookupAddress_btn.click()

            # Wait for the address dropdown to become usable...
            dropdown = wait.until(
                EC.element_to_be_clickable((By.NAME, "Choose_Address"))
            )

            # ...and for at least one address option to be present in it.
            wait.until(
                EC.presence_of_element_located((By.CLASS_NAME, "lookup-option"))
            )
            time.sleep(1)
            # Select the property whose option value is the UPRN.
            dropdownSelect = Select(dropdown)
            dropdownSelect.select_by_value(str(user_uprn))

            # Block until the results heading has been rendered.
            wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, "//h4[contains(text(), 'next 10 collection dates')]")
                )
            )

            soup = BeautifulSoup(driver.page_source, features="html.parser")
            # Result sections are elements whose data-field-name starts with "html".
            elements_with_data_field_name = soup.find_all(
                lambda tag: tag.has_attr("data-field-name")
                and tag["data-field-name"].startswith("html")
            )
            for element in elements_with_data_field_name:
                heading = element.find("h4")
                h4_text = heading.get_text(strip=True) if heading else None
                if not h4_text or "next 10" not in h4_text:
                    continue

                # Dates are the direct text nodes of the first <p> tag.
                data_paragraphs = element.find_all("p")
                if not data_paragraphs:
                    continue
                dates = data_paragraphs[0].find_all(string=True, recursive=False)

                # The bin name is the heading text before " - ".
                bin_type = h4_text.split(" - ")[0]
                for date in dates:
                    date_text = re.sub(r"[^a-zA-Z0-9,\s]", "", date).strip()
                    if not date_text:
                        # Whitespace-only text node; strptime would raise on "".
                        continue
                    data["bins"].append(
                        {
                            "type": bin_type,
                            "collectionDate": datetime.strptime(
                                date_text,
                                "%A %d %B %Y",
                            ).strftime(date_format),
                        }
                    )
        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Optionally, re-raise the exception if you want it to propagate
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
|
@@ -0,0 +1,97 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from datetime import datetime
|
3
|
+
from selenium.webdriver.common.by import By
|
4
|
+
from selenium.webdriver.support import expected_conditions as EC
|
5
|
+
from selenium.webdriver.support.ui import Select
|
6
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
7
|
+
|
8
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
9
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
10
|
+
from selenium.webdriver.common.keys import Keys
|
11
|
+
|
12
|
+
|
13
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
14
|
+
|
15
|
+
|
16
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Preston City Council.

    Uses Selenium to search the self-service "FindMyNearest" bins form by
    address, selects the first search result and reads the listed dates.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Search for the address and collect every listed collection date.

        :param page: Ignored; it is overwritten with the self-service URL.
        :param kwargs: Reads ``paon``, ``postcode``, ``web_driver`` and
            ``headless``.
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
            with dates rendered using ``date_format``.
        :raises Exception: Any error raised while scraping is printed and
            re-raised; the webdriver is always shut down first.
        """
        driver = None
        try:
            page = "https://selfservice.preston.gov.uk/service/Forms/FindMyNearest.aspx?Service=bins"

            data = {"bins": []}

            user_paon = kwargs.get("paon")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            check_paon(user_paon)
            check_postcode(user_postcode)

            # Start the browser and open the search form.
            driver = create_webdriver(web_driver, headless)
            driver.get(page)

            # Type "<house number or name> <postcode>" into the address box.
            address_box = driver.find_element(By.ID, "MainContent_txtAddress")
            for fragment in (user_paon, " ", user_postcode):
                address_box.send_keys(fragment)

            # Submit the search by pressing Enter on the search button.
            search_button = driver.find_element(By.ID, "btnSearch")
            search_button.send_keys(Keys.ENTER)

            # Once the results dropdown exists, pick the entry at index 1
            # (index 0 is "Make a selection from the list").
            results_locator = (By.ID, "MainContent_ddlSearchResults")
            results_dropdown = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(results_locator)
            )
            Select(results_dropdown).select_by_index(1)

            soup = BeautifulSoup(driver.page_source, features="html.parser")
            dates_span = soup.find("span", id="MainContent_lblMoreCollectionDates")

            # Each "container" div holds one bin: its name in the first <b>
            # and one <li> per collection date.
            for container in dates_span.find_all("div", attrs={"id": "container"}):
                raw_name = container.find_all("b")[0].text
                bin_name = re.sub(r"[^a-zA-Z0-9,\s]", "", raw_name).strip()

                for item in container.find_all("li"):
                    # The span text is space separated; the second token is
                    # the date in dd/mm/yyyy form.
                    date_token = item.find("span").text.split(" ")[1]
                    parsed = datetime.strptime(date_token, "%d/%m/%Y")
                    data["bins"].append(
                        {
                            "type": bin_name,
                            "collectionDate": parsed.strftime(date_format),
                        }
                    )
        except Exception as e:
            # Report the failure, then let it propagate to the caller.
            print(f"An error occurred: {e}")
            raise
        finally:
            # Always shut the browser down, whether or not scraping worked.
            if driver:
                driver.quit()
        return data
|
@@ -0,0 +1,30 @@
|
|
1
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
2
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
3
|
+
|
4
|
+
|
5
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection parser for Reading Borough Council.

    Converts the JSON body of the already-fetched response into the common
    ``{"bins": [...]}`` structure.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Map every item under ``"collections"`` to a bin entry.

        :param page: Response-like object; its ``.text`` is parsed as JSON.
        :param kwargs: Not read by this implementation.
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
            with dates rendered using ``date_format``.
        """
        payload = json.loads(page.text)

        def as_bin(entry):
            # Dates arrive in the form 14/12/2023 00:00:00
            service, when = entry["service"], entry["date"]
            parsed = datetime.strptime(when, "%d/%m/%Y %H:%M:%S")
            return {
                "type": service.replace(" Collection Service", " Bin"),
                "collectionDate": parsed.strftime(date_format),
            }

        return {"bins": [as_bin(entry) for entry in payload["collections"]]}
|
@@ -0,0 +1,81 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from selenium.webdriver.common.by import By
|
3
|
+
from selenium.webdriver.support import expected_conditions as EC
|
4
|
+
from selenium.webdriver.support.ui import WebDriverWait
|
5
|
+
|
6
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
|
+
|
9
|
+
|
10
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
11
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Reigate and Banstead Borough Council.

    Opens the council's collections calendar for a UPRN with Selenium,
    waits for the embedded form to render and reads the dated lists from
    the ``html2`` section.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Load the calendar for the property and collect its entries.

        :param page: Unused; the URL is built from the UPRN.
        :param kwargs: Reads ``uprn``, ``web_driver`` and ``headless``.
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
            sorted by collection date, with dates rendered using
            ``date_format``.
        :raises Exception: Any error raised while scraping is printed and
            re-raised; the webdriver is always shut down first.
        """
        driver = None
        try:
            user_uprn = kwargs.get("uprn")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            check_uprn(user_uprn)
            # Pad UPRN with 0's at the start for any that aren't 12 chars.
            # str() first so a numeric UPRN does not fail on .zfill().
            user_uprn = str(user_uprn).zfill(12)

            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless)
            driver.get(
                f"https://my.reigate-banstead.gov.uk/en/service/Bins_and_recycling___collections_calendar?uprn={user_uprn}"
            )

            # Wait for iframe to load and switch to it
            WebDriverWait(driver, 30).until(
                EC.frame_to_be_available_and_switch_to_it((By.ID, "fillform-frame-1"))
            )

            # Wait for form
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, 'span[data-name="html2"] > div')
                )
            )

            # Make a BS4 object
            soup = BeautifulSoup(driver.page_source, features="html.parser")

            data = {"bins": []}
            section = soup.find("span", {"data-name": "html2"})
            dates = section.find_all("div")
            for d in dates:
                # A div contributes entries only when it has both an <h3>
                # (the date) and at least one <li> (the collections).
                date = d.find("h3")
                collections = d.find_all("li")
                if date and collections:
                    collection_date = datetime.strptime(
                        date.get_text(strip=True), "%A %d %B %Y"
                    ).strftime(date_format)
                    for c in collections:
                        collection_type = c.get_text(strip=True)
                        # Skip list items with no text.
                        if collection_type:
                            dict_data = {
                                "type": collection_type,
                                "collectionDate": collection_date,
                            }
                            data["bins"].append(dict_data)

            data["bins"].sort(
                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
            )
        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Optionally, re-raise the exception if you want it to propagate
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
|