uk_bin_collection 0.74.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/README.rst +0 -0
- uk_bin_collection/tests/council_feature_input_parity.py +79 -0
- uk_bin_collection/tests/features/environment.py +7 -0
- uk_bin_collection/tests/features/validate_council_outputs.feature +767 -0
- uk_bin_collection/tests/input.json +1077 -0
- uk_bin_collection/tests/output.schema +41 -0
- uk_bin_collection/tests/step_defs/step_helpers/file_handler.py +46 -0
- uk_bin_collection/tests/step_defs/test_validate_council.py +87 -0
- uk_bin_collection/tests/test_collect_data.py +104 -0
- uk_bin_collection/tests/test_common_functions.py +342 -0
- uk_bin_collection/uk_bin_collection/collect_data.py +133 -0
- uk_bin_collection/uk_bin_collection/common.py +292 -0
- uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/ArunCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py +180 -0
- uk_bin_collection/uk_bin_collection/councils/BarnsleyMBCouncil.py +109 -0
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/BathAndNorthEastSomersetCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordBoroughCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordshireCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +147 -0
- uk_bin_collection/uk_bin_collection/councils/BirminghamCityCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py +105 -0
- uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +104 -0
- uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py +141 -0
- uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py +107 -0
- uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +95 -0
- uk_bin_collection/uk_bin_collection/councils/BuryCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CalderdaleCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/CannockChaseDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CardiffCouncil.py +172 -0
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +96 -0
- uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +127 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +32 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireWestAndChesterCouncil.py +125 -0
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/ConwyCountyBorough.py +27 -0
- uk_bin_collection/uk_bin_collection/councils/CrawleyBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +291 -0
- uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/DoncasterCouncil.py +77 -0
- uk_bin_collection/uk_bin_collection/councils/DorsetCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +41 -0
- uk_bin_collection/uk_bin_collection/councils/DurhamCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/EastCambridgeshireCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/EastRidingCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/FenlandDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/ForestOfDeanDistrictCouncil.py +113 -0
- uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py +1580 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +55 -0
- uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +150 -0
- uk_bin_collection/uk_bin_collection/councils/HaltonBoroughCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/HaringeyCouncil.py +59 -0
- uk_bin_collection/uk_bin_collection/councils/HarrogateBoroughCouncil.py +63 -0
- uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/HullCityCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/KingstonUponThamesCouncil.py +84 -0
- uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py +130 -0
- uk_bin_collection/uk_bin_collection/councils/KnowsleyMBCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +71 -0
- uk_bin_collection/uk_bin_collection/councils/LeedsCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/LisburnCastlereaghCityCouncil.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughHounslow.py +82 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughRedbridge.py +161 -0
- uk_bin_collection/uk_bin_collection/councils/MaldonDistrictCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/MalvernHillsDC.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/ManchesterCityCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/MansfieldDistrictCouncil.py +38 -0
- uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py +128 -0
- uk_bin_collection/uk_bin_collection/councils/MidSussexDistrictCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/MiltonKeynesCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/NewarkAndSherwoodDC.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/NewcastleCityCouncil.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py +53 -0
- uk_bin_collection/uk_bin_collection/councils/NorthKestevenDistrictCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLanarkshireCouncil.py +46 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorthamptonshireCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +220 -0
- uk_bin_collection/uk_bin_collection/councils/NorthWestLeicestershire.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/NorthYorkshire.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/NottinghamCityCouncil.py +36 -0
- uk_bin_collection/uk_bin_collection/councils/OldhamCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/PortsmouthCityCouncil.py +131 -0
- uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/ReadingBoroughCouncil.py +30 -0
- uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/RhonddaCynonTaffCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/RochdaleCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/RugbyBoroughCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/SalfordCityCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/SevenoaksDistrictCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/SheffieldCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ShropshireCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/SolihullCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/SouthAyrshireCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/SouthCambridgeshireCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/SouthLanarkshireCouncil.py +78 -0
- uk_bin_collection/uk_bin_collection/councils/SouthNorfolkCouncil.py +91 -0
- uk_bin_collection/uk_bin_collection/councils/SouthOxfordshireCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/SouthTynesideCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/StAlbansCityAndDistrictCouncil.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/StHelensBC.py +56 -0
- uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/StockportBoroughCouncil.py +39 -0
- uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +79 -0
- uk_bin_collection/uk_bin_collection/councils/StratfordUponAvonCouncil.py +94 -0
- uk_bin_collection/uk_bin_collection/councils/SunderlandCityCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/SwaleBoroughCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/TamesideMBCouncil.py +62 -0
- uk_bin_collection/uk_bin_collection/councils/TandridgeDistrictCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/TelfordAndWrekinCouncil.py +50 -0
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/TonbridgeAndMallingBC.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py +154 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WaverleyBoroughCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/WealdenDistrictCouncil.py +86 -0
- uk_bin_collection/uk_bin_collection/councils/WelhatCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WestLindseyDistrictCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WestNorthamptonshireCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WestSuffolkCouncil.py +64 -0
- uk_bin_collection/uk_bin_collection/councils/WiganBoroughCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/WiltshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/WindsorAndMaidenheadCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WokingBoroughCouncil.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/WyreCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py +33 -0
- uk_bin_collection/uk_bin_collection/get_bin_data.py +165 -0
- uk_bin_collection-0.74.0.dist-info/LICENSE +21 -0
- uk_bin_collection-0.74.0.dist-info/METADATA +247 -0
- uk_bin_collection-0.74.0.dist-info/RECORD +171 -0
- uk_bin_collection-0.74.0.dist-info/WHEEL +4 -0
- uk_bin_collection-0.74.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
# This script pulls (in one hit) the data from Merton Council Bins Data
|
2
|
+
from bs4 import BeautifulSoup
|
3
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
4
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
5
|
+
|
6
|
+
|
7
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
8
|
+
class CouncilClass(AbstractGetBinDataClass):
|
9
|
+
"""
|
10
|
+
Concrete classes have to implement all abstract operations of the
|
11
|
+
base class. They can also override some operations with a default
|
12
|
+
implementation.
|
13
|
+
"""
|
14
|
+
|
15
|
+
def parse_data(self, page: str, **kwargs) -> dict:
|
16
|
+
# Make a BS4 object
|
17
|
+
soup = BeautifulSoup(page.text, features="html.parser")
|
18
|
+
soup.prettify()
|
19
|
+
|
20
|
+
data = {"bins": []}
|
21
|
+
collections = []
|
22
|
+
|
23
|
+
# Search for the specific bin in the table using BS4
|
24
|
+
rows = soup.find("table", class_=("collectiondays")).find_all(
|
25
|
+
"tr",
|
26
|
+
class_=(
|
27
|
+
"food-caddy",
|
28
|
+
"papercard-wheelie",
|
29
|
+
"plastics-boxes",
|
30
|
+
"rubbish-wheelie",
|
31
|
+
"textiles",
|
32
|
+
"batteries",
|
33
|
+
),
|
34
|
+
)
|
35
|
+
|
36
|
+
# Loops the Rows
|
37
|
+
for row in rows:
|
38
|
+
# Get all the cells
|
39
|
+
cells = row.find_all("td")
|
40
|
+
# First cell is the bin_type
|
41
|
+
bin_type = cells[0].get_text().strip()
|
42
|
+
# Date is on the second cell, second paragraph, wrapped in p
|
43
|
+
collectionDate = datetime.strptime(
|
44
|
+
cells[1].select("p > b")[2].get_text(strip=True), "%d %B %Y"
|
45
|
+
)
|
46
|
+
|
47
|
+
# Add each collection to the list as a tuple
|
48
|
+
collections.append((bin_type, collectionDate))
|
49
|
+
|
50
|
+
ordered_data = sorted(collections, key=lambda x: x[1])
|
51
|
+
for item in ordered_data:
|
52
|
+
dict_data = {
|
53
|
+
"type": item[0].capitalize(),
|
54
|
+
"collectionDate": item[1].strftime(date_format),
|
55
|
+
}
|
56
|
+
data["bins"].append(dict_data)
|
57
|
+
|
58
|
+
return data
|
@@ -0,0 +1,128 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
import time
|
3
|
+
from dateutil.relativedelta import relativedelta
|
4
|
+
from selenium.webdriver.common.by import By
|
5
|
+
from selenium.webdriver.common.keys import Keys
|
6
|
+
from selenium.webdriver.support.ui import Select
|
7
|
+
from selenium.common.exceptions import NoSuchElementException
|
8
|
+
|
9
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
10
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
11
|
+
|
12
|
+
|
13
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Scrape Mid and East Antrim's collection-date lookup with Selenium.

        :param page: ignored; the council URL is fixed below
        :keyword postcode: postcode to search for
        :keyword web_driver: optional remote webdriver URL
        :keyword headless: run the browser headless when truthy
        :return: dict with a ``bins`` list of ``{"type", "collectionDate"}``
        :raises Exception: re-raises anything that fails during scraping
        """
        driver = None
        try:
            page = "https://www.midandeastantrim.gov.uk/resident/waste-recycling/collection-dates/"

            # Assign user info
            user_postcode = kwargs.get("postcode")
            # not used: user_paon = kwargs.get("paon")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")

            # Create Selenium webdriver (a previously-built, unused
            # ChromeOptions object was removed as dead code)
            driver = create_webdriver(web_driver, headless)

            driver.get(page)

            # The search form lives inside an iframe; wait for the page
            # to settle, then switch into the first frame.
            time.sleep(5)
            driver.switch_to.frame(0)

            # Enter postcode in text box and wait for the AJAX results
            inputElement_pc = driver.find_element(By.ID, "txtAjaxSearch")
            inputElement_pc.send_keys(user_postcode)

            time.sleep(5)

            # Select the top address match. On an exact postcode match the
            # site goes straight to the results, so the button may
            # legitimately be absent - that case is not an error.
            try:
                button = driver.find_element(By.XPATH, '//*[@id="show-button-0"]')
                driver.execute_script("arguments[0].click();", button)
            except NoSuchElementException:
                pass

            time.sleep(4)

            # Read next collection information
            page = driver.find_element(By.ID, "divCalendarGraphics").get_attribute(
                "outerHTML"
            )

            # Make a BS4 object - remove bold tags and add @ so we can split the lines later
            soup = BeautifulSoup(
                page.strip()
                .replace("<b>", "")
                .replace("</b>", "")
                .replace("<br>", "@"),
                features="html.parser",
            )

            # Data to return
            data = {"bins": []}

            # Valid bin types
            binTypes = ["Refuse", "Garden"]

            # Value to create dict for bin values
            keys, values = [], []

            # Example of the html (bold tags removed above):
            # <div id="divCalendarGraphics">
            # <br>Refuse: Tue 14 Nov then every alternate Tue<br>Recycling: ...<br>Garden: ...<br><img ...>
            # Split by the @ placeholder and keep the three bin lines.
            lines = soup.text.split("@", 4)
            for line in lines[1:4]:
                keys.append(line.split(":")[0].strip())
                # Keep the first three words of the schedule, e.g.
                # ["Tue", "14", "Nov"] - or ["Today"] / ["Tomorrow"]
                values.append(line.split(":")[1].strip().split(" ")[:3])

            # Create dict for bin name and string dates
            binDict = dict(zip(keys, values))

            # Process dict for valid bin types
            for bin in list(binDict):
                if bin in binTypes:
                    # Convert date - no year value so take it from todays date
                    if binDict[bin][0] == "Tomorrow":
                        date = datetime.today() + relativedelta(days=1)
                    elif binDict[bin][0] == "Today":
                        date = datetime.today()
                    else:
                        date = datetime.strptime(
                            " ".join(binDict[bin][1:]), "%d %b"
                        ).replace(year=datetime.today().year)
                        # BUG FIX: the past-date year rollover must only
                        # apply to parsed dates. Previously it also ran for
                        # the "Today" branch, where datetime.today() taken
                        # microseconds later is always greater, wrongly
                        # pushing today's collection a full year ahead.
                        if date < datetime.today():
                            date = date + relativedelta(years=1)

                    # Set bin data
                    dict_data = {
                        "type": bin,
                        "collectionDate": date.strftime(date_format),
                    }
                    data["bins"].append(dict_data)

        except Exception as e:
            # Log and propagate so callers see the failure
            print(f"An error occurred: {e}")
            raise
        finally:
            # Quit Selenium webdriver to release the session regardless
            # of success or failure
            if driver:
                driver.quit()
        return data
@@ -0,0 +1,80 @@
|
|
1
|
+
import re
|
2
|
+
from datetime import datetime
|
3
|
+
|
4
|
+
import requests
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
|
+
|
9
|
+
|
10
|
+
def get_token(res) -> str:
    """
    Get a UFPRT code for the form data to be processed.

    :param res: HTML text of the bin-collection form page
    :return: value of the hidden ``ufprt`` input field
    """
    # The discarded prettify() call was removed - it returned a string
    # that was never used.
    soup = BeautifulSoup(res, features="html.parser")
    return soup.find("input", {"name": "ufprt"}).get("value")
20
|
+
|
21
|
+
|
22
|
+
# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch and parse Mid Sussex collection dates.

        :keyword postcode: postcode of the property
        :keyword paon: house name/number of the property
        :return: dict with a ``bins`` list of ``{"type", "collectionDate"}``
        """
        api_url = "https://www.midsussex.gov.uk/waste-recycling/bin-collection/"
        user_postcode = kwargs.get("postcode")
        user_paon = kwargs.get("paon")
        # (an unused postcode regex local was removed - validation is
        # handled by check_postcode below)
        user_full_addr = f"{user_paon} {user_postcode}"

        check_postcode(user_postcode)
        check_paon(user_paon)

        form_data = {
            "PostCodeStep.strAddressSearch": user_postcode,
            "AddressStep.strAddressSelect": user_full_addr,
            "Next": "true",
            "StepIndex": "1",
        }

        # Get a ufprt by posting here (I have no idea how ufprt works, so may as well grab one from the server)
        requests.packages.urllib3.disable_warnings()
        init = requests.post(api_url, data=form_data)
        ufprt = get_token(init.text)
        form_data.update({"ufprt": ufprt})

        response = requests.post(api_url, data=form_data)

        # Make a BS4 object
        soup = BeautifulSoup(response.text, features="html.parser")

        data = {"bins": []}

        table_element = soup.find("table", {"class": "collDates"})
        table_rows = table_element.find_all_next("tr")

        # Skip the header row; each remaining row carries the bin type in
        # its second cell and a long-form date in its third.
        for row in table_rows[1:]:
            details = row.find_all_next("td")
            dict_data = {
                "type": details[1].get_text().replace("collection", "").strip(),
                "collectionDate": datetime.strptime(
                    details[2].get_text(), "%A %d %B %Y"
                ).strftime(date_format),
            }
            data["bins"].append(dict_data)

        return data
@@ -0,0 +1,54 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
|
5
|
+
|
6
|
+
# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Match an estate name against the council's collection-day list.

        :param page: HTTP response whose ``.text`` holds the estates HTML
        :keyword uprn: the estate name to look up (reuses the UPRN field)
        :return: dict with a ``bins`` list of the next eight weekly
            collection dates for the estate
        :raises ValueError: if the estate cannot be found on the page
        """
        data = {"bins": []}

        # Get the estate from the UPRN field
        estate = kwargs.get("uprn")

        # Parse the council's website
        soup = BeautifulSoup(page.text, features="html.parser")

        # Get a list of [estate, collection day] pairs, then check for a
        # case-insensitive match on the estate name
        collection_days = [
            item.text.strip().replace("\xa0", " ").split(" - ")
            for item in soup.find(
                "div",
                {
                    "class": "field field--name-localgov-paragraphs field--type-entity-reference-revisions field--label-hidden field__items"
                },
            ).find_all("li")
        ]
        result = [
            result for result in collection_days if result[0].lower() == estate.lower()
        ]

        # BUG FIX: a list comprehension never yields None - when no estate
        # matched, the old "is not None" test passed and indexing an empty
        # list raised IndexError instead of the intended ValueError.
        if result:
            # result[0][1] is e.g. "Monday (weekly)"; map its weekday name
            # to a number and project the next eight occurrences.
            day_number = days_of_week.get(result[0][1].split()[0])
            collection_dates = get_weekday_dates_in_period(
                datetime.now(), day_number, 8
            )

            for date in collection_dates:
                dict_data = {
                    "type": "Weekly collection",
                    "collectionDate": date,
                }
                data["bins"].append(dict_data)
        else:
            raise ValueError("Estate not found on website.")

        return data
@@ -0,0 +1,98 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from datetime import datetime
|
3
|
+
import re
|
4
|
+
import requests
|
5
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
6
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
7
|
+
|
8
|
+
|
9
|
+
# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Query Mole Valley's address-search API and parse bin panels.

        :keyword postcode: postcode to search for (required)
        :keyword uprn: optional UPRN to select a specific property;
            otherwise the first result is used
        :return: dict with a ``bins`` list of ``{"type", "collectionDate"}``
        :raises ValueError: on a bad server response, an unknown postcode,
            or an unmatched UPRN
        """
        user_postcode = kwargs.get("postcode")
        check_postcode(user_postcode)

        root_url = "https://molevalley.cloudmappin.com/my-mv-address-search/search/{}/0".format(
            user_postcode
        )
        response = requests.get(root_url)

        if not response.ok:
            # (typo "retreiving" fixed in the user-facing message)
            raise ValueError("Invalid server response code retrieving data.")

        jsonData = response.json()

        if len(jsonData["results"]) == 0:
            raise ValueError("No collection data found for postcode provided.")

        properties_found = jsonData["results"][0]["items"]

        # If UPRN is provided, we can check a specific address.
        html_data = None
        uprn = kwargs.get("uprn")
        if uprn:
            check_uprn(uprn)
            for n, item in enumerate(properties_found):
                # UPRNs come back as numeric strings - normalise via int()
                if uprn == str(int(item["info"][0][1]["value"])):
                    html_data = properties_found[n]["info"][2][1]["value"]
                    break
            if html_data is None:
                raise ValueError("No collection data found for UPRN provided.")
        else:
            # If UPRN not provided, just use the first result
            html_data = properties_found[0]["info"][2][1]["value"]

        soup = BeautifulSoup(html_data, features="html.parser")

        data = {"bins": []}
        all_collection_dates = []
        regex_date = re.compile(r".* ([\d]+\/[\d]+\/[\d]+)")
        regex_additional_collection = re.compile(r"We also collect (.*) on (.*) -")

        # Search for the 'Bins and Recycling' panel
        for panel in soup.select('div[class*="panel"]'):
            if panel.h2.text.strip() == "Bins and Recycling":

                # Gather the bin types and dates
                for collection in panel.select("div > strong"):
                    bin_type = collection.text.strip()
                    collection_string = collection.find_next("p").text.strip()
                    m = regex_date.match(collection_string)
                    if m:
                        collection_date = datetime.strptime(
                            m.group(1), "%d/%m/%Y"
                        ).date()
                        data["bins"].append(
                            {
                                "type": bin_type,
                                "collectionDate": collection_date.strftime("%d/%m/%Y"),
                            }
                        )
                        all_collection_dates.append(collection_date)

                # Search for additional collections that piggy-back on the
                # earliest regular collection day
                for p in panel.select("p"):
                    m2 = regex_additional_collection.match(p.text.strip())
                    if m2:
                        bin_type = m2.group(1)
                        if "each collection day" in m2.group(2):
                            collection_date = min(all_collection_dates)
                            data["bins"].append(
                                {
                                    "type": bin_type,
                                    "collectionDate": collection_date.strftime(
                                        "%d/%m/%Y"
                                    ),
                                }
                            )
                        break

        return data
@@ -0,0 +1,139 @@
|
|
1
|
+
import time
|
2
|
+
from bs4 import BeautifulSoup
|
3
|
+
from selenium.webdriver.common.by import By
|
4
|
+
from selenium.webdriver.support import expected_conditions as EC
|
5
|
+
from selenium.webdriver.support.ui import Select
|
6
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
7
|
+
|
8
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
9
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
10
|
+
|
11
|
+
|
12
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
13
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        # Scrape Neath Port Talbot's bin-day lookup (npt.gov.uk/2195) with
        # Selenium: accept cookies, search by postcode, pick the address by
        # UPRN, submit, then parse the rendered results with BeautifulSoup.
        #
        # kwargs used: "uprn", "postcode", "web_driver" (remote driver URL or
        # None), "headless". Returns {"bins": [{"type": ..., "collectionDate":
        # "dd/mm/YYYY"}, ...]} sorted by date. Raises on any failure after
        # printing the error; the driver is always quit in the finally block.
        driver = None
        try:
            data = {"bins": []}
            user_uprn = kwargs.get("uprn")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            check_uprn(user_uprn)
            check_postcode(user_postcode)

            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless)
            driver.get("https://www.npt.gov.uk/2195")

            # Accept cookies banner (blocks interaction until dismissed)
            cookieAccept = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.ID, "ccc-notify-accept"))
            )
            cookieAccept.click()

            # Populate postcode field
            inputElement_postcode = driver.find_element(
                By.ID,
                "ContentPlaceHolderDefault_ctl13_nptLLPG2_25_addresslookup_txtTmpPostcode",
            )
            inputElement_postcode.send_keys(user_postcode)

            # Click search button
            findAddress = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (
                        By.ID,
                        "ContentPlaceHolderDefault_ctl13_nptLLPG2_25_addresslookup_btnFindAddress",
                    )
                )
            )
            findAddress.click()

            # NOTE(review): fixed sleep to let the address dropdown populate —
            # an explicit wait on the option list would be more robust; confirm.
            time.sleep(1)

            # Wait for the 'Select address' dropdown to appear and select option matching UPRN
            dropdown = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (
                        By.ID,
                        "ContentPlaceHolderDefault_ctl13_nptLLPG2_25_addresslookup_ddlAddressLookup",
                    )
                )
            )
            # Create a 'Select' for it, then select the matching URPN option
            dropdownSelect = Select(dropdown)
            dropdownSelect.select_by_value(user_uprn)

            # Remove back to top button if exists (it can overlap and
            # intercept clicks on the submit button)
            driver.execute_script(
                """
                if (document.contains(document.querySelector(".backtotop"))) {
                    document.querySelector(".backtotop").remove();
                }
                """
            )

            # Wait for the submit button to appear, then click it to get the collection dates
            submit = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.ID, "ContentPlaceHolderDefault_ctl13_nptLLPG2_25_btnDisplay")
                )
            )
            submit.click()

            # Parse the rendered results page
            soup = BeautifulSoup(driver.page_source, features="html.parser")

            # Get the property details
            property_details = soup.find(
                "div",
                {
                    "id": "ContentPlaceHolderDefault_ctl13_nptLLPG2_25_divPropertyDetails"
                },
            )

            # Get the dates: each <h2> is a collection day heading, followed by
            # a sibling <div> of "card" entries (one per bin type)
            for date in property_details.find_all("h2"):
                if date.get_text(strip=True) != "Bank Holidays":
                    # NOTE(review): the replace() normalises a non-standard
                    # space character in the heading before parsing — confirm
                    # the exact character against the live page.
                    # NOTE(review): the page omits the year, so the current
                    # year is appended — dates that roll into January will be
                    # parsed with the wrong year; confirm upstream handling.
                    bin_date = datetime.strptime(
                        date.get_text(strip=True).replace(" ", " ")
                        + " "
                        + datetime.now().strftime("%Y"),
                        "%A, %d %B %Y",
                    )
                    bin_types_wrapper = date.find_next_sibling("div")
                    for bin_type_wrapper in bin_types_wrapper.find_all(
                        "div", {"class": "card"}
                    ):
                        if bin_date and bin_type_wrapper:
                            # Bin name from the link, qualifier from the span,
                            # e.g. "Recycling (fortnightly)"
                            bin_type = bin_type_wrapper.find("a").get_text(strip=True)
                            bin_type += (
                                " ("
                                + bin_type_wrapper.find("span").get_text(strip=True)
                                + ")"
                            )
                            dict_data = {
                                "type": bin_type,
                                "collectionDate": bin_date.strftime(date_format),
                            }
                            data["bins"].append(dict_data)

            # Sort chronologically on the dd/mm/YYYY collection date
            data["bins"].sort(
                key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
            )
        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Optionally, re-raise the exception if you want it to propagate
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
|
@@ -0,0 +1,52 @@
|
|
1
|
+
from datetime import timedelta
|
2
|
+
|
3
|
+
from bs4 import BeautifulSoup
|
4
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
5
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
6
|
+
|
7
|
+
|
8
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
9
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Parse the council's month-by-month collection calendar.

        Each month is rendered as a ``table.table-condensed`` whose first row
        holds "<Month> <Year>" and whose remaining rows are individual
        collections ("<type>, <weekday ordinal-date>"). Only collections from
        today up to eight weeks ahead are returned, and cancelled entries are
        skipped.
        """
        soup = BeautifulSoup(page.text, features="html.parser")
        soup.prettify()

        # Date window: today through eight weeks from now (inclusive).
        window_start = datetime.today()
        window_end = datetime.today() + timedelta(days=8 * 7)
        data = {"bins": []}

        for calendar_table in soup.select('table[class*="table table-condensed"]'):
            rows = calendar_table.find_all("tr")
            # First row is the "<Month> <Year>" caption, not a collection.
            month_year = rows[0].text.strip()
            del rows[0]

            for row in rows:
                cells = row.text.split(",")
                bin_type = cells[0].strip()
                # Rebuild a full date string ("Monday 1 January 2024") by
                # stripping the ordinal suffix and appending month + year.
                cleaned = remove_ordinal_indicator_from_date_string(
                    cells[1].strip() + " " + month_year
                )
                bin_date = datetime.strptime(cleaned, "%A %d %B %Y")

                in_window = (
                    window_start.date() <= bin_date.date() <= window_end.date()
                )
                if in_window and "cancelled" not in bin_type:
                    data["bins"].append(
                        {
                            "type": bin_type,
                            "collectionDate": bin_date.strftime(date_format),
                        }
                    )

        return data
|
@@ -0,0 +1,57 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
# This script pulls (in one hit) the data from
|
4
|
+
# Newcastle City Council Bins Data
|
5
|
+
from datetime import datetime
|
6
|
+
|
7
|
+
from bs4 import BeautifulSoup
|
8
|
+
from uk_bin_collection.uk_bin_collection.common import date_format
|
9
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
10
|
+
|
11
|
+
|
12
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
13
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page, **kwargs) -> dict:
        """Parse Newcastle City Council's bin collection page.

        Each bin is introduced by a <strong> heading such as
        "Green Bin (Domestic Waste) details:"; the next-collection date sits
        in nearby sibling text as "Next collection : dd-Mon-YYYY".

        Returns {"bins": [{"type": ..., "collectionDate": "dd/mm/YYYY"}, ...]}.
        (Fix: the return annotation was ``-> None`` although the method
        returns the standard bins dict.)
        """
        # Make a BS4 object
        soup = BeautifulSoup(page.text, features="html.parser")
        soup.prettify()

        data = {"bins": []}

        for element in soup.find_all("strong"):
            collectionInfo = ""
            # Domestic Waste is formatted differently to other bins: its date
            # follows a <br> inside the heading's next sibling.
            if "Green Bin (Domestic Waste) details:" in str(element):
                if element.next_sibling.find("br"):
                    collectionInfo = element.next_sibling.find("br").next_element
            elif "Next collection" in str(
                element.next_sibling.next_sibling.next_sibling.next_sibling
            ):
                collectionInfo = (
                    element.next_sibling.next_sibling.next_sibling.next_sibling
                )

            # "n/a" means no scheduled collection for this bin — skip it.
            if collectionInfo != "" and collectionInfo != "Next collection : n/a":
                # Bin type is the text between the heading's parentheses,
                # e.g. "Domestic Waste" from "Green Bin (Domestic Waste) ...".
                element_text = str(element)
                bin_type = element_text[
                    element_text.find("(") + 1 : element_text.find(")")
                ]
                # Re-format "dd-Mon-YYYY" into the project-wide date_format.
                collectionDate = str(
                    datetime.strptime(
                        str(collectionInfo).replace("Next collection : ", ""),
                        "%d-%b-%Y",
                    )
                    .date()
                    .strftime(date_format)
                )

                dict_data = {"type": bin_type, "collectionDate": collectionDate}

                data["bins"].append(dict_data)

        return data
|