uk_bin_collection 0.74.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/README.rst +0 -0
- uk_bin_collection/tests/council_feature_input_parity.py +79 -0
- uk_bin_collection/tests/features/environment.py +7 -0
- uk_bin_collection/tests/features/validate_council_outputs.feature +767 -0
- uk_bin_collection/tests/input.json +1077 -0
- uk_bin_collection/tests/output.schema +41 -0
- uk_bin_collection/tests/step_defs/step_helpers/file_handler.py +46 -0
- uk_bin_collection/tests/step_defs/test_validate_council.py +87 -0
- uk_bin_collection/tests/test_collect_data.py +104 -0
- uk_bin_collection/tests/test_common_functions.py +342 -0
- uk_bin_collection/uk_bin_collection/collect_data.py +133 -0
- uk_bin_collection/uk_bin_collection/common.py +292 -0
- uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/ArunCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py +180 -0
- uk_bin_collection/uk_bin_collection/councils/BarnsleyMBCouncil.py +109 -0
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/BathAndNorthEastSomersetCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordBoroughCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordshireCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +147 -0
- uk_bin_collection/uk_bin_collection/councils/BirminghamCityCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py +105 -0
- uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +104 -0
- uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py +141 -0
- uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py +107 -0
- uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +95 -0
- uk_bin_collection/uk_bin_collection/councils/BuryCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CalderdaleCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/CannockChaseDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CardiffCouncil.py +172 -0
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +96 -0
- uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +127 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +32 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireWestAndChesterCouncil.py +125 -0
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/ConwyCountyBorough.py +27 -0
- uk_bin_collection/uk_bin_collection/councils/CrawleyBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +291 -0
- uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/DoncasterCouncil.py +77 -0
- uk_bin_collection/uk_bin_collection/councils/DorsetCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +41 -0
- uk_bin_collection/uk_bin_collection/councils/DurhamCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/EastCambridgeshireCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/EastRidingCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/FenlandDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/ForestOfDeanDistrictCouncil.py +113 -0
- uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py +1580 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +55 -0
- uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +150 -0
- uk_bin_collection/uk_bin_collection/councils/HaltonBoroughCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/HaringeyCouncil.py +59 -0
- uk_bin_collection/uk_bin_collection/councils/HarrogateBoroughCouncil.py +63 -0
- uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/HullCityCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/KingstonUponThamesCouncil.py +84 -0
- uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py +130 -0
- uk_bin_collection/uk_bin_collection/councils/KnowsleyMBCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +71 -0
- uk_bin_collection/uk_bin_collection/councils/LeedsCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/LisburnCastlereaghCityCouncil.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughHounslow.py +82 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughRedbridge.py +161 -0
- uk_bin_collection/uk_bin_collection/councils/MaldonDistrictCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/MalvernHillsDC.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/ManchesterCityCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/MansfieldDistrictCouncil.py +38 -0
- uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py +128 -0
- uk_bin_collection/uk_bin_collection/councils/MidSussexDistrictCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/MiltonKeynesCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/NewarkAndSherwoodDC.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/NewcastleCityCouncil.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py +53 -0
- uk_bin_collection/uk_bin_collection/councils/NorthKestevenDistrictCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLanarkshireCouncil.py +46 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorthamptonshireCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +220 -0
- uk_bin_collection/uk_bin_collection/councils/NorthWestLeicestershire.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/NorthYorkshire.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/NottinghamCityCouncil.py +36 -0
- uk_bin_collection/uk_bin_collection/councils/OldhamCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/PortsmouthCityCouncil.py +131 -0
- uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/ReadingBoroughCouncil.py +30 -0
- uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/RhonddaCynonTaffCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/RochdaleCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/RugbyBoroughCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/SalfordCityCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/SevenoaksDistrictCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/SheffieldCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ShropshireCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/SolihullCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/SouthAyrshireCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/SouthCambridgeshireCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/SouthLanarkshireCouncil.py +78 -0
- uk_bin_collection/uk_bin_collection/councils/SouthNorfolkCouncil.py +91 -0
- uk_bin_collection/uk_bin_collection/councils/SouthOxfordshireCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/SouthTynesideCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/StAlbansCityAndDistrictCouncil.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/StHelensBC.py +56 -0
- uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/StockportBoroughCouncil.py +39 -0
- uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +79 -0
- uk_bin_collection/uk_bin_collection/councils/StratfordUponAvonCouncil.py +94 -0
- uk_bin_collection/uk_bin_collection/councils/SunderlandCityCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/SwaleBoroughCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/TamesideMBCouncil.py +62 -0
- uk_bin_collection/uk_bin_collection/councils/TandridgeDistrictCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/TelfordAndWrekinCouncil.py +50 -0
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/TonbridgeAndMallingBC.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py +154 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WaverleyBoroughCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/WealdenDistrictCouncil.py +86 -0
- uk_bin_collection/uk_bin_collection/councils/WelhatCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WestLindseyDistrictCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WestNorthamptonshireCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WestSuffolkCouncil.py +64 -0
- uk_bin_collection/uk_bin_collection/councils/WiganBoroughCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/WiltshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/WindsorAndMaidenheadCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WokingBoroughCouncil.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/WyreCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py +33 -0
- uk_bin_collection/uk_bin_collection/get_bin_data.py +165 -0
- uk_bin_collection-0.74.0.dist-info/LICENSE +21 -0
- uk_bin_collection-0.74.0.dist-info/METADATA +247 -0
- uk_bin_collection-0.74.0.dist-info/RECORD +171 -0
- uk_bin_collection-0.74.0.dist-info/WHEEL +4 -0
- uk_bin_collection-0.74.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,100 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from selenium.webdriver.common.by import By
|
3
|
+
from selenium.webdriver.support import expected_conditions as EC
|
4
|
+
from selenium.webdriver.support.ui import Select
|
5
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
6
|
+
|
7
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
8
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
9
|
+
|
10
|
+
|
11
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
12
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Scrape collection dates from the Derbyshire Dales self-serve form.

        Requires ``uprn``, ``postcode``, ``web_driver`` and ``headless``
        kwargs. Returns ``{"bins": [{"type": ..., "collectionDate": ...}]}``
        sorted by collection date; re-raises any scraping error after
        logging it, always quitting the webdriver.
        """
        driver = None
        try:
            page = "https://selfserve.derbyshiredales.gov.uk/renderform.aspx?t=103&k=9644C066D2168A4C21BCDA351DA2642526359DFF"

            data = {"bins": []}

            user_uprn = kwargs.get("uprn")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            check_uprn(user_uprn)
            check_postcode(user_postcode)

            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless)
            driver.get(page)

            # Populate postcode field
            inputElement_postcode = driver.find_element(
                By.ID,
                "ctl00_ContentPlaceHolder1_FF2924TB",
            )
            inputElement_postcode.send_keys(user_postcode)

            # Click search button
            driver.find_element(
                By.ID,
                "ctl00_ContentPlaceHolder1_FF2924BTN",
            ).click()

            # Wait for the 'Select address' dropdown to appear and select option matching UPRN
            dropdown = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.ID, "ctl00_ContentPlaceHolder1_FF2924DDL")
                )
            )
            # Create a 'Select' for it, then select the matching UPRN option.
            # Option values are the UPRN prefixed with a literal "U".
            dropdownSelect = Select(dropdown)
            dropdownSelect.select_by_value("U" + user_uprn)

            # Wait for the submit button to appear, then click it to get the collection dates
            submit = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.ID, "ctl00_ContentPlaceHolder1_btnSubmit")
                )
            )
            submit.click()

            soup = BeautifulSoup(driver.page_source, features="html.parser")

            bin_rows = (
                soup.find("div", id="ctl00_ContentPlaceHolder1_pnlConfirmation")
                .find("div", {"class": "row"})
                .find_all("div", {"class": "row"})
            )
            if bin_rows:
                for bin_row in bin_rows:
                    bin_data = bin_row.find_all("div")
                    # Guard on length: the previous check indexed bin_data[1]
                    # directly, which raised IndexError on a one-cell row
                    if len(bin_data) >= 2:
                        collection_date = datetime.strptime(
                            bin_data[0].get_text(strip=True), "%A%d %B, %Y"
                        )
                        dict_data = {
                            "type": bin_data[1].get_text(strip=True),
                            "collectionDate": collection_date.strftime(date_format),
                        }
                        data["bins"].append(dict_data)

            data["bins"].sort(
                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
            )
        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Optionally, re-raise the exception if you want it to propagate
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
|
@@ -0,0 +1,77 @@
|
|
1
|
+
import json
|
2
|
+
import math
|
3
|
+
from datetime import timedelta
|
4
|
+
|
5
|
+
import requests
|
6
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
|
+
|
9
|
+
|
10
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
11
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch collections for a UPRN from Doncaster's calendar JSON endpoint.

        Returns ``{"bins": [{"type": ..., "collectionDate": ...}]}`` sorted
        by date. Raises ValueError when the endpoint does not return HTTP 200
        (observed for unknown addresses).
        """
        # I need to point out that this one gave me a good head scratch. Mainly because I wrote lots
        # of code to parse the form and all that, then realised this url returns json data... oops.
        base_url = "https://www.doncaster.gov.uk/Compass/PremiseDetail/GetCollectionsForCalendar"

        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)

        # Working with epoch times, otherwise known as posix/unix timestamps. The number of weeks
        # to return can actually be customised in the below timedelta
        today = math.floor(datetime.today().timestamp())
        four_weeks = math.floor((datetime.today() + timedelta(days=4 * 7)).timestamp())

        # For some reason, the actual web form uses a property id that's completely different
        # from the uprn - luckily this one is easy to find!
        params = {
            "UPRN": user_uprn,
            "Start": str(today),
            "End": str(four_weeks),
        }

        requests.packages.urllib3.disable_warnings()
        response = requests.get(base_url, params=params)

        # 200 = ok. I got a 500 in testing, so assumed no data for that address
        if response.status_code != 200:
            raise ValueError("No bins found for provided UPRN.")

        # Load the json results
        json_results = json.loads(response.text)["slots"]

        data = {"bins": []}
        collections = []

        # Each item is a dictionary, so accessing is easy
        for item in json_results:
            bin_type = item["title"]

            # item["start"] is a string: keep only the digits up to any "+"
            # (timezone offset) and treat the remainder as a millisecond epoch
            epoch = "".join([i for i in item["start"] if i.isdigit() or i == "+"])
            epoch = epoch.split("+")[0]
            # Convert straight from the epoch. The old str()/strptime round-trip
            # added nothing and would fail on a fractional-second timestamp.
            bin_date = datetime.fromtimestamp(float(epoch) / 1000)
            collections.append((bin_type, bin_date))

        # This orders the data we just parsed to date order
        for bin_type, bin_date in sorted(collections, key=lambda x: x[1]):
            data["bins"].append(
                {
                    "type": bin_type,
                    "collectionDate": bin_date.strftime(date_format),
                }
            )

        return data
|
@@ -0,0 +1,58 @@
|
|
1
|
+
from bs4 import BeautifulSoup, element
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
|
5
|
+
|
6
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
7
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Pull upcoming bin collections out of the result boxes on the page."""
        data = {"bins": []}
        upcoming = []

        # Parse the page and find all the result boxes
        soup = BeautifulSoup(page.text, features="html.parser")
        soup.prettify()
        result_boxes = soup.find_all("li", {"class": "resultListItem"})

        # Only boxes containing the wanted string carry collection info. The
        # useful children sit at offsets 10/11 when contents[10] is a plain
        # NavigableString, otherwise at 11/12.
        for box in result_boxes:
            if "Your next" not in box.text:
                continue

            idx = 10 if type(box.contents[10]) is element.NavigableString else 11
            bin_text = box.contents[idx].text.split(" ")[2].title() + " bin"
            bin_date = datetime.strptime(
                remove_ordinal_indicator_from_date_string(
                    box.contents[idx + 1].text.strip()
                ),
                "%A %d %B %Y",
            )

            # Keep only collections that are today or in the future
            if bin_date.date() >= datetime.now().date():
                upcoming.append((bin_text, bin_date))

        # Sort by date, then shape each entry into the output dictionary
        for bin_text, bin_date in sorted(upcoming, key=lambda entry: entry[1]):
            data["bins"].append(
                {
                    "type": bin_text,
                    "collectionDate": bin_date.strftime(date_format),
                }
            )

        return data
|
@@ -0,0 +1,41 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from datetime import datetime
|
3
|
+
import re
|
4
|
+
from uk_bin_collection.uk_bin_collection.common import * # Consider specific imports
|
5
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
6
|
+
|
7
|
+
class CouncilClass(AbstractGetBinDataClass):
    """Parser for a results-table style bin collection page."""

    def parse_data(self, page: str, **kwargs) -> dict:
        """Extract bin types and next-service dates from the results table.

        Returns ``{"bins": [...]}`` sorted by collection date; returns the
        empty structure when the results wrapper is missing from the page.
        """
        soup = BeautifulSoup(page.text, 'html.parser')

        bins_data = {"bins": []}
        bin_collections = []

        results_wrapper = soup.find("div", {"class": "results-table-wrapper"})
        if not results_wrapper:
            return bins_data  # Return empty if the results wrapper is not found

        bins = results_wrapper.find_all("div", {"class": "service-wrapper"})
        for bin_item in bins:
            service_name = bin_item.find("h3", {"class": "service-name"})
            next_service = bin_item.find("td", {"class": "next-service"})

            if service_name and next_service:
                bin_type = service_name.get_text().replace("Collection", "bin").strip()
                date_span = next_service.find("span", {"class": "table-label"})
                date_text = date_span.next_sibling.get_text().strip() if date_span else None

                if date_text and re.match(r"\d{2}/\d{2}/\d{4}", date_text):
                    try:
                        bin_date = datetime.strptime(date_text, "%d/%m/%Y")
                        bin_collections.append((bin_type, bin_date))
                    except ValueError:
                        # Matched the pattern but was not a real calendar date
                        continue

        # Emit in chronological order. Use the project-wide date_format (as the
        # sibling council parsers do) rather than a hard-coded pattern.
        for bin_type, bin_date in sorted(bin_collections, key=lambda x: x[1]):
            bins_data["bins"].append({
                "type": bin_type.capitalize(),
                "collectionDate": bin_date.strftime(date_format),
            })

        return bins_data
|
@@ -0,0 +1,49 @@
|
|
1
|
+
import re
|
2
|
+
from datetime import datetime
|
3
|
+
|
4
|
+
import requests
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
|
+
|
9
|
+
|
10
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
11
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch Durham collections for a UPRN and parse each bin section.

        Looks for elements whose class is ``bins<type>`` for each known type
        and extracts the first date-like string from their text. Returns
        ``{"bins": [{"type": ..., "collectionDate": ...}]}``.
        """
        url = "https://www.durham.gov.uk/bincollections?uprn="
        uprn = kwargs.get("uprn")
        check_uprn(uprn)
        url += uprn
        requests.packages.urllib3.disable_warnings()
        page = requests.get(url)

        # Make a BS4 object
        soup = BeautifulSoup(page.text, features="html.parser")

        data = {"bins": []}

        for bin_type in ["rubbish", "recycling", "gardenwaste"]:
            bin_info = soup.find(class_=f"bins{bin_type}")

            if bin_info:
                collection_text = bin_info.get_text(strip=True)

                if collection_text:
                    # Find a date like "1 January 2024" within the section text
                    results = re.search(r"\d\d? [A-Za-z]+ \d{4}", collection_text)
                    if results:
                        # strptime returns a datetime or raises, so no extra
                        # truthiness check on the result is needed
                        date = datetime.strptime(results[0], "%d %B %Y")
                        data["bins"].append(
                            {
                                "type": bin_type,
                                "collectionDate": date.strftime(date_format),
                            }
                        )

        return data
|
@@ -0,0 +1,44 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
|
5
|
+
|
6
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
7
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch and parse East Cambridgeshire collections for a UPRN."""
        # Validate the supplied UPRN before building the request
        uprn = kwargs.get("uprn")
        check_uprn(uprn)

        # Request URL
        url = f"https://eastcambs-self.achieveservice.com/appshost/firmstep/self/apps/custompage/bincollections?language=en&uprn={uprn}"

        # Fetch the page inside a session, suppressing urllib3 warnings
        requests.packages.urllib3.disable_warnings()
        session = requests.session()
        response = session.get(url)

        # Parse the response into a BS4 tree
        soup = BeautifulSoup(response.text, features="html.parser")
        soup.prettify()

        # JSON wrapper for the results
        data = {"bins": []}

        # Each collections row holds three divs: a spacer, the bin type and the date
        for row in soup.find_all("div", {"class": "row collectionsrow"}):
            _, type_cell, date_cell = row.find_all("div")
            collection_date = datetime.strptime(date_cell.text, "%a - %d %b %Y").date()
            data["bins"].append(
                {
                    "type": type_cell.text,
                    "collectionDate": collection_date.strftime(date_format),
                }
            )

        return data
|
@@ -0,0 +1,74 @@
|
|
1
|
+
import re
|
2
|
+
from datetime import datetime
|
3
|
+
|
4
|
+
import pandas as pd
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
from uk_bin_collection.uk_bin_collection.common import date_format
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
|
+
|
9
|
+
|
10
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    baseclass. They can also override some
    operations with a default implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Parse a monthly bin-collection calendar page into records.

        Month headers (class ``eventmonth``) set the current month and year;
        each regular or bank-holiday collection entry then contributes one
        record per bin type listed beside its day-of-month.
        """
        # Make a BS4 object
        soup = BeautifulSoup(page.text, features="html.parser")
        soup.prettify()

        data = {"bins": []}
        month_class_name = 'class="eventmonth"'
        regular_collection_class_name = "collectiondate regular-collection"
        holiday_collection_class_name = "collectiondate bankholiday-change"

        calendar_collection = soup.find("ol", {"class": "nonumbers news collections"})
        calendar_list = calendar_collection.find_all("li")
        current_month = ""
        current_year = ""

        for element in calendar_list:
            element_tag = str(element)
            if month_class_name in element_tag:
                # Month header such as "January 2024": remember month/year for
                # the collection entries that follow it
                current_month = datetime.strptime(element.text, "%B %Y").strftime("%m")
                current_year = datetime.strptime(element.text, "%B %Y").strftime("%Y")
            elif regular_collection_class_name in element_tag:
                # Regular collection entry
                self._append_collections(
                    data,
                    element,
                    regular_collection_class_name,
                    current_year,
                    current_month,
                    "",
                )
            elif holiday_collection_class_name in element_tag:
                # Bank-holiday replacement: same structure, annotated bin type
                self._append_collections(
                    data,
                    element,
                    holiday_collection_class_name,
                    current_year,
                    current_month,
                    " (bank holiday replacement)",
                )
        return data

    @staticmethod
    def _append_collections(
        data, element, class_name, current_year, current_month, type_suffix
    ):
        """Append one record per bin type found beside a collection-date span.

        The span's own text carries the day of the month (with surrounding
        ordinal text); its sibling spans name the bins collected that day.
        """
        week_value = element.find_next("span", {"class": f"{class_name}"})
        # Strip every non-digit to leave just the day of the month
        day_of_month = re.sub("[^0-9]", "", week_value.text).strip()
        collection_date = datetime(
            int(current_year), int(current_month), int(day_of_month)
        ).strftime(date_format)
        for item in week_value.find_next_siblings("span"):
            bin_type = item.text.strip()
            # Skip empty / single-character separator spans
            if len(bin_type) > 1:
                data["bins"].append(
                    {
                        "type": bin_type + type_suffix,
                        "collectionDate": collection_date,
                    }
                )
|
@@ -0,0 +1,108 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from selenium.webdriver.common.by import By
|
3
|
+
from selenium.webdriver.support import expected_conditions as EC
|
4
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
5
|
+
|
6
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
|
+
|
9
|
+
|
10
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
11
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Scrape East Lindsey collection days via the address-lookup form.

        Requires ``paon``, ``postcode``, ``web_driver`` and ``headless``
        kwargs. Returns ``{"bins": [...]}`` sorted by collection date;
        re-raises any scraping error after logging it, always quitting the
        webdriver.
        """
        driver = None
        try:
            data = {"bins": []}
            user_paon = kwargs.get("paon")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            check_paon(user_paon)
            check_postcode(user_postcode)

            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless)
            driver.get(
                "https://www.e-lindsey.gov.uk/article/6714/Your-Waste-Collection-Days"
            )

            # Wait for the postcode field to appear then populate it
            inputElement_postcode = WebDriverWait(driver, 30).until(
                EC.presence_of_element_located(
                    (By.ID, "WASTECOLLECTIONDAYS202324_LOOKUP_ADDRESSLOOKUPPOSTCODE")
                )
            )
            inputElement_postcode.send_keys(user_postcode)

            # Click search button
            findAddress = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.ID, "WASTECOLLECTIONDAYS202324_LOOKUP_ADDRESSLOOKUPSEARCH")
                )
            )
            findAddress.click()

            # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable(
                    (
                        By.XPATH,
                        "//select[@id='WASTECOLLECTIONDAYS202324_LOOKUP_ADDRESSLOOKUPADDRESS']//option[contains(., '"
                        + user_paon
                        + "')]",
                    )
                )
            ).click()

            # Wait for the submit button to appear, then click it to get the collection dates
            submit = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.ID, "WASTECOLLECTIONDAYS202324_LOOKUP_FIELD2_NEXT")
                )
            )
            submit.click()

            # Wait for the collections table to appear
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, ".waste-results"))
            )

            soup = BeautifulSoup(driver.page_source, features="html.parser")

            # Get collections
            for collection in soup.find_all("div", {"class": "waste-result"}):
                ptags = collection.find_all("p")
                dict_data = {
                    "type": collection.find("h3").get_text(strip=True),
                    "collectionDate": datetime.strptime(
                        remove_ordinal_indicator_from_date_string(
                            ptags[1]
                            .get_text()
                            .replace("The date of your next collection is", "")
                            .replace(".", "")
                            .strip()
                        ),
                        "%A %d %B %Y",
                    ).strftime(date_format),
                }
                data["bins"].append(dict_data)

            # Sort with the same date_format used to format the strings above.
            # The previous hard-coded "%d/%m/%Y" only worked while date_format
            # happened to match it exactly.
            data["bins"].sort(
                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
            )
        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Optionally, re-raise the exception if you want it to propagate
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
|