uk_bin_collection 0.74.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/README.rst +0 -0
- uk_bin_collection/tests/council_feature_input_parity.py +79 -0
- uk_bin_collection/tests/features/environment.py +7 -0
- uk_bin_collection/tests/features/validate_council_outputs.feature +767 -0
- uk_bin_collection/tests/input.json +1077 -0
- uk_bin_collection/tests/output.schema +41 -0
- uk_bin_collection/tests/step_defs/step_helpers/file_handler.py +46 -0
- uk_bin_collection/tests/step_defs/test_validate_council.py +87 -0
- uk_bin_collection/tests/test_collect_data.py +104 -0
- uk_bin_collection/tests/test_common_functions.py +342 -0
- uk_bin_collection/uk_bin_collection/collect_data.py +133 -0
- uk_bin_collection/uk_bin_collection/common.py +292 -0
- uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/ArunCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py +180 -0
- uk_bin_collection/uk_bin_collection/councils/BarnsleyMBCouncil.py +109 -0
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/BathAndNorthEastSomersetCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordBoroughCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordshireCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +147 -0
- uk_bin_collection/uk_bin_collection/councils/BirminghamCityCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py +105 -0
- uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +104 -0
- uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py +141 -0
- uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py +107 -0
- uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +95 -0
- uk_bin_collection/uk_bin_collection/councils/BuryCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CalderdaleCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/CannockChaseDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CardiffCouncil.py +172 -0
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +96 -0
- uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +127 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +32 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireWestAndChesterCouncil.py +125 -0
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/ConwyCountyBorough.py +27 -0
- uk_bin_collection/uk_bin_collection/councils/CrawleyBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +291 -0
- uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/DoncasterCouncil.py +77 -0
- uk_bin_collection/uk_bin_collection/councils/DorsetCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +41 -0
- uk_bin_collection/uk_bin_collection/councils/DurhamCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/EastCambridgeshireCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/EastRidingCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/FenlandDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/ForestOfDeanDistrictCouncil.py +113 -0
- uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py +1580 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +55 -0
- uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +150 -0
- uk_bin_collection/uk_bin_collection/councils/HaltonBoroughCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/HaringeyCouncil.py +59 -0
- uk_bin_collection/uk_bin_collection/councils/HarrogateBoroughCouncil.py +63 -0
- uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/HullCityCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/KingstonUponThamesCouncil.py +84 -0
- uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py +130 -0
- uk_bin_collection/uk_bin_collection/councils/KnowsleyMBCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +71 -0
- uk_bin_collection/uk_bin_collection/councils/LeedsCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/LisburnCastlereaghCityCouncil.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughHounslow.py +82 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughRedbridge.py +161 -0
- uk_bin_collection/uk_bin_collection/councils/MaldonDistrictCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/MalvernHillsDC.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/ManchesterCityCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/MansfieldDistrictCouncil.py +38 -0
- uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py +128 -0
- uk_bin_collection/uk_bin_collection/councils/MidSussexDistrictCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/MiltonKeynesCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/NewarkAndSherwoodDC.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/NewcastleCityCouncil.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py +53 -0
- uk_bin_collection/uk_bin_collection/councils/NorthKestevenDistrictCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLanarkshireCouncil.py +46 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorthamptonshireCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +220 -0
- uk_bin_collection/uk_bin_collection/councils/NorthWestLeicestershire.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/NorthYorkshire.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/NottinghamCityCouncil.py +36 -0
- uk_bin_collection/uk_bin_collection/councils/OldhamCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/PortsmouthCityCouncil.py +131 -0
- uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/ReadingBoroughCouncil.py +30 -0
- uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/RhonddaCynonTaffCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/RochdaleCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/RugbyBoroughCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/SalfordCityCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/SevenoaksDistrictCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/SheffieldCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ShropshireCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/SolihullCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/SouthAyrshireCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/SouthCambridgeshireCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/SouthLanarkshireCouncil.py +78 -0
- uk_bin_collection/uk_bin_collection/councils/SouthNorfolkCouncil.py +91 -0
- uk_bin_collection/uk_bin_collection/councils/SouthOxfordshireCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/SouthTynesideCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/StAlbansCityAndDistrictCouncil.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/StHelensBC.py +56 -0
- uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/StockportBoroughCouncil.py +39 -0
- uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +79 -0
- uk_bin_collection/uk_bin_collection/councils/StratfordUponAvonCouncil.py +94 -0
- uk_bin_collection/uk_bin_collection/councils/SunderlandCityCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/SwaleBoroughCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/TamesideMBCouncil.py +62 -0
- uk_bin_collection/uk_bin_collection/councils/TandridgeDistrictCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/TelfordAndWrekinCouncil.py +50 -0
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/TonbridgeAndMallingBC.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py +154 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WaverleyBoroughCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/WealdenDistrictCouncil.py +86 -0
- uk_bin_collection/uk_bin_collection/councils/WelhatCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WestLindseyDistrictCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WestNorthamptonshireCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WestSuffolkCouncil.py +64 -0
- uk_bin_collection/uk_bin_collection/councils/WiganBoroughCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/WiltshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/WindsorAndMaidenheadCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WokingBoroughCouncil.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/WyreCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py +33 -0
- uk_bin_collection/uk_bin_collection/get_bin_data.py +165 -0
- uk_bin_collection-0.74.0.dist-info/LICENSE +21 -0
- uk_bin_collection-0.74.0.dist-info/METADATA +247 -0
- uk_bin_collection-0.74.0.dist-info/RECORD +171 -0
- uk_bin_collection-0.74.0.dist-info/WHEEL +4 -0
- uk_bin_collection-0.74.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,147 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from datetime import datetime
|
3
|
+
from selenium.webdriver.common.by import By
|
4
|
+
from selenium.webdriver.support import expected_conditions as EC
|
5
|
+
from selenium.webdriver.support.ui import Select
|
6
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
7
|
+
from selenium.webdriver.common.keys import Keys
|
8
|
+
|
9
|
+
import time
|
10
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
11
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
12
|
+
|
13
|
+
|
14
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
15
|
+
|
16
|
+
|
17
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Scrape bin collection dates from Bexley's Selenium-driven
        'When is my collection day' form.

        Keyword arguments:
        paon -- house number or property name typed into the address search
        postcode -- postcode typed into the address search
        web_driver -- remote Selenium endpoint (passed to create_webdriver)
        headless -- whether to run the browser headless
        """
        driver = None
        try:
            page = "https://mybexley.bexley.gov.uk/service/When_is_my_collection_day"

            data = {"bins": []}

            user_paon = kwargs.get("paon")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")

            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless)
            driver.get(page)

            # The form is rendered inside an iframe; wait for it, then switch in
            iframe_presense = WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.ID, "fillform-frame-1"))
            )
            driver.switch_to.frame(iframe_presense)

            wait = WebDriverWait(driver, 60)
            start_btn = wait.until(
                EC.element_to_be_clickable(
                    (By.XPATH, "//button/span[contains(text(), 'Next')]")
                )
            )
            start_btn.click()

            # If you bang in the house number (or property name) and postcode
            # in the box it should find your property
            inputElement_postcodesearch = wait.until(
                EC.element_to_be_clickable((By.ID, "postcode_search"))
            )
            inputElement_postcodesearch.send_keys(user_postcode)

            find_address_btn = wait.until(
                EC.element_to_be_clickable((By.XPATH, '//*[@id="search"]'))
            )
            find_address_btn.click()

            dropdown_options = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, '//*[@id="select2-chosen-1"]')
                )
            )
            # The select2 widget needs a moment to populate before clicking
            time.sleep(2)
            dropdown_options.click()
            time.sleep(1)
            dropdown_input = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, '//*[@id="s2id_autogen1_search"]')
                )
            )
            time.sleep(1)
            dropdown_input.send_keys(user_paon)
            dropdown_input.send_keys(Keys.ENTER)

            results_found = wait.until(
                EC.presence_of_element_located((By.CLASS_NAME, "found-content"))
            )
            finish_btn = wait.until(
                EC.element_to_be_clickable(
                    (By.XPATH, "//button/span[contains(text(), 'Next')]")
                )
            )
            finish_btn.click()
            final_page = wait.until(
                EC.presence_of_element_located(
                    (By.CLASS_NAME, "waste-header-container")
                )
            )

            soup = BeautifulSoup(driver.page_source, features="html.parser")

            bin_fields = soup.find_all("div", class_="waste-panel-container")

            for bin in bin_fields:
                # Extract h3 text (bin type) from the current panel
                h3_text = (
                    bin.find("h3", class_="container-name").get_text(strip=True)
                    if bin.find("h3", class_="container-name")
                    else None
                )

                date_text = (
                    bin.find("p", class_="container-status").get_text(strip=True)
                    if bin.find("p", class_="container-status")
                    else None
                )

                if h3_text and date_text:
                    # The page omits the year, so strptime defaults to 1900
                    parsed_date = datetime.strptime(date_text, "%A %d %B")

                    # BUGFIX: the old code compared the year-1900 parsed_date
                    # against datetime.now(), which was ALWAYS in the past, so
                    # every collection date was pushed one year into the
                    # future. Anchor the date to the current year first, and
                    # only roll over to next year if that day has passed.
                    today = datetime.now()
                    candidate = parsed_date.replace(year=today.year)
                    if candidate.date() < today.date():
                        candidate = candidate.replace(year=today.year + 1)

                    data["bins"].append(
                        {
                            "type": h3_text,
                            "collectionDate": candidate.strftime("%d/%m/%Y"),
                        }
                    )

        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Optionally, re-raise the exception if you want it to propagate
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
@@ -0,0 +1,119 @@
|
|
1
|
+
from dateutil.relativedelta import relativedelta
|
2
|
+
from bs4 import BeautifulSoup
|
3
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
4
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
5
|
+
|
6
|
+
|
7
|
+
def get_token(page) -> str:
    """
    Get a __token to include in the form data
    :param page: Page html
    :return: Form __token
    """
    markup = BeautifulSoup(page.text, features="html.parser")
    hidden_field = markup.find("input", {"name": "__token"})
    return hidden_field.get("value")
|
17
|
+
|
18
|
+
|
19
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def get_data(self, url) -> str:
        """This method makes the request to the council

        Keyword arguments:
        url -- the url to get the data from
        """
        # Set a user agent so we look like a browser ;-)
        user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/108.0.0.0 Safari/537.36"
        )
        headers = {"User-Agent": user_agent}
        requests.packages.urllib3.disable_warnings()

        # Make the Request - change the URL - find out your property number
        try:
            session = requests.Session()
            session.headers.update(headers)
            # Timeout added so a stalled council server cannot hang us forever
            full_page = session.get(url, timeout=120)
            return full_page
        except requests.exceptions.HTTPError as errh:
            _LOGGER.error(f"Http Error: {errh}")
            raise
        except requests.exceptions.ConnectionError as errc:
            _LOGGER.error(f"Error Connecting: {errc}")
            raise
        except requests.exceptions.Timeout as errt:
            _LOGGER.error(f"Timeout Error: {errt}")
            raise
        except requests.exceptions.RequestException as err:
            _LOGGER.error(f"Oops: Something Else {err}")
            raise

    def parse_data(self, page: str, **kwargs) -> dict:
        """Post the form token, postcode and UPRN to Birmingham's xfp form
        and parse the returned collection table.

        Keyword arguments:
        uprn -- property UPRN submitted with the form
        postcode -- property postcode submitted with the form
        """
        uprn = kwargs.get("uprn")
        postcode = kwargs.get("postcode")
        check_uprn(uprn)
        check_postcode(postcode)

        values = {
            "__token": get_token(page),
            "page": "491",
            "locale": "en_GB",
            "q1f8ccce1d1e2f58649b4069712be6879a839233f_0_0": postcode,
            "q1f8ccce1d1e2f58649b4069712be6879a839233f_1_0": uprn,
            "next": "Next",
        }
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"}
        requests.packages.urllib3.disable_warnings()
        response = requests.request(
            "POST",
            "https://www.birmingham.gov.uk/xfp/form/619",
            headers=headers,
            data=values,
            timeout=120,
        )

        soup = BeautifulSoup(response.text, features="html.parser")

        rows = soup.find("table").find_all("tr")

        # Form a JSON wrapper
        data = {"bins": []}

        # Loops the Rows
        for row in rows:
            cells = row.find_all("td")
            if cells:
                bin_type = cells[0].get_text(strip=True)
                collection_next = cells[1].get_text(strip=True)

                # The date is given in parentheses, e.g. "Tuesday (01/01/2024)"
                # (raw strings: the old "\(" escapes are invalid-escape
                # warnings on modern Python)
                collection_date = re.findall(r"\(.*?\)", collection_next)

                if len(collection_date) != 1:
                    continue

                collection_date_obj = parse(
                    re.sub(r"[()]", "", collection_date[0])
                ).date()

                # since we only have the next collection day, if the parsed date is in the past,
                # assume the day is instead next month
                if collection_date_obj < datetime.now().date():
                    collection_date_obj += relativedelta(months=1)

                # Make each Bin element in the JSON
                dict_data = {
                    "type": bin_type,
                    "collectionDate": collection_date_obj.strftime(date_format),
                }

                # Add data to the main JSON Wrapper
                data["bins"].append(dict_data)

        return data
|
@@ -0,0 +1,105 @@
|
|
1
|
+
import json
|
2
|
+
from collections import OrderedDict
|
3
|
+
from datetime import datetime
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
import requests
|
6
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
|
+
import ssl
|
9
|
+
import urllib3
|
10
|
+
import logging
|
11
|
+
|
12
|
+
|
13
|
+
class CustomHttpAdapter(requests.adapters.HTTPAdapter):
    """Transport adapter that lets us supply a custom ssl_context to the
    underlying urllib3 pool manager."""

    def __init__(self, ssl_context=None, **kwargs):
        # Stash the context before the parent initialises the pool manager
        self.ssl_context = ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, connections, maxsize, block=False):
        pool_settings = {
            "num_pools": connections,
            "maxsize": maxsize,
            "block": block,
            "ssl_context": self.ssl_context,
        }
        self.poolmanager = urllib3.poolmanager.PoolManager(**pool_settings)
|
27
|
+
|
28
|
+
|
29
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch Blackburn's JSON bin-collection API through a webdriver
        and return the parsed collection dates.

        Keyword arguments:
        uprn -- property UPRN inserted into the API URL
        web_driver -- remote Selenium endpoint (passed to create_webdriver)
        headless -- whether to run the browser headless
        """
        driver = None
        try:
            data = {"bins": []}
            uprn = kwargs.get("uprn")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")

            today = datetime.today()
            api_url = (
                "https://mybins.blackburn.gov.uk/api/mybins/getbincollectiondays"
                f"?uprn={uprn}&month={today.strftime('%m')}&year={today.strftime('%Y')}"
            )

            driver = create_webdriver(web_driver, headless)
            driver.get(api_url)

            soup = BeautifulSoup(driver.page_source, "html.parser")

            # The raw JSON response is rendered inside a <pre> tag
            pre_tag = soup.find("pre")

            if pre_tag:
                payload = json.loads(pre_tag.contents[0])

                for entry in payload["BinCollectionDays"]:
                    if entry is None:
                        continue

                    bin_type = entry[0].get("BinType")
                    current_date = datetime.strptime(
                        entry[0].get("CollectionDate"), "%Y-%m-%d"
                    )
                    next_date = datetime.strptime(
                        entry[0].get("NextScheduledCollectionDate"), "%Y-%m-%d"
                    )

                    # Prefer the current collection date while it is still
                    # upcoming; otherwise fall back to the next scheduled one
                    upcoming = (
                        current_date
                        if datetime.today().date()
                        <= current_date.date()
                        < next_date.date()
                        else next_date
                    )

                    data["bins"].append(
                        {
                            "type": bin_type,
                            "collectionDate": upcoming.strftime(date_format),
                        }
                    )

                data["bins"].sort(
                    key=lambda entry: datetime.strptime(
                        entry.get("collectionDate"), date_format
                    )
                )
        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Optionally, re-raise the exception if you want it to propagate
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
|
@@ -0,0 +1,104 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from datetime import datetime
|
3
|
+
from selenium.webdriver.common.by import By
|
4
|
+
from selenium.webdriver.support import expected_conditions as EC
|
5
|
+
from selenium.webdriver.support.ui import Select
|
6
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
7
|
+
from selenium.webdriver.common.keys import Keys
|
8
|
+
|
9
|
+
import time
|
10
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
11
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
12
|
+
|
13
|
+
|
14
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Drive Bolton's bin lookup form: search by postcode, pick the
        property by UPRN, then scrape the resulting schedule.

        Keyword arguments:
        uprn -- property UPRN used to pick the address from the dropdown
        postcode -- postcode typed into the search box
        web_driver -- remote Selenium endpoint (passed to create_webdriver)
        headless -- whether to run the browser headless
        """
        driver = None
        try:
            user_uprn = kwargs.get("uprn")
            check_uprn(user_uprn)

            user_postcode = kwargs.get("postcode")
            check_postcode(user_postcode)
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")

            data = {"bins": []}

            # Get our initial session running
            lookup_url = "https://carehomes.bolton.gov.uk/bins.aspx"

            driver = create_webdriver(web_driver, headless)
            driver.get(lookup_url)

            # If you bang in the house number (or property name) and postcode in the box it should find your property
            wait = WebDriverWait(driver, 30)

            postcode_field = wait.until(
                EC.presence_of_element_located((By.ID, "txtPostcode"))
            )
            postcode_field.send_keys(user_postcode)

            submit_button = wait.until(
                EC.element_to_be_clickable((By.ID, "btnSubmit"))
            )
            submit_button.click()

            # Wait for the 'Select your property' dropdown to appear and select the first result
            address_dropdown = wait.until(
                EC.element_to_be_clickable((By.ID, "ddlAddresses"))
            )
            wait.until(
                EC.presence_of_element_located((By.XPATH, "//select/option[1]"))
            )
            time.sleep(1)
            # Pick the property matching the supplied UPRN
            # (Index 0 is "Make a selection from the list")
            Select(address_dropdown).select_by_value(str(user_uprn))
            wait.until(EC.presence_of_element_located((By.ID, "pnlStep3")))

            soup = BeautifulSoup(driver.page_source, features="html.parser")
            soup.prettify()

            # Each "bin-info" section holds one bin type plus its dates
            collections = [
                (
                    " ".join(
                        section.find_next("strong").text.split()[2:4]
                    ).capitalize(),
                    datetime.strptime(paragraph.text.strip(), "%A %d %B %Y"),
                )
                for section in soup.find_all("div", {"class": "bin-info"})
                for paragraph in section.find_all("p")
            ]

            # Emit the entries in chronological order
            for bin_type, when in sorted(collections, key=lambda pair: pair[1]):
                data["bins"].append(
                    {
                        "type": bin_type,
                        "collectionDate": when.strftime(date_format),
                    }
                )
        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Optionally, re-raise the exception if you want it to propagate
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
|
@@ -0,0 +1,103 @@
|
|
1
|
+
import requests
|
2
|
+
from bs4 import BeautifulSoup
|
3
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
4
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
5
|
+
|
6
|
+
|
7
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
8
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch Bradford MDC's collection-dates page (UPRN is passed via a
        cookie) and parse the general/recycling collection table.

        Keyword arguments:
        uprn -- property UPRN, sent in the COLLECTIONDATES cookie
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)

        # UPRN is passed in via a cookie. Set cookies/params and GET the page
        cookies = {
            "COLLECTIONDATES": f"{user_uprn}",
        }
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "en-GB,en;q=0.7",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Referer": "https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "same-origin",
            "Sec-Fetch-User": "?1",
            "Sec-GPC": "1",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
        }
        params = {
            "ebp": "30",
            "ebd": "0",
            "ebz": "1_1713270660323",
        }
        requests.packages.urllib3.disable_warnings()
        response = requests.get(
            "https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb",
            params=params,
            headers=headers,
            cookies=cookies,
        )

        # Parse response text for super speedy finding
        soup = BeautifulSoup(response.text, features="html.parser")
        soup.prettify()

        data = {"bins": []}

        # BradfordMDC site has lots of embedded tables, find the table titled 'Your next general/recycling collections are:'
        for bin in soup.find_all(attrs={"class": "CTID-FHGh1Q77-_"}):
            if bin.find_all(attrs={"class": "CTID-62bNngCB-_"}):
                bin_type = "General Waste"
                bin_colour = "Green"
                bin_date_text = bin.find(attrs={"class": "CTID-62bNngCB-_"}).get_text()
            elif bin.find_all(attrs={"class": "CTID-LHo9iO0y-_"}):
                bin_type = "Recycling Waste"
                bin_colour = "Grey"
                bin_date_text = bin.find(attrs={"class": "CTID-LHo9iO0y-_"}).get_text()
            else:
                # BUGFIX: the old message interpolated `bin_type_info`, a name
                # that is never defined, so the intended ValueError surfaced
                # as a NameError instead.
                raise ValueError("No bin info found in collection panel")

            # Collection Date info is alongside the bin type, we got the whole line in the if/elif above
            # below strips the text off at the beginning, to get a date, though recycling is a character shorter hence the lstrip
            bin_date_info = bin_date_text[29:50].lstrip(" ")

            if contains_date(bin_date_info):
                bin_date = get_next_occurrence_from_day_month(
                    datetime.strptime(
                        bin_date_info,  # + " " + datetime.today().strftime("%Y"),
                        "%a %b %d %Y",
                    )
                ).strftime(date_format)
            # On exceptional collection schedule (e.g. around English Bank Holidays), date will be contained in the second stripped string
            else:
                # NOTE(review): bin_date_info is a str here, so [1] is a
                # single character — this fallback looks like it expected a
                # list of stripped strings; confirm against a bank-holiday
                # page before relying on it.
                bin_date = get_next_occurrence_from_day_month(
                    datetime.strptime(
                        bin_date_info[1] + " " + datetime.today().strftime("%Y"),
                        "%a %b %d %Y",
                    )
                ).strftime(date_format)

            # Build data dict for each entry
            dict_data = {
                "type": bin_type,
                "collectionDate": bin_date,
                "colour": bin_colour,
            }
            data["bins"].append(dict_data)

        data["bins"].sort(
            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
        )

        return data
|