uk_bin_collection 0.74.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/README.rst +0 -0
- uk_bin_collection/tests/council_feature_input_parity.py +79 -0
- uk_bin_collection/tests/features/environment.py +7 -0
- uk_bin_collection/tests/features/validate_council_outputs.feature +767 -0
- uk_bin_collection/tests/input.json +1077 -0
- uk_bin_collection/tests/output.schema +41 -0
- uk_bin_collection/tests/step_defs/step_helpers/file_handler.py +46 -0
- uk_bin_collection/tests/step_defs/test_validate_council.py +87 -0
- uk_bin_collection/tests/test_collect_data.py +104 -0
- uk_bin_collection/tests/test_common_functions.py +342 -0
- uk_bin_collection/uk_bin_collection/collect_data.py +133 -0
- uk_bin_collection/uk_bin_collection/common.py +292 -0
- uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/ArunCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py +180 -0
- uk_bin_collection/uk_bin_collection/councils/BarnsleyMBCouncil.py +109 -0
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/BathAndNorthEastSomersetCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordBoroughCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordshireCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +147 -0
- uk_bin_collection/uk_bin_collection/councils/BirminghamCityCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py +105 -0
- uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +104 -0
- uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py +141 -0
- uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py +107 -0
- uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +95 -0
- uk_bin_collection/uk_bin_collection/councils/BuryCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CalderdaleCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/CannockChaseDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CardiffCouncil.py +172 -0
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +96 -0
- uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +127 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +32 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireWestAndChesterCouncil.py +125 -0
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/ConwyCountyBorough.py +27 -0
- uk_bin_collection/uk_bin_collection/councils/CrawleyBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +291 -0
- uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/DoncasterCouncil.py +77 -0
- uk_bin_collection/uk_bin_collection/councils/DorsetCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +41 -0
- uk_bin_collection/uk_bin_collection/councils/DurhamCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/EastCambridgeshireCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/EastRidingCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/FenlandDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/ForestOfDeanDistrictCouncil.py +113 -0
- uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py +1580 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +55 -0
- uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +150 -0
- uk_bin_collection/uk_bin_collection/councils/HaltonBoroughCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/HaringeyCouncil.py +59 -0
- uk_bin_collection/uk_bin_collection/councils/HarrogateBoroughCouncil.py +63 -0
- uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/HullCityCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/KingstonUponThamesCouncil.py +84 -0
- uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py +130 -0
- uk_bin_collection/uk_bin_collection/councils/KnowsleyMBCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +71 -0
- uk_bin_collection/uk_bin_collection/councils/LeedsCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/LisburnCastlereaghCityCouncil.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughHounslow.py +82 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughRedbridge.py +161 -0
- uk_bin_collection/uk_bin_collection/councils/MaldonDistrictCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/MalvernHillsDC.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/ManchesterCityCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/MansfieldDistrictCouncil.py +38 -0
- uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py +128 -0
- uk_bin_collection/uk_bin_collection/councils/MidSussexDistrictCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/MiltonKeynesCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/NewarkAndSherwoodDC.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/NewcastleCityCouncil.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py +53 -0
- uk_bin_collection/uk_bin_collection/councils/NorthKestevenDistrictCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLanarkshireCouncil.py +46 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorthamptonshireCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +220 -0
- uk_bin_collection/uk_bin_collection/councils/NorthWestLeicestershire.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/NorthYorkshire.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/NottinghamCityCouncil.py +36 -0
- uk_bin_collection/uk_bin_collection/councils/OldhamCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/PortsmouthCityCouncil.py +131 -0
- uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/ReadingBoroughCouncil.py +30 -0
- uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/RhonddaCynonTaffCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/RochdaleCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/RugbyBoroughCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/SalfordCityCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/SevenoaksDistrictCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/SheffieldCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ShropshireCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/SolihullCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/SouthAyrshireCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/SouthCambridgeshireCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/SouthLanarkshireCouncil.py +78 -0
- uk_bin_collection/uk_bin_collection/councils/SouthNorfolkCouncil.py +91 -0
- uk_bin_collection/uk_bin_collection/councils/SouthOxfordshireCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/SouthTynesideCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/StAlbansCityAndDistrictCouncil.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/StHelensBC.py +56 -0
- uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/StockportBoroughCouncil.py +39 -0
- uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +79 -0
- uk_bin_collection/uk_bin_collection/councils/StratfordUponAvonCouncil.py +94 -0
- uk_bin_collection/uk_bin_collection/councils/SunderlandCityCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/SwaleBoroughCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/TamesideMBCouncil.py +62 -0
- uk_bin_collection/uk_bin_collection/councils/TandridgeDistrictCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/TelfordAndWrekinCouncil.py +50 -0
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/TonbridgeAndMallingBC.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py +154 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WaverleyBoroughCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/WealdenDistrictCouncil.py +86 -0
- uk_bin_collection/uk_bin_collection/councils/WelhatCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WestLindseyDistrictCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WestNorthamptonshireCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WestSuffolkCouncil.py +64 -0
- uk_bin_collection/uk_bin_collection/councils/WiganBoroughCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/WiltshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/WindsorAndMaidenheadCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WokingBoroughCouncil.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/WyreCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py +33 -0
- uk_bin_collection/uk_bin_collection/get_bin_data.py +165 -0
- uk_bin_collection-0.74.0.dist-info/LICENSE +21 -0
- uk_bin_collection-0.74.0.dist-info/METADATA +247 -0
- uk_bin_collection-0.74.0.dist-info/RECORD +171 -0
- uk_bin_collection-0.74.0.dist-info/WHEEL +4 -0
- uk_bin_collection-0.74.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,55 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
|
5
|
+
|
6
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Parse a yearly bin-collection calendar page into the standard dict.

        The page contains one HTML table per month; each collection day cell
        holds an <img> whose ``src`` identifies the bin colour/type and whose
        ``alt`` text distinguishes real icons from layout spacers.

        :param page: HTTP response object whose ``.text`` is the calendar HTML.
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
        """
        # Parse the page. (Fix: removed a dead `soup.prettify()` call — it
        # returns a formatted string and has no side effects, so discarding
        # its result did nothing.)
        soup = BeautifulSoup(page.text, features="html.parser")

        # Pair icon source URLs with their respective bin type.
        data = {"bins": []}
        bin_types = {
            "../images/bins/cal_blue.png": "Mixed recycling",
            "../images/bins/cal_green.png": "General waste",
            "../images/bins/cal_grey.png": "Food waste",
            "../images/bins/cal_brown.png": "Organic waste",
            "../images/bins/cal_purple.png": "Glass",
            "../images/bins/cal_ash.png": "Ash bin",
        }

        # Find the page body with all the calendars. The #Year element holds
        # the calendar year as a bare "%Y" string.
        body = soup.find("div", {"id": "printArticle"})
        cal_year = datetime.strptime(soup.select("#Year")[0].text.strip(), "%Y").year
        calendars = body.find_all_next("table", {"title": "Calendar"})

        # For each calendar grid, get the month and all icons within it. We
        # only take icons with alt text, as this includes the bin type while
        # excluding spacers.
        for item in calendars:
            cal_month = datetime.strptime(item.find_next("td").text.strip(), "%B").month
            icons = item.find_all("img", alt=True)

            # For each icon, walk up to the day cell so we can parse the
            # correct day number and build a datetime for the collection.
            for icon in icons:
                cal_item = icon.find_parent().find_parent().find_parent().contents
                cal_day = datetime.strptime(cal_item[1].text.strip(), "%d").day
                bin_date = datetime(cal_year, cal_month, cal_day)

                # Only keep collections from now onwards. NOTE(review):
                # bin_date is midnight, so a collection later *today* is
                # excluded once the clock passes 00:00 — confirm intended.
                if datetime.now() <= bin_date:
                    dict_data = {
                        "type": bin_types.get(icon["src"].lower()),
                        "collectionDate": bin_date.strftime(date_format),
                    }
                    data["bins"].append(dict_data)

        return data
|
@@ -0,0 +1,150 @@
|
|
1
|
+
import re
|
2
|
+
import requests
|
3
|
+
from bs4 import BeautifulSoup
|
4
|
+
|
5
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
6
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
7
|
+
|
8
|
+
# This script pulls (in one hit) the data from Bromley Council Bins Data
|
9
|
+
import datetime
|
10
|
+
from datetime import datetime
|
11
|
+
from selenium.webdriver.common.by import By
|
12
|
+
from selenium.webdriver.support import expected_conditions as EC
|
13
|
+
from selenium.webdriver.support.ui import Select
|
14
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
15
|
+
from selenium.webdriver.common.keys import Keys
|
16
|
+
import time
|
17
|
+
|
18
|
+
|
19
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
20
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Scrape Guildford's Salesforce bin-collection page with Selenium.

        Flow: enter postcode -> submit -> pick the address row matching the
        supplied ``paon`` text -> submit -> scrape the results table from the
        rendered page source.

        :param page: Unused here; the page is fetched via a webdriver.
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
        """
        driver = None
        try:
            # NOTE(review): `uprn` is read but never used below — confirm
            # whether it was meant to drive the address selection.
            uprn = kwargs.get("uprn")
            postcode = kwargs.get("postcode")
            full_address = kwargs.get("paon")

            # NOTE(review): `url` is assigned but navigation below uses
            # kwargs.get("url") instead — the two may differ.
            url = "https://my.guildford.gov.uk/customers/s/view-bin-collections"

            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")

            driver = create_webdriver(web_driver, headless)
            driver.get(kwargs.get("url"))

            # Long timeout: the Salesforce Lightning page is slow to render.
            wait = WebDriverWait(driver, 120)
            post_code_search = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, "//input[contains(@class, 'slds-input')]")
                )
            )

            post_code_search.send_keys(postcode)

            post_code_submit_btn = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, "//button[contains(@class,'slds-button')]")
                )
            )
            # ENTER keypress rather than .click() — presumably to avoid
            # Lightning overlay interception; keep as-is.
            post_code_submit_btn.send_keys(Keys.ENTER)

            # Locate the element containing the specified address text.
            address_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (
                        By.XPATH,
                        f"//lightning-base-formatted-text[contains(text(), '{full_address}')]",
                    )
                )
            )

            # Find the associated radio button in the same row (preceding sibling).
            radio_button = address_element.find_element(
                By.XPATH, "../../../../preceding-sibling::td//input[@type='radio']"
            )

            radio_button.send_keys(Keys.SPACE)
            address_submit_btn = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, "//button[contains(@name,'NEXT')]")
                )
            )
            address_submit_btn.send_keys(Keys.ENTER)

            # These waits only gate page readiness; the located elements are
            # not used directly — the parsed page source is used instead.
            results = wait.until(
                EC.presence_of_element_located((By.CLASS_NAME, "cBinScheduleDisplay"))
            )

            results2 = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, f'//div[contains(@title,"Bin Job")]')
                )
            )
            soup = BeautifulSoup(driver.page_source, features="html.parser")
            # Find all table rows containing bin information.
            rows = soup.find_all("tr", class_="slds-hint-parent")

            data = {"bins": []}

            # Extract bin type and next collection date for each row.
            for row in rows:
                bin_type = (
                    row.find("td", {"data-label": "Bin Job"})
                    .find("strong")
                    .text.strip()
                    if row.find("td", {"data-label": "Bin Job"})
                    else None
                )

                next_collection_date = (
                    row.find("td", {"data-label": "Next Collection"}).text.strip()
                    if row.find("td", {"data-label": "Next Collection"})
                    else None
                )

                if bin_type and next_collection_date:
                    # Convert date string to datetime object. NOTE(review):
                    # this local `date_format` shadows the module-level
                    # `date_format` imported from common.
                    date_format = (
                        "%A, %d %B"  # Adjust the format according to your date string
                    )
                    try:
                        next_collection_date = datetime.strptime(
                            next_collection_date, date_format
                        )

                        # The site omits the year: if the month has already
                        # passed, the collection must be next year.
                        current_date = datetime.now()
                        if next_collection_date.month < current_date.month:
                            year = current_date.year + 1
                        else:
                            year = current_date.year

                        # Format the date.
                        next_collection_date = next_collection_date.replace(
                            year=year
                        ).strftime("%d/%m/%Y")
                    except ValueError:
                        # On parse failure the raw site string is kept as the
                        # collectionDate value.
                        pass

                    dict_data = {
                        "type": bin_type,
                        "collectionDate": next_collection_date,
                    }
                    data["bins"].append(dict_data)
        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Optionally, re-raise the exception if you want it to propagate
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
|
@@ -0,0 +1,142 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from datetime import datetime
|
3
|
+
from selenium.webdriver.common.by import By
|
4
|
+
from selenium.webdriver.common.keys import Keys
|
5
|
+
from selenium.webdriver.support import expected_conditions as EC
|
6
|
+
from selenium.webdriver.support.ui import Select
|
7
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
8
|
+
import time
|
9
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
10
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
11
|
+
|
12
|
+
|
13
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
14
|
+
|
15
|
+
|
16
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Scrape Halton's waste-service search form with Selenium.

        Flow: fill property + postcode fields, tick the reCAPTCHA checkbox
        inside its anchor iframe, submit the search, then parse the jQuery-UI
        collection tabs from the rendered page source.

        :param page: Unused input; rebound below to the form URL.
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
        """
        driver = None
        try:
            data = {"bins": []}

            user_paon = kwargs.get("paon")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            # Create Selenium webdriver. NOTE(review): the `page` parameter
            # is shadowed here by the target URL.
            page = (
                f"https://webapp.halton.gov.uk/PublicWebForms/WasteServiceSearchv1.aspx"
            )

            driver = create_webdriver(web_driver, headless)
            driver.get(page)

            # If you bang in the house number (or property name) and postcode in the box it should find your property

            # iframe_presense = WebDriverWait(driver, 30).until(
            #     EC.presence_of_element_located((By.ID, "fillform-frame-1"))
            # )

            # driver.switch_to.frame(iframe_presense)
            wait = WebDriverWait(driver, 60)

            inputElement_property = wait.until(
                EC.element_to_be_clickable(
                    (By.NAME, "ctl00$ContentPlaceHolder1$txtProperty")
                )
            )
            inputElement_property.send_keys(user_paon)

            inputElement_postcodesearch = wait.until(
                EC.element_to_be_clickable(
                    (By.NAME, "ctl00$ContentPlaceHolder1$txtPostcode")
                )
            )
            inputElement_postcodesearch.send_keys(user_postcode)
            time.sleep(1)
            # Switch into the reCAPTCHA anchor iframe so the checkbox is
            # reachable; focus must be restored afterwards.
            wait.until(
                EC.frame_to_be_available_and_switch_to_it(
                    (
                        By.CSS_SELECTOR,
                        "iframe[name^='a-'][src^='https://www.google.com/recaptcha/api2/anchor?']",
                    )
                )
            )
            wait.until(
                EC.element_to_be_clickable((By.XPATH, "//span[@id='recaptcha-anchor']"))
            ).send_keys(Keys.ENTER)
            # Fixed pause gives the captcha widget time to validate before
            # leaving the iframe — presumably tuned empirically; do not remove
            # without re-testing.
            time.sleep(5)
            driver.switch_to.default_content()
            search_btn = wait.until(
                EC.element_to_be_clickable(
                    (By.XPATH, '//*[@id="ContentPlaceHolder1_btnSearch"]')
                )
            )
            search_btn.send_keys(Keys.ENTER)
            soup = BeautifulSoup(driver.page_source, features="html.parser")

            # Find all tab panels within the collectionTabs
            # Find all anchor elements within the collectionTabs
            anchor_elements = soup.select("#collectionTabs a.ui-tabs-anchor")

            for anchor in anchor_elements:
                # Extract the type of waste from the anchor text
                waste_type = anchor.text.strip()

                # Find the corresponding panel using the href attribute
                # (the href is a fragment like "#tab-1", usable as a CSS id
                # selector directly).
                panel_id = anchor.get("href")
                panel = soup.select_one(panel_id)

                # Find all ul elements within the corresponding panel
                ul_elements = panel.find_all("ul")

                # Check if there are at least two ul elements
                if len(ul_elements) >= 2:
                    # Get the second ul element and extract its li elements
                    second_ul = ul_elements[1]
                    li_elements = second_ul.find_all("li")

                    # Extract the text content of each li element, stripping
                    # everything except letters, digits, commas, whitespace.
                    date_texts = [
                        re.sub(r"[^a-zA-Z0-9,\s]", "", li.get_text(strip=True)).strip()
                        for li in li_elements
                    ]

                    for date_text in date_texts:
                        # Extracting dates from the text using simple text manipulation
                        # Assuming the dates are in the format: "Friday 15th December 2023", "Friday 22nd December 2023", etc.
                        # Strip the ordinal suffix (st/nd/rd/th) so strptime
                        # can parse the day number.
                        date_string_without_ordinal = re.sub(
                            r"(\d+)(st|nd|rd|th)", r"\1", date_text
                        )

                        parsed_date = datetime.strptime(
                            date_string_without_ordinal, "%A %d %B %Y"
                        )

                        # Format the datetime object into the desired format '%d/%m/%Y'
                        formatted_date = parsed_date.strftime("%d/%m/%Y")

                        # Add extracted data to the 'bins' list
                        data["bins"].append(
                            {
                                "type": waste_type.capitalize(),
                                "collectionDate": formatted_date,
                            }
                        )
        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Optionally, re-raise the exception if you want it to propagate
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
|
@@ -0,0 +1,59 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
|
3
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
4
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
5
|
+
|
6
|
+
|
7
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch and parse Haringey's per-property collection page.

        POSTs to the property URL built from the UPRN, then scrapes each
        service wrapper for a service name and its next dd/mm/yyyy date.

        :param page: Unused; data is fetched directly by UPRN.
        :raises ConnectionError: on any network/HTTP failure.
        :raises ValueError: on any failure while parsing the HTML.
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
        """
        data = {"bins": []}

        uprn = kwargs.get("uprn")
        check_uprn(uprn)  # Assuming check_uprn() raises an exception if UPRN is invalid

        try:
            response = requests.post(
                f"https://wastecollections.haringey.gov.uk/property/{uprn}",
                timeout=10,  # Set a timeout for the request
            )
            response.raise_for_status()  # This will raise an exception for HTTP errors
        except requests.RequestException as e:
            logging.error(f"Network or HTTP error occurred: {e}")
            raise ConnectionError("Failed to retrieve data.") from e

        try:
            # Fix: removed a dead `soup.prettify()` call — it returns a
            # formatted string and has no side effects, so discarding its
            # result did nothing.
            soup = BeautifulSoup(response.text, features="html.parser")

            sections = soup.find_all("div", {"class": "property-service-wrapper"})

            # Matches the dd/mm/yyyy dates embedded in the next-service cell.
            date_regex = re.compile(r"\d{2}/\d{2}/\d{4}")
            for section in sections:
                service_name_element = section.find("h3", {"class": "service-name"})
                next_service_element = section.find("tbody").find(
                    "td", {"class": "next-service"}
                )

                if service_name_element and next_service_element:
                    service = service_name_element.text
                    # A regex passed as `text` matches the first NavigableString
                    # containing a date.
                    next_collection = next_service_element.find(text=date_regex)

                    if next_collection:
                        # Strip the "Collect "/"Paid " prefixes so only the
                        # bin type remains.
                        dict_data = {
                            "type": service.replace("Collect ", "")
                            .replace("Paid ", "")
                            .strip(),
                            "collectionDate": next_collection.strip(),
                        }
                        data["bins"].append(dict_data)
        except Exception as e:
            logging.error(f"Error parsing data: {e}")
            raise ValueError("Error processing the HTML data.") from e

        return data
|
@@ -0,0 +1,63 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
|
5
|
+
|
6
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
7
|
+
# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch and parse Harrogate's in-my-area property page.

        Reads the second ``hbcRounds`` table (bin rounds), one row per bin
        type, and returns the collections sorted by date.

        :param page: Unused; data is fetched directly by UPRN.
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)

        data = {"bins": []}

        headers = {
            "accept-language": "en-GB,en;q=0.9",
            "cache-control": "no-cache",
        }

        # Fix: removed the unused `req_data` dict (built but never sent) and
        # a dead `soup.prettify()` call (return value discarded, no side
        # effects).
        url = f"https://secure.harrogate.gov.uk/inmyarea/Property/?uprn={user_uprn}"

        requests.packages.urllib3.disable_warnings()
        response = requests.post(url, headers=headers)

        soup = BeautifulSoup(response.text, features="html.parser")

        collections = []

        # Find section with bins in: the second hbcRounds table holds the
        # waste rounds (the first is a different rounds table).
        table = soup.find_all("table", {"class": "hbcRounds"})[1]

        # For each row: <th> is the bin type, <td> holds the date plus
        # decorative <span>s that must be stripped before parsing.
        for row in table.find_all("tr"):
            bin_type = row.find("th").text
            td = row.find("td")
            for span in td.find_all("span"):
                span.extract()
            collectionDate = td.text.strip()
            next_collection = datetime.strptime(collectionDate, "%a %d %b %Y")
            collections.append((bin_type, next_collection))

        # Sort the collections by date.
        ordered_data = sorted(collections, key=lambda x: x[1])

        # Put the elements into the dictionary.
        for item in ordered_data:
            dict_data = {
                "type": item[0],
                "collectionDate": item[1].strftime(date_format),
            }
            data["bins"].append(dict_data)

        return data
|
@@ -0,0 +1,134 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from selenium.webdriver.common.by import By
|
3
|
+
from selenium.webdriver.common.keys import Keys
|
4
|
+
from selenium.webdriver.support import expected_conditions as EC
|
5
|
+
from selenium.webdriver.support.ui import Select
|
6
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
7
|
+
|
8
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
9
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
10
|
+
|
11
|
+
|
12
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def get_data(self, page) -> dict:
        """Parse the High Peak bin-day calendar HTML into the standard dict.

        :param page: HTML fragment containing the calendar (the outerHTML of
            the MAINCALENDAR element captured by ``parse_data``).
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
        :raises ValueError: if a rebuilt date string does not match
            ``"%A, %d %B %Y"``.
        """
        soup = BeautifulSoup(page, features="html.parser")

        data = {"bins": []}

        # Each month section has its own heading (e.g. "March 2024") which
        # supplies the month name and year for every day entry inside it.
        for month in soup.select('div[class*="bin-collection__month"]'):
            month_name = month.select_one(
                'h3[class*="bin-collection__title"]'
            ).text.strip()
            for collection_day in month.select('li[class*="bin-collection__item"]'):
                bin_type = collection_day.select_one(
                    'span[class*="bin-collection__type"]'
                ).text.strip()
                # Rebuild a full date string such as "Monday, 4 March 2024"
                # from the separate day-name and day-number spans plus the
                # month heading.
                bin_collection = (
                    collection_day.select_one(
                        'span[class*="bin-collection__day"]'
                    ).text.strip()
                    + ", "
                    + collection_day.select_one(
                        'span[class*="bin-collection__number"]'
                    ).text.strip()
                    + " "
                    + month_name
                )

                data["bins"].append(
                    {
                        "type": bin_type,
                        "collectionDate": datetime.strptime(
                            bin_collection, "%A, %d %B %Y"
                        ).strftime(date_format),
                    }
                )

        return data

    def parse_data(self, page: str, **kwargs) -> dict:
        """Drive the High Peak "find your bin day" form with Selenium.

        Expected kwargs:
            postcode:   postcode to search for
            paon:       address exactly as shown in the address dropdown
            web_driver: optional remote webdriver URL
            headless:   whether to run the browser headless

        :return: parsed collection data from :meth:`get_data`.
        :raises Exception: re-raises any Selenium/parsing failure after
            logging it; the webdriver is always closed via ``finally``.
        """
        driver = None
        try:
            page = "https://www.highpeak.gov.uk/findyourbinday"

            # Assign user info
            user_postcode = kwargs.get("postcode")
            user_paon = kwargs.get("paon")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")

            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless)
            driver.get(page)

            # Dismiss the cookie banner so it cannot intercept later clicks
            driver.find_element(By.CLASS_NAME, "cookiemessage__link--close").click()

            # Enter postcode in the text box and submit with ENTER
            postcode_input = driver.find_element(
                By.ID, "FINDBINDAYSHIGHPEAK_POSTCODESELECT_POSTCODE"
            )
            postcode_input.send_keys(user_postcode)
            postcode_input.send_keys(Keys.ENTER)

            # Wait for the address dropdown to appear
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.ID, "FINDBINDAYSHIGHPEAK_ADDRESSSELECT_ADDRESS")
                )
            )

            # Select the user's address from the dropdown
            Select(
                driver.find_element(By.ID, "FINDBINDAYSHIGHPEAK_ADDRESSSELECT_ADDRESS")
            ).select_by_visible_text(user_paon)

            # Wait for the "next" button, then submit the address selection
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (
                        By.ID,
                        "FINDBINDAYSHIGHPEAK_ADDRESSSELECT_ADDRESSSELECTNEXTBTN_NEXT",
                    )
                )
            )
            driver.find_element(
                By.ID, "FINDBINDAYSHIGHPEAK_ADDRESSSELECT_ADDRESSSELECTNEXTBTN_NEXT"
            ).click()

            # Wait for the results calendar to render
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.ID, "FINDBINDAYSHIGHPEAK_CALENDAR_MAINCALENDAR")
                )
            )

            # Capture the calendar HTML and parse it into the result dict
            table = driver.find_element(
                By.ID, "FINDBINDAYSHIGHPEAK_CALENDAR_MAINCALENDAR"
            ).get_attribute("outerHTML")

            data = self.get_data(table)
        except Exception as e:
            # Log the failure so the council name appears next to the error
            print(f"An error occurred: {e}")
            # Re-raise so callers still see the original exception
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
|
@@ -0,0 +1,48 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
|
5
|
+
|
6
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
7
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch Hull City Council collections for a UPRN via the AJAX API.

        Expected kwargs:
            uprn: property UPRN to look up.

        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
        :raises ConnectionRefusedError: if the API does not answer with 200.
        """
        uprn = kwargs.get("uprn")
        check_uprn(uprn)

        # Browser-like headers; the endpoint expects an XHR-style request
        # originating from the council's own checker page.
        request_headers = {
            "authority": "www.hull.gov.uk",
            "accept": "*/*",
            "accept-language": "en-GB,en;q=0.9",
            "cache-control": "no-cache",
            "pragma": "no-cache",
            "referer": "https://www.hull.gov.uk/bins-and-recycling/bin-collections/bin-collection-day-checker",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-origin",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.186 Safari/537.36",
        }
        endpoint = f"https://www.hull.gov.uk/ajax/bin-collection?bindate={uprn}"

        response = requests.get(endpoint, headers=request_headers)
        if response.status_code != 200:
            raise ConnectionRefusedError("Cannot connect to API!")

        # The API wraps the list of collections in an outer one-element list.
        collections = response.json()[0]

        bins = [
            {
                "type": entry.get("collection_type").capitalize(),
                "collectionDate": datetime.strptime(
                    entry.get("next_collection_date"), "%Y-%m-%d"
                ).strftime(date_format),
            }
            for entry in collections
        ]

        return {"bins": bins}
|