uk_bin_collection 0.74.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/README.rst +0 -0
- uk_bin_collection/tests/council_feature_input_parity.py +79 -0
- uk_bin_collection/tests/features/environment.py +7 -0
- uk_bin_collection/tests/features/validate_council_outputs.feature +767 -0
- uk_bin_collection/tests/input.json +1077 -0
- uk_bin_collection/tests/output.schema +41 -0
- uk_bin_collection/tests/step_defs/step_helpers/file_handler.py +46 -0
- uk_bin_collection/tests/step_defs/test_validate_council.py +87 -0
- uk_bin_collection/tests/test_collect_data.py +104 -0
- uk_bin_collection/tests/test_common_functions.py +342 -0
- uk_bin_collection/uk_bin_collection/collect_data.py +133 -0
- uk_bin_collection/uk_bin_collection/common.py +292 -0
- uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/ArunCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py +180 -0
- uk_bin_collection/uk_bin_collection/councils/BarnsleyMBCouncil.py +109 -0
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/BathAndNorthEastSomersetCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordBoroughCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordshireCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +147 -0
- uk_bin_collection/uk_bin_collection/councils/BirminghamCityCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py +105 -0
- uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +104 -0
- uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py +141 -0
- uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py +107 -0
- uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +95 -0
- uk_bin_collection/uk_bin_collection/councils/BuryCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CalderdaleCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/CannockChaseDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CardiffCouncil.py +172 -0
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +96 -0
- uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +127 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +32 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireWestAndChesterCouncil.py +125 -0
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/ConwyCountyBorough.py +27 -0
- uk_bin_collection/uk_bin_collection/councils/CrawleyBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +291 -0
- uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/DoncasterCouncil.py +77 -0
- uk_bin_collection/uk_bin_collection/councils/DorsetCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +41 -0
- uk_bin_collection/uk_bin_collection/councils/DurhamCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/EastCambridgeshireCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/EastRidingCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/FenlandDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/ForestOfDeanDistrictCouncil.py +113 -0
- uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py +1580 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +55 -0
- uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +150 -0
- uk_bin_collection/uk_bin_collection/councils/HaltonBoroughCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/HaringeyCouncil.py +59 -0
- uk_bin_collection/uk_bin_collection/councils/HarrogateBoroughCouncil.py +63 -0
- uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/HullCityCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/KingstonUponThamesCouncil.py +84 -0
- uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py +130 -0
- uk_bin_collection/uk_bin_collection/councils/KnowsleyMBCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +71 -0
- uk_bin_collection/uk_bin_collection/councils/LeedsCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/LisburnCastlereaghCityCouncil.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughHounslow.py +82 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughRedbridge.py +161 -0
- uk_bin_collection/uk_bin_collection/councils/MaldonDistrictCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/MalvernHillsDC.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/ManchesterCityCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/MansfieldDistrictCouncil.py +38 -0
- uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py +128 -0
- uk_bin_collection/uk_bin_collection/councils/MidSussexDistrictCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/MiltonKeynesCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/NewarkAndSherwoodDC.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/NewcastleCityCouncil.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py +53 -0
- uk_bin_collection/uk_bin_collection/councils/NorthKestevenDistrictCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLanarkshireCouncil.py +46 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorthamptonshireCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +220 -0
- uk_bin_collection/uk_bin_collection/councils/NorthWestLeicestershire.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/NorthYorkshire.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/NottinghamCityCouncil.py +36 -0
- uk_bin_collection/uk_bin_collection/councils/OldhamCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/PortsmouthCityCouncil.py +131 -0
- uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/ReadingBoroughCouncil.py +30 -0
- uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/RhonddaCynonTaffCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/RochdaleCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/RugbyBoroughCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/SalfordCityCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/SevenoaksDistrictCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/SheffieldCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ShropshireCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/SolihullCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/SouthAyrshireCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/SouthCambridgeshireCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/SouthLanarkshireCouncil.py +78 -0
- uk_bin_collection/uk_bin_collection/councils/SouthNorfolkCouncil.py +91 -0
- uk_bin_collection/uk_bin_collection/councils/SouthOxfordshireCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/SouthTynesideCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/StAlbansCityAndDistrictCouncil.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/StHelensBC.py +56 -0
- uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/StockportBoroughCouncil.py +39 -0
- uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +79 -0
- uk_bin_collection/uk_bin_collection/councils/StratfordUponAvonCouncil.py +94 -0
- uk_bin_collection/uk_bin_collection/councils/SunderlandCityCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/SwaleBoroughCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/TamesideMBCouncil.py +62 -0
- uk_bin_collection/uk_bin_collection/councils/TandridgeDistrictCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/TelfordAndWrekinCouncil.py +50 -0
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/TonbridgeAndMallingBC.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py +154 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WaverleyBoroughCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/WealdenDistrictCouncil.py +86 -0
- uk_bin_collection/uk_bin_collection/councils/WelhatCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WestLindseyDistrictCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WestNorthamptonshireCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WestSuffolkCouncil.py +64 -0
- uk_bin_collection/uk_bin_collection/councils/WiganBoroughCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/WiltshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/WindsorAndMaidenheadCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WokingBoroughCouncil.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/WyreCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py +33 -0
- uk_bin_collection/uk_bin_collection/get_bin_data.py +165 -0
- uk_bin_collection-0.74.0.dist-info/LICENSE +21 -0
- uk_bin_collection-0.74.0.dist-info/METADATA +247 -0
- uk_bin_collection-0.74.0.dist-info/RECORD +171 -0
- uk_bin_collection-0.74.0.dist-info/WHEEL +4 -0
- uk_bin_collection-0.74.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,203 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
|
5
|
+
|
6
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
7
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch and parse bin collection dates from the iTouchVision portal.

        Keyword arguments:
        postcode -- the property's postcode (validated via check_postcode)
        uprn -- the property's UPRN (validated via check_uprn)

        Returns a dict of the form
        {"bins": [{"type": ..., "collectionDate": ...}, ...]}.

        NOTE(review): the form fields ("p_flow_id", "p_instance", the
        "wwv_flow.accept" endpoint) look like an Oracle APEX application --
        confirm before refactoring. The flow below performs two form
        submissions (postcode search, then UPRN selection) inside one
        requests.Session so server-side state carried in cookies survives
        between steps. The payload construction mirrors what a browser
        sends and is order/state sensitive; do not reorder the requests.
        """
        user_postcode = kwargs.get("postcode")
        check_postcode(user_postcode)
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)

        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/87.0.4280.141 Safari/537.36"
        }

        # Silence urllib3 warnings before making any requests
        requests.packages.urllib3.disable_warnings()
        with requests.Session() as s:
            # Set Headers
            s.headers = headers

            # Get the first page - This is the Search for property by Post Code page
            resource = s.get(
                "https://iweb.itouchvision.com/portal/f?p=customer:BIN_DAYS:::NO:RP:UID:13353F039C4B1454827EE05536414091A8C058F4"
            )
            # Create a BeautifulSoup object from the page's HTML
            soup = BeautifulSoup(resource.text, "html.parser")

            # The page contains a number of values that must be passed into subsequent requests - extract them here
            payload = {
                i["name"]: i.get("value", "") for i in soup.select("input[name]")
            }
            payload2 = {
                i["data-for"]: i.get("value", "")
                for i in soup.select("input[data-for]")
            }
            # Hidden anti-tamper fields: a per-page salt and a checksum over
            # the protected page items; both must be echoed back on submit.
            payload_salt = soup.select_one('input[id="pSalt"]').get("value")
            payload_protected = soup.select_one('input[id="pPageItemsProtected"]').get(
                "value"
            )

            # Add the PostCode and 'SEARCH' to the payload
            payload["p_request"] = "SEARCH"
            payload["P153_POST_CODE"] = user_postcode

            # Manipulate the lists and build the JSON that must be submitted in further requests - some data is nested
            merged_list = {**payload, **payload2}
            new_list = []
            other_list = {}
            for key in merged_list.keys():
                temp_list = {}
                val = merged_list[key]
                # Page items submitted with their actual value ("n"/"v" pairs)
                if key in [
                    "P153_UPRN",
                    "P153_TEMP",
                    "P153_SYSDATE",
                    "P0_LANGUAGE",
                    "P153_POST_CODE",
                ]:
                    temp_list = {"n": key, "v": val}
                    new_list.append(temp_list)
                # Top-level flow fields go straight into the POST body
                elif key in [
                    "p_flow_id",
                    "p_flow_step_id",
                    "p_instance",
                    "p_page_submission_id",
                    "p_request",
                    "p_reload_on_submit",
                ]:
                    other_list[key] = val
                # Everything else is sent with an empty value and its original
                # value carried as a checksum ("ck")
                else:
                    temp_list = {"n": key, "v": "", "ck": val}
                    new_list.append(temp_list)

            json_builder = {
                "pageItems": {
                    "itemsToSubmit": new_list,
                    "protected": payload_protected,
                    "rowVersion": "",
                    "formRegionChecksums": [],
                },
                "salt": payload_salt,
            }
            # Compact separators: submit the JSON without any whitespace
            json_object = json.dumps(json_builder, separators=(",", ":"))
            other_list["p_json"] = json_object

            # Set Referrer header
            s.headers.update(
                {
                    "referer": "https://iweb.itouchvision.com/portal/f?p=customer:BIN_DAYS:::NO:RP:UID:13353F039C4B1454827EE05536414091A8C058F4"
                }
            )

            # Generate POST including all the JSON we just built
            s.post(
                "https://iweb.itouchvision.com/portal/wwv_flow.accept", data=other_list
            )

            # The second page on the portal would normally allow you to select your property from a dropdown list of
            # those that are at the postcode entered on the previous page
            # The required cookies are stored within the session so re-use the session to keep them
            resource = s.get(
                "https://iweb.itouchvision.com/portal/itouchvision/r/customer/bin_days"
            )

            # Create a BeautifulSoup object from the page's HTML
            soup = BeautifulSoup(resource.text, "html.parser")

            # The page contains a number of values that must be passed into subsequent requests - extract them here
            payload = {
                i["name"]: i.get("value", "") for i in soup.select("input[name]")
            }
            payload2 = {
                i["data-for"]: i.get("value", "")
                for i in soup.select("input[data-for]")
            }
            payload_salt = soup.select_one('input[id="pSalt"]').get("value")
            payload_protected = soup.select_one('input[id="pPageItemsProtected"]').get(
                "value"
            )

            # Add the UPRN and 'SUBMIT' to the payload
            payload["p_request"] = "SUBMIT"
            payload["P153_UPRN"] = user_uprn

            # Manipulate the lists and build the JSON that must be submitted in further requests - some data is nested
            merged_list = {**payload, **payload2}
            new_list = []
            other_list = {}
            for key in merged_list.keys():
                temp_list = {}
                val = merged_list[key]
                if key in ["P153_UPRN", "P153_TEMP", "P153_SYSDATE", "P0_LANGUAGE"]:
                    temp_list = {"n": key, "v": val}
                    new_list.append(temp_list)
                # "v": "1" presumably marks the dropdown entry as selected --
                # verify against a captured browser submission
                elif key in ["P153_ZABY"]:
                    temp_list = {"n": key, "v": "1", "ck": val}
                    new_list.append(temp_list)
                elif key in ["P153_POST_CODE"]:
                    temp_list = {"n": key, "v": user_postcode, "ck": val}
                    new_list.append(temp_list)
                elif key in [
                    "p_flow_id",
                    "p_flow_step_id",
                    "p_instance",
                    "p_page_submission_id",
                    "p_request",
                    "p_reload_on_submit",
                ]:
                    other_list[key] = val
                else:
                    temp_list = {"n": key, "v": "", "ck": val}
                    new_list.append(temp_list)

            json_builder = {
                "pageItems": {
                    "itemsToSubmit": new_list,
                    "protected": payload_protected,
                    "rowVersion": "",
                    "formRegionChecksums": [],
                },
                "salt": payload_salt,
            }

            json_object = json.dumps(json_builder, separators=(",", ":"))
            other_list["p_json"] = json_object

            # Generate POST including all the JSON we just built
            s.post(
                "https://iweb.itouchvision.com/portal/wwv_flow.accept", data=other_list
            )

            # The third and final page on the portal shows the detail of the waste collection services
            # The required cookies are stored within the session so re-use the session to keep them
            resource = s.get(
                "https://iweb.itouchvision.com/portal/itouchvision/r/customer/bin_days"
            )

            # Create a BeautifulSoup object from the page's HTML
            soup = BeautifulSoup(resource.text, "html.parser")
            data = {"bins": []}

            # Loop through the items on the page and build a JSON object for ingestion
            for item in soup.select(".t-MediaList-item"):
                for value in item.select(".t-MediaList-body"):
                    dict_data = {
                        # The second <span> holds the service/bin name
                        "type": value.select("span")[1].get_text(strip=True).title(),
                        # Dates are rendered like "Monday, 1 January, 2024"
                        # per the "%A, %d %B, %Y" parse format below
                        "collectionDate": datetime.strptime(
                            value.select(".t-MediaList-desc")[0].get_text(strip=True),
                            "%A, %d %B, %Y",
                        ).strftime(date_format),
                    }
                    data["bins"].append(dict_data)

            return data
|
@@ -0,0 +1,101 @@
|
|
1
|
+
from datetime import *
|
2
|
+
|
3
|
+
import requests
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
6
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
7
|
+
|
8
|
+
|
9
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
10
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch and parse bin collection dates from the TMBC "xfp" form.

        Keyword arguments:
        uprn -- the property's UPRN (validated via check_uprn)
        postcode -- the property's postcode (validated via check_postcode)

        Returns a dict of the form
        {"bins": [{"type": ..., "collectionDate": ...}, ...]}.

        NOTE(review): the multipart body below hard-codes both the WebKit
        form boundary and a "__token" value captured from one browser
        session. If the server ties "__token" to a session or CSRF check,
        this will eventually stop working -- confirm, and if so fetch the
        form page first and read the live token from it.
        """
        # Get UPRN and postcode from the parsed args
        user_uprn = kwargs.get("uprn")
        user_postcode = kwargs.get("postcode")
        check_uprn(user_uprn)
        check_postcode(user_postcode)

        requests.packages.urllib3.disable_warnings()

        # Set up some form data, then POST for the form and scrape the result
        # (headers copied wholesale from a captured browser request)
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "en-GB,en;q=0.6",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "multipart/form-data; boundary=----WebKitFormBoundaryI1XYcX9fNeKxm4LB",
            # 'Cookie': 'PHPSESSID=-3mn6j-vkWcY4xPPXbT3Ggk1gSQJLId%2CztSoQV5-f8Pi7Cju1wwE151qtwdUyE1c',
            "Origin": "https://www.tmbc.gov.uk",
            "Pragma": "no-cache",
            "Referer": "https://www.tmbc.gov.uk/xfp/form/167",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "same-origin",
            "Sec-Fetch-User": "?1",
            "Sec-GPC": "1",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
            "sec-ch-ua": '"Not?A_Brand";v="8", "Chromium";v="108", "Brave";v="108"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
        }
        # Hand-built multipart/form-data body; the boundary string must match
        # the Content-Type header above exactly. The postcode and UPRN are
        # interpolated into the two "q752..._N_0" question fields.
        data = (
            f"------WebKitFormBoundaryI1XYcX9fNeKxm4LB\r\nContent-Disposition: form-data; "
            f'name="__token"\r\n\r\ns_flSv1eIvJDeCwbFaYxclM3UTomdpWgg2cMWzZckaU\r\n'
            f"------WebKitFormBoundaryI1XYcX9fNeKxm4LB\r\nContent-Disposition: form-data; "
            f'name="page"\r\n\r\n128\r\n------WebKitFormBoundaryI1XYcX9fNeKxm4LB\r\nContent-Disposition: '
            f'form-data; name="locale"\r\n\r\nen_GB\r\n------WebKitFormBoundaryI1XYcX9fNeKxm4LB\r\nContent'
            f'-Disposition: form-data; name="q752eec300b2ffef2757e4536b77b07061842041a_0_0"\r\n\r\n'
            f"{user_postcode}\r\n------WebKitFormBoundaryI1XYcX9fNeKxm4LB\r\nContent-Disposition: form-data; "
            f'name="q752eec300b2ffef2757e4536b77b07061842041a_1_0"\r\n\r\n'
            f"{user_uprn}\r\n------WebKitFormBoundaryI1XYcX9fNeKxm4LB\r\nContent-Disposition: form-data; "
            f'name="next"\r\n\r\nNext\r\n------WebKitFormBoundaryI1XYcX9fNeKxm4LB--\r\n '
        )

        response = requests.post(
            "https://www.tmbc.gov.uk/xfp/form/167", headers=headers, data=data
        )
        soup = BeautifulSoup(response.text, features="html.parser")
        soup.prettify()

        data = {"bins": []}
        # Seed the year-inference below with "now"; updated to the last
        # parsed collection date after each row so dates stay monotonic.
        last_date = datetime.now()

        # Find the table on the page and get data from each row (we don't care about the headings)
        table = soup.find(
            "table", {"class": "data-table waste-collections-table"}
        ).find("tbody")
        for row in table.find_all("tr"):
            bin_date = row.find_next("td").text.strip()
            collection_types = row.find("div", {"class": "collections"}).find_all("p")

            # For each collection type in the list, parse the time
            for item in collection_types:
                # Format "%a %d %B" has no year, so strptime defaults it;
                # the real year is patched in just below.
                curr_bin_date = datetime.strptime(bin_date, "%a %d %B")

                # The calendar doesn't include the year, so using this to try to mitigate year change (note: it's
                # currently January, so no idea if it will work until the end of the year lol)
                # NOTE(review): ISO week < 52 keeps the current year, otherwise
                # rolls to next year -- fragile around late-December/early-
                # January boundaries; confirm behaviour at year end.
                if last_date.date().isocalendar()[1] < 52:
                    curr_bin_date = datetime(
                        last_date.year, curr_bin_date.month, curr_bin_date.day
                    )
                else:
                    curr_bin_date = datetime(
                        last_date.year + 1, curr_bin_date.month, curr_bin_date.day
                    )

                # Add each collection to the dictionary
                dict_data = {
                    "type": item.text.strip(),
                    "collectionDate": curr_bin_date.strftime(date_format),
                }
                data["bins"].append(dict_data)
                last_date = curr_bin_date

        return data
|
@@ -0,0 +1,51 @@
|
|
1
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
2
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
3
|
+
|
4
|
+
|
5
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Torbay implementation of the abstract bin-data collector.

    Queries the council's Bartec collections endpoint for a UPRN and
    normalises the JSON response into the common output shape.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Return {"bins": [...]} for the UPRN supplied in kwargs.

        Keyword arguments:
        uprn -- the property's UPRN (validated via check_uprn)

        Raises ValueError when the endpoint does not respond with HTTP 200.
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)

        # Browser-like headers captured from a real session; the endpoint
        # sits on a different subdomain to the referring site.
        request_headers = {
            "Accept": "*/*",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8",
            "Connection": "keep-alive",
            "Host": "online.torbay.gov.uk",
            "Origin": "https://www.torbay.gov.uk",
            "Referer": "https://www.torbay.gov.uk/",
            "sec-ch-ua": '"Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-site",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
        }

        requests.packages.urllib3.disable_warnings()
        response = requests.get(
            f"https://online.torbay.gov.uk/services.bartec/collections?uprn={user_uprn}",
            headers=request_headers,
        )
        if response.status_code != 200:
            raise ValueError("No bin data found for provided UPRN.")

        collections = json.loads(response.text)

        # Map each service entry to the common schema, dropping the
        # "Empty " prefix from service names and reformatting the date.
        bins = []
        for entry in collections:
            next_collection = datetime.strptime(
                entry["NextCollection"].strip(), "%d %B %Y"
            )
            bins.append(
                {
                    "type": entry["Service"].replace("Empty ", "").strip(),
                    "collectionDate": next_collection.strftime(date_format),
                }
            )

        return {"bins": bins}
|
@@ -0,0 +1,154 @@
|
|
1
|
+
from xml.etree import ElementTree
|
2
|
+
|
3
|
+
from bs4 import BeautifulSoup
|
4
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
5
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
6
|
+
|
7
|
+
|
8
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
9
|
+
class CouncilClass(AbstractGetBinDataClass):
|
10
|
+
"""
|
11
|
+
Concrete classes have to implement all abstract operations of the
|
12
|
+
baseclass. They can also override some
|
13
|
+
operations with a default implementation.
|
14
|
+
"""
|
15
|
+
|
16
|
+
def get_data(cls, **kwargs) -> str:
    """Request the raw round-calendar SOAP response for a UPRN.

    Keyword arguments:
    uprn -- the property's UPRN (required; only printed warnings if absent)

    Returns the requests response from the council's SOAP endpoint --
    despite the ``-> str`` annotation, the final ``return full_page``
    hands back the object produced by ``requests.post``.

    NOTE(review): the first parameter is named ``cls`` but no
    ``@classmethod`` decorator is visible in this chunk -- confirm how
    this method is invoked before relying on it.
    NOTE(review): the except block below only prints and does not
    re-raise, so execution continues; a missing UPRN will surface later
    as a TypeError when ``uprn`` (None) is concatenated into
    ``post_data``.
    """
    # Set a user agent so we look like a browser ;-)
    user_agent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"
    headers = {"User-Agent": user_agent, "Content-Type": "text/xml"}

    uprn = kwargs.get("uprn")
    try:
        if uprn is None or uprn == "":
            raise ValueError("Invalid UPRN")
    except Exception as ex:
        print(f"Exception encountered: {ex}")
        print(
            "Please check the provided UPRN. If this error continues, please first trying setting the "
            "UPRN manually on line 115 before raising an issue."
        )

    # Make the Request - change the URL - find out your property number
    # URL
    url = "https://collections-torridge.azurewebsites.net/WebService2.asmx"
    # Post data
    # SOAP envelope for getRoundCalendarForUPRN; council code "TOR" and the
    # "PW" field are fixed values sent with every request.
    post_data = (
        '<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><getRoundCalendarForUPRN xmlns="http://tempuri2.org/"><council>TOR</council><UPRN>'
        + uprn
        + "</UPRN><PW>wax01653</PW></getRoundCalendarForUPRN></soap:Body></soap:Envelope>"
    )
    requests.packages.urllib3.disable_warnings()
    full_page = requests.post(url, headers=headers, data=post_data)

    return full_page
|
50
|
+
|
51
|
+
def parse_data(self, page, **kwargs) -> dict:
|
52
|
+
"""This method makes the request to the council
|
53
|
+
|
54
|
+
Keyword arguments:
|
55
|
+
url -- the url to get the data from
|
56
|
+
"""
|
57
|
+
# Set a user agent so we look like a browser ;-)
|
58
|
+
user_agent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"
|
59
|
+
headers = {"User-Agent": user_agent, "Content-Type": "text/xml"}
|
60
|
+
|
61
|
+
uprn = kwargs.get("uprn")
|
62
|
+
try:
|
63
|
+
if uprn is None or uprn == "":
|
64
|
+
raise ValueError("Invalid UPRN")
|
65
|
+
except Exception as ex:
|
66
|
+
print(f"Exception encountered: {ex}")
|
67
|
+
print(
|
68
|
+
"Please check the provided UPRN. If this error continues, please first trying setting the "
|
69
|
+
"UPRN manually on line 115 before raising an issue."
|
70
|
+
)
|
71
|
+
|
72
|
+
# Make the Request - change the URL - find out your property number
|
73
|
+
# URL
|
74
|
+
url = "https://collections-torridge.azurewebsites.net/WebService2.asmx"
|
75
|
+
# Post data
|
76
|
+
post_data = (
|
77
|
+
'<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><getRoundCalendarForUPRN xmlns="http://tempuri2.org/"><council>TOR</council><UPRN>'
|
78
|
+
+ uprn
|
79
|
+
+ "</UPRN><PW>wax01653</PW></getRoundCalendarForUPRN></soap:Body></soap:Envelope>"
|
80
|
+
)
|
81
|
+
requests.packages.urllib3.disable_warnings()
|
82
|
+
page = requests.post(url, headers=headers, data=post_data)
|
83
|
+
|
84
|
+
# Remove the soap wrapper
|
85
|
+
namespaces = {
|
86
|
+
"soap": "http://schemas.xmlsoap.org/soap/envelope/",
|
87
|
+
"a": "http://tempuri2.org/",
|
88
|
+
}
|
89
|
+
dom = ElementTree.fromstring(page.text)
|
90
|
+
page = dom.find(
|
91
|
+
"./soap:Body"
|
92
|
+
"/a:getRoundCalendarForUPRNResponse"
|
93
|
+
"/a:getRoundCalendarForUPRNResult",
|
94
|
+
namespaces,
|
95
|
+
)
|
96
|
+
# Make a BS4 object
|
97
|
+
soup = BeautifulSoup(page.text, features="html.parser")
|
98
|
+
soup.prettify()
|
99
|
+
|
100
|
+
data = {"bins": []}
|
101
|
+
|
102
|
+
b_el = soup.find("b", string="GardenBin")
|
103
|
+
if b_el:
|
104
|
+
results = re.search(
|
105
|
+
"([A-Za-z]+ \\d\\d? [A-Za-z]+) (.*?)", b_el.next_sibling.split(": ")[1]
|
106
|
+
)
|
107
|
+
if results and results.groups()[0]:
|
108
|
+
date = results.groups()[0] + " " + datetime.today().strftime("%Y")
|
109
|
+
data["bins"].append(
|
110
|
+
{
|
111
|
+
"type": "GardenBin",
|
112
|
+
"collectionDate": get_next_occurrence_from_day_month(
|
113
|
+
datetime.strptime(date, "%a %d %b %Y")
|
114
|
+
).strftime(date_format),
|
115
|
+
}
|
116
|
+
)
|
117
|
+
|
118
|
+
b_el = soup.find("b", string="Refuse")
|
119
|
+
if b_el:
|
120
|
+
results = re.search(
|
121
|
+
"([A-Za-z]+ \\d\\d? [A-Za-z]+) (.*?)", b_el.next_sibling.split(": ")[1]
|
122
|
+
)
|
123
|
+
if results and results.groups()[0]:
|
124
|
+
date = results.groups()[0] + " " + datetime.today().strftime("%Y")
|
125
|
+
data["bins"].append(
|
126
|
+
{
|
127
|
+
"type": "Refuse",
|
128
|
+
"collectionDate": get_next_occurrence_from_day_month(
|
129
|
+
datetime.strptime(date, "%a %d %b %Y")
|
130
|
+
).strftime(date_format),
|
131
|
+
}
|
132
|
+
)
|
133
|
+
|
134
|
+
b_el = soup.find("b", string="Recycling")
|
135
|
+
if b_el:
|
136
|
+
results = re.search(
|
137
|
+
"([A-Za-z]+ \\d\\d? [A-Za-z]+) (.*?)", b_el.next_sibling.split(": ")[1]
|
138
|
+
)
|
139
|
+
if results and results.groups()[0]:
|
140
|
+
date = results.groups()[0] + " " + datetime.today().strftime("%Y")
|
141
|
+
data["bins"].append(
|
142
|
+
{
|
143
|
+
"type": "Recycling",
|
144
|
+
"collectionDate": get_next_occurrence_from_day_month(
|
145
|
+
datetime.strptime(date, "%a %d %b %Y")
|
146
|
+
).strftime(date_format),
|
147
|
+
}
|
148
|
+
)
|
149
|
+
|
150
|
+
data["bins"].sort(
|
151
|
+
key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
|
152
|
+
)
|
153
|
+
|
154
|
+
return data
|
@@ -0,0 +1,119 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
|
5
|
+
|
6
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
7
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin-collection scraper for the Vale of Glamorgan Council.

    Looks up the property's refuse round via the council's map data
    service, scrapes the matching black-bag calendar page, generates the
    weekly recycling dates, and merges everything into the standard
    ``{"bins": [...]}`` structure sorted by date.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch and parse collection data for the given UPRN.

        Keyword arguments:
        uprn -- the property reference to look up
        """
        requests.packages.urllib3.disable_warnings()
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)
        headers = {
            "Accept": "*/*",
            "Accept-Language": "en-GB,en;q=0.6",
            "Connection": "keep-alive",
            "Referer": "https://www.valeofglamorgan.gov.uk/",
            "Sec-Fetch-Dest": "script",
            "Sec-Fetch-Mode": "no-cors",
            "Sec-Fetch-Site": "same-site",
            "Sec-GPC": "1",
            "sec-ch-ua": '"Not?A_Brand";v="8", "Chromium";v="108", "Brave";v="108"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
        }
        params = {
            "RequestType": "LocalInfo",
            "ms": "ValeOfGlamorgan/AllMaps",
            "group": "Community and Living|Refuse HIDE2",
            "type": "json",
            "callback": "AddressInfoCallback",
            "uid": user_uprn,
            "import": "jQuery35108514154283927682_1673022974838",
            "_": "1673022974840",
        }

        # Get a response from the council
        response = requests.get(
            "https://myvale.valeofglamorgan.gov.uk/getdata.aspx",
            params=params,
            headers=headers,
        ).text

        # Decode the JSON once (the original decoded the same payload twice)
        # and pull out the bin week and the weekly collection type. The bin
        # week feeds the calendar URL; the weekly type gets generated dates.
        refuse_info = json.loads(response)["Results"]["Refuse_HIDE2"]
        bin_week = str(refuse_info["Your_Refuse_round_is"]).replace(" ", "-")
        weekly_collection = str(refuse_info["Recycling__type"]).capitalize()
        weekly_dates = get_weekday_dates_in_period(
            datetime.now(), days_of_week.get(bin_week.split("-")[0].strip()), amount=48
        )
        schedule_url = f"https://www.valeofglamorgan.gov.uk/en/living/Recycling-and-Waste/collections/Black-Bag-Collections/{bin_week}.aspx"
        response = requests.get(schedule_url, verify=False)

        # BS4 parses the calendar
        soup = BeautifulSoup(response.text, features="html.parser")

        # Some scraper variables
        collections = []

        # Get the calendar table and find the headers
        table = soup.find("table", {"class": "TableStyle_Activities"}).find("tbody")
        table_headers = table.find("tr").find_all("th")
        # NOTE(review): rows are taken from the whole document rather than
        # from `table` — presumably the page contains only this one table;
        # confirm before tightening this to table.find_all("tr").
        for tr in soup.find_all("tr")[1:]:
            row = tr.find_all("td")
            # Parse month and year - month needs converting from text to number
            month_and_year = row[0].text.split()
            if month_and_year[0] in list(calendar.month_abbr):
                collection_month = datetime.strptime(month_and_year[0], "%b").month
            elif month_and_year[0] == "Sept":
                # "Sept" is not a strptime month abbreviation, so map it by hand
                collection_month = 9
            else:
                collection_month = datetime.strptime(month_and_year[0], "%B").month
            collection_year = datetime.strptime(month_and_year[1], "%Y").year

            # Get the collection dates column, remove anything that's not a
            # number or space and then convert to dates
            for day in remove_alpha_characters(row[1].text.strip()).split():
                try:
                    bin_date = datetime(collection_year, collection_month, int(day))
                    collections.append(
                        (
                            table_headers[1]
                            .text.strip()
                            .replace(" collection date", ""),
                            bin_date,
                        )
                    )
                except Exception:
                    # Non-numeric tokens / invalid day numbers are skipped
                    continue

        # Add in weekly dates to the tuple
        for date in weekly_dates:
            collections.append(
                (weekly_collection, datetime.strptime(date, date_format))
            )

        # Order all the data, only including future dates
        data = {"bins": []}
        for bin_type, collection_date in sorted(collections, key=lambda x: x[1]):
            if collection_date.date() >= datetime.now().date():
                data["bins"].append(
                    {
                        "type": bin_type,
                        "collectionDate": collection_date.strftime(date_format),
                    }
                )

        return data