uk_bin_collection 0.74.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/README.rst +0 -0
- uk_bin_collection/tests/council_feature_input_parity.py +79 -0
- uk_bin_collection/tests/features/environment.py +7 -0
- uk_bin_collection/tests/features/validate_council_outputs.feature +767 -0
- uk_bin_collection/tests/input.json +1077 -0
- uk_bin_collection/tests/output.schema +41 -0
- uk_bin_collection/tests/step_defs/step_helpers/file_handler.py +46 -0
- uk_bin_collection/tests/step_defs/test_validate_council.py +87 -0
- uk_bin_collection/tests/test_collect_data.py +104 -0
- uk_bin_collection/tests/test_common_functions.py +342 -0
- uk_bin_collection/uk_bin_collection/collect_data.py +133 -0
- uk_bin_collection/uk_bin_collection/common.py +292 -0
- uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/ArunCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py +180 -0
- uk_bin_collection/uk_bin_collection/councils/BarnsleyMBCouncil.py +109 -0
- uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/BathAndNorthEastSomersetCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordBoroughCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/BedfordshireCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +147 -0
- uk_bin_collection/uk_bin_collection/councils/BirminghamCityCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py +105 -0
- uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +104 -0
- uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py +141 -0
- uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py +107 -0
- uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +95 -0
- uk_bin_collection/uk_bin_collection/councils/BuryCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CalderdaleCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/CannockChaseDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/CardiffCouncil.py +172 -0
- uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py +96 -0
- uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +127 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +32 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireWestAndChesterCouncil.py +125 -0
- uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/ConwyCountyBorough.py +27 -0
- uk_bin_collection/uk_bin_collection/councils/CrawleyBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +291 -0
- uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/DoncasterCouncil.py +77 -0
- uk_bin_collection/uk_bin_collection/councils/DorsetCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +41 -0
- uk_bin_collection/uk_bin_collection/councils/DurhamCouncil.py +49 -0
- uk_bin_collection/uk_bin_collection/councils/EastCambridgeshireCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/EastRidingCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/ErewashBoroughCouncil.py +61 -0
- uk_bin_collection/uk_bin_collection/councils/FenlandDistrictCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/ForestOfDeanDistrictCouncil.py +113 -0
- uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py +1580 -0
- uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +55 -0
- uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +150 -0
- uk_bin_collection/uk_bin_collection/councils/HaltonBoroughCouncil.py +142 -0
- uk_bin_collection/uk_bin_collection/councils/HaringeyCouncil.py +59 -0
- uk_bin_collection/uk_bin_collection/councils/HarrogateBoroughCouncil.py +63 -0
- uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/HullCityCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/KingstonUponThamesCouncil.py +84 -0
- uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py +130 -0
- uk_bin_collection/uk_bin_collection/councils/KnowsleyMBCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +71 -0
- uk_bin_collection/uk_bin_collection/councils/LeedsCityCouncil.py +137 -0
- uk_bin_collection/uk_bin_collection/councils/LisburnCastlereaghCityCouncil.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughHounslow.py +82 -0
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughRedbridge.py +161 -0
- uk_bin_collection/uk_bin_collection/councils/MaldonDistrictCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/MalvernHillsDC.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/ManchesterCityCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/MansfieldDistrictCouncil.py +38 -0
- uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py +128 -0
- uk_bin_collection/uk_bin_collection/councils/MidSussexDistrictCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/MiltonKeynesCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py +139 -0
- uk_bin_collection/uk_bin_collection/councils/NewarkAndSherwoodDC.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/NewcastleCityCouncil.py +57 -0
- uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py +115 -0
- uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py +53 -0
- uk_bin_collection/uk_bin_collection/councils/NorthKestevenDistrictCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLanarkshireCouncil.py +46 -0
- uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/NorthNorthamptonshireCouncil.py +72 -0
- uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +76 -0
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +220 -0
- uk_bin_collection/uk_bin_collection/councils/NorthWestLeicestershire.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/NorthYorkshire.py +58 -0
- uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +123 -0
- uk_bin_collection/uk_bin_collection/councils/NottinghamCityCouncil.py +36 -0
- uk_bin_collection/uk_bin_collection/councils/OldhamCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/PortsmouthCityCouncil.py +131 -0
- uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/ReadingBoroughCouncil.py +30 -0
- uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/RenfrewshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/RhonddaCynonTaffCouncil.py +80 -0
- uk_bin_collection/uk_bin_collection/councils/RochdaleCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/RugbyBoroughCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/RushmoorCouncil.py +81 -0
- uk_bin_collection/uk_bin_collection/councils/SalfordCityCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/SevenoaksDistrictCouncil.py +106 -0
- uk_bin_collection/uk_bin_collection/councils/SheffieldCityCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/ShropshireCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/SolihullCouncil.py +48 -0
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/SouthAyrshireCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/SouthCambridgeshireCouncil.py +65 -0
- uk_bin_collection/uk_bin_collection/councils/SouthGloucestershireCouncil.py +74 -0
- uk_bin_collection/uk_bin_collection/councils/SouthLanarkshireCouncil.py +78 -0
- uk_bin_collection/uk_bin_collection/councils/SouthNorfolkCouncil.py +91 -0
- uk_bin_collection/uk_bin_collection/councils/SouthOxfordshireCouncil.py +93 -0
- uk_bin_collection/uk_bin_collection/councils/SouthTynesideCouncil.py +98 -0
- uk_bin_collection/uk_bin_collection/councils/StAlbansCityAndDistrictCouncil.py +43 -0
- uk_bin_collection/uk_bin_collection/councils/StHelensBC.py +56 -0
- uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +112 -0
- uk_bin_collection/uk_bin_collection/councils/StockportBoroughCouncil.py +39 -0
- uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +79 -0
- uk_bin_collection/uk_bin_collection/councils/StratfordUponAvonCouncil.py +94 -0
- uk_bin_collection/uk_bin_collection/councils/SunderlandCityCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/SwaleBoroughCouncil.py +52 -0
- uk_bin_collection/uk_bin_collection/councils/TamesideMBCouncil.py +62 -0
- uk_bin_collection/uk_bin_collection/councils/TandridgeDistrictCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/TelfordAndWrekinCouncil.py +50 -0
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +203 -0
- uk_bin_collection/uk_bin_collection/councils/TonbridgeAndMallingBC.py +101 -0
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py +154 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WaverleyBoroughCouncil.py +119 -0
- uk_bin_collection/uk_bin_collection/councils/WealdenDistrictCouncil.py +86 -0
- uk_bin_collection/uk_bin_collection/councils/WelhatCouncil.py +73 -0
- uk_bin_collection/uk_bin_collection/councils/WestBerkshireCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WestLindseyDistrictCouncil.py +118 -0
- uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py +103 -0
- uk_bin_collection/uk_bin_collection/councils/WestNorthamptonshireCouncil.py +34 -0
- uk_bin_collection/uk_bin_collection/councils/WestSuffolkCouncil.py +64 -0
- uk_bin_collection/uk_bin_collection/councils/WiganBoroughCouncil.py +97 -0
- uk_bin_collection/uk_bin_collection/councils/WiltshireCouncil.py +135 -0
- uk_bin_collection/uk_bin_collection/councils/WindsorAndMaidenheadCouncil.py +134 -0
- uk_bin_collection/uk_bin_collection/councils/WokingBoroughCouncil.py +114 -0
- uk_bin_collection/uk_bin_collection/councils/WyreCouncil.py +89 -0
- uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py +45 -0
- uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py +33 -0
- uk_bin_collection/uk_bin_collection/get_bin_data.py +165 -0
- uk_bin_collection-0.74.0.dist-info/LICENSE +21 -0
- uk_bin_collection-0.74.0.dist-info/METADATA +247 -0
- uk_bin_collection-0.74.0.dist-info/RECORD +171 -0
- uk_bin_collection-0.74.0.dist-info/WHEEL +4 -0
- uk_bin_collection-0.74.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,134 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from urllib.parse import urlparse
|
3
|
+
|
4
|
+
import requests
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
|
+
|
9
|
+
|
10
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
11
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Council implementation for the Royal Borough of Windsor and Maidenhead.

    Replays the my.rbwm.gov.uk browser form flow: load the search page,
    POST the postcode to the address selector, pick the table row matching
    the house name/number and postcode, then request the collection-dates
    block for that address.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Scrape bin collection dates for a property.

        Keyword arguments:
            postcode -- the property's postcode, used for the address search
            paon -- the property's house name/number, used to pick the row

        Returns:
            A dict with a "bins" list of {"type", "collectionDate"} entries;
            the collection date is returned exactly as shown on the site.

        Raises:
            ValueError: if no address row matches the supplied details.
        """
        root_url = "https://my.rbwm.gov.uk"
        api_url = "https://my.rbwm.gov.uk/block_refresh/block/47/node/136968?"
        user_postcode = kwargs.get("postcode")
        user_paon = kwargs.get("paon")
        href_url = ""

        data = {"bins": []}

        requests.packages.urllib3.disable_warnings()
        s = requests.session()

        # Headers shared by every request; emulates a desktop Chrome browser.
        common_headers = {
            "authority": "my.rbwm.gov.uk",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "accept-language": "en-GB,en;q=0.8",
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "same-origin",
            "sec-fetch-user": "?1",
            "sec-gpc": "1",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.102 Safari/537.36",
        }

        # Form start: prime the session with any cookies the portal sets.
        s.get(
            "https://my.rbwm.gov.uk/special/find-your-collection-dates",
            headers={
                **common_headers,
                "cache-control": "max-age=0",
                "referer": "https://my.rbwm.gov.uk/special/your-collection-dates?uprn=100080371082&subdate=2022-08-19&addr=11%20Douglas%20Lane%20Wraysbury%20Staines%20TW19%205NF",
            },
        )

        # Select address: post the postcode to the address selector form.
        request_data = {
            "atTxtStreet": user_postcode,
            "nodeid": "x",
            "formname": "x",
            "pg": "20",
            "start": "1",
            "selectaddress": "Select this address",
            "selectheading": "The following addresses match the address you entered - choose your address",
            "arg": "",
        }
        response = s.post(
            "https://my.rbwm.gov.uk/special/address-selector-collection-dates",
            headers={
                **common_headers,
                "cache-control": "max-age=0",
                "origin": "https://my.rbwm.gov.uk",
                "referer": "https://my.rbwm.gov.uk/special/find-your-collection-dates",
            },
            data=request_data,
        )

        soup = BeautifulSoup(response.content, features="html.parser")

        # Find the first address row whose cell text contains both the house
        # name/number and the postcode, and take its collection-dates link.
        table = soup.find("table")
        if table:
            for tr in table.find_all("tr"):
                cells = tr.find_all("td")
                if any(
                    user_paon in cell.text and user_postcode in cell.text
                    for cell in cells
                ):
                    href_url = cells[1].find("a").get("href")
                    break

        # Previously an unmatched address fell through and crashed later with
        # an opaque IndexError while parsing an empty query string.
        if not href_url:
            raise ValueError(
                "Could not find an address matching the supplied paon and postcode"
            )

        # Getting to bin data: rebuild the link's query string as params.
        # split("=", 1) tolerates values containing "=", and parts without
        # "=" are skipped instead of raising IndexError.
        params = {}
        for part in urlparse(href_url).query.split("&"):
            if "=" in part:
                key, value = part.split("=", 1)
                params[key] = value

        bin_headers = {
            **common_headers,
            "referer": "https://my.rbwm.gov.uk/special/address-selector-collection-dates",
        }
        s.get(root_url + href_url, params=params, headers=bin_headers)
        response = s.get(
            api_url + href_url.split("?")[1], params=params, headers=bin_headers
        )

        soup = BeautifulSoup(response.content, features="html.parser")

        # Each table row holds a bin type and its next collection date.
        for tr in soup.find_all("tr"):
            row = [cell.text for cell in tr.find_all("td")]
            if len(row) > 0:
                data["bins"].append(
                    {
                        "type": row[0],
                        "collectionDate": row[1],
                    }
                )

        return data
@@ -0,0 +1,114 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
import urllib
|
3
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
4
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
5
|
+
|
6
|
+
|
7
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        # Scrapes the Whitespace-hosted Woking portal: find the unique
        # "View My Collections" link on the landing page, POST the address
        # form, open the first matching property's calendar and read
        # date/bin-type pairs from it.
        requests.packages.urllib3.disable_warnings()
        root_url = "https://asjwsw-wrpwokingmunicipal-live.whitespacews.com/"
        # Get the house number and postcode from the commandline
        user_paon = kwargs.get("paon")
        user_postcode = kwargs.get("postcode")
        check_postcode(user_postcode)

        # Start a new session for the form, and get the chosen URL from the commandline
        session = requests.Session()
        req = session.get(root_url)

        # Parse the requested URL to get a link to the "View My Collections" portal with a unique service ID
        start = BeautifulSoup(req.text, features="html.parser")
        start.prettify()
        base_link = start.select(
            "#menu-content > div > div:nth-child(1) > p.govuk-body.govuk-\!-margin-bottom-0.colorblue.lineheight15 > a"
        )[0].attrs.get("href")

        # We need to reorder the query parts from the unique URL, so split them up to make it easier
        query_parts = urllib.parse.urlparse(base_link).query.split("&")
        parts = base_link.split("?")
        # Target the mop.php endpoint with the two query parts swapped and
        # seq=2 appended (the order the portal's own form produces).
        addr_link = (
            parts[0] + "/mop.php?" + query_parts[1] + "&" + query_parts[0] + "&seq=2"
        )

        # Bring in some headers to emulate a browser, and put the UPRN and postcode into the form data.
        # This is sent in a POST request, emulating browser behaviour.
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "Accept-Language": "en-GB,en;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/x-www-form-urlencoded",
            "Origin": "https://asjwsw-wrpwokingmunicipal-live.whitespacews.com",
            "Pragma": "no-cache",
            "Referer": "https://asjwsw-wrpwokingmunicipal-live.whitespacews.com/",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "same-origin",
            "Sec-Fetch-User": "?1",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 OPR/98.0.0.0",
            "sec-ch-ua": '"Chromium";v="112", "Not_A Brand";v="24", "Opera GX";v="98"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
        }
        data = {
            "address_name_number": user_paon,
            "address_street": "",
            "street_town": "",
            "address_postcode": user_postcode,
        }
        addr_page = session.post(addr_link, headers=headers, data=data)
        addr = BeautifulSoup(addr_page.text, features="html.parser")
        addr.prettify()

        # This page should only have one address, but regardless, select the first one and make a request to load the
        # calendar page.
        cal_link = root_url + addr.select("#property_list > ul > li > a")[0].attrs.get(
            "href"
        )
        cal_page = session.get(cal_link)

        # Parse the calendar page
        soup = BeautifulSoup(cal_page.text, features="html.parser")
        soup.prettify()
        data = {"bins": []}

        # For whatever reason, each row contains all the information for that row, and each one after it. This code
        # essentially gets all items from each row, but ignores the whitespace that you get when splitting using \n.
        # This produces a big list of dates then bin types, so we split them up into a list of lists - each pair is
        # a date and the bin type.
        # NOTE(review): "u1" is a non-standard HTML tag name; presumably the
        # portal's markup really contains <u1>, but confirm it is not a typo
        # for "ul" — if the tag is absent, .text raises AttributeError.
        items = [
            i
            for i in soup.find(
                "u1",
                {
                    "class": "displayinlineblock justifycontentleft alignitemscenter margin0 padding0"
                },
            ).text.split("\n")
            if i != ""
        ]
        pairs = [items[i : i + 2] for i in range(0, len(items), 2)]

        # Loop through the paired bin dates and types
        for pair in pairs:
            # This isn't necessary, but better safe than sorry
            # (round-trips the date through strptime/strftime to validate it).
            collection_date = datetime.strptime(pair[0], date_format).strftime(
                date_format
            )
            # Change the formatting of the purple bins to replace the hyphens with slashes
            if pair[1] == "Batteries-small electricals-textiles":
                bin_type = pair[1].replace("-", "/").strip()
            else:
                bin_type = pair[1]

            # Add the data into the dictionary
            data["bins"].append({"type": bin_type, "collectionDate": collection_date})

        return data
@@ -0,0 +1,89 @@
|
|
1
|
+
import requests
|
2
|
+
import json
|
3
|
+
import urllib.parse
|
4
|
+
from dateutil.relativedelta import relativedelta
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
|
+
|
9
|
+
|
10
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
11
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Council implementation for Wyre Council.

    Resolves the address label for the supplied UPRN via the postcode
    lookup endpoint, then scrapes the bin-collection page for that address.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Scrape the next collection date for each bin type.

        Keyword arguments:
            uprn -- the property's UPRN (validated with check_uprn)
            postcode -- the property's postcode (validated with check_postcode)

        Returns:
            A dict with a "bins" list of {"type", "collectionDate"} entries,
            ordered by collection date.

        Raises:
            ValueError: if the postcode lookup returns no entry for the UPRN.
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)

        user_postcode = kwargs.get("postcode")
        check_postcode(user_postcode)

        data = {"bins": []}
        collections = []

        # Browser-like headers for the AJAX postcode lookup and results page.
        headers = {
            "authority": "www.wyre.gov.uk",
            "accept": "application/json, text/javascript, */*; q=0.01",
            "accept-language": "en-GB,en;q=0.9",
            "cache-control": "no-cache",
            "origin": "https://www.wyre.gov.uk",
            "pragma": "no-cache",
            "referer": "https://www.wyre.gov.uk/bincollections",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-origin",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.186 Safari/537.36",
            "x-requested-with": "XMLHttpRequest",
        }
        postcode_api = f"https://www.wyre.gov.uk/singlepoint_ajax/{urllib.parse.quote(user_postcode)}"
        addr_res = requests.post(postcode_api, headers=headers)
        json_data = addr_res.json()

        # Find the address label for the requested UPRN. Previously a missing
        # UPRN left addr_line unbound and raised a NameError further down.
        addr_line = None
        for entry in json_data.values():
            if entry.get("uprn") == user_uprn:
                addr_line = entry.get("label")
                break
        if addr_line is None:
            raise ValueError(
                f"No address found for UPRN {user_uprn} at postcode {user_postcode}"
            )

        api_url = f"https://www.wyre.gov.uk/bincollections?uprn={user_uprn}&address={urllib.parse.quote(addr_line)}&submit="
        res = requests.get(api_url, headers=headers)

        soup = BeautifulSoup(res.text, features="html.parser")

        bins = soup.find_all("div", {"class": "boxed"})

        for item in bins:
            # Words 2-3 of the heading appear to carry the bin type
            # (e.g. "Your next GREY BIN collection") — TODO confirm on site.
            collection_title = " ".join(
                item.find("h3", {"class": "bin-collection-tasks__heading"}).text.split(
                    " "
                )[2:4]
            )
            # The site omits the year, so parse "%A %d %B" and assume the
            # current year...
            collection_date = datetime.strptime(
                remove_ordinal_indicator_from_date_string(
                    item.find("div", {"class": "bin-collection-tasks__content"})
                    .text.strip()
                    .replace("\n", " ")
                ),
                "%A %d %B",
            )
            next_collection = collection_date.replace(year=datetime.now().year)
            # ...rolling January dates seen in December into the next year.
            if datetime.now().month == 12 and next_collection.month == 1:
                next_collection = next_collection + relativedelta(years=1)
            collections.append((collection_title, next_collection))

        # Sort on the real datetime objects before formatting for output.
        ordered_data = sorted(collections, key=lambda x: x[1])
        for item in ordered_data:
            dict_data = {
                "type": item[0].capitalize(),
                "collectionDate": item[1].strftime(date_format),
            }
            data["bins"].append(dict_data)

        return data
@@ -0,0 +1,45 @@
|
|
1
|
+
import json
|
2
|
+
from datetime import datetime
|
3
|
+
|
4
|
+
import requests
|
5
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
6
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
7
|
+
|
8
|
+
|
9
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
10
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Council implementation for City of York Council.

    Fetches collection data from the council's public waste API by UPRN.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Return bin collections for a UPRN, ordered chronologically.

        Keyword arguments:
            uprn -- the property's UPRN (validated with check_uprn)

        Returns:
            A dict with a "bins" list of {"type", "collectionDate"} entries,
            sorted by collection date.
        """
        api_url = (
            "https://waste-api.york.gov.uk/api/Collections/GetBinCollectionDataForUprn/"
        )
        uprn = kwargs.get("uprn")
        check_uprn(uprn)

        requests.packages.urllib3.disable_warnings()
        response = requests.get(f"{api_url}{uprn}")
        services = json.loads(response.content)["services"]

        # Sort on the parsed datetime rather than the formatted string:
        # unless date_format happens to be ISO-ordered (year first), sorting
        # the formatted strings mis-orders dates across months and years.
        collections = []
        for item in services:
            collection_date = datetime.strptime(
                item.get("nextCollection"), "%Y-%m-%dT%H:%M:%S"
            )
            collections.append((item.get("service"), collection_date))
        collections.sort(key=lambda entry: entry[1])

        data = {"bins": []}
        for service, collection_date in collections:
            data["bins"].append(
                {
                    "type": service,
                    "collectionDate": collection_date.strftime(date_format),
                }
            )

        return data
@@ -0,0 +1,33 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
|
5
|
+
|
6
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
7
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Parse bin types and dates out of the pre-fetched collections page.

        Args:
            page: the HTTP response for the collections page — despite the
                annotation this is read via ``.text``, so it is presumably a
                ``requests.Response``; confirm against the caller.

        Returns:
            A dict with a "bins" list of {"type", "collectionDate"} entries.
        """
        # Make a BS4 object
        soup = BeautifulSoup(page.text, features="html.parser")

        data = {"bins": []}

        for service_item in soup.select('div[class*="service-item"]'):
            bin_type = service_item.div.h3.text.strip()
            bin_collection = service_item.select("div > p")[1]
            if bin_collection:
                data["bins"].append(
                    {
                        "type": bin_type,
                        # Previously the raw datetime object was stored here,
                        # which is not JSON-serialisable and inconsistent with
                        # the other councils; format it like the rest do.
                        "collectionDate": datetime.strptime(
                            bin_collection.get_text(strip=True), "%A, %d %B %Y"
                        ).strftime(date_format),
                    }
                )

        return data
@@ -0,0 +1,165 @@
|
|
1
|
+
"""Module that contains an abstract class that can be imported to
|
2
|
+
handle the data recieved from the provided council class.
|
3
|
+
|
4
|
+
Keyword arguments: None
|
5
|
+
"""
|
6
|
+
|
7
|
+
import json
|
8
|
+
import logging
|
9
|
+
from abc import ABC, abstractmethod
|
10
|
+
from logging.config import dictConfig
|
11
|
+
import os
|
12
|
+
|
13
|
+
import requests
|
14
|
+
|
15
|
+
from uk_bin_collection.uk_bin_collection.common import update_input_json
|
16
|
+
|
17
|
+
_LOGGER = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
# Default logging configuration for this module: a single StreamHandler
# ("h") at INFO level using the timestamped formatter ("f"), attached to
# the root logger. Applied via logging.config.dictConfig in setup_logging.
LOGGING_CONFIG = dict(
    version=1,
    formatters={"f": {"format": "%(asctime)s %(name)-12s %(levelname)-8s %(message)s"}},
    handlers={
        "h": {"class": "logging.StreamHandler", "formatter": "f", "level": logging.INFO}
    },
    root={"handlers": ["h"], "level": logging.INFO},
)
27
|
+
|
28
|
+
|
29
|
+
def setup_logging(logging_config, logger_name):
    """Apply *logging_config* and return the named logger.

    Args:
        logging_config: a dict in ``logging.config.dictConfig`` schema.
        logger_name: name of the logger to fetch after configuration.

    Returns:
        The configured ``logging.Logger`` instance.

    Raises:
        Whatever ``dictConfig`` raises on an invalid configuration
        (typically ValueError); errors propagate to the caller unchanged.
    """
    # The previous `except Exception as exp: raise exp` was a no-op re-raise
    # that only added a traceback frame; letting errors propagate directly
    # is equivalent and clearer.
    logging.config.dictConfig(logging_config)
    return logging.getLogger(logger_name)
|
37
|
+
|
38
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
39
|
+
|
40
|
+
|
41
|
+
class AbstractGetBinDataClass(ABC):
    """An abstract class that can be imported to handle the data received from the provided
    council class.

    Keyword arguments: None
    """

    def template_method(self, address_url: str, **kwargs) -> None:  # pragma: no cover
        """The main template method that orchestrates a run.

        Optionally performs the generic HTTP GET (unless the council opts out
        via ``skip_get_url``), delegates parsing to the council subclass,
        serialises the result to JSON and, in dev mode, updates the test
        input.json entry for the council.

        Keyword arguments:
        address_url -- the url to get the data from
        """
        this_url = address_url
        this_postcode = kwargs.get("postcode", None)
        this_paon = kwargs.get("paon", None)
        this_uprn = kwargs.get("uprn", None)
        this_usrn = kwargs.get("usrn", None)
        this_web_driver = kwargs.get("web_driver", None)
        this_headless = kwargs.get("headless", None)
        skip_get_url = kwargs.get("skip_get_url", None)
        dev_mode = kwargs.get("dev_mode", False)
        council_module_str = kwargs.get("council_module_str", None)

        # When skip_get_url is set, the council class does its own data
        # retrieval inside parse_data and receives an empty page instead.
        # (The old code duplicated the whole parse/output call in both
        # branches and used the redundant condition
        # ``not skip_get_url or skip_get_url is False``.)
        page = self.get_data(address_url) if not skip_get_url else ""
        bin_data_dict = self.parse_data(
            page,
            postcode=this_postcode,
            paon=this_paon,
            uprn=this_uprn,
            usrn=this_usrn,
            web_driver=this_web_driver,
            headless=this_headless,
            url=this_url,
        )
        json_output = self.output_json(bin_data_dict)

        # if dev mode create/update council's entry in the input.json
        if dev_mode is True:
            input_file_path = os.path.join(
                os.getcwd(), "uk_bin_collection", "tests", "input.json"
            )
            update_input_json(
                council_module_str,
                this_url,
                input_file_path,
                postcode=this_postcode,
                paon=this_paon,
                uprn=this_uprn,
                usrn=this_usrn,
                web_driver=this_web_driver,
                skip_get_url=skip_get_url,
            )

        return json_output

    @classmethod
    def get_data(cls, url) -> str:
        """Make the HTTP request to the council site.

        Keyword arguments:
        url -- the url to get the data from
        Returns the ``requests.Response`` object (the historical ``-> str``
        annotation is kept for backward compatibility with subclasses).
        Raises the corresponding ``requests.exceptions`` error after logging.
        """
        # Set a user agent so we look like a browser ;-)
        user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/108.0.0.0 Safari/537.36"
        )
        headers = {"User-Agent": user_agent}
        requests.packages.urllib3.disable_warnings()

        # Make the Request - change the URL - find out your property number
        try:
            # BUGFIX: headers must be passed by keyword. The second
            # positional argument of requests.get is ``params``, so the old
            # call sent the User-Agent dict as query parameters and never
            # actually set the header.
            full_page = requests.get(url, headers=headers, verify=False)
            return full_page
        except requests.exceptions.HTTPError as errh:
            _LOGGER.error(f"Http Error: {errh}")
            raise
        except requests.exceptions.ConnectionError as errc:
            _LOGGER.error(f"Error Connecting: {errc}")
            raise
        except requests.exceptions.Timeout as errt:
            _LOGGER.error(f"Timeout Error: {errt}")
            raise
        except requests.exceptions.RequestException as err:
            _LOGGER.error(f"Oops: Something Else {err}")
            raise

    @abstractmethod
    def parse_data(self, page: str, **kwargs) -> dict:
        """Abstract method that takes a page as a string

        Keyword arguments:
        page -- a string from the requested page
        """

    @classmethod
    def output_json(cls, bin_data_dict: dict) -> str:
        """Method to output the json as a pretty printed string

        Keyword arguments:
        bin_data_dict -- a dict of parsed data
        """
        # Form the JSON wrapper, preserving insertion order of the keys.
        json_data = json.dumps(bin_data_dict, sort_keys=False, indent=4)

        # Output the data
        return json_data
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2022 Robert Bradley
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|