uk_bin_collection 0.86.0__py3-none-any.whl → 0.86.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +8 -0
- uk_bin_collection/uk_bin_collection/councils/GloucesterCityCouncil.py +128 -0
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +47 -45
- {uk_bin_collection-0.86.0.dist-info → uk_bin_collection-0.86.2.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.86.0.dist-info → uk_bin_collection-0.86.2.dist-info}/RECORD +8 -7
- {uk_bin_collection-0.86.0.dist-info → uk_bin_collection-0.86.2.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.86.0.dist-info → uk_bin_collection-0.86.2.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.86.0.dist-info → uk_bin_collection-0.86.2.dist-info}/entry_points.txt +0 -0
@@ -427,6 +427,14 @@
|
|
427
427
|
"wiki_name": "Glasgow City Council",
|
428
428
|
"wiki_note": "Replace XXXXXXXX with UPRN."
|
429
429
|
},
|
430
|
+
"GloucesterCityCouncil": {
|
431
|
+
"house_number": "111",
|
432
|
+
"postcode": "GL2 0RR",
|
433
|
+
"uprn": "100120479507",
|
434
|
+
"skip_get_url": true,
|
435
|
+
"url": "https://gloucester-self.achieveservice.com/service/Bins___Check_your_bin_day",
|
436
|
+
"wiki_name": "Gloucester City Council"
|
437
|
+
},
|
430
438
|
"GuildfordCouncil": {
|
431
439
|
"house_number": "THE LODGE, PUTTENHAM HILL HOUSE, PUTTENHAM HILL, PUTTENHAM, GUILDFORD, GU3 1AH",
|
432
440
|
"postcode": "GU3 1AH",
|
@@ -0,0 +1,128 @@
|
|
1
|
+
import time
|
2
|
+
from datetime import datetime
|
3
|
+
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
from selenium.webdriver.common.by import By
|
6
|
+
from selenium.webdriver.support import expected_conditions as EC
|
7
|
+
from selenium.webdriver.support.ui import Select
|
8
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
9
|
+
|
10
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
11
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
12
|
+
|
13
|
+
# Gloucester's bin-day form is rendered by JavaScript inside an iframe, so it is driven with Selenium and the resulting page is parsed with Beautiful Soup
|
14
|
+
|
15
|
+
|
16
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Scraper for Gloucester City Council's "Check your bin day" form.

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Drive the council's self-service form with Selenium and scrape the
        next collection date shown for each bin.

        Args:
            page: Ignored; the form URL is fixed and assigned inside the method.
            **kwargs: Reads ``uprn``, ``postcode``, ``web_driver`` and
                ``headless``.

        Returns:
            A dict of the form
            ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}`` with the
            entries sorted by collection date.

        Raises:
            Exception: Any error raised while validating the inputs, driving
                the browser or parsing the page is printed and re-raised.
        """
        driver = None
        try:
            # The incoming ``page`` value is discarded: the form lives at a fixed URL.
            page = "https://gloucester-self.achieveservice.com/service/Bins___Check_your_bin_day"

            bin_data = {"bins": []}

            user_uprn = kwargs.get("uprn")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            check_uprn(user_uprn)
            check_postcode(user_postcode)

            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless, None, __name__)
            driver.get(page)

            # Wait until the buttons can actually be clicked, not merely until
            # they exist in the DOM, before clicking them.
            cookies_button = WebDriverWait(driver, timeout=15).until(
                EC.element_to_be_clickable((By.ID, "close-cookie-message"))
            )
            cookies_button.click()

            without_login_button = WebDriverWait(driver, timeout=15).until(
                EC.element_to_be_clickable(
                    (By.LINK_TEXT, "or, Continue with no account")
                )
            )
            without_login_button.click()

            # The form itself is rendered inside an iframe.
            form_frame = WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.ID, "fillform-frame-1"))
            )
            driver.switch_to.frame(form_frame)

            wait = WebDriverWait(driver, 60)
            postcode_input = wait.until(
                EC.element_to_be_clickable((By.NAME, "find_postcode"))
            )
            postcode_input.send_keys(user_postcode)

            # Wait for the 'Select address' dropdown to be updated
            time.sleep(2)

            dropdown = wait.until(
                EC.element_to_be_clickable((By.NAME, "chooseAddress"))
            )
            # Pick the address whose <option> value is the requested UPRN.
            Select(dropdown).select_by_value(str(user_uprn))

            searching_text = EC.text_to_be_present_in_element(
                (By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
            )
            # Wait for 'Searching for...' to be added to page
            WebDriverWait(driver, timeout=15).until(searching_text)
            # Wait for 'Searching for...' to be removed from page
            WebDriverWait(driver, timeout=15).until(EC.none_of(searching_text))

            # Even then it can still be adding data to the page...
            time.sleep(5)

            soup = BeautifulSoup(driver.page_source, features="html.parser")

            # This is ugly but there is literally no consistency to the HTML
            def is_a_collection_date(t):
                return any("Next collection" in c for c in t.children)

            for next_collection in soup.find_all(is_a_collection_date):
                heading = next_collection.parent.select_one("div:nth-child(1)")
                date_tag = next_collection.select_one("strong")
                # Skip matches that do not have the expected heading/date
                # structure instead of failing with an AttributeError on None.
                if heading is None or date_tag is None:
                    continue
                bin_info = list(heading.children)
                if not bin_info:
                    continue
                bin_type = bin_info[0].get_text()
                date_text = date_tag.get_text(strip=True)
                bin_date = datetime.strptime(date_text, "%d %b %Y")
                bin_data["bins"].append(
                    {
                        "type": bin_type,
                        "collectionDate": bin_date.strftime(date_format),
                    }
                )

            # Parse with the same format the dates were written with above.
            bin_data["bins"].sort(
                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
            )

        except Exception as e:
            print(f"An error occurred: {e}")
            raise
        finally:
            # Always close the browser, whether or not scraping succeeded.
            if driver:
                driver.quit()
        return bin_data
|
@@ -1,15 +1,11 @@
|
|
1
1
|
from bs4 import BeautifulSoup
|
2
|
-
|
3
2
|
from uk_bin_collection.uk_bin_collection.common import *
|
4
3
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
from datetime import datetime
|
5
5
|
|
6
|
-
|
7
|
-
# import the wonderful Beautiful Soup and the URL grabber
|
8
6
|
class CouncilClass(AbstractGetBinDataClass):
|
9
7
|
"""
|
10
|
-
Concrete
|
11
|
-
class. They can also override some operations with a default
|
12
|
-
implementation.
|
8
|
+
Concrete class to scrape bin collection data.
|
13
9
|
"""
|
14
10
|
|
15
11
|
def parse_data(self, page: str, **kwargs) -> dict:
|
@@ -27,65 +23,71 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
27
23
|
soup.prettify()
|
28
24
|
|
29
25
|
data = {"bins": []}
|
26
|
+
# Locate the section with bin collection data
|
30
27
|
sections = soup.find_all("div", {"class": "wil_c-content-section_heading"})
|
28
|
+
|
31
29
|
for s in sections:
|
32
30
|
if s.get_text(strip=True).lower() == "bin collections":
|
33
31
|
rows = s.find_next_sibling(
|
34
32
|
"div", {"class": "c-content-section_body"}
|
35
|
-
).find_all("div",
|
33
|
+
).find_all("div", class_="tablet:l-col-fb-4 u-mt-10")
|
34
|
+
|
36
35
|
for row in rows:
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
title_elem = row.find("div", class_="u-mb-4")
|
37
|
+
if title_elem:
|
38
|
+
title = title_elem.get_text(strip=True).capitalize()
|
39
|
+
|
40
|
+
# Find all collection info in the same section
|
41
|
+
collections = row.find_all("div", class_="u-mb-2")
|
40
42
|
for c in collections:
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
.
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
"
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
43
|
+
text = c.get_text(strip=True).lower()
|
44
|
+
|
45
|
+
if "next collection" in text:
|
46
|
+
date_text = text.replace("next collection - ", "")
|
47
|
+
try:
|
48
|
+
next_collection_date = datetime.strptime(
|
49
|
+
date_text, "%A, %d %B %Y"
|
50
|
+
).strftime(date_format)
|
51
|
+
|
52
|
+
dict_data = {
|
53
|
+
"type": title,
|
54
|
+
"collectionDate": next_collection_date
|
55
|
+
}
|
56
|
+
data["bins"].append(dict_data)
|
57
|
+
except ValueError:
|
58
|
+
# Skip if the date isn't a valid date
|
59
|
+
print(f"Skipping invalid date: {date_text}")
|
60
|
+
|
61
|
+
# Get future collections
|
62
|
+
future_collections_section = row.find("ul", class_="u-mt-4")
|
63
|
+
if future_collections_section:
|
64
|
+
future_collections = future_collections_section.find_all("li")
|
65
|
+
for future_collection in future_collections:
|
66
|
+
future_date_text = future_collection.get_text(strip=True)
|
67
|
+
try:
|
63
68
|
future_collection_date = datetime.strptime(
|
64
|
-
|
65
|
-
"%A, %d %B %Y",
|
69
|
+
future_date_text, "%A, %d %B %Y"
|
66
70
|
).strftime(date_format)
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
):
|
71
|
+
|
72
|
+
# Avoid duplicates of next collection date
|
73
|
+
if future_collection_date != next_collection_date:
|
71
74
|
dict_data = {
|
72
|
-
"type": title
|
73
|
-
|
74
|
-
).capitalize(),
|
75
|
-
"collectionDate": future_collection_date,
|
75
|
+
"type": title,
|
76
|
+
"collectionDate": future_collection_date
|
76
77
|
}
|
77
78
|
data["bins"].append(dict_data)
|
79
|
+
except ValueError:
|
80
|
+
# Skip if the future collection date isn't valid
|
81
|
+
print(f"Skipping invalid future date: {future_date_text}")
|
78
82
|
|
83
|
+
# Sort the collections by date
|
79
84
|
data["bins"].sort(
|
80
85
|
key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
|
81
86
|
)
|
82
87
|
except Exception as e:
|
83
|
-
# Here you can log the exception if needed
|
84
88
|
print(f"An error occurred: {e}")
|
85
|
-
# Optionally, re-raise the exception if you want it to propagate
|
86
89
|
raise
|
87
90
|
finally:
|
88
|
-
# This block ensures that the driver is closed regardless of an exception
|
89
91
|
if driver:
|
90
92
|
driver.quit()
|
91
93
|
return data
|
@@ -2,7 +2,7 @@ uk_bin_collection/README.rst,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
|
|
2
2
|
uk_bin_collection/tests/council_feature_input_parity.py,sha256=DO6Mk4ImYgM5ZCZ-cutwz5RoYYWZRLYx2tr6zIs_9Rc,3843
|
3
3
|
uk_bin_collection/tests/features/environment.py,sha256=VQZjJdJI_kZn08M0j5cUgvKT4k3iTw8icJge1DGOkoA,127
|
4
4
|
uk_bin_collection/tests/features/validate_council_outputs.feature,sha256=SJK-Vc737hrf03tssxxbeg_JIvAH-ddB8f6gU1LTbuQ,251
|
5
|
-
uk_bin_collection/tests/input.json,sha256=
|
5
|
+
uk_bin_collection/tests/input.json,sha256=WJ6GsmG_nIzmOA0ItYnBRc7DwWD8jsytgA6XO5WstMo,60242
|
6
6
|
uk_bin_collection/tests/output.schema,sha256=ZwKQBwYyTDEM4G2hJwfLUVM-5v1vKRvRK9W9SS1sd18,1086
|
7
7
|
uk_bin_collection/tests/step_defs/step_helpers/file_handler.py,sha256=Ygzi4V0S1MIHqbdstUlIqtRIwnynvhu4UtpweJ6-5N8,1474
|
8
8
|
uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=LrOSt_loA1Mw3vTqaO2LpaDMu7rYJy6k5Kr-EOBln7s,3424
|
@@ -71,6 +71,7 @@ uk_bin_collection/uk_bin_collection/councils/ForestOfDeanDistrictCouncil.py,sha2
|
|
71
71
|
uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py,sha256=Ecq4kMbtAHnQrnxjhC7CG3oEZQ3D1aAk5qXVZk-ouxc,4601
|
72
72
|
uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py,sha256=IssL5CJSdcGPkJCB0q2kieUSEjfoS6nDKfeT7-9eKsQ,2183
|
73
73
|
uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py,sha256=IOgM8Wl-LpO1T-F9uU1FlVfPaEObpvsdP7S2h03Mycc,2528
|
74
|
+
uk_bin_collection/uk_bin_collection/councils/GloucesterCityCouncil.py,sha256=8Wjvmdvg5blHVrREaEnhhWZaWhYVP4v_KdDVPLIUxaU,4889
|
74
75
|
uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py,sha256=9pVrmQhZcK2AD8gX8mNvP--L4L9KaY6L3B822VX6fec,5695
|
75
76
|
uk_bin_collection/uk_bin_collection/councils/HaltonBoroughCouncil.py,sha256=r8cmtWhMJg-XG63ZHxidKKW7i4yQNrZSSMSCkBwrqjI,5837
|
76
77
|
uk_bin_collection/uk_bin_collection/councils/HaringeyCouncil.py,sha256=t_6AkAu4wrv8Q0WlDhWh_82I0djl5tk531Pzs-SjWzg,2647
|
@@ -166,7 +167,7 @@ uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py,sha256=6
|
|
166
167
|
uk_bin_collection/uk_bin_collection/councils/UttlesfordDistrictCouncil.py,sha256=8CvO-WgdKdvyaOf3TYc4XwME8ogAXojgB40oyGRL8Dw,4129
|
167
168
|
uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py,sha256=Phgb_ECiUOOkqOx6OsfsTHMCW5VQfRmOC2zgYIQhuZA,5044
|
168
169
|
uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py,sha256=5nZLbU5YVKNsJ2X_wuybrNLFAzjVAxkazu-bYP4IGXw,4292
|
169
|
-
uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py,sha256
|
170
|
+
uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py,sha256=-xqJOzHTrT4jOB3rHPXFYeqLaHyK9XmCPi92whaYBhw,4671
|
170
171
|
uk_bin_collection/uk_bin_collection/councils/WalthamForest.py,sha256=P7MMw0EhpRmDbbnHb25tY5_yvYuZUFwJ1br4TOv24sY,4997
|
171
172
|
uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py,sha256=3WQrAxzYzKoV4LyOqNTp9xINVsNi1xW9t8etducGeag,1146
|
172
173
|
uk_bin_collection/uk_bin_collection/councils/WaverleyBoroughCouncil.py,sha256=tp9l7vdgSGRzNNG0pDfnNuFj4D2bpRJUJmAiTJ6bM0g,4662
|
@@ -187,8 +188,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
|
|
187
188
|
uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=4s9ODGPAwPqwXc8SrTX5Wlfmizs3_58iXUtHc4Ir86o,1162
|
188
189
|
uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
|
189
190
|
uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=YvmHfZqanwrJ8ToGch34x-L-7yPe31nB_x77_Mgl_vo,4545
|
190
|
-
uk_bin_collection-0.86.
|
191
|
-
uk_bin_collection-0.86.
|
192
|
-
uk_bin_collection-0.86.
|
193
|
-
uk_bin_collection-0.86.
|
194
|
-
uk_bin_collection-0.86.
|
191
|
+
uk_bin_collection-0.86.2.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
|
192
|
+
uk_bin_collection-0.86.2.dist-info/METADATA,sha256=LRsdDRZdbzUoFma7eHGhI7jA8ZtZrf7y4xaO_3q6Lcw,16231
|
193
|
+
uk_bin_collection-0.86.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
194
|
+
uk_bin_collection-0.86.2.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
|
195
|
+
uk_bin_collection-0.86.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|