uk_bin_collection 0.86.0__py3-none-any.whl → 0.86.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +47 -45
- {uk_bin_collection-0.86.0.dist-info → uk_bin_collection-0.86.1.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.86.0.dist-info → uk_bin_collection-0.86.1.dist-info}/RECORD +6 -6
- {uk_bin_collection-0.86.0.dist-info → uk_bin_collection-0.86.1.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.86.0.dist-info → uk_bin_collection-0.86.1.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.86.0.dist-info → uk_bin_collection-0.86.1.dist-info}/entry_points.txt +0 -0
@@ -1,15 +1,11 @@
|
|
1
1
|
from bs4 import BeautifulSoup
|
2
|
-
|
3
2
|
from uk_bin_collection.uk_bin_collection.common import *
|
4
3
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
from datetime import datetime
|
5
5
|
|
6
|
-
|
7
|
-
# import the wonderful Beautiful Soup and the URL grabber
|
8
6
|
class CouncilClass(AbstractGetBinDataClass):
|
9
7
|
"""
|
10
|
-
Concrete
|
11
|
-
class. They can also override some operations with a default
|
12
|
-
implementation.
|
8
|
+
Concrete class to scrape bin collection data.
|
13
9
|
"""
|
14
10
|
|
15
11
|
def parse_data(self, page: str, **kwargs) -> dict:
|
@@ -27,65 +23,71 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
27
23
|
soup.prettify()
|
28
24
|
|
29
25
|
data = {"bins": []}
|
26
|
+
# Locate the section with bin collection data
|
30
27
|
sections = soup.find_all("div", {"class": "wil_c-content-section_heading"})
|
28
|
+
|
31
29
|
for s in sections:
|
32
30
|
if s.get_text(strip=True).lower() == "bin collections":
|
33
31
|
rows = s.find_next_sibling(
|
34
32
|
"div", {"class": "c-content-section_body"}
|
35
|
-
).find_all("div",
|
33
|
+
).find_all("div", class_="tablet:l-col-fb-4 u-mt-10")
|
34
|
+
|
36
35
|
for row in rows:
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
title_elem = row.find("div", class_="u-mb-4")
|
37
|
+
if title_elem:
|
38
|
+
title = title_elem.get_text(strip=True).capitalize()
|
39
|
+
|
40
|
+
# Find all collection info in the same section
|
41
|
+
collections = row.find_all("div", class_="u-mb-2")
|
40
42
|
for c in collections:
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
.
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
"
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
43
|
+
text = c.get_text(strip=True).lower()
|
44
|
+
|
45
|
+
if "next collection" in text:
|
46
|
+
date_text = text.replace("next collection - ", "")
|
47
|
+
try:
|
48
|
+
next_collection_date = datetime.strptime(
|
49
|
+
date_text, "%A, %d %B %Y"
|
50
|
+
).strftime(date_format)
|
51
|
+
|
52
|
+
dict_data = {
|
53
|
+
"type": title,
|
54
|
+
"collectionDate": next_collection_date
|
55
|
+
}
|
56
|
+
data["bins"].append(dict_data)
|
57
|
+
except ValueError:
|
58
|
+
# Skip if the date isn't a valid date
|
59
|
+
print(f"Skipping invalid date: {date_text}")
|
60
|
+
|
61
|
+
# Get future collections
|
62
|
+
future_collections_section = row.find("ul", class_="u-mt-4")
|
63
|
+
if future_collections_section:
|
64
|
+
future_collections = future_collections_section.find_all("li")
|
65
|
+
for future_collection in future_collections:
|
66
|
+
future_date_text = future_collection.get_text(strip=True)
|
67
|
+
try:
|
63
68
|
future_collection_date = datetime.strptime(
|
64
|
-
|
65
|
-
"%A, %d %B %Y",
|
69
|
+
future_date_text, "%A, %d %B %Y"
|
66
70
|
).strftime(date_format)
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
):
|
71
|
+
|
72
|
+
# Avoid duplicates of next collection date
|
73
|
+
if future_collection_date != next_collection_date:
|
71
74
|
dict_data = {
|
72
|
-
"type": title
|
73
|
-
|
74
|
-
).capitalize(),
|
75
|
-
"collectionDate": future_collection_date,
|
75
|
+
"type": title,
|
76
|
+
"collectionDate": future_collection_date
|
76
77
|
}
|
77
78
|
data["bins"].append(dict_data)
|
79
|
+
except ValueError:
|
80
|
+
# Skip if the future collection date isn't valid
|
81
|
+
print(f"Skipping invalid future date: {future_date_text}")
|
78
82
|
|
83
|
+
# Sort the collections by date
|
79
84
|
data["bins"].sort(
|
80
85
|
key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
|
81
86
|
)
|
82
87
|
except Exception as e:
|
83
|
-
# Here you can log the exception if needed
|
84
88
|
print(f"An error occurred: {e}")
|
85
|
-
# Optionally, re-raise the exception if you want it to propagate
|
86
89
|
raise
|
87
90
|
finally:
|
88
|
-
# This block ensures that the driver is closed regardless of an exception
|
89
91
|
if driver:
|
90
92
|
driver.quit()
|
91
93
|
return data
|
@@ -166,7 +166,7 @@ uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py,sha256=6
|
|
166
166
|
uk_bin_collection/uk_bin_collection/councils/UttlesfordDistrictCouncil.py,sha256=8CvO-WgdKdvyaOf3TYc4XwME8ogAXojgB40oyGRL8Dw,4129
|
167
167
|
uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py,sha256=Phgb_ECiUOOkqOx6OsfsTHMCW5VQfRmOC2zgYIQhuZA,5044
|
168
168
|
uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py,sha256=5nZLbU5YVKNsJ2X_wuybrNLFAzjVAxkazu-bYP4IGXw,4292
|
169
|
-
uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py,sha256
|
169
|
+
uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py,sha256=-xqJOzHTrT4jOB3rHPXFYeqLaHyK9XmCPi92whaYBhw,4671
|
170
170
|
uk_bin_collection/uk_bin_collection/councils/WalthamForest.py,sha256=P7MMw0EhpRmDbbnHb25tY5_yvYuZUFwJ1br4TOv24sY,4997
|
171
171
|
uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py,sha256=3WQrAxzYzKoV4LyOqNTp9xINVsNi1xW9t8etducGeag,1146
|
172
172
|
uk_bin_collection/uk_bin_collection/councils/WaverleyBoroughCouncil.py,sha256=tp9l7vdgSGRzNNG0pDfnNuFj4D2bpRJUJmAiTJ6bM0g,4662
|
@@ -187,8 +187,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
|
|
187
187
|
uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=4s9ODGPAwPqwXc8SrTX5Wlfmizs3_58iXUtHc4Ir86o,1162
|
188
188
|
uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
|
189
189
|
uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=YvmHfZqanwrJ8ToGch34x-L-7yPe31nB_x77_Mgl_vo,4545
|
190
|
-
uk_bin_collection-0.86.
|
191
|
-
uk_bin_collection-0.86.
|
192
|
-
uk_bin_collection-0.86.
|
193
|
-
uk_bin_collection-0.86.
|
194
|
-
uk_bin_collection-0.86.
|
190
|
+
uk_bin_collection-0.86.1.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
|
191
|
+
uk_bin_collection-0.86.1.dist-info/METADATA,sha256=0Jhf4AjdTWOwmSNGJ_xLQNHjV-nrnLl0TXF6m_5_U8E,16231
|
192
|
+
uk_bin_collection-0.86.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
193
|
+
uk_bin_collection-0.86.1.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
|
194
|
+
uk_bin_collection-0.86.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|