uk_bin_collection 0.105.0__py3-none-any.whl → 0.105.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- uk_bin_collection/tests/input.json +5 -3
- uk_bin_collection/uk_bin_collection/councils/BarnsleyMBCouncil.py +41 -25
- uk_bin_collection/uk_bin_collection/councils/MidlothianCouncil.py +115 -36
- {uk_bin_collection-0.105.0.dist-info → uk_bin_collection-0.105.1.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.105.0.dist-info → uk_bin_collection-0.105.1.dist-info}/RECORD +8 -8
- {uk_bin_collection-0.105.0.dist-info → uk_bin_collection-0.105.1.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.105.0.dist-info → uk_bin_collection-0.105.1.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.105.0.dist-info → uk_bin_collection-0.105.1.dist-info}/entry_points.txt +0 -0
@@ -754,10 +754,12 @@
|
|
754
754
|
"wiki_note": "Pass the house name/number plus the name of the street with the postcode parameter, wrapped in double quotes. Check the address in the web site first. This version will only pick the first SHOW button returned by the search or if it is fully unique. The search is not very predictable (e.g. house number 4 returns 14,24,4,44 etc.)."
|
755
755
|
},
|
756
756
|
"MidlothianCouncil": {
|
757
|
-
"
|
758
|
-
"
|
757
|
+
"house_number": "52",
|
758
|
+
"postcode": "EH19 2EB",
|
759
|
+
"skip_get_url": true,
|
760
|
+
"url": "https://www.midlothian.gov.uk/info/1054/bins_and_recycling/343/bin_collection_days",
|
759
761
|
"wiki_name": "Midlothian Council",
|
760
|
-
"wiki_note": "
|
762
|
+
"wiki_note": "Pass the house name/number wrapped in double quotes along with the postcode parameter"
|
761
763
|
},
|
762
764
|
"MidSussexDistrictCouncil": {
|
763
765
|
"house_number": "OAKLANDS, OAKLANDS ROAD RH16 1SS",
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Dict, Any
|
1
|
+
from typing import Dict, List, Any
|
2
2
|
from bs4 import BeautifulSoup
|
3
3
|
from dateutil.relativedelta import relativedelta
|
4
4
|
import requests
|
@@ -11,6 +11,30 @@ from uk_bin_collection.uk_bin_collection.common import (
|
|
11
11
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
12
12
|
|
13
13
|
|
14
|
+
def parse_bin_text(bin_type_str: str, bin_date_str: str) -> List[Dict[str, str]]:
|
15
|
+
"""
|
16
|
+
Takes a raw bin and date string, parses the bin(s) and date, then returns
|
17
|
+
a list of bins with their date.
|
18
|
+
"""
|
19
|
+
|
20
|
+
bins = []
|
21
|
+
|
22
|
+
if bin_date_str == "Today":
|
23
|
+
bin_date = datetime.today()
|
24
|
+
elif bin_date_str == "Tomorrow":
|
25
|
+
bin_date = datetime.today() + relativedelta(days=1)
|
26
|
+
else:
|
27
|
+
bin_date = datetime.strptime(bin_date_str, "%A, %B %d, %Y")
|
28
|
+
|
29
|
+
for bin_type in bin_type_str.split(", "):
|
30
|
+
bins.append({
|
31
|
+
"type": bin_type.strip() + " bin",
|
32
|
+
"collectionDate": bin_date.strftime(date_format)
|
33
|
+
})
|
34
|
+
|
35
|
+
return bins
|
36
|
+
|
37
|
+
|
14
38
|
class CouncilClass(AbstractGetBinDataClass):
|
15
39
|
"""
|
16
40
|
Concrete classes have to implement all abstract operations of the
|
@@ -73,37 +97,29 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
73
97
|
bin_date_str = highlight_content.find(
|
74
98
|
"em", {"class": "ui-bin-next-date"}
|
75
99
|
).text.strip()
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
)
|
100
|
+
bin_type_str = highlight_content.find(
|
101
|
+
"p", {"class": "ui-bin-next-type"}
|
102
|
+
).text.strip()
|
80
103
|
|
81
|
-
|
82
|
-
bin_date = datetime.today()
|
83
|
-
elif bin_date_str == "Tomorrow":
|
84
|
-
bin_date = datetime.today() + relativedelta(days=1)
|
85
|
-
else:
|
86
|
-
bin_date = datetime.strptime(bin_date_str, "%A, %B %d, %Y")
|
104
|
+
data["bins"].extend(parse_bin_text(bin_type_str, bin_date_str))
|
87
105
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
}
|
92
|
-
data["bins"].append(dict_data)
|
106
|
+
# Hold bins we already got from next collection, to avoid re-adding
|
107
|
+
# from upcoming collections.
|
108
|
+
used_bins = set(bin["type"] for bin in data["bins"])
|
93
109
|
|
94
110
|
# Upcoming collections
|
95
111
|
upcoming_collections = results[1].find("tbody").find_all("tr")
|
96
112
|
for row in upcoming_collections:
|
97
113
|
columns = row.find_all("td")
|
98
114
|
bin_date_str = columns[0].text.strip()
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
"
|
105
|
-
|
106
|
-
|
107
|
-
|
115
|
+
bin_type_str = columns[1].text.strip()
|
116
|
+
|
117
|
+
# Only add to bin list if not already present.
|
118
|
+
for bin in parse_bin_text(bin_type_str, bin_date_str):
|
119
|
+
if bin["type"] not in used_bins:
|
120
|
+
data["bins"].append(bin)
|
121
|
+
|
122
|
+
# Add to used bins, so future collections are not re-added.
|
123
|
+
used_bins.add(bin["type"])
|
108
124
|
|
109
125
|
return data
|
@@ -1,3 +1,5 @@
|
|
1
|
+
from urllib.parse import quote, urljoin
|
2
|
+
|
1
3
|
from bs4 import BeautifulSoup
|
2
4
|
|
3
5
|
from uk_bin_collection.uk_bin_collection.common import *
|
@@ -12,57 +14,134 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
12
14
|
implementation.
|
13
15
|
"""
|
14
16
|
|
17
|
+
BASE_URL = "https://www.midlothian.gov.uk"
|
18
|
+
DIRECTORY_URL = f"{BASE_URL}/site/scripts/directory_search.php?directoryID=35&keywords={{}}&search=Search"
|
19
|
+
BIN_TYPES = {
|
20
|
+
"Next recycling collection": "Recycling",
|
21
|
+
"Next grey bin collection": "Grey Bin",
|
22
|
+
"Next brown bin collection": "Brown Bin",
|
23
|
+
"Next food bin collection": "Food Bin",
|
24
|
+
}
|
25
|
+
|
15
26
|
def parse_data(self, page: str, **kwargs) -> dict:
|
16
|
-
# Parse the HTML content using BeautifulSoup
|
17
|
-
soup = BeautifulSoup(page.text, features="html.parser")
|
18
27
|
|
19
|
-
|
28
|
+
house_identifier = kwargs.get(
|
29
|
+
"paon", ""
|
30
|
+
).strip() # Could be house number or name
|
31
|
+
postcode = kwargs.get("postcode")
|
32
|
+
|
33
|
+
# Check if both house identifier and postcode are provided
|
34
|
+
if not house_identifier:
|
35
|
+
print("Error: House identifier (number or name) must be provided.")
|
36
|
+
return {"bins": []}
|
37
|
+
|
38
|
+
if not postcode:
|
39
|
+
print("Error: Postcode must be provided.")
|
40
|
+
return {"bins": []}
|
41
|
+
|
42
|
+
check_postcode(postcode)
|
43
|
+
check_paon(house_identifier)
|
44
|
+
|
20
45
|
data = {"bins": []}
|
46
|
+
search_url = self.DIRECTORY_URL.format(quote(postcode))
|
47
|
+
|
48
|
+
try:
|
49
|
+
search_results_html = requests.get(search_url)
|
50
|
+
search_results_html.raise_for_status()
|
51
|
+
|
52
|
+
soup = BeautifulSoup(search_results_html.text, "html.parser")
|
53
|
+
address_link = self._get_result_by_identifier(soup, house_identifier)
|
54
|
+
|
55
|
+
if address_link:
|
56
|
+
collections_url = urljoin(search_url, address_link["href"])
|
57
|
+
bin_collection_data = self._fetch_bin_collection_data(collections_url)
|
58
|
+
|
59
|
+
if bin_collection_data:
|
60
|
+
data["bins"].extend(bin_collection_data)
|
61
|
+
|
62
|
+
except requests.RequestException as e:
|
63
|
+
print(f"Warning: Failed to fetch data from {search_url}. Error: {e}")
|
64
|
+
|
65
|
+
return data
|
21
66
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
67
|
+
def _get_result_by_identifier(self, soup, identifier: str) -> list:
|
68
|
+
"""Extract the result link that matches the given house number or house name."""
|
69
|
+
try:
|
70
|
+
results_list = (
|
71
|
+
soup.find("article", class_="container")
|
72
|
+
.find("h2", text="Search results")
|
73
|
+
.find_next("ul", class_="item-list item-list__rich")
|
74
|
+
)
|
75
|
+
|
76
|
+
pattern = re.compile(re.escape(identifier.lower()) + r"[ ,]")
|
77
|
+
|
78
|
+
for item in results_list.find_all("li"):
|
79
|
+
address_link = item.find("a")
|
80
|
+
if address_link:
|
81
|
+
link_text = address_link.text.strip().lower()
|
82
|
+
if pattern.match(link_text):
|
83
|
+
return address_link
|
84
|
+
|
85
|
+
print(f"Warning: No results found for identifier '{identifier}'.")
|
86
|
+
return None # Return None if no match is found
|
87
|
+
|
88
|
+
except AttributeError as e:
|
89
|
+
print(f"Warning: Could not find the search results. Error: {e}")
|
90
|
+
return None # Return None if no result found
|
91
|
+
|
92
|
+
def _fetch_bin_collection_data(self, url: str) -> list:
|
93
|
+
"""Fetch and parse bin collection data from the given URL."""
|
94
|
+
try:
|
95
|
+
bin_collection_html = requests.get(url)
|
96
|
+
bin_collection_html.raise_for_status()
|
97
|
+
|
98
|
+
soup = BeautifulSoup(bin_collection_html.text, "html.parser")
|
99
|
+
bin_collections = soup.find("ul", class_="data-table")
|
100
|
+
|
101
|
+
if bin_collections:
|
102
|
+
return self._parse_bin_collection_items(
|
103
|
+
bin_collections.find_all("li")[2:] # Skip the first two items
|
104
|
+
)
|
105
|
+
|
106
|
+
except requests.RequestException as e:
|
107
|
+
print(
|
108
|
+
f"Warning: Failed to fetch bin collection data from {url}. Error: {e}"
|
109
|
+
)
|
110
|
+
|
111
|
+
return [] # Return an empty list on error
|
112
|
+
|
113
|
+
def _parse_bin_collection_items(self, bin_items: list) -> list:
|
114
|
+
"""Parse bin collection items into a structured format."""
|
115
|
+
parsed_bins = []
|
116
|
+
|
117
|
+
for bin_item in bin_items:
|
118
|
+
bin_type = None
|
119
|
+
try:
|
120
|
+
if bin_item.h2 and bin_item.h2.text.strip() in self.BIN_TYPES:
|
121
|
+
bin_type = self.BIN_TYPES[bin_item.h2.text.strip()]
|
44
122
|
|
45
123
|
bin_collection_date = None
|
46
|
-
|
47
|
-
if bin.div and bin.div.text.strip():
|
124
|
+
if bin_item.div and bin_item.div.text.strip():
|
48
125
|
try:
|
49
|
-
# Parse the collection date from the div text and format it
|
50
126
|
bin_collection_date = datetime.strptime(
|
51
|
-
|
52
|
-
"%A %d/%m/%Y",
|
127
|
+
bin_item.div.text.strip(), "%A %d/%m/%Y"
|
53
128
|
).strftime(date_format)
|
54
129
|
except ValueError:
|
55
|
-
|
56
|
-
|
130
|
+
print(
|
131
|
+
f"Warning: Date parsing failed for {bin_item.div.text.strip()}."
|
132
|
+
)
|
57
133
|
|
58
|
-
# If both bin type and collection date are identified, add to the data
|
59
134
|
if bin_type and bin_collection_date:
|
60
|
-
|
135
|
+
parsed_bins.append(
|
61
136
|
{
|
62
137
|
"type": bin_type,
|
63
138
|
"collectionDate": bin_collection_date,
|
64
139
|
}
|
65
140
|
)
|
141
|
+
else:
|
142
|
+
print(f"Warning: Missing data for bin item: {bin_item}")
|
66
143
|
|
67
|
-
|
68
|
-
|
144
|
+
except Exception as e:
|
145
|
+
print(f"Warning: An error occurred while parsing bin item. Error: {e}")
|
146
|
+
|
147
|
+
return parsed_bins
|
@@ -2,7 +2,7 @@ uk_bin_collection/README.rst,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
|
|
2
2
|
uk_bin_collection/tests/council_feature_input_parity.py,sha256=DO6Mk4ImYgM5ZCZ-cutwz5RoYYWZRLYx2tr6zIs_9Rc,3843
|
3
3
|
uk_bin_collection/tests/features/environment.py,sha256=VQZjJdJI_kZn08M0j5cUgvKT4k3iTw8icJge1DGOkoA,127
|
4
4
|
uk_bin_collection/tests/features/validate_council_outputs.feature,sha256=SJK-Vc737hrf03tssxxbeg_JIvAH-ddB8f6gU1LTbuQ,251
|
5
|
-
uk_bin_collection/tests/input.json,sha256=
|
5
|
+
uk_bin_collection/tests/input.json,sha256=nnUn45wSbrroBexbVAfuqwgFblWnDYA-SA-JcKOrJ2A,74417
|
6
6
|
uk_bin_collection/tests/output.schema,sha256=ZwKQBwYyTDEM4G2hJwfLUVM-5v1vKRvRK9W9SS1sd18,1086
|
7
7
|
uk_bin_collection/tests/step_defs/step_helpers/file_handler.py,sha256=Ygzi4V0S1MIHqbdstUlIqtRIwnynvhu4UtpweJ6-5N8,1474
|
8
8
|
uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=LrOSt_loA1Mw3vTqaO2LpaDMu7rYJy6k5Kr-EOBln7s,3424
|
@@ -19,7 +19,7 @@ uk_bin_collection/uk_bin_collection/councils/ArunCouncil.py,sha256=yfhthv9nuogP1
|
|
19
19
|
uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py,sha256=LouqjspEMt1TkOGqWHs2zkxwOETIy3n7p64uKIlAgUg,2401
|
20
20
|
uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py,sha256=W7QBx6Mgso8RYosuXsaYo3GGNAu-tiyBSmuYxr1JSOU,1707
|
21
21
|
uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py,sha256=Sd4-pbv0QZsR7soxvXYqsfdOUIqZqS6notyoZthG77s,9182
|
22
|
-
uk_bin_collection/uk_bin_collection/councils/BarnsleyMBCouncil.py,sha256=
|
22
|
+
uk_bin_collection/uk_bin_collection/councils/BarnsleyMBCouncil.py,sha256=jHLSfRU9lPDUn249mUgNPc23UElU9SKmDon917S6ct0,4733
|
23
23
|
uk_bin_collection/uk_bin_collection/councils/BasildonCouncil.py,sha256=UBHINX8WknQfnHU43Wp5kXAqmHl00aWM0Fh8NQdWBZA,3244
|
24
24
|
uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py,sha256=VPWGljnH4C3q8qs5ZmCtqjNjgWQvviALzjk00q3EZeQ,2632
|
25
25
|
uk_bin_collection/uk_bin_collection/councils/BathAndNorthEastSomersetCouncil.py,sha256=N_TPiIv8VBzN3rY0p3JtLlxSEru-6k1wW4UNIhN5X1M,3709
|
@@ -117,7 +117,7 @@ uk_bin_collection/uk_bin_collection/councils/MansfieldDistrictCouncil.py,sha256=
|
|
117
117
|
uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py,sha256=3Y2Un4xXo1sCcMsudynODSzocV_mMofWkX2JqONDb5o,1997
|
118
118
|
uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py,sha256=oOWwU5FSgGej2Mv7FQ66N-EzS5nZgmGsd0WnfLWUc1I,5238
|
119
119
|
uk_bin_collection/uk_bin_collection/councils/MidSussexDistrictCouncil.py,sha256=AZgC9wmDLEjUOtIFvf0ehF5LHturXTH4DkE3ioPSVBA,6254
|
120
|
-
uk_bin_collection/uk_bin_collection/councils/MidlothianCouncil.py,sha256=
|
120
|
+
uk_bin_collection/uk_bin_collection/councils/MidlothianCouncil.py,sha256=mM5-itJDNhjsT5UEjSFfWppmfmPFSns4u_1QblewuFU,5605
|
121
121
|
uk_bin_collection/uk_bin_collection/councils/MiltonKeynesCityCouncil.py,sha256=3olsWa77L34vz-c7NgeGK9xmNuR4Ws_oAk5D4UpIkPw,2005
|
122
122
|
uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py,sha256=xWR5S0gwQu9gXxjl788Wux1KaC0CT7ZFw0iXuRLZCEM,5599
|
123
123
|
uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py,sha256=ychYR2nsyk2UIb8tjWaKrLUT4hxSsHN558l3RqZ0mjw,5635
|
@@ -227,8 +227,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
|
|
227
227
|
uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=4s9ODGPAwPqwXc8SrTX5Wlfmizs3_58iXUtHc4Ir86o,1162
|
228
228
|
uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
|
229
229
|
uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=YvmHfZqanwrJ8ToGch34x-L-7yPe31nB_x77_Mgl_vo,4545
|
230
|
-
uk_bin_collection-0.105.
|
231
|
-
uk_bin_collection-0.105.
|
232
|
-
uk_bin_collection-0.105.
|
233
|
-
uk_bin_collection-0.105.
|
234
|
-
uk_bin_collection-0.105.
|
230
|
+
uk_bin_collection-0.105.1.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
|
231
|
+
uk_bin_collection-0.105.1.dist-info/METADATA,sha256=zoE5z9wR8LOa65itnqEBAZzWvtMRLYyUFbt6GWRzVuQ,17630
|
232
|
+
uk_bin_collection-0.105.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
233
|
+
uk_bin_collection-0.105.1.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
|
234
|
+
uk_bin_collection-0.105.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
{uk_bin_collection-0.105.0.dist-info → uk_bin_collection-0.105.1.dist-info}/entry_points.txt
RENAMED
File without changes
|