uk_bin_collection 0.157.0__py3-none-any.whl → 0.158.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/uk_bin_collection/councils/LondonBoroughSutton.py +60 -49
- {uk_bin_collection-0.157.0.dist-info → uk_bin_collection-0.158.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.157.0.dist-info → uk_bin_collection-0.158.0.dist-info}/RECORD +6 -6
- {uk_bin_collection-0.157.0.dist-info → uk_bin_collection-0.158.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.157.0.dist-info → uk_bin_collection-0.158.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.157.0.dist-info → uk_bin_collection-0.158.0.dist-info}/entry_points.txt +0 -0
@@ -1,30 +1,24 @@
|
|
1
|
-
from time import sleep
|
2
|
-
|
3
1
|
import requests
|
4
2
|
from bs4 import BeautifulSoup
|
3
|
+
from datetime import datetime
|
4
|
+
import re
|
5
|
+
from time import sleep
|
5
6
|
|
6
7
|
from uk_bin_collection.uk_bin_collection.common import *
|
7
8
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
9
|
|
10
|
+
def remove_ordinal_indicator_from_date_string(date_str):
|
11
|
+
return re.sub(r'(\d+)(st|nd|rd|th)', r'\1', date_str)
|
9
12
|
|
10
|
-
# import the wonderful Beautiful Soup and the URL grabber
|
11
13
|
class CouncilClass(AbstractGetBinDataClass):
|
12
|
-
"""
|
13
|
-
Concrete classes have to implement all abstract operations of the
|
14
|
-
base class. They can also override some operations with a default
|
15
|
-
implementation.
|
16
|
-
"""
|
17
14
|
|
18
15
|
def parse_data(self, page: str, **kwargs) -> dict:
|
19
|
-
|
20
16
|
user_uprn = kwargs.get("uprn")
|
21
|
-
# check_uprn(user_uprn)
|
22
17
|
bindata = {"bins": []}
|
23
18
|
|
24
19
|
URI = f"https://waste-services.sutton.gov.uk/waste/{user_uprn}"
|
25
20
|
|
26
21
|
s = requests.Session()
|
27
|
-
|
28
22
|
r = s.get(URI)
|
29
23
|
while "Loading your bin days..." in r.text:
|
30
24
|
sleep(2)
|
@@ -32,48 +26,65 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
32
26
|
r.raise_for_status()
|
33
27
|
|
34
28
|
soup = BeautifulSoup(r.content, "html.parser")
|
35
|
-
|
36
29
|
current_year = datetime.now().year
|
37
30
|
next_year = current_year + 1
|
38
31
|
|
39
|
-
|
40
|
-
|
32
|
+
# Find all h3 headers (bin types)
|
33
|
+
services = soup.find_all("h3")
|
41
34
|
for service in services:
|
42
|
-
bin_type = service.get_text(
|
43
|
-
|
44
|
-
) # Bin type name (e.g., 'Food waste', 'Mixed recycling')
|
45
|
-
if bin_type == "Bulky Waste":
|
35
|
+
bin_type = service.get_text(strip=True)
|
36
|
+
if "Bulky Waste" in bin_type:
|
46
37
|
continue
|
47
|
-
service_details = service.find_next("div", class_="govuk-grid-row")
|
48
|
-
|
49
|
-
next_collection = (
|
50
|
-
(
|
51
|
-
service_details.find("dt", string="Next collection")
|
52
|
-
.find_next_sibling("dd")
|
53
|
-
.get_text(strip=True)
|
54
|
-
)
|
55
|
-
.replace("(this collection has been adjusted from its usual time)", "")
|
56
|
-
.strip()
|
57
|
-
)
|
58
|
-
|
59
|
-
next_collection = datetime.strptime(
|
60
|
-
remove_ordinal_indicator_from_date_string(next_collection),
|
61
|
-
"%A, %d %B",
|
62
|
-
)
|
63
|
-
|
64
|
-
if (datetime.now().month == 12) and (next_collection.month == 1):
|
65
|
-
next_collection = next_collection.replace(year=next_year)
|
66
|
-
else:
|
67
|
-
next_collection = next_collection.replace(year=current_year)
|
68
|
-
|
69
|
-
dict_data = {
|
70
|
-
"type": bin_type,
|
71
|
-
"collectionDate": next_collection.strftime("%d/%m/%Y"),
|
72
|
-
}
|
73
|
-
bindata["bins"].append(dict_data)
|
74
|
-
|
75
|
-
bindata["bins"].sort(
|
76
|
-
key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
|
77
|
-
)
|
78
38
|
|
39
|
+
# Find the next element (next sibling) which is likely a paragraph with date info
|
40
|
+
next_sib = service.find_next_sibling()
|
41
|
+
while next_sib and getattr(next_sib, 'name', None) not in [None, 'p']:
|
42
|
+
next_sib = next_sib.find_next_sibling()
|
43
|
+
|
44
|
+
next_coll = None
|
45
|
+
if next_sib:
|
46
|
+
text = next_sib.get_text() if hasattr(next_sib, 'get_text') else str(next_sib)
|
47
|
+
match = re.search(r"Next collection\s*([A-Za-z]+,? \d{1,2}(?:st|nd|rd|th)? [A-Za-z]+)", text)
|
48
|
+
if match:
|
49
|
+
next_coll = match.group(1)
|
50
|
+
else:
|
51
|
+
# Sometimes the text may be attached without a space after 'Next collection'
|
52
|
+
match = re.search(r"Next collection([A-Za-z]+,? \d{1,2}(?:st|nd|rd|th)? [A-Za-z]+)", text)
|
53
|
+
if match:
|
54
|
+
next_coll = match.group(1)
|
55
|
+
|
56
|
+
# Try several siblings forward if not found
|
57
|
+
if not next_coll:
|
58
|
+
sib_try = service
|
59
|
+
for _ in range(3):
|
60
|
+
if sib_try:
|
61
|
+
sib_try = sib_try.find_next_sibling()
|
62
|
+
else:
|
63
|
+
break
|
64
|
+
if sib_try:
|
65
|
+
text = sib_try.get_text() if hasattr(sib_try, 'get_text') else str(sib_try)
|
66
|
+
match = re.search(r"Next collection\s*([A-Za-z]+,? \d{1,2}(?:st|nd|rd|th)? [A-Za-z]+)", text)
|
67
|
+
if match:
|
68
|
+
next_coll = match.group(1)
|
69
|
+
break
|
70
|
+
|
71
|
+
if next_coll:
|
72
|
+
next_coll = remove_ordinal_indicator_from_date_string(next_coll)
|
73
|
+
try:
|
74
|
+
next_collection = datetime.strptime(next_coll, "%A, %d %B")
|
75
|
+
except ValueError:
|
76
|
+
continue
|
77
|
+
|
78
|
+
if (datetime.now().month == 12 and next_collection.month == 1):
|
79
|
+
next_collection = next_collection.replace(year=next_year)
|
80
|
+
else:
|
81
|
+
next_collection = next_collection.replace(year=current_year)
|
82
|
+
|
83
|
+
dict_data = {
|
84
|
+
"type": bin_type,
|
85
|
+
"collectionDate": next_collection.strftime("%d/%m/%Y"),
|
86
|
+
}
|
87
|
+
bindata["bins"].append(dict_data)
|
88
|
+
|
89
|
+
bindata["bins"].sort(key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y"))
|
79
90
|
return bindata
|
@@ -182,7 +182,7 @@ uk_bin_collection/uk_bin_collection/councils/LondonBoroughLambeth.py,sha256=r9D5
|
|
182
182
|
uk_bin_collection/uk_bin_collection/councils/LondonBoroughLewisham.py,sha256=d8rlJDTbY3nj-Zjg6iwvwfe-X13Gq86DGGW6QkQAUW0,5310
|
183
183
|
uk_bin_collection/uk_bin_collection/councils/LondonBoroughOfRichmondUponThames.py,sha256=H1ej7Bw1zW3kvxCqz0W1137YIZCdG5AmhAyVK3cvp00,3987
|
184
184
|
uk_bin_collection/uk_bin_collection/councils/LondonBoroughRedbridge.py,sha256=A_6Sis5hsF53Th04KeadHRasGbpAm6aoaWJ6X8eC4Y8,6604
|
185
|
-
uk_bin_collection/uk_bin_collection/councils/LondonBoroughSutton.py,sha256=
|
185
|
+
uk_bin_collection/uk_bin_collection/councils/LondonBoroughSutton.py,sha256=xixtPqXJ6Wms14e-Sc5Nq8ybbcrvZ65mzgpOhNJw5tY,3646
|
186
186
|
uk_bin_collection/uk_bin_collection/councils/LutonBoroughCouncil.py,sha256=vScUi_R8FnBddii2_zLlZBLxuh85mKmCm8nKW3zxky0,2758
|
187
187
|
uk_bin_collection/uk_bin_collection/councils/MaidstoneBoroughCouncil.py,sha256=T9R1_ciKitxGhGUcNu2UnSZszvUBuLVKReRA1rIlRIg,4280
|
188
188
|
uk_bin_collection/uk_bin_collection/councils/MaldonDistrictCouncil.py,sha256=PMVt2XFggttPmbWyrBrHJ-W6R_6-0ux1BkY1kj1IKzg,1997
|
@@ -347,8 +347,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
|
|
347
347
|
uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=QD4v4xpsEE0QheR_fGaNOIRMc2FatcUfKkkhAhseyVU,1159
|
348
348
|
uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
|
349
349
|
uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=Qb76X46V0UMZJwO8zMNPvnVY7jNa-bmTlrirDi1tuJA,4553
|
350
|
-
uk_bin_collection-0.
|
351
|
-
uk_bin_collection-0.
|
352
|
-
uk_bin_collection-0.
|
353
|
-
uk_bin_collection-0.
|
354
|
-
uk_bin_collection-0.
|
350
|
+
uk_bin_collection-0.158.0.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
|
351
|
+
uk_bin_collection-0.158.0.dist-info/METADATA,sha256=ihuNsB2pzq5yFtT06ae1WmUvgPCkLc8gJHt-zAHNl0Q,26688
|
352
|
+
uk_bin_collection-0.158.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
353
|
+
uk_bin_collection-0.158.0.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
|
354
|
+
uk_bin_collection-0.158.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
{uk_bin_collection-0.157.0.dist-info → uk_bin_collection-0.158.0.dist-info}/entry_points.txt
RENAMED
File without changes
|