uk_bin_collection 0.148.6__py3-none-any.whl → 0.150.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1436,6 +1436,13 @@
1436
1436
  "wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search).",
1437
1437
  "LAD24CD": "E06000035"
1438
1438
  },
1439
+ "MeltonBoroughCouncil": {
1440
+ "uprn": "100030540956",
1441
+ "url": "https://my.melton.gov.uk/collections",
1442
+ "wiki_name": "Melton",
1443
+ "wiki_note": "To get the UPRN, you can use [FindMyAddress](https://www.findmyaddress.co.uk/search).",
1444
+ "LAD24CD": "E07000133"
1445
+ },
1439
1446
  "MertonCouncil": {
1440
1447
  "url": "https://myneighbourhood.merton.gov.uk/wasteservices/WasteServices.aspx?ID=25936129",
1441
1448
  "wiki_command_url_override": "https://myneighbourhood.merton.gov.uk/Wasteservices/WasteServices.aspx?ID=XXXXXXXX",
@@ -1483,7 +1490,7 @@
1483
1490
  "MiddlesbroughCouncil": {
1484
1491
  "house_number": "12 Constantine Court Park Road North, Middlesbrough",
1485
1492
  "skip_get_url": true,
1486
- "url": "https://www.midsussex.gov.uk/waste-recycling/bin-collection/",
1493
+ "url": "https://www.middlesbrough.gov.uk/recycling-and-rubbish/bin-collection-dates/",
1487
1494
  "web_driver": "http://selenium:4444",
1488
1495
  "wiki_name": "Middlesbrough",
1489
1496
  "wiki_note": "Pass the entire address without postcode as it appears when you type it on the website. This parser requires a Selenium webdriver.",
@@ -1532,7 +1539,7 @@
1532
1539
  "LAD24CD": "W06000021"
1533
1540
  },
1534
1541
  "MorayCouncil": {
1535
- "uprn": "28841",
1542
+ "uprn": "45438",
1536
1543
  "url": "https://bindayfinder.moray.gov.uk/",
1537
1544
  "wiki_name": "Moray",
1538
1545
  "wiki_note": "Find your property ID by going to (https://bindayfinder.moray.gov.uk), search for your property and extracting the ID from the URL. i.e. (https://bindayfinder.moray.gov.uk/disp_bins.php?id=00028841)",
@@ -1772,6 +1779,13 @@
1772
1779
  "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.",
1773
1780
  "LAD24CD": "E07000178"
1774
1781
  },
1782
+ "PembrokeshireCountyCouncil": {
1783
+ "url": "https://nearest.pembrokeshire.gov.uk/property/100100278790",
1784
+ "wiki_command_url_override": "https://nearest.pembrokeshire.gov.uk/property/XXXXXXXXXX",
1785
+ "wiki_name": "Pembrokeshire",
1786
+ "wiki_note": "Replace XXXXXXXXXX with your UPRN. You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find it.",
1787
+ "LAD24CD": "W06000009"
1788
+ },
1775
1789
  "PeterboroughCityCouncil": {
1776
1790
  "house_number": "7 Arundel Road, Peterborough, PE4 6JJ",
1777
1791
  "postcode": "PE4 6JJ",
@@ -0,0 +1,82 @@
1
+ import json
2
+ from datetime import datetime, timedelta
3
+
4
+ import requests
5
+ from bs4 import BeautifulSoup
6
+
7
+ from uk_bin_collection.uk_bin_collection.common import *
8
+ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
9
+
10
+
11
+ # import the wonderful Beautiful Soup and the URL grabber
12
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection parser for Melton Borough Council.

    Concrete classes have to implement all abstract operations of the
    base class. This one requests the council's collections page for a
    UPRN and scrapes each bin type and its collection dates from the HTML.
    """

    def extract_dates(self, date_string: str) -> list:
        """
        Extract dates from strings like "01/05/2025, and then 15/05/2025".

        Args:
            date_string: Text holding zero or more DD/MM/YYYY dates separated
                by commas and/or the phrase "and then".

        Returns:
            A list of datetime objects in the order the dates appear.
            Fragments that do not parse as DD/MM/YYYY are silently skipped.
        """
        # Normalise the "and then" separator to a comma so one split suffices.
        fragments = date_string.replace("and then", ",").split(",")

        dates = []
        for fragment in fragments:
            fragment = fragment.strip()
            if not fragment:
                continue
            try:
                dates.append(datetime.strptime(fragment, "%d/%m/%Y"))
            except ValueError:
                # Not a date (e.g. surrounding prose) - ignore this fragment.
                continue

        return dates

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Fetch and parse the bin collections for a property.

        Args:
            page: Unused; the collections page is requested here from the UPRN.
            **kwargs: Must contain "uprn", the property reference that is
                validated with check_uprn and placed in the request URL.

        Returns:
            A dict of the form {"bins": [{"type": ..., "collectionDate": ...}]}
            ordered by collection date. Dates are rendered with the
            project-level ``date_format``.
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)

        url = f"https://my.melton.gov.uk/set-location?id={user_uprn}&redirect=collections&rememberloc="
        # Without a timeout an unresponsive server would block the caller forever.
        response = requests.get(url, timeout=30)
        soup = BeautifulSoup(response.text, "html.parser")

        collections = []

        # Each bin is rendered as an <li> whose class starts with "box-item".
        box_items = soup.find_all("li", class_=lambda x: x and x.startswith("box-item"))

        for box in box_items:
            heading = box.find("h2")
            # Skip boxes without a heading and the "Missed bin" section.
            if not heading or "Missed bin" in heading.text:
                continue
            bin_name = heading.text.strip()

            # The collection dates live in the box's first <strong> tag.
            dates_element = box.find("strong")
            if not dates_element:
                continue

            # One output entry per date listed for this bin type.
            for collection_date in self.extract_dates(dates_element.text.strip()):
                collections.append((bin_name, collection_date))

        # Sort chronologically (stable, so same-day bins keep page order).
        collections.sort(key=lambda item: item[1])

        data = {"bins": []}
        for bin_name, collection_date in collections:
            data["bins"].append(
                {
                    "type": bin_name,
                    "collectionDate": collection_date.strftime(date_format),
                }
            )

        return data
@@ -14,52 +14,70 @@ class CouncilClass(AbstractGetBinDataClass):
14
14
  """
15
15
 
16
16
  def parse_data(self, page: str, **kwargs) -> dict:
17
-
18
17
  user_uprn = kwargs.get("uprn")
18
+ print(f"Using UPRN: {user_uprn}") # Debug
19
19
  bindata = {"bins": []}
20
20
 
21
21
  user_uprn = user_uprn.zfill(8)
22
22
 
23
- year = datetime.today().year
24
- response = requests.get(
25
- f"https://bindayfinder.moray.gov.uk/cal_{year}_view.php",
26
- params={"id": user_uprn},
27
- )
28
- if response.status_code != 200:
29
- # fall back to known good calendar URL
30
- response = requests.get(
31
- "https://bindayfinder.moray.gov.uk/cal_2024_view.php",
32
- params={"id": user_uprn},
33
- )
23
+ url = f"https://bindayfinder.moray.gov.uk/disp_bins.php?id={user_uprn}"
24
+
25
+ # year = datetime.today().year
26
+ # url = f"https://bindayfinder.moray.gov.uk/cal_{year}_view.php"
27
+ print(f"Trying URL: {url}") # Debug
28
+
29
+ response = requests.get(url)
30
+ print(f"Response status code: {response.status_code}") # Debug
31
+
32
+ # if response.status_code != 200:
33
+ # fallback_url = "https://bindayfinder.moray.gov.uk/cal_2024_view.php"
34
+ # print(f"Falling back to: {fallback_url}") # Debug
35
+ # response = requests.get(
36
+ # fallback_url,
37
+ # params={"id": user_uprn},
38
+ # )
39
+ # print(f"Fallback response status: {response.status_code}") # Debug
40
+
34
41
  soup = BeautifulSoup(response.text, "html.parser")
35
42
 
36
- bin_types = {
37
- "G": "Green",
38
- "B": "Brown",
39
- "P": "Purple",
40
- "C": "Blue",
41
- "O": "Orange",
42
- }
43
-
44
- for month_container in soup.findAll("div", class_="month-container"):
45
- for div in month_container.findAll("div"):
46
- if "month-header" in div["class"]:
47
- month = div.text
48
- elif div["class"] and div["class"][0] in ["B", "GPOC", "GBPOC"]:
49
- bins = div["class"][0]
50
- dom = int(div.text)
51
- for i in bins:
43
+ # Find all container_images divs
44
+ container_images = soup.find_all("div", class_="container_images")
45
+ print(f"Found {len(container_images)} container images") # Debug
46
+
47
+ for container in container_images:
48
+ # Get bin type from image alt text
49
+ img = container.find("img")
50
+ if img and img.get("alt"):
51
+ # Use the full alt text as one bin type instead of splitting
52
+ bin_type = img["alt"]
53
+ print(f"Found bin type: {bin_type}") # Debug
54
+
55
+ # Get collection date from binz_txt
56
+ date_text = container.find("div", class_="binz_txt")
57
+ if date_text:
58
+ date_str = date_text.text
59
+ print(f"Found date text: {date_str}") # Debug
60
+
61
+ # Extract just the date portion
62
+ import re
63
+
64
+ date_match = re.search(r"(\d{1,2}\s+[A-Za-z]+\s+\d{4})", date_str)
65
+ if date_match:
66
+ date_portion = date_match.group(1)
67
+ try:
68
+ # Convert the date string to the required format
69
+ parsed_date = datetime.strptime(date_portion, "%d %B %Y")
70
+ collection_date = parsed_date.strftime("%d/%m/%Y")
71
+ print(f"Parsed date: {collection_date}") # Debug
72
+
52
73
  dict_data = {
53
- "type": bin_types.get(i),
54
- "collectionDate": datetime.strptime(
55
- f"{dom} {month} {year}",
56
- "%d %B %Y",
57
- ).strftime("%d/%m/%Y"),
74
+ "type": bin_type,
75
+ "collectionDate": collection_date,
58
76
  }
59
77
  bindata["bins"].append(dict_data)
78
+ except ValueError as e:
79
+ print(f"Error parsing date: {e}") # Debug
80
+ continue
60
81
 
61
- bindata["bins"].sort(
62
- key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
63
- )
64
-
82
+ print(f"Final bindata: {bindata}") # Debug
65
83
  return bindata
@@ -0,0 +1,96 @@
1
+ from datetime import datetime
2
+
3
+ import bs4.element
4
+ from bs4 import BeautifulSoup
5
+
6
+ from uk_bin_collection.uk_bin_collection.common import *
7
+ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
8
+
9
+
10
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection parser for Pembrokeshire County Council.

    Concrete classes have to implement all abstract operations of the
    base class. This one scrapes bin types and collection dates from an
    already-fetched property page.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Parse bin collections out of the fetched property page.

        Args:
            page: The fetched page; despite the ``str`` annotation the code
                reads ``page.text``, so it must expose a ``text`` attribute.
            **kwargs: Not used by this parser.

        Returns:
            A dict of the form {"bins": [{"type": ..., "collectionDate": ...}]}
            with dates formatted as DD/MM/YYYY and entries ordered by date.

        Raises:
            ValueError: If the page has no bin collection divs, or if none of
                them yields a bin type together with a valid date.
        """
        # Make a BeautifulSoup object
        soup = BeautifulSoup(page.text, features="html.parser")

        data = {"bins": []}

        # Locate the section containing bin collection data
        bin_collection_divs = soup.find_all(
            "div", class_="col-6 col-md-4 text-center mb-3"
        )

        if not bin_collection_divs:
            raise ValueError("No bin collection data found in the provided HTML.")

        for bin_div in bin_collection_divs:
            # Only divs carrying a council bin image are bin collection entries.
            img_tag = bin_div.find("img")
            if (
                not img_tag
                or not img_tag.get("src")
                or "pembrokeshire.gov.uk/images" not in img_tag["src"]
            ):
                continue

            # Extract bin type - first try the image title
            bin_type = None
            if img_tag.get("title"):
                bin_type = img_tag["title"].strip()

            # If no title, fall back to the div's visible text.
            if not bin_type:
                # Keep non-blank text nodes that are not inside a <strong> tag
                # (the <strong> holds the date). The exclusion is parenthesised
                # so blank nodes are always dropped.
                text_nodes = [
                    text.strip()
                    for text in bin_div.find_all(string=True, recursive=True)
                    if text.strip()
                    and not (
                        isinstance(text.parent, bs4.element.Tag)
                        and text.parent.name == "strong"
                    )
                ]
                if text_nodes:
                    bin_type = " ".join(text_nodes).strip()

            if not bin_type:
                continue  # Skip if we couldn't find a bin type

            # Extract collection date
            bin_date_tag = bin_div.find("strong")
            if not bin_date_tag:
                continue  # Skip if no date found

            bin_date = bin_date_tag.text.strip()

            try:
                # Round-trip through datetime to validate and zero-pad the date.
                collection_date = datetime.strptime(bin_date, "%d/%m/%Y")
                formatted_date = collection_date.strftime("%d/%m/%Y")
            except ValueError:
                continue  # Skip if date parsing fails

            data["bins"].append(
                {
                    "type": bin_type,
                    "collectionDate": formatted_date,
                }
            )

        if not data["bins"]:
            raise ValueError(
                "No valid bin collection data could be parsed from the HTML."
            )

        # Sort the bins by collection date
        data["bins"].sort(
            key=lambda x: datetime.strptime(x["collectionDate"], "%d/%m/%Y")
        )

        return data
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: uk_bin_collection
3
- Version: 0.148.6
3
+ Version: 0.150.0
4
4
  Summary: Python Lib to collect UK Bin Data
5
5
  Author: Robert Bradley
6
6
  Author-email: robbrad182@gmail.com
@@ -7,7 +7,7 @@ uk_bin_collection/tests/council_feature_input_parity.py,sha256=DO6Mk4ImYgM5ZCZ-c
7
7
  uk_bin_collection/tests/features/environment.py,sha256=VQZjJdJI_kZn08M0j5cUgvKT4k3iTw8icJge1DGOkoA,127
8
8
  uk_bin_collection/tests/features/validate_council_outputs.feature,sha256=SJK-Vc737hrf03tssxxbeg_JIvAH-ddB8f6gU1LTbuQ,251
9
9
  uk_bin_collection/tests/generate_map_test_results.py,sha256=CKnGK2ZgiSXomRGkomX90DitgMP-X7wkHhyKORDcL2E,1144
10
- uk_bin_collection/tests/input.json,sha256=WD2BCIhsrpbU_53tvTPnHW7MQXiOYowTzkKN4UtuI9E,131240
10
+ uk_bin_collection/tests/input.json,sha256=A-3wFKuCCIS11YpMknZHsGtz3cv-cojKuu6lFqfPkIY,131984
11
11
  uk_bin_collection/tests/output.schema,sha256=ZwKQBwYyTDEM4G2hJwfLUVM-5v1vKRvRK9W9SS1sd18,1086
12
12
  uk_bin_collection/tests/step_defs/step_helpers/file_handler.py,sha256=Ygzi4V0S1MIHqbdstUlIqtRIwnynvhu4UtpweJ6-5N8,1474
13
13
  uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=VZ0a81sioJULD7syAYHjvK_-nT_Rd36tUyzPetSA0gk,3475
@@ -184,6 +184,7 @@ uk_bin_collection/uk_bin_collection/councils/MalvernHillsDC.py,sha256=iQG0EkX2np
184
184
  uk_bin_collection/uk_bin_collection/councils/ManchesterCityCouncil.py,sha256=RY301_82z3-xInGai5ocT7rzoV75ATbf0N7uxn8Z9LE,3110
185
185
  uk_bin_collection/uk_bin_collection/councils/MansfieldDistrictCouncil.py,sha256=F5AiTxImrnjE1k3ry96bfstOf5XSNBJS_4qqmymmh3w,1386
186
186
  uk_bin_collection/uk_bin_collection/councils/MedwayCouncil.py,sha256=nBJSv09OUOascrfNu1ek1wNzE9ONu5ZkrBU-1qwDHJ0,1278
187
+ uk_bin_collection/uk_bin_collection/councils/MeltonBoroughCouncil.py,sha256=Xql_ydrk59nx3dPok6YSVZ7mk8GJT2-IKOGqILgE0xU,2802
187
188
  uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py,sha256=xsOSX4KCcUHHo4BHB5JhFC9r5Q-h586Vk-5-X2VAJl0,2809
188
189
  uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py,sha256=oOWwU5FSgGej2Mv7FQ66N-EzS5nZgmGsd0WnfLWUc1I,5238
189
190
  uk_bin_collection/uk_bin_collection/councils/MidDevonCouncil.py,sha256=8MxqGgOJVseMkrTmEMT0EyDW7UMbXMoa5ZcJ2nD55Ew,3367
@@ -195,7 +196,7 @@ uk_bin_collection/uk_bin_collection/councils/MidlothianCouncil.py,sha256=-VKvdIh
195
196
  uk_bin_collection/uk_bin_collection/councils/MiltonKeynesCityCouncil.py,sha256=7e2pGBLCw24pNItHeI9jkxQ3rEOZ4WC4zVlbvKYGdXE,2600
196
197
  uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py,sha256=xWR5S0gwQu9gXxjl788Wux1KaC0CT7ZFw0iXuRLZCEM,5599
197
198
  uk_bin_collection/uk_bin_collection/councils/MonmouthshireCountyCouncil.py,sha256=PC2tui10S-DXmiKUqXZun5MInIgqqQjtT5wII1K_9a0,2532
198
- uk_bin_collection/uk_bin_collection/councils/MorayCouncil.py,sha256=jsHCQ_aV_bG0GPfF7h6g5TP84sroplYC5k2M6iEKiTw,2265
199
+ uk_bin_collection/uk_bin_collection/councils/MorayCouncil.py,sha256=B8unofp2x1HF72QT_3E0Iew78PWMywjzkNDJdWof2Tc,3309
199
200
  uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py,sha256=p95UYogx3_WJ_1kgfeH5kGQdrZ3YyEJjVuZ7WOOPAvs,5710
200
201
  uk_bin_collection/uk_bin_collection/councils/NewForestCouncil.py,sha256=Vdmv_p75OJwSeltWE5wZhE8wy5XW0CIKIn1cKNvI0pQ,5336
201
202
  uk_bin_collection/uk_bin_collection/councils/NewarkAndSherwoodDC.py,sha256=lAleYfCGUWCKOi7Ye_cjgfpI3pWwTcFctlYmh0hjebM,2140
@@ -224,6 +225,7 @@ uk_bin_collection/uk_bin_collection/councils/NuneatonBedworthBoroughCouncil.py,s
224
225
  uk_bin_collection/uk_bin_collection/councils/OadbyAndWigstonBoroughCouncil.py,sha256=Kgy5HA0xZ9hR4_cAydPfOfskhGUB4j93AQF2-9Fj-Cg,2179
225
226
  uk_bin_collection/uk_bin_collection/councils/OldhamCouncil.py,sha256=9dlesCxNoVXlmQaqZj7QFh00smnJbm1Gnjkr_Uvzurs,1771
226
227
  uk_bin_collection/uk_bin_collection/councils/OxfordCityCouncil.py,sha256=d_bY0cXRDH4kSoWGGCTNN61MNErapSOf2WSTYDJr2r8,2318
228
+ uk_bin_collection/uk_bin_collection/councils/PembrokeshireCountyCouncil.py,sha256=GRAxjf_DuO5uZ660kEbZt_yCjP3n2maMxsYTPCTz3GQ,3324
227
229
  uk_bin_collection/uk_bin_collection/councils/PerthAndKinrossCouncil.py,sha256=Kos5GzN2co3Ij3tSHOXB9S71Yt78RROCfVRtnh7M1VU,3657
228
230
  uk_bin_collection/uk_bin_collection/councils/PeterboroughCityCouncil.py,sha256=lOrDD4jfJ-_C5UwCGqRcQ1G-U1F5X6rf255ypzYEBcg,6300
229
231
  uk_bin_collection/uk_bin_collection/councils/PlymouthCouncil.py,sha256=FJqpJ0GJhpjYeyZ9ioZPkKGl-zrqMD3y5iKa07e_i30,3202
@@ -337,8 +339,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
337
339
  uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=QD4v4xpsEE0QheR_fGaNOIRMc2FatcUfKkkhAhseyVU,1159
338
340
  uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
339
341
  uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=YvmHfZqanwrJ8ToGch34x-L-7yPe31nB_x77_Mgl_vo,4545
340
- uk_bin_collection-0.148.6.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
341
- uk_bin_collection-0.148.6.dist-info/METADATA,sha256=w-ll8-C6yznvAJKkIyof53UFTDki9OGprblylgnCglc,20914
342
- uk_bin_collection-0.148.6.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
343
- uk_bin_collection-0.148.6.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
344
- uk_bin_collection-0.148.6.dist-info/RECORD,,
342
+ uk_bin_collection-0.150.0.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
343
+ uk_bin_collection-0.150.0.dist-info/METADATA,sha256=HiNbfbMdwcpyKKz8YaAvrwvc_L1pQS4CeppVTKlYycA,20914
344
+ uk_bin_collection-0.150.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
345
+ uk_bin_collection-0.150.0.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
346
+ uk_bin_collection-0.150.0.dist-info/RECORD,,