uk_bin_collection 0.148.5__py3-none-any.whl → 0.149.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1772,6 +1772,13 @@
1772
1772
  "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.",
1773
1773
  "LAD24CD": "E07000178"
1774
1774
  },
1775
+ "PembrokeshireCountyCouncil": {
1776
+ "url": "https://nearest.pembrokeshire.gov.uk/property/100100278790",
1777
+ "wiki_command_url_override": "https://nearest.pembrokeshire.gov.uk/property/XXXXXXXXXX",
1778
+ "wiki_name": "Pembrokeshire",
1779
+ "wiki_note": "Replace XXXXXXXXXX with your UPRN. You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find it.",
1780
+ "LAD24CD": "W06000009"
1781
+ },
1775
1782
  "PeterboroughCityCouncil": {
1776
1783
  "house_number": "7 Arundel Road, Peterborough, PE4 6JJ",
1777
1784
  "postcode": "PE4 6JJ",
@@ -2341,6 +2348,7 @@
2341
2348
  "ThanetDistrictCouncil": {
2342
2349
  "uprn": "100061111858",
2343
2350
  "url": "https://www.thanet.gov.uk",
2351
+ "web_driver": "http://selenium:4444",
2344
2352
  "wiki_name": "Thanet",
2345
2353
  "wiki_note": "Use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN.",
2346
2354
  "LAD24CD": "E07000114"
@@ -0,0 +1,96 @@
1
+ from datetime import datetime
2
+
3
+ import bs4.element
4
+ from bs4 import BeautifulSoup
5
+
6
+ from uk_bin_collection.uk_bin_collection.common import *
7
+ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
8
+
9
+
10
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Pembrokeshire County Council.

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    @staticmethod
    def _fallback_bin_type(bin_div) -> str:
        """
        Build a bin type label from the div's text when the image has no title.

        Joins every non-blank text node that is not inside a ``<strong>`` tag
        (the ``<strong>`` tag holds the collection date, not the bin name).
        """
        parts = []
        for node in bin_div.find_all(string=True):
            stripped = node.strip()
            # Both conditions must hold. The previous `A and B or C` form was
            # parsed as `(A and B) or C`, which let blank strings through and
            # only checked the immediate parent for <strong>.
            if stripped and node.find_parent("strong") is None:
                parts.append(stripped)
        return " ".join(parts)

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Extract bin types and collection dates from the council's property page.

        Args:
            page: Annotated ``str`` to match the base-class signature, but the
                code reads ``page.text``, so a response-like object exposing
                the HTML as ``.text`` is required.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``{"bins": [{"type": str, "collectionDate": "DD/MM/YYYY"}, ...]}``
            ordered by collection date, earliest first.

        Raises:
            ValueError: If the page contains no candidate collection divs, or
                if none of them yields both a bin type and a parseable date.
        """
        soup = BeautifulSoup(page.text, features="html.parser")

        # Locate the section containing bin collection data
        bin_collection_divs = soup.find_all(
            "div", class_="col-6 col-md-4 text-center mb-3"
        )
        if not bin_collection_divs:
            raise ValueError("No bin collection data found in the provided HTML.")

        # (parsed date, output entry) pairs, so sorting does not re-parse dates
        dated_bins = []

        for bin_div in bin_collection_divs:
            # Only divs whose image is served from the council's image path
            # are treated as bin collection entries
            img_tag = bin_div.find("img")
            img_src = img_tag.get("src") if img_tag else None
            if not img_src or "pembrokeshire.gov.uk/images" not in img_src:
                continue

            # Prefer the image title; fall back to the div's own text
            bin_type = (img_tag.get("title") or "").strip()
            if not bin_type:
                bin_type = self._fallback_bin_type(bin_div)
            if not bin_type:
                continue  # Skip if we couldn't find a bin type

            bin_date_tag = bin_div.find("strong")
            if not bin_date_tag:
                continue  # Skip if no date found

            try:
                collection_date = datetime.strptime(
                    bin_date_tag.text.strip(), "%d/%m/%Y"
                )
            except ValueError:
                continue  # Skip if date parsing fails

            dated_bins.append(
                (
                    collection_date,
                    {
                        "type": bin_type,
                        # Re-format so the output is always zero-padded DD/MM/YYYY
                        "collectionDate": collection_date.strftime("%d/%m/%Y"),
                    },
                )
            )

        if not dated_bins:
            raise ValueError(
                "No valid bin collection data could be parsed from the HTML."
            )

        # Stable sort: entries sharing a date keep their page order
        dated_bins.sort(key=lambda pair: pair[0])
        data = {"bins": [entry for _, entry in dated_bins]}

        print(data)

        return data
@@ -1,12 +1,16 @@
1
+ import json
1
2
  import time
3
+ from datetime import datetime
2
4
 
3
- import requests
5
+ from bs4 import BeautifulSoup
6
+ from selenium.webdriver.common.by import By
7
+ from selenium.webdriver.support import expected_conditions as EC
8
+ from selenium.webdriver.support.ui import WebDriverWait
4
9
 
5
10
  from uk_bin_collection.uk_bin_collection.common import *
6
11
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
7
12
 
8
13
 
9
- # import the wonderful Beautiful Soup and the URL grabber
10
14
  class CouncilClass(AbstractGetBinDataClass):
11
15
  """
12
16
  Concrete classes have to implement all abstract operations of the
@@ -15,37 +19,60 @@ class CouncilClass(AbstractGetBinDataClass):
15
19
  """
16
20
 
17
21
  def parse_data(self, page: str, **kwargs) -> dict:
18
-
19
22
  user_uprn = kwargs.get("uprn")
20
23
  check_uprn(user_uprn)
21
24
  bindata = {"bins": []}
22
25
 
23
- URI = f"https://www.thanet.gov.uk/wp-content/mu-plugins/collection-day/incl/mu-collection-day-calls.php?pAddress={user_uprn}"
26
+ url = f"https://www.thanet.gov.uk/wp-content/mu-plugins/collection-day/incl/mu-collection-day-calls.php?pAddress={user_uprn}"
27
+ web_driver = kwargs.get("web_driver")
28
+ headless = kwargs.get("headless")
29
+
30
+ # Create the Selenium WebDriver
31
+ driver = create_webdriver(web_driver, headless, None, __name__)
32
+
33
+ try:
34
+ print(f"Navigating to URL: {url}")
35
+ driver.get(url)
36
+
37
+ # Wait for Cloudflare to complete its check
38
+ WebDriverWait(driver, 30).until(
39
+ lambda d: d.execute_script("return document.readyState") == "complete"
40
+ )
41
+ print("Page loaded successfully.")
42
+
43
+ # Parse the page source with BeautifulSoup
44
+ soup = BeautifulSoup(driver.page_source, "html.parser")
24
45
 
25
- headers = {
26
- "x-requested-with": "XMLHttpRequest",
27
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
28
- }
46
+ # Extract the JSON data from the page
47
+ print("Extracting bin collection data...")
48
+ body_content = soup.find("body").text
49
+ if not body_content:
50
+ raise ValueError("Expected JSON data not found in the <body> tag.")
29
51
 
30
- # Make the GET request
31
- response = requests.get(URI, headers=headers)
52
+ bin_collection = json.loads(body_content)
32
53
 
33
- # Parse the JSON response
34
- bin_collection = response.json()
54
+ # Process the bin collection data
55
+ for collection in bin_collection:
56
+ bin_type = collection["type"]
57
+ collection_date = collection["nextDate"].split(" ")[0]
35
58
 
36
- # Loop through each collection in bin_collection
37
- for collection in bin_collection:
38
- bin_type = collection["type"]
39
- collection_date = collection["nextDate"].split(" ")[0]
59
+ dict_data = {
60
+ "type": bin_type,
61
+ "collectionDate": collection_date,
62
+ }
63
+ bindata["bins"].append(dict_data)
40
64
 
41
- dict_data = {
42
- "type": bin_type,
43
- "collectionDate": collection_date,
44
- }
45
- bindata["bins"].append(dict_data)
65
+ # Sort the bins by collection date
66
+ bindata["bins"].sort(
67
+ key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
68
+ )
69
+ print(bindata)
46
70
 
47
- bindata["bins"].sort(
48
- key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
49
- )
71
+ except Exception as e:
72
+ print(f"An error occurred: {e}")
73
+ raise
74
+ finally:
75
+ print("Cleaning up WebDriver...")
76
+ driver.quit()
50
77
 
51
78
  return bindata
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: uk_bin_collection
3
- Version: 0.148.5
3
+ Version: 0.149.0
4
4
  Summary: Python Lib to collect UK Bin Data
5
5
  Author: Robert Bradley
6
6
  Author-email: robbrad182@gmail.com
@@ -7,7 +7,7 @@ uk_bin_collection/tests/council_feature_input_parity.py,sha256=DO6Mk4ImYgM5ZCZ-c
7
7
  uk_bin_collection/tests/features/environment.py,sha256=VQZjJdJI_kZn08M0j5cUgvKT4k3iTw8icJge1DGOkoA,127
8
8
  uk_bin_collection/tests/features/validate_council_outputs.feature,sha256=SJK-Vc737hrf03tssxxbeg_JIvAH-ddB8f6gU1LTbuQ,251
9
9
  uk_bin_collection/tests/generate_map_test_results.py,sha256=CKnGK2ZgiSXomRGkomX90DitgMP-X7wkHhyKORDcL2E,1144
10
- uk_bin_collection/tests/input.json,sha256=Zjpi_QnZFlIojCGBJfBkQ8Tn9O2zbi48w_yKr0wbSXU,131194
10
+ uk_bin_collection/tests/input.json,sha256=Uur26vWavpRAc9xJ5an1GUFQzNdct6NS24ZhWQxHad4,131672
11
11
  uk_bin_collection/tests/output.schema,sha256=ZwKQBwYyTDEM4G2hJwfLUVM-5v1vKRvRK9W9SS1sd18,1086
12
12
  uk_bin_collection/tests/step_defs/step_helpers/file_handler.py,sha256=Ygzi4V0S1MIHqbdstUlIqtRIwnynvhu4UtpweJ6-5N8,1474
13
13
  uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=VZ0a81sioJULD7syAYHjvK_-nT_Rd36tUyzPetSA0gk,3475
@@ -224,6 +224,7 @@ uk_bin_collection/uk_bin_collection/councils/NuneatonBedworthBoroughCouncil.py,s
224
224
  uk_bin_collection/uk_bin_collection/councils/OadbyAndWigstonBoroughCouncil.py,sha256=Kgy5HA0xZ9hR4_cAydPfOfskhGUB4j93AQF2-9Fj-Cg,2179
225
225
  uk_bin_collection/uk_bin_collection/councils/OldhamCouncil.py,sha256=9dlesCxNoVXlmQaqZj7QFh00smnJbm1Gnjkr_Uvzurs,1771
226
226
  uk_bin_collection/uk_bin_collection/councils/OxfordCityCouncil.py,sha256=d_bY0cXRDH4kSoWGGCTNN61MNErapSOf2WSTYDJr2r8,2318
227
+ uk_bin_collection/uk_bin_collection/councils/PembrokeshireCountyCouncil.py,sha256=GRAxjf_DuO5uZ660kEbZt_yCjP3n2maMxsYTPCTz3GQ,3324
227
228
  uk_bin_collection/uk_bin_collection/councils/PerthAndKinrossCouncil.py,sha256=Kos5GzN2co3Ij3tSHOXB9S71Yt78RROCfVRtnh7M1VU,3657
228
229
  uk_bin_collection/uk_bin_collection/councils/PeterboroughCityCouncil.py,sha256=lOrDD4jfJ-_C5UwCGqRcQ1G-U1F5X6rf255ypzYEBcg,6300
229
230
  uk_bin_collection/uk_bin_collection/councils/PlymouthCouncil.py,sha256=FJqpJ0GJhpjYeyZ9ioZPkKGl-zrqMD3y5iKa07e_i30,3202
@@ -291,7 +292,7 @@ uk_bin_collection/uk_bin_collection/councils/TeignbridgeCouncil.py,sha256=-NowMN
291
292
  uk_bin_collection/uk_bin_collection/councils/TelfordAndWrekinCouncil.py,sha256=p1ZS5R4EGxbEWlRBrkGXgKwE_lkyBT-R60yKFFhVObc,1844
292
293
  uk_bin_collection/uk_bin_collection/councils/TendringDistrictCouncil.py,sha256=1_CkpWPTfRUEP5YJ9R4_dJRLtb-O9i83hfWJc1shw_c,4283
293
294
  uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py,sha256=Dtfkyrwt795W7gqFJxVGRR8t3R5WMNQZwTWJckLpZWE,8480
294
- uk_bin_collection/uk_bin_collection/councils/ThanetDistrictCouncil.py,sha256=-opmZG9GzjB_NvmWpN6nFZ7rlkSoaRrQICU5E8T0DEQ,1659
295
+ uk_bin_collection/uk_bin_collection/councils/ThanetDistrictCouncil.py,sha256=Cxrf0tUryDL-wFclPH5yovVt8i7Sc7g-ZFrU9_wg6KY,2717
295
296
  uk_bin_collection/uk_bin_collection/councils/ThreeRiversDistrictCouncil.py,sha256=RHt3e9oeKzwxjjY-M8aC0nk-ZXhHIoyC81JzxkPVxsE,5531
296
297
  uk_bin_collection/uk_bin_collection/councils/ThurrockCouncil.py,sha256=vAZMm6mcsdEcOkP15xwxWy9gdXpmLYQFH7qRifurNoY,2935
297
298
  uk_bin_collection/uk_bin_collection/councils/TonbridgeAndMallingBC.py,sha256=UlgnHDoi8ecav2H5-HqKNDpqW1J3RN-c___5c08_Q7I,4859
@@ -337,8 +338,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
337
338
  uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=QD4v4xpsEE0QheR_fGaNOIRMc2FatcUfKkkhAhseyVU,1159
338
339
  uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
339
340
  uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=YvmHfZqanwrJ8ToGch34x-L-7yPe31nB_x77_Mgl_vo,4545
340
- uk_bin_collection-0.148.5.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
341
- uk_bin_collection-0.148.5.dist-info/METADATA,sha256=QFI4MOygjxoWUqezFNR-eFxcJM105HMe1volyN_IQjk,20914
342
- uk_bin_collection-0.148.5.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
343
- uk_bin_collection-0.148.5.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
344
- uk_bin_collection-0.148.5.dist-info/RECORD,,
341
+ uk_bin_collection-0.149.0.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
342
+ uk_bin_collection-0.149.0.dist-info/METADATA,sha256=88bzKT9T-AZ1OLv3p83LPJY3qbHpXS4-jk_NluPkBvA,20914
343
+ uk_bin_collection-0.149.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
344
+ uk_bin_collection-0.149.0.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
345
+ uk_bin_collection-0.149.0.dist-info/RECORD,,