uk_bin_collection 0.148.5__py3-none-any.whl → 0.149.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +8 -0
- uk_bin_collection/uk_bin_collection/councils/PembrokeshireCountyCouncil.py +96 -0
- uk_bin_collection/uk_bin_collection/councils/ThanetDistrictCouncil.py +51 -24
- {uk_bin_collection-0.148.5.dist-info → uk_bin_collection-0.149.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.148.5.dist-info → uk_bin_collection-0.149.0.dist-info}/RECORD +8 -7
- {uk_bin_collection-0.148.5.dist-info → uk_bin_collection-0.149.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.148.5.dist-info → uk_bin_collection-0.149.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.148.5.dist-info → uk_bin_collection-0.149.0.dist-info}/entry_points.txt +0 -0
@@ -1772,6 +1772,13 @@
|
|
1772
1772
|
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.",
|
1773
1773
|
"LAD24CD": "E07000178"
|
1774
1774
|
},
|
1775
|
+
"PembrokeshireCountyCouncil": {
|
1776
|
+
"url": "https://nearest.pembrokeshire.gov.uk/property/100100278790",
|
1777
|
+
"wiki_command_url_override": "https://nearest.pembrokeshire.gov.uk/property/XXXXXXXXXX",
|
1778
|
+
"wiki_name": "Pembrokeshire",
|
1779
|
+
"wiki_note": "Replace XXXXXXXX with your UPRN. You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find it.",
|
1780
|
+
"LAD24CD": "W06000009"
|
1781
|
+
},
|
1775
1782
|
"PeterboroughCityCouncil": {
|
1776
1783
|
"house_number": "7 Arundel Road, Peterborough, PE4 6JJ",
|
1777
1784
|
"postcode": "PE4 6JJ",
|
@@ -2341,6 +2348,7 @@
|
|
2341
2348
|
"ThanetDistrictCouncil": {
|
2342
2349
|
"uprn": "100061111858",
|
2343
2350
|
"url": "https://www.thanet.gov.uk",
|
2351
|
+
"web_driver": "http://selenium:4444",
|
2344
2352
|
"wiki_name": "Thanet",
|
2345
2353
|
"wiki_note": "Use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN.",
|
2346
2354
|
"LAD24CD": "E07000114"
|
@@ -0,0 +1,96 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
|
3
|
+
import bs4.element
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
|
6
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
|
+
|
9
|
+
|
10
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for the Pembrokeshire County Council property page.

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    # Date format found on the council page and emitted in the output.
    _DATE_FORMAT = "%d/%m/%Y"

    @staticmethod
    def _extract_bin_type(bin_div, img_tag):
        """
        Return the bin type for one collection tile, or None if none is found.

        The image ``title`` attribute is preferred. When it is missing or
        blank, the tile's non-empty text nodes are joined with single spaces,
        leaving out any text held directly inside a ``<strong>`` tag (the
        caller reads the collection date from that tag).
        """
        title = img_tag.get("title")
        if title and title.strip():
            return title.strip()

        text_nodes = []
        for text in bin_div.find_all(string=True):
            stripped = text.strip()
            # Whitespace-only nodes must be dropped regardless of their
            # parent; the ungrouped `and ... or ...` condition used before
            # let them through and left stray spaces in the joined name.
            if not stripped:
                continue
            parent = text.parent
            if isinstance(parent, bs4.element.Tag) and parent.name == "strong":
                continue
            text_nodes.append(stripped)

        return " ".join(text_nodes) or None

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Parse the bin collection tiles out of the fetched property page.

        Args:
            page: Fetched page object. Although annotated as ``str``, the
                HTML is read from its ``.text`` attribute.
            **kwargs: Not used by this council.

        Returns:
            ``{"bins": [...]}`` where each entry has a ``type`` and a
            ``collectionDate`` (DD/MM/YYYY), ordered by collection date.

        Raises:
            ValueError: If the page holds no collection tiles, or none of
                the tiles yields both a bin type and a parseable date.
        """
        # Make a BeautifulSoup object
        soup = BeautifulSoup(page.text, features="html.parser")

        # Locate the section containing bin collection data
        bin_collection_divs = soup.find_all(
            "div", class_="col-6 col-md-4 text-center mb-3"
        )
        if not bin_collection_divs:
            raise ValueError("No bin collection data found in the provided HTML.")

        # (parsed date, output entry) pairs; the parsed date is kept so the
        # final sort does not have to parse every date string a second time.
        dated_entries = []

        for bin_div in bin_collection_divs:
            # Only tiles carrying a council-hosted image are bin collections
            img_tag = bin_div.find("img")
            if (
                not img_tag
                or not img_tag.get("src")
                or "pembrokeshire.gov.uk/images" not in img_tag["src"]
            ):
                continue

            bin_type = self._extract_bin_type(bin_div, img_tag)
            if not bin_type:
                continue  # Skip if we couldn't find a bin type

            # The collection date is the text of the tile's <strong> tag
            bin_date_tag = bin_div.find("strong")
            if not bin_date_tag:
                continue  # Skip if no date found

            try:
                collection_date = datetime.strptime(
                    bin_date_tag.text.strip(), self._DATE_FORMAT
                )
            except ValueError:
                continue  # Skip if date parsing fails

            dated_entries.append(
                (
                    collection_date,
                    {
                        "type": bin_type,
                        # Re-format so the output is always zero-padded
                        "collectionDate": collection_date.strftime(
                            self._DATE_FORMAT
                        ),
                    },
                )
            )

        if not dated_entries:
            raise ValueError(
                "No valid bin collection data could be parsed from the HTML."
            )

        # Sort by collection date; the sort is stable, so tiles sharing a
        # date keep their page order.
        dated_entries.sort(key=lambda entry: entry[0])
        data = {"bins": [entry for _, entry in dated_entries]}

        print(data)

        return data
|
@@ -1,12 +1,16 @@
|
|
1
|
+
import json
|
1
2
|
import time
|
3
|
+
from datetime import datetime
|
2
4
|
|
3
|
-
import
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
from selenium.webdriver.common.by import By
|
7
|
+
from selenium.webdriver.support import expected_conditions as EC
|
8
|
+
from selenium.webdriver.support.ui import WebDriverWait
|
4
9
|
|
5
10
|
from uk_bin_collection.uk_bin_collection.common import *
|
6
11
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
7
12
|
|
8
13
|
|
9
|
-
# import the wonderful Beautiful Soup and the URL grabber
|
10
14
|
class CouncilClass(AbstractGetBinDataClass):
|
11
15
|
"""
|
12
16
|
Concrete classes have to implement all abstract operations of the
|
@@ -15,37 +19,60 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
15
19
|
"""
|
16
20
|
|
17
21
|
def parse_data(self, page: str, **kwargs) -> dict:
    """
    Fetch and parse Thanet bin collections for a UPRN through Selenium.

    The collection-day endpoint is loaded in a WebDriver session and the
    text of the rendered ``<body>`` is decoded as JSON.

    Args:
        page: Not used; the data is fetched by this method itself.
        **kwargs: Reads ``uprn`` (property reference placed in the query
            string), ``web_driver`` and ``headless`` (both passed straight
            to ``create_webdriver``).

    Returns:
        ``{"bins": [...]}`` where each entry has a ``type`` and a
        ``collectionDate``, ordered by collection date.

    Raises:
        ValueError: If the rendered page has no ``<body>`` text, or the
            text is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
        Exception: Any other failure is printed and re-raised unchanged.
    """
    user_uprn = kwargs.get("uprn")
    check_uprn(user_uprn)
    bindata = {"bins": []}

    url = f"https://www.thanet.gov.uk/wp-content/mu-plugins/collection-day/incl/mu-collection-day-calls.php?pAddress={user_uprn}"
    web_driver = kwargs.get("web_driver")
    headless = kwargs.get("headless")

    # Create the Selenium WebDriver
    driver = create_webdriver(web_driver, headless, None, __name__)

    try:
        print(f"Navigating to URL: {url}")
        driver.get(url)

        # Wait (up to 30 seconds) for the document to report it has finished
        # loading. NOTE(review): this is meant to outlast the Cloudflare
        # check, but only readyState is tested - confirm that is sufficient.
        WebDriverWait(driver, 30).until(
            lambda d: d.execute_script("return document.readyState") == "complete"
        )
        print("Page loaded successfully.")

        # Parse the page source with BeautifulSoup
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # Extract the JSON data from the page
        print("Extracting bin collection data...")
        # find() returns None when there is no <body>; guard it so the
        # intended ValueError is raised instead of an AttributeError.
        # Whitespace-only bodies are treated as empty as well.
        body_tag = soup.find("body")
        body_content = body_tag.text.strip() if body_tag is not None else ""
        if not body_content:
            raise ValueError("Expected JSON data not found in the <body> tag.")

        bin_collection = json.loads(body_content)

        # Process the bin collection data
        for collection in bin_collection:
            bin_type = collection["type"]
            # Keep only the part before the first space; the sort below
            # requires it to be a DD/MM/YYYY date.
            collection_date = collection["nextDate"].split(" ")[0]

            dict_data = {
                "type": bin_type,
                "collectionDate": collection_date,
            }
            bindata["bins"].append(dict_data)

        # Sort the bins by collection date
        bindata["bins"].sort(
            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
        )
        print(bindata)

    except Exception as e:
        print(f"An error occurred: {e}")
        raise
    finally:
        # Always release the browser session, on success or failure
        print("Cleaning up WebDriver...")
        driver.quit()

    return bindata
|
@@ -7,7 +7,7 @@ uk_bin_collection/tests/council_feature_input_parity.py,sha256=DO6Mk4ImYgM5ZCZ-c
|
|
7
7
|
uk_bin_collection/tests/features/environment.py,sha256=VQZjJdJI_kZn08M0j5cUgvKT4k3iTw8icJge1DGOkoA,127
|
8
8
|
uk_bin_collection/tests/features/validate_council_outputs.feature,sha256=SJK-Vc737hrf03tssxxbeg_JIvAH-ddB8f6gU1LTbuQ,251
|
9
9
|
uk_bin_collection/tests/generate_map_test_results.py,sha256=CKnGK2ZgiSXomRGkomX90DitgMP-X7wkHhyKORDcL2E,1144
|
10
|
-
uk_bin_collection/tests/input.json,sha256=
|
10
|
+
uk_bin_collection/tests/input.json,sha256=Uur26vWavpRAc9xJ5an1GUFQzNdct6NS24ZhWQxHad4,131672
|
11
11
|
uk_bin_collection/tests/output.schema,sha256=ZwKQBwYyTDEM4G2hJwfLUVM-5v1vKRvRK9W9SS1sd18,1086
|
12
12
|
uk_bin_collection/tests/step_defs/step_helpers/file_handler.py,sha256=Ygzi4V0S1MIHqbdstUlIqtRIwnynvhu4UtpweJ6-5N8,1474
|
13
13
|
uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=VZ0a81sioJULD7syAYHjvK_-nT_Rd36tUyzPetSA0gk,3475
|
@@ -224,6 +224,7 @@ uk_bin_collection/uk_bin_collection/councils/NuneatonBedworthBoroughCouncil.py,s
|
|
224
224
|
uk_bin_collection/uk_bin_collection/councils/OadbyAndWigstonBoroughCouncil.py,sha256=Kgy5HA0xZ9hR4_cAydPfOfskhGUB4j93AQF2-9Fj-Cg,2179
|
225
225
|
uk_bin_collection/uk_bin_collection/councils/OldhamCouncil.py,sha256=9dlesCxNoVXlmQaqZj7QFh00smnJbm1Gnjkr_Uvzurs,1771
|
226
226
|
uk_bin_collection/uk_bin_collection/councils/OxfordCityCouncil.py,sha256=d_bY0cXRDH4kSoWGGCTNN61MNErapSOf2WSTYDJr2r8,2318
|
227
|
+
uk_bin_collection/uk_bin_collection/councils/PembrokeshireCountyCouncil.py,sha256=GRAxjf_DuO5uZ660kEbZt_yCjP3n2maMxsYTPCTz3GQ,3324
|
227
228
|
uk_bin_collection/uk_bin_collection/councils/PerthAndKinrossCouncil.py,sha256=Kos5GzN2co3Ij3tSHOXB9S71Yt78RROCfVRtnh7M1VU,3657
|
228
229
|
uk_bin_collection/uk_bin_collection/councils/PeterboroughCityCouncil.py,sha256=lOrDD4jfJ-_C5UwCGqRcQ1G-U1F5X6rf255ypzYEBcg,6300
|
229
230
|
uk_bin_collection/uk_bin_collection/councils/PlymouthCouncil.py,sha256=FJqpJ0GJhpjYeyZ9ioZPkKGl-zrqMD3y5iKa07e_i30,3202
|
@@ -291,7 +292,7 @@ uk_bin_collection/uk_bin_collection/councils/TeignbridgeCouncil.py,sha256=-NowMN
|
|
291
292
|
uk_bin_collection/uk_bin_collection/councils/TelfordAndWrekinCouncil.py,sha256=p1ZS5R4EGxbEWlRBrkGXgKwE_lkyBT-R60yKFFhVObc,1844
|
292
293
|
uk_bin_collection/uk_bin_collection/councils/TendringDistrictCouncil.py,sha256=1_CkpWPTfRUEP5YJ9R4_dJRLtb-O9i83hfWJc1shw_c,4283
|
293
294
|
uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py,sha256=Dtfkyrwt795W7gqFJxVGRR8t3R5WMNQZwTWJckLpZWE,8480
|
294
|
-
uk_bin_collection/uk_bin_collection/councils/ThanetDistrictCouncil.py,sha256
|
295
|
+
uk_bin_collection/uk_bin_collection/councils/ThanetDistrictCouncil.py,sha256=Cxrf0tUryDL-wFclPH5yovVt8i7Sc7g-ZFrU9_wg6KY,2717
|
295
296
|
uk_bin_collection/uk_bin_collection/councils/ThreeRiversDistrictCouncil.py,sha256=RHt3e9oeKzwxjjY-M8aC0nk-ZXhHIoyC81JzxkPVxsE,5531
|
296
297
|
uk_bin_collection/uk_bin_collection/councils/ThurrockCouncil.py,sha256=vAZMm6mcsdEcOkP15xwxWy9gdXpmLYQFH7qRifurNoY,2935
|
297
298
|
uk_bin_collection/uk_bin_collection/councils/TonbridgeAndMallingBC.py,sha256=UlgnHDoi8ecav2H5-HqKNDpqW1J3RN-c___5c08_Q7I,4859
|
@@ -337,8 +338,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
|
|
337
338
|
uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=QD4v4xpsEE0QheR_fGaNOIRMc2FatcUfKkkhAhseyVU,1159
|
338
339
|
uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
|
339
340
|
uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=YvmHfZqanwrJ8ToGch34x-L-7yPe31nB_x77_Mgl_vo,4545
|
340
|
-
uk_bin_collection-0.
|
341
|
-
uk_bin_collection-0.
|
342
|
-
uk_bin_collection-0.
|
343
|
-
uk_bin_collection-0.
|
344
|
-
uk_bin_collection-0.
|
341
|
+
uk_bin_collection-0.149.0.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
|
342
|
+
uk_bin_collection-0.149.0.dist-info/METADATA,sha256=88bzKT9T-AZ1OLv3p83LPJY3qbHpXS4-jk_NluPkBvA,20914
|
343
|
+
uk_bin_collection-0.149.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
344
|
+
uk_bin_collection-0.149.0.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
|
345
|
+
uk_bin_collection-0.149.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
{uk_bin_collection-0.148.5.dist-info → uk_bin_collection-0.149.0.dist-info}/entry_points.txt
RENAMED
File without changes
|