specklia 1.9.38.tar.gz → 1.9.40.tar.gz
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- {specklia-1.9.38 → specklia-1.9.40}/PKG-INFO +1 -1
- {specklia-1.9.38 → specklia-1.9.40}/specklia/chunked_transfer.py +30 -51
- {specklia-1.9.38 → specklia-1.9.40}/specklia/client.py +11 -6
- {specklia-1.9.38 → specklia-1.9.40}/specklia.egg-info/PKG-INFO +1 -1
- {specklia-1.9.38 → specklia-1.9.40}/tests/test_chunked_transfer.py +17 -10
- {specklia-1.9.38 → specklia-1.9.40}/tests/test_client.py +1 -1
- {specklia-1.9.38 → specklia-1.9.40}/LICENCE +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/README.md +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/setup.cfg +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/setup.py +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/specklia/__init__.py +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/specklia/utilities.py +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/specklia.egg-info/SOURCES.txt +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/specklia.egg-info/dependency_links.txt +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/specklia.egg-info/requires.txt +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/specklia.egg-info/top_level.txt +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/tests/test_utilities.py +0 -0
specklia/chunked_transfer.py

```diff
@@ -21,7 +21,6 @@ IS PRIVATE BUT THIS PACKAGE IS PUBLIC!
 """
 
 from enum import Enum
-from http import HTTPStatus
 from io import BytesIO
 from logging import Logger
 import struct
@@ -34,6 +33,8 @@ from pandas import DataFrame
 from pandas import read_feather
 import requests
 
+log = Logger(__name__)
+
 CHUNK_DB_NAME = "data_transfer_chunks"
 CHUNK_METADATA_COLLECTION_NAME = "chunk_metadata"
 MAX_CHUNK_AGE_SECONDS = 3600
@@ -53,7 +54,7 @@ class ChunkSetStatus(Enum):
     EMPTYING = 1
 
 
-def upload_chunks(api_address: str, chunks: List[Tuple[int, bytes]], logger: Logger) -> str:
+def upload_chunks(api_address: str, chunks: List[Tuple[int, bytes]]) -> str:
     """
     Upload data chunks.
 
@@ -66,8 +67,6 @@ def upload_chunks(api_address: str, chunks: List[Tuple[int, bytes]], logger: Logger) -> str:
         The full URL of the API, including port but not including endpoint, e.g. "http://127.0.0.1:9999"
     chunks : List[Tuple[int, bytes]]
         A list of tuples containing the ordinal number of the chunk and each chunk
-    logger : Logger
-        A logger with which to log the upload.
 
     Returns
     -------
@@ -78,26 +77,25 @@ def upload_chunks(api_address: str, chunks: List[Tuple[int, bytes]], logger: Logger) -> str:
     response = requests.post(
         api_address + f"/chunk/upload/{chunks[0][0]}-of-{len(chunks)}",
         data=chunks[0][1])
-
-
+    response.raise_for_status()
+    log.info("response from very first /chunk/upload was '%s'", response.json())
     chunk_set_uuid = response.json()['chunk_set_uuid']
 
     # post the rest of the chunks in a random order
     for i, chunk in chunks[1:]:
         response = requests.post(
             api_address + f"/chunk/upload/{chunk_set_uuid}/{i}-of-{len(chunks)}", data=chunk)
-
-
+        response.raise_for_status()
+        log.info("response from subsequent /chunk/upload/uuid call was '%s'", response.text)
 
     return chunk_set_uuid
 
 
-def download_chunks(api_address: str, chunk_set_uuid: str, logger: Logger) -> List[Tuple[int, bytes]]:
+def download_chunks(api_address: str, chunk_set_uuid: str, num_chunks: int) -> bytes:
     """
     Download data chunks.
 
-    Download a series of data chunks through the chunked transfer mechanism.
-    This method is for use on the client, not the server.
+    Download a series of data chunks sequentially through the chunked transfer mechanism.
 
     Parameters
     ----------
```
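The upload protocol keeps its shape but no longer threads a `logger` through: the first POST to `/chunk/upload/{ordinal}-of-{total}` creates the chunk set and returns its UUID, and the remaining chunks are POSTed against that UUID. A minimal usage sketch, assuming a Specklia-compatible server is listening at the placeholder address below:

```python
from specklia import chunked_transfer

# Placeholder payload; in practice this is a serialised feather table.
payload = b'example payload to transfer'

# Split into ordered (ordinal, bytes) tuples, then upload; the returned
# UUID identifies the chunk set server-side.
chunks = chunked_transfer.split_into_chunks(payload, chunk_size=5)
chunk_set_uuid = chunked_transfer.upload_chunks(
    api_address='http://127.0.0.1:9999',  # placeholder address from the docstring
    chunks=chunks)
```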
```diff
@@ -105,53 +103,51 @@ def download_chunks(api_address: str, chunk_set_uuid: str, logger: Logger) -> List[Tuple[int, bytes]]:
         The full URL of the API, including port but not including endpoint, e.g. "http://127.0.0.1:9999"
     chunk_set_uuid : str
         The uuid of the chunk set to download.
-    logger : Logger
-        A logger with which to log the download.
+    num_chunks : int
+        The number of chunks to download.
 
     Returns
     -------
-
-
+    bytes
+        The concatenated data from all the chunks.
 
     Raises
     ------
     RuntimeError
         If the download fails after a number of retries.
     """
-
-
-    finished = False
-
-    while not finished:
+    chunks = []
+    for chunk_ordinal in range(1, num_chunks + 1):
         retries = 0
         success = False
-
         while retries < CHUNK_DOWNLOAD_RETRIES and not success:
             try:
                 this_chunk_response = requests.get(
-                    f"{api_address}/chunk/download/{chunk_set_uuid}",
+                    f"{api_address}/chunk/download/{chunk_set_uuid}/{chunk_ordinal}",
                     timeout=CHUNK_DOWNLOAD_TIMEOUT_S
                 )
-
-
-
-
-
-
+                this_chunk_response.raise_for_status()
+                ordinal = struct.unpack('i', this_chunk_response.content[:4])[0]
+                chunk = this_chunk_response.content[4:]
+                assert ordinal == chunk_ordinal, (
+                    f"Chunk ordinal mismatch: expected {chunk_ordinal}, got {ordinal}")
+                chunks.append(chunk)
                 success = True
-
             except (requests.Timeout, requests.ConnectionError) as e:
                 retries += 1
-                logger.warning(
+                log.warning(
                     "Request failed with %s. Retrying (%s/%s)...", e, retries, CHUNK_DOWNLOAD_RETRIES)
                 time.sleep(1)  # Small backoff before retrying
-
         if not success:
             error_message = (
                 f"Failed to download from chunk set {chunk_set_uuid} after {CHUNK_DOWNLOAD_TIMEOUT_S} attempts.")
-            logger.error(error_message)
+            log.error(error_message)
             raise RuntimeError(error_message)
-
+
+    # Let the server know that we are done with this data and it can be deleted.
+    requests.delete(f'{api_address}/chunk/delete/{chunk_set_uuid}')
+
+    return b''.join(chunks)
 
 
 def split_into_chunks(data: bytes, chunk_size: int = MAX_CHUNK_SIZE_BYTES) -> List[Tuple[int, bytes]]:
```
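Each downloaded chunk now carries its ordinal as a 4-byte prefix, which `download_chunks` unpacks and checks against the ordinal it requested. A small sketch of that framing, assuming the server packs the ordinal with the same native-order `struct` format (`'i'`) that the client unpacks, as the updated tests do:

```python
import struct

def frame_chunk(ordinal: int, payload: bytes) -> bytes:
    """Hypothetical helper: prefix a chunk payload with its 4-byte ordinal."""
    return struct.pack('i', ordinal) + payload

framed = frame_chunk(2, b'wobble')
assert struct.unpack('i', framed[:4])[0] == 2  # ordinal check, as in download_chunks
assert framed[4:] == b'wobble'                 # the remaining bytes are the chunk itself
```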
```diff
@@ -174,23 +170,6 @@ def split_into_chunks(data: bytes, chunk_size: int = MAX_CHUNK_SIZE_BYTES) -> List[Tuple[int, bytes]]:
         enumerate((data[i:i + chunk_size] for i in range(0, len(data), chunk_size)), start=1))
 
 
-def merge_from_chunks(chunks: List[Tuple[int, bytes]]) -> bytes:
-    """
-    Merge data that has been split into compressed chunks back into a single message.
-
-    Parameters
-    ----------
-    chunks : List[Tuple[int, bytes]]
-        A list of tuples containing the ordinal number of the chunk and each chunk
-
-    Returns
-    -------
-    bytes
-        The merged data
-    """
-    return b''.join([dc[1] for dc in sorted(chunks, key=lambda x: x[0])])
-
-
 def deserialise_dataframe(data: bytes) -> Union[DataFrame, GeoDataFrame]:
     """
     Convert a binary serialised feather table to pandas dataframe.
```
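`merge_from_chunks` is redundant now that `download_chunks` fetches chunks in ordinal order and joins them itself. For reference, the split/join round trip under the values used in `test_split_into_chunks`:

```python
from specklia import chunked_transfer

chunks = chunked_transfer.split_into_chunks(b'abcdefghijklmnop', chunk_size=5)
assert chunks == [(1, b'abcde'), (2, b'fghij'), (3, b'klmno'), (4, b'p')]

# Reassembly no longer needs sorting: chunks arrive in ordinal order,
# so a plain join recovers the original message.
assert b''.join(c for _, c in chunks) == b'abcdefghijklmnop'
```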
```diff
@@ -223,7 +202,7 @@ def deserialise_dataframe(data: bytes) -> Union[DataFrame, GeoDataFrame]:
             raise ValueError("Couldn't deserialise table format") from e
         else:
             raise ValueError("Couldn't deserialise table format") from e
-    return df
+    return df  # type: ignore
 
 
 def serialise_dataframe(df: Union[DataFrame, GeoDataFrame]) -> bytes:
```
specklia/client.py

```diff
@@ -231,9 +231,12 @@ class Specklia:
         # stream and deserialise the results
         if response_dict['num_chunks'] > 0:
             gdf = chunked_transfer.deserialise_dataframe(
-                chunked_transfer.merge_from_chunks(
-
-
+                chunked_transfer.download_chunks(
+                    self.server_url,
+                    response_dict['chunk_set_uuid'],
+                    response_dict['num_chunks'],
+                )
+            )
         else:
             gdf = gpd.GeoDataFrame()
 
```
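On the read path the client now feeds `num_chunks` from the query response straight into `download_chunks`. A sketch of the contract this assumes (the `response_dict` values are placeholders; the field names come from the diff):

```python
from specklia import chunked_transfer

# Hypothetical query response carrying the two fields the new code reads.
response_dict = {'chunk_set_uuid': 'example-uuid', 'num_chunks': 3}

raw = chunked_transfer.download_chunks(
    'http://127.0.0.1:9999',  # placeholder server URL
    response_dict['chunk_set_uuid'],
    response_dict['num_chunks'])
gdf = chunked_transfer.deserialise_dataframe(raw)
```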
```diff
@@ -312,12 +315,14 @@ class Specklia:
         # serialise and upload each dataframe
         upload_points = []
         for n in new_points:
+            chunks = chunked_transfer.split_into_chunks(
+                chunked_transfer.serialise_dataframe(n['gdf']))
             chunk_set_uuid = chunked_transfer.upload_chunks(
-                self.server_url,
-                chunked_transfer.split_into_chunks(chunked_transfer.serialise_dataframe(n['gdf'])), _log)
+                self.server_url, chunks)
             upload_points.append({
                 'source': n['source'],
-                'chunk_set_uuid': chunk_set_uuid
+                'chunk_set_uuid': chunk_set_uuid,
+                'num_chunks': len(chunks),
             })
             del n
 
```
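On the write path, each upload point now records `num_chunks` alongside the chunk set UUID, so the server can drive the same sequential download. The resulting ingest payload looks like this (values are placeholders taken from the updated test):

```python
upload_points = [{
    'source': {'reference': 'cheese'},  # example source, as in test_add_points_to_dataset
    'chunk_set_uuid': 'brian',          # UUID returned by upload_chunks
    'num_chunks': 1,                    # len(chunks) for this dataframe
}]
```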
tests/test_chunked_transfer.py

```diff
@@ -13,36 +13,43 @@ def test_split_into_chunks():
         (1, b'abcde'), (2, b'fghij'), (3, b'klmno'), (4, b'p')]
 
 
-def test_merge_from_chunks():
-    assert chunked_transfer.merge_from_chunks([
-        (1, b'abcde'), (2, b'fghij'), (3, b'klmno'), (4, b'p')]) == b'abcdefghijklmnop'
-
-
 def test_upload_chunks():
     with patch('specklia.chunked_transfer.requests.post') as mock_post:
         mock_post.return_value.status_code = HTTPStatus.OK
         mock_post.return_value.json.return_value = {'chunk_set_uuid': 'cheese'}
 
         assert chunked_transfer.upload_chunks(
-            api_address='wibble', chunks=[(1, b'a'), (2, b'b')]
+            api_address='wibble', chunks=[(1, b'a'), (2, b'b')]) == 'cheese'
 
         mock_post.assert_has_calls([
             call('wibble/chunk/upload/1-of-2', data=b'a'),
+            call().raise_for_status(),
             call().json(),
             call().json(),
-            call('wibble/chunk/upload/cheese/2-of-2', data=b'b')
+            call('wibble/chunk/upload/cheese/2-of-2', data=b'b'),
+            call().raise_for_status()])
 
 
 def test_download_chunks():
-    with patch('specklia.chunked_transfer.requests.get') as mock_get:
+    with (
+        patch('specklia.chunked_transfer.requests.get') as mock_get,
+        patch('specklia.chunked_transfer.requests.delete') as mock_delete,
+    ):
         mock_get.side_effect = [
             MagicMock(name="mock_response_1", status_code=HTTPStatus.OK, content=struct.pack('i', 1) + b'wibble'),
             MagicMock(name="mock_response_2", status_code=HTTPStatus.OK, content=struct.pack('i', 2) + b'wobble'),
             MagicMock(name="mock_response_3", status_code=HTTPStatus.NO_CONTENT, content=b'')]
 
+        mock_delete.side_effect = MagicMock(name="mock_response_4", status_code=HTTPStatus.OK),
+
         assert chunked_transfer.download_chunks(
-            api_address='wibble',
-
+            api_address='wibble',
+            chunk_set_uuid='rawr',
+            num_chunks=2,
+        ) == b'wibblewobble'
+
+        assert mock_get.call_count == 2
+        assert mock_delete.call_count == 1
 
 
 def test_serialise_dataframe_roundtrip():
```
tests/test_client.py

```diff
@@ -104,7 +104,7 @@ def test_add_points_to_dataset(
         call('https://localhost/ingest',
              json={'dataset_id': 'dummy_dataset',
                    'new_points': [
-                       {'source': {'reference': 'cheese'}, 'chunk_set_uuid': 'brian'}],
+                       {'source': {'reference': 'cheese'}, 'chunk_set_uuid': 'brian', 'num_chunks': 1}],
                    'duplicate_source_behaviour': 'error'},
              headers={'Authorization': 'Bearer fake_token'})])
 
```