specklia 1.9.38__tar.gz → 1.9.40__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: specklia
- Version: 1.9.38
+ Version: 1.9.40
  Summary: Python client for Specklia, a geospatial point cloud database by Earthwave.
  Home-page: https://specklia.earthwave.co.uk/
  Author: Earthwave Ltd
@@ -21,7 +21,6 @@ IS PRIVATE BUT THIS PACKAGE IS PUBLIC!
  """

  from enum import Enum
- from http import HTTPStatus
  from io import BytesIO
  from logging import Logger
  import struct
@@ -34,6 +33,8 @@ from pandas import DataFrame
  from pandas import read_feather
  import requests

+ log = Logger(__name__)
+
  CHUNK_DB_NAME = "data_transfer_chunks"
  CHUNK_METADATA_COLLECTION_NAME = "chunk_metadata"
  MAX_CHUNK_AGE_SECONDS = 3600
@@ -53,7 +54,7 @@ class ChunkSetStatus(Enum):
      EMPTYING = 1


- def upload_chunks(api_address: str, chunks: List[Tuple[int, bytes]], logger: Logger) -> str:
+ def upload_chunks(api_address: str, chunks: List[Tuple[int, bytes]]) -> str:
      """
      Upload data chunks.

@@ -66,8 +67,6 @@ def upload_chunks(api_address: str, chunks: List[Tuple[int, bytes]], logger: Log
          The full URL of the API, including port but not including endpoint, e.g. "http://127.0.0.1:9999"
      chunks : List[Tuple[int, bytes]]
          A list of tuples containing the ordinal number of the chunk and each chunk
-     logger : Logger
-         A logger with which to log the upload.

      Returns
      -------
@@ -78,26 +77,25 @@ def upload_chunks(api_address: str, chunks: List[Tuple[int, bytes]], logger: Log
      response = requests.post(
          api_address + f"/chunk/upload/{chunks[0][0]}-of-{len(chunks)}",
          data=chunks[0][1])
-     logger.info("response from very first /chunk/upload was '%s'", response.json())
-     assert response.status_code == HTTPStatus.OK, response.text
+     response.raise_for_status()
+     log.info("response from very first /chunk/upload was '%s'", response.json())
      chunk_set_uuid = response.json()['chunk_set_uuid']

      # post the rest of the chunks in a random order
      for i, chunk in chunks[1:]:
          response = requests.post(
              api_address + f"/chunk/upload/{chunk_set_uuid}/{i}-of-{len(chunks)}", data=chunk)
-         logger.info("response from subsequent /chunk/upload/uuid call was '%s'", response.text)
-         assert response.status_code == HTTPStatus.OK, response.text
+         response.raise_for_status()
+         log.info("response from subsequent /chunk/upload/uuid call was '%s'", response.text)

      return chunk_set_uuid


- def download_chunks(api_address: str, chunk_set_uuid: str, logger: Logger) -> List[Tuple[int, bytes]]:
+ def download_chunks(api_address: str, chunk_set_uuid: str, num_chunks: int) -> bytes:
      """
      Download data chunks.

-     Download a series of data chunks through the chunked transfer mechanism.
-     This method is for use on the client, not the server.
+     Download a series of data chunks sequentially through the chunked transfer mechanism.

      Parameters
      ----------
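
For orientation, a minimal usage sketch of the revised upload path (illustrative only, not part of the package): in 1.9.40, upload_chunks logs through the module-level log and surfaces HTTP errors via raise_for_status(), so callers no longer pass a logger. The payload below is a placeholder and the API address is the example value from the docstring.

    # Illustrative sketch of the 1.9.40 upload call shape.
    from specklia import chunked_transfer

    payload = b"example serialised dataframe bytes"  # placeholder payload
    chunks = chunked_transfer.split_into_chunks(payload)

    # No logger argument any more; HTTP errors raise via raise_for_status().
    chunk_set_uuid = chunked_transfer.upload_chunks(
        api_address="http://127.0.0.1:9999",  # example address from the docstring
        chunks=chunks,
    )
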
@@ -105,53 +103,51 @@ def download_chunks(api_address: str, chunk_set_uuid: str, logger: Logger) -> Li
          The full URL of the API, including port but not including endpoint, e.g. "http://127.0.0.1:9999"
      chunk_set_uuid : str
          The uuid of the chunk set to download.
-     logger : Logger
-         A logger with which to log the download.
+     num_chunks : int
+         The number of chunks to download.

      Returns
      -------
-     chunks : List[Tuple[int, bytes]]
-         A list of tuples containing the ordinal number of the chunk and each chunk
+     bytes
+         The concatenated data from all the chunks.

      Raises
      ------
      RuntimeError
          If the download fails after a number of retries.
      """
-     # fetch the data
-     data_chunks = []
-     finished = False
-
-     while not finished:
+     chunks = []
+     for chunk_ordinal in range(1, num_chunks + 1):
          retries = 0
          success = False
-
          while retries < CHUNK_DOWNLOAD_RETRIES and not success:
              try:
                  this_chunk_response = requests.get(
-                     f"{api_address}/chunk/download/{chunk_set_uuid}",
+                     f"{api_address}/chunk/download/{chunk_set_uuid}/{chunk_ordinal}",
                      timeout=CHUNK_DOWNLOAD_TIMEOUT_S
                  )
-                 if this_chunk_response.status_code == HTTPStatus.NO_CONTENT:
-                     finished = True
-                 else:
-                     data_chunks.append((
-                         struct.unpack('i', this_chunk_response.content[:4])[0],
-                         this_chunk_response.content[4:]))
+                 this_chunk_response.raise_for_status()
+                 ordinal = struct.unpack('i', this_chunk_response.content[:4])[0]
+                 chunk = this_chunk_response.content[4:]
+                 assert ordinal == chunk_ordinal, (
+                     f"Chunk ordinal mismatch: expected {chunk_ordinal}, got {ordinal}")
+                 chunks.append(chunk)
                  success = True
-
              except (requests.Timeout, requests.ConnectionError) as e:
                  retries += 1
-                 logger.warning(
+                 log.warning(
                      "Request failed with %s. Retrying (%s/%s)...", e, retries, CHUNK_DOWNLOAD_RETRIES)
                  time.sleep(1)  # Small backoff before retrying
-
          if not success:
              error_message = (
                  f"Failed to download from chunk set {chunk_set_uuid} after {CHUNK_DOWNLOAD_TIMEOUT_S} attempts.")
-             logger.error(error_message)
+             log.error(error_message)
              raise RuntimeError(error_message)
-     return data_chunks
+
+     # Let the server know that we are done with this data and it can be deleted.
+     requests.delete(f'{api_address}/chunk/delete/{chunk_set_uuid}')
+
+     return b''.join(chunks)


  def split_into_chunks(data: bytes, chunk_size: int = MAX_CHUNK_SIZE_BYTES) -> List[Tuple[int, bytes]]:
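
The download side now works to a known chunk count instead of polling until a 204 NO_CONTENT: the caller passes num_chunks, each chunk is fetched by ordinal with retries, the chunk set is deleted on the server once everything has arrived, and the already-concatenated bytes are returned. A minimal sketch of the new call shape (illustrative only; chunk_set_uuid and num_chunks would normally come from the server's query response, and the values shown are placeholders):

    # Illustrative sketch of the 1.9.40 download call shape.
    from specklia import chunked_transfer

    data = chunked_transfer.download_chunks(
        api_address="http://127.0.0.1:9999",  # example address from the docstring
        chunk_set_uuid="example-chunk-set",   # placeholder value
        num_chunks=3,                         # placeholder value
    )
    # `data` is already the joined payload, so no separate merge step is needed.
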
@@ -174,23 +170,6 @@ def split_into_chunks(data: bytes, chunk_size: int = MAX_CHUNK_SIZE_BYTES) -> Li
          enumerate((data[i:i + chunk_size] for i in range(0, len(data), chunk_size)), start=1))


- def merge_from_chunks(chunks: List[Tuple[int, bytes]]) -> bytes:
-     """
-     Merge data that has been split into compressed chunks back into a single message.
-
-     Parameters
-     ----------
-     chunks : List[Tuple[int, bytes]]
-         A list of tuples containing the ordinal number of the chunk and each chunk
-
-     Returns
-     -------
-     bytes
-         The merged data
-     """
-     return b''.join([dc[1] for dc in sorted(chunks, key=lambda x: x[0])])
-
-
  def deserialise_dataframe(data: bytes) -> Union[DataFrame, GeoDataFrame]:
      """
      Convert a binary serialised feather table to pandas dataframe.
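
The deleted merge_from_chunks helper amounted to an ordinal-sorted concatenation, which download_chunks now performs itself (it checks each chunk's ordinal and joins the results). For reference only, an equivalent standalone helper, no longer part of the package:

    # Reference-only equivalent of the removed merge_from_chunks helper.
    from typing import List, Tuple

    def merge_from_chunks(chunks: List[Tuple[int, bytes]]) -> bytes:
        # Sort by ordinal, then concatenate the chunk payloads.
        return b''.join(chunk for _, chunk in sorted(chunks, key=lambda pair: pair[0]))
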
@@ -223,7 +202,7 @@ def deserialise_dataframe(data: bytes) -> Union[DataFrame, GeoDataFrame]:
                  raise ValueError("Couldn't deserialise table format") from e
          else:
              raise ValueError("Couldn't deserialise table format") from e
-     return df
+     return df  # type: ignore


  def serialise_dataframe(df: Union[DataFrame, GeoDataFrame]) -> bytes:
@@ -231,9 +231,12 @@ class Specklia:
          # stream and deserialise the results
          if response_dict['num_chunks'] > 0:
              gdf = chunked_transfer.deserialise_dataframe(
-                 chunked_transfer.merge_from_chunks(
-                     chunked_transfer.download_chunks(
-                         self.server_url, response_dict['chunk_set_uuid'], _log)))
+                 chunked_transfer.download_chunks(
+                     self.server_url,
+                     response_dict['chunk_set_uuid'],
+                     response_dict['num_chunks'],
+                 )
+             )
          else:
              gdf = gpd.GeoDataFrame()

@@ -312,12 +315,14 @@ class Specklia:
          # serialise and upload each dataframe
          upload_points = []
          for n in new_points:
+             chunks = chunked_transfer.split_into_chunks(
+                 chunked_transfer.serialise_dataframe(n['gdf']))
              chunk_set_uuid = chunked_transfer.upload_chunks(
-                 self.server_url, chunked_transfer.split_into_chunks(
-                     chunked_transfer.serialise_dataframe(n['gdf'])), _log)
+                 self.server_url, chunks)
              upload_points.append({
                  'source': n['source'],
-                 'chunk_set_uuid': chunk_set_uuid
+                 'chunk_set_uuid': chunk_set_uuid,
+                 'num_chunks': len(chunks),
              })
              del n

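
On the client side, the chunk list is now kept in a local variable so its length can be reported alongside the chunk set, and each entry sent to the ingest endpoint gains a num_chunks field. A sketch of one such entry as the loop above builds it (values are placeholders, not real data):

    # Illustrative shape of one upload_points entry in 1.9.40.
    entry = {
        'source': {'reference': 'example-source'},  # placeholder source metadata
        'chunk_set_uuid': 'example-chunk-set',      # value returned by upload_chunks
        'num_chunks': 4,                            # len(chunks) for this dataframe
    }
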
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: specklia
- Version: 1.9.38
+ Version: 1.9.40
  Summary: Python client for Specklia, a geospatial point cloud database by Earthwave.
  Home-page: https://specklia.earthwave.co.uk/
  Author: Earthwave Ltd
@@ -13,36 +13,43 @@ def test_split_into_chunks():
          (1, b'abcde'), (2, b'fghij'), (3, b'klmno'), (4, b'p')]


- def test_merge_from_chunks():
-     assert chunked_transfer.merge_from_chunks([
-         (1, b'abcde'), (2, b'fghij'), (3, b'klmno'), (4, b'p')]) == b'abcdefghijklmnop'
-
-
  def test_upload_chunks():
      with patch('specklia.chunked_transfer.requests.post') as mock_post:
          mock_post.return_value.status_code = HTTPStatus.OK
          mock_post.return_value.json.return_value = {'chunk_set_uuid': 'cheese'}

          assert chunked_transfer.upload_chunks(
-             api_address='wibble', chunks=[(1, b'a'), (2, b'b')], logger=MagicMock(name="mock_logger")) == 'cheese'
+             api_address='wibble', chunks=[(1, b'a'), (2, b'b')]) == 'cheese'

          mock_post.assert_has_calls([
              call('wibble/chunk/upload/1-of-2', data=b'a'),
+             call().raise_for_status(),
              call().json(),
              call().json(),
-             call('wibble/chunk/upload/cheese/2-of-2', data=b'b')])
+             call('wibble/chunk/upload/cheese/2-of-2', data=b'b'),
+             call().raise_for_status()])


  def test_download_chunks():
-     with patch('specklia.chunked_transfer.requests.get') as mock_get:
+     with (
+         patch('specklia.chunked_transfer.requests.get') as mock_get,
+         patch('specklia.chunked_transfer.requests.delete') as mock_delete,
+     ):
          mock_get.side_effect = [
              MagicMock(name="mock_response_1", status_code=HTTPStatus.OK, content=struct.pack('i', 1) + b'wibble'),
              MagicMock(name="mock_response_2", status_code=HTTPStatus.OK, content=struct.pack('i', 2) + b'wobble'),
              MagicMock(name="mock_response_3", status_code=HTTPStatus.NO_CONTENT, content=b'')]

+         mock_delete.side_effect = MagicMock(name="mock_response_4", status_code=HTTPStatus.OK),
+
          assert chunked_transfer.download_chunks(
-             api_address='wibble', chunk_set_uuid='rawr', logger=MagicMock(name="mock_logger")) == [
-                 (1, b'wibble'), (2, b'wobble')]
+             api_address='wibble',
+             chunk_set_uuid='rawr',
+             num_chunks=2,
+         ) == b'wibblewobble'
+
+         assert mock_get.call_count == 2
+         assert mock_delete.call_count == 1


  def test_serialise_dataframe_roundtrip():
@@ -104,7 +104,7 @@ def test_add_points_to_dataset(
          call('https://localhost/ingest',
               json={'dataset_id': 'dummy_dataset',
                     'new_points': [
-                         {'source': {'reference': 'cheese'}, 'chunk_set_uuid': 'brian'}],
+                         {'source': {'reference': 'cheese'}, 'chunk_set_uuid': 'brian', 'num_chunks': 1}],
                     'duplicate_source_behaviour': 'error'},
               headers={'Authorization': 'Bearer fake_token'})])
