specklia 1.9.38.tar.gz → 1.9.40.tar.gz
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- {specklia-1.9.38 → specklia-1.9.40}/PKG-INFO +1 -1
- {specklia-1.9.38 → specklia-1.9.40}/specklia/chunked_transfer.py +30 -51
- {specklia-1.9.38 → specklia-1.9.40}/specklia/client.py +11 -6
- {specklia-1.9.38 → specklia-1.9.40}/specklia.egg-info/PKG-INFO +1 -1
- {specklia-1.9.38 → specklia-1.9.40}/tests/test_chunked_transfer.py +17 -10
- {specklia-1.9.38 → specklia-1.9.40}/tests/test_client.py +1 -1
- {specklia-1.9.38 → specklia-1.9.40}/LICENCE +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/README.md +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/setup.cfg +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/setup.py +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/specklia/__init__.py +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/specklia/utilities.py +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/specklia.egg-info/SOURCES.txt +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/specklia.egg-info/dependency_links.txt +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/specklia.egg-info/requires.txt +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/specklia.egg-info/top_level.txt +0 -0
- {specklia-1.9.38 → specklia-1.9.40}/tests/test_utilities.py +0 -0
specklia/chunked_transfer.py

```diff
@@ -21,7 +21,6 @@ IS PRIVATE BUT THIS PACKAGE IS PUBLIC!
 """
 
 from enum import Enum
-from http import HTTPStatus
 from io import BytesIO
 from logging import Logger
 import struct
@@ -34,6 +33,8 @@ from pandas import DataFrame
 from pandas import read_feather
 import requests
 
+log = Logger(__name__)
+
 CHUNK_DB_NAME = "data_transfer_chunks"
 CHUNK_METADATA_COLLECTION_NAME = "chunk_metadata"
 MAX_CHUNK_AGE_SECONDS = 3600
@@ -53,7 +54,7 @@ class ChunkSetStatus(Enum):
     EMPTYING = 1
 
 
-def upload_chunks(api_address: str, chunks: List[Tuple[int, bytes]], logger: Logger) -> str:
+def upload_chunks(api_address: str, chunks: List[Tuple[int, bytes]]) -> str:
     """
     Upload data chunks.
 
@@ -66,8 +67,6 @@ def upload_chunks(api_address: str, chunks: List[Tuple[int, bytes]], logger: Logger) -> str:
         The full URL of the API, including port but not including endpoint, e.g. "http://127.0.0.1:9999"
     chunks : List[Tuple[int, bytes]]
         A list of tuples containing the ordinal number of the chunk and each chunk
-    logger : Logger
-        A logger with which to log the upload.
 
     Returns
     -------
@@ -78,26 +77,25 @@ def upload_chunks(api_address: str, chunks: List[Tuple[int, bytes]], logger: Logger) -> str:
     response = requests.post(
         api_address + f"/chunk/upload/{chunks[0][0]}-of-{len(chunks)}",
         data=chunks[0][1])
-
-
+    response.raise_for_status()
+    log.info("response from very first /chunk/upload was '%s'", response.json())
     chunk_set_uuid = response.json()['chunk_set_uuid']
 
     # post the rest of the chunks in a random order
     for i, chunk in chunks[1:]:
         response = requests.post(
             api_address + f"/chunk/upload/{chunk_set_uuid}/{i}-of-{len(chunks)}", data=chunk)
-
-
+        response.raise_for_status()
+        log.info("response from subsequent /chunk/upload/uuid call was '%s'", response.text)
 
     return chunk_set_uuid
 
 
-def download_chunks(api_address: str, chunk_set_uuid: str, logger: Logger) -> List[Tuple[int, bytes]]:
+def download_chunks(api_address: str, chunk_set_uuid: str, num_chunks: int) -> bytes:
     """
     Download data chunks.
 
-    Download a series of data chunks through the chunked transfer mechanism.
-    This method is for use on the client, not the server.
+    Download a series of data chunks sequentially through the chunked transfer mechanism.
 
     Parameters
     ----------
```
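The upload protocol keeps its shape but no longer threads a `logger` through: the first POST to `/chunk/upload/{ordinal}-of-{total}` creates the chunk set and returns its UUID, and the remaining chunks are POSTed against that UUID. A minimal usage sketch, assuming a Specklia-compatible server is listening at the placeholder address below:

```python
from specklia import chunked_transfer

# Placeholder payload; in practice this is a serialised feather table.
payload = b'example payload to transfer'

# Split into ordered (ordinal, bytes) tuples, then upload; the returned
# UUID identifies the chunk set server-side.
chunks = chunked_transfer.split_into_chunks(payload, chunk_size=5)
chunk_set_uuid = chunked_transfer.upload_chunks(
    api_address='http://127.0.0.1:9999',  # placeholder address from the docstring
    chunks=chunks)
```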
```diff
@@ -105,53 +103,51 @@ def download_chunks(api_address: str, chunk_set_uuid: str, logger: Logger) -> List[Tuple[int, bytes]]:
         The full URL of the API, including port but not including endpoint, e.g. "http://127.0.0.1:9999"
     chunk_set_uuid : str
         The uuid of the chunk set to download.
-    logger : Logger
-        A logger with which to log the download.
+    num_chunks : int
+        The number of chunks to download.
 
     Returns
     -------
-
-
+    bytes
+        The concatenated data from all the chunks.
 
     Raises
     ------
     RuntimeError
         If the download fails after a number of retries.
     """
-
-
-    finished = False
-
-    while not finished:
+    chunks = []
+    for chunk_ordinal in range(1, num_chunks + 1):
         retries = 0
         success = False
-
         while retries < CHUNK_DOWNLOAD_RETRIES and not success:
             try:
                 this_chunk_response = requests.get(
-                    f"{api_address}/chunk/download/{chunk_set_uuid}",
+                    f"{api_address}/chunk/download/{chunk_set_uuid}/{chunk_ordinal}",
                     timeout=CHUNK_DOWNLOAD_TIMEOUT_S
                 )
-
-
-
-
-
-
+                this_chunk_response.raise_for_status()
+                ordinal = struct.unpack('i', this_chunk_response.content[:4])[0]
+                chunk = this_chunk_response.content[4:]
+                assert ordinal == chunk_ordinal, (
+                    f"Chunk ordinal mismatch: expected {chunk_ordinal}, got {ordinal}")
+                chunks.append(chunk)
                 success = True
-
             except (requests.Timeout, requests.ConnectionError) as e:
                 retries += 1
-                logger.warning(
+                log.warning(
                     "Request failed with %s. Retrying (%s/%s)...", e, retries, CHUNK_DOWNLOAD_RETRIES)
                 time.sleep(1)  # Small backoff before retrying
-
         if not success:
             error_message = (
                 f"Failed to download from chunk set {chunk_set_uuid} after {CHUNK_DOWNLOAD_TIMEOUT_S} attempts.")
-            logger.error(error_message)
+            log.error(error_message)
             raise RuntimeError(error_message)
-
+
+    # Let the server know that we are done with this data and it can be deleted.
+    requests.delete(f'{api_address}/chunk/delete/{chunk_set_uuid}')
+
+    return b''.join(chunks)
 
 
 def split_into_chunks(data: bytes, chunk_size: int = MAX_CHUNK_SIZE_BYTES) -> List[Tuple[int, bytes]]:
```
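Each downloaded chunk now carries its ordinal as a 4-byte prefix, which `download_chunks` unpacks and checks against the ordinal it requested. A small sketch of that framing, assuming the server packs the ordinal with the same native-order `struct` format (`'i'`) that the client unpacks, as the updated tests do:

```python
import struct

def frame_chunk(ordinal: int, payload: bytes) -> bytes:
    """Hypothetical helper: prefix a chunk payload with its 4-byte ordinal."""
    return struct.pack('i', ordinal) + payload

framed = frame_chunk(2, b'wobble')
assert struct.unpack('i', framed[:4])[0] == 2  # ordinal check, as in download_chunks
assert framed[4:] == b'wobble'                 # the remaining bytes are the chunk itself
```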
```diff
@@ -174,23 +170,6 @@ def split_into_chunks(data: bytes, chunk_size: int = MAX_CHUNK_SIZE_BYTES) -> List[Tuple[int, bytes]]:
         enumerate((data[i:i + chunk_size] for i in range(0, len(data), chunk_size)), start=1))
 
 
-def merge_from_chunks(chunks: List[Tuple[int, bytes]]) -> bytes:
-    """
-    Merge data that has been split into compressed chunks back into a single message.
-
-    Parameters
-    ----------
-    chunks : List[Tuple[int, bytes]]
-        A list of tuples containing the ordinal number of the chunk and each chunk
-
-    Returns
-    -------
-    bytes
-        The merged data
-    """
-    return b''.join([dc[1] for dc in sorted(chunks, key=lambda x: x[0])])
-
-
 def deserialise_dataframe(data: bytes) -> Union[DataFrame, GeoDataFrame]:
     """
     Convert a binary serialised feather table to pandas dataframe.
```
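`merge_from_chunks` is redundant now that `download_chunks` fetches chunks in ordinal order and joins them itself. For reference, the split/join round trip under the values used in `test_split_into_chunks`:

```python
from specklia import chunked_transfer

chunks = chunked_transfer.split_into_chunks(b'abcdefghijklmnop', chunk_size=5)
assert chunks == [(1, b'abcde'), (2, b'fghij'), (3, b'klmno'), (4, b'p')]

# Reassembly no longer needs sorting: chunks arrive in ordinal order,
# so a plain join recovers the original message.
assert b''.join(c for _, c in chunks) == b'abcdefghijklmnop'
```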
```diff
@@ -223,7 +202,7 @@ def deserialise_dataframe(data: bytes) -> Union[DataFrame, GeoDataFrame]:
             raise ValueError("Couldn't deserialise table format") from e
         else:
             raise ValueError("Couldn't deserialise table format") from e
-    return df
+    return df  # type: ignore
 
 
 def serialise_dataframe(df: Union[DataFrame, GeoDataFrame]) -> bytes:
```
specklia/client.py

```diff
@@ -231,9 +231,12 @@ class Specklia:
         # stream and deserialise the results
         if response_dict['num_chunks'] > 0:
             gdf = chunked_transfer.deserialise_dataframe(
-                chunked_transfer.merge_from_chunks(
-
-
+                chunked_transfer.download_chunks(
+                    self.server_url,
+                    response_dict['chunk_set_uuid'],
+                    response_dict['num_chunks'],
+                )
+            )
         else:
             gdf = gpd.GeoDataFrame()
 
```
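On the read path the client now feeds `num_chunks` from the query response straight into `download_chunks`. A sketch of the contract this assumes (the `response_dict` values are placeholders; the field names come from the diff):

```python
from specklia import chunked_transfer

# Hypothetical query response carrying the two fields the new code reads.
response_dict = {'chunk_set_uuid': 'example-uuid', 'num_chunks': 3}

raw = chunked_transfer.download_chunks(
    'http://127.0.0.1:9999',  # placeholder server URL
    response_dict['chunk_set_uuid'],
    response_dict['num_chunks'])
gdf = chunked_transfer.deserialise_dataframe(raw)
```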
```diff
@@ -312,12 +315,14 @@ class Specklia:
         # serialise and upload each dataframe
         upload_points = []
         for n in new_points:
+            chunks = chunked_transfer.split_into_chunks(
+                chunked_transfer.serialise_dataframe(n['gdf']))
             chunk_set_uuid = chunked_transfer.upload_chunks(
-                self.server_url,
-                chunked_transfer.split_into_chunks(chunked_transfer.serialise_dataframe(n['gdf'])), _log)
+                self.server_url, chunks)
             upload_points.append({
                 'source': n['source'],
-                'chunk_set_uuid': chunk_set_uuid
+                'chunk_set_uuid': chunk_set_uuid,
+                'num_chunks': len(chunks),
             })
             del n
 
```
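On the write path, each upload point now records `num_chunks` alongside the chunk set UUID, so the server can drive the same sequential download. The resulting ingest payload looks like this (values are placeholders taken from the updated test):

```python
upload_points = [{
    'source': {'reference': 'cheese'},  # example source, as in test_add_points_to_dataset
    'chunk_set_uuid': 'brian',          # UUID returned by upload_chunks
    'num_chunks': 1,                    # len(chunks) for this dataframe
}]
```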
tests/test_chunked_transfer.py

```diff
@@ -13,36 +13,43 @@ def test_split_into_chunks():
         (1, b'abcde'), (2, b'fghij'), (3, b'klmno'), (4, b'p')]
 
 
-def test_merge_from_chunks():
-    assert chunked_transfer.merge_from_chunks([
-        (1, b'abcde'), (2, b'fghij'), (3, b'klmno'), (4, b'p')]) == b'abcdefghijklmnop'
-
-
 def test_upload_chunks():
     with patch('specklia.chunked_transfer.requests.post') as mock_post:
         mock_post.return_value.status_code = HTTPStatus.OK
         mock_post.return_value.json.return_value = {'chunk_set_uuid': 'cheese'}
 
         assert chunked_transfer.upload_chunks(
-            api_address='wibble', chunks=[(1, b'a'), (2, b'b')]
+            api_address='wibble', chunks=[(1, b'a'), (2, b'b')]) == 'cheese'
 
         mock_post.assert_has_calls([
             call('wibble/chunk/upload/1-of-2', data=b'a'),
+            call().raise_for_status(),
             call().json(),
             call().json(),
-            call('wibble/chunk/upload/cheese/2-of-2', data=b'b')
+            call('wibble/chunk/upload/cheese/2-of-2', data=b'b'),
+            call().raise_for_status()])
 
 
 def test_download_chunks():
-    with patch('specklia.chunked_transfer.requests.get') as mock_get:
+    with (
+        patch('specklia.chunked_transfer.requests.get') as mock_get,
+        patch('specklia.chunked_transfer.requests.delete') as mock_delete,
+    ):
         mock_get.side_effect = [
             MagicMock(name="mock_response_1", status_code=HTTPStatus.OK, content=struct.pack('i', 1) + b'wibble'),
             MagicMock(name="mock_response_2", status_code=HTTPStatus.OK, content=struct.pack('i', 2) + b'wobble'),
             MagicMock(name="mock_response_3", status_code=HTTPStatus.NO_CONTENT, content=b'')]
 
+        mock_delete.side_effect = MagicMock(name="mock_response_4", status_code=HTTPStatus.OK),
+
         assert chunked_transfer.download_chunks(
-            api_address='wibble',
-
+            api_address='wibble',
+            chunk_set_uuid='rawr',
+            num_chunks=2,
+        ) == b'wibblewobble'
+
+        assert mock_get.call_count == 2
+        assert mock_delete.call_count == 1
 
 
 def test_serialise_dataframe_roundtrip():
```
tests/test_client.py

```diff
@@ -104,7 +104,7 @@ def test_add_points_to_dataset(
         call('https://localhost/ingest',
              json={'dataset_id': 'dummy_dataset',
                    'new_points': [
-                       {'source': {'reference': 'cheese'}, 'chunk_set_uuid': 'brian'}],
+                       {'source': {'reference': 'cheese'}, 'chunk_set_uuid': 'brian', 'num_chunks': 1}],
                    'duplicate_source_behaviour': 'error'},
              headers={'Authorization': 'Bearer fake_token'})])
 
```