specklia 1.8.218__tar.gz → 1.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {specklia-1.8.218 → specklia-1.9.0}/PKG-INFO +13 -4
- {specklia-1.8.218 → specklia-1.9.0}/setup.cfg +1 -0
- {specklia-1.8.218 → specklia-1.9.0}/setup.py +0 -2
- specklia-1.9.0/specklia/chunked_transfer.py +214 -0
- {specklia-1.8.218 → specklia-1.9.0}/specklia/client.py +40 -42
- {specklia-1.8.218 → specklia-1.9.0}/specklia/utilities.py +28 -2
- {specklia-1.8.218 → specklia-1.9.0}/specklia.egg-info/PKG-INFO +13 -4
- {specklia-1.8.218 → specklia-1.9.0}/specklia.egg-info/SOURCES.txt +3 -3
- {specklia-1.8.218 → specklia-1.9.0}/specklia.egg-info/requires.txt +0 -2
- specklia-1.9.0/tests/test_chunked_transfer.py +51 -0
- {specklia-1.8.218 → specklia-1.9.0}/tests/test_client.py +57 -52
- specklia-1.8.218/specklia/_websocket_helpers.py +0 -401
- specklia-1.8.218/tests/test_websocket_helpers.py +0 -285
- {specklia-1.8.218 → specklia-1.9.0}/LICENCE +0 -0
- {specklia-1.8.218 → specklia-1.9.0}/README.md +0 -0
- {specklia-1.8.218 → specklia-1.9.0}/specklia/__init__.py +0 -0
- {specklia-1.8.218 → specklia-1.9.0}/specklia.egg-info/dependency_links.txt +0 -0
- {specklia-1.8.218 → specklia-1.9.0}/specklia.egg-info/top_level.txt +0 -0
- {specklia-1.8.218 → specklia-1.9.0}/tests/test_utilities.py +0 -0

{specklia-1.8.218 → specklia-1.9.0}/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.2
 Name: specklia
-Version: 1.8.218
+Version: 1.9.0
 Summary: Python client for Specklia, a geospatial point cloud database by Earthwave.
 Home-page: https://specklia.earthwave.co.uk/
 Author: Earthwave Ltd
@@ -23,14 +23,23 @@ Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENCE
 Requires-Dist: blosc
-Requires-Dist: flask
 Requires-Dist: geopandas
 Requires-Dist: pandas
 Requires-Dist: pyarrow
 Requires-Dist: rasterio
 Requires-Dist: requests
 Requires-Dist: shapely
-Requires-Dist: simple-websocket
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: project-url
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
 
 # Specklia
 

{specklia-1.8.218 → specklia-1.9.0}/setup.py
@@ -53,13 +53,11 @@ setup(
     # requirements.txt should contain a specific known working version instead.
     install_requires=[
         'blosc',
-        'flask',
         'geopandas',
         'pandas',
         'pyarrow',
         'rasterio',
         'requests',
         'shapely',
-        'simple-websocket',
     ],
 )

specklia-1.9.0/specklia/chunked_transfer.py
@@ -0,0 +1,214 @@
+"""
+Interface to Mongo for using Mongo as a buffer for chunked data transfer.
+
+We use Mongo as a buffer because we cannot guarantee that all of the requests
+for individual chunks will hit the same worker. While we could use streamed responses for the download,
+they're not available for upload, so for simplicity we use the same approach in both directions.
+
+The intended usage pattern is that a single message is stored as a single "chunk set".
+The chunk set is first "filled" (either by the client or the server), then "emptied" to obtain the data
+(again, by either the client or the server).
+
+Note that while this can be used for pagination, it is not in itself pagination.
+
+We plan to gather most of this material into ew_common after the chunked transfer interface has been rolled out
+to its three main users (ew_geostore, ew_specklia, ew_online_processing_service) and proven effective for each.
+At that point, this entire module will move into ew_common. Note that the chunked transfer interface will always
+require MongoDB or a similar provision to work correctly.
+
+IMPORTANT: THE VERSION HERE IN THE SPECKLIA PACKAGE MUST NOT BE MADE DEPENDENT UPON EW_COMMON SINCE EW_COMMON
+IS PRIVATE BUT THIS PACKAGE IS PUBLIC!
+"""
+
+from enum import Enum
+from http import HTTPStatus
+from io import BytesIO
+from logging import Logger
+import struct
+from typing import List, Tuple, Union
+
+from geopandas import GeoDataFrame, read_feather as read_geofeather
+from pandas import DataFrame, read_feather
+import requests
+
+CHUNK_DB_NAME = "data_transfer_chunks"
+CHUNK_METADATA_COLLECTION_NAME = "chunk_metadata"
+MAX_CHUNK_AGE_SECONDS = 3600
+MAX_CHUNK_SIZE_BYTES = 5 * 1024 ** 2  # must be small enough to fit into an HTTP GET Request
+
+
+class ChunkSetStatus(Enum):
+    """
+    Chunk set status.
+
+    Prevents the accidental access of chunk sets that have not yet received all of their data.
+    """
+
+    FILLING = 0
+    EMPTYING = 1
+
+
+def upload_chunks(api_address: str, chunks: List[Tuple[int, bytes]], logger: Logger) -> str:
+    """
+    Upload data chunks.
+
+    Upload a series of data chunks through the chunked transfer mechanism.
+    This method is for use on the client, not the server.
+
+    Parameters
+    ----------
+    api_address : str
+        The full URL of the API, including port but not including endpoint, e.g. "http://127.0.0.1:9999"
+    chunks : List[Tuple[int, bytes]]
+        A list of tuples containing the ordinal number of the chunk and each chunk
+    logger : Logger
+        A logger with which to log the upload.
+
+    Returns
+    -------
+    str
+        The chunk set uuid of the uploaded chunks
+    """
+    # post the first chunk to start the upload
+    response = requests.post(
+        api_address + f"/chunk/upload/{chunks[0][0]}-of-{len(chunks)}",
+        data=chunks[0][1])
+    logger.info("response from very first /chunk/upload was '%s'", response.json())
+    assert response.status_code == HTTPStatus.OK, response.text
+    chunk_set_uuid = response.json()['chunk_set_uuid']
+
+    # post the rest of the chunks in a random order
+    for i, chunk in chunks[1:]:
+        response = requests.post(
+            api_address + f"/chunk/upload/{chunk_set_uuid}/{i}-of-{len(chunks)}", data=chunk)
+        logger.info("response from subsequent /chunk/upload/uuid call was '%s'", response.text)
+        assert response.status_code == HTTPStatus.OK, response.text
+
+    return chunk_set_uuid
+
+
+def download_chunks(api_address: str, chunk_set_uuid: str) -> List[Tuple[int, bytes]]:
+    """
+    Download data chunks.
+
+    Download a series of data chunks through the chunked transfer mechanism.
+    This method is for use on the client, not the server.
+
+    Parameters
+    ----------
+    api_address : str
+        The full URL of the API, including port but not including endpoint, e.g. "http://127.0.0.1:9999"
+    chunk_set_uuid : str
+        The uuid of the chunk set to download.
+
+    Returns
+    -------
+    chunks : List[Tuple[int, bytes]]
+        A list of tuples containing the ordinal number of the chunk and each chunk
+    """
+    # fetch the data
+    data_chunks = []
+    finished = False
+    while not finished:
+        this_chunk_response = requests.get(api_address + f"/chunk/download/{chunk_set_uuid}")
+        if this_chunk_response.status_code == HTTPStatus.NO_CONTENT:
+            finished = True
+        else:
+            data_chunks.append((
+                struct.unpack('i', this_chunk_response.content[:4])[0],
+                this_chunk_response.content[4:]))
+
+    return data_chunks
+
+
+def split_into_chunks(data: bytes, chunk_size: int = MAX_CHUNK_SIZE_BYTES) -> List[Tuple[int, bytes]]:
+    """
+    Split data into compressed chunks for transport.
+
+    Parameters
+    ----------
+    data : bytes
+        The data to be split into chunks.
+    chunk_size: int
+        The maximum number of bytes allowed in each chunk.
+
+    Returns
+    -------
+    List[Tuple[int, bytes]]
+        A list of tuples containing the ordinal number of the chunk and each chunk
+    """
+    return list(
+        enumerate((data[i:i + chunk_size] for i in range(0, len(data), chunk_size)), start=1))
+
+
+def merge_from_chunks(chunks: List[Tuple[int, bytes]]) -> bytes:
+    """
+    Merge data that has been split into compressed chunks back into a single message.
+
+    Parameters
+    ----------
+    chunks : List[Tuple[int, bytes]]
+        A list of tuples containing the ordinal number of the chunk and each chunk
+
+    Returns
+    -------
+    bytes
+        The merged data
+    """
+    return b''.join([dc[1] for dc in sorted(chunks, key=lambda x: x[0])])
+
+
+def deserialise_dataframe(data: bytes) -> Union[DataFrame, GeoDataFrame]:
+    """
+    Convert a binary serialised feather table to pandas dataframe.
+
+    Parameters
+    ----------
+    data : bytes
+        Binary serialised feather table.
+
+    Returns
+    -------
+    Union[DataFrame, GeoDataFrame]
+        Input table converted to a pandas dataframe.
+
+    Raises
+    ------
+    ValueError
+        When bytes can't be interpreted as meaningful dataframe.
+    """
+    try:
+        buffer = BytesIO(data)
+        df = read_geofeather(buffer)
+    except ValueError as e:
+        # First attempt to deserialise as a geodataframe. If geo meta is missing, we expect a clear ValueError
+        # and we then load as a plain dataframe instead.
+        if "Missing geo meta" in e.args[0] or "'geo' metadata" in e.args[0]:
+            try:
+                df = read_feather(BytesIO(data))
+            except ValueError as e:
+                raise ValueError("Couldn't deserialise table format") from e
+        else:
+            raise ValueError("Couldn't deserialise table format") from e
+    return df
+
+
+def serialise_dataframe(df: Union[DataFrame, GeoDataFrame]) -> bytes:
+    """
+    Serialise a dataframe using the feather table format.
+
+    Parameters
+    ----------
+    df : DataFrame
+        Input dataframe
+
+    Returns
+    -------
+    bytes
+        Serialised feather table.
+    """
+    feather_buffer = BytesIO()
+    # Browser implementations of feather do not support compressed feather formats.
+    df.to_feather(feather_buffer, compression='uncompressed')
+    feather_buffer.seek(0)
+    return feather_buffer.getvalue()
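
For orientation, here is a minimal offline sketch (not part of the released package) of how these helpers compose; `upload_chunks` and `download_chunks` move the same `(ordinal, bytes)` tuples over HTTP when a live Specklia-style server is involved.

```python
import pandas as pd

from specklia import chunked_transfer

# Offline round trip: serialise a dataframe, cut it into ordered chunks,
# then reassemble and deserialise. The small chunk_size is only to force
# the data to be split for illustration.
df = pd.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})
chunks = chunked_transfer.split_into_chunks(
    chunked_transfer.serialise_dataframe(df), chunk_size=64)
restored = chunked_transfer.deserialise_dataframe(
    chunked_transfer.merge_from_chunks(chunks))
pd.testing.assert_frame_equal(df, restored)
```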

{specklia-1.8.218 → specklia-1.9.0}/specklia/client.py
@@ -2,7 +2,6 @@
 from __future__ import annotations
 
 from datetime import datetime
-from http import HTTPStatus
 import json
 import logging
 from typing import Dict, List, Optional, Tuple, Union
@@ -14,9 +13,8 @@ import pandas as pd
 import requests
 from shapely import MultiPolygon, Polygon, to_geojson
 from shapely.geometry import shape
-import simple_websocket
 
-from specklia import 
+from specklia import chunked_transfer, utilities
 
 _log = logging.getLogger(__name__)
 
@@ -168,11 +166,6 @@ class Specklia:
         source_information_only: bool
             If True, no geodataframe is returned, only the set of unique sources. By default, False
 
-        Raises
-        ------
-        RuntimeError
-            If the query failed for some reason.
-
         Returns
         -------
         Tuple[gpd.GeoDataFrame, List[Dict]]
@@ -214,26 +207,39 @@ class Specklia:
         """
         # note the use of json.loads() here, so effectively converting the geojson
         # back into a dictionary of JSON-compatible types to avoid "double-JSONing" it.
-
-            self.server_url.replace("http://", "ws://") + "/query")
-        # Authorise the connection and then send the requestion dictionary.
-        ws.send(bytes(self.auth_token, encoding="utf-8"))
-        _websocket_helpers.send_object_to_websocket(ws, {
+        request = {
             'dataset_id': dataset_id,
             'min_timestamp': int(min_datetime.timestamp()),
             'max_timestamp': int(max_datetime.timestamp()),
             'epsg4326_search_area': json.loads(to_geojson(epsg4326_polygon)),
             'columns_to_return': [] if columns_to_return is None else columns_to_return,
             'additional_filters': [] if additional_filters is None else additional_filters,
-            'source_information_only': source_information_only}
+            'source_information_only': source_information_only}
+
+        # submit the query
+        response = requests.post(
+            self.server_url + '/query',
+            data=json.dumps(request),
+            headers={"Authorization": "Bearer " + self.auth_token})
+        _check_response_ok(response)
+
+        _log.info('queried dataset with ID %s.', dataset_id)
 
-
-
-
-
+        response_dict = response.json()
+
+        # stream and deserialise the results
+        if response_dict['num_chunks'] > 0:
+            gdf = chunked_transfer.deserialise_dataframe(
+                chunked_transfer.merge_from_chunks(
+                    chunked_transfer.download_chunks(
+                        self.server_url, response_dict['chunk_set_uuid'])))
         else:
-
-
+            gdf = gpd.GeoDataFrame()
+
+        # perform some light deserialisation of sources for backwards compatibility.
+        sources = utilities.deserialise_sources(response_dict['sources'])
+
+        return gdf, sources
 
     def update_points_in_dataset(
             self: Specklia, _dataset_id: str, _new_points: pd.DataFrame, _source_description: Dict) -> None:
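
Each chunk that `download_chunks` pulls back for this query path appears to be framed as a four-byte ordinal followed by the payload, judging from the `struct.unpack('i', ...)` call in chunked_transfer.py and the unit tests. A small illustration with placeholder values only:

```python
import struct

# Placeholder payload; a real chunk carries a slice of the feather-serialised table.
payload = b'slice of a feather-serialised table'
framed = struct.pack('i', 2) + payload        # inferred wire framing for chunk number 2
ordinal = struct.unpack('i', framed[:4])[0]   # download_chunks recovers the ordinal...
assert (ordinal, framed[4:]) == (2, payload)  # ...and keeps the rest as the chunk body
```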
@@ -294,28 +300,20 @@ class Specklia:
            The timestamp column must contain POSIX timestamps.
            The 'geometry' column must contain Points following the (lon, lat) convention.
            The GeoDataFrame must have its CRS specified as EPSG 4326.
-
-        Raises
-        ------
-        RuntimeError
-            If the ingest failed for some reason.
         """
-
-
-
+        # serialise and upload each dataframe
+        for n in new_points:
+            n['chunk_set_uuid'] = chunked_transfer.upload_chunks(
+                self.server_url, chunked_transfer.split_into_chunks(
+                    chunked_transfer.serialise_dataframe(n['gdf'])), _log)
+            del n['gdf']
 
-
-
-
-
-            'new_points': new_points})
+        response = requests.post(self.server_url + "/ingest",
+                                 json={'dataset_id': dataset_id, 'new_points': new_points},
+                                 headers={"Authorization": "Bearer " + self.auth_token})
+        _check_response_ok(response)
 
-
-        if response['status'] == HTTPStatus.OK:
-            _log.info('Added new data to specklia dataset ID %s.', dataset_id)
-        else:
-            _log.error('Failed to interact with Specklia server, error was %s', str(response))
-            raise RuntimeError(str(response))
+        _log.info('Added new data to specklia dataset ID %s.', dataset_id)
 
     def delete_points_in_dataset(
             self: Specklia, _dataset_id: str, _source_ids_and_source_row_ids_to_delete: List[Tuple[str, str]]) -> None:
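
To make the ingest loop above concrete: before the `/ingest` POST, each `new_points` entry swaps its GeoDataFrame for the uuid of the uploaded chunk set, so the request body stays plain JSON. A sketch with placeholder values; any keys besides 'gdf' and 'chunk_set_uuid' are hypothetical:

```python
import geopandas as gpd
from shapely.geometry import Point

# Before upload: the entry carries its data as an EPSG:4326 GeoDataFrame.
entry = {
    'source': {'description': 'hypothetical source metadata'},
    'gdf': gpd.GeoDataFrame({'elevation': [12.3]},
                            geometry=[Point(-3.19, 55.95)], crs='EPSG:4326'),
}

# After upload: the GeoDataFrame is replaced by the chunk set uuid returned
# by chunked_transfer.upload_chunks (placeholder value shown here).
entry['chunk_set_uuid'] = '00000000-0000-0000-0000-000000000000'
del entry['gdf']
```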
@@ -453,7 +451,7 @@ class Specklia:
         """
         response = requests.delete(
             self.server_url + "/groups", headers={"Authorization": "Bearer " + self.auth_token},
-
+            params={'group_id': group_id})
         _check_response_ok(response)
         _log.info('deleted group ID %s', group_id)
         return response.text.strip('\n"')
@@ -631,7 +629,7 @@ class Specklia:
         """
         response = requests.delete(
             self.server_url + "/groupmembership", headers={"Authorization": "Bearer " + self.auth_token},
-
+            params={'group_id': group_id, "user_to_delete_id": user_to_delete_id})
         _check_response_ok(response)
         _log.info('Deleted user ID %s from group ID %s.', user_to_delete_id, group_id)
         return response.text.strip('\n"')
@@ -850,7 +848,7 @@ class Specklia:
         """
         response = requests.delete(
             self.server_url + "/metadata",
-
+            params={'dataset_id': dataset_id},
             headers={"Authorization": "Bearer " + self.auth_token}
         )
         _check_response_ok(response)

{specklia-1.8.218 → specklia-1.9.0}/specklia/utilities.py
@@ -1,11 +1,12 @@
 """This file contains client-side utilities provided to make it easier to use Specklia."""
-
+from datetime import datetime
 import os
-from typing import Dict, Optional
+from typing import Dict, List, Optional
 
 import geopandas as gpd
 import numpy as np
 import rasterio
+from shapely.geometry import shape
 
 
 def save_gdf_as_tiff(
@@ -87,3 +88,28 @@ def save_gdf_as_tiff(
            compress='lzw',
            nodata=np.nan) as rst:
        rst.write_band(1, np.flipud(gridded_data))
+
+
+def deserialise_sources(sources: List[Dict]) -> List[Dict]:
+    """
+    Reverse some serialisation of sources returned from /query.
+
+    Reverses some serialisation of the sources dictionary returned from the /query endpoint for end-user convenience.
+    Convert the WKB coverage polygon into a Shapely geometry object, and the min and max times into datetimes.
+
+    Parameters
+    ----------
+    sources: List[Dict]
+        A list of sources returned from Specklia
+
+    Returns
+    -------
+    List[Dict]
+        Sources after the coverage polygon, min_time and max_time have been deserialised.
+    """
+    for source in sources:
+        source['geospatial_coverage'] = shape(source['geospatial_coverage'])
+        source['min_time'] = datetime.fromisoformat(source['min_time'])
+        source['max_time'] = datetime.fromisoformat(source['max_time'])
+
+    return sources
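
A small illustration (values are placeholders) of what `deserialise_sources` does to a source record: the coverage polygon arrives as a GeoJSON-style mapping that shapely's `shape()` accepts, and the time bounds arrive as ISO-8601 strings.

```python
from specklia import utilities

sources = [{
    'geospatial_coverage': {'type': 'Polygon',
                            'coordinates': [[[0, 0], [1, 0], [1, 1], [0, 0]]]},
    'min_time': '2010-01-01T00:00:00',
    'max_time': '2020-01-01T00:00:00',
}]

deserialised = utilities.deserialise_sources(sources)
# deserialised[0]['geospatial_coverage'] is now a shapely Polygon;
# 'min_time' and 'max_time' are datetime objects.
```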

{specklia-1.8.218 → specklia-1.9.0}/specklia.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.2
 Name: specklia
-Version: 1.8.218
+Version: 1.9.0
 Summary: Python client for Specklia, a geospatial point cloud database by Earthwave.
 Home-page: https://specklia.earthwave.co.uk/
 Author: Earthwave Ltd
@@ -23,14 +23,23 @@ Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENCE
 Requires-Dist: blosc
-Requires-Dist: flask
 Requires-Dist: geopandas
 Requires-Dist: pandas
 Requires-Dist: pyarrow
 Requires-Dist: rasterio
 Requires-Dist: requests
 Requires-Dist: shapely
-Requires-Dist: simple-websocket
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: project-url
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
 
 # Specklia
 

{specklia-1.8.218 → specklia-1.9.0}/specklia.egg-info/SOURCES.txt
@@ -3,7 +3,7 @@ README.md
 setup.cfg
 setup.py
 specklia/__init__.py
-specklia/_websocket_helpers.py
+specklia/chunked_transfer.py
 specklia/client.py
 specklia/utilities.py
 specklia.egg-info/PKG-INFO
@@ -11,6 +11,6 @@ specklia.egg-info/SOURCES.txt
 specklia.egg-info/dependency_links.txt
 specklia.egg-info/requires.txt
 specklia.egg-info/top_level.txt
+tests/test_chunked_transfer.py
 tests/test_client.py
-tests/test_utilities.py
-tests/test_websocket_helpers.py
+tests/test_utilities.py

specklia-1.9.0/tests/test_chunked_transfer.py
@@ -0,0 +1,51 @@
+"""Unit tests for chunked_transfer.py."""
+from http import HTTPStatus
+import struct
+from unittest.mock import call, MagicMock, patch
+
+import pandas as pd
+
+from specklia import chunked_transfer
+
+
+def test_split_into_chunks():
+    assert chunked_transfer.split_into_chunks(b'abcdefghijklmnop', chunk_size=5) == [
+        (1, b'abcde'), (2, b'fghij'), (3, b'klmno'), (4, b'p')]
+
+
+def test_merge_from_chunks():
+    assert chunked_transfer.merge_from_chunks([
+        (1, b'abcde'), (2, b'fghij'), (3, b'klmno'), (4, b'p')]) == b'abcdefghijklmnop'
+
+
+def test_upload_chunks():
+    with patch('specklia.chunked_transfer.requests.post') as mock_post:
+        mock_post.return_value.status_code = HTTPStatus.OK
+        mock_post.return_value.json.return_value = {'chunk_set_uuid': 'cheese'}
+
+        assert chunked_transfer.upload_chunks(
+            api_address='wibble', chunks=[(1, b'a'), (2, b'b')], logger=MagicMock(name="mock_logger")) == 'cheese'
+
+        mock_post.assert_has_calls([
+            call('wibble/chunk/upload/1-of-2', data=b'a'),
+            call().json(),
+            call().json(),
+            call('wibble/chunk/upload/cheese/2-of-2', data=b'b')])
+
+
+def test_download_chunks():
+    with patch('specklia.chunked_transfer.requests.get') as mock_get:
+        mock_get.side_effect = [
+            MagicMock(name="mock_response_1", status_code=HTTPStatus.OK, content=struct.pack('i', 1) + b'wibble'),
+            MagicMock(name="mock_response_2", status_code=HTTPStatus.OK, content=struct.pack('i', 2) + b'wobble'),
+            MagicMock(name="mock_response_3", status_code=HTTPStatus.NO_CONTENT, content=b'')]
+
+        assert chunked_transfer.download_chunks(api_address='wibble', chunk_set_uuid='rawr') == [
+            (1, b'wibble'), (2, b'wobble')]
+
+
+def test_serialise_dataframe_roundtrip():
+    df = pd.DataFrame({'a': [1, 1, 2, 3], 'b': ['alfred', 'dave', 'ken', 'sally'], 'c': [1, 2, 4, 4.4]})
+
+    pd.testing.assert_frame_equal(
+        df, chunked_transfer.deserialise_dataframe(chunked_transfer.serialise_dataframe(df)))