specklia-1.8.218-py3-none-any.whl → specklia-1.9.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
specklia/chunked_transfer.py ADDED
@@ -0,0 +1,214 @@
+ """
+ Interface for using Mongo as a buffer for chunked data transfer.
+
+ We use Mongo as a buffer because we cannot guarantee that all of the requests
+ for individual chunks will hit the same worker. While we could use streamed responses for the download,
+ they're not available for upload, so for simplicity we use the same approach in both directions.
+
+ The intended usage pattern is that a single message is stored as a single "chunk set".
+ The chunk set is first "filled" (either by the client or the server), then "emptied" to obtain the data
+ (again, by either the client or the server).
+
+ Note that while this can be used for pagination, it is not in itself pagination.
+
+ We plan to gather most of this material into ew_common after the chunked transfer interface has been rolled out
+ to its three main users (ew_geostore, ew_specklia, ew_online_processing_service) and proven effective for each.
+ At that point, this entire module will move into ew_common. Note that the chunked transfer interface will always
+ require MongoDB or a similar provision to work correctly.
+
+ IMPORTANT: THE VERSION HERE IN THE SPECKLIA PACKAGE MUST NOT BE MADE DEPENDENT UPON EW_COMMON, SINCE EW_COMMON
+ IS PRIVATE BUT THIS PACKAGE IS PUBLIC!
+ """
+
+ from enum import Enum
+ from http import HTTPStatus
+ from io import BytesIO
+ from logging import Logger
+ import struct
+ from typing import List, Tuple, Union
+
+ from geopandas import GeoDataFrame, read_feather as read_geofeather
+ from pandas import DataFrame, read_feather
+ import requests
+
+ CHUNK_DB_NAME = "data_transfer_chunks"
+ CHUNK_METADATA_COLLECTION_NAME = "chunk_metadata"
+ MAX_CHUNK_AGE_SECONDS = 3600
+ MAX_CHUNK_SIZE_BYTES = 5 * 1024 ** 2  # must be small enough to fit within a single HTTP request or response
+
+
+ class ChunkSetStatus(Enum):
+     """
+     Chunk set status.
+
+     Prevents the accidental access of chunk sets that have not yet received all of their data.
+     """
+
+     FILLING = 0
+     EMPTYING = 1
+
+
+ def upload_chunks(api_address: str, chunks: List[Tuple[int, bytes]], logger: Logger) -> str:
+     """
+     Upload data chunks.
+
+     Upload a series of data chunks through the chunked transfer mechanism.
+     This method is for use on the client, not the server.
+
+     Parameters
+     ----------
+     api_address : str
+         The full URL of the API, including port but not including endpoint, e.g. "http://127.0.0.1:9999"
+     chunks : List[Tuple[int, bytes]]
+         A list of tuples, each containing a chunk's ordinal number and its data.
+     logger : Logger
+         A logger with which to log the upload.
+
+     Returns
+     -------
+     str
+         The chunk set uuid of the uploaded chunks.
+     """
+     # post the first chunk to start the upload
+     response = requests.post(
+         api_address + f"/chunk/upload/{chunks[0][0]}-of-{len(chunks)}",
+         data=chunks[0][1])
+     logger.info("response from first /chunk/upload was '%s'", response.json())
+     assert response.status_code == HTTPStatus.OK, response.text
+     chunk_set_uuid = response.json()['chunk_set_uuid']
+
+     # post the rest of the chunks (order does not matter, as chunks are reassembled by ordinal)
+     for i, chunk in chunks[1:]:
+         response = requests.post(
+             api_address + f"/chunk/upload/{chunk_set_uuid}/{i}-of-{len(chunks)}", data=chunk)
+         logger.info("response from subsequent /chunk/upload/uuid call was '%s'", response.text)
+         assert response.status_code == HTTPStatus.OK, response.text
+
+     return chunk_set_uuid
+
+
+ def download_chunks(api_address: str, chunk_set_uuid: str) -> List[Tuple[int, bytes]]:
+     """
+     Download data chunks.
+
+     Download a series of data chunks through the chunked transfer mechanism.
+     This method is for use on the client, not the server.
+
+     Parameters
+     ----------
+     api_address : str
+         The full URL of the API, including port but not including endpoint, e.g. "http://127.0.0.1:9999"
+     chunk_set_uuid : str
+         The uuid of the chunk set to download.
+
+     Returns
+     -------
+     chunks : List[Tuple[int, bytes]]
+         A list of tuples, each containing a chunk's ordinal number and its data.
+     """
+     # fetch the data; each response body starts with a 4-byte ordinal, and NO_CONTENT marks the end of the set
+     data_chunks = []
+     finished = False
+     while not finished:
+         this_chunk_response = requests.get(api_address + f"/chunk/download/{chunk_set_uuid}")
+         if this_chunk_response.status_code == HTTPStatus.NO_CONTENT:
+             finished = True
+         else:
+             data_chunks.append((
+                 struct.unpack('i', this_chunk_response.content[:4])[0],
+                 this_chunk_response.content[4:]))
+
+     return data_chunks
+
+
+ def split_into_chunks(data: bytes, chunk_size: int = MAX_CHUNK_SIZE_BYTES) -> List[Tuple[int, bytes]]:
+     """
+     Split data into chunks for transport.
+
+     Parameters
+     ----------
+     data : bytes
+         The data to be split into chunks.
+     chunk_size : int
+         The maximum number of bytes allowed in each chunk.
+
+     Returns
+     -------
+     List[Tuple[int, bytes]]
+         A list of tuples, each containing a chunk's ordinal number (starting from 1) and its data.
+     """
+     return list(
+         enumerate((data[i:i + chunk_size] for i in range(0, len(data), chunk_size)), start=1))
+
+
+ def merge_from_chunks(chunks: List[Tuple[int, bytes]]) -> bytes:
+     """
+     Merge data that has been split into chunks back into a single message.
+
+     Parameters
+     ----------
+     chunks : List[Tuple[int, bytes]]
+         A list of tuples, each containing a chunk's ordinal number and its data.
+
+     Returns
+     -------
+     bytes
+         The merged data.
+     """
+     return b''.join([dc[1] for dc in sorted(chunks, key=lambda x: x[0])])
+
+
+ def deserialise_dataframe(data: bytes) -> Union[DataFrame, GeoDataFrame]:
+     """
+     Convert a binary serialised feather table to a pandas dataframe.
+
+     Parameters
+     ----------
+     data : bytes
+         Binary serialised feather table.
+
+     Returns
+     -------
+     Union[DataFrame, GeoDataFrame]
+         Input table converted to a pandas dataframe.
+
+     Raises
+     ------
+     ValueError
+         When the bytes can't be interpreted as a meaningful dataframe.
+     """
+     try:
+         buffer = BytesIO(data)
+         df = read_geofeather(buffer)
+     except ValueError as e:
+         # We first attempt to deserialise as a geodataframe. If the geo metadata is missing, we expect a clear
+         # ValueError and we then load as a plain dataframe instead.
+         if "Missing geo meta" in e.args[0] or "'geo' metadata" in e.args[0]:
+             try:
+                 df = read_feather(BytesIO(data))
+             except ValueError as e:
+                 raise ValueError("Couldn't deserialise table format") from e
+         else:
+             raise ValueError("Couldn't deserialise table format") from e
+     return df
+
+
+ def serialise_dataframe(df: Union[DataFrame, GeoDataFrame]) -> bytes:
+     """
+     Serialise a dataframe using the feather table format.
+
+     Parameters
+     ----------
+     df : Union[DataFrame, GeoDataFrame]
+         Input dataframe.
+
+     Returns
+     -------
+     bytes
+         Serialised feather table.
+     """
+     feather_buffer = BytesIO()
+     # Browser implementations of feather do not support compressed feather formats.
+     df.to_feather(feather_buffer, compression='uncompressed')
+     feather_buffer.seek(0)
+     return feather_buffer.getvalue()
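Taken together, these helpers form a complete client-side pipeline. The sketch below is illustrative only, not part of the package: it exercises the pure helpers offline and simulates the 4-byte ordinal framing that download_chunks expects at the start of each /chunk/download response body (an assumption read off the client-side unpacking, not server documentation).

```python
# Offline sketch of the chunked-transfer round trip (no HTTP server involved).
# upload_chunks/download_chunks talk to a live API; here we exercise only the
# pure helpers, plus the assumed per-chunk response framing.
import struct

from geopandas import GeoDataFrame
from shapely.geometry import Point

from specklia import chunked_transfer

gdf = GeoDataFrame({'value': [1.0, 2.0]},
                   geometry=[Point(0, 0), Point(1, 1)], crs='EPSG:4326')

# client side: serialise, then split (tiny chunk_size here to force several chunks)
chunks = chunked_transfer.split_into_chunks(
    chunked_transfer.serialise_dataframe(gdf), chunk_size=64)

# what a server would send back for each chunk: the ordinal first, then the data
framed = [struct.pack('i', ordinal) + data for ordinal, data in chunks]

# client side again: strip the framing, then merge and deserialise
received = [(struct.unpack('i', body[:4])[0], body[4:]) for body in framed]
restored = chunked_transfer.deserialise_dataframe(
    chunked_transfer.merge_from_chunks(received))
assert restored['value'].tolist() == [1.0, 2.0]
```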
specklia/client.py CHANGED
@@ -2,7 +2,6 @@
  from __future__ import annotations

  from datetime import datetime
- from http import HTTPStatus
  import json
  import logging
  from typing import Dict, List, Optional, Tuple, Union
@@ -14,9 +13,8 @@ import pandas as pd
  import requests
  from shapely import MultiPolygon, Polygon, to_geojson
  from shapely.geometry import shape
- import simple_websocket

- from specklia import _websocket_helpers
+ from specklia import chunked_transfer, utilities

  _log = logging.getLogger(__name__)

@@ -168,11 +166,6 @@ class Specklia:
          source_information_only: bool
              If True, no geodataframe is returned, only the set of unique sources. By default, False

-         Raises
-         ------
-         RuntimeError
-             If the query failed for some reason.
-
          Returns
          -------
          Tuple[gpd.GeoDataFrame, List[Dict]]
@@ -214,26 +207,39 @@
          """
          # note the use of json.loads() here, effectively converting the geojson
          # back into a dictionary of JSON-compatible types to avoid "double-JSONing" it.
-         ws = simple_websocket.Client(
-             self.server_url.replace("http://", "ws://") + "/query")
-         # Authorise the connection and then send the request dictionary.
-         ws.send(bytes(self.auth_token, encoding="utf-8"))
-         _websocket_helpers.send_object_to_websocket(ws, {
+         request = {
              'dataset_id': dataset_id,
              'min_timestamp': int(min_datetime.timestamp()),
              'max_timestamp': int(max_datetime.timestamp()),
              'epsg4326_search_area': json.loads(to_geojson(epsg4326_polygon)),
              'columns_to_return': [] if columns_to_return is None else columns_to_return,
              'additional_filters': [] if additional_filters is None else additional_filters,
-             'source_information_only': source_information_only})
+             'source_information_only': source_information_only}
+
+         # submit the query
+         response = requests.post(
+             self.server_url + '/query',
+             data=json.dumps(request),
+             headers={"Authorization": "Bearer " + self.auth_token})
+         _check_response_ok(response)
+
+         _log.info('queried dataset with ID %s.', dataset_id)

-         response = _websocket_helpers.receive_object_from_websocket(ws, self._data_streaming_timeout_s)
-         if response['status'] == HTTPStatus.OK:
-             _log.info('queried dataset with ID %s.', dataset_id)
-             return response['gdf'], response['sources']
+         response_dict = response.json()
+
+         # stream and deserialise the results
+         if response_dict['num_chunks'] > 0:
+             gdf = chunked_transfer.deserialise_dataframe(
+                 chunked_transfer.merge_from_chunks(
+                     chunked_transfer.download_chunks(
+                         self.server_url, response_dict['chunk_set_uuid'])))
          else:
-             _log.error('Failed to interact with Specklia server, error was %s', str(response))
-             raise RuntimeError(str(response))
+             gdf = gpd.GeoDataFrame()
+
+         # perform some light deserialisation of sources for backwards compatibility.
+         sources = utilities.deserialise_sources(response_dict['sources'])
+
+         return gdf, sources

      def update_points_in_dataset(
          self: Specklia, _dataset_id: str, _new_points: pd.DataFrame, _source_description: Dict) -> None:
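The query flow above therefore reduces to: POST the request to /query, then stream the chunk set if one was created. A hedged offline sketch of that branch follows, with _fake_download standing in for chunked_transfer.download_chunks and the response keys ('num_chunks', 'chunk_set_uuid') taken from the client code rather than from server documentation:

```python
# Offline sketch of the query result handling above. A positive 'num_chunks'
# means the server buffered results as a chunk set; an empty result yields an
# empty GeoDataFrame instead. _fake_download stands in for
# chunked_transfer.download_chunks, which would poll /chunk/download.
import geopandas as gpd
from shapely.geometry import Point

from specklia import chunked_transfer

def _fake_download(chunk_set_uuid: str):
    result = gpd.GeoDataFrame({'v': [1]}, geometry=[Point(0, 0)], crs='EPSG:4326')
    return chunked_transfer.split_into_chunks(
        chunked_transfer.serialise_dataframe(result))

response_dict = {'num_chunks': 1, 'chunk_set_uuid': 'hypothetical-uuid'}
if response_dict['num_chunks'] > 0:
    gdf = chunked_transfer.deserialise_dataframe(
        chunked_transfer.merge_from_chunks(
            _fake_download(response_dict['chunk_set_uuid'])))
else:
    gdf = gpd.GeoDataFrame()
assert len(gdf) == 1
```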
@@ -294,28 +300,20 @@
              The timestamp column must contain POSIX timestamps.
              The 'geometry' column must contain Points following the (lon, lat) convention.
              The GeoDataFrame must have its CRS specified as EPSG 4326.
-
-         Raises
-         ------
-         RuntimeError
-             If the ingest failed for some reason.
          """
-         ws = simple_websocket.Client(
-             self.server_url.replace("http://", "ws://") + "/ingest",
-             headers={"Authorization": "Bearer " + self.auth_token})
+         # serialise and upload each dataframe
+         for n in new_points:
+             n['chunk_set_uuid'] = chunked_transfer.upload_chunks(
+                 self.server_url, chunked_transfer.split_into_chunks(
+                     chunked_transfer.serialise_dataframe(n['gdf'])), _log)
+             del n['gdf']

-         # Authorise the connection and then send the request dictionary.
-         ws.send(bytes(self.auth_token, encoding="utf-8"))
-         _websocket_helpers.send_object_to_websocket(ws, {
-             'dataset_id': dataset_id,
-             'new_points': new_points})
+         response = requests.post(self.server_url + "/ingest",
+                                  json={'dataset_id': dataset_id, 'new_points': new_points},
+                                  headers={"Authorization": "Bearer " + self.auth_token})
+         _check_response_ok(response)

-         response = _websocket_helpers.receive_object_from_websocket(ws, self._data_streaming_timeout_s)
-         if response['status'] == HTTPStatus.OK:
-             _log.info('Added new data to specklia dataset ID %s.', dataset_id)
-         else:
-             _log.error('Failed to interact with Specklia server, error was %s', str(response))
-             raise RuntimeError(str(response))
+         _log.info('Added new data to specklia dataset ID %s.', dataset_id)

      def delete_points_in_dataset(
          self: Specklia, _dataset_id: str, _source_ids_and_source_row_ids_to_delete: List[Tuple[str, str]]) -> None:
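Each new_points entry is thus rewritten in place before the /ingest POST: its GeoDataFrame is serialised, chunked and uploaded, then replaced by the resulting chunk set uuid. A minimal offline sketch of that per-entry transformation, with _fake_upload standing in for chunked_transfer.upload_chunks (which would POST the chunks to a live server):

```python
# Offline sketch of the per-entry transformation in the ingest loop above.
# _fake_upload stands in for chunked_transfer.upload_chunks, which would POST
# the chunks to the Specklia API and return the new chunk set's uuid.
from geopandas import GeoDataFrame
from shapely.geometry import Point

from specklia import chunked_transfer

def _fake_upload(chunks):
    return 'hypothetical-uuid'

entry = {'gdf': GeoDataFrame({'v': [1]}, geometry=[Point(0, 0)], crs='EPSG:4326'),
         'source_description': {'name': 'demo'}}
entry['chunk_set_uuid'] = _fake_upload(
    chunked_transfer.split_into_chunks(
        chunked_transfer.serialise_dataframe(entry['gdf'])))
del entry['gdf']
assert entry == {'source_description': {'name': 'demo'},
                 'chunk_set_uuid': 'hypothetical-uuid'}
```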
@@ -453,7 +451,7 @@
          """
          response = requests.delete(
              self.server_url + "/groups", headers={"Authorization": "Bearer " + self.auth_token},
-             json={'group_id': group_id})
+             params={'group_id': group_id})
          _check_response_ok(response)
          _log.info('deleted group ID %s', group_id)
          return response.text.strip('\n"')
@@ -631,7 +629,7 @@
          """
          response = requests.delete(
              self.server_url + "/groupmembership", headers={"Authorization": "Bearer " + self.auth_token},
-             json={'group_id': group_id, "user_to_delete_id": user_to_delete_id})
+             params={'group_id': group_id, "user_to_delete_id": user_to_delete_id})
          _check_response_ok(response)
          _log.info('Deleted user ID %s from group ID %s.', user_to_delete_id, group_id)
          return response.text.strip('\n"')
@@ -850,7 +848,7 @@
          """
          response = requests.delete(
              self.server_url + "/metadata",
-             json={'dataset_id': dataset_id},
+             params={'dataset_id': dataset_id},
              headers={"Authorization": "Bearer " + self.auth_token}
          )
          _check_response_ok(response)
specklia/utilities.py CHANGED
@@ -1,11 +1,12 @@
  """This file contains client-side utilities provided to make it easier to use Specklia."""
-
+ from datetime import datetime
  import os
- from typing import Dict, Optional
+ from typing import Dict, List, Optional

  import geopandas as gpd
  import numpy as np
  import rasterio
+ from shapely.geometry import shape


  def save_gdf_as_tiff(
@@ -87,3 +88,28 @@ def save_gdf_as_tiff(
              compress='lzw',
              nodata=np.nan) as rst:
          rst.write_band(1, np.flipud(gridded_data))
+
+
+ def deserialise_sources(sources: List[Dict]) -> List[Dict]:
+     """
+     Reverse some of the serialisation of sources returned from /query.
+
+     Reverses some of the serialisation of the source dictionaries returned from the /query endpoint for
+     end-user convenience: converts the GeoJSON coverage polygon into a Shapely geometry object, and the
+     min and max times into datetimes.
+
+     Parameters
+     ----------
+     sources : List[Dict]
+         A list of sources returned from Specklia.
+
+     Returns
+     -------
+     List[Dict]
+         Sources after the coverage polygon, min_time and max_time have been deserialised.
+     """
+     for source in sources:
+         source['geospatial_coverage'] = shape(source['geospatial_coverage'])
+         source['min_time'] = datetime.fromisoformat(source['min_time'])
+         source['max_time'] = datetime.fromisoformat(source['max_time'])
+
+     return sources
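A minimal sketch of deserialise_sources in use, with a hand-written source record; the field shapes (GeoJSON coverage, ISO 8601 times) are inferred from the function body above rather than from server documentation:

```python
# Minimal sketch of deserialise_sources with a hand-written source record.
# 'source_id' is a hypothetical extra field, shown passing through untouched.
from specklia import utilities

sources = [{
    'source_id': 'example',
    'geospatial_coverage': {'type': 'Point', 'coordinates': [0.0, 51.5]},
    'min_time': '2020-01-01T00:00:00',
    'max_time': '2020-12-31T23:59:59',
}]
deserialised = utilities.deserialise_sources(sources)
print(type(deserialised[0]['geospatial_coverage']).__name__)  # Point
print(deserialised[0]['min_time'].year)                       # 2020
```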
specklia-1.8.218.dist-info/METADATA → specklia-1.9.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.2
  Name: specklia
- Version: 1.8.218
+ Version: 1.9.0
  Summary: Python client for Specklia, a geospatial point cloud database by Earthwave.
  Home-page: https://specklia.earthwave.co.uk/
  Author: Earthwave Ltd
@@ -23,14 +23,23 @@ Requires-Python: >=3.11
  Description-Content-Type: text/markdown
  License-File: LICENCE
  Requires-Dist: blosc
- Requires-Dist: flask
  Requires-Dist: geopandas
  Requires-Dist: pandas
  Requires-Dist: pyarrow
  Requires-Dist: rasterio
  Requires-Dist: requests
  Requires-Dist: shapely
- Requires-Dist: simple-websocket
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: license
+ Dynamic: project-url
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary

  # Specklia

specklia-1.9.0.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+ specklia/__init__.py,sha256=ePVHqq642NocoE8tS0cNTd0B5wJdUB7r3y815oQXD6A,51
+ specklia/chunked_transfer.py,sha256=hO7luSNjznsH-8s585PFNks1agn3cj6v_Sxg_nLVdM8,7179
+ specklia/client.py,sha256=Ga-gJhKb7_LywBzzqR0YF-9NFUfvdlqpOYB4c7mvvc8,41153
+ specklia/utilities.py,sha256=0_pgTbcq2RgQhys0-CZ6h5YZJg9ZMPhD_ibGPggFUpE,5018
+ specklia-1.9.0.dist-info/LICENCE,sha256=kjWTA-TtT_rJtsWuAgWvesvu01BytVXgt_uCbeQgjOg,1061
+ specklia-1.9.0.dist-info/METADATA,sha256=5spwzYbdibg6x3Sj4nEUGwBKSlO2PZLHQotMas4rRf4,3081
+ specklia-1.9.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ specklia-1.9.0.dist-info/top_level.txt,sha256=XgU53UpAJbqEni5EjJaPdQPYuNx16Geg2I5A9lo1BQw,9
+ specklia-1.9.0.dist-info/RECORD,,
specklia-1.8.218.dist-info/WHEEL → specklia-1.9.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.1.0)
+ Generator: setuptools (75.8.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

specklia/_websocket_helpers.py REMOVED
@@ -1,401 +0,0 @@
- """Utilities for exchanging arbitrary python objects over a websocket."""
- from __future__ import annotations
-
- from contextlib import suppress
- from dataclasses import dataclass
- from datetime import date
- from datetime import datetime
- from enum import Enum
- from io import BytesIO
- import json
- import re
- import socket
- from typing import Iterable, List, Tuple, Union
-
- import blosc
- from geopandas import GeoDataFrame
- from geopandas import read_feather as gpd_read_feather
- from pandas import DataFrame
- from pandas import read_feather
- from shapely import from_wkb
- from shapely import Geometry
- import simple_websocket
-
- # The blosc library can only compress up to 2 GiB at a time, so we transmit data in chunks of this size.
- MAX_BLOSC_COMPRESSION_SIZE = 2147483631
- MESSAGE_END_FLAG = b'message_ends'
- SERIALISED_SUBSTITUTION_PREFIX = '__SERIALISED__'
- # Type alias representing either a websocket server or client.
- WebSocketAgent = Union[simple_websocket.Server, simple_websocket.Client]
-
-
- class SerialisationType(str, Enum):
-     """Supported serialisation types and their text representation for use in metadata messages."""
-
-     geometry = "wkb"
-     table = "feather"
-
-
- @dataclass
- class SerialisationData:
-     """Wraps an object to serialise with its serialisation type."""
-
-     serialisation_type: SerialisationType
-     data: object
-
-
- def validate_websocket_endpoint_url(url: str) -> None:
-     """
-     Validate a websocket endpoint URL.
-
-     Correct URLs are of the form "ws://localhost:1234/endpoint" or similar.
-
-     Parameters
-     ----------
-     url : str
-         The URL to be validated.
-
-     Raises
-     ------
-     ValueError
-         If the Websocket URL is invalid.
-     """
-     # validate the server_url
-     assert url.startswith("ws://")
-     split_url = re.split(r':|/', url)
-     assert len(split_url) >= 6
-     address = split_url[3]
-     port = int(split_url[4])
-
-     try:
-         socket.gethostbyname(address)
-     except socket.gaierror:
-         raise ValueError(f"{address} does not appear to be a valid address.")
-
-     if port != 9065 and (port <= 1023 or port > 65353 or port % 1 != 0):
-         raise ValueError(f"{port} does not appear to be a valid port.")
-
-
- def _receive_bytes_from_websocket(ws: WebSocketAgent, timeout: float = 30) -> bytes:
-     """
-     Receive an arbitrary bytes object over a websocket.
-
-     Bytes are assumed to be sent in chunks terminated by MESSAGE_END_FLAG.
-
-     Works for both the Client and Server objects provided by simple_websocket.
-
-     Parameters
-     ----------
-     ws : WebSocketAgent
-         The Websocket Client or Server.
-     timeout : float
-         If provided, will raise RuntimeError if no message is received within this number of seconds.
-         By default, 30 seconds.
-
-     Returns
-     -------
-     bytes
-         The decompressed bytes received via the websocket.
-
-     Raises
-     ------
-     RuntimeError
-         If the connection times out or is closed without a message being received.
-     """
-     message = b''
-     message_chunk = b''
-     while message_chunk != MESSAGE_END_FLAG and message_chunk is not None:
-         try:
-             if message_chunk is not None:
-                 message += message_chunk
-             compressed_message_chunk = ws.receive(timeout)
-             if compressed_message_chunk is None:
-                 message_chunk = None
-             else:
-                 message_chunk = blosc.decompress(compressed_message_chunk)
-         except simple_websocket.ws.ConnectionClosed:
-             # ensure the input buffer is drained
-             if len(ws.input_buffer) > 0:
-                 message_chunk = blosc.decompress(ws.input_buffer.pop(0))
-             else:
-                 message_chunk = None
-
-     if len(message) == 0:
-         raise RuntimeError("Attempted to receive from a websocket, but nothing was sent.")
-
-     return message
-
-
- def _send_bytes_to_websocket(ws: WebSocketAgent, message: bytes) -> None:
-     """
-     Send an arbitrary bytes object over a websocket.
-
-     Bytes are compressed and then transmitted in chunks of at most MAX_BLOSC_COMPRESSION_SIZE bytes.
-
-     Works for both the Client and Server objects provided by simple_websocket.
-
-     Parameters
-     ----------
-     ws : WebSocketAgent
-         The Websocket Client or Server.
-     message : bytes
-         The data to be sent over the websocket.
-     """
-     for i in range(0, len(message), MAX_BLOSC_COMPRESSION_SIZE):
-         ws.send(blosc.compress(message[i:i + MAX_BLOSC_COMPRESSION_SIZE]))
-     ws.send(blosc.compress(MESSAGE_END_FLAG))
-
-
- def _serialise_dataframe(df: Union[DataFrame, GeoDataFrame]) -> bytes:
-     """
-     Serialise a dataframe using the feather table format.
-
-     Parameters
-     ----------
-     df : DataFrame
-         Input dataframe.
-
-     Returns
-     -------
-     bytes
-         Serialised feather table.
-     """
-     feather_buffer = BytesIO()
-     df.to_feather(feather_buffer)
-     feather_buffer.seek(0)
-     return feather_buffer.getvalue()
-
-
- def _deserialise_feather_bytes_to_dataframe(data: bytes) -> Union[DataFrame, GeoDataFrame]:
-     """
-     Convert a binary serialised feather table to a pandas dataframe.
-
-     Parameters
-     ----------
-     data : bytes
-         Binary serialised feather table.
-
-     Returns
-     -------
-     Union[DataFrame, GeoDataFrame]
-         Input table converted to a pandas dataframe.
-
-     Raises
-     ------
-     ValueError
-         When the bytes can't be interpreted as a meaningful dataframe.
-     """
-     try:
-         buffer = BytesIO(data)
-         df = gpd_read_feather(buffer)
-     except ValueError as e:
-         # First attempt to deserialise as a geodataframe. If geo meta is missing, we expect a clear ValueError
-         # and we then load as a plain dataframe instead.
-         if "Missing geo meta" in e.args[0] or "'geo' metadata" in e.args[0]:
-             try:
-                 df = read_feather(BytesIO(data))
-             except ValueError as e:
-                 raise ValueError("Couldn't deserialise table format") from e
-         else:
-             raise ValueError("Couldn't deserialise table format") from e
-     return df
-
-
- def _extract_objects_to_serialise(data: object, object_dict: List[SerialisationData] = None) \
-         -> Tuple[object, List[SerialisationData]]:
-     """
-     Iterate through an object, replacing complex objects with a placeholder string.
-
-     This recursively traverses the object if it contains dictionaries or lists/iterables.
-     When an object to be serialised is found, we explicitly:
-     - Replace it with a magic string: SERIALISED_SUBSTITUTION_PREFIX{X} where X is an increasing numeric index.
-     - Store the extracted object in a list, where X (above) is its place in this list. We use the
-       SerialisationData type to keep both the object and the serialisation information.
-
-     Parameters
-     ----------
-     data : object
-         Input data object. Can be a single dataframe or primitive, or a dictionary-like structure.
-     object_dict : List[SerialisationData], optional
-         Do not use this parameter! It is used in the recursive calls to store extracted objects and related
-         information, by default None
-
-     Returns
-     -------
-     Tuple[object, List[SerialisationData]]
-         - A json-friendly copy of the input object with all complex child items replaced with
-           SERIALISED_SUBSTITUTION_PREFIX{X}, where X refers to the index of the object in the objects list.
-         - A list of objects from the input data decorated with a transmission-friendly serialisation type.
-     """
-     if object_dict is None:
-         object_dict = []
-
-     return_data = data
-     if isinstance(data, (GeoDataFrame, DataFrame)):
-         object_dict.append(SerialisationData(serialisation_type=SerialisationType.table, data=data))
-         return_data = f"{SERIALISED_SUBSTITUTION_PREFIX}{len(object_dict)}"
-     elif isinstance(data, Geometry):
-         object_dict.append(SerialisationData(serialisation_type=SerialisationType.geometry, data=data))
-         return_data = f"{SERIALISED_SUBSTITUTION_PREFIX}{len(object_dict)}"
-     elif isinstance(data, dict):
-         return_data = data.copy()
-         for key in return_data:
-             return_data[key] = _extract_objects_to_serialise(return_data[key], object_dict)[0]
-     # It's important to handle str before Iterable to avoid infinite recursion!
-     elif isinstance(data, str):
-         pass
-     elif isinstance(data, Iterable):
-         return_data = []
-         for item in data:
-             return_data.append(_extract_objects_to_serialise(item, object_dict)[0])
-     return return_data, object_dict
-
-
- def _insert_deserialised_objects(data: object, object_list: List[object]) -> object:
-     """
-     Iterate through the object, replacing all special placeholder strings with objects.
-
-     This can be a single object or a nested dictionary-like structure.
-
-     Parameters
-     ----------
-     data : object
-         Object potentially containing placeholder strings.
-     object_list : List[object]
-         The list of objects to inject.
-
-     Returns
-     -------
-     object
-         The original object with placeholder references replaced by objects.
-     """
-     # Default case is to return the original object when it is a primitive type.
-     return_data = data
-
-     if isinstance(data, dict):
-         return_data = data.copy()
-         for key in return_data:
-             return_data[key] = _insert_deserialised_objects(return_data[key], object_list)
-     # It's important to handle str before Iterable to avoid infinite recursion!
-     elif isinstance(data, str):
-         if SERIALISED_SUBSTITUTION_PREFIX in data:
-             # Use regex to extract the id using the expected placeholder pattern.
-             match = re.match(f"{SERIALISED_SUBSTITUTION_PREFIX}(\\w+)", data)
-             if match:
-                 item_index = int(match.group(1)) - 1
-                 return_data = object_list[item_index]
-         # Also handle datetimes. Convert a string to a datetime whenever possible.
-         else:
-             with suppress(ValueError):
-                 # fromisoformat is a sensible level of strict: it allows 2001-01-01 but disallows 2001, 20010101
-                 return_data = datetime.fromisoformat(data)
-
-     elif isinstance(data, Iterable):
-         return_data = []
-         for item in data:
-             return_data.append(_insert_deserialised_objects(item, object_list))
-     return return_data
-
-
- def _date_serialiser(item: object) -> str:
-     if isinstance(item, (datetime, date)):
-         return item.isoformat()
-     else:
-         raise TypeError(repr(item) + " is not JSON serializable")
-
-
- def send_object_to_websocket(ws: WebSocketAgent, data: object) -> None:
-     """
-     Send a semi-arbitrary python object over a websocket.
-
-     The object is treated as json-like. When non-json-serialisable objects are encountered,
-     they are treated as follows:
-     - Datetime | Date: serialised, in place, using the isoformat text.
-     - DataFrame | GeoDataFrame: binary serialised using feather and sent as individual websocket messages.
-     - Geometry [Shapely]: binary serialised as well-known-binary and sent as individual websocket messages.
-
-     The object is sent as a series of websocket messages as follows:
-     1. Send a metadata message as serialised json. This details which binary objects are to be expected in step 3,
-        after the payload.
-     2. Send the payload as serialised json. This may contain substituted placeholder strings for binary serialised
-        objects. Substituted strings take the form of SERIALISED_SUBSTITUTION_PREFIX{X} where X is an increasing
-        index.
-     3. Send any number of binary serialised objects. Each object will be a separate websocket message. The number of
-        messages is deduced by first interpreting the metadata message sent in step 1.
-
-     See _send_bytes_to_websocket for compression and the chunking of large messages.
-
-     Parameters
-     ----------
-     ws : WebSocketAgent
-         The Websocket Client or Server.
-     data : object
-         The data to be sent over the websocket.
-     """
-     # Traverse the object, pull out anything that needs encoding and replace it with a unique key
-     message, objects = _extract_objects_to_serialise(data)
-     message_meta = {"binary_type_mapping": [item.serialisation_type for item in objects]}
-
-     # Serialise all "extracted" objects
-     serialised_objects = []
-     for item in objects:
-         if item.serialisation_type == SerialisationType.geometry:
-             # Use WKB for all shapely geometry types
-             serialised_objects.append(item.data.wkb)
-         elif item.serialisation_type == SerialisationType.table:
-             serialised_objects.append(_serialise_dataframe(item.data))
-
-     message_meta = json.dumps(message_meta).encode("utf-8")
-     message = json.dumps(message, default=_date_serialiser).encode("utf-8")
-
-     for item in [message_meta, message, *serialised_objects]:
-         _send_bytes_to_websocket(ws, item)
-
-
- def receive_object_from_websocket(ws: WebSocketAgent, timeout: float = 30) -> object:
-     """
-     Receive a semi-arbitrary python object over a websocket.
-
-     This reverses the protocol employed by send_object_to_websocket:
-     - Receive and decode the metadata message. This determines how many binary messages are expected after
-       the payload.
-     - Receive and decode the primary payload.
-     - Receive each binary serialised object.
-     - Deserialise each binary object and re-saturate the payload accordingly, replacing any placeholder strings
-       with python objects.
-
-     Parameters
-     ----------
-     ws : WebSocketAgent
-         The Websocket Client or Server.
-     timeout : float
-         If provided, will raise RuntimeError if no message is received within this number of seconds.
-         By default, 30 seconds.
-
-     Returns
-     -------
-     object
-         The python object received via the websocket.
-     """
-     # Receive transmission metadata.
-     message_meta = _receive_bytes_from_websocket(ws, timeout)
-     message_meta = json.loads(message_meta.decode("utf-8"))
-     objects_type_map = message_meta['binary_type_mapping']
-
-     # Receive the main payload, excluding binary objects
-     data = _receive_bytes_from_websocket(ws, timeout)
-     data = json.loads(data.decode("utf-8"))
-
-     # Finally, receive and deserialise the binary objects
-     deserialised_objects = []
-     for object_type in objects_type_map:
-         raw_object = _receive_bytes_from_websocket(ws, timeout)
-         if object_type == SerialisationType.table:
-             deserialised_objects.append(_deserialise_feather_bytes_to_dataframe(raw_object))
-         elif object_type == SerialisationType.geometry:
-             deserialised_objects.append(from_wkb(raw_object))
-         del raw_object
-
-     data = _insert_deserialised_objects(data, object_list=deserialised_objects)
-
-     return data
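For historical context, the heart of the removed protocol was placeholder substitution: non-JSON-serialisable objects were swapped for indexed __SERIALISED__{X} strings and sent as separate binary messages. A toy illustration of just that substitution step, independent of any websocket machinery:

```python
# Toy illustration of the placeholder substitution performed by the removed
# _extract_objects_to_serialise: complex objects are pulled out into a side
# list and replaced with '__SERIALISED__{X}', where X is their 1-based index.
from pandas import DataFrame

PREFIX = '__SERIALISED__'

payload = {'points': DataFrame({'x': [1]}), 'label': 'a'}
extracted, json_safe = [], {}
for key, value in payload.items():
    if isinstance(value, DataFrame):
        extracted.append(value)          # would be serialised separately (feather)
        json_safe[key] = f'{PREFIX}{len(extracted)}'
    else:
        json_safe[key] = value           # already JSON-friendly
assert json_safe == {'points': '__SERIALISED__1', 'label': 'a'}
```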
specklia-1.8.218.dist-info/RECORD REMOVED
@@ -1,9 +0,0 @@
- specklia/__init__.py,sha256=ePVHqq642NocoE8tS0cNTd0B5wJdUB7r3y815oQXD6A,51
- specklia/_websocket_helpers.py,sha256=eDOJrTZD16mnXqAXhU0NTCVhSa5kULbXOqptIhqNzKw,14714
- specklia/client.py,sha256=oI8NezhxeawQyUU3U7xGnReXZGuuL84ztA_CTquv-vE,41348
- specklia/utilities.py,sha256=6y0J3bbYNBD0cSGNHt1BC6h7QJ7YKSVwOBl5u2CnCgc,4074
- specklia-1.8.218.dist-info/LICENCE,sha256=kjWTA-TtT_rJtsWuAgWvesvu01BytVXgt_uCbeQgjOg,1061
- specklia-1.8.218.dist-info/METADATA,sha256=VUue0yHUBAVpNQhhpKpy1VFGp3BrgG3J84KzcqcR0S8,2901
- specklia-1.8.218.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
- specklia-1.8.218.dist-info/top_level.txt,sha256=XgU53UpAJbqEni5EjJaPdQPYuNx16Geg2I5A9lo1BQw,9
- specklia-1.8.218.dist-info/RECORD,,