water-column-sonar-processing 0.0.9__py3-none-any.whl → 26.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. water_column_sonar_processing/aws/dynamodb_manager.py +138 -59
  2. water_column_sonar_processing/aws/s3_manager.py +179 -141
  3. water_column_sonar_processing/aws/s3fs_manager.py +29 -33
  4. water_column_sonar_processing/aws/sqs_manager.py +1 -1
  5. water_column_sonar_processing/cruise/create_empty_zarr_store.py +35 -96
  6. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  7. water_column_sonar_processing/cruise/resample_regrid.py +142 -127
  8. water_column_sonar_processing/geometry/__init__.py +10 -2
  9. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  10. water_column_sonar_processing/geometry/geometry_manager.py +50 -49
  11. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  12. water_column_sonar_processing/geometry/pmtile_generation.py +227 -223
  13. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  14. water_column_sonar_processing/index/index_manager.py +151 -33
  15. water_column_sonar_processing/model/zarr_manager.py +665 -262
  16. water_column_sonar_processing/processing/__init__.py +3 -3
  17. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  18. water_column_sonar_processing/processing/raw_to_zarr.py +206 -214
  19. water_column_sonar_processing/utility/__init__.py +9 -2
  20. water_column_sonar_processing/utility/constants.py +69 -18
  21. water_column_sonar_processing/utility/pipeline_status.py +11 -15
  22. water_column_sonar_processing/utility/timestamp.py +3 -4
  23. water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
  24. water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
  25. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
  26. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
  27. water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
  28. water_column_sonar_processing/process.py +0 -147
  29. water_column_sonar_processing/processing/cruise_sampler.py +0 -342
  30. water_column_sonar_processing-0.0.9.dist-info/METADATA +0 -134
  31. water_column_sonar_processing-0.0.9.dist-info/RECORD +0 -32
  32. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,13 @@
1
+ from .elevation_manager import ElevationManager
1
2
  from .geometry_manager import GeometryManager
2
- from .geometry_simplification import GeometrySimplification
3
+ from .line_simplification import LineSimplification
3
4
  from .pmtile_generation import PMTileGeneration
5
+ from .spatiotemporal import Spatiotemporal
4
6
 
5
- __all__ = ["GeometryManager", "GeometrySimplification", "PMTileGeneration"]
7
+ __all__ = [
8
+ "ElevationManager",
9
+ "GeometryManager",
10
+ "LineSimplification",
11
+ "PMTileGeneration",
12
+ "Spatiotemporal",
13
+ ]
@@ -0,0 +1,111 @@
1
+ """
2
+ https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry=-31.70235%2C13.03332&geometryType=esriGeometryPoint&returnGeometry=false&returnCatalogItems=false&f=json
3
+
4
+ https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/
5
+ identify?
6
+ geometry=-31.70235%2C13.03332
7
+ &geometryType=esriGeometryPoint
8
+ &returnGeometry=false
9
+ &returnCatalogItems=false
10
+ &f=json
11
+ {"objectId":0,"name":"Pixel","value":"-5733","location":{"x":-31.702349999999999,"y":13.03332,"spatialReference":{"wkid":4326,"latestWkid":4326}},"properties":null,"catalogItems":null,"catalogItemVisibilities":[]}
12
+ -5733
13
+
14
+ (base) rudy:deleteME rudy$ curl https://api.opentopodata.org/v1/gebco2020?locations=13.03332,-31.70235
15
+ {
16
+ "results": [
17
+ {
18
+ "dataset": "gebco2020",
19
+ "elevation": -5729.0,
20
+ "location": {
21
+ "lat": 13.03332,
22
+ "lng": -31.70235
23
+ }
24
+ }
25
+ ],
26
+ "status": "OK"
27
+ }
28
+ """
29
+
30
+ import json
31
+ import time
32
+ from collections.abc import Generator
33
+
34
+ import requests
35
+
36
+
37
+ def chunked(ll: list, n: int) -> Generator:
38
+ # Yields successively n-sized chunks from ll.
39
+ for i in range(0, len(ll), n):
40
+ yield ll[i : i + n]
41
+
42
+
43
+ class ElevationManager:
44
+ #######################################################
45
+ def __init__(
46
+ self,
47
+ ):
48
+ self.DECIMAL_PRECISION = 5 # precision for GPS coordinates
49
+ self.TIMEOUT_SECONDS = 10
50
+
51
+ #######################################################
52
+ def get_arcgis_elevation(
53
+ self,
54
+ lngs: list,
55
+ lats: list,
56
+ chunk_size: int = 500, # I think this is the api limit
57
+ ) -> int:
58
+ # Reference: https://developers.arcgis.com/rest/services-reference/enterprise/map-to-image/
59
+ # Info: https://www.arcgis.com/home/item.html?id=c876e3c96a8642ab8557646a3b4fa0ff
60
+ ### 'https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={"points":[[-31.70235,13.03332],[-32.70235,14.03332]]}&geometryType=esriGeometryMultipoint&returnGeometry=false&returnCatalogItems=false&f=json'
61
+ if len(lngs) != len(lats):
62
+ raise ValueError("lngs and lats must have same length")
63
+
64
+ geometryType = "esriGeometryMultipoint" # TODO: allow single point?
65
+
66
+ depths = []
67
+
68
+ list_of_points = [list(elem) for elem in list(zip(lngs, lats))]
69
+ for chunk in chunked(list_of_points, chunk_size):
70
+ time.sleep(0.1)
71
+ # order: (lng, lat)
72
+ geometry = f'{{"points":{str(chunk)}}}'
73
+ url = f"https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={geometry}&geometryType={geometryType}&returnGeometry=false&returnCatalogItems=false&f=json"
74
+ result = requests.get(url, timeout=self.TIMEOUT_SECONDS)
75
+ res = json.loads(result.content.decode("utf8"))
76
+ if "results" in res:
77
+ for element in res["results"]:
78
+ depths.append(float(element["value"]))
79
+ elif "value" in res:
80
+ depths.append(float(res["value"]))
81
+
82
+ return depths
83
+
84
+ # def get_gebco_bathymetry_elevation(self) -> int:
85
+ # # Documentation: https://www.opentopodata.org/datasets/gebco2020/
86
+ # latitude = 13.03332
87
+ # longitude = -31.70235
88
+ # dataset = "gebco2020"
89
+ # url = f"https://api.opentopodata.org/v1/{dataset}?locations={latitude},{longitude}"
90
+ # pass
91
+
92
+ # def get_elevation(
93
+ # self,
94
+ # df,
95
+ # lat_column,
96
+ # lon_column,
97
+ # ) -> int:
98
+ # """Query service using lat, lon. add the elevation values as a new column."""
99
+ # url = r'https://epqs.nationalmap.gov/v1/json?'
100
+ # elevations = []
101
+ # for lat, lon in zip(df[lat_column], df[lon_column]):
102
+ # # define rest query params
103
+ # params = {
104
+ # 'output': 'json',
105
+ # 'x': lon,
106
+ # 'y': lat,
107
+ # 'units': 'Meters'
108
+ # }
109
+ # result = requests.get((url + urllib.parse.urlencode(params)))
110
+ # elevations.append(result.json()['value'])
111
+ # return elevations
@@ -1,4 +1,3 @@
1
- import os
2
1
  from pathlib import Path
3
2
 
4
3
  import geopandas
@@ -8,17 +7,16 @@ import pandas as pd
8
7
  from water_column_sonar_processing.aws import S3Manager
9
8
  from water_column_sonar_processing.utility import Cleaner
10
9
 
11
- """
12
- // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
13
- // 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
14
- // 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
15
- // 2 0.01 0° 00′ 36″ town or village 1.1132 km 1.0247 km 787.1 m 434.96 m
16
- // 3 0.001 0° 00′ 3.6″ neighborhood, street 111.32 m 102.47 m 78.71 m 43.496 m
17
- // 4 0.0001 0° 00′ 0.36″ individual street, land parcel 11.132 m 10.247 m 7.871 m 4.3496 m
18
- // 5 0.00001 0° 00′ 0.036″ individual trees, door entrance 1.1132 m 1.0247 m 787.1 mm 434.96 mm
19
- // 6 0.000001 0° 00′ 0.0036″ individual humans 111.32 mm 102.47 mm 78.71 mm 43.496 mm
20
- // 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm
21
- """
10
+
11
+ # // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
12
+ # // 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
13
+ # // 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
14
+ # // 2 0.01 0° 00′ 36″ town or village 1.1132 km 1.0247 km 787.1 m 434.96 m
15
+ # // 3 0.001 0° 00′ 3.6″ neighborhood, street 111.32 m 102.47 m 78.71 m 43.496 m
16
+ # // 4 0.0001 0° 00′ 0.36″ individual street, land parcel 11.132 m 10.247 m 7.871 m 4.3496 m
17
+ # // 5 0.00001 0° 00′ 0.036″ individual trees, door entrance 1.1132 m 1.0247 m 787.1 mm 434.96 mm
18
+ # // 6 0.000001 0° 00′ 0.0036″ individual humans 111.32 mm 102.47 mm 78.71 mm 43.496 mm
19
+ # // 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm
22
20
 
23
21
 
24
22
  class GeometryManager:
@@ -26,7 +24,7 @@ class GeometryManager:
26
24
  def __init__(
27
25
  self,
28
26
  ):
29
- self.DECIMAL_PRECISION = 5 # precision for GPS coordinates
27
+ self.DECIMAL_PRECISION = 6 # precision for GPS coordinates
30
28
  self.SIMPLIFICATION_TOLERANCE = 0.0001 # RDP simplification to "street level"
31
29
 
32
30
  #######################################################
@@ -38,32 +36,31 @@ class GeometryManager:
38
36
  cruise_name,
39
37
  sensor_name,
40
38
  file_name,
39
+ endpoint_url=None,
41
40
  write_geojson=True,
42
41
  ) -> tuple:
43
42
  file_name_stem = Path(file_name).stem
44
43
  geo_json_name = f"{file_name_stem}.json"
45
44
 
46
- print("Getting GPS data from echopype object.")
45
+ print("Getting GPS dataset from echopype object.")
47
46
  try:
48
- latitude = np.round(
49
- echodata.platform.latitude.values, self.DECIMAL_PRECISION
50
- )
51
- longitude = np.round(
52
- echodata.platform.longitude.values, self.DECIMAL_PRECISION
53
- )
47
+ latitude = (
48
+ echodata.platform.latitude.values
49
+ ) # TODO: DONT get values from here!
50
+ longitude = echodata.platform.longitude.values
54
51
 
55
52
  # RE: time coordinates: https://github.com/OSOceanAcoustics/echopype/issues/656#issue-1219104771
56
53
  # 'nmea_times' are times from the nmea datalogger associated with GPS
57
54
  # note that nmea_times, unlike time1, can be sorted
58
55
  nmea_times = np.sort(echodata.platform.time1.values)
59
56
 
60
- # 'time1' are times from the echosounder associated with the data of the transducer measurement
57
+ # 'time1' are times from the echosounder associated with the dataset of the transducer measurement
61
58
  time1 = echodata.environment.time1.values
62
59
 
63
60
  if len(nmea_times) < len(time1):
64
61
  raise Exception(
65
62
  "Problem: Not enough NMEA times available to extrapolate time1."
66
- )
63
+ ) # TODO: explore this logic further...
67
64
 
68
65
  # Align 'sv_times' to 'nmea_times'
69
66
  if not (
@@ -99,14 +96,14 @@ class GeometryManager:
99
96
 
100
97
  # create requirement for minimum linestring size
101
98
  MIN_ALLOWED_SIZE = (
102
- 4 # don't want to process files with less than 4 data points
99
+ 4 # don't want to process files with less than 4 dataset points
103
100
  )
104
101
  if (
105
102
  len(lat[~np.isnan(lat)]) < MIN_ALLOWED_SIZE
106
103
  or len(lon[~np.isnan(lon)]) < MIN_ALLOWED_SIZE
107
104
  ):
108
105
  raise Exception(
109
- f"There was not enough data in lat or lon to create geojson, {len(lat[~np.isnan(lat)])} found, less than {MIN_ALLOWED_SIZE}."
106
+ f"There was not enough dataset in lat or lon to create geojson, {len(lat[~np.isnan(lat)])} found, less than {MIN_ALLOWED_SIZE}."
110
107
  )
111
108
 
112
109
  # https://osoceanacoustics.github.io/echopype-examples/echopype_tour.html
@@ -125,28 +122,33 @@ class GeometryManager:
125
122
  crs="epsg:4326",
126
123
  )
127
124
  # Note: We set np.nan to 0,0 so downstream missing values can be omitted
128
- # TODO: so what ends up here is data with corruption at null island!!!
125
+ # TODO: so what ends up here is dataset with corruption at null island!!!
129
126
  geo_json_line = gps_gdf.to_json()
130
127
  if write_geojson:
131
128
  print("Creating local copy of geojson file.")
132
129
  with open(geo_json_name, "w") as write_file:
133
- write_file.write(geo_json_line) # NOTE: this file can include zeros for lat lon
130
+ write_file.write(
131
+ geo_json_line
132
+ ) # NOTE: this file can include zeros for lat lon
134
133
 
135
134
  geo_json_prefix = (
136
135
  f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
137
136
  )
138
137
 
139
138
  print("Checking s3 and deleting any existing GeoJSON file.")
140
- s3_manager = S3Manager()
141
- s3_objects = s3_manager.list_objects(
139
+ s3_manager = S3Manager(endpoint_url=endpoint_url)
140
+ geojson_object_exists = s3_manager.check_if_object_exists(
142
141
  bucket_name=output_bucket_name,
143
- prefix=f"{geo_json_prefix}/{geo_json_name}"
142
+ key_name=f"{geo_json_prefix}/{geo_json_name}",
144
143
  )
145
- if len(s3_objects) > 0:
144
+ if geojson_object_exists:
146
145
  print(
147
146
  "GeoJSON already exists in s3, deleting existing and continuing."
148
147
  )
149
- s3_manager.delete_nodd_objects(objects=s3_objects)
148
+ s3_manager.delete_nodd_object(
149
+ bucket_name=output_bucket_name,
150
+ key_name=f"{geo_json_prefix}/{geo_json_name}",
151
+ )
150
152
 
151
153
  print("Upload GeoJSON to s3.")
152
154
  s3_manager.upload_nodd_file(
@@ -176,32 +178,36 @@ class GeometryManager:
176
178
  #################################################################
177
179
  # GeoJSON FeatureCollection with IDs as "time"
178
180
  except Exception as err:
179
- print(
180
- f"Exception encountered extracting gps coordinates creating geojson: {err}"
181
+ raise RuntimeError(
182
+ f"Exception encountered extracting gps coordinates creating geojson, {err}"
181
183
  )
182
- raise
184
+
183
185
  # Note: returned lat/lon values can include np.nan because they need to be aligned with
184
- # the Sv data! GeoJSON needs simplification but has been filtered.
185
- return gps_df.index.values, gps_df.latitude.values, gps_df.longitude.values
186
+ # the Sv dataset! GeoJSON needs simplification but has been filtered.
187
+ # return gps_df.index.values, gps_df.latitude.values, gps_df.longitude.values
188
+ return gps_df.index.values, lat, lon
186
189
  # TODO: if geojson is already returned with 0,0, the return here
187
190
  # can include np.nan values?
188
191
 
189
192
  #######################################################
193
+ @staticmethod
190
194
  def read_s3_geo_json(
191
- self,
192
195
  ship_name,
193
196
  cruise_name,
194
197
  sensor_name,
195
198
  file_name_stem,
196
199
  input_xr_zarr_store,
200
+ endpoint_url,
201
+ output_bucket_name,
197
202
  ):
198
203
  try:
199
- s3_manager = S3Manager()
204
+ s3_manager = S3Manager(endpoint_url=endpoint_url)
200
205
  geo_json = s3_manager.read_s3_json(
201
206
  ship_name=ship_name,
202
207
  cruise_name=cruise_name,
203
208
  sensor_name=sensor_name,
204
209
  file_name_stem=file_name_stem,
210
+ output_bucket_name=output_bucket_name,
205
211
  )
206
212
  ###
207
213
  geospatial = geopandas.GeoDataFrame.from_features(
@@ -221,20 +227,15 @@ class GeometryManager:
221
227
  indices = np.searchsorted(a=aa, v=vv)
222
228
 
223
229
  return indices, geospatial
224
- except Exception as err: # Failure
225
- print(f"Exception encountered reading s3 GeoJSON: {err}")
226
- raise
230
+ except Exception as err:
231
+ raise RuntimeError(f"Exception encountered reading s3 GeoJSON, {err}")
227
232
 
228
233
  ############################################################################
229
234
  # COMES from the raw-to-zarr conversion
230
- def __write_geojson_to_file(
231
- self,
232
- store_name,
233
- data
234
- ) -> None:
235
- print('Writing GeoJSON to file.')
236
- with open(os.path.join(store_name, 'geo.json'), "w") as outfile:
237
- outfile.write(data)
235
+ # def __write_geojson_to_file(self, store_name, data) -> None:
236
+ # print("Writing GeoJSON to file.")
237
+ # with open(os.path.join(store_name, "geo.json"), "w") as outfile:
238
+ # outfile.write(data)
238
239
 
239
240
 
240
241
  ###########################################################
@@ -0,0 +1,176 @@
1
+ # import json
2
+ import geopandas as gpd
3
+ import numpy as np
4
+ from pykalman import KalmanFilter
5
+ from shapely.geometry import Point
6
+
7
+ # import hvplot.pandas
8
+ # from holoviews import opts
9
+ # hv.extension('bokeh')
10
+
11
+ # import matplotlib.pyplot as plt
12
+
13
+
14
+ # lambda for timestamp in form "yyyy-MM-ddTHH:mm:ssZ"
15
+ # dt = lambda: datetime.now().isoformat(timespec="seconds") + "Z"
16
+
17
+ # TODO: get line for example HB1906 ...save linestring to array for testing
18
+
19
+ MAX_SPEED_KNOTS = 50
20
+
21
+
22
+ # Lambert's formula ==> better accuracy than haversine
23
+ # Lambert's formula (the formula used by the calculators above) is the method used to calculate the shortest distance along the surface of an ellipsoid. When used to approximate the Earth and calculate the distance on the Earth surface, it has an accuracy on the order of 10 meters over thousands of kilometers, which is more precise than the haversine formula.
24
+
25
+
26
+ def mph_to_knots(mph_value):
27
+ """TODO:"""
28
+ # 1 mile per hour === 0.868976 Knots
29
+ return mph_value * 0.868976
30
+
31
+
32
+ def mps_to_knots(mps_value):
33
+ return mps_value * 1.94384
34
+
35
+
36
+ ###############################################################################
37
+ # Colab Notebook:
38
+ # https://colab.research.google.com/drive/1Ihb1x0EeYRNwGJ4Bqi4RqQQHu9-40oDk?usp=sharing#scrollTo=hIPziqVO48Xg
39
+ ###############################################################################
40
+
41
+
42
+ # https://shapely.readthedocs.io/en/stable/reference/shapely.MultiLineString.html#shapely.MultiLineString
43
+ class LineSimplification:
44
+ """
45
+ // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
46
+ // 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
47
+ // 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
48
+ // 2 0.01 0° 00′ 36″ town or village 1.1132 km 1.0247 km 787.1 m 434.96 m
49
+ // 3 0.001 0° 00′ 3.6″ neighborhood, street 111.32 m 102.47 m 78.71 m 43.496 m
50
+ // 4 0.0001 0° 00′ 0.36″ individual street, land parcel 11.132 m 10.247 m 7.871 m 4.3496 m
51
+ // 5 0.00001 0° 00′ 0.036″ individual trees, door entrance 1.1132 m 1.0247 m 787.1 mm 434.96 mm
52
+ // 6 0.000001 0° 00′ 0.0036″ individual humans 111.32 mm 102.47 mm 78.71 mm 43.496 mm
53
+ // 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm
54
+ private static final int SRID = 8307;
55
+ private static final double simplificationTolerance = 0.0001;
56
+ private static final long splitGeometryMs = 900000L;
57
+ private static final int batchSize = 10000;
58
+ private static final int geoJsonPrecision = 5;
59
+ final int geoJsonPrecision = 5;
60
+ final double simplificationTolerance = 0.0001;
61
+ final int simplifierBatchSize = 3000;
62
+ final long maxCount = 0;
63
+ private static final double maxAllowedSpeedKnts = 60D;
64
+ """
65
+
66
+ # TODO: in the future move to standalone library
67
+ #######################################################
68
+ def __init__(
69
+ self,
70
+ ):
71
+ pass
72
+
73
+ #######################################################
74
+ @staticmethod
75
+ def kalman_filter(
76
+ longitudes,
77
+ latitudes,
78
+ ):
79
+ """
80
+ # TODO: need to use masked array to get the right number of values
81
+ """
82
+ ### https://github.com/pykalman/pykalman
83
+ # https://stackoverflow.com/questions/43377626/how-to-use-kalman-filter-in-python-for-location-data
84
+ measurements = np.asarray([list(elem) for elem in zip(longitudes, latitudes)])
85
+ initial_state_mean = [measurements[0, 0], 0, measurements[0, 1], 0]
86
+ transition_matrix = [[1, 1, 0, 0], [0, 1, 0, 0], [0, 0, 1, 1], [0, 0, 0, 1]]
87
+ observation_matrix = [[1, 0, 0, 0], [0, 0, 1, 0]]
88
+
89
+ kf = KalmanFilter(
90
+ transition_matrices=transition_matrix,
91
+ observation_matrices=observation_matrix,
92
+ initial_state_mean=initial_state_mean,
93
+ )
94
+ kf = kf.em(measurements, n_iter=2) # TODO: 5
95
+ (smoothed_state_means, smoothed_state_covariances) = kf.smooth(measurements)
96
+
97
+ # plt.plot(longitudes, latitudes, label="original")
98
+ # plt.plot(smoothed_state_means[:, 0], smoothed_state_means[:, 2], label="smoothed")
99
+ # plt.legend()
100
+ # plt.show()
101
+
102
+ return smoothed_state_means[:, [0, 2]]
103
+
104
+ #######################################################
105
+ @staticmethod
106
+ def get_speeds(
107
+ times: np.ndarray, # don't really need time, do need to segment the dataset first
108
+ latitudes: np.ndarray,
109
+ longitudes: np.ndarray,
110
+ ) -> np.ndarray:
111
+ print(MAX_SPEED_KNOTS) # TODO: too high
112
+ print(times[0], latitudes[0], longitudes[0])
113
+ # TODO: distance/time ==> need to take position2 - position1 to get speed
114
+
115
+ # get distance difference
116
+ geom = [Point(xy) for xy in zip(longitudes, latitudes)]
117
+ points_df = gpd.GeoDataFrame({"geometry": geom}, crs="EPSG:4326")
118
+ # Conversion to UTM, a rectilinear projection coordinate system where distance can be calculated with pythagorean theorem
119
+ # an alternative could be to use EPSG 32663
120
+ points_df.to_crs(
121
+ epsg=3310, inplace=True
122
+ ) # https://gis.stackexchange.com/questions/293310/finding-distance-between-two-points-with-geoseries-distance
123
+ distance_diffs = points_df.distance(points_df.shift())
124
+ # distance_diffs_sorted = distance_diffs.sort_values(
125
+ # ascending=False
126
+ # ) # TODO: get avg cutoff time
127
+ #
128
+ time_diffs_ns = np.append(0, (times[1:] - times[:-1]).astype(int))
129
+ # time_diffs_ns_sorted = np.sort(time_diffs_ns)
130
+ # largest time diffs HB0707 [ 17. 17.93749786 21.0781271 54.82812723 85.09374797, 113.56249805 204.87500006 216. 440.68749798 544.81249818]
131
+ # largest diffs HB1906 [3.01015808e+00 3.01016013e+00 3.01017805e+00 3.01018701e+00, 3.01018701e+00 3.01018906e+00 3.01019802e+00 3.01021005e+00, 3.01021005e+00 3.01021414e+00 3.01022208e+00 3.01022899e+00, 3.01024998e+00 3.01025920e+00 3.01026202e+00 3.01028096e+00, 3.01119411e+00 3.01120896e+00 3.01120998e+00 3.01120998e+00, 3.01122099e+00 3.01122790e+00 3.01122790e+00 3.01124506e+00, 3.01125197e+00 3.01128090e+00 3.01142707e+00 3.01219814e+00, 3.01221120e+00 3.01223014e+00 3.01225498e+00 3.01225882e+00, 3.01226010e+00 3.01312998e+00 3.01316096e+00 3.01321190e+00, 3.01321293e+00 3.01322880e+00 3.01322906e+00 3.01323110e+00, 3.01323213e+00 3.01323290e+00 3.01326208e+00 3.01328512e+00, 3.01418112e+00 3.01420109e+00 3.01421107e+00 3.01421184e+00, 3.01421414e+00 3.01424819e+00 3.01512883e+00 3.01516006e+00, 3.01524198e+00 3.01619917e+00 3.01623194e+00 3.01623296e+00, 3.01917594e+00 3.01921408e+00 3.01921587e+00 3.02022195e+00, 3.02025216e+00 3.02121702e+00 3.02325811e+00 3.02410291e+00, 3.02421914e+00 3.02426701e+00 3.02523776e+00 3.02718694e+00, 3.02927590e+00 3.03621606e+00 3.03826304e+00 3.34047514e+00, 3.36345114e+00 3.39148595e+00 4.36819302e+00 4.50157901e+00, 4.50315699e+00 4.50330598e+00 4.50333491e+00 4.50428416e+00, 4.50430490e+00 4.50430694e+00 4.50526387e+00 4.50530790e+00, 4.50530995e+00 4.50532301e+00 4.50533478e+00 4.50629402e+00, 4.50730701e+00 4.50825882e+00 4.50939008e+00 6.50179098e+00, 2.25025029e+01 1.39939425e+02 1.54452331e+02 1.60632653e+03, 1.74574667e+05 4.33569587e+05 4.35150475e+05 8.00044883e+05]
132
+ nanoseconds_per_second = 1e9
133
+ speed_meters_per_second = (
134
+ distance_diffs / time_diffs_ns * nanoseconds_per_second
135
+ )
136
+ # returns the speed in meters per second #TODO: get speed in knots
137
+ return speed_meters_per_second.to_numpy(dtype="float32") # includes nan
138
+
139
+ # def remove_null_island_values(
140
+ # self,
141
+ # epsilon=1e-5,
142
+ # ) -> None:
143
+ # # TODO: low priority
144
+ # print(epsilon)
145
+ # pass
146
+
147
+ def break_linestring_into_multi_linestring(
148
+ self,
149
+ ) -> None:
150
+ # TODO: medium priority
151
+ # For any line-strings across the antimeridian, break into multilinestring
152
+ # average cadence is measurements every 1 second
153
+ # break when over 1 minute
154
+ pass
155
+
156
+ def simplify(
157
+ self,
158
+ ) -> None:
159
+ # TODO: medium-high priority
160
+ pass
161
+
162
+ #######################################################
163
+
164
+
165
+ # [(-72.2001724243164, 40.51750183105469), # latBB
166
+ # (-72.20023345947266, 40.51749038696289),
167
+ # (-72.20033264160156, 40.51750183105469), # lonAA, latBB
168
+ # (-72.20030212402344, 40.517391204833984),
169
+ # (-72.20033264160156, 40.517330169677734), # lonAA, latCC
170
+ # (-72.2003402709961, 40.51729965209961),
171
+ # (-72.20033264160156, 40.517330169677734), # lonAA, latCC
172
+ # (-72.20040130615234, 40.5172004699707),
173
+ # (-72.20050048828125, 40.51716995239258),
174
+ # (-72.2004623413086, 40.51710891723633)]
175
+
176
+ ###########################################################