water-column-sonar-processing 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. water_column_sonar_processing/__init__.py +16 -0
  2. water_column_sonar_processing/aws/__init__.py +7 -4
  3. water_column_sonar_processing/aws/dynamodb_manager.py +70 -49
  4. water_column_sonar_processing/aws/s3_manager.py +112 -122
  5. water_column_sonar_processing/aws/s3fs_manager.py +13 -19
  6. water_column_sonar_processing/aws/sns_manager.py +10 -21
  7. water_column_sonar_processing/aws/sqs_manager.py +10 -18
  8. water_column_sonar_processing/cruise/__init__.py +4 -0
  9. water_column_sonar_processing/cruise/create_empty_zarr_store.py +51 -33
  10. water_column_sonar_processing/cruise/resample_regrid.py +109 -58
  11. water_column_sonar_processing/geometry/__init__.py +5 -0
  12. water_column_sonar_processing/geometry/geometry_manager.py +79 -48
  13. water_column_sonar_processing/geometry/geometry_simplification.py +13 -12
  14. water_column_sonar_processing/geometry/pmtile_generation.py +24 -23
  15. water_column_sonar_processing/index/__init__.py +3 -0
  16. water_column_sonar_processing/index/index_manager.py +104 -80
  17. water_column_sonar_processing/model/__init__.py +3 -0
  18. water_column_sonar_processing/model/zarr_manager.py +113 -75
  19. water_column_sonar_processing/process.py +76 -69
  20. water_column_sonar_processing/utility/__init__.py +6 -0
  21. water_column_sonar_processing/utility/cleaner.py +6 -7
  22. water_column_sonar_processing/utility/constants.py +42 -35
  23. water_column_sonar_processing/utility/pipeline_status.py +37 -10
  24. water_column_sonar_processing/utility/timestamp.py +3 -2
  25. {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/METADATA +31 -1
  26. water_column_sonar_processing-0.0.6.dist-info/RECORD +29 -0
  27. water_column_sonar_processing-0.0.5.dist-info/RECORD +0 -29
  28. {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/LICENSE +0 -0
  29. {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/WHEEL +0 -0
  30. {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/cruise/resample_regrid.py
@@ -1,15 +1,15 @@
 import gc
 import os
 from pathlib import Path
+
 import numcodecs
 import numpy as np
-import xarray as xr
 import pandas as pd
+import xarray as xr
 
 from water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
-from water_column_sonar_processing.model.zarr_manager import ZarrManager
 from water_column_sonar_processing.geometry.geometry_manager import GeometryManager
-
+from water_column_sonar_processing.model.zarr_manager import ZarrManager
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
@@ -19,30 +19,34 @@ numcodecs.blosc.set_nthreads(1)
 # ZARR_V3_EXPERIMENTAL_API = 1
 # creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
 
+
 class ResampleRegrid:
     #######################################################
     def __init__(
-            self,
+        self,
     ):
         self.__overwrite = True
         self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
         self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-        self.dtype = 'float32'
+        self.dtype = "float32"
 
     #################################################################
     def interpolate_data(
-            self,
-            input_xr,
-            ping_times,
-            all_cruise_depth_values,
+        self,
+        input_xr,
+        ping_times,
+        all_cruise_depth_values,
     ) -> np.ndarray:
         print("Interpolating data.")
         try:
-            data = np.empty((
-                len(all_cruise_depth_values),
-                len(ping_times),
-                len(input_xr.frequency_nominal)
-            ), dtype=self.dtype)
+            data = np.empty(
+                (
+                    len(all_cruise_depth_values),
+                    len(ping_times),
+                    len(input_xr.frequency_nominal),
+                ),
+                dtype=self.dtype,
+            )
 
             data[:] = np.nan
 
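The rewrapped np.empty call above is pure formatting, but the allocation pattern it carries is the heart of interpolate_data: build a NaN-filled (depth, time, frequency) cube and label it with coordinates. A minimal, self-contained sketch of that pattern (axis sizes and coordinate values here are invented, not the package's real data):

import numpy as np
import pandas as pd
import xarray as xr

# Hypothetical cruise-sized axes; the real values come from DynamoDB and Zarr.
depth_values = np.arange(0.0, 50.0, 0.5, dtype="float32")
ping_times = pd.date_range("2007-07-20", periods=100, freq="s")
frequencies = np.array([18000.0, 38000.0, 120000.0, 200000.0])

# Allocate (depth, time, frequency) and start all-NaN so untouched cells
# read as missing data.
data = np.empty(
    (len(depth_values), len(ping_times), len(frequencies)), dtype="float32"
)
data[:] = np.nan

regrid_resample = xr.DataArray(
    data=data,
    dims=("depth", "time", "frequency"),
    coords={"depth": depth_values, "time": ping_times, "frequency": frequencies},
)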
@@ -53,37 +57,60 @@ class ResampleRegrid:
                     "depth": all_cruise_depth_values,
                     "time": ping_times,
                     "frequency": input_xr.frequency_nominal.values,
-                }
+                },
             )
 
             channels = input_xr.channel.values
-            for channel in range(len(channels)):  # TODO: leaving off here, need to subset for just indices in time axis
-                print(np.nanmax(input_xr.echo_range.sel(channel=input_xr.channel[channel]).values))
+            for channel in range(
+                len(channels)
+            ):  # TODO: leaving off here, need to subset for just indices in time axis
+                print(
+                    np.nanmax(
+                        input_xr.echo_range.sel(
+                            channel=input_xr.channel[channel]
+                        ).values
+                    )
+                )
                 #
                 max_depths = np.nanmax(
                     a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values,
-                    axis=1
+                    axis=1,
                 )
                 superset_of_max_depths = set(
-                    np.nanmax(input_xr.echo_range.sel(channel=input_xr.channel[channel]).values, 1)
+                    np.nanmax(
+                        input_xr.echo_range.sel(
+                            channel=input_xr.channel[channel]
+                        ).values,
+                        1,
+                    )
                 )
-                set_of_max_depths = list({x for x in superset_of_max_depths if x == x})  # removes nan's
+                set_of_max_depths = list(
+                    {x for x in superset_of_max_depths if x == x}
+                )  # removes nan's
                 # iterate through partitions of data with similar depths and resample
                 for select_max_depth in set_of_max_depths:
                     # TODO: for nan just skip and leave all nan's
-                    select_indices = [i for i in range(0, len(max_depths)) if max_depths[i] == select_max_depth]
+                    select_indices = [
+                        i
+                        for i in range(0, len(max_depths))
+                        if max_depths[i] == select_max_depth
+                    ]
 
                     # now create new DataArray with proper dimension and indices
                     # data_select = input_xr.Sv.sel(
                     #     channel=input_xr.channel[channel]
                     # ).values[select_indices, :].T  # TODO: dont like this transpose
-                    data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[select_indices, :].T.values
+                    data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[
+                        select_indices, :
+                    ].T.values
                     # change from ".values[select_indices, :].T" to "[select_indices, :].values.T"
 
                     times_select = input_xr.ping_time.values[select_indices]
                     depths_select = input_xr.echo_range.sel(
                         channel=input_xr.channel[channel]
-                    ).values[select_indices[0], :]  # '0' because all others in group should be same
+                    ).values[
+                        select_indices[0], :
+                    ]  # '0' because all others in group should be same
 
                     da_select = xr.DataArray(
                         data=data_select,
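The loop above leans on a NaN-aware set trick: NaN != NaN, so {x for x in s if x == x} keeps only real maximum depths, and pings are then grouped by their shared max echo range so each like-depth partition can be resampled together. A toy sketch of just that partitioning (echo ranges invented):

import numpy as np

# One max depth per ping; NaN marks pings with no usable echo range.
max_depths = np.array([250.0, 250.0, np.nan, 500.0, 500.0, 250.0])

# NaN != NaN, so the comprehension silently drops the NaN entries.
set_of_max_depths = list({x for x in set(max_depths) if x == x})

for select_max_depth in set_of_max_depths:
    select_indices = [
        i for i in range(len(max_depths)) if max_depths[i] == select_max_depth
    ]
    print(select_max_depth, select_indices)
# prints 250.0 [0, 1, 5] and 500.0 [3, 4] (group order may vary);
# ping 2 is never selected, so it stays NaN in the output grid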
@@ -91,27 +118,32 @@ class ResampleRegrid:
                        coords={
                            "depth": depths_select,
                            "time": times_select,
-                        }
-                    ).dropna(dim='depth')
-                    resampled = da_select.interp(depth=all_cruise_depth_values, method="nearest")
+                        },
+                    ).dropna(dim="depth")
+                    resampled = da_select.interp(
+                        depth=all_cruise_depth_values, method="nearest"
+                    )
                    # write to the resample array
                    regrid_resample.loc[
-                        dict(time=times_select, frequency=input_xr.frequency_nominal.values[channel])
+                        dict(
+                            time=times_select,
+                            frequency=input_xr.frequency_nominal.values[channel],
+                        )
                    ] = resampled
                    print(f"updated {len(times_select)} ping times")
        except Exception as err:
-            print(f'Problem finding the dynamodb table: {err}')
+            print(f"Problem finding the dynamodb table: {err}")
            raise err
        print("Done interpolating data.")
        return regrid_resample
 
    #################################################################
    def resample_regrid(
-            self,
-            ship_name,
-            cruise_name,
-            sensor_name,
-            table_name,
+        self,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        table_name,
    ) -> None:
        """
        The goal here is to interpolate the data against the depth values already populated
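What the dropna/interp/.loc sequence above actually does, in isolation: drop samples with no depth, snap a short profile onto the cruise-wide depth grid by nearest-neighbor interpolation, then write it into the big labeled array through a dict selector. A sketch with invented values (note interp with method="nearest" needs scipy installed):

import numpy as np
import xarray as xr

# Two pings, three samples; the deepest sample never got a depth.
da_select = xr.DataArray(
    data=[[1.0, 2.0], [3.0, 4.0], [np.nan, np.nan]],
    dims=("depth", "time"),
    coords={"depth": [0.0, 1.0, np.nan], "time": [0, 1]},
).dropna(dim="depth")

all_cruise_depth_values = np.array([0.0, 0.4, 0.9])
resampled = da_select.interp(depth=all_cruise_depth_values, method="nearest")
# rows: depth 0.0 -> [1, 2], 0.4 -> [1, 2], 0.9 -> [3, 4]

regrid = xr.DataArray(
    np.full((3, 2), np.nan),
    dims=("depth", "time"),
    coords={"depth": all_cruise_depth_values, "time": [0, 1]},
)
regrid.loc[dict(time=[0, 1])] = resampled  # same writeback style as above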
@@ -144,7 +176,7 @@ class ResampleRegrid:
        #########################################################
        #########################################################
        # TODO: iterate files here
-        all_file_names = cruise_df['FILE_NAME']
+        all_file_names = cruise_df["FILE_NAME"]
        for file_name in all_file_names:
            gc.collect()
            file_name_stem = Path(file_name).stem
@@ -158,7 +190,9 @@ class ResampleRegrid:
            # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
 
            # Get index from all cruise files. Note: should be based on which are included in cruise.
-            index = cruise_df.index[cruise_df['FILE_NAME'] == f"{file_name_stem}.raw"][0]
+            index = cruise_df.index[
+                cruise_df["FILE_NAME"] == f"{file_name_stem}.raw"
+            ][0]
 
            # get input store
            input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
@@ -171,29 +205,38 @@ class ResampleRegrid:
            # [3] Get needed indices
            # Offset from start index to insert new data. Note that missing values are excluded.
            ping_time_cumsum = np.insert(
-                np.cumsum(cruise_df['NUM_PING_TIME_DROPNA'].dropna().to_numpy(dtype=int)),
+                np.cumsum(
+                    cruise_df["NUM_PING_TIME_DROPNA"].dropna().to_numpy(dtype=int)
+                ),
                obj=0,
-                values=0
+                values=0,
            )
            start_ping_time_index = ping_time_cumsum[index]
            end_ping_time_index = ping_time_cumsum[index + 1]
 
-            min_echo_range = np.nanmin(np.float32(cruise_df['MIN_ECHO_RANGE']))
-            max_echo_range = np.nanmax(np.float32(cruise_df['MAX_ECHO_RANGE']))
+            min_echo_range = np.nanmin(np.float32(cruise_df["MIN_ECHO_RANGE"]))
+            max_echo_range = np.nanmax(np.float32(cruise_df["MAX_ECHO_RANGE"]))
 
-            print("Creating empty ndarray for Sv data.")  # Note: cruise_zarr dimensions are (depth, time, frequency)
+            print(
+                "Creating empty ndarray for Sv data."
+            )  # Note: cruise_zarr dimensions are (depth, time, frequency)
            cruise_sv_subset = np.empty(
-                shape=output_zarr_store.Sv[:, start_ping_time_index:end_ping_time_index, :].shape
+                shape=output_zarr_store.Sv[
+                    :, start_ping_time_index:end_ping_time_index, :
+                ].shape
            )
            cruise_sv_subset[:, :, :] = np.nan  # (5208, 9778, 4)
 
            all_cruise_depth_values = zarr_manager.get_depth_values(
-                min_echo_range=min_echo_range,
-                max_echo_range=max_echo_range
+                min_echo_range=min_echo_range, max_echo_range=max_echo_range
            )
 
            print(" ".join(list(input_xr_zarr_store.Sv.dims)))
-            if set(input_xr_zarr_store.Sv.dims) != {'channel', 'ping_time', 'range_sample'}:
+            if set(input_xr_zarr_store.Sv.dims) != {
+                "channel",
+                "ping_time",
+                "range_sample",
+            }:
                raise Exception("Xarray dimensions are not as expected.")
 
            # get geojson
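The cumsum bookkeeping above decides where each file's pings land in the cruise-wide time axis: a zero-prefixed running total of per-file ping counts gives each file a [start, end) slice. Stripped of the DataFrame plumbing (counts invented):

import numpy as np

num_ping_time_dropna = np.array([100, 250, 175])  # pings per .raw file

# Zero-prefixed running total: file i writes to [cumsum[i], cumsum[i + 1])
ping_time_cumsum = np.insert(np.cumsum(num_ping_time_dropna), obj=0, values=0)
# -> array([  0, 100, 350, 525])

index = 1  # second file of the cruise
start_ping_time_index = ping_time_cumsum[index]    # 100
end_ping_time_index = ping_time_cumsum[index + 1]  # 350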
@@ -209,8 +252,13 @@ class ResampleRegrid:
 
            ping_times = input_xr.ping_time.values
            # Date format: numpy.datetime64('2007-07-20T02:10:25.845073920') converts to "1184897425.845074"
-            epoch_seconds = [(pd.Timestamp(i) - pd.Timestamp('1970-01-01')) / pd.Timedelta('1s') for i in ping_times]
-            output_zarr_store.time[start_ping_time_index:end_ping_time_index] = epoch_seconds
+            epoch_seconds = [
+                (pd.Timestamp(i) - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
+                for i in ping_times
+            ]
+            output_zarr_store.time[start_ping_time_index:end_ping_time_index] = (
+                epoch_seconds
+            )
 
            # --- UPDATING --- #
 
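The conversion above is the standard pandas idiom for turning nanosecond datetime64 ping times into float epoch seconds; the diff's own comment gives the expected value, which the idiom reproduces:

import numpy as np
import pandas as pd

ping_times = np.array(["2007-07-20T02:10:25.845073920"], dtype="datetime64[ns]")
epoch_seconds = [
    (pd.Timestamp(t) - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
    for t in ping_times
]
print(epoch_seconds)  # [1184897425.845074]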
@@ -220,30 +268,33 @@ class ResampleRegrid:
                all_cruise_depth_values=all_cruise_depth_values,
            )
 
-            print(f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}")
+            print(
+                f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}"
+            )
 
            #########################################################################
            # write Sv values to cruise-level-model-store
-            for channel in range(len(input_xr.channel.values)):  # doesn't like being written in one fell swoop :(
+            for channel in range(
+                len(input_xr.channel.values)
+            ):  # doesn't like being written in one fell swoop :(
                output_zarr_store.Sv[
-                    :,
-                    start_ping_time_index:end_ping_time_index,
-                    channel
+                    :, start_ping_time_index:end_ping_time_index, channel
                ] = regrid_resample[:, :, channel]
 
            #########################################################################
            # [5] write subset of latitude/longitude
-            output_zarr_store.latitude[start_ping_time_index:end_ping_time_index] = geospatial.dropna()[
-                'latitude'
-            ].values
-            output_zarr_store.longitude[start_ping_time_index:end_ping_time_index] = geospatial.dropna()[
-                'longitude'
-            ].values
+            output_zarr_store.latitude[
+                start_ping_time_index:end_ping_time_index
+            ] = geospatial.dropna()["latitude"].values
+            output_zarr_store.longitude[
+                start_ping_time_index:end_ping_time_index
+            ] = geospatial.dropna()["longitude"].values
        except Exception as err:
-            print(f'Problem interpolating the data: {err}')
+            print(f"Problem interpolating the data: {err}")
            raise err
        print("Done interpolating data.")
 
    #######################################################
 
+
###########################################################
water_column_sonar_processing/geometry/__init__.py (new file)
@@ -0,0 +1,5 @@
+from .geometry_manager import GeometryManager
+from .geometry_simplification import GeometrySimplification
+from .pmtile_generation import PMTileGeneration
+
+__all__ = ["GeometryManager", "GeometrySimplification", "PMTileGeneration"]
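With these exports in place, callers can import the geometry classes from the subpackage rather than from their defining modules:

from water_column_sonar_processing.geometry import (
    GeometryManager,
    GeometrySimplification,
    PMTileGeneration,
)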
water_column_sonar_processing/geometry/geometry_manager.py
@@ -1,10 +1,11 @@
 from pathlib import Path
-import numpy as np
+
 import geopandas
+import numpy as np
 import pandas as pd
 
-from water_column_sonar_processing.utility.cleaner import Cleaner
 from water_column_sonar_processing.aws.s3_manager import S3Manager
+from water_column_sonar_processing.utility.cleaner import Cleaner
 
 """
 // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
@@ -22,28 +23,32 @@ from water_column_sonar_processing.aws.s3_manager import S3Manager
 class GeometryManager:
    #######################################################
    def __init__(
-            self,
+        self,
    ):
        self.DECIMAL_PRECISION = 5  # precision for GPS coordinates
        self.SIMPLIFICATION_TOLERANCE = 0.0001  # RDP simplification to street level
 
    #######################################################
    def read_echodata_gps_data(
-            self,
-            echodata,
-            ship_name,
-            cruise_name,
-            sensor_name,
-            file_name,
-            write_geojson=True,
+        self,
+        echodata,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        file_name,
+        write_geojson=True,
    ) -> tuple:
        file_name_stem = Path(file_name).stem
        geo_json_name = f"{file_name_stem}.json"
 
-        print('Getting GPS data from echopype object.')
+        print("Getting GPS data from echopype object.")
        try:
-            latitude = np.round(echodata.platform.latitude.values, self.DECIMAL_PRECISION)
-            longitude = np.round(echodata.platform.longitude.values, self.DECIMAL_PRECISION)
+            latitude = np.round(
+                echodata.platform.latitude.values, self.DECIMAL_PRECISION
+            )
+            longitude = np.round(
+                echodata.platform.longitude.values, self.DECIMAL_PRECISION
+            )
 
            # RE: time coordinates: https://github.com/OSOceanAcoustics/echopype/issues/656#issue-1219104771
            # 'nmea_times' are times from the nmea datalogger associated with GPS
@@ -54,10 +59,15 @@ class GeometryManager:
            time1 = echodata.environment.time1.values
 
            if len(nmea_times) < len(time1):
-                raise Exception("Problem: Not enough NMEA times available to extrapolate time1.")
+                raise Exception(
+                    "Problem: Not enough NMEA times available to extrapolate time1."
+                )
 
            # Align 'sv_times' to 'nmea_times'
-            if not (np.all(time1[:-1] <= time1[1:]) and np.all(nmea_times[:-1] <= nmea_times[1:])):
+            if not (
+                np.all(time1[:-1] <= time1[1:])
+                and np.all(nmea_times[:-1] <= nmea_times[1:])
+            ):
                raise Exception("Problem: NMEA times are not sorted.")
 
            # Finds the indices where 'v' can be inserted just to the right of 'a'
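The comment above describes the alignment contract, though the searchsorted call itself sits outside this hunk. One plausible reading of it, as a standalone sketch (times fabricated, not the package's actual code):

import numpy as np

nmea_times = np.array(
    ["2021-01-01T00:00:00", "2021-01-01T00:00:02", "2021-01-01T00:00:04"],
    dtype="datetime64[ns]",
)
time1 = np.array(
    ["2021-01-01T00:00:01", "2021-01-01T00:00:03"], dtype="datetime64[ns]"
)

# Insertion point just to the right, minus one: each Sv time borrows the
# closest preceding GPS fix. A ping before the first fix would yield -1,
# which would explain why the next hunk masks lat/lon where indices < 0.
indices = np.searchsorted(nmea_times, time1, side="right") - 1
print(indices)  # [0 1]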
@@ -67,65 +77,83 @@ class GeometryManager:
            lon = longitude[indices]
            lon[indices < 0] = np.nan
 
-            if not (np.all(lat[~np.isnan(lat)] >= -90.) and np.all(lat[~np.isnan(lat)] <= 90.) and np.all(lon[~np.isnan(lon)] >= -180.) and np.all(lon[~np.isnan(lon)] <= 180.)):
+            if not (
+                np.all(lat[~np.isnan(lat)] >= -90.0)
+                and np.all(lat[~np.isnan(lat)] <= 90.0)
+                and np.all(lon[~np.isnan(lon)] >= -180.0)
+                and np.all(lon[~np.isnan(lon)] <= 180.0)
+            ):
                raise Exception("Problem: GPS Data falls outside allowed bounds.")
 
            # check for visits to null island
            null_island_indices = list(
-                set.intersection(set(np.where(np.abs(lat) < 1e-3)[0]), set(np.where(np.abs(lon) < 1e-3)[0]))
+                set.intersection(
+                    set(np.where(np.abs(lat) < 1e-3)[0]),
+                    set(np.where(np.abs(lon) < 1e-3)[0]),
+                )
            )
            lat[null_island_indices] = np.nan
            lon[null_island_indices] = np.nan
 
            # create requirement for minimum linestring size
-            MIN_ALLOWED_SIZE = 4  # don't want to process files with less than 4 data points
-            if len(lat[~np.isnan(lat)]) < MIN_ALLOWED_SIZE or len(lon[~np.isnan(lon)]) < MIN_ALLOWED_SIZE:
+            MIN_ALLOWED_SIZE = (
+                4  # don't want to process files with less than 4 data points
+            )
+            if (
+                len(lat[~np.isnan(lat)]) < MIN_ALLOWED_SIZE
+                or len(lon[~np.isnan(lon)]) < MIN_ALLOWED_SIZE
+            ):
                raise Exception(
                    f"There was not enough data in lat or lon to create geojson, {len(lat[~np.isnan(lat)])} found, less than {MIN_ALLOWED_SIZE}."
                )
 
            # https://osoceanacoustics.github.io/echopype-examples/echopype_tour.html
-            gps_df = pd.DataFrame({
-                'latitude': lat,
-                'longitude': lon,
-                'time': time1
-            }).set_index(['time']).fillna(0)
+            gps_df = (
+                pd.DataFrame({"latitude": lat, "longitude": lon, "time": time1})
+                .set_index(["time"])
+                .fillna(0)
+            )
 
            # Note: We set np.nan to 0,0 so downstream missing values can be omitted
            gps_gdf = geopandas.GeoDataFrame(
                gps_df,
                geometry=geopandas.points_from_xy(
-                    gps_df['longitude'],
-                    gps_df['latitude']
+                    gps_df["longitude"], gps_df["latitude"]
                ),
-                crs="epsg:4326"
+                crs="epsg:4326",
            )
            # Note: We set np.nan to 0,0 so downstream missing values can be omitted
 
            geo_json_line = gps_gdf.to_json()
            if write_geojson:
-                print('Creating local copy of geojson file.')
+                print("Creating local copy of geojson file.")
                with open(geo_json_name, "w") as write_file:
                    write_file.write(geo_json_line)
 
-                geo_json_prefix = f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
+                geo_json_prefix = (
+                    f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
+                )
 
-                print('Checking s3 and deleting any existing GeoJSON file.')
+                print("Checking s3 and deleting any existing GeoJSON file.")
                s3_manager = S3Manager()
-                s3_objects = s3_manager.list_nodd_objects(prefix=f"{geo_json_prefix}/{geo_json_name}")
+                s3_objects = s3_manager.list_nodd_objects(
+                    prefix=f"{geo_json_prefix}/{geo_json_name}"
+                )
                if len(s3_objects) > 0:
-                    print('GeoJSON already exists in s3, deleting existing and continuing.')
+                    print(
+                        "GeoJSON already exists in s3, deleting existing and continuing."
+                    )
                    s3_manager.delete_nodd_objects(objects=s3_objects)
 
-                print('Upload GeoJSON to s3.')
+                print("Upload GeoJSON to s3.")
                s3_manager.upload_nodd_file(
                    file_name=geo_json_name,  # file_name
-                    key=f"{geo_json_prefix}/{geo_json_name}"  # key
+                    key=f"{geo_json_prefix}/{geo_json_name}",  # key
                )
 
                # TODO: delete geo_json file
                cleaner = Cleaner()
-                cleaner.delete_local_files(file_types=['*.json'])
+                cleaner.delete_local_files(file_types=["*.json"])
 
            #################################################################
            # TODO: simplify with shapely
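The whole GeoJSON path above, compacted into a runnable sketch; coordinates are invented, and a boolean mask stands in for the set-intersection null-island test used in the real code:

import geopandas
import numpy as np
import pandas as pd

lat = np.array([42.35, 0.0, 42.36])
lon = np.array([-70.05, 0.0, -70.06])
time = pd.date_range("2021-01-01", periods=3, freq="s")

# Null island: fixes within 1e-3 degrees of (0, 0) are treated as missing.
null_island = (np.abs(lat) < 1e-3) & (np.abs(lon) < 1e-3)
lat[null_island] = np.nan
lon[null_island] = np.nan

gps_df = (
    pd.DataFrame({"latitude": lat, "longitude": lon, "time": time})
    .set_index(["time"])
    .fillna(0)  # NaN -> 0,0 so downstream code can omit those points
)
gps_gdf = geopandas.GeoDataFrame(
    gps_df,
    geometry=geopandas.points_from_xy(gps_df["longitude"], gps_df["latitude"]),
    crs="epsg:4326",
)
print(gps_gdf.to_json())  # GeoJSON FeatureCollection keyed by time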
@@ -144,7 +172,9 @@ class GeometryManager:
            #################################################################
            # GeoJSON FeatureCollection with IDs as "time"
        except Exception as err:
-            print(f'Exception encountered extracting gps coordinates creating geojson: {err}')
+            print(
+                f"Exception encountered extracting gps coordinates creating geojson: {err}"
+            )
            raise
        # Note: returned lat/lon values can include np.nan because they need to be aligned with
        # the Sv data! GeoJSON needs simplification but has been filtered.
@@ -154,12 +184,12 @@ class GeometryManager:
 
    #######################################################
    def read_s3_geo_json(
-            self,
-            ship_name,
-            cruise_name,
-            sensor_name,
-            file_name_stem,
-            input_xr_zarr_store,
+        self,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        file_name_stem,
+        input_xr_zarr_store,
    ):
        try:
            s3_manager = S3Manager()
@@ -170,25 +200,26 @@ class GeometryManager:
                file_name_stem=file_name_stem,
            )
            ###
-            geospatial = geopandas.GeoDataFrame.from_features(geo_json['features']).set_index(
-                pd.json_normalize(geo_json["features"])["id"].values
-            )
+            geospatial = geopandas.GeoDataFrame.from_features(
+                geo_json["features"]
+            ).set_index(pd.json_normalize(geo_json["features"])["id"].values)
            null_island_indices = list(
                set.intersection(
                    set(np.where(np.abs(geospatial.latitude.values) < 1e-3)[0]),
-                    set(np.where(np.abs(geospatial.longitude.values) < 1e-3)[0])
+                    set(np.where(np.abs(geospatial.longitude.values) < 1e-3)[0]),
                )
            )
            geospatial.iloc[null_island_indices] = np.nan
            ###
-            geospatial_index = geospatial.dropna().index.values.astype('datetime64[ns]')
+            geospatial_index = geospatial.dropna().index.values.astype("datetime64[ns]")
            aa = input_xr_zarr_store.ping_time.values.tolist()
            vv = geospatial_index.tolist()
            indices = np.searchsorted(a=aa, v=vv)
 
            return indices, geospatial
        except Exception as err:  # Failure
-            print(f'Exception encountered reading s3 GeoJSON: {err}')
+            print(f"Exception encountered reading s3 GeoJSON: {err}")
            raise
 
+
###########################################################
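read_s3_geo_json's rebuild step in isolation: GeoDataFrame.from_features plus a json_normalize index recovers a time-indexed frame from the stored features. The geo_json dict here is a hand-written stand-in for what S3Manager would return:

import geopandas
import pandas as pd

geo_json = {
    "type": "FeatureCollection",
    "features": [
        {
            "id": "2021-01-01T00:00:00",
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": [-70.05, 42.35]},
            "properties": {"latitude": 42.35, "longitude": -70.05},
        }
    ],
}

geospatial = geopandas.GeoDataFrame.from_features(
    geo_json["features"]
).set_index(pd.json_normalize(geo_json["features"])["id"].values)
print(geospatial.index.values.astype("datetime64[ns]"))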
water_column_sonar_processing/geometry/geometry_simplification.py
@@ -31,51 +31,52 @@
 
 
 """
-class GeometrySimplification(object):
+
+
+class GeometrySimplification:
    # TODO: in the future move to standalone library
    #######################################################
    def __init__(
-            self,
+        self,
    ):
        pass
 
    #######################################################
    def speed_check(
-            self,
-            speed_knots=50,
+        self,
+        speed_knots=50,
    ) -> None:
        print(speed_knots)
        pass
 
    def remove_null_island_values(
-            self,
-            epsilon=1e-5,
+        self,
+        epsilon=1e-5,
    ) -> None:
        print(epsilon)
        pass
 
    def stream_geometry(
-            self,
+        self,
    ) -> None:
        pass
 
    def break_linestring_into_multi_linestring(
-            self,
+        self,
    ) -> None:
        # For any line-strings across the antimeridian, break into multilinestring
        pass
 
    def simplify(
-            self,
+        self,
    ) -> None:
        pass
 
-    def kalman_filter(
-            self
-    ):
+    def kalman_filter(self):
        # for cruises with bad signal, filter so that
        pass
 
    #######################################################
 
+
###########################################################
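GeometrySimplification is still all stubs in this release. For what simplify() might eventually do, a sketch with shapely's Douglas-Peucker (RDP) simplifier at the 0.0001-degree street-level tolerance GeometryManager already defines; this is an assumption about intent, not shipped behavior:

from shapely.geometry import LineString

# Hypothetical three-point track; the middle fix sits ~1e-5 deg off the chord.
track = LineString([(-70.05, 42.35), (-70.055, 42.35501), (-70.06, 42.36)])
simplified = track.simplify(0.0001, preserve_topology=False)  # RDP reduction
print(len(track.coords), "->", len(simplified.coords))  # 3 -> 2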