water-column-sonar-processing 0.0.1__py3-none-any.whl → 26.1.14__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of water-column-sonar-processing might be problematic.

Files changed (60)
  1. water_column_sonar_processing/__init__.py +13 -0
  2. water_column_sonar_processing/aws/__init__.py +7 -0
  3. water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
  4. water_column_sonar_processing/aws/s3_manager.py +418 -0
  5. water_column_sonar_processing/aws/s3fs_manager.py +64 -0
  6. {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
  7. {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
  8. water_column_sonar_processing/cruise/__init__.py +4 -0
  9. water_column_sonar_processing/cruise/create_empty_zarr_store.py +129 -0
  10. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  11. water_column_sonar_processing/cruise/resample_regrid.py +323 -0
  12. water_column_sonar_processing/geometry/__init__.py +13 -0
  13. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  14. water_column_sonar_processing/geometry/geometry_manager.py +241 -0
  15. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  16. water_column_sonar_processing/geometry/pmtile_generation.py +266 -0
  17. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  18. water_column_sonar_processing/index/__init__.py +3 -0
  19. water_column_sonar_processing/index/index_manager.py +381 -0
  20. water_column_sonar_processing/model/__init__.py +3 -0
  21. water_column_sonar_processing/model/zarr_manager.py +741 -0
  22. water_column_sonar_processing/processing/__init__.py +4 -0
  23. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  24. water_column_sonar_processing/processing/raw_to_zarr.py +331 -0
  25. water_column_sonar_processing/utility/__init__.py +13 -0
  26. {model → water_column_sonar_processing}/utility/cleaner.py +7 -7
  27. water_column_sonar_processing/utility/constants.py +118 -0
  28. {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
  29. water_column_sonar_processing/utility/timestamp.py +12 -0
  30. water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
  31. water_column_sonar_processing-26.1.14.dist-info/RECORD +34 -0
  32. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +1 -1
  33. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info/licenses}/LICENSE +1 -1
  34. water_column_sonar_processing-26.1.14.dist-info/top_level.txt +1 -0
  35. __init__.py +0 -0
  36. model/__init__.py +0 -0
  37. model/aws/__init__.py +0 -0
  38. model/aws/dynamodb_manager.py +0 -149
  39. model/aws/s3_manager.py +0 -356
  40. model/aws/s3fs_manager.py +0 -74
  41. model/cruise/__init__.py +0 -0
  42. model/cruise/create_empty_zarr_store.py +0 -166
  43. model/cruise/resample_regrid.py +0 -248
  44. model/geospatial/__init__.py +0 -0
  45. model/geospatial/geometry_manager.py +0 -194
  46. model/geospatial/geometry_simplification.py +0 -81
  47. model/geospatial/pmtile_generation.py +0 -74
  48. model/index/__init__.py +0 -0
  49. model/index/index.py +0 -228
  50. model/model.py +0 -138
  51. model/utility/__init__.py +0 -0
  52. model/utility/constants.py +0 -56
  53. model/utility/timestamp.py +0 -12
  54. model/zarr/__init__.py +0 -0
  55. model/zarr/bar.py +0 -28
  56. model/zarr/foo.py +0 -11
  57. model/zarr/zarr_manager.py +0 -298
  58. water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
  59. water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
  60. water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
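
The headline change is a repackaging: the old top-level model package is deleted and its modules move under the water_column_sonar_processing namespace (with geospatial renamed to geometry, and the placeholder zarr/foo.py and zarr/bar.py modules dropped). A rough migration sketch for downstream imports follows; the new module paths come from the file list above, but the exported class names in 26.1.14 are assumed to match the 0.0.1 sources shown below and should be verified against the new RECORD file.

# Import-path migration sketch (class names in 26.1.14 are assumptions).

# 0.0.1 (old top-level "model" package):
# from model.aws.dynamodb_manager import DynamoDBManager
# from model.zarr.zarr_manager import ZarrManager

# 26.1.14 (namespaced package):
from water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
from water_column_sonar_processing.model.zarr_manager import ZarrManager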
model/cruise/create_empty_zarr_store.py (deleted, matches entry 42 above)
@@ -1,166 +0,0 @@
- import os
- import numcodecs
- import numpy as np
- from ..utility.cleaner import Cleaner
- from ..aws.dynamodb_manager import DynamoDBManager
- from ..aws.s3_manager import S3Manager
- from ..zarr.zarr_manager import ZarrManager
-
- numcodecs.blosc.use_threads = False
- numcodecs.blosc.set_nthreads(1)
-
- TEMPDIR = "/tmp"
-
- # TODO: when ready switch to version 3 of zarr spec
- # ZARR_V3_EXPERIMENTAL_API = 1
- # creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
-
- class CreateEmptyZarrStore:
-     #######################################################
-     def __init__(
-         self,
-     ):
-         self.__overwrite = True
-         # TODO: create output_bucket and input_bucket variables here?
-         self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-         self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-
-     #######################################################
-
-     def upload_zarr_store_to_s3(
-         self,
-         local_directory: str,
-         object_prefix: str,
-         cruise_name: str,
-     ) -> None:
-         print('uploading zarr store to s3')
-         s3_manager = S3Manager()
-         #
-         print('Starting upload with thread pool executor.')
-         # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
-         all_files = []
-         for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
-             for file in files:
-                 local_path = os.path.join(subdir, file)
-                 # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
-                 s3_key = f'{object_prefix}/{cruise_name}.zarr{local_path.split(f"{cruise_name}.zarr")[-1]}'
-                 all_files.append([local_path, s3_key])
-         #
-         # print(all_files)
-         s3_manager.upload_files_with_thread_pool_executor(
-             all_files=all_files,
-         )
-         print('Done uploading with thread pool executor.')
-         # TODO: move to common place
-
-     #######################################################
-     def create_cruise_level_zarr_store(
-         self,
-         ship_name: str,
-         cruise_name: str,
-         sensor_name: str,
-         table_name: str
-     ) -> None:
-         try:
-             # HB0806 - 123, HB0903 - 220
-             dynamo_db_manager = DynamoDBManager()
-
-             df = dynamo_db_manager.get_table_as_df(
-                 table_name=table_name,
-                 ship_name=ship_name,
-                 cruise_name=cruise_name,
-                 sensor_name=sensor_name
-             )
-
-             # filter the dataframe just for enums >= LEVEL_1_PROCESSING
-             # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
-
-             # TODO: VERIFY GEOJSON EXISTS as prerequisite!!!
-
-             print(f"DataFrame shape: {df.shape}")
-             cruise_channels = list(set([i for sublist in df['CHANNELS'].dropna() for i in sublist]))
-             cruise_channels.sort()
-
-             consolidated_zarr_width = np.sum(df['NUM_PING_TIME_DROPNA'].dropna().astype(int))
-
-             # [3] calculate the max/min measurement resolutions for the whole cruise
-             cruise_min_echo_range = float(np.min(df['MIN_ECHO_RANGE'].dropna().astype(float)))
-
-             # [4] calculate the maximum of the max depth values
-             cruise_max_echo_range = float(np.max(df['MAX_ECHO_RANGE'].dropna().astype(float)))
-             print(f"cruise_min_echo_range: {cruise_min_echo_range}, cruise_max_echo_range: {cruise_max_echo_range}")
-
-             # [5] get number of channels
-             cruise_frequencies = [float(i) for i in df['FREQUENCIES'].dropna().values.flatten()[0]]
-             print(cruise_frequencies)
-
-             new_width = int(consolidated_zarr_width)
-             print(f"new_width: {new_width}")
-             #################################################################
-             store_name = f"{cruise_name}.zarr"
-             print(store_name)
-             ################################################################
-             # Delete existing zarr store if it exists
-             s3_manager = S3Manager()
-             zarr_prefix = os.path.join("level_2", ship_name, cruise_name, sensor_name)
-             child_objects = s3_manager.get_child_objects(
-                 bucket_name=self.output_bucket_name,
-                 sub_prefix=zarr_prefix,
-             )
-             if len(child_objects) > 0:
-                 s3_manager.delete_nodd_objects(
-                     objects=child_objects,
-                 )
-             ################################################################
-             # Create new zarr store
-             zarr_manager = ZarrManager()
-             new_height = len(zarr_manager.get_depth_values(
-                 min_echo_range=cruise_min_echo_range,
-                 max_echo_range=cruise_max_echo_range
-             ))
-             print(f"new_height: {new_height}")
-
-             zarr_manager.create_zarr_store(
-                 path=TEMPDIR,
-                 ship_name=ship_name,
-                 cruise_name=cruise_name,
-                 sensor_name=sensor_name,
-                 frequencies=cruise_frequencies,
-                 width=new_width,
-                 min_echo_range=cruise_min_echo_range,
-                 max_echo_range=cruise_max_echo_range,
-                 calibration_status=True,
-             )
-             #################################################################
-             self.upload_zarr_store_to_s3(
-                 local_directory=TEMPDIR,
-                 object_prefix=zarr_prefix,
-                 cruise_name=cruise_name,
-             )
-             # https://noaa-wcsd-zarr-pds.s3.amazonaws.com/index.html
-             #################################################################
-             # Verify count of the files uploaded
-             # count = self.__get_file_count(store_name=store_name)
-             # #
-             # raw_zarr_files = self.__get_s3_files(  # TODO: just need count
-             #     bucket_name=self.__output_bucket,
-             #     sub_prefix=os.path.join(zarr_prefix, store_name),
-             # )
-             # if len(raw_zarr_files) != count:
-             #     print(f'Problem writing {store_name} with proper count {count}.')
-             #     raise Exception("File count doesnt equal number of s3 Zarr store files.")
-             # else:
-             #     print("File counts match.")
-             #################################################################
-             # Success
-             # TODO: update enum in dynamodb
-             #################################################################
-         except Exception as err:
-             print(f"Problem trying to create new cruise zarr store: {err}")
-         finally:
-             cleaner = Cleaner()
-             cleaner.delete_local_files()
-             print("Done creating cruise level zarr store")
-
-
- ###########################################################
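
For reference, a minimal sketch of how this removed 0.0.1 class was driven, reconstructed from the signatures above. The bucket and table names are hypothetical; the ship/cruise/sensor values are the examples that appear in the code's own comments.

import os

from model.cruise.create_empty_zarr_store import CreateEmptyZarrStore

# Bucket names are read from the environment in __init__ (hypothetical values).
os.environ["INPUT_BUCKET_NAME"] = "example-input-bucket"
os.environ["OUTPUT_BUCKET_NAME"] = "example-output-bucket"

CreateEmptyZarrStore().create_cruise_level_zarr_store(
    ship_name="Henry_B._Bigelow",  # example values from the comments above
    cruise_name="HB0806",
    sensor_name="EK60",
    table_name="example-table",    # hypothetical DynamoDB table name
)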
model/cruise/resample_regrid.py (deleted, matches entry 43 above)
@@ -1,248 +0,0 @@
- import gc
- import os
- from pathlib import Path
- import numcodecs
- import numpy as np
- import xarray as xr
- import pandas as pd
-
- from ..geospatial.geometry_manager import GeoManager
- from ..aws.dynamodb_manager import DynamoDBManager
- from ..zarr.zarr_manager import ZarrManager
-
- numcodecs.blosc.use_threads = False
- numcodecs.blosc.set_nthreads(1)
-
-
- # TODO: when ready switch to version 3 of zarr spec
- # ZARR_V3_EXPERIMENTAL_API = 1
- # creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
-
- class ResampleRegrid:
-     #######################################################
-     def __init__(
-         self,
-     ):
-         self.__overwrite = True
-         self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-         self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-         self.dtype = 'float32'
-
-     #################################################################
-     def interpolate_data(
-         self,
-         input_xr,
-         ping_times,
-         all_cruise_depth_values,
-     ) -> np.ndarray:
-         print("Interpolating data.")
-         try:
-             data = np.empty((
-                 len(all_cruise_depth_values),
-                 len(ping_times),
-                 len(input_xr.frequency_nominal)
-             ), dtype=self.dtype)
-
-             data[:] = np.nan
-
-             regrid_resample = xr.DataArray(
-                 data=data,
-                 dims=("depth", "time", "frequency"),
-                 coords={
-                     "depth": all_cruise_depth_values,
-                     "time": ping_times,
-                     "frequency": input_xr.frequency_nominal.values,
-                 }
-             )
-
-             channels = input_xr.channel.values
-             for channel in range(len(channels)):  # TODO: leaving off here, need to subset for just indices in time axis
-                 print(np.nanmax(input_xr.echo_range.sel(channel=input_xr.channel[channel]).values))
-                 #
-                 max_depths = np.nanmax(
-                     a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values,
-                     axis=1
-                 )
-                 superset_of_max_depths = set(
-                     np.nanmax(input_xr.echo_range.sel(channel=input_xr.channel[channel]).values, 1)
-                 )
-                 set_of_max_depths = list({x for x in superset_of_max_depths if x == x})  # removes nan's
-                 # iterate through partitions of data with similar depths and resample
-                 for select_max_depth in set_of_max_depths:
-                     # TODO: for nan just skip and leave all nan's
-                     select_indices = [i for i in range(0, len(max_depths)) if max_depths[i] == select_max_depth]
-
-                     # now create new DataArray with proper dimension and indices
-                     # data_select = input_xr.Sv.sel(
-                     #     channel=input_xr.channel[channel]
-                     # ).values[select_indices, :].T  # TODO: dont like this transpose
-                     data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[select_indices, :].T.values
-                     # change from ".values[select_indices, :].T" to "[select_indices, :].values.T"
-
-                     times_select = input_xr.ping_time.values[select_indices]
-                     depths_select = input_xr.echo_range.sel(
-                         channel=input_xr.channel[channel]
-                     ).values[select_indices[0], :]  # '0' because all others in group should be same
-
-                     da_select = xr.DataArray(
-                         data=data_select,
-                         dims=("depth", "time"),
-                         coords={
-                             "depth": depths_select,
-                             "time": times_select,
-                         }
-                     ).dropna(dim='depth')
-                     resampled = da_select.interp(depth=all_cruise_depth_values, method="nearest")
-                     # write to the resample array
-                     regrid_resample.loc[
-                         dict(time=times_select, frequency=input_xr.frequency_nominal.values[channel])
-                     ] = resampled
-                     print(f"updated {len(times_select)} ping times")
-         except Exception as err:
-             print(f'Problem finding the dynamodb table: {err}')
-             raise err
-         print("Done interpolating data.")
-         return regrid_resample
-
-     #################################################################
-     def resample_regrid(
-         self,
-         ship_name,
-         cruise_name,
-         sensor_name,
-         table_name,
-     ) -> None:
-         """
-         The goal here is to interpolate the data against the depth values already populated
-         in the existing file level zarr stores. We open the cruise-level store with zarr for
-         read/write operations. We open the file-level store with Xarray to leverage tools for
-         resampling and subsetting the data.
-         """
-         print("Interpolating data.")
-         try:
-             zarr_manager = ZarrManager()
-             # s3_manager = S3Manager()
-             geo_manager = GeoManager()
-             # get zarr store
-             output_zarr_store = zarr_manager.open_s3_zarr_store_with_zarr(
-                 ship_name=ship_name,
-                 cruise_name=cruise_name,
-                 sensor_name=sensor_name,
-                 # zarr_synchronizer=?  # TODO: pass in for parallelization
-             )
-
-             # get dynamo stuff
-             dynamo_db_manager = DynamoDBManager()
-             cruise_df = dynamo_db_manager.get_table_as_df(
-                 ship_name=ship_name,
-                 cruise_name=cruise_name,
-                 sensor_name=sensor_name,
-                 table_name=table_name,
-             )
-
-             #########################################################
-             #########################################################
-             # TODO: iterate files here
-             all_file_names = cruise_df['FILE_NAME']
-             for file_name in all_file_names:
-                 gc.collect()
-                 file_name_stem = Path(file_name).stem
-                 # file_name_stem = "D20070724-T151330"
-                 print(f"Processing file: {file_name_stem}.")
-                 # if f"{file_name_stem}.raw" not in list(cruise_df['FILE_NAME']):
-                 #     raise Exception(f"Raw file file_stem not found in dynamodb.")
-
-                 # status = PipelineStatus['LEVEL_1_PROCESSING']
-                 # TODO: filter rows by enum success, filter the dataframe just for enums >= LEVEL_1_PROCESSING
-                 # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
-
-                 # Get index from all cruise files. Note: should be based on which are included in cruise.
-                 index = cruise_df.index[cruise_df['FILE_NAME'] == f"{file_name_stem}.raw"][0]
-
-                 # get input store
-                 input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
-                     ship_name=ship_name,
-                     cruise_name=cruise_name,
-                     sensor_name=sensor_name,
-                     file_name_stem=file_name_stem,
-                 )
-                 #########################################################################
-                 # [3] Get needed indices
-                 # Offset from start index to insert new data. Note that missing values are excluded.
-                 ping_time_cumsum = np.insert(
-                     np.cumsum(cruise_df['NUM_PING_TIME_DROPNA'].dropna().to_numpy(dtype=int)),
-                     obj=0,
-                     values=0
-                 )
-                 start_ping_time_index = ping_time_cumsum[index]
-                 end_ping_time_index = ping_time_cumsum[index + 1]
-
-                 min_echo_range = np.nanmin(np.float32(cruise_df['MIN_ECHO_RANGE']))
-                 max_echo_range = np.nanmax(np.float32(cruise_df['MAX_ECHO_RANGE']))
-
-                 print("Creating empty ndarray for Sv data.")  # Note: cruise_zarr dimensions are (depth, time, frequency)
-                 cruise_sv_subset = np.empty(
-                     shape=output_zarr_store.Sv[:, start_ping_time_index:end_ping_time_index, :].shape
-                 )
-                 cruise_sv_subset[:, :, :] = np.nan  # (5208, 9778, 4)
-
-                 all_cruise_depth_values = zarr_manager.get_depth_values(
-                     min_echo_range=min_echo_range,
-                     max_echo_range=max_echo_range
-                 )
-
-                 print(" ".join(list(input_xr_zarr_store.Sv.dims)))
-                 if set(input_xr_zarr_store.Sv.dims) != {'channel', 'ping_time', 'range_sample'}:
-                     raise Exception("Xarray dimensions are not as expected.")
-
-                 # get geojson
-                 indices, geospatial = geo_manager.read_s3_geo_json(
-                     ship_name=ship_name,
-                     cruise_name=cruise_name,
-                     sensor_name=sensor_name,
-                     file_name_stem=file_name_stem,
-                     input_xr_zarr_store=input_xr_zarr_store,
-                 )
-
-                 input_xr = input_xr_zarr_store.isel(ping_time=indices)
-
-                 ping_times = input_xr.ping_time.values
-                 # Date format: numpy.datetime64('2007-07-20T02:10:25.845073920') converts to "1184897425.845074"
-                 epoch_seconds = [(pd.Timestamp(i) - pd.Timestamp('1970-01-01')) / pd.Timedelta('1s') for i in ping_times]
-                 output_zarr_store.time[start_ping_time_index:end_ping_time_index] = epoch_seconds
-
-                 # --- UPDATING --- #
-
-                 regrid_resample = self.interpolate_data(
-                     input_xr=input_xr,
-                     ping_times=ping_times,
-                     all_cruise_depth_values=all_cruise_depth_values,
-                 )
-
-                 print(f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}")
-
-                 #########################################################################
-                 # write Sv values to cruise-level-zarr-store
-                 for channel in range(len(input_xr.channel.values)):  # doesn't like being written in one fell swoop :(
-                     output_zarr_store.Sv[
-                         :,
-                         start_ping_time_index:end_ping_time_index,
-                         channel
-                     ] = regrid_resample[:, :, channel]
-
-                 #########################################################################
-                 # [5] write subset of latitude/longitude
-                 output_zarr_store.latitude[start_ping_time_index:end_ping_time_index] = geospatial.dropna()[
-                     'latitude'
-                 ].values
-                 output_zarr_store.longitude[start_ping_time_index:end_ping_time_index] = geospatial.dropna()[
-                     'longitude'
-                 ].values
-         except Exception as err:
-             print(f'Problem interpolating the data: {err}')
-             raise err
-         print("Done interpolating data.")
-
-     #######################################################
-
- ###########################################################
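
The core of interpolate_data above is a nearest-neighbor regrid of each (depth, time) slab onto the cruise-wide depth axis via xarray's DataArray.interp. A self-contained sketch of that step, with made-up values standing in for the Sv data and depth grids:

import numpy as np
import xarray as xr

# Toy stand-ins for one channel's data: Sv on a per-file depth grid.
ping_times = np.array(["2007-07-20T02:10:25", "2007-07-20T02:10:26"], dtype="datetime64[ns]")
file_depths = np.array([0.5, 1.0, 1.5, 2.0])
sv_values = np.arange(8, dtype="float32").reshape(4, 2)  # (depth, time)

da_select = xr.DataArray(
    data=sv_values,
    dims=("depth", "time"),
    coords={"depth": file_depths, "time": ping_times},
).dropna(dim="depth")

# Cruise-wide depth axis (built by ZarrManager.get_depth_values in the real
# code); nearest-neighbor lookup maps each target depth to the closest source
# sample, and depths outside the source range come back as NaN.
all_cruise_depth_values = np.arange(0.25, 2.75, 0.25)
resampled = da_select.interp(depth=all_cruise_depth_values, method="nearest")
print(resampled.shape)  # (10, 2)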
File without changes
model/geospatial/geometry_manager.py (deleted, matches entry 45 above)
@@ -1,194 +0,0 @@
- from pathlib import Path
- import numpy as np
- import geopandas
- import pandas as pd
-
- from ..utility.cleaner import Cleaner
- from ..aws.s3_manager import S3Manager
-
- """
- // [Decimal places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
- // 0  1.0        1° 00′ 0″      country or large region                  111.32 km   102.47 km   78.71 km   43.496 km
- // 1  0.1        0° 06′ 0″      large city or district                   11.132 km   10.247 km   7.871 km   4.3496 km
- // 2  0.01       0° 00′ 36″     town or village                          1.1132 km   1.0247 km   787.1 m    434.96 m
- // 3  0.001      0° 00′ 3.6″    neighborhood, street                     111.32 m    102.47 m    78.71 m    43.496 m
- // 4  0.0001     0° 00′ 0.36″   individual street, land parcel           11.132 m    10.247 m    7.871 m    4.3496 m
- // 5  0.00001    0° 00′ 0.036″  individual trees, door entrance          1.1132 m    1.0247 m    787.1 mm   434.96 mm
- // 6  0.000001   0° 00′ 0.0036″ individual humans                        111.32 mm   102.47 mm   78.71 mm   43.496 mm
- // 7  0.0000001  0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm   10.247 mm   7.871 mm   4.3496 mm
- """
-
-
- class GeoManager:
-     #######################################################
-     def __init__(
-         self,
-     ):
-         self.DECIMAL_PRECISION = 5  # precision for GPS coordinates
-         self.SIMPLIFICATION_TOLERANCE = 0.0001  # RDP simplification to street level
-
-     #######################################################
-     def read_echodata_gps_data(
-         self,
-         echodata,
-         ship_name,
-         cruise_name,
-         sensor_name,
-         file_name,
-         write_geojson=True,
-     ) -> tuple:
-         file_name_stem = Path(file_name).stem
-         geo_json_name = f"{file_name_stem}.json"
-
-         print('Getting GPS data from echopype object.')
-         try:
-             latitude = np.round(echodata.platform.latitude.values, self.DECIMAL_PRECISION)
-             longitude = np.round(echodata.platform.longitude.values, self.DECIMAL_PRECISION)
-
-             # RE: time coordinates: https://github.com/OSOceanAcoustics/echopype/issues/656#issue-1219104771
-             # 'nmea_times' are times from the nmea datalogger associated with GPS
-             # note that nmea_times, unlike time1, can be sorted
-             nmea_times = np.sort(echodata.platform.time1.values)
-
-             # 'time1' are times from the echosounder associated with the data of the transducer measurement
-             time1 = echodata.environment.time1.values
-
-             if len(nmea_times) < len(time1):
-                 raise Exception("Problem: Not enough NMEA times available to extrapolate time1.")
-
-             # Align 'sv_times' to 'nmea_times'
-             if not (np.all(time1[:-1] <= time1[1:]) and np.all(nmea_times[:-1] <= nmea_times[1:])):
-                 raise Exception("Problem: NMEA times are not sorted.")
-
-             # Finds the indices where 'v' can be inserted just to the right of 'a'
-             indices = np.searchsorted(a=nmea_times, v=time1, side="right") - 1
-             lat = latitude[indices]
-             lat[indices < 0] = np.nan  # values recorded before indexing are set to nan
-             lon = longitude[indices]
-             lon[indices < 0] = np.nan
-
-             if not (np.all(lat[~np.isnan(lat)] >= -90.) and np.all(lat[~np.isnan(lat)] <= 90.) and np.all(lon[~np.isnan(lon)] >= -180.) and np.all(lon[~np.isnan(lon)] <= 180.)):
-                 raise Exception("Problem: GPS Data falls outside allowed bounds.")
-
-             # check for visits to null island
-             null_island_indices = list(
-                 set.intersection(set(np.where(np.abs(lat) < 1e-3)[0]), set(np.where(np.abs(lon) < 1e-3)[0]))
-             )
-             lat[null_island_indices] = np.nan
-             lon[null_island_indices] = np.nan
-
-             # create requirement for minimum linestring size
-             MIN_ALLOWED_SIZE = 4  # don't want to process files with less than 4 data points
-             if len(lat[~np.isnan(lat)]) < MIN_ALLOWED_SIZE or len(lon[~np.isnan(lon)]) < MIN_ALLOWED_SIZE:
-                 raise Exception(
-                     f"There was not enough data in lat or lon to create geojson, {len(lat[~np.isnan(lat)])} found, less than {MIN_ALLOWED_SIZE}."
-                 )
-
-             # https://osoceanacoustics.github.io/echopype-examples/echopype_tour.html
-             gps_df = pd.DataFrame({
-                 'latitude': lat,
-                 'longitude': lon,
-                 'time': time1
-             }).set_index(['time']).fillna(0)
-
-             # Note: We set np.nan to 0,0 so downstream missing values can be omitted
-             gps_gdf = geopandas.GeoDataFrame(
-                 gps_df,
-                 geometry=geopandas.points_from_xy(
-                     gps_df['longitude'],
-                     gps_df['latitude']
-                 ),
-                 crs="epsg:4326"
-             )
-
-             geo_json_line = gps_gdf.to_json()
-             if write_geojson:
-                 print('Creating local copy of geojson file.')
-                 with open(geo_json_name, "w") as write_file:
-                     write_file.write(geo_json_line)
-
-                 geo_json_prefix = f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
-
-                 print('Checking s3 and deleting any existing GeoJSON file.')
-                 s3_manager = S3Manager()
-                 s3_objects = s3_manager.list_nodd_objects(prefix=f"{geo_json_prefix}/{geo_json_name}")
-                 if len(s3_objects) > 0:
-                     print('GeoJSON already exists in s3, deleting existing and continuing.')
-                     s3_manager.delete_nodd_objects(objects=s3_objects)
-
-                 print('Upload GeoJSON to s3.')
-                 s3_manager.upload_nodd_file(
-                     file_name=geo_json_name,  # file_name
-                     key=f"{geo_json_prefix}/{geo_json_name}"  # key
-                 )
-
-                 # TODO: delete geo_json file
-                 cleaner = Cleaner()
-                 cleaner.delete_local_files(file_types=['*.json'])
-
-             #################################################################
-             # TODO: simplify with shapely
-             # linestring = shapely.geometry.LineString(
-             #     [xy for xy in zip(gps_gdf.longitude, gps_gdf.latitude)]
-             # )
-             # len(linestring.coords)
-             # line_simplified = linestring.simplify(
-             #     tolerance=self.SIMPLIFICATION_TOLERANCE,
-             #     preserve_topology=True
-             # )
-             # print(f"Total number of points for original linestring: {len(linestring.coords)}")
-             # print(f"Total number of points needed for the simplified linestring: {len(line_simplified.coords)}")
-             # print(line_simplified)
-             # geo_json_line_simplified = shapely.to_geojson(line_simplified)
-             #################################################################
-             # GeoJSON FeatureCollection with IDs as "time"
-         except Exception as err:
-             print(f'Exception encountered extracting gps coordinates creating geojson: {err}')
-             raise
-         # Note: returned lat/lon values can include np.nan because they need to be aligned with
-         # the Sv data! GeoJSON needs simplification but has been filtered.
-         return gps_df.index.values, gps_df.latitude.values, gps_df.longitude.values
-         # TODO: if geojson is already returned with 0,0, the return here
-         # can include np.nan values?
-
-     #######################################################
-     def read_s3_geo_json(
-         self,
-         ship_name,
-         cruise_name,
-         sensor_name,
-         file_name_stem,
-         input_xr_zarr_store,
-     ):
-         try:
-             s3_manager = S3Manager()
-             geo_json = s3_manager.read_s3_json(
-                 ship_name=ship_name,
-                 cruise_name=cruise_name,
-                 sensor_name=sensor_name,
-                 file_name_stem=file_name_stem,
-             )
-             ###
-             geospatial = geopandas.GeoDataFrame.from_features(geo_json['features']).set_index(
-                 pd.json_normalize(geo_json["features"])["id"].values
-             )
-             null_island_indices = list(
-                 set.intersection(
-                     set(np.where(np.abs(geospatial.latitude.values) < 1e-3)[0]),
-                     set(np.where(np.abs(geospatial.longitude.values) < 1e-3)[0])
-                 )
-             )
-             geospatial.iloc[null_island_indices] = np.nan
-             ###
-             geospatial_index = geospatial.dropna().index.values.astype('datetime64[ns]')
-             aa = input_xr_zarr_store.ping_time.values.tolist()
-             vv = geospatial_index.tolist()
-             indices = np.searchsorted(a=aa, v=vv)
-
-             return indices, geospatial
-         except Exception as err:  # Failure
-             print(f'Exception encountered reading s3 GeoJSON: {err}')
-             raise
-
- ###########################################################
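
A note on the alignment trick in read_echodata_gps_data above: each measurement time in time1 is paired with the most recent NMEA fix at or before it, using np.searchsorted(..., side="right") - 1. A small worked example with invented timestamps:

import numpy as np

# Invented data: sorted NMEA fix times and their latitudes.
nmea_times = np.array([0.0, 10.0, 20.0, 30.0])
latitude = np.array([41.10, 41.20, 41.30, 41.40])

# Echosounder measurement times to be georeferenced.
time1 = np.array([5.0, 10.0, 29.9])

# Index of the last NMEA fix at or before each measurement time.
indices = np.searchsorted(a=nmea_times, v=time1, side="right") - 1
lat = latitude[indices]
lat[indices < 0] = np.nan  # measurements before the first fix get no position

print(lat)  # [41.1 41.2 41.3]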