water-column-sonar-processing 0.0.9__py3-none-any.whl → 26.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. water_column_sonar_processing/aws/dynamodb_manager.py +138 -59
  2. water_column_sonar_processing/aws/s3_manager.py +179 -141
  3. water_column_sonar_processing/aws/s3fs_manager.py +29 -33
  4. water_column_sonar_processing/aws/sqs_manager.py +1 -1
  5. water_column_sonar_processing/cruise/create_empty_zarr_store.py +35 -96
  6. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  7. water_column_sonar_processing/cruise/resample_regrid.py +142 -127
  8. water_column_sonar_processing/geometry/__init__.py +10 -2
  9. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  10. water_column_sonar_processing/geometry/geometry_manager.py +50 -49
  11. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  12. water_column_sonar_processing/geometry/pmtile_generation.py +227 -223
  13. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  14. water_column_sonar_processing/index/index_manager.py +151 -33
  15. water_column_sonar_processing/model/zarr_manager.py +665 -262
  16. water_column_sonar_processing/processing/__init__.py +3 -3
  17. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  18. water_column_sonar_processing/processing/raw_to_zarr.py +206 -214
  19. water_column_sonar_processing/utility/__init__.py +9 -2
  20. water_column_sonar_processing/utility/constants.py +69 -18
  21. water_column_sonar_processing/utility/pipeline_status.py +11 -15
  22. water_column_sonar_processing/utility/timestamp.py +3 -4
  23. water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
  24. water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
  25. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
  26. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
  27. water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
  28. water_column_sonar_processing/process.py +0 -147
  29. water_column_sonar_processing/processing/cruise_sampler.py +0 -342
  30. water_column_sonar_processing-0.0.9.dist-info/METADATA +0 -134
  31. water_column_sonar_processing-0.0.9.dist-info/RECORD +0 -32
  32. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/cruise/create_empty_zarr_store.py
@@ -1,21 +1,13 @@
  import os
+ import tempfile

- import numcodecs
  import numpy as np

- from water_column_sonar_processing.aws import DynamoDBManager
- from water_column_sonar_processing.aws import S3Manager
+ from water_column_sonar_processing.utility import Constants
+ from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
  from water_column_sonar_processing.model import ZarrManager
  from water_column_sonar_processing.utility import Cleaner

- numcodecs.blosc.use_threads = False
- numcodecs.blosc.set_nthreads(1)
-
- # TEMPDIR = "/tmp"
- # TODO: when ready switch to version 3 of model spec
- # ZARR_V3_EXPERIMENTAL_API = 1
- # creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
-

  # TODO: change name to "CreateLocalEmptyZarrStore"
  class CreateEmptyZarrStore:
@@ -24,65 +16,37 @@ class CreateEmptyZarrStore:
  self,
  ):
  self.__overwrite = True
- self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
- self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-
- #######################################################
-
- # TODO: move this to the s3_manager
- def upload_zarr_store_to_s3(
- self,
- local_directory: str,
- object_prefix: str,
- cruise_name: str,
- ) -> None:
- print("uploading model store to s3")
- s3_manager = S3Manager()
- #
- print("Starting upload with thread pool executor.")
- # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
- all_files = []
- for subdir, dirs, files in os.walk(
- f"{local_directory}/{cruise_name}.zarr_manager"
- ):
- for file in files:
- local_path = os.path.join(subdir, file)
- # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.model/.zattrs'
- s3_key = f'{object_prefix}/{cruise_name}.model{local_path.split(f"{cruise_name}.model")[-1]}'
- all_files.append([local_path, s3_key])
- #
- # print(all_files)
- s3_manager.upload_files_with_thread_pool_executor(
- all_files=all_files,
- )
- print("Done uploading with thread pool executor.")
- # TODO: move to common place
+ # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+ # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")

  #######################################################
+ @staticmethod
  def create_cruise_level_zarr_store(
- self,
+ output_bucket_name: str,
  ship_name: str,
  cruise_name: str,
  sensor_name: str,
  table_name: str,
- tempdir: str,
  ) -> None:
+ """
+ Initialize zarr store for the entire cruise which aggregates all the raw data.
+ All cruises will be resampled at 20 cm depth.
+ # tempdir="/tmp", # TODO: create better tmp directory for testing
+ """
+ tempdir = tempfile.TemporaryDirectory()
  try:
- # HB0806 - 123, HB0903 - 220
  dynamo_db_manager = DynamoDBManager()
  s3_manager = S3Manager()

  df = dynamo_db_manager.get_table_as_df(
  table_name=table_name,
- ship_name=ship_name,
  cruise_name=cruise_name,
- sensor_name=sensor_name,
  )

  # TODO: filter the dataframe just for enums >= LEVEL_1_PROCESSING
  # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan

- # TODO: VERIFY GEOJSON EXISTS as prerequisite!!!
+ # TODO: VERIFY GEOJSON EXISTS as prerequisite!!! ...no more geojson needed

  print(f"DataFrame shape: {df.shape}")
  cruise_channels = list(
@@ -94,89 +58,64 @@ class CreateEmptyZarrStore:
  df["NUM_PING_TIME_DROPNA"].dropna().astype(int)
  )

- # [3] calculate the max/min measurement resolutions for the whole cruise
- cruise_min_echo_range = float(
- np.min(df["MIN_ECHO_RANGE"].dropna().astype(float))
- )
+ # [4] max measurement resolution for the whole cruise
+ cruise_max_echo_range = np.max(df["MAX_ECHO_RANGE"].dropna().astype(float))

- # [4] calculate the maximum of the max depth values
- cruise_max_echo_range = float(
- np.max(df["MAX_ECHO_RANGE"].dropna().astype(float))
- )
- print(
- f"cruise_min_echo_range: {cruise_min_echo_range}, cruise_max_echo_range: {cruise_max_echo_range}"
- )
+ print(f"cruise_max_echo_range: {cruise_max_echo_range}")

  # [5] get number of channels
  cruise_frequencies = [
  float(i) for i in df["FREQUENCIES"].dropna().values.flatten()[0]
  ]
- print(cruise_frequencies)

  new_width = int(consolidated_zarr_width)
- print(f"new_width: {new_width}")
- #################################################################
- store_name = f"{cruise_name}.model"
- print(store_name)
  ################################################################
- # Delete existing model store if it exists
- zarr_prefix = os.path.join("level_2", ship_name, cruise_name, sensor_name)
+ # Delete any existing stores
+ zarr_prefix = os.path.join(
+ str(Constants.LEVEL_2.value), ship_name, cruise_name, sensor_name
+ )
  child_objects = s3_manager.get_child_objects(
- bucket_name=self.output_bucket_name,
+ bucket_name=output_bucket_name,
  sub_prefix=zarr_prefix,
  )
+
  if len(child_objects) > 0:
  s3_manager.delete_nodd_objects(
+ bucket_name=output_bucket_name,
  objects=child_objects,
  )
  ################################################################
  # Create new model store
  zarr_manager = ZarrManager()
- new_height = len(
- zarr_manager.get_depth_values(
- min_echo_range=cruise_min_echo_range,
- max_echo_range=cruise_max_echo_range,
- )
- )
- print(f"new_height: {new_height}")
-
  zarr_manager.create_zarr_store(
- path=tempdir,
+ path=tempdir.name,
  ship_name=ship_name,
  cruise_name=cruise_name,
  sensor_name=sensor_name,
  frequencies=cruise_frequencies,
  width=new_width,
- min_echo_range=cruise_min_echo_range,
  max_echo_range=cruise_max_echo_range,
+ # cruise_min_epsilon=cruise_min_epsilon,
  calibration_status=True,
  )
  #################################################################
- self.upload_zarr_store_to_s3(
- local_directory=tempdir,
+ # TODO: would be more elegant to create directly into s3 bucket
+ s3_manager.upload_zarr_store_to_s3(
+ output_bucket_name=output_bucket_name,
+ local_directory=tempdir.name,
  object_prefix=zarr_prefix,
  cruise_name=cruise_name,
  )
- # https://noaa-wcsd-zarr-pds.s3.amazonaws.com/index.html
  #################################################################
- # Verify count of the files uploaded
- # count = self.__get_file_count(store_name=store_name)
- # #
- # raw_zarr_files = self.__get_s3_files( # TODO: just need count
- # bucket_name=self.__output_bucket,
- # sub_prefix=os.path.join(zarr_prefix, store_name),
- # )
- # if len(raw_zarr_files) != count:
- # print(f'Problem writing {store_name} with proper count {count}.')
- # raise Exception("File count doesnt equal number of s3 Zarr store files.")
- # else:
- # print("File counts match.")
+ # TODO: verify count of the files uploaded
  #################################################################
- # Success
  # TODO: update enum in dynamodb
+ print("Done creating cruise level zarr store.")
  #################################################################
  except Exception as err:
- print(f"Problem trying to create new cruise model store: {err}")
+ raise RuntimeError(
+ f"Problem trying to create new cruise model store, {err}"
+ )
  finally:
  cleaner = Cleaner()
  cleaner.delete_local_files()
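
Note on the create_empty_zarr_store.py hunks above: the cruise-level store is now sized from the cruise-wide maximum echo range alone, and the new docstring fixes the vertical resolution at 20 cm. A minimal sketch of the depth grid that implies, assuming a constant 0.20 m bin size down to max_echo_range (the real implementation lives in ZarrManager.get_depth_values and its exact start value and rounding may differ):

    import numpy as np

    def get_depth_values(max_echo_range: float, bin_size: float = 0.20) -> np.ndarray:
        # hypothetical sketch: one depth bin every 20 cm from just below the surface
        # down to the deepest echo range observed in the cruise
        return np.round(np.arange(bin_size, max_echo_range + bin_size, bin_size), 2)

    depths = get_depth_values(max_echo_range=1000.0)  # roughly 5000 depth bins for a 1000 m cruise

Under the new @staticmethod signature the caller passes output_bucket_name explicitly (the environment-variable lookups are commented out), and the store is staged in a tempfile.TemporaryDirectory before s3_manager.upload_zarr_store_to_s3 copies it into the bucket under the Constants.LEVEL_2 prefix.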
water_column_sonar_processing/cruise/datatree_manager.py (new file)
@@ -0,0 +1,21 @@
+ # ### https://xarray-datatree.readthedocs.io/en/latest/data-structures.html
+ # import xarray as xr
+ # from datatree import DataTree
+ #
+ #
+ # class DatatreeManager:
+ # #######################################################
+ # def __init__(
+ # self,
+ # ):
+ # self.dtype = "float32"
+ #
+ # #################################################################
+ # def create_datatree(
+ # self,
+ # input_ds,
+ # ) -> None:
+ # ds1 = xr.Dataset({"foo": "orange"})
+ # dt = DataTree(name="root", dataset=ds1) # create root node
+ # # ds2 = xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])})
+ # return dt
water_column_sonar_processing/cruise/resample_regrid.py
@@ -1,23 +1,14 @@
  import gc
- import os
+ import warnings
  from pathlib import Path

- import numcodecs
  import numpy as np
- import pandas as pd
  import xarray as xr

  from water_column_sonar_processing.aws import DynamoDBManager
- from water_column_sonar_processing.geometry import GeometryManager
  from water_column_sonar_processing.model import ZarrManager

- numcodecs.blosc.use_threads = False
- numcodecs.blosc.set_nthreads(1)
-
-
- # TODO: when ready switch to version 3 of model spec
- # ZARR_V3_EXPERIMENTAL_API = 1
- # creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
+ warnings.simplefilter("ignore", category=RuntimeWarning)


  class ResampleRegrid:
@@ -26,68 +17,55 @@ class ResampleRegrid:
  self,
  ):
  self.__overwrite = True
- self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
- self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
  self.dtype = "float32"

  #################################################################
  def interpolate_data(
  self,
- input_xr,
- ping_times,
- all_cruise_depth_values,
+ input_xr: xr.Dataset,
+ ping_times: np.ndarray,
+ all_cruise_depth_values: np.ndarray, # includes water_level offset
  ) -> np.ndarray:
- print("Interpolating data.")
+ """
+ Input dataset is passed in along with times and depth values to regrid to.
+ """
+ print("Interpolating dataset.")
  try:
  data = np.empty(
- (
+ ( # Depth / Time / Frequency
  len(all_cruise_depth_values),
  len(ping_times),
- len(input_xr.frequency_nominal),
+ len(input_xr.frequency_nominal.values),
  ),
  dtype=self.dtype,
  )

  data[:] = np.nan

- regrid_resample = xr.DataArray(
+ regrid_resample = xr.DataArray( # where data will be written to
  data=data,
- dims=("depth", "time", "frequency"),
  coords={
  "depth": all_cruise_depth_values,
  "time": ping_times,
  "frequency": input_xr.frequency_nominal.values,
  },
+ dims=("depth", "time", "frequency"),
+ name="Sv",
  )

  channels = input_xr.channel.values
  for channel in range(
  len(channels)
- ): # TODO: leaving off here, need to subset for just indices in time axis
- print(
- np.nanmax(
- input_xr.echo_range.sel(
- channel=input_xr.channel[channel]
- ).values
- )
- )
- #
+ ): # ?TODO: leaving off here, need to subset for just indices in time axis
+ gc.collect()
  max_depths = np.nanmax(
- a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values,
+ a=input_xr.depth.sel(channel=input_xr.channel[channel]).values,
  axis=1,
  )
- superset_of_max_depths = set(
- np.nanmax(
- input_xr.echo_range.sel(
- channel=input_xr.channel[channel]
- ).values,
- 1,
- )
- )
+ superset_of_max_depths = set(max_depths)
  set_of_max_depths = list(
  {x for x in superset_of_max_depths if x == x}
- ) # removes nan's
- # iterate through partitions of data with similar depths and resample
+ ) # To speed things up resample in groups denoted by max_depth
  for select_max_depth in set_of_max_depths:
  # TODO: for nan just skip and leave all nan's
  select_indices = [
@@ -96,34 +74,35 @@ class ResampleRegrid:
  if max_depths[i] == select_max_depth
  ]

- # now create new DataArray with proper dimension and indices
- # data_select = input_xr.Sv.sel(
- # channel=input_xr.channel[channel]
- # ).values[select_indices, :].T # TODO: dont like this transpose
  data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[
  select_indices, :
  ].T.values
- # change from ".values[select_indices, :].T" to "[select_indices, :].values.T"

  times_select = input_xr.ping_time.values[select_indices]
- depths_select = input_xr.echo_range.sel(
- channel=input_xr.channel[channel]
- ).values[
- select_indices[0], :
- ] # '0' because all others in group should be same
+ depths_all = input_xr.depth.sel(
+ channel=input_xr.channel[channel],
+ ping_time=input_xr.ping_time[select_indices[0]],
+ ).values
+ depths_select = depths_all[~np.isnan(depths_all)]

  da_select = xr.DataArray(
- data=data_select,
+ data=data_select[: len(depths_select), :],
  dims=("depth", "time"),
  coords={
  "depth": depths_select,
  "time": times_select,
  },
- ).dropna(dim="depth")
- resampled = da_select.interp(
- depth=all_cruise_depth_values, method="nearest"
  )
- # write to the resample array
+
+ resampled = (
+ da_select.interp( # TODO: problem here w D20070712-T152416.raw
+ depth=all_cruise_depth_values,
+ method="nearest",
+ assume_sorted=True,
+ )
+ )
+
+ ### write to outptut ###
  regrid_resample.loc[
  dict(
  time=times_select,
@@ -131,11 +110,15 @@ class ResampleRegrid:
  )
  ] = resampled
  print(f"updated {len(times_select)} ping times")
+ gc.collect()
+ return (
+ regrid_resample.values.copy()
+ ) # gets passed back wo depth, might need to include?
  except Exception as err:
- print(f"Problem finding the dynamodb table: {err}")
- raise err
- print("Done interpolating data.")
- return regrid_resample
+ raise RuntimeError(f"Problem finding the dynamodb table, {err}")
+ finally:
+ gc.collect()
+ print("Done interpolating dataset.")

  #################################################################
  def resample_regrid(
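
Note on the interpolate_data hunks above: pings are grouped by their maximum depth and each group is regridded onto the cruise-wide depth axis with nearest-neighbor interpolation; depths the file never measured are expected to stay NaN. A standalone sketch of that step with made-up arrays (the real code slices data_select, depths_select, and all_cruise_depth_values out of the file-level Sv store):

    import numpy as np
    import xarray as xr

    # hypothetical file-level grid: 3 depth samples x 2 ping times of Sv (dB)
    depths_select = np.array([0.5, 1.0, 1.5])
    times_select = np.array(
        ["2007-07-20T02:10:25", "2007-07-20T02:10:26"], dtype="datetime64[ns]"
    )
    data_select = np.array([[-60.0, -61.0], [-62.0, -63.0], [-64.0, -65.0]])

    da_select = xr.DataArray(
        data=data_select,
        dims=("depth", "time"),
        coords={"depth": depths_select, "time": times_select},
    )

    # cruise-wide depth grid at 0.20 m; bins outside the file's 0.5-1.5 m range come back NaN
    all_cruise_depth_values = np.arange(0.2, 2.01, 0.2)
    resampled = da_select.interp(
        depth=all_cruise_depth_values,
        method="nearest",
        assume_sorted=True,
    )
    print(resampled.shape)  # (10, 2)

The regridded block is then written into the regrid_resample DataArray at the matching ping times, as shown in the diff.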
@@ -144,66 +127,79 @@ class ResampleRegrid:
  cruise_name,
  sensor_name,
  table_name,
+ bucket_name,
+ override_select_files=None,
+ # override_cruise_min_epsilon=None,
+ endpoint_url=None,
  ) -> None:
  """
- The goal here is to interpolate the data against the depth values already populated
+ The goal here is to interpolate the dataset against the depth values already populated
  in the existing file level model stores. We open the cruise-level store with model for
  read/write operations. We open the file-level store with Xarray to leverage tools for
- resampling and subsetting the data.
+ resampling and subsetting the dataset.
  """
- print("Interpolating data.")
+ print("Resample Regrid, Interpolating dataset.")
  try:
  zarr_manager = ZarrManager()
- # s3_manager = S3Manager()
- geo_manager = GeometryManager()
- # get model store
+ # geo_manager = GeometryManager()
+
  output_zarr_store = zarr_manager.open_s3_zarr_store_with_zarr(
  ship_name=ship_name,
  cruise_name=cruise_name,
  sensor_name=sensor_name,
- # zarr_synchronizer=? # TODO: pass in for parallelization
+ output_bucket_name=bucket_name,
+ endpoint_url=endpoint_url,
  )

- # get dynamo stuff
  dynamo_db_manager = DynamoDBManager()
  cruise_df = dynamo_db_manager.get_table_as_df(
- ship_name=ship_name,
  cruise_name=cruise_name,
- sensor_name=sensor_name,
  table_name=table_name,
  )

  #########################################################
  #########################################################
- # TODO: iterate files here
  all_file_names = cruise_df["FILE_NAME"]
+
+ if override_select_files is not None:
+ all_file_names = override_select_files
+
+ # Iterate files
  for file_name in all_file_names:
  gc.collect()
  file_name_stem = Path(file_name).stem
- # file_name_stem = "D20070724-T151330"
  print(f"Processing file: {file_name_stem}.")
- # if f"{file_name_stem}.raw" not in list(cruise_df['FILE_NAME']):
- # raise Exception(f"Raw file file_stem not found in dynamodb.")
+
+ if f"{file_name_stem}.raw" not in list(cruise_df["FILE_NAME"]):
+ raise Exception("Raw file file_stem not found in dynamodb.")

  # status = PipelineStatus['LEVEL_1_PROCESSING']
  # TODO: filter rows by enum success, filter the dataframe just for enums >= LEVEL_1_PROCESSING
  # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan

  # Get index from all cruise files. Note: should be based on which are included in cruise.
- index = cruise_df.index[
- cruise_df["FILE_NAME"] == f"{file_name_stem}.raw"
- ][0]
+ index = int(
+ cruise_df.index[cruise_df["FILE_NAME"] == f"{file_name_stem}.raw"][
+ 0
+ ]
+ )

- # get input store
+ # Get input store
  input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
  ship_name=ship_name,
  cruise_name=cruise_name,
  sensor_name=sensor_name,
  file_name_stem=file_name_stem,
+ bucket_name=bucket_name,
+ endpoint_url=endpoint_url,
  )
+
+ # This is the vertical offset of the sensor related to the ocean surface
+ # See https://echopype.readthedocs.io/en/stable/data-proc-additional.html
+ # Ignoring water-level for now
  #########################################################################
- # [3] Get needed indices
- # Offset from start index to insert new data. Note that missing values are excluded.
+ # [3] Get needed time indices — along the x-axis
+ # Offset from start index to insert new dataset. Note that missing values are excluded.
  ping_time_cumsum = np.insert(
  np.cumsum(
  cruise_df["NUM_PING_TIME_DROPNA"].dropna().to_numpy(dtype=int)
@@ -214,85 +210,104 @@ class ResampleRegrid:
  start_ping_time_index = ping_time_cumsum[index]
  end_ping_time_index = ping_time_cumsum[index + 1]

- min_echo_range = np.nanmin(np.float32(cruise_df["MIN_ECHO_RANGE"]))
- max_echo_range = np.nanmax(np.float32(cruise_df["MAX_ECHO_RANGE"]))
-
- print(
- "Creating empty ndarray for Sv data."
- ) # Note: cruise_zarr dimensions are (depth, time, frequency)
- cruise_sv_subset = np.empty(
- shape=output_zarr_store.Sv[
- :, start_ping_time_index:end_ping_time_index, :
- ].shape
+ max_echo_range = np.max(
+ cruise_df["MAX_ECHO_RANGE"].dropna().astype(np.float32)
  )
- cruise_sv_subset[:, :, :] = np.nan # (5208, 9778, 4)
+ # cruise_min_epsilon = np.min(
+ # cruise_df["MIN_ECHO_RANGE"].dropna().astype(float)
+ # ) # TODO: currently overwriting to 0.25 m

  all_cruise_depth_values = zarr_manager.get_depth_values(
- min_echo_range=min_echo_range, max_echo_range=max_echo_range
+ max_echo_range=max_echo_range,
+ # cruise_min_epsilon=cruise_min_epsilon,
  )

- print(" ".join(list(input_xr_zarr_store.Sv.dims)))
- if set(input_xr_zarr_store.Sv.dims) != {
+ if set(
+ input_xr_zarr_store.Sv.dims
+ ) != { # Cruise dimensions are: (depth, time, frequency)
  "channel",
  "ping_time",
  "range_sample",
  }:
  raise Exception("Xarray dimensions are not as expected.")

- # get geojson
- indices, geospatial = geo_manager.read_s3_geo_json(
- ship_name=ship_name,
- cruise_name=cruise_name,
- sensor_name=sensor_name,
- file_name_stem=file_name_stem,
- input_xr_zarr_store=input_xr_zarr_store,
- )
+ # indices, geospatial = geo_manager.read_s3_geo_json( # TODO: remove this!!!!
+ # ship_name=ship_name,
+ # cruise_name=cruise_name,
+ # sensor_name=sensor_name,
+ # file_name_stem=file_name_stem,
+ # input_xr_zarr_store=input_xr_zarr_store,
+ # endpoint_url=endpoint_url,
+ # output_bucket_name=bucket_name,
+ # )

- input_xr = input_xr_zarr_store.isel(ping_time=indices)
+ input_xr = input_xr_zarr_store # .isel(ping_time=indices)

  ping_times = input_xr.ping_time.values
- # Date format: numpy.datetime64('2007-07-20T02:10:25.845073920') converts to "1184897425.845074"
- epoch_seconds = [
- (pd.Timestamp(i) - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
- for i in ping_times
- ]
- output_zarr_store.time[start_ping_time_index:end_ping_time_index] = (
- epoch_seconds
+ output_zarr_store["time"][start_ping_time_index:end_ping_time_index] = (
+ input_xr.ping_time.data
  )

- # --- UPDATING --- #
-
+ # --- UPDATING --- # # TODO: problem, this returns dimensionless array
  regrid_resample = self.interpolate_data(
  input_xr=input_xr,
  ping_times=ping_times,
- all_cruise_depth_values=all_cruise_depth_values,
+ all_cruise_depth_values=all_cruise_depth_values, # should accommodate the water_level already
  )

  print(
  f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}"
  )
-
  #########################################################################
  # write Sv values to cruise-level-model-store
- for channel in range(
- len(input_xr.channel.values)
- ): # doesn't like being written in one fell swoop :(
- output_zarr_store.Sv[
- :, start_ping_time_index:end_ping_time_index, channel
- ] = regrid_resample[:, :, channel]

+ for fff in range(regrid_resample.shape[-1]):
+ output_zarr_store["Sv"][
+ : regrid_resample[:, :, fff].shape[0],
+ start_ping_time_index:end_ping_time_index,
+ fff,
+ ] = regrid_resample[:, :, fff]
+ #########################################################################
+ # in the future. See https://github.com/CI-CMG/water-column-sonar-processing/issues/11
+ if "detected_seafloor_depth" in list(input_xr.variables):
+ print("Adding detected_seafloor_depth to output")
+ detected_seafloor_depth = input_xr.detected_seafloor_depth.values
+ detected_seafloor_depth[detected_seafloor_depth == 0.0] = np.nan
+
+ # As requested, use the lowest frequencies to determine bottom
+ detected_seafloor_depths = detected_seafloor_depth[0, :]
+
+ detected_seafloor_depths[detected_seafloor_depths == 0.0] = np.nan
+ print(f"min depth measured: {np.nanmin(detected_seafloor_depths)}")
+ print(f"max depth measured: {np.nanmax(detected_seafloor_depths)}")
+ output_zarr_store["bottom"][
+ start_ping_time_index:end_ping_time_index
+ ] = detected_seafloor_depths
+ #
  #########################################################################
  # [5] write subset of latitude/longitude
- output_zarr_store.latitude[
+ # output_zarr_store["latitude"][
+ # start_ping_time_index:end_ping_time_index
+ # ] = geospatial.dropna()[
+ # "latitude"
+ # ].values # TODO: get from ds_sv directly, dont need geojson anymore
+ # output_zarr_store["longitude"][
+ # start_ping_time_index:end_ping_time_index
+ # ] = geospatial.dropna()["longitude"].values
+ #########################################################################
+ output_zarr_store["latitude"][
  start_ping_time_index:end_ping_time_index
- ] = geospatial.dropna()["latitude"].values
- output_zarr_store.longitude[
+ ] = input_xr_zarr_store.latitude.dropna(dim="ping_time").values
+ output_zarr_store["longitude"][
  start_ping_time_index:end_ping_time_index
- ] = geospatial.dropna()["longitude"].values
+ ] = input_xr_zarr_store.longitude.dropna(dim="ping_time").values
+ #########################################################################
  except Exception as err:
- print(f"Problem interpolating the data: {err}")
- raise err
- print("Done interpolating data.")
+ raise RuntimeError(f"Problem with resample_regrid, {err}")
+ finally:
+ print("Exiting resample_regrid.")
+ # TODO: read across times and verify dataset was written?
+ gc.collect()

  #######################################################
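
Note on the write offsets used throughout resample_regrid above: each file's slice of the cruise-level arrays (Sv, time, bottom, latitude, longitude) is located by a cumulative sum over the per-file NUM_PING_TIME_DROPNA counts. A small worked example with made-up counts:

    import numpy as np

    # hypothetical per-file ping counts for a three-file cruise, in cruise order
    num_ping_time_dropna = np.array([100, 250, 175])
    ping_time_cumsum = np.insert(np.cumsum(num_ping_time_dropna), 0, 0)
    # ping_time_cumsum -> array([  0, 100, 350, 525])

    index = 1  # the second .raw file in the cruise
    start_ping_time_index = ping_time_cumsum[index]      # 100
    end_ping_time_index = ping_time_cumsum[index + 1]    # 350
    # that file's regridded Sv lands in output_zarr_store["Sv"][:, 100:350, :]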