water-column-sonar-processing 0.0.1__py3-none-any.whl → 26.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of water-column-sonar-processing might be problematic.
- water_column_sonar_processing/__init__.py +13 -0
- water_column_sonar_processing/aws/__init__.py +7 -0
- water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
- water_column_sonar_processing/aws/s3_manager.py +418 -0
- water_column_sonar_processing/aws/s3fs_manager.py +64 -0
- {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
- {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
- water_column_sonar_processing/cruise/__init__.py +4 -0
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +129 -0
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +323 -0
- water_column_sonar_processing/geometry/__init__.py +13 -0
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +241 -0
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +266 -0
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/__init__.py +3 -0
- water_column_sonar_processing/index/index_manager.py +381 -0
- water_column_sonar_processing/model/__init__.py +3 -0
- water_column_sonar_processing/model/zarr_manager.py +741 -0
- water_column_sonar_processing/processing/__init__.py +4 -0
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +331 -0
- water_column_sonar_processing/utility/__init__.py +13 -0
- {model → water_column_sonar_processing}/utility/cleaner.py +7 -7
- water_column_sonar_processing/utility/constants.py +118 -0
- {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
- water_column_sonar_processing/utility/timestamp.py +12 -0
- water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
- water_column_sonar_processing-26.1.14.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing-26.1.14.dist-info/top_level.txt +1 -0
- __init__.py +0 -0
- model/__init__.py +0 -0
- model/aws/__init__.py +0 -0
- model/aws/dynamodb_manager.py +0 -149
- model/aws/s3_manager.py +0 -356
- model/aws/s3fs_manager.py +0 -74
- model/cruise/__init__.py +0 -0
- model/cruise/create_empty_zarr_store.py +0 -166
- model/cruise/resample_regrid.py +0 -248
- model/geospatial/__init__.py +0 -0
- model/geospatial/geometry_manager.py +0 -194
- model/geospatial/geometry_simplification.py +0 -81
- model/geospatial/pmtile_generation.py +0 -74
- model/index/__init__.py +0 -0
- model/index/index.py +0 -228
- model/model.py +0 -138
- model/utility/__init__.py +0 -0
- model/utility/constants.py +0 -56
- model/utility/timestamp.py +0 -12
- model/zarr/__init__.py +0 -0
- model/zarr/bar.py +0 -28
- model/zarr/foo.py +0 -11
- model/zarr/zarr_manager.py +0 -298
- water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
- water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
- water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
model/cruise/create_empty_zarr_store.py
DELETED
@@ -1,166 +0,0 @@
-import os
-import numcodecs
-import numpy as np
-from ..utility.cleaner import Cleaner
-from ..aws.dynamodb_manager import DynamoDBManager
-from ..aws.s3_manager import S3Manager
-from ..zarr.zarr_manager import ZarrManager
-
-numcodecs.blosc.use_threads = False
-numcodecs.blosc.set_nthreads(1)
-
-TEMPDIR = "/tmp"
-
-# TODO: when ready switch to version 3 of zarr spec
-# ZARR_V3_EXPERIMENTAL_API = 1
-# creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
-
-class CreateEmptyZarrStore:
-    #######################################################
-    def __init__(
-        self,
-    ):
-        self.__overwrite = True
-        # TODO: create output_bucket and input_bucket variables here?
-        self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-
-    #######################################################
-
-    def upload_zarr_store_to_s3(
-        self,
-        local_directory: str,
-        object_prefix: str,
-        cruise_name: str,
-    ) -> None:
-        print('uploading zarr store to s3')
-        s3_manager = S3Manager()
-        #
-        print('Starting upload with thread pool executor.')
-        # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
-        all_files = []
-        for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
-            for file in files:
-                local_path = os.path.join(subdir, file)
-                # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
-                s3_key = f'{object_prefix}/{cruise_name}.zarr{local_path.split(f"{cruise_name}.zarr")[-1]}'
-                all_files.append([local_path, s3_key])
-        #
-        # print(all_files)
-        s3_manager.upload_files_with_thread_pool_executor(
-            all_files=all_files,
-        )
-        print('Done uploading with thread pool executor.')
-        # TODO: move to common place
-
-    #######################################################
-    def create_cruise_level_zarr_store(
-        self,
-        ship_name: str,
-        cruise_name: str,
-        sensor_name: str,
-        table_name: str
-    ) -> None:
-        try:
-            # HB0806 - 123, HB0903 - 220
-            dynamo_db_manager = DynamoDBManager()
-
-            df = dynamo_db_manager.get_table_as_df(
-                table_name=table_name,
-                ship_name=ship_name,
-                cruise_name=cruise_name,
-                sensor_name=sensor_name
-            )
-
-            # filter the dataframe just for enums >= LEVEL_1_PROCESSING
-            # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
-
-            # TODO: VERIFY GEOJSON EXISTS as prerequisite!!!
-
-            print(f"DataFrame shape: {df.shape}")
-            cruise_channels = list(set([i for sublist in df['CHANNELS'].dropna() for i in sublist]))
-            cruise_channels.sort()
-
-            consolidated_zarr_width = np.sum(df['NUM_PING_TIME_DROPNA'].dropna().astype(int))
-
-            # [3] calculate the max/min measurement resolutions for the whole cruise
-            cruise_min_echo_range = float(np.min(df['MIN_ECHO_RANGE'].dropna().astype(float)))
-
-            # [4] calculate the maximum of the max depth values
-            cruise_max_echo_range = float(np.max(df['MAX_ECHO_RANGE'].dropna().astype(float)))
-            print(f"cruise_min_echo_range: {cruise_min_echo_range}, cruise_max_echo_range: {cruise_max_echo_range}")
-
-            # [5] get number of channels
-            cruise_frequencies = [float(i) for i in df['FREQUENCIES'].dropna().values.flatten()[0]]
-            print(cruise_frequencies)
-
-            new_width = int(consolidated_zarr_width)
-            print(f"new_width: {new_width}")
-            #################################################################
-            store_name = f"{cruise_name}.zarr"
-            print(store_name)
-            ################################################################
-            # Delete existing zarr store if it exists
-            s3_manager = S3Manager()
-            zarr_prefix = os.path.join("level_2", ship_name, cruise_name, sensor_name)
-            child_objects = s3_manager.get_child_objects(
-                bucket_name=self.output_bucket_name,
-                sub_prefix=zarr_prefix,
-            )
-            if len(child_objects) > 0:
-                s3_manager.delete_nodd_objects(
-                    objects=child_objects,
-                )
-            ################################################################
-            # Create new zarr store
-            zarr_manager = ZarrManager()
-            new_height = len(zarr_manager.get_depth_values(
-                min_echo_range=cruise_min_echo_range,
-                max_echo_range=cruise_max_echo_range
-            ))
-            print(f"new_height: {new_height}")
-
-            zarr_manager.create_zarr_store(
-                path=TEMPDIR,
-                ship_name=ship_name,
-                cruise_name=cruise_name,
-                sensor_name=sensor_name,
-                frequencies=cruise_frequencies,
-                width=new_width,
-                min_echo_range=cruise_min_echo_range,
-                max_echo_range=cruise_max_echo_range,
-                calibration_status=True,
-            )
-            #################################################################
-            self.upload_zarr_store_to_s3(
-                local_directory=TEMPDIR,
-                object_prefix=zarr_prefix,
-                cruise_name=cruise_name,
-            )
-            # https://noaa-wcsd-zarr-pds.s3.amazonaws.com/index.html
-            #################################################################
-            # Verify count of the files uploaded
-            # count = self.__get_file_count(store_name=store_name)
-            # #
-            # raw_zarr_files = self.__get_s3_files(  # TODO: just need count
-            #     bucket_name=self.__output_bucket,
-            #     sub_prefix=os.path.join(zarr_prefix, store_name),
-            # )
-            # if len(raw_zarr_files) != count:
-            #     print(f'Problem writing {store_name} with proper count {count}.')
-            #     raise Exception("File count doesnt equal number of s3 Zarr store files.")
-            # else:
-            #     print("File counts match.")
-            #################################################################
-            # Success
-            # TODO: update enum in dynamodb
-            #################################################################
-        except Exception as err:
-            print(f"Problem trying to create new cruise zarr store: {err}")
-        finally:
-            cleaner = Cleaner()
-            cleaner.delete_local_files()
-        print("Done creating cruise level zarr store")
-
-
-###########################################################
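The deleted upload path above walks the local store, builds [local_path, s3_key] pairs, and hands them to S3Manager.upload_files_with_thread_pool_executor, whose body never appears in this diff. A minimal sketch of what such a helper could look like, assuming boto3 with credentials available in the environment; the function name and signature here are hypothetical, not the package's actual API:

    from concurrent.futures import ThreadPoolExecutor

    import boto3

    def upload_files_with_thread_pool_executor(all_files, bucket_name, max_workers=8):
        """Hypothetical sketch: upload [local_path, s3_key] pairs concurrently."""
        s3_client = boto3.client("s3")  # boto3 clients are safe to share across threads

        def upload_one(pair):
            local_path, s3_key = pair
            s3_client.upload_file(Filename=local_path, Bucket=bucket_name, Key=s3_key)
            return s3_key

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # list() drains the iterator so any upload exception surfaces here
            return list(executor.map(upload_one, all_files))

A Zarr store is a directory tree of many small chunk files, so concurrent uploads matter: one thread per object amortizes S3 request latency across the store.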
model/cruise/resample_regrid.py
DELETED
@@ -1,248 +0,0 @@
-import gc
-import os
-from pathlib import Path
-import numcodecs
-import numpy as np
-import xarray as xr
-import pandas as pd
-
-from ..geospatial.geometry_manager import GeoManager
-from ..aws.dynamodb_manager import DynamoDBManager
-from ..zarr.zarr_manager import ZarrManager
-
-numcodecs.blosc.use_threads = False
-numcodecs.blosc.set_nthreads(1)
-
-
-# TODO: when ready switch to version 3 of zarr spec
-# ZARR_V3_EXPERIMENTAL_API = 1
-# creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
-
-class ResampleRegrid:
-    #######################################################
-    def __init__(
-        self,
-    ):
-        self.__overwrite = True
-        self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-        self.dtype = 'float32'
-
-    #################################################################
-    def interpolate_data(
-        self,
-        input_xr,
-        ping_times,
-        all_cruise_depth_values,
-    ) -> np.ndarray:
-        print("Interpolating data.")
-        try:
-            data = np.empty((
-                len(all_cruise_depth_values),
-                len(ping_times),
-                len(input_xr.frequency_nominal)
-            ), dtype=self.dtype)
-
-            data[:] = np.nan
-
-            regrid_resample = xr.DataArray(
-                data=data,
-                dims=("depth", "time", "frequency"),
-                coords={
-                    "depth": all_cruise_depth_values,
-                    "time": ping_times,
-                    "frequency": input_xr.frequency_nominal.values,
-                }
-            )
-
-            channels = input_xr.channel.values
-            for channel in range(len(channels)):  # TODO: leaving off here, need to subset for just indices in time axis
-                print(np.nanmax(input_xr.echo_range.sel(channel=input_xr.channel[channel]).values))
-                #
-                max_depths = np.nanmax(
-                    a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values,
-                    axis=1
-                )
-                superset_of_max_depths = set(
-                    np.nanmax(input_xr.echo_range.sel(channel=input_xr.channel[channel]).values, 1)
-                )
-                set_of_max_depths = list({x for x in superset_of_max_depths if x == x})  # removes nan's
-                # iterate through partitions of data with similar depths and resample
-                for select_max_depth in set_of_max_depths:
-                    # TODO: for nan just skip and leave all nan's
-                    select_indices = [i for i in range(0, len(max_depths)) if max_depths[i] == select_max_depth]
-
-                    # now create new DataArray with proper dimension and indices
-                    # data_select = input_xr.Sv.sel(
-                    #     channel=input_xr.channel[channel]
-                    # ).values[select_indices, :].T  # TODO: dont like this transpose
-                    data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[select_indices, :].T.values
-                    # change from ".values[select_indices, :].T" to "[select_indices, :].values.T"
-
-                    times_select = input_xr.ping_time.values[select_indices]
-                    depths_select = input_xr.echo_range.sel(
-                        channel=input_xr.channel[channel]
-                    ).values[select_indices[0], :]  # '0' because all others in group should be same
-
-                    da_select = xr.DataArray(
-                        data=data_select,
-                        dims=("depth", "time"),
-                        coords={
-                            "depth": depths_select,
-                            "time": times_select,
-                        }
-                    ).dropna(dim='depth')
-                    resampled = da_select.interp(depth=all_cruise_depth_values, method="nearest")
-                    # write to the resample array
-                    regrid_resample.loc[
-                        dict(time=times_select, frequency=input_xr.frequency_nominal.values[channel])
-                    ] = resampled
-                    print(f"updated {len(times_select)} ping times")
-        except Exception as err:
-            print(f'Problem finding the dynamodb table: {err}')
-            raise err
-        print("Done interpolating data.")
-        return regrid_resample
-
-    #################################################################
-    def resample_regrid(
-        self,
-        ship_name,
-        cruise_name,
-        sensor_name,
-        table_name,
-    ) -> None:
-        """
-        The goal here is to interpolate the data against the depth values already populated
-        in the existing file level zarr stores. We open the cruise-level store with zarr for
-        read/write operations. We open the file-level store with Xarray to leverage tools for
-        resampling and subsetting the data.
-        """
-        print("Interpolating data.")
-        try:
-            zarr_manager = ZarrManager()
-            # s3_manager = S3Manager()
-            geo_manager = GeoManager()
-            # get zarr store
-            output_zarr_store = zarr_manager.open_s3_zarr_store_with_zarr(
-                ship_name=ship_name,
-                cruise_name=cruise_name,
-                sensor_name=sensor_name,
-                # zarr_synchronizer=?  # TODO: pass in for parallelization
-            )
-
-            # get dynamo stuff
-            dynamo_db_manager = DynamoDBManager()
-            cruise_df = dynamo_db_manager.get_table_as_df(
-                ship_name=ship_name,
-                cruise_name=cruise_name,
-                sensor_name=sensor_name,
-                table_name=table_name,
-            )
-
-            #########################################################
-            #########################################################
-            # TODO: iterate files here
-            all_file_names = cruise_df['FILE_NAME']
-            for file_name in all_file_names:
-                gc.collect()
-                file_name_stem = Path(file_name).stem
-                # file_name_stem = "D20070724-T151330"
-                print(f"Processing file: {file_name_stem}.")
-                # if f"{file_name_stem}.raw" not in list(cruise_df['FILE_NAME']):
-                #     raise Exception(f"Raw file file_stem not found in dynamodb.")
-
-                # status = PipelineStatus['LEVEL_1_PROCESSING']
-                # TODO: filter rows by enum success, filter the dataframe just for enums >= LEVEL_1_PROCESSING
-                # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
-
-                # Get index from all cruise files. Note: should be based on which are included in cruise.
-                index = cruise_df.index[cruise_df['FILE_NAME'] == f"{file_name_stem}.raw"][0]
-
-                # get input store
-                input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
-                    ship_name=ship_name,
-                    cruise_name=cruise_name,
-                    sensor_name=sensor_name,
-                    file_name_stem=file_name_stem,
-                )
-                #########################################################################
-                # [3] Get needed indices
-                # Offset from start index to insert new data. Note that missing values are excluded.
-                ping_time_cumsum = np.insert(
-                    np.cumsum(cruise_df['NUM_PING_TIME_DROPNA'].dropna().to_numpy(dtype=int)),
-                    obj=0,
-                    values=0
-                )
-                start_ping_time_index = ping_time_cumsum[index]
-                end_ping_time_index = ping_time_cumsum[index + 1]
-
-                min_echo_range = np.nanmin(np.float32(cruise_df['MIN_ECHO_RANGE']))
-                max_echo_range = np.nanmax(np.float32(cruise_df['MAX_ECHO_RANGE']))
-
-                print("Creating empty ndarray for Sv data.")  # Note: cruise_zarr dimensions are (depth, time, frequency)
-                cruise_sv_subset = np.empty(
-                    shape=output_zarr_store.Sv[:, start_ping_time_index:end_ping_time_index, :].shape
-                )
-                cruise_sv_subset[:, :, :] = np.nan  # (5208, 9778, 4)
-
-                all_cruise_depth_values = zarr_manager.get_depth_values(
-                    min_echo_range=min_echo_range,
-                    max_echo_range=max_echo_range
-                )
-
-                print(" ".join(list(input_xr_zarr_store.Sv.dims)))
-                if set(input_xr_zarr_store.Sv.dims) != {'channel', 'ping_time', 'range_sample'}:
-                    raise Exception("Xarray dimensions are not as expected.")
-
-                # get geojson
-                indices, geospatial = geo_manager.read_s3_geo_json(
-                    ship_name=ship_name,
-                    cruise_name=cruise_name,
-                    sensor_name=sensor_name,
-                    file_name_stem=file_name_stem,
-                    input_xr_zarr_store=input_xr_zarr_store,
-                )
-
-                input_xr = input_xr_zarr_store.isel(ping_time=indices)
-
-                ping_times = input_xr.ping_time.values
-                # Date format: numpy.datetime64('2007-07-20T02:10:25.845073920') converts to "1184897425.845074"
-                epoch_seconds = [(pd.Timestamp(i) - pd.Timestamp('1970-01-01')) / pd.Timedelta('1s') for i in ping_times]
-                output_zarr_store.time[start_ping_time_index:end_ping_time_index] = epoch_seconds
-
-                # --- UPDATING --- #
-
-                regrid_resample = self.interpolate_data(
-                    input_xr=input_xr,
-                    ping_times=ping_times,
-                    all_cruise_depth_values=all_cruise_depth_values,
-                )
-
-                print(f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}")
-
-                #########################################################################
-                # write Sv values to cruise-level-zarr-store
-                for channel in range(len(input_xr.channel.values)):  # doesn't like being written in one fell swoop :(
-                    output_zarr_store.Sv[
-                        :,
-                        start_ping_time_index:end_ping_time_index,
-                        channel
-                    ] = regrid_resample[:, :, channel]
-
-                #########################################################################
-                # [5] write subset of latitude/longitude
-                output_zarr_store.latitude[start_ping_time_index:end_ping_time_index] = geospatial.dropna()[
-                    'latitude'
-                ].values
-                output_zarr_store.longitude[start_ping_time_index:end_ping_time_index] = geospatial.dropna()[
-                    'longitude'
-                ].values
-        except Exception as err:
-            print(f'Problem interpolating the data: {err}')
-            raise err
-        print("Done interpolating data.")
-
-    #######################################################
-
-###########################################################
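The core of the deleted interpolate_data above partitions each channel's pings by their maximum echo_range, then snaps every partition onto the cruise-wide depth grid with xarray's nearest-neighbor interp, leaving depths beyond a file's maximum as NaN. A self-contained sketch of that regrid step; the arrays here are illustrative toy values, not the package's data:

    import numpy as np
    import xarray as xr

    file_depths = np.array([0.5, 1.0, 1.5, 2.0])  # native echo_range bins for one file
    cruise_depths = np.linspace(0.5, 3.0, 6)      # cruise-wide grid reaches deeper
    sv = xr.DataArray(
        np.random.default_rng(0).normal(size=(4, 3)),
        dims=("depth", "time"),
        coords={"depth": file_depths, "time": np.arange(3)},
    )

    # Nearest-neighbor regrid; depths past the file's maximum come back as NaN,
    # matching the NaN-initialized cruise array the deleted code writes into.
    resampled = sv.interp(depth=cruise_depths, method="nearest")
    print(resampled.shape)  # (6, 3)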
model/geospatial/__init__.py
DELETED
File without changes
model/geospatial/geometry_manager.py
DELETED
@@ -1,194 +0,0 @@
-from pathlib import Path
-import numpy as np
-import geopandas
-import pandas as pd
-
-from ..utility.cleaner import Cleaner
-from ..aws.s3_manager import S3Manager
-
-"""
-// [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
-// 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
-// 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
-// 2 0.01 0° 00′ 36″ town or village 1.1132 km 1.0247 km 787.1 m 434.96 m
-// 3 0.001 0° 00′ 3.6″ neighborhood, street 111.32 m 102.47 m 78.71 m 43.496 m
-// 4 0.0001 0° 00′ 0.36″ individual street, land parcel 11.132 m 10.247 m 7.871 m 4.3496 m
-// 5 0.00001 0° 00′ 0.036″ individual trees, door entrance 1.1132 m 1.0247 m 787.1 mm 434.96 mm
-// 6 0.000001 0° 00′ 0.0036″ individual humans 111.32 mm 102.47 mm 78.71 mm 43.496 mm
-// 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm
-"""
-
-
-class GeoManager:
-    #######################################################
-    def __init__(
-        self,
-    ):
-        self.DECIMAL_PRECISION = 5  # precision for GPS coordinates
-        self.SIMPLIFICATION_TOLERANCE = 0.0001  # RDP simplification to street level
-
-    #######################################################
-    def read_echodata_gps_data(
-        self,
-        echodata,
-        ship_name,
-        cruise_name,
-        sensor_name,
-        file_name,
-        write_geojson=True,
-    ) -> tuple:
-        file_name_stem = Path(file_name).stem
-        geo_json_name = f"{file_name_stem}.json"
-
-        print('Getting GPS data from echopype object.')
-        try:
-            latitude = np.round(echodata.platform.latitude.values, self.DECIMAL_PRECISION)
-            longitude = np.round(echodata.platform.longitude.values, self.DECIMAL_PRECISION)
-
-            # RE: time coordinates: https://github.com/OSOceanAcoustics/echopype/issues/656#issue-1219104771
-            # 'nmea_times' are times from the nmea datalogger associated with GPS
-            # note that nmea_times, unlike time1, can be sorted
-            nmea_times = np.sort(echodata.platform.time1.values)
-
-            # 'time1' are times from the echosounder associated with the data of the transducer measurement
-            time1 = echodata.environment.time1.values
-
-            if len(nmea_times) < len(time1):
-                raise Exception("Problem: Not enough NMEA times available to extrapolate time1.")
-
-            # Align 'sv_times' to 'nmea_times'
-            if not (np.all(time1[:-1] <= time1[1:]) and np.all(nmea_times[:-1] <= nmea_times[1:])):
-                raise Exception("Problem: NMEA times are not sorted.")
-
-            # Finds the indices where 'v' can be inserted just to the right of 'a'
-            indices = np.searchsorted(a=nmea_times, v=time1, side="right") - 1
-            lat = latitude[indices]
-            lat[indices < 0] = np.nan  # values recorded before indexing are set to nan
-            lon = longitude[indices]
-            lon[indices < 0] = np.nan
-
-            if not (np.all(lat[~np.isnan(lat)] >= -90.) and np.all(lat[~np.isnan(lat)] <= 90.) and np.all(lon[~np.isnan(lon)] >= -180.) and np.all(lon[~np.isnan(lon)] <= 180.)):
-                raise Exception("Problem: GPS Data falls outside allowed bounds.")
-
-            # check for visits to null island
-            null_island_indices = list(
-                set.intersection(set(np.where(np.abs(lat) < 1e-3)[0]), set(np.where(np.abs(lon) < 1e-3)[0]))
-            )
-            lat[null_island_indices] = np.nan
-            lon[null_island_indices] = np.nan
-
-            # create requirement for minimum linestring size
-            MIN_ALLOWED_SIZE = 4  # don't want to process files with less than 4 data points
-            if len(lat[~np.isnan(lat)]) < MIN_ALLOWED_SIZE or len(lon[~np.isnan(lon)]) < MIN_ALLOWED_SIZE:
-                raise Exception(
-                    f"There was not enough data in lat or lon to create geojson, {len(lat[~np.isnan(lat)])} found, less than {MIN_ALLOWED_SIZE}."
-                )
-
-            # https://osoceanacoustics.github.io/echopype-examples/echopype_tour.html
-            gps_df = pd.DataFrame({
-                'latitude': lat,
-                'longitude': lon,
-                'time': time1
-            }).set_index(['time']).fillna(0)
-
-            # Note: We set np.nan to 0,0 so downstream missing values can be omitted
-            gps_gdf = geopandas.GeoDataFrame(
-                gps_df,
-                geometry=geopandas.points_from_xy(
-                    gps_df['longitude'],
-                    gps_df['latitude']
-                ),
-                crs="epsg:4326"
-            )
-            # Note: We set np.nan to 0,0 so downstream missing values can be omitted
-
-            geo_json_line = gps_gdf.to_json()
-            if write_geojson:
-                print('Creating local copy of geojson file.')
-                with open(geo_json_name, "w") as write_file:
-                    write_file.write(geo_json_line)
-
-                geo_json_prefix = f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
-
-                print('Checking s3 and deleting any existing GeoJSON file.')
-                s3_manager = S3Manager()
-                s3_objects = s3_manager.list_nodd_objects(prefix=f"{geo_json_prefix}/{geo_json_name}")
-                if len(s3_objects) > 0:
-                    print('GeoJSON already exists in s3, deleting existing and continuing.')
-                    s3_manager.delete_nodd_objects(objects=s3_objects)
-
-                print('Upload GeoJSON to s3.')
-                s3_manager.upload_nodd_file(
-                    file_name=geo_json_name,  # file_name
-                    key=f"{geo_json_prefix}/{geo_json_name}"  # key
-                )
-
-                # TODO: delete geo_json file
-                cleaner = Cleaner()
-                cleaner.delete_local_files(file_types=['*.json'])
-
-            #################################################################
-            # TODO: simplify with shapely
-            # linestring = shapely.geometry.LineString(
-            #     [xy for xy in zip(gps_gdf.longitude, gps_gdf.latitude)]
-            # )
-            # len(linestring.coords)
-            # line_simplified = linestring.simplify(
-            #     tolerance=self.SIMPLIFICATION_TOLERANCE,
-            #     preserve_topology=True
-            # )
-            # print(f"Total number of points for original linestring: {len(linestring.coords)}")
-            # print(f"Total number of points needed for the simplified linestring: {len(line_simplified.coords)}")
-            # print(line_simplified)
-            # geo_json_line_simplified = shapely.to_geojson(line_simplified)
-            #################################################################
-            # GeoJSON FeatureCollection with IDs as "time"
-        except Exception as err:
-            print(f'Exception encountered extracting gps coordinates creating geojson: {err}')
-            raise
-        # Note: returned lat/lon values can include np.nan because they need to be aligned with
-        # the Sv data! GeoJSON needs simplification but has been filtered.
-        return gps_df.index.values, gps_df.latitude.values, gps_df.longitude.values
-        # TODO: if geojson is already returned with 0,0, the return here
-        # can include np.nan values?
-
-    #######################################################
-    def read_s3_geo_json(
-        self,
-        ship_name,
-        cruise_name,
-        sensor_name,
-        file_name_stem,
-        input_xr_zarr_store,
-    ):
-        try:
-            s3_manager = S3Manager()
-            geo_json = s3_manager.read_s3_json(
-                ship_name=ship_name,
-                cruise_name=cruise_name,
-                sensor_name=sensor_name,
-                file_name_stem=file_name_stem,
-            )
-            ###
-            geospatial = geopandas.GeoDataFrame.from_features(geo_json['features']).set_index(
-                pd.json_normalize(geo_json["features"])["id"].values
-            )
-            null_island_indices = list(
-                set.intersection(
-                    set(np.where(np.abs(geospatial.latitude.values) < 1e-3)[0]),
-                    set(np.where(np.abs(geospatial.longitude.values) < 1e-3)[0])
-                )
-            )
-            geospatial.iloc[null_island_indices] = np.nan
-            ###
-            geospatial_index = geospatial.dropna().index.values.astype('datetime64[ns]')
-            aa = input_xr_zarr_store.ping_time.values.tolist()
-            vv = geospatial_index.tolist()
-            indices = np.searchsorted(a=aa, v=vv)
-
-            return indices, geospatial
-        except Exception as err:  # Failure
-            print(f'Exception encountered reading s3 GeoJSON: {err}')
-            raise
-
-###########################################################
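The deleted read_echodata_gps_data above aligns echosounder timestamps (time1) to the sorted NMEA fix times with np.searchsorted: each ping takes the most recent fix at or before it, and pings recorded before the first fix are masked to NaN. A self-contained sketch of that alignment, with illustrative timestamps and coordinates:

    import numpy as np

    nmea_times = np.array(["2007-07-20T02:10:00", "2007-07-20T02:10:10",
                           "2007-07-20T02:10:20"], dtype="datetime64[ns]")
    latitude = np.array([41.10000, 41.10010, 41.10020])

    time1 = np.array(["2007-07-20T02:09:55", "2007-07-20T02:10:12"],
                     dtype="datetime64[ns]")

    # index of the rightmost nmea_time <= each time1 value
    indices = np.searchsorted(nmea_times, time1, side="right") - 1
    lat = latitude[indices]
    lat[indices < 0] = np.nan  # pings before the first GPS fix get NaN
    print(lat)  # [nan 41.1001]

Masking indices < 0 matters because a -1 from searchsorted would otherwise wrap around and silently pick up the last fix of the file.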