water-column-sonar-processing 0.0.1__py3-none-any.whl → 25.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of water-column-sonar-processing might be problematic.
- water_column_sonar_processing/__init__.py +13 -0
- water_column_sonar_processing/aws/__init__.py +7 -0
- water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
- water_column_sonar_processing/aws/s3_manager.py +420 -0
- water_column_sonar_processing/aws/s3fs_manager.py +72 -0
- {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
- {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
- water_column_sonar_processing/cruise/__init__.py +4 -0
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +191 -0
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +339 -0
- water_column_sonar_processing/geometry/__init__.py +11 -0
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +243 -0
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +261 -0
- water_column_sonar_processing/index/__init__.py +3 -0
- water_column_sonar_processing/index/index_manager.py +384 -0
- water_column_sonar_processing/model/__init__.py +3 -0
- water_column_sonar_processing/model/zarr_manager.py +722 -0
- water_column_sonar_processing/process.py +149 -0
- water_column_sonar_processing/processing/__init__.py +4 -0
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +425 -0
- water_column_sonar_processing/utility/__init__.py +13 -0
- {model → water_column_sonar_processing}/utility/cleaner.py +7 -8
- water_column_sonar_processing/utility/constants.py +118 -0
- {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
- water_column_sonar_processing/utility/timestamp.py +12 -0
- water_column_sonar_processing-25.11.1.dist-info/METADATA +182 -0
- water_column_sonar_processing-25.11.1.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing-25.11.1.dist-info/top_level.txt +1 -0
- __init__.py +0 -0
- model/__init__.py +0 -0
- model/aws/__init__.py +0 -0
- model/aws/dynamodb_manager.py +0 -149
- model/aws/s3_manager.py +0 -356
- model/aws/s3fs_manager.py +0 -74
- model/cruise/__init__.py +0 -0
- model/cruise/create_empty_zarr_store.py +0 -166
- model/cruise/resample_regrid.py +0 -248
- model/geospatial/__init__.py +0 -0
- model/geospatial/geometry_manager.py +0 -194
- model/geospatial/geometry_simplification.py +0 -81
- model/geospatial/pmtile_generation.py +0 -74
- model/index/__init__.py +0 -0
- model/index/index.py +0 -228
- model/model.py +0 -138
- model/utility/__init__.py +0 -0
- model/utility/constants.py +0 -56
- model/utility/timestamp.py +0 -12
- model/zarr/__init__.py +0 -0
- model/zarr/bar.py +0 -28
- model/zarr/foo.py +0 -11
- model/zarr/zarr_manager.py +0 -298
- water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
- water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
- water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
model/cruise/resample_regrid.py
DELETED
@@ -1,248 +0,0 @@
import gc
import os
from pathlib import Path
import numcodecs
import numpy as np
import xarray as xr
import pandas as pd

from ..geospatial.geometry_manager import GeoManager
from ..aws.dynamodb_manager import DynamoDBManager
from ..zarr.zarr_manager import ZarrManager

numcodecs.blosc.use_threads = False
numcodecs.blosc.set_nthreads(1)


# TODO: when ready switch to version 3 of zarr spec
# ZARR_V3_EXPERIMENTAL_API = 1
# creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)

class ResampleRegrid:
    #######################################################
    def __init__(
        self,
    ):
        self.__overwrite = True
        self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
        self.dtype = 'float32'

    #################################################################
    def interpolate_data(
        self,
        input_xr,
        ping_times,
        all_cruise_depth_values,
    ) -> np.ndarray:
        print("Interpolating data.")
        try:
            data = np.empty((
                len(all_cruise_depth_values),
                len(ping_times),
                len(input_xr.frequency_nominal)
            ), dtype=self.dtype)

            data[:] = np.nan

            regrid_resample = xr.DataArray(
                data=data,
                dims=("depth", "time", "frequency"),
                coords={
                    "depth": all_cruise_depth_values,
                    "time": ping_times,
                    "frequency": input_xr.frequency_nominal.values,
                }
            )

            channels = input_xr.channel.values
            for channel in range(len(channels)):  # TODO: leaving off here, need to subset for just indices in time axis
                print(np.nanmax(input_xr.echo_range.sel(channel=input_xr.channel[channel]).values))
                #
                max_depths = np.nanmax(
                    a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values,
                    axis=1
                )
                superset_of_max_depths = set(
                    np.nanmax(input_xr.echo_range.sel(channel=input_xr.channel[channel]).values, 1)
                )
                set_of_max_depths = list({x for x in superset_of_max_depths if x == x})  # removes nan's
                # iterate through partitions of data with similar depths and resample
                for select_max_depth in set_of_max_depths:
                    # TODO: for nan just skip and leave all nan's
                    select_indices = [i for i in range(0, len(max_depths)) if max_depths[i] == select_max_depth]

                    # now create new DataArray with proper dimension and indices
                    # data_select = input_xr.Sv.sel(
                    #     channel=input_xr.channel[channel]
                    # ).values[select_indices, :].T  # TODO: dont like this transpose
                    data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[select_indices, :].T.values
                    # change from ".values[select_indices, :].T" to "[select_indices, :].values.T"

                    times_select = input_xr.ping_time.values[select_indices]
                    depths_select = input_xr.echo_range.sel(
                        channel=input_xr.channel[channel]
                    ).values[select_indices[0], :]  # '0' because all others in group should be same

                    da_select = xr.DataArray(
                        data=data_select,
                        dims=("depth", "time"),
                        coords={
                            "depth": depths_select,
                            "time": times_select,
                        }
                    ).dropna(dim='depth')
                    resampled = da_select.interp(depth=all_cruise_depth_values, method="nearest")
                    # write to the resample array
                    regrid_resample.loc[
                        dict(time=times_select, frequency=input_xr.frequency_nominal.values[channel])
                    ] = resampled
                    print(f"updated {len(times_select)} ping times")
        except Exception as err:
            print(f'Problem finding the dynamodb table: {err}')
            raise err
        print("Done interpolating data.")
        return regrid_resample

    #################################################################
    def resample_regrid(
        self,
        ship_name,
        cruise_name,
        sensor_name,
        table_name,
    ) -> None:
        """
        The goal here is to interpolate the data against the depth values already populated
        in the existing file level zarr stores. We open the cruise-level store with zarr for
        read/write operations. We open the file-level store with Xarray to leverage tools for
        resampling and subsetting the data.
        """
        print("Interpolating data.")
        try:
            zarr_manager = ZarrManager()
            # s3_manager = S3Manager()
            geo_manager = GeoManager()
            # get zarr store
            output_zarr_store = zarr_manager.open_s3_zarr_store_with_zarr(
                ship_name=ship_name,
                cruise_name=cruise_name,
                sensor_name=sensor_name,
                # zarr_synchronizer=?  # TODO: pass in for parallelization
            )

            # get dynamo stuff
            dynamo_db_manager = DynamoDBManager()
            cruise_df = dynamo_db_manager.get_table_as_df(
                ship_name=ship_name,
                cruise_name=cruise_name,
                sensor_name=sensor_name,
                table_name=table_name,
            )

            #########################################################
            #########################################################
            # TODO: iterate files here
            all_file_names = cruise_df['FILE_NAME']
            for file_name in all_file_names:
                gc.collect()
                file_name_stem = Path(file_name).stem
                # file_name_stem = "D20070724-T151330"
                print(f"Processing file: {file_name_stem}.")
                # if f"{file_name_stem}.raw" not in list(cruise_df['FILE_NAME']):
                #     raise Exception(f"Raw file file_stem not found in dynamodb.")

                # status = PipelineStatus['LEVEL_1_PROCESSING']
                # TODO: filter rows by enum success, filter the dataframe just for enums >= LEVEL_1_PROCESSING
                # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan

                # Get index from all cruise files. Note: should be based on which are included in cruise.
                index = cruise_df.index[cruise_df['FILE_NAME'] == f"{file_name_stem}.raw"][0]

                # get input store
                input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
                    ship_name=ship_name,
                    cruise_name=cruise_name,
                    sensor_name=sensor_name,
                    file_name_stem=file_name_stem,
                )
                #########################################################################
                # [3] Get needed indices
                # Offset from start index to insert new data. Note that missing values are excluded.
                ping_time_cumsum = np.insert(
                    np.cumsum(cruise_df['NUM_PING_TIME_DROPNA'].dropna().to_numpy(dtype=int)),
                    obj=0,
                    values=0
                )
                start_ping_time_index = ping_time_cumsum[index]
                end_ping_time_index = ping_time_cumsum[index + 1]

                min_echo_range = np.nanmin(np.float32(cruise_df['MIN_ECHO_RANGE']))
                max_echo_range = np.nanmax(np.float32(cruise_df['MAX_ECHO_RANGE']))

                print("Creating empty ndarray for Sv data.")  # Note: cruise_zarr dimensions are (depth, time, frequency)
                cruise_sv_subset = np.empty(
                    shape=output_zarr_store.Sv[:, start_ping_time_index:end_ping_time_index, :].shape
                )
                cruise_sv_subset[:, :, :] = np.nan  # (5208, 9778, 4)

                all_cruise_depth_values = zarr_manager.get_depth_values(
                    min_echo_range=min_echo_range,
                    max_echo_range=max_echo_range
                )

                print(" ".join(list(input_xr_zarr_store.Sv.dims)))
                if set(input_xr_zarr_store.Sv.dims) != {'channel', 'ping_time', 'range_sample'}:
                    raise Exception("Xarray dimensions are not as expected.")

                # get geojson
                indices, geospatial = geo_manager.read_s3_geo_json(
                    ship_name=ship_name,
                    cruise_name=cruise_name,
                    sensor_name=sensor_name,
                    file_name_stem=file_name_stem,
                    input_xr_zarr_store=input_xr_zarr_store,
                )

                input_xr = input_xr_zarr_store.isel(ping_time=indices)

                ping_times = input_xr.ping_time.values
                # Date format: numpy.datetime64('2007-07-20T02:10:25.845073920') converts to "1184897425.845074"
                epoch_seconds = [(pd.Timestamp(i) - pd.Timestamp('1970-01-01')) / pd.Timedelta('1s') for i in ping_times]
                output_zarr_store.time[start_ping_time_index:end_ping_time_index] = epoch_seconds

                # --- UPDATING --- #

                regrid_resample = self.interpolate_data(
                    input_xr=input_xr,
                    ping_times=ping_times,
                    all_cruise_depth_values=all_cruise_depth_values,
                )

                print(f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}")

                #########################################################################
                # write Sv values to cruise-level-zarr-store
                for channel in range(len(input_xr.channel.values)):  # doesn't like being written in one fell swoop :(
                    output_zarr_store.Sv[
                        :,
                        start_ping_time_index:end_ping_time_index,
                        channel
                    ] = regrid_resample[:, :, channel]

                #########################################################################
                # [5] write subset of latitude/longitude
                output_zarr_store.latitude[start_ping_time_index:end_ping_time_index] = geospatial.dropna()[
                    'latitude'
                ].values
                output_zarr_store.longitude[start_ping_time_index:end_ping_time_index] = geospatial.dropna()[
                    'longitude'
                ].values
        except Exception as err:
            print(f'Problem interpolating the data: {err}')
            raise err
        print("Done interpolating data.")

    #######################################################

###########################################################
model/geospatial/__init__.py
DELETED
File without changes

model/geospatial/geometry_manager.py
DELETED
@@ -1,194 +0,0 @@
from pathlib import Path
import numpy as np
import geopandas
import pandas as pd

from ..utility.cleaner import Cleaner
from ..aws.s3_manager import S3Manager

"""
// [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
// 0  1.0        1° 00′ 0″        country or large region                  111.32 km  102.47 km  78.71 km  43.496 km
// 1  0.1        0° 06′ 0″        large city or district                   11.132 km  10.247 km  7.871 km  4.3496 km
// 2  0.01       0° 00′ 36″       town or village                          1.1132 km  1.0247 km  787.1 m   434.96 m
// 3  0.001      0° 00′ 3.6″      neighborhood, street                     111.32 m   102.47 m   78.71 m   43.496 m
// 4  0.0001     0° 00′ 0.36″     individual street, land parcel           11.132 m   10.247 m   7.871 m   4.3496 m
// 5  0.00001    0° 00′ 0.036″    individual trees, door entrance          1.1132 m   1.0247 m   787.1 mm  434.96 mm
// 6  0.000001   0° 00′ 0.0036″   individual humans                        111.32 mm  102.47 mm  78.71 mm  43.496 mm
// 7  0.0000001  0° 00′ 0.00036″  practical limit of commercial surveying  11.132 mm  10.247 mm  7.871 mm  4.3496 mm
"""


class GeoManager:
    #######################################################
    def __init__(
        self,
    ):
        self.DECIMAL_PRECISION = 5  # precision for GPS coordinates
        self.SIMPLIFICATION_TOLERANCE = 0.0001  # RDP simplification to street level

    #######################################################
    def read_echodata_gps_data(
        self,
        echodata,
        ship_name,
        cruise_name,
        sensor_name,
        file_name,
        write_geojson=True,
    ) -> tuple:
        file_name_stem = Path(file_name).stem
        geo_json_name = f"{file_name_stem}.json"

        print('Getting GPS data from echopype object.')
        try:
            latitude = np.round(echodata.platform.latitude.values, self.DECIMAL_PRECISION)
            longitude = np.round(echodata.platform.longitude.values, self.DECIMAL_PRECISION)

            # RE: time coordinates: https://github.com/OSOceanAcoustics/echopype/issues/656#issue-1219104771
            # 'nmea_times' are times from the nmea datalogger associated with GPS
            # note that nmea_times, unlike time1, can be sorted
            nmea_times = np.sort(echodata.platform.time1.values)

            # 'time1' are times from the echosounder associated with the data of the transducer measurement
            time1 = echodata.environment.time1.values

            if len(nmea_times) < len(time1):
                raise Exception("Problem: Not enough NMEA times available to extrapolate time1.")

            # Align 'sv_times' to 'nmea_times'
            if not (np.all(time1[:-1] <= time1[1:]) and np.all(nmea_times[:-1] <= nmea_times[1:])):
                raise Exception("Problem: NMEA times are not sorted.")

            # Finds the indices where 'v' can be inserted just to the right of 'a'
            indices = np.searchsorted(a=nmea_times, v=time1, side="right") - 1
            lat = latitude[indices]
            lat[indices < 0] = np.nan  # values recorded before indexing are set to nan
            lon = longitude[indices]
            lon[indices < 0] = np.nan

            if not (np.all(lat[~np.isnan(lat)] >= -90.) and np.all(lat[~np.isnan(lat)] <= 90.) and np.all(lon[~np.isnan(lon)] >= -180.) and np.all(lon[~np.isnan(lon)] <= 180.)):
                raise Exception("Problem: GPS Data falls outside allowed bounds.")

            # check for visits to null island
            null_island_indices = list(
                set.intersection(set(np.where(np.abs(lat) < 1e-3)[0]), set(np.where(np.abs(lon) < 1e-3)[0]))
            )
            lat[null_island_indices] = np.nan
            lon[null_island_indices] = np.nan

            # create requirement for minimum linestring size
            MIN_ALLOWED_SIZE = 4  # don't want to process files with less than 4 data points
            if len(lat[~np.isnan(lat)]) < MIN_ALLOWED_SIZE or len(lon[~np.isnan(lon)]) < MIN_ALLOWED_SIZE:
                raise Exception(
                    f"There was not enough data in lat or lon to create geojson, {len(lat[~np.isnan(lat)])} found, less than {MIN_ALLOWED_SIZE}."
                )

            # https://osoceanacoustics.github.io/echopype-examples/echopype_tour.html
            gps_df = pd.DataFrame({
                'latitude': lat,
                'longitude': lon,
                'time': time1
            }).set_index(['time']).fillna(0)

            # Note: We set np.nan to 0,0 so downstream missing values can be omitted
            gps_gdf = geopandas.GeoDataFrame(
                gps_df,
                geometry=geopandas.points_from_xy(
                    gps_df['longitude'],
                    gps_df['latitude']
                ),
                crs="epsg:4326"
            )
            # Note: We set np.nan to 0,0 so downstream missing values can be omitted

            geo_json_line = gps_gdf.to_json()
            if write_geojson:
                print('Creating local copy of geojson file.')
                with open(geo_json_name, "w") as write_file:
                    write_file.write(geo_json_line)

                geo_json_prefix = f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"

                print('Checking s3 and deleting any existing GeoJSON file.')
                s3_manager = S3Manager()
                s3_objects = s3_manager.list_nodd_objects(prefix=f"{geo_json_prefix}/{geo_json_name}")
                if len(s3_objects) > 0:
                    print('GeoJSON already exists in s3, deleting existing and continuing.')
                    s3_manager.delete_nodd_objects(objects=s3_objects)

                print('Upload GeoJSON to s3.')
                s3_manager.upload_nodd_file(
                    file_name=geo_json_name,  # file_name
                    key=f"{geo_json_prefix}/{geo_json_name}"  # key
                )

                # TODO: delete geo_json file
                cleaner = Cleaner()
                cleaner.delete_local_files(file_types=['*.json'])

            #################################################################
            # TODO: simplify with shapely
            # linestring = shapely.geometry.LineString(
            #     [xy for xy in zip(gps_gdf.longitude, gps_gdf.latitude)]
            # )
            # len(linestring.coords)
            # line_simplified = linestring.simplify(
            #     tolerance=self.SIMPLIFICATION_TOLERANCE,
            #     preserve_topology=True
            # )
            # print(f"Total number of points for original linestring: {len(linestring.coords)}")
            # print(f"Total number of points needed for the simplified linestring: {len(line_simplified.coords)}")
            # print(line_simplified)
            # geo_json_line_simplified = shapely.to_geojson(line_simplified)
            #################################################################
            # GeoJSON FeatureCollection with IDs as "time"
        except Exception as err:
            print(f'Exception encountered extracting gps coordinates creating geojson: {err}')
            raise
        # Note: returned lat/lon values can include np.nan because they need to be aligned with
        # the Sv data! GeoJSON needs simplification but has been filtered.
        return gps_df.index.values, gps_df.latitude.values, gps_df.longitude.values
        # TODO: if geojson is already returned with 0,0, the return here
        # can include np.nan values?

    #######################################################
    def read_s3_geo_json(
        self,
        ship_name,
        cruise_name,
        sensor_name,
        file_name_stem,
        input_xr_zarr_store,
    ):
        try:
            s3_manager = S3Manager()
            geo_json = s3_manager.read_s3_json(
                ship_name=ship_name,
                cruise_name=cruise_name,
                sensor_name=sensor_name,
                file_name_stem=file_name_stem,
            )
            ###
            geospatial = geopandas.GeoDataFrame.from_features(geo_json['features']).set_index(
                pd.json_normalize(geo_json["features"])["id"].values
            )
            null_island_indices = list(
                set.intersection(
                    set(np.where(np.abs(geospatial.latitude.values) < 1e-3)[0]),
                    set(np.where(np.abs(geospatial.longitude.values) < 1e-3)[0])
                )
            )
            geospatial.iloc[null_island_indices] = np.nan
            ###
            geospatial_index = geospatial.dropna().index.values.astype('datetime64[ns]')
            aa = input_xr_zarr_store.ping_time.values.tolist()
            vv = geospatial_index.tolist()
            indices = np.searchsorted(a=aa, v=vv)

            return indices, geospatial
        except Exception as err:  # Failure
            print(f'Exception encountered reading s3 GeoJSON: {err}')
            raise

###########################################################
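read_echodata_gps_data aligns each echosounder measurement time to the most recent preceding GPS fix via np.searchsorted(..., side="right") - 1, masking anything recorded before the first fix. A minimal sketch of that alignment with made-up times and coordinates:

import numpy as np

nmea_times = np.array([10.0, 20.0, 30.0])  # sorted GPS fix times
latitudes = np.array([45.1, 45.2, 45.3])   # one fix per NMEA time
time1 = np.array([5.0, 10.0, 25.0, 31.0])  # echosounder measurement times

# index of the most recent GPS fix at or before each measurement time
indices = np.searchsorted(a=nmea_times, v=time1, side="right") - 1

lat = latitudes[indices].astype(float)
lat[indices < 0] = np.nan  # 5.0 precedes every fix, so it becomes NaN

print(indices)  # [-1  0  1  2]
print(lat)      # [ nan 45.1 45.2 45.3]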
model/geospatial/geometry_simplification.py
DELETED

@@ -1,81 +0,0 @@
# import json


# lambda for timestamp in form "yyyy-MM-ddTHH:mm:ssZ"
# dt = lambda: datetime.now().isoformat(timespec="seconds") + "Z"

# https://shapely.readthedocs.io/en/stable/reference/shapely.MultiLineString.html#shapely.MultiLineString
"""
// [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
// 0  1.0        1° 00′ 0″        country or large region                  111.32 km  102.47 km  78.71 km  43.496 km
// 1  0.1        0° 06′ 0″        large city or district                   11.132 km  10.247 km  7.871 km  4.3496 km
// 2  0.01       0° 00′ 36″       town or village                          1.1132 km  1.0247 km  787.1 m   434.96 m
// 3  0.001      0° 00′ 3.6″      neighborhood, street                     111.32 m   102.47 m   78.71 m   43.496 m
// 4  0.0001     0° 00′ 0.36″     individual street, land parcel           11.132 m   10.247 m   7.871 m   4.3496 m
// 5  0.00001    0° 00′ 0.036″    individual trees, door entrance          1.1132 m   1.0247 m   787.1 mm  434.96 mm
// 6  0.000001   0° 00′ 0.0036″   individual humans                        111.32 mm  102.47 mm  78.71 mm  43.496 mm
// 7  0.0000001  0° 00′ 0.00036″  practical limit of commercial surveying  11.132 mm  10.247 mm  7.871 mm  4.3496 mm
"""

"""
private static final int SRID = 8307;
private static final double simplificationTolerance = 0.0001;
private static final long splitGeometryMs = 900000L;
private static final int batchSize = 10000;
private static final int geoJsonPrecision = 5;
final int geoJsonPrecision = 5;
final double simplificationTolerance = 0.0001;
final int simplifierBatchSize = 3000;
final long maxCount = 0;
private static final double maxAllowedSpeedKnts = 60D;


"""
class GeometrySimplification(object):
    # TODO: in the future move to standalone library
    #######################################################
    def __init__(
        self,
    ):
        pass

    #######################################################
    def speed_check(
        self,
        speed_knots=50,
    ) -> None:
        print(speed_knots)
        pass

    def remove_null_island_values(
        self,
        epsilon=1e-5,
    ) -> None:
        print(epsilon)
        pass

    def stream_geometry(
        self,
    ) -> None:
        pass

    def break_linestring_into_multi_linestring(
        self,
    ) -> None:
        # For any line-strings across the antimeridian, break into multilinestring
        pass

    def simplify(
        self,
    ) -> None:
        pass

    def kalman_filter(
        self
    ):
        # for cruises with bad signal, filter so that
        pass

    #######################################################

###########################################################
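The methods in GeometrySimplification are still stubs. A minimal sketch of what simplify() would presumably do with shapely's Douglas-Peucker implementation, using the street-level 0.0001-degree tolerance from the constants above; the track coordinates are invented:

from shapely.geometry import LineString

# a short GPS track with nearly collinear intermediate points (invented)
track = LineString([
    (-166.6346, 56.0234),
    (-166.6345, 56.0235),
    (-166.6344, 56.0236),
    (-166.6000, 56.0500),
])

# Douglas-Peucker: drop points deviating from the chord by less than the tolerance
simplified = track.simplify(tolerance=0.0001, preserve_topology=True)
print(len(track.coords), "->", len(simplified.coords))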
model/geospatial/pmtile_generation.py
DELETED

@@ -1,74 +0,0 @@
# import json
# This is a sample Python script.
import pandas as pd
# import numpy as np
import os
# from glob import glob
from pathlib import Path
import geopandas
# import shapely
from shapely.geometry import LineString
# from shapely import wkt
# import json
# from shapely.geometry import shape, GeometryCollection
import fiona


class PMTileGeneration(object):
    #######################################################
    def __init__(
        self,
    ):
        pass

    #######################################################
    def generate_geojson_feature_collection(self):
        # This was used to read from noaa-wcsd-zarr-pds bucket geojson files and then to
        # generate the geopandas dataframe which could be exported to another comprehensive
        # geojson file. That
        result = list(Path("/Users/r2d2/Documents/echofish/geojson").rglob("*.json"))
        # result = result[:100]
        iii = 0
        pieces = []
        for iii in range(len(result)):
            file_name = os.path.normpath(result[iii]).split(os.sep)[-1]
            file_stem = os.path.splitext(os.path.basename(file_name))[0]
            geom = geopandas.read_file(result[iii]).iloc[0]['geometry']
            # TDOO: Filter (0,0) coordinates
            if len(geom.coords.xy[0]) < 2:
                continue
            geom = LineString(list(zip(geom.coords.xy[1], geom.coords.xy[0])))
            pieces.append({
                'ship_name': os.path.normpath(result[iii]).split(os.sep)[-4],
                'cruise_name': os.path.normpath(result[iii]).split(os.sep)[-3],
                'file_stem': file_stem,
                'file_path': result[iii],
                'geom': geom,
            })
        df = pd.DataFrame(pieces)
        print(df)
        gps_gdf = geopandas.GeoDataFrame(
            data=df[['ship_name', 'cruise_name', 'file_stem']],  # try again with file_stem
            geometry=df['geom'],
            crs='EPSG:4326'
        )
        print(fiona.supported_drivers)
        # gps_gdf.to_file('dataframe.shp', crs='epsg:4326')
        # Convert geojson feature collection to pmtiles
        gps_gdf.to_file('dataframe.geojson', driver='GeoJSON', crs='epsg:4326')
        print('done')
        """
        # need to eliminate visits to null island
        tippecanoe --no-feature-limit -zg --projection=EPSG:4326 -o dataframe.pmtiles -l cruises dataframe.geojson

        https://docs.protomaps.com/pmtiles/create
        PMTiles
        https://drive.google.com/file/d/17Bi-UIXB9IJkIz30BHpiKHXYpCOgRFge/view?usp=sharing

        Viewer
        https://protomaps.github.io/PMTiles/#map=8.91/56.0234/-166.6346
        """

    #######################################################

###########################################################
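The docstring above notes that visits to null island (0, 0) still need to be eliminated before tiling. A minimal sketch of such a filter on a GeoDataFrame of track points; the 1e-3-degree threshold mirrors GeoManager above, but the data are made up:

import geopandas
from shapely.geometry import Point

points = geopandas.GeoDataFrame(
    {"geometry": [Point(0.0, 0.0), Point(-166.63, 56.02), Point(-166.60, 56.05)]},
    crs="EPSG:4326",
)

# drop points within ~1e-3 degrees of (0, 0) on both axes
near_null_island = (points.geometry.x.abs() < 1e-3) & (points.geometry.y.abs() < 1e-3)
filtered = points[~near_null_island]
print(len(points), "->", len(filtered))  # 3 -> 2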
model/index/__init__.py
DELETED
File without changes