water-column-sonar-processing 0.0.9__py3-none-any.whl → 26.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- water_column_sonar_processing/aws/dynamodb_manager.py +138 -59
- water_column_sonar_processing/aws/s3_manager.py +179 -141
- water_column_sonar_processing/aws/s3fs_manager.py +29 -33
- water_column_sonar_processing/aws/sqs_manager.py +1 -1
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +35 -96
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +142 -127
- water_column_sonar_processing/geometry/__init__.py +10 -2
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +50 -49
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +227 -223
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/index_manager.py +151 -33
- water_column_sonar_processing/model/zarr_manager.py +665 -262
- water_column_sonar_processing/processing/__init__.py +3 -3
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +206 -214
- water_column_sonar_processing/utility/__init__.py +9 -2
- water_column_sonar_processing/utility/constants.py +69 -18
- water_column_sonar_processing/utility/pipeline_status.py +11 -15
- water_column_sonar_processing/utility/timestamp.py +3 -4
- water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
- water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
- water_column_sonar_processing/process.py +0 -147
- water_column_sonar_processing/processing/cruise_sampler.py +0 -342
- water_column_sonar_processing-0.0.9.dist-info/METADATA +0 -134
- water_column_sonar_processing-0.0.9.dist-info/RECORD +0 -32
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
|
@@ -1,15 +1,8 @@
|
|
|
1
|
-
import glob
|
|
2
|
-
import os
|
|
3
|
-
from pathlib import Path
|
|
4
1
|
import fiona
|
|
5
|
-
import
|
|
2
|
+
import geopandas as gpd
|
|
6
3
|
import numpy as np
|
|
7
4
|
import pandas as pd
|
|
8
5
|
import xarray as xr
|
|
9
|
-
import geopandas
|
|
10
|
-
import geopandas as gpd
|
|
11
|
-
import pyogrio
|
|
12
|
-
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
13
6
|
from shapely.geometry import LineString
|
|
14
7
|
|
|
15
8
|
MAX_POOL_CONNECTIONS = 64
|
|
@@ -17,246 +10,257 @@ MAX_CONCURRENCY = 64
|
|
|
17
10
|
MAX_WORKERS = 64
|
|
18
11
|
GB = 1024**3
|
|
19
12
|
|
|
13
|
+
bucket_name = "noaa-wcsd-zarr-pds"
|
|
14
|
+
ship_name = "Henry_B._Bigelow"
|
|
15
|
+
sensor_name = "EK60"
|
|
16
|
+
|
|
17
|
+
# TODO: get pmtiles of all the evr points
|
|
18
|
+
|
|
20
19
|
|
|
21
20
|
class PMTileGeneration(object):
    """
    Build a single GeoJSON feature collection of simplified cruise tracks.

    Workflow:
    - iterate through the zarr stores for all cruises
    - generate geojson in geopandas df, simplify linestrings
    - consolidate into singular df, one cruise per row
    - export as geojson
    - using tippecanoe, geojson --> pmtiles w linux command
    - upload to s3

    NOTE(review): the static methods read the module-level ``ship_name`` and
    ``sensor_name`` (and ``get_cruise_geometry`` also reads ``bucket_name``)
    rather than the instance attributes set in ``__init__``; their signatures
    are kept unchanged so existing callers keep working.
    """

    #######################################################
    def __init__(
        self,
    ):
        """Record the bucket, ship and sensor this generator targets."""
        self.bucket_name = "noaa-wcsd-zarr-pds"
        self.ship_name = "Henry_B._Bigelow"
        self.sensor_name = "EK60"

    #######################################################
    @staticmethod
    def check_all_cruises(bucket_name, cruises):
        """
        Return the cruises whose latitude coordinate is fully populated.

        Each cruise store is opened anonymously from
        ``s3://{bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/``.
        A cruise counts as completed when the number of non-NaN latitudes
        equals ``Sv.shape[1]``.

        Args:
            bucket_name: S3 bucket holding the level_2 Zarr stores.
            cruises: Iterable of cruise names to inspect.

        Returns:
            List of completed cruise names, in input order.

        Raises:
            RuntimeError: If any store cannot be opened or read; the original
                exception is attached as ``__cause__``.
        """
        completed = []
        for cruise_name in cruises:
            print(cruise_name)
            try:
                zarr_store = f"{cruise_name}.zarr"
                s3_zarr_store_path = f"{bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{zarr_store}"
                # The context manager closes the remote store once the small
                # amount of metadata needed here has been read.
                with xr.open_dataset(
                    filename_or_obj=f"s3://{s3_zarr_store_path}",
                    engine="zarr",
                    storage_options={"anon": True},
                    consolidated=False,
                ) as cruise:
                    sv_shape = cruise.Sv.shape
                    height = sv_shape[0]
                    width = sv_shape[1]
                    depth = sv_shape[2]
                    lats = cruise.latitude.to_numpy()
                print(
                    f"height: {height}, width: {width}, depth: {depth} = {width * height * depth}"
                )
                valid_count = int(np.count_nonzero(~np.isnan(lats)))
                # Comparing the counts is equivalent to "ratio == 1.0" and
                # cannot divide by zero; an empty store is reported as
                # incomplete instead of aborting the whole scan.
                if width > 0 and valid_count == width:
                    completed.append(cruise_name)
                else:
                    percent_done = valid_count / width if width else 0.0
                    print(
                        f"percent done: {np.round(percent_done, 2)}, {valid_count}, {width}"
                    )
            except Exception as err:
                raise RuntimeError(f"Problem parsing Zarr stores, {err}") from err
        return completed

    #######################################################
    @staticmethod
    def get_cruise_geometry(cruise_name, index):
        """
        Build a one-row GeoDataFrame holding the simplified track of a cruise.

        Args:
            cruise_name: Cruise whose Zarr store is read.
            index: Value stored in the ``id`` column, which becomes the index
                of the returned frame.

        Returns:
            GeoDataFrame (EPSG:4326) indexed by ``id`` with columns
            ``ship_name``, ``cruise_name``, ``sensor_name`` and the geometry.

        Raises:
            RuntimeError: If the store cannot be read, if any latitude or
                longitude is NaN, or if the GeoJSON driver is unavailable;
                the original exception is attached as ``__cause__``.
        """
        print(cruise_name)
        try:
            zarr_store = f"{cruise_name}.zarr"
            s3_zarr_store_path = f"{bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{zarr_store}"
            # Both coordinate arrays are materialised inside the block so the
            # store can be closed before the geometry work starts.
            with xr.open_dataset(
                filename_or_obj=f"s3://{s3_zarr_store_path}",
                engine="zarr",
                storage_options={"anon": True},
                chunks={},
                cache=True,
            ) as cruise:
                latitude_array = cruise.latitude.to_numpy()
                longitude_array = cruise.longitude.to_numpy()
            if np.isnan(latitude_array).any() or np.isnan(longitude_array).any():
                raise RuntimeError(
                    f"There was missing lat-lon dataset for, {cruise_name}"
                )
            geom = LineString(list(zip(longitude_array, latitude_array))).simplify(
                tolerance=0.001,  # preserve_topology=True # 113
            )  # TODO: do speed check, convert linestrings to multilinestrings
            print(len(geom.coords))
            df = pd.DataFrame(
                [
                    {
                        "id": index,
                        "ship_name": ship_name,
                        "cruise_name": cruise_name,
                        "sensor_name": sensor_name,
                        "geom": geom,
                    }
                ]
            )
            gps_gdf = gpd.GeoDataFrame(
                data=df[["id", "ship_name", "cruise_name", "sensor_name"]],
                geometry=df["geom"],
                crs="EPSG:4326",
            )
            print(gps_gdf)
            # The collection is later exported as GeoJSON, so fail early when
            # the local fiona build lacks that driver.
            if "GeoJSON" not in fiona.supported_drivers:
                raise RuntimeError("Missing GeoJSON driver")

            gps_gdf.set_index("id", inplace=True)
            return gps_gdf
        except Exception as err:
            raise RuntimeError(f"Problem parsing Zarr stores, {err}") from err

    #######################################################
    @staticmethod
    def aggregate_geojson_into_dataframe(geoms):
        """
        Merge per-cruise frames into one frame and write ``dataset.geojson``.

        Args:
            geoms: Iterable of GeoDataFrames as returned by
                ``get_cruise_geometry``. Only the first row of each frame is
                used.

        Returns:
            None. The combined frame is written to ``dataset.geojson`` in the
            current working directory and printed.
        """
        gps_gdf = gpd.GeoDataFrame(
            columns=["id", "ship", "cruise", "sensor", "geometry"],
            geometry="geometry",
            crs="EPSG:4326",
        )
        for iii, geom in enumerate(geoms):
            # Positional access: the original label lookup (``[iii]``) only
            # worked when a frame's ``id`` index matched its list position.
            gps_gdf.loc[iii] = (
                iii,
                geom.ship_name.iloc[0],
                geom.cruise_name.iloc[0],
                geom.sensor_name.iloc[0],
                geom.geometry.iloc[0],
            )
        gps_gdf.set_index("id", inplace=True)
        gps_gdf.to_file(
            filename="dataset.geojson",
            driver="GeoJSON",
            engine="fiona",  # or "pyogrio"
            layer_options={"ID_GENERATE": "YES"},
            crs="EPSG:4326",
            id_generate=True,  # required for the feature click selection
        )
        print(gps_gdf)

    #######################################################
    def create_collection_geojson(self):
        """
        Run the full workflow for the hard-coded cruise list.

        Checks which cruises are complete, builds one simplified track per
        completed cruise, writes ``dataset.geojson`` and prints the
        tippecanoe command that converts it to PMTiles.
        """
        cruises = [
            "HB0706",
            "HB0707",
            "HB0710",
            "HB0711",
            "HB0802",
            "HB0803",
            "HB0805",
            "HB0806",
            "HB0807",
            "HB0901",
            "HB0902",
            "HB0903",
            "HB0904",
            "HB0905",
            "HB1002",
            "HB1006",
            "HB1102",
            "HB1103",
            "HB1105",
            "HB1201",
            "HB1206",
            "HB1301",
            "HB1303",
            "HB1304",
            "HB1401",
            "HB1402",
            "HB1403",
            "HB1405",
            "HB1501",
            "HB1502",
            "HB1503",
            "HB1506",
            "HB1507",
            "HB1601",
            "HB1603",
            "HB1604",
            "HB1701",
            "HB1702",
            "HB1801",
            "HB1802",
            "HB1803",
            "HB1804",
            "HB1805",
            "HB1806",
            "HB1901",
            "HB1902",
            "HB1903",
            "HB1904",
            "HB1906",
            "HB1907",
            "HB2001",
            "HB2006",
            "HB2007",
            "HB20ORT",
            "HB20TR",
        ]
        # Use the bucket configured on the instance rather than the module
        # global, so the attribute set in __init__ is actually honoured.
        completed_cruises = self.check_all_cruises(
            bucket_name=self.bucket_name, cruises=cruises
        )  # TODO: threadpool this
        ### create linestring ###
        geometries = [
            self.get_cruise_geometry(cruise_name=completed_cruise, index=jjj)
            for jjj, completed_cruise in enumerate(completed_cruises)
        ]  # TODO: threadpool this
        #
        self.aggregate_geojson_into_dataframe(geoms=geometries)
        #
        print(
            'Now run this: "tippecanoe --no-feature-limit -zg -o dataset.pmtiles -l cruises dataset.geojson --force"'
        )
        # # water-column-sonar-id.pmtiles
        # linux command: "tippecanoe --no-feature-limit -zg -o water-column-sonar-id.pmtiles -l cruises dataset.geojson --force"
        # note: 'cruises' is the name of the layer
        # size is ~3.3 MB for the pmtiles
        # then drag-and-drop here: https://pmtiles.io/#map=6.79/39.802/-71.51
|
|
207
231
|
|
|
232
|
+
#######################################################
|
|
233
|
+
# TODO: copy the .pmtiles file to the s3 bucket "noaa-wcsd-pds-index"
|
|
234
|
+
#######################################################
|
|
208
235
|
|
|
236
|
+
#######################################################
|
|
237
|
+
# TODO: get threadpool working
|
|
238
|
+
# def open_zarr_stores_with_thread_pool_executor(
|
|
239
|
+
# self,
|
|
240
|
+
# cruises: list,
|
|
241
|
+
# ):
|
|
242
|
+
# # 'cruises' is a list of cruises to process
|
|
243
|
+
# completed_cruises = []
|
|
244
|
+
# try:
|
|
245
|
+
# with ThreadPoolExecutor(max_workers=32) as executor:
|
|
246
|
+
# futures = [
|
|
247
|
+
# executor.submit(
|
|
248
|
+
# self.get_geospatial_info_from_zarr_store,
|
|
249
|
+
# "Henry_B._Bigelow", # ship_name
|
|
250
|
+
# cruise, # cruise_name
|
|
251
|
+
# )
|
|
252
|
+
# for cruise in cruises
|
|
253
|
+
# ]
|
|
254
|
+
# for future in as_completed(futures):
|
|
255
|
+
# result = future.result()
|
|
256
|
+
# if result:
|
|
257
|
+
# completed_cruises.extend([result])
|
|
258
|
+
# except Exception as err:
|
|
259
|
+
# raise RuntimeError(f"Problem, {err}")
|
|
260
|
+
# print("Done opening zarr stores using thread pool.")
|
|
261
|
+
# return completed_cruises # Took ~12 minutes
|
|
209
262
|
|
|
210
|
-
|
|
211
|
-
#gps_gdf.__geo_interface__
|
|
212
|
-
#gps_gdf.set_index('id', inplace=True)
|
|
213
|
-
#gps_gdf.to_file(f"dataframe3.geojson", driver="GeoJSON", crs="EPSG:4326", engine="fiona", index=True)
|
|
263
|
+
#######################################################
|
|
214
264
|
|
|
215
|
-
### this gives the right layer id values
|
|
216
|
-
#gps_gdf.to_file(f"dataframe6.geojson", driver="GeoJSON", engine="pyogrio", layer_options={"ID_GENERATE": "YES"})
|
|
217
|
-
# jq '{"type": "FeatureCollection", "features": [.[] | .features[]]}' --slurp input*.geojson > output.geojson
|
|
218
|
-
#tippecanoe -zg --projection=EPSG:4326 -o water-column-sonar-id.pmtiles -l cruises output.geojson
|
|
219
|
-
#tippecanoe -zg --convert-stringified-ids-to-numbers --projection=EPSG:4326 -o water-column-sonar-id.pmtiles -l cruises dataframe*.geojson
|
|
220
|
-
# {
|
|
221
|
-
# "type": "FeatureCollection",
|
|
222
|
-
# "name": "dataframe5",
|
|
223
|
-
# "features": [
|
|
224
|
-
# { "type": "Feature", "id": 0, "properties": { "id": 0, "ship": "Henry_B._Bigelow", "cruise": "HB0706", "sensor": "EK60" }, "geometry": { "type": "LineString", "coordinates": [ [ -72.120498657226562, 39.659671783447266 ], [ -72.120773315429688, 39.660198211669922 ] ] } },
|
|
225
|
-
# { "type": "Feature", "id": 1, "properties": { "id": 1, "ship": "Henry_B._Bigelow", "cruise": "HB0707", "sensor": "EK60" }, "geometry": { "type": "LineString", "coordinates": [ [ -71.797836303710938, 41.003166198730469 ], [ -71.797996520996094, 41.002998352050781 ], [ -71.798583984375, 41.002994537353516 ] ] } },
|
|
226
|
-
# { "type": "Feature", "id": 2, "properties": { "id": 2, "ship": "Henry_B._Bigelow", "cruise": "HB0710", "sensor": "EK60" }, "geometry": { "type": "LineString", "coordinates": [ [ -72.489486694335938, 40.331901550292969 ], [ -72.490760803222656, 40.33099365234375 ] ] } }
|
|
227
|
-
# ]
|
|
228
|
-
# }
|
|
229
|
-
"""
|
|
230
|
-
# https://docs.protomaps.com/pmtiles/create
|
|
231
|
-
#ogr2ogr -t_srs EPSG:4326 data.geojson dataframe.shp
|
|
232
|
-
# Only need to do the second one here...
|
|
233
|
-
tippecanoe -zg --projection=EPSG:4326 -o data.pmtiles -l cruises dataframe.geojson
|
|
234
|
-
tippecanoe -zg --projection=EPSG:4326 -o data.pmtiles -l cruises --coalesce-densest-as-needed --extend-zooms-if-still-dropping dataframe*.geojson
|
|
235
|
-
# used this to combine all the geojson files into single pmtile file (2024-12-03):
|
|
236
|
-
tippecanoe -zg --projection=EPSG:4326 -o data.pmtiles -l cruises --coalesce-densest-as-needed --extend-zooms-if-still-dropping dataframe*.geojson
|
|
237
265
|
|
|
238
|
-
TODO:
|
|
239
|
-
run each one of the cruises in a separate ospool workflow.
|
|
240
|
-
each process gets own store
|
|
241
|
-
"""
|
|
242
266
|
###########################################################
|
|
243
|
-
|
|
244
|
-
# s3_manager = S3Manager() # endpoint_url=endpoint_url)
|
|
245
|
-
# # s3fs_manager = S3FSManager()
|
|
246
|
-
# # input_bucket_name = "test_input_bucket"
|
|
247
|
-
# # s3_manager.create_bucket(bucket_name=input_bucket_name)
|
|
248
|
-
# ship_name = "Henry_B._Bigelow"
|
|
249
|
-
# cruise_name = "HB0706"
|
|
250
|
-
# sensor_name = "EK60"
|
|
251
|
-
#
|
|
252
|
-
# # ---Scan Bucket For All Zarr Stores--- #
|
|
253
|
-
# # https://noaa-wcsd-zarr-pds.s3.amazonaws.com/index.html#level_2/Henry_B._Bigelow/HB0706/EK60/HB0706.zarr/
|
|
254
|
-
# path_to_zarr_store = f"s3://noaa-wcsd-zarr-pds/level_2/Henry_B._Bigelow/HB0706/EK60/HB0706.zarr"
|
|
255
|
-
# s3 = s3fs.S3FileSystem()
|
|
256
|
-
# zarr_store = s3fs.S3Map(path_to_zarr_store, s3=s3)
|
|
257
|
-
# ds_zarr = xr.open_zarr(zarr_store, consolidated=None)
|
|
258
|
-
# print(ds_zarr.Sv.shape)
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
total = [246847, 89911, 169763, 658047, 887640, 708771, 187099, 3672813, 4095002, 763268, 162727, 189454, 1925270, 3575857, 1031920, 1167590, 3737415, 4099957, 3990725, 3619996, 3573052, 2973090, 55851, 143192, 1550164, 3692819, 668400, 489735, 393260, 1311234, 242989, 4515760, 1303091, 704663, 270645, 3886437, 4204381, 1062090, 428639, 541455, 4206506, 298561, 1279329, 137416, 139836, 228947, 517949]
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import geopandas as gpd
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from shapely.geometry import Point
|
|
5
|
+
|
|
6
|
+
from water_column_sonar_processing.model import ZarrManager
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Convert "meters per second" to "knots"
|
|
10
|
+
# meters_per_second_to_knots = lambda mps_value: mps_value * 1.94384
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Spatiotemporal:
    """Derive vessel speed and step distance from ping times and positions."""

    #######################################################
    def __init__(
        self,
    ):
        """Set the unit-conversion constant and the cutoff thresholds."""
        self.NANOSECONDS_PER_SECOND = 1e9
        # NOTE(review): neither cutoff is read by a method of this class;
        # presumably consumed by callers -- confirm before removing.
        self.CUTOFF_DISTANCE_METERS = 50.0
        self.CUTOFF_TIME_SECONDS = 10.0

    #######################################################
    @staticmethod
    def meters_per_second_to_knots(
        mps_value,
    ):
        """Convert a speed (scalar or array) from meters per second to knots."""
        return mps_value * 1.94384

    #######################################################
    def compute_speed_and_distance(
        self,
        times_ns,  #: np.ndarray[tuple[int], np.dtype[np.int64]],
        latitudes,  #: np.ndarray,
        longitudes,  #: np.ndarray,
    ) -> pd.DataFrame:
        """
        Compute per-sample speed, step distance and time step.

        The first sample has no predecessor, so its distance and time step
        are backfilled from the second sample.

        Args:
            times_ns: Sample times as integer nanosecond values.
            latitudes: Latitudes in EPSG:4326, one per sample.
            longitudes: Longitudes in EPSG:4326, one per sample.

        Returns:
            DataFrame indexed by ``datetime64[ns]`` time with float32 columns
            ``speed_knots``, ``distance_meters`` and ``diff_seconds``.

        Raises:
            RuntimeError: On any failure, including fewer than two samples or
                non-finite times; the original exception is attached as
                ``__cause__``.
        """
        try:
            # fix times -- vectorised instead of one np.datetime64 per sample
            raw_times = np.asarray(times_ns)
            if raw_times.dtype.kind == "f" and not np.isfinite(raw_times).all():
                raise ValueError("times_ns contains NaN or infinite values")
            times = raw_times.astype(np.int64).astype("datetime64[ns]")
            if times.ndim != 1 or times.size < 2:
                # A single sample has no neighbour to backfill from.
                raise ValueError(
                    "at least two samples are required to compute speed and distance"
                )
            geom = [Point(xy) for xy in zip(longitudes, latitudes)]
            points_df = gpd.GeoDataFrame({"geometry": geom}, crs="EPSG:4326")
            # Conversion to a rectilinear projection coordinate system where distance can be calculated with pythagorean theorem
            # EPSG:4087, WGS 84 / World Equidistant Cylindrical
            # https://epsg.io/4087
            # NOTE(review): this projection presumably stretches east-west
            # distances away from the equator -- confirm the accuracy is
            # acceptable for the survey latitudes.
            points_df.to_crs(epsg=4087, inplace=True)
            distance_diffs = points_df.distance(points_df.geometry.shift())
            distance_diffs.iloc[0] = distance_diffs.iloc[1]  # missing first datapoint, backfill
            # Issue: np.max(distance_diffs) = 3397 meters
            # int64 explicitly: plain ``int`` is platform dependent and can be
            # too narrow for nanosecond differences.
            step_ns = np.diff(times).astype(np.int64)
            time_diffs_ns = np.concatenate((step_ns[:1], step_ns))  # backfill first
            time_diffs_seconds = time_diffs_ns / self.NANOSECONDS_PER_SECOND
            # Calculate the speed in knots
            # NOTE(review): repeated timestamps give a zero time step and so
            # inf/NaN speeds here -- confirm whether callers filter those.
            speed_meters_per_second = np.array(
                (distance_diffs / time_diffs_ns * self.NANOSECONDS_PER_SECOND),
                dtype=np.float32,
            )
            knots = self.meters_per_second_to_knots(speed_meters_per_second)
            metrics_df = pd.DataFrame(
                {
                    "speed_knots": knots.astype(dtype=np.float32),
                    "distance_meters": distance_diffs.to_numpy(dtype=np.float32),
                    "diff_seconds": time_diffs_seconds.astype(np.float32),
                },
                index=times,
            )
            #
            return metrics_df
        except Exception as err:
            raise RuntimeError(f"Exception encountered, {err}") from err

    #######################################################
    def add_speed_and_distance(
        self,
        ship_name,
        cruise_name,
        sensor_name,
        bucket_name,
        endpoint_url=None,
    ) -> None:
        """
        Compute speed and distance for a cruise and write them to its store.

        Reads ``longitude``, ``latitude`` and ``time`` from the cruise Zarr
        store, then overwrites the store's ``speed`` (knots) and ``distance``
        (meters) arrays.

        Args:
            ship_name: Ship the cruise belongs to.
            cruise_name: Cruise whose store is updated.
            sensor_name: Sensor the store was produced from.
            bucket_name: Bucket holding the store.
            endpoint_url: Optional S3 endpoint override.

        Raises:
            RuntimeError: On any failure opening, computing or writing; the
                original exception is attached as ``__cause__``.
        """
        try:
            zarr_manager = ZarrManager()
            zarr_store = zarr_manager.open_s3_zarr_store_with_zarr(
                ship_name=ship_name,
                cruise_name=cruise_name,
                sensor_name=sensor_name,
                output_bucket_name=bucket_name,
                endpoint_url=endpoint_url,
            )
            longitudes = zarr_store["longitude"][:]
            latitudes = zarr_store["latitude"][:]
            times = zarr_store["time"][:]
            #
            metrics_df = self.compute_speed_and_distance(
                times_ns=times,
                latitudes=latitudes,
                longitudes=longitudes,
            )
            # Write the speed and distance to the output zarr store
            zarr_store["speed"][:] = metrics_df.speed_knots.values
            zarr_store["distance"][:] = metrics_df.distance_meters.values
        except Exception as err:
            raise RuntimeError(
                f"Exception encountered writing the speed and distance, {err}"
            ) from err
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
###########################################################
|