water-column-sonar-processing 25.3.2__py3-none-any.whl → 25.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of water-column-sonar-processing might be problematic. Click here for more details.
- water_column_sonar_processing/aws/dynamodb_manager.py +6 -6
- water_column_sonar_processing/aws/s3_manager.py +95 -90
- water_column_sonar_processing/aws/s3fs_manager.py +5 -3
- water_column_sonar_processing/aws/sqs_manager.py +1 -1
- water_column_sonar_processing/cruise/__init__.py +2 -1
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +49 -43
- water_column_sonar_processing/cruise/create_empty_zarr_store_level_3.py +161 -0
- water_column_sonar_processing/cruise/datatree_manager.py +21 -21
- water_column_sonar_processing/cruise/resample_regrid.py +57 -47
- water_column_sonar_processing/dataset/__init__.py +3 -0
- water_column_sonar_processing/dataset/dataset_manager.py +205 -0
- water_column_sonar_processing/dataset/feature_manager.py +32 -0
- water_column_sonar_processing/geometry/geometry_manager.py +11 -12
- water_column_sonar_processing/geometry/line_simplification.py +26 -1
- water_column_sonar_processing/geometry/pmtile_generation.py +211 -247
- water_column_sonar_processing/index/index_manager.py +18 -17
- water_column_sonar_processing/model/zarr_manager.py +504 -256
- water_column_sonar_processing/processing/__init__.py +3 -2
- water_column_sonar_processing/processing/batch_downloader.py +11 -11
- water_column_sonar_processing/processing/raw_to_netcdf.py +319 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +41 -31
- water_column_sonar_processing/utility/__init__.py +9 -2
- water_column_sonar_processing/utility/cleaner.py +1 -2
- water_column_sonar_processing/utility/constants.py +26 -7
- water_column_sonar_processing/utility/timestamp.py +1 -0
- water_column_sonar_processing-25.8.0.dist-info/METADATA +162 -0
- water_column_sonar_processing-25.8.0.dist-info/RECORD +39 -0
- {water_column_sonar_processing-25.3.2.dist-info → water_column_sonar_processing-25.8.0.dist-info}/WHEEL +1 -1
- water_column_sonar_processing-25.3.2.dist-info/licenses/LICENSE → water_column_sonar_processing-25.8.0.dist-info/licenses/LICENSE-MIT +1 -1
- water_column_sonar_processing-25.3.2.dist-info/METADATA +0 -170
- water_column_sonar_processing-25.3.2.dist-info/RECORD +0 -34
- {water_column_sonar_processing-25.3.2.dist-info → water_column_sonar_processing-25.8.0.dist-info}/top_level.txt +0 -0
|
@@ -42,7 +42,7 @@ class GeometryManager:
|
|
|
42
42
|
file_name_stem = Path(file_name).stem
|
|
43
43
|
geo_json_name = f"{file_name_stem}.json"
|
|
44
44
|
|
|
45
|
-
print("Getting GPS
|
|
45
|
+
print("Getting GPS dataset from echopype object.")
|
|
46
46
|
try:
|
|
47
47
|
latitude = np.round(
|
|
48
48
|
echodata.platform.latitude.values, self.DECIMAL_PRECISION
|
|
@@ -56,7 +56,7 @@ class GeometryManager:
|
|
|
56
56
|
# note that nmea_times, unlike time1, can be sorted
|
|
57
57
|
nmea_times = np.sort(echodata.platform.time1.values)
|
|
58
58
|
|
|
59
|
-
# 'time1' are times from the echosounder associated with the data of the transducer measurement
|
|
59
|
+
# 'time1' are times from the echosounder associated with the dataset of the transducer measurement
|
|
60
60
|
time1 = echodata.environment.time1.values
|
|
61
61
|
|
|
62
62
|
if len(nmea_times) < len(time1):
|
|
@@ -98,14 +98,14 @@ class GeometryManager:
|
|
|
98
98
|
|
|
99
99
|
# create requirement for minimum linestring size
|
|
100
100
|
MIN_ALLOWED_SIZE = (
|
|
101
|
-
4 # don't want to process files with less than 4
|
|
101
|
+
4 # don't want to process files with less than 4 dataset points
|
|
102
102
|
)
|
|
103
103
|
if (
|
|
104
104
|
len(lat[~np.isnan(lat)]) < MIN_ALLOWED_SIZE
|
|
105
105
|
or len(lon[~np.isnan(lon)]) < MIN_ALLOWED_SIZE
|
|
106
106
|
):
|
|
107
107
|
raise Exception(
|
|
108
|
-
f"There was not enough
|
|
108
|
+
f"There was not enough dataset in lat or lon to create geojson, {len(lat[~np.isnan(lat)])} found, less than {MIN_ALLOWED_SIZE}."
|
|
109
109
|
)
|
|
110
110
|
|
|
111
111
|
# https://osoceanacoustics.github.io/echopype-examples/echopype_tour.html
|
|
@@ -124,7 +124,7 @@ class GeometryManager:
|
|
|
124
124
|
crs="epsg:4326",
|
|
125
125
|
)
|
|
126
126
|
# Note: We set np.nan to 0,0 so downstream missing values can be omitted
|
|
127
|
-
# TODO: so what ends up here is data with corruption at null island!!!
|
|
127
|
+
# TODO: so what ends up here is dataset with corruption at null island!!!
|
|
128
128
|
geo_json_line = gps_gdf.to_json()
|
|
129
129
|
if write_geojson:
|
|
130
130
|
print("Creating local copy of geojson file.")
|
|
@@ -180,12 +180,12 @@ class GeometryManager:
|
|
|
180
180
|
#################################################################
|
|
181
181
|
# GeoJSON FeatureCollection with IDs as "time"
|
|
182
182
|
except Exception as err:
|
|
183
|
-
|
|
184
|
-
f"Exception encountered extracting gps coordinates creating geojson
|
|
183
|
+
raise RuntimeError(
|
|
184
|
+
f"Exception encountered extracting gps coordinates creating geojson, {err}"
|
|
185
185
|
)
|
|
186
|
-
|
|
186
|
+
|
|
187
187
|
# Note: returned lat/lon values can include np.nan because they need to be aligned with
|
|
188
|
-
# the Sv data! GeoJSON needs simplification but has been filtered.
|
|
188
|
+
# the Sv dataset! GeoJSON needs simplification but has been filtered.
|
|
189
189
|
# return gps_df.index.values, gps_df.latitude.values, gps_df.longitude.values
|
|
190
190
|
return gps_df.index.values, lat, lon
|
|
191
191
|
# TODO: if geojson is already returned with 0,0, the return here
|
|
@@ -229,9 +229,8 @@ class GeometryManager:
|
|
|
229
229
|
indices = np.searchsorted(a=aa, v=vv)
|
|
230
230
|
|
|
231
231
|
return indices, geospatial
|
|
232
|
-
except Exception as err:
|
|
233
|
-
|
|
234
|
-
raise
|
|
232
|
+
except Exception as err:
|
|
233
|
+
raise RuntimeError(f"Exception encountered reading s3 GeoJSON, {err}")
|
|
235
234
|
|
|
236
235
|
############################################################################
|
|
237
236
|
# COMES from the raw-to-zarr conversion
|
|
@@ -4,6 +4,10 @@ import numpy as np
|
|
|
4
4
|
from pykalman import KalmanFilter
|
|
5
5
|
from shapely.geometry import Point
|
|
6
6
|
|
|
7
|
+
# import hvplot.pandas
|
|
8
|
+
# from holoviews import opts
|
|
9
|
+
# hv.extension('bokeh')
|
|
10
|
+
|
|
7
11
|
# import matplotlib.pyplot as plt
|
|
8
12
|
|
|
9
13
|
|
|
@@ -24,6 +28,16 @@ def mph_to_knots(mph_value):
|
|
|
24
28
|
return mph_value * 0.868976
|
|
25
29
|
|
|
26
30
|
|
|
31
|
+
def mps_to_knots(mps_value):
    """Convert a speed from meters per second to knots.

    The value is multiplied by a fixed factor, so anything that supports
    ``*`` with a float (a scalar, a numpy array, ...) is accepted and the
    result has the corresponding type.
    """
    # Rounded conversion factor: knots per one meter-per-second.
    knots_per_mps = 1.94384
    return mps_value * knots_per_mps
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
###############################################################################
|
|
36
|
+
# Colab Notebook:
|
|
37
|
+
# https://colab.research.google.com/drive/1Ihb1x0EeYRNwGJ4Bqi4RqQQHu9-40oDk?usp=sharing#scrollTo=hIPziqVO48Xg
|
|
38
|
+
###############################################################################
|
|
39
|
+
|
|
40
|
+
|
|
27
41
|
# https://shapely.readthedocs.io/en/stable/reference/shapely.MultiLineString.html#shapely.MultiLineString
|
|
28
42
|
class LineSimplification:
|
|
29
43
|
"""
|
|
@@ -89,7 +103,7 @@ class LineSimplification:
|
|
|
89
103
|
#######################################################
|
|
90
104
|
def get_speeds(
|
|
91
105
|
self,
|
|
92
|
-
times: np.ndarray, # don't really need time, do need to segment the
|
|
106
|
+
times: np.ndarray, # don't really need time, do need to segment the dataset first
|
|
93
107
|
latitudes: np.ndarray,
|
|
94
108
|
longitudes: np.ndarray,
|
|
95
109
|
) -> np.ndarray:
|
|
@@ -147,4 +161,15 @@ class LineSimplification:
|
|
|
147
161
|
#######################################################
|
|
148
162
|
|
|
149
163
|
|
|
164
|
+
# [(-72.2001724243164, 40.51750183105469), # latBB
|
|
165
|
+
# (-72.20023345947266, 40.51749038696289),
|
|
166
|
+
# (-72.20033264160156, 40.51750183105469), # lonAA, latBB
|
|
167
|
+
# (-72.20030212402344, 40.517391204833984),
|
|
168
|
+
# (-72.20033264160156, 40.517330169677734), # lonAA, latCC
|
|
169
|
+
# (-72.2003402709961, 40.51729965209961),
|
|
170
|
+
# (-72.20033264160156, 40.517330169677734), # lonAA, latCC
|
|
171
|
+
# (-72.20040130615234, 40.5172004699707),
|
|
172
|
+
# (-72.20050048828125, 40.51716995239258),
|
|
173
|
+
# (-72.2004623413086, 40.51710891723633)]
|
|
174
|
+
|
|
150
175
|
###########################################################
|
|
@@ -1,10 +1,4 @@
|
|
|
1
|
-
import glob
|
|
2
|
-
import os
|
|
3
|
-
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
|
|
6
1
|
import fiona
|
|
7
|
-
import geopandas
|
|
8
2
|
import geopandas as gpd
|
|
9
3
|
import numpy as np
|
|
10
4
|
import pandas as pd
|
|
@@ -16,282 +10,252 @@ MAX_CONCURRENCY = 64
|
|
|
16
10
|
MAX_WORKERS = 64
|
|
17
11
|
GB = 1024**3
|
|
18
12
|
|
|
13
|
+
bucket_name = "noaa-wcsd-zarr-pds"
|
|
14
|
+
ship_name = "Henry_B._Bigelow"
|
|
15
|
+
sensor_name = "EK60"
|
|
16
|
+
|
|
17
|
+
# TODO: get pmtiles of all the evr points
|
|
18
|
+
|
|
19
19
|
|
|
20
20
|
class PMTileGeneration(object):
|
|
21
21
|
"""
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
- upload to s3
|
|
22
|
+
- iterate through the zarr stores for all cruises
|
|
23
|
+
- generate geojson in geopandas df, simplify linestrings
|
|
24
|
+
- consolidate into singular df, one cruise per row
|
|
25
|
+
- export as geojson
|
|
26
|
+
- using tippecanoe, geojson --> pmtiles w linux command
|
|
27
|
+
- upload to s3
|
|
29
28
|
"""
|
|
30
29
|
|
|
31
30
|
#######################################################
|
|
32
31
|
def __init__(
|
|
33
32
|
self,
|
|
34
33
|
):
|
|
35
|
-
|
|
34
|
+
self.bucket_name = "noaa-wcsd-zarr-pds"
|
|
35
|
+
self.ship_name = "Henry_B._Bigelow"
|
|
36
|
+
self.sensor_name = "EK60"
|
|
36
37
|
|
|
37
38
|
#######################################################
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
data=df[
|
|
68
|
-
["ship_name", "cruise_name", "file_stem"]
|
|
69
|
-
], # try again with file_stem
|
|
70
|
-
geometry=df["geom"],
|
|
71
|
-
crs="EPSG:4326",
|
|
72
|
-
)
|
|
73
|
-
print(fiona.supported_drivers)
|
|
74
|
-
# gps_gdf.to_file('dataframe.shp', crs='epsg:4326')
|
|
75
|
-
# Convert geojson feature collection to pmtiles
|
|
76
|
-
gps_gdf.to_file("dataframe.geojson", driver="GeoJSON", crs="epsg:4326")
|
|
77
|
-
print("done")
|
|
78
|
-
"""
|
|
79
|
-
# need to eliminate visits to null island
|
|
80
|
-
tippecanoe --no-feature-limit -zg --projection=EPSG:4326 -o dataframe.pmtiles -l cruises dataframe.geojson
|
|
81
|
-
|
|
82
|
-
https://docs.protomaps.com/pmtiles/create
|
|
83
|
-
PMTiles
|
|
84
|
-
https://drive.google.com/file/d/17Bi-UIXB9IJkIz30BHpiKHXYpCOgRFge/view?usp=sharing
|
|
85
|
-
|
|
86
|
-
Viewer
|
|
87
|
-
https://protomaps.github.io/PMTiles/#map=8.91/56.0234/-166.6346
|
|
88
|
-
"""
|
|
39
|
+
def check_all_cruises(self, bucket_name, cruises):
    """Return the cruises whose Zarr store has a complete latitude array.

    For every cruise the store at
    ``s3://{bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{cruise_name}.zarr``
    is opened anonymously. A cruise counts as completed when the number
    of non-NaN ``latitude`` values equals ``Sv.shape[1]``.

    Args:
        bucket_name: Name of the S3 bucket that holds the Zarr stores.
        cruises: Iterable of cruise names to inspect.

    Returns:
        list: Names of the completed cruises, in the order they were given.

    Raises:
        RuntimeError: If a store cannot be opened or inspected. The
            underlying exception is chained as the cause.
    """
    completed = []
    for cruise_name in cruises:
        print(cruise_name)
        try:
            zarr_store = f"{cruise_name}.zarr"
            # Ship and sensor come from the instance configuration set in
            # __init__ rather than from module-level globals.
            s3_zarr_store_path = (
                f"{bucket_name}/level_2/{self.ship_name}/"
                f"{cruise_name}/{self.sensor_name}/{zarr_store}"
            )
            # The context manager closes the store once the values have
            # been read, so handles do not pile up across many cruises.
            with xr.open_dataset(
                filename_or_obj=f"s3://{s3_zarr_store_path}",
                engine="zarr",
                storage_options={"anon": True},
            ) as cruise:
                width = cruise.Sv.shape[1]
                height = cruise.Sv.shape[0]
                depth = cruise.Sv.shape[2]
                lats = cruise.latitude.to_numpy()
            print(
                f"height: {height}, width: {width}, depth: {depth} = {width * height * depth}"
            )
            # Count once and reuse; the latitude array is not re-read.
            valid_count = int(np.count_nonzero(~np.isnan(lats)))
            # A zero-width store is reported as 0% done instead of
            # aborting the whole scan with a ZeroDivisionError.
            percent_done = valid_count / width if width else 0.0
            if width and valid_count == width:
                completed.append(cruise_name)
            else:
                print(
                    f"percent done: {np.round(percent_done, 2)}, {valid_count}, {width}"
                )
        except Exception as err:
            raise RuntimeError(f"Problem parsing Zarr stores, {err}") from err
    return completed
|
|
89
68
|
|
|
90
69
|
#######################################################
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
# s3_fs = s3fs.S3FileSystem(anon=True)
|
|
100
|
-
for cruise_name in cruise_names:
|
|
101
|
-
s3_path = f"s3://noaa-wcsd-zarr-pds/level_2/{ship_name}/{cruise_name}/EK60/{cruise_name}.zarr"
|
|
102
|
-
# zarr_store = s3fs.S3Map(root=s3_path, s3=s3_fs)
|
|
103
|
-
xr_store = xr.open_dataset(
|
|
104
|
-
filename_or_obj=s3_path,
|
|
70
|
+
def get_cruise_geometry(self, cruise_name, index):
|
|
71
|
+
print(cruise_name)
|
|
72
|
+
try:
|
|
73
|
+
pieces = []
|
|
74
|
+
zarr_store = f"{cruise_name}.zarr"
|
|
75
|
+
s3_zarr_store_path = f"{bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{zarr_store}"
|
|
76
|
+
cruise = xr.open_dataset(
|
|
77
|
+
filename_or_obj=f"s3://{s3_zarr_store_path}",
|
|
105
78
|
engine="zarr",
|
|
106
79
|
storage_options={"anon": True},
|
|
107
|
-
chunks={},
|
|
80
|
+
chunks={},
|
|
108
81
|
cache=True,
|
|
109
82
|
)
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
latitude = xr_store.latitude.values
|
|
140
|
-
longitude = xr_store.longitude.values
|
|
141
|
-
if np.isnan(latitude).any() or np.isnan(longitude).any():
|
|
142
|
-
print(f"there was missing lat-lon data for {cruise_name}")
|
|
143
|
-
return None
|
|
144
|
-
# ---Add To GeoPandas Dataframe--- #
|
|
145
|
-
# TODO: experiment with tolerance "0.001"
|
|
146
|
-
geom = LineString(list(zip(longitude, latitude))).simplify(
|
|
147
|
-
tolerance=0.001, preserve_topology=True
|
|
148
|
-
)
|
|
149
|
-
gps_gdf.loc[0] = (
|
|
150
|
-
0,
|
|
151
|
-
"Henry_B._Bigelow",
|
|
152
|
-
cruise_name,
|
|
153
|
-
"EK60",
|
|
154
|
-
geom,
|
|
155
|
-
) # (ship, cruise, sensor, geometry)
|
|
156
|
-
gps_gdf.set_index("id", inplace=True)
|
|
157
|
-
gps_gdf.to_file(
|
|
158
|
-
f"dataframe_{cruise_name}.geojson", driver="GeoJSON"
|
|
159
|
-
) # , engine="pyogrio")
|
|
160
|
-
return cruise_name
|
|
83
|
+
latitude_array = cruise.latitude.to_numpy()
|
|
84
|
+
longitude_array = cruise.longitude.to_numpy()
|
|
85
|
+
if np.isnan(latitude_array).any() or np.isnan(longitude_array).any():
|
|
86
|
+
raise RuntimeError(
|
|
87
|
+
f"There was missing lat-lon dataset for, {cruise_name}"
|
|
88
|
+
)
|
|
89
|
+
geom = LineString(list(zip(longitude_array, latitude_array))).simplify(
|
|
90
|
+
tolerance=0.001, # preserve_topology=True # 113
|
|
91
|
+
) # TODO: do speed check, convert linestrings to multilinestrings
|
|
92
|
+
print(len(geom.coords))
|
|
93
|
+
pieces.append(
|
|
94
|
+
{
|
|
95
|
+
"id": index,
|
|
96
|
+
"ship_name": ship_name,
|
|
97
|
+
"cruise_name": cruise_name,
|
|
98
|
+
"sensor_name": sensor_name,
|
|
99
|
+
"geom": geom,
|
|
100
|
+
}
|
|
101
|
+
)
|
|
102
|
+
df = pd.DataFrame(pieces)
|
|
103
|
+
gps_gdf = gpd.GeoDataFrame(
|
|
104
|
+
data=df[["id", "ship_name", "cruise_name", "sensor_name"]],
|
|
105
|
+
geometry=df["geom"],
|
|
106
|
+
crs="EPSG:4326",
|
|
107
|
+
)
|
|
108
|
+
print(gps_gdf)
|
|
109
|
+
# {'DXF': 'rw', 'CSV': 'raw', 'OpenFileGDB': 'raw', 'ESRIJSON': 'r', 'ESRI Shapefile': 'raw', 'FlatGeobuf': 'raw', 'GeoJSON': 'raw', 'GeoJSONSeq': 'raw', 'GPKG': 'raw', 'GML': 'rw', 'OGR_GMT': 'rw', 'GPX': 'rw', 'MapInfo File': 'raw', 'DGN': 'raw', 'S57': 'r', 'SQLite': 'raw', 'TopoJSON': 'r'}
|
|
110
|
+
if "GeoJSON" not in fiona.supported_drivers.keys():
|
|
111
|
+
raise RuntimeError("Missing GeoJSON driver")
|
|
161
112
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
cruises: list,
|
|
166
|
-
):
|
|
167
|
-
# 'cruises' is a list of cruises to process
|
|
168
|
-
completed_cruises = []
|
|
169
|
-
try:
|
|
170
|
-
with ThreadPoolExecutor(max_workers=32) as executor:
|
|
171
|
-
futures = [
|
|
172
|
-
executor.submit(
|
|
173
|
-
self.get_geospatial_info_from_zarr_store,
|
|
174
|
-
"Henry_B._Bigelow", # ship_name
|
|
175
|
-
cruise, # cruise_name
|
|
176
|
-
)
|
|
177
|
-
for cruise in cruises
|
|
178
|
-
]
|
|
179
|
-
for future in as_completed(futures):
|
|
180
|
-
result = future.result()
|
|
181
|
-
if result:
|
|
182
|
-
completed_cruises.extend([result])
|
|
113
|
+
gps_gdf.set_index("id", inplace=True)
|
|
114
|
+
# gps_gdf.to_file(f"dataframe_{cruise_name}.geojson", driver="GeoJSON") #, crs="epsg:4326")
|
|
115
|
+
return gps_gdf
|
|
183
116
|
except Exception as err:
|
|
184
|
-
|
|
185
|
-
print("Done opening zarr stores using thread pool.")
|
|
186
|
-
return completed_cruises # Took ~12 minutes
|
|
117
|
+
raise RuntimeError(f"Problem parsing Zarr stores, {err}")
|
|
187
118
|
|
|
188
119
|
#######################################################
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
"""
|
|
192
|
-
iterate through cruises, threadpoolexecute geojson creation, aggregate geojson files into df,
|
|
193
|
-
"""
|
|
194
|
-
gps_gdf = geopandas.GeoDataFrame(
|
|
120
|
+
def aggregate_geojson_into_dataframe(self, geoms):
|
|
121
|
+
gps_gdf = gpd.GeoDataFrame(
|
|
195
122
|
columns=["id", "ship", "cruise", "sensor", "geometry"],
|
|
196
123
|
geometry="geometry",
|
|
197
124
|
crs="EPSG:4326",
|
|
198
125
|
)
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
jjj,
|
|
207
|
-
geom.ship[0],
|
|
208
|
-
geom.cruise[0],
|
|
209
|
-
geom.sensor[0],
|
|
210
|
-
geom.geometry[0],
|
|
126
|
+
for iii, geom in enumerate(geoms):
|
|
127
|
+
gps_gdf.loc[iii] = (
|
|
128
|
+
iii,
|
|
129
|
+
geom.ship_name[iii],
|
|
130
|
+
geom.cruise_name[iii],
|
|
131
|
+
geom.sensor_name[iii],
|
|
132
|
+
geom.geometry[iii],
|
|
211
133
|
)
|
|
212
|
-
# gps_gdf.loc[0] = (0, "Henry_B._Bigelow", cruise_name, "EK60", geom) # (ship, cruise, sensor, geometry)
|
|
213
|
-
print(gps_gdf)
|
|
214
134
|
gps_gdf.set_index("id", inplace=True)
|
|
215
135
|
gps_gdf.to_file(
|
|
216
|
-
"
|
|
136
|
+
filename="dataset.geojson",
|
|
217
137
|
driver="GeoJSON",
|
|
218
|
-
engine="pyogrio"
|
|
138
|
+
engine="fiona", # or "pyogrio"
|
|
219
139
|
layer_options={"ID_GENERATE": "YES"},
|
|
140
|
+
crs="EPSG:4326",
|
|
141
|
+
id_generate=True, # required for the feature click selection
|
|
220
142
|
)
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
# gps_gdf.loc[iii] = (iii, "Henry_B._Bigelow", cruise_name, "EK60", geom) # (ship, cruise, sensor, geometry)
|
|
224
|
-
# print('writing to file')
|
|
225
|
-
# print(gps_gdf)
|
|
226
|
-
# gps_gdf.set_index('id', inplace=True)
|
|
227
|
-
# gps_gdf.to_file(f"dataframe_{cruise_name}.geojson", driver="GeoJSON", engine="pyogrio", layer_options={"ID_GENERATE": "YES"})
|
|
228
|
-
# https://gdal.org/en/latest/drivers/vector/jsonfg.html
|
|
229
|
-
# gps_gdf.to_file(
|
|
230
|
-
# f"data.geojson",
|
|
231
|
-
# driver="GeoJSON",
|
|
232
|
-
# engine="pyogrio",
|
|
233
|
-
# layer_options={"ID_FIELD": "id"}
|
|
234
|
-
# )
|
|
235
|
-
# gps_gdf.to_file(f"dataframe_{cruise_name}.geojson", driver="GeoJSON", engine="pyogrio", id_generate=True)
|
|
236
|
-
|
|
143
|
+
print(gps_gdf)
|
|
237
144
|
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
145
|
+
#######################################################
|
|
146
|
+
def create_collection_geojson(self, cruises=None):
    """Build the aggregated cruise GeoJSON used as input for tippecanoe.

    Steps:
      1. ``check_all_cruises`` filters the candidate cruises down to the
         completed ones.
      2. ``get_cruise_geometry`` is called once per completed cruise, with
         the cruise's position in that list as its index.
      3. ``aggregate_geojson_into_dataframe`` combines the per-cruise
         results and writes ``dataset.geojson``.
      4. The tippecanoe command that converts the GeoJSON to PMTiles is
         printed; it is not executed here.

    Args:
        cruises: Optional iterable of cruise names to process. When
            omitted, the built-in list of cruises below is used, which
            matches the previous hard-coded behavior.

    Returns:
        None.
    """
    if cruises is None:
        cruises = [
            "HB0706",
            "HB0707",
            "HB0710",
            "HB0711",
            "HB0802",
            "HB0803",
            "HB0805",
            "HB0806",
            "HB0807",
            "HB0901",
            "HB0902",
            "HB0903",
            "HB0904",
            "HB0905",
            "HB1002",
            "HB1006",
            "HB1102",
            "HB1103",
            "HB1105",
            "HB1201",
            "HB1206",
            "HB1301",
            "HB1303",
            "HB1304",
            "HB1401",
            "HB1402",
            "HB1403",
            "HB1405",
            "HB1501",
            "HB1502",
            "HB1503",
            "HB1506",
            "HB1507",
            "HB1601",
            "HB1603",
            "HB1604",
            "HB1701",
            "HB1702",
            "HB1801",
            "HB1802",
            "HB1803",
            "HB1804",
            "HB1805",
            "HB1806",
            "HB1901",
            "HB1902",
            "HB1903",
            "HB1904",
            "HB1906",
            "HB1907",
            "HB2001",
            "HB2006",
            "HB2007",
            "HB20ORT",
            "HB20TR",
        ]
    # The bucket comes from the instance configuration set in __init__.
    completed_cruises = self.check_all_cruises(
        bucket_name=self.bucket_name, cruises=cruises
    )  # TODO: threadpool this
    ### create linestring ###
    geometries = [
        self.get_cruise_geometry(cruise_name=completed_cruise, index=jjj)
        for jjj, completed_cruise in enumerate(completed_cruises)
    ]  # TODO: threadpool this
    #
    self.aggregate_geojson_into_dataframe(geoms=geometries)
    #
    print(
        'Now run this: "tippecanoe --no-feature-limit -zg -o dataset.pmtiles -l cruises dataset.geojson --force"'
    )
|
|
221
|
+
# # water-column-sonar-id.pmtiles
|
|
222
|
+
# linux command: "tippecanoe --no-feature-limit -zg -o water-column-sonar-id.pmtiles -l cruises dataset.geojson --force"
|
|
223
|
+
# note: 'cruises' is the name of the layer
|
|
224
|
+
# size is ~3.3 MB for the pmtiles
|
|
225
|
+
# then drag-and-drop here: https://pmtiles.io/#map=6.79/39.802/-71.51
|
|
244
226
|
|
|
227
|
+
#######################################################
|
|
228
|
+
# TODO: copy the .pmtiles file to the s3 bucket "noaa-wcsd-pds-index"
|
|
229
|
+
#######################################################
|
|
245
230
|
|
|
246
|
-
|
|
247
|
-
#
|
|
248
|
-
#
|
|
249
|
-
#
|
|
231
|
+
#######################################################
|
|
232
|
+
# TODO: get threadpool working
|
|
233
|
+
# def open_zarr_stores_with_thread_pool_executor(
|
|
234
|
+
# self,
|
|
235
|
+
# cruises: list,
|
|
236
|
+
# ):
|
|
237
|
+
# # 'cruises' is a list of cruises to process
|
|
238
|
+
# completed_cruises = []
|
|
239
|
+
# try:
|
|
240
|
+
# with ThreadPoolExecutor(max_workers=32) as executor:
|
|
241
|
+
# futures = [
|
|
242
|
+
# executor.submit(
|
|
243
|
+
# self.get_geospatial_info_from_zarr_store,
|
|
244
|
+
# "Henry_B._Bigelow", # ship_name
|
|
245
|
+
# cruise, # cruise_name
|
|
246
|
+
# )
|
|
247
|
+
# for cruise in cruises
|
|
248
|
+
# ]
|
|
249
|
+
# for future in as_completed(futures):
|
|
250
|
+
# result = future.result()
|
|
251
|
+
# if result:
|
|
252
|
+
# completed_cruises.extend([result])
|
|
253
|
+
# except Exception as err:
|
|
254
|
+
# raise RuntimeError(f"Problem, {err}")
|
|
255
|
+
# print("Done opening zarr stores using thread pool.")
|
|
256
|
+
# return completed_cruises # Took ~12 minutes
|
|
250
257
|
|
|
251
|
-
|
|
252
|
-
# gps_gdf.to_file(f"dataframe6.geojson", driver="GeoJSON", engine="pyogrio", layer_options={"ID_GENERATE": "YES"})
|
|
253
|
-
# jq '{"type": "FeatureCollection", "features": [.[] | .features[]]}' --slurp input*.geojson > output.geojson
|
|
254
|
-
# tippecanoe -zg --projection=EPSG:4326 -o water-column-sonar-id.pmtiles -l cruises output.geojson
|
|
255
|
-
# tippecanoe -zg --convert-stringified-ids-to-numbers --projection=EPSG:4326 -o water-column-sonar-id.pmtiles -l cruises dataframe*.geojson
|
|
256
|
-
# {
|
|
257
|
-
# "type": "FeatureCollection",
|
|
258
|
-
# "name": "dataframe5",
|
|
259
|
-
# "features": [
|
|
260
|
-
# { "type": "Feature", "id": 0, "properties": { "id": 0, "ship": "Henry_B._Bigelow", "cruise": "HB0706", "sensor": "EK60" }, "geometry": { "type": "LineString", "coordinates": [ [ -72.120498657226562, 39.659671783447266 ], [ -72.120773315429688, 39.660198211669922 ] ] } },
|
|
261
|
-
# { "type": "Feature", "id": 1, "properties": { "id": 1, "ship": "Henry_B._Bigelow", "cruise": "HB0707", "sensor": "EK60" }, "geometry": { "type": "LineString", "coordinates": [ [ -71.797836303710938, 41.003166198730469 ], [ -71.797996520996094, 41.002998352050781 ], [ -71.798583984375, 41.002994537353516 ] ] } },
|
|
262
|
-
# { "type": "Feature", "id": 2, "properties": { "id": 2, "ship": "Henry_B._Bigelow", "cruise": "HB0710", "sensor": "EK60" }, "geometry": { "type": "LineString", "coordinates": [ [ -72.489486694335938, 40.331901550292969 ], [ -72.490760803222656, 40.33099365234375 ] ] } }
|
|
263
|
-
# ]
|
|
264
|
-
# }
|
|
258
|
+
#######################################################
|
|
265
259
|
|
|
266
|
-
# # https://docs.protomaps.com/pmtiles/create
|
|
267
|
-
# #ogr2ogr -t_srs EPSG:4326 data.geojson dataframe.shp
|
|
268
|
-
# # Only need to do the second one here...
|
|
269
|
-
# tippecanoe -zg --projection=EPSG:4326 -o data.pmtiles -l cruises dataframe.geojson
|
|
270
|
-
# tippecanoe -zg --projection=EPSG:4326 -o data.pmtiles -l cruises --coalesce-densest-as-needed --extend-zooms-if-still-dropping dataframe*.geojson
|
|
271
|
-
# # used this to combine all the geojson files into single pmtile file (2024-12-03):
|
|
272
|
-
# tippecanoe -zg --projection=EPSG:4326 -o data.pmtiles -l cruises --coalesce-densest-as-needed --extend-zooms-if-still-dropping dataframe*.geojson
|
|
273
|
-
#
|
|
274
|
-
# TODO:
|
|
275
|
-
# run each one of the cruises in a separate ospool workflow.
|
|
276
|
-
# each process gets own store
|
|
277
260
|
|
|
278
261
|
###########################################################
|
|
279
|
-
|
|
280
|
-
# s3_manager = S3Manager() # endpoint_url=endpoint_url)
|
|
281
|
-
# # s3fs_manager = S3FSManager()
|
|
282
|
-
# # input_bucket_name = "test_input_bucket"
|
|
283
|
-
# # s3_manager.create_bucket(bucket_name=input_bucket_name)
|
|
284
|
-
# ship_name = "Henry_B._Bigelow"
|
|
285
|
-
# cruise_name = "HB0706"
|
|
286
|
-
# sensor_name = "EK60"
|
|
287
|
-
#
|
|
288
|
-
# # ---Scan Bucket For All Zarr Stores--- #
|
|
289
|
-
# # https://noaa-wcsd-zarr-pds.s3.amazonaws.com/index.html#level_2/Henry_B._Bigelow/HB0706/EK60/HB0706.zarr/
|
|
290
|
-
# path_to_zarr_store = f"s3://noaa-wcsd-zarr-pds/level_2/Henry_B._Bigelow/HB0706/EK60/HB0706.zarr"
|
|
291
|
-
# s3 = s3fs.S3FileSystem()
|
|
292
|
-
# zarr_store = s3fs.S3Map(path_to_zarr_store, s3=s3)
|
|
293
|
-
# ds_zarr = xr.open_zarr(zarr_store, consolidated=None)
|
|
294
|
-
# print(ds_zarr.Sv.shape)
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
# total = [246847, 89911, 169763, 658047, 887640, 708771, 187099, 3672813, 4095002, 763268, 162727, 189454, 1925270, 3575857, 1031920, 1167590, 3737415, 4099957, 3990725, 3619996, 3573052, 2973090, 55851, 143192, 1550164, 3692819, 668400, 489735, 393260, 1311234, 242989, 4515760, 1303091, 704663, 270645, 3886437, 4204381, 1062090, 428639, 541455, 4206506, 298561, 1279329, 137416, 139836, 228947, 517949]
|