water-column-sonar-processing 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- water_column_sonar_processing/__init__.py +16 -0
- water_column_sonar_processing/aws/__init__.py +7 -0
- {aws_manager → water_column_sonar_processing/aws}/dynamodb_manager.py +71 -50
- {aws_manager → water_column_sonar_processing/aws}/s3_manager.py +120 -130
- {aws_manager → water_column_sonar_processing/aws}/s3fs_manager.py +13 -19
- {aws_manager → water_column_sonar_processing/aws}/sns_manager.py +10 -21
- {aws_manager → water_column_sonar_processing/aws}/sqs_manager.py +10 -18
- water_column_sonar_processing/cruise/__init__.py +4 -0
- {cruise → water_column_sonar_processing/cruise}/create_empty_zarr_store.py +62 -44
- {cruise → water_column_sonar_processing/cruise}/resample_regrid.py +117 -66
- water_column_sonar_processing/geometry/__init__.py +5 -0
- {geometry_manager → water_column_sonar_processing/geometry}/geometry_manager.py +80 -49
- {geometry_manager → water_column_sonar_processing/geometry}/geometry_simplification.py +13 -12
- {geometry_manager → water_column_sonar_processing/geometry}/pmtile_generation.py +25 -24
- water_column_sonar_processing/index/__init__.py +3 -0
- {index_manager → water_column_sonar_processing/index}/index_manager.py +106 -82
- water_column_sonar_processing/model/__init__.py +3 -0
- {zarr_manager → water_column_sonar_processing/model}/zarr_manager.py +119 -83
- water_column_sonar_processing/process.py +147 -0
- water_column_sonar_processing/utility/__init__.py +6 -0
- {utility → water_column_sonar_processing/utility}/cleaner.py +6 -7
- water_column_sonar_processing/utility/constants.py +63 -0
- {utility → water_column_sonar_processing/utility}/pipeline_status.py +37 -10
- {utility → water_column_sonar_processing/utility}/timestamp.py +3 -2
- {water_column_sonar_processing-0.0.4.dist-info → water_column_sonar_processing-0.0.6.dist-info}/METADATA +31 -1
- water_column_sonar_processing-0.0.6.dist-info/RECORD +29 -0
- water_column_sonar_processing-0.0.6.dist-info/top_level.txt +1 -0
- __init__.py +0 -0
- aws_manager/__init__.py +0 -4
- cruise/__init__.py +0 -0
- geometry_manager/__init__.py +0 -0
- index_manager/__init__.py +0 -0
- model.py +0 -140
- utility/__init__.py +0 -0
- utility/constants.py +0 -56
- water_column_sonar_processing-0.0.4.dist-info/RECORD +0 -29
- water_column_sonar_processing-0.0.4.dist-info/top_level.txt +0 -8
- zarr_manager/__init__.py +0 -0
- {water_column_sonar_processing-0.0.4.dist-info → water_column_sonar_processing-0.0.6.dist-info}/LICENSE +0 -0
- {water_column_sonar_processing-0.0.4.dist-info → water_column_sonar_processing-0.0.6.dist-info}/WHEEL +0 -0
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
-
|
|
2
|
+
|
|
3
3
|
import geopandas
|
|
4
|
+
import numpy as np
|
|
4
5
|
import pandas as pd
|
|
5
6
|
|
|
6
|
-
from
|
|
7
|
-
from
|
|
7
|
+
from water_column_sonar_processing.aws.s3_manager import S3Manager
|
|
8
|
+
from water_column_sonar_processing.utility.cleaner import Cleaner
|
|
8
9
|
|
|
9
10
|
"""
|
|
10
11
|
// [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
|
|
@@ -22,28 +23,32 @@ from aws_manager.s3_manager import S3Manager
|
|
|
22
23
|
class GeometryManager:
|
|
23
24
|
#######################################################
|
|
24
25
|
def __init__(
|
|
25
|
-
|
|
26
|
+
self,
|
|
26
27
|
):
|
|
27
28
|
self.DECIMAL_PRECISION = 5 # precision for GPS coordinates
|
|
28
29
|
self.SIMPLIFICATION_TOLERANCE = 0.0001 # RDP simplification to street level
|
|
29
30
|
|
|
30
31
|
#######################################################
|
|
31
32
|
def read_echodata_gps_data(
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
33
|
+
self,
|
|
34
|
+
echodata,
|
|
35
|
+
ship_name,
|
|
36
|
+
cruise_name,
|
|
37
|
+
sensor_name,
|
|
38
|
+
file_name,
|
|
39
|
+
write_geojson=True,
|
|
39
40
|
) -> tuple:
|
|
40
41
|
file_name_stem = Path(file_name).stem
|
|
41
42
|
geo_json_name = f"{file_name_stem}.json"
|
|
42
43
|
|
|
43
|
-
print(
|
|
44
|
+
print("Getting GPS data from echopype object.")
|
|
44
45
|
try:
|
|
45
|
-
latitude = np.round(
|
|
46
|
-
|
|
46
|
+
latitude = np.round(
|
|
47
|
+
echodata.platform.latitude.values, self.DECIMAL_PRECISION
|
|
48
|
+
)
|
|
49
|
+
longitude = np.round(
|
|
50
|
+
echodata.platform.longitude.values, self.DECIMAL_PRECISION
|
|
51
|
+
)
|
|
47
52
|
|
|
48
53
|
# RE: time coordinates: https://github.com/OSOceanAcoustics/echopype/issues/656#issue-1219104771
|
|
49
54
|
# 'nmea_times' are times from the nmea datalogger associated with GPS
|
|
@@ -54,10 +59,15 @@ class GeometryManager:
|
|
|
54
59
|
time1 = echodata.environment.time1.values
|
|
55
60
|
|
|
56
61
|
if len(nmea_times) < len(time1):
|
|
57
|
-
raise Exception(
|
|
62
|
+
raise Exception(
|
|
63
|
+
"Problem: Not enough NMEA times available to extrapolate time1."
|
|
64
|
+
)
|
|
58
65
|
|
|
59
66
|
# Align 'sv_times' to 'nmea_times'
|
|
60
|
-
if not (
|
|
67
|
+
if not (
|
|
68
|
+
np.all(time1[:-1] <= time1[1:])
|
|
69
|
+
and np.all(nmea_times[:-1] <= nmea_times[1:])
|
|
70
|
+
):
|
|
61
71
|
raise Exception("Problem: NMEA times are not sorted.")
|
|
62
72
|
|
|
63
73
|
# Finds the indices where 'v' can be inserted just to the right of 'a'
|
|
@@ -67,65 +77,83 @@ class GeometryManager:
|
|
|
67
77
|
lon = longitude[indices]
|
|
68
78
|
lon[indices < 0] = np.nan
|
|
69
79
|
|
|
70
|
-
if not (
|
|
80
|
+
if not (
|
|
81
|
+
np.all(lat[~np.isnan(lat)] >= -90.0)
|
|
82
|
+
and np.all(lat[~np.isnan(lat)] <= 90.0)
|
|
83
|
+
and np.all(lon[~np.isnan(lon)] >= -180.0)
|
|
84
|
+
and np.all(lon[~np.isnan(lon)] <= 180.0)
|
|
85
|
+
):
|
|
71
86
|
raise Exception("Problem: GPS Data falls outside allowed bounds.")
|
|
72
87
|
|
|
73
88
|
# check for visits to null island
|
|
74
89
|
null_island_indices = list(
|
|
75
|
-
set.intersection(
|
|
90
|
+
set.intersection(
|
|
91
|
+
set(np.where(np.abs(lat) < 1e-3)[0]),
|
|
92
|
+
set(np.where(np.abs(lon) < 1e-3)[0]),
|
|
93
|
+
)
|
|
76
94
|
)
|
|
77
95
|
lat[null_island_indices] = np.nan
|
|
78
96
|
lon[null_island_indices] = np.nan
|
|
79
97
|
|
|
80
98
|
# create requirement for minimum linestring size
|
|
81
|
-
MIN_ALLOWED_SIZE =
|
|
82
|
-
|
|
99
|
+
MIN_ALLOWED_SIZE = (
|
|
100
|
+
4 # don't want to process files with less than 4 data points
|
|
101
|
+
)
|
|
102
|
+
if (
|
|
103
|
+
len(lat[~np.isnan(lat)]) < MIN_ALLOWED_SIZE
|
|
104
|
+
or len(lon[~np.isnan(lon)]) < MIN_ALLOWED_SIZE
|
|
105
|
+
):
|
|
83
106
|
raise Exception(
|
|
84
107
|
f"There was not enough data in lat or lon to create geojson, {len(lat[~np.isnan(lat)])} found, less than {MIN_ALLOWED_SIZE}."
|
|
85
108
|
)
|
|
86
109
|
|
|
87
110
|
# https://osoceanacoustics.github.io/echopype-examples/echopype_tour.html
|
|
88
|
-
gps_df =
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
111
|
+
gps_df = (
|
|
112
|
+
pd.DataFrame({"latitude": lat, "longitude": lon, "time": time1})
|
|
113
|
+
.set_index(["time"])
|
|
114
|
+
.fillna(0)
|
|
115
|
+
)
|
|
93
116
|
|
|
94
117
|
# Note: We set np.nan to 0,0 so downstream missing values can be omitted
|
|
95
118
|
gps_gdf = geopandas.GeoDataFrame(
|
|
96
119
|
gps_df,
|
|
97
120
|
geometry=geopandas.points_from_xy(
|
|
98
|
-
gps_df[
|
|
99
|
-
gps_df['latitude']
|
|
121
|
+
gps_df["longitude"], gps_df["latitude"]
|
|
100
122
|
),
|
|
101
|
-
crs="epsg:4326"
|
|
123
|
+
crs="epsg:4326",
|
|
102
124
|
)
|
|
103
125
|
# Note: We set np.nan to 0,0 so downstream missing values can be omitted
|
|
104
126
|
|
|
105
127
|
geo_json_line = gps_gdf.to_json()
|
|
106
128
|
if write_geojson:
|
|
107
|
-
print(
|
|
129
|
+
print("Creating local copy of geojson file.")
|
|
108
130
|
with open(geo_json_name, "w") as write_file:
|
|
109
131
|
write_file.write(geo_json_line)
|
|
110
132
|
|
|
111
|
-
geo_json_prefix =
|
|
133
|
+
geo_json_prefix = (
|
|
134
|
+
f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
|
|
135
|
+
)
|
|
112
136
|
|
|
113
|
-
print(
|
|
137
|
+
print("Checking s3 and deleting any existing GeoJSON file.")
|
|
114
138
|
s3_manager = S3Manager()
|
|
115
|
-
s3_objects = s3_manager.list_nodd_objects(
|
|
139
|
+
s3_objects = s3_manager.list_nodd_objects(
|
|
140
|
+
prefix=f"{geo_json_prefix}/{geo_json_name}"
|
|
141
|
+
)
|
|
116
142
|
if len(s3_objects) > 0:
|
|
117
|
-
print(
|
|
143
|
+
print(
|
|
144
|
+
"GeoJSON already exists in s3, deleting existing and continuing."
|
|
145
|
+
)
|
|
118
146
|
s3_manager.delete_nodd_objects(objects=s3_objects)
|
|
119
147
|
|
|
120
|
-
print(
|
|
148
|
+
print("Upload GeoJSON to s3.")
|
|
121
149
|
s3_manager.upload_nodd_file(
|
|
122
150
|
file_name=geo_json_name, # file_name
|
|
123
|
-
key=f"{geo_json_prefix}/{geo_json_name}" # key
|
|
151
|
+
key=f"{geo_json_prefix}/{geo_json_name}", # key
|
|
124
152
|
)
|
|
125
153
|
|
|
126
154
|
# TODO: delete geo_json file
|
|
127
155
|
cleaner = Cleaner()
|
|
128
|
-
cleaner.delete_local_files(file_types=[
|
|
156
|
+
cleaner.delete_local_files(file_types=["*.json"])
|
|
129
157
|
|
|
130
158
|
#################################################################
|
|
131
159
|
# TODO: simplify with shapely
|
|
@@ -144,7 +172,9 @@ class GeometryManager:
|
|
|
144
172
|
#################################################################
|
|
145
173
|
# GeoJSON FeatureCollection with IDs as "time"
|
|
146
174
|
except Exception as err:
|
|
147
|
-
print(
|
|
175
|
+
print(
|
|
176
|
+
f"Exception encountered extracting gps coordinates creating geojson: {err}"
|
|
177
|
+
)
|
|
148
178
|
raise
|
|
149
179
|
# Note: returned lat/lon values can include np.nan because they need to be aligned with
|
|
150
180
|
# the Sv data! GeoJSON needs simplification but has been filtered.
|
|
@@ -154,12 +184,12 @@ class GeometryManager:
|
|
|
154
184
|
|
|
155
185
|
#######################################################
|
|
156
186
|
def read_s3_geo_json(
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
187
|
+
self,
|
|
188
|
+
ship_name,
|
|
189
|
+
cruise_name,
|
|
190
|
+
sensor_name,
|
|
191
|
+
file_name_stem,
|
|
192
|
+
input_xr_zarr_store,
|
|
163
193
|
):
|
|
164
194
|
try:
|
|
165
195
|
s3_manager = S3Manager()
|
|
@@ -170,25 +200,26 @@ class GeometryManager:
|
|
|
170
200
|
file_name_stem=file_name_stem,
|
|
171
201
|
)
|
|
172
202
|
###
|
|
173
|
-
geospatial = geopandas.GeoDataFrame.from_features(
|
|
174
|
-
|
|
175
|
-
)
|
|
203
|
+
geospatial = geopandas.GeoDataFrame.from_features(
|
|
204
|
+
geo_json["features"]
|
|
205
|
+
).set_index(pd.json_normalize(geo_json["features"])["id"].values)
|
|
176
206
|
null_island_indices = list(
|
|
177
207
|
set.intersection(
|
|
178
208
|
set(np.where(np.abs(geospatial.latitude.values) < 1e-3)[0]),
|
|
179
|
-
set(np.where(np.abs(geospatial.longitude.values) < 1e-3)[0])
|
|
209
|
+
set(np.where(np.abs(geospatial.longitude.values) < 1e-3)[0]),
|
|
180
210
|
)
|
|
181
211
|
)
|
|
182
212
|
geospatial.iloc[null_island_indices] = np.nan
|
|
183
213
|
###
|
|
184
|
-
geospatial_index = geospatial.dropna().index.values.astype(
|
|
214
|
+
geospatial_index = geospatial.dropna().index.values.astype("datetime64[ns]")
|
|
185
215
|
aa = input_xr_zarr_store.ping_time.values.tolist()
|
|
186
216
|
vv = geospatial_index.tolist()
|
|
187
217
|
indices = np.searchsorted(a=aa, v=vv)
|
|
188
218
|
|
|
189
219
|
return indices, geospatial
|
|
190
220
|
except Exception as err: # Failure
|
|
191
|
-
print(f
|
|
221
|
+
print(f"Exception encountered reading s3 GeoJSON: {err}")
|
|
192
222
|
raise
|
|
193
223
|
|
|
224
|
+
|
|
194
225
|
###########################################################
|
|
@@ -31,51 +31,52 @@
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
"""
|
|
34
|
-
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class GeometrySimplification:
|
|
35
37
|
# TODO: in the future move to standalone library
|
|
36
38
|
#######################################################
|
|
37
39
|
def __init__(
|
|
38
|
-
|
|
40
|
+
self,
|
|
39
41
|
):
|
|
40
42
|
pass
|
|
41
43
|
|
|
42
44
|
#######################################################
|
|
43
45
|
def speed_check(
|
|
44
|
-
|
|
45
|
-
|
|
46
|
+
self,
|
|
47
|
+
speed_knots=50,
|
|
46
48
|
) -> None:
|
|
47
49
|
print(speed_knots)
|
|
48
50
|
pass
|
|
49
51
|
|
|
50
52
|
def remove_null_island_values(
|
|
51
|
-
|
|
52
|
-
|
|
53
|
+
self,
|
|
54
|
+
epsilon=1e-5,
|
|
53
55
|
) -> None:
|
|
54
56
|
print(epsilon)
|
|
55
57
|
pass
|
|
56
58
|
|
|
57
59
|
def stream_geometry(
|
|
58
|
-
|
|
60
|
+
self,
|
|
59
61
|
) -> None:
|
|
60
62
|
pass
|
|
61
63
|
|
|
62
64
|
def break_linestring_into_multi_linestring(
|
|
63
|
-
|
|
65
|
+
self,
|
|
64
66
|
) -> None:
|
|
65
67
|
# For any line-strings across the antimeridian, break into multilinestring
|
|
66
68
|
pass
|
|
67
69
|
|
|
68
70
|
def simplify(
|
|
69
|
-
|
|
71
|
+
self,
|
|
70
72
|
) -> None:
|
|
71
73
|
pass
|
|
72
74
|
|
|
73
|
-
def kalman_filter(
|
|
74
|
-
self
|
|
75
|
-
):
|
|
75
|
+
def kalman_filter(self):
|
|
76
76
|
# for cruises with bad signal, filter so that
|
|
77
77
|
pass
|
|
78
78
|
|
|
79
79
|
#######################################################
|
|
80
80
|
|
|
81
|
+
|
|
81
82
|
###########################################################
|
|
@@ -1,29 +1,25 @@
|
|
|
1
|
-
# import json
|
|
2
|
-
# This is a sample Python script.
|
|
3
|
-
import pandas as pd
|
|
4
|
-
# import numpy as np
|
|
5
1
|
import os
|
|
6
|
-
# from glob import glob
|
|
7
2
|
from pathlib import Path
|
|
8
|
-
|
|
9
|
-
# import shapely
|
|
10
|
-
from shapely.geometry import LineString
|
|
3
|
+
|
|
11
4
|
# from shapely import wkt
|
|
12
5
|
# import json
|
|
13
6
|
# from shapely.geometry import shape, GeometryCollection
|
|
14
7
|
import fiona
|
|
8
|
+
import geopandas
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from shapely.geometry import LineString
|
|
15
11
|
|
|
16
12
|
|
|
17
13
|
class PMTileGeneration(object):
|
|
18
14
|
#######################################################
|
|
19
15
|
def __init__(
|
|
20
|
-
|
|
16
|
+
self,
|
|
21
17
|
):
|
|
22
|
-
|
|
18
|
+
print("123")
|
|
23
19
|
|
|
24
20
|
#######################################################
|
|
25
21
|
def generate_geojson_feature_collection(self):
|
|
26
|
-
# This was used to read from noaa-wcsd-
|
|
22
|
+
# This was used to read from noaa-wcsd-model-pds bucket geojson files and then to
|
|
27
23
|
# generate the geopandas dataframe which could be exported to another comprehensive
|
|
28
24
|
# geojson file. That
|
|
29
25
|
result = list(Path("/Users/r2d2/Documents/echofish/geojson").rglob("*.json"))
|
|
@@ -33,30 +29,34 @@ class PMTileGeneration(object):
|
|
|
33
29
|
for iii in range(len(result)):
|
|
34
30
|
file_name = os.path.normpath(result[iii]).split(os.sep)[-1]
|
|
35
31
|
file_stem = os.path.splitext(os.path.basename(file_name))[0]
|
|
36
|
-
geom = geopandas.read_file(result[iii]).iloc[0][
|
|
32
|
+
geom = geopandas.read_file(result[iii]).iloc[0]["geometry"]
|
|
37
33
|
# TDOO: Filter (0,0) coordinates
|
|
38
34
|
if len(geom.coords.xy[0]) < 2:
|
|
39
35
|
continue
|
|
40
36
|
geom = LineString(list(zip(geom.coords.xy[1], geom.coords.xy[0])))
|
|
41
|
-
pieces.append(
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
37
|
+
pieces.append(
|
|
38
|
+
{
|
|
39
|
+
"ship_name": os.path.normpath(result[iii]).split(os.sep)[-4],
|
|
40
|
+
"cruise_name": os.path.normpath(result[iii]).split(os.sep)[-3],
|
|
41
|
+
"file_stem": file_stem,
|
|
42
|
+
"file_path": result[iii],
|
|
43
|
+
"geom": geom,
|
|
44
|
+
}
|
|
45
|
+
)
|
|
48
46
|
df = pd.DataFrame(pieces)
|
|
49
47
|
print(df)
|
|
50
48
|
gps_gdf = geopandas.GeoDataFrame(
|
|
51
|
-
data=df[
|
|
52
|
-
|
|
53
|
-
|
|
49
|
+
data=df[
|
|
50
|
+
["ship_name", "cruise_name", "file_stem"]
|
|
51
|
+
], # try again with file_stem
|
|
52
|
+
geometry=df["geom"],
|
|
53
|
+
crs="EPSG:4326",
|
|
54
54
|
)
|
|
55
55
|
print(fiona.supported_drivers)
|
|
56
56
|
# gps_gdf.to_file('dataframe.shp', crs='epsg:4326')
|
|
57
57
|
# Convert geojson feature collection to pmtiles
|
|
58
|
-
gps_gdf.to_file(
|
|
59
|
-
print(
|
|
58
|
+
gps_gdf.to_file("dataframe.geojson", driver="GeoJSON", crs="epsg:4326")
|
|
59
|
+
print("done")
|
|
60
60
|
"""
|
|
61
61
|
# need to eliminate visits to null island
|
|
62
62
|
tippecanoe --no-feature-limit -zg --projection=EPSG:4326 -o dataframe.pmtiles -l cruises dataframe.geojson
|
|
@@ -71,4 +71,5 @@ class PMTileGeneration(object):
|
|
|
71
71
|
|
|
72
72
|
#######################################################
|
|
73
73
|
|
|
74
|
+
|
|
74
75
|
###########################################################
|