water-column-sonar-annotation 26.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/__init__.py +0 -0
- tests/astronomical/__init__.py +0 -0
- tests/astronomical/test_astronomical_manager.py +148 -0
- tests/conftest.py +60 -0
- tests/cruise/__init__.py +0 -0
- tests/cruise/test_cruise_manager.py +80 -0
- tests/geospatial/__init__.py +0 -0
- tests/geospatial/test_geospatial_manager.py +86 -0
- tests/record/test_echoview_record_manager.py +160 -0
- water_column_sonar_annotation/__init__.py +5 -0
- water_column_sonar_annotation/astronomical/__init__.py +5 -0
- water_column_sonar_annotation/astronomical/astronomical_manager.py +82 -0
- water_column_sonar_annotation/cruise/__init__.py +5 -0
- water_column_sonar_annotation/cruise/cruise_manager.py +104 -0
- water_column_sonar_annotation/geospatial/__init__.py +5 -0
- water_column_sonar_annotation/geospatial/geospatial_manager.py +143 -0
- water_column_sonar_annotation/record/__init__.py +9 -0
- water_column_sonar_annotation/record/echoview_record_manager.py +426 -0
- water_column_sonar_annotation/record/graph_record_manager.py +82 -0
- water_column_sonar_annotation/record/parquet_record_manager.py +83 -0
- water_column_sonar_annotation/shape/__init__.py +5 -0
- water_column_sonar_annotation/shape/shape_manager.py +29 -0
- water_column_sonar_annotation-26.1.8.dist-info/METADATA +109 -0
- water_column_sonar_annotation-26.1.8.dist-info/RECORD +27 -0
- water_column_sonar_annotation-26.1.8.dist-info/WHEEL +5 -0
- water_column_sonar_annotation-26.1.8.dist-info/licenses/LICENSE +21 -0
- water_column_sonar_annotation-26.1.8.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import itertools
|
|
3
|
+
from os import listdir
|
|
4
|
+
from os.path import isfile, join
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
from water_column_sonar_annotation.astronomical import AstronomicalManager
|
|
11
|
+
from water_column_sonar_annotation.cruise import CruiseManager
|
|
12
|
+
from water_column_sonar_annotation.geospatial import GeospatialManager
|
|
13
|
+
from water_column_sonar_annotation.record.graph_record_manager import (
|
|
14
|
+
GraphRecordManager,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# from water_column_sonar_annotation.record import EchofishRecordManager
|
|
18
|
+
# from water_column_sonar_annotation.record import GRecordManager
|
|
19
|
+
|
|
20
|
+
"""
|
|
21
|
+
Documentation for echoview record files in EVR format:
|
|
22
|
+
https://support.echoview.com/WebHelp/Reference/File_Formats/Export_File_Formats/2D_Region_definition_file_format.htm
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def chunks(lst, n):
    """Yield space-joined strings from n-sized chunks of lst.

    The final chunk may be shorter than n when len(lst) is not a
    multiple of n; an empty lst yields nothing.
    """
    for start in range(0, len(lst), n):
        piece = lst[start : start + n]
        yield " ".join(piece)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class EchoviewRecordManager:
|
|
34
|
+
    def __init__(
        self,
    ):
        """Set up EVR lookup tables, the output DataFrame, and helper managers."""
        print("__init__ called")
        # Region creation type codes, keyed by the string code found in the EVR
        # bounding-box line. Taken from the Echoview 2D region file format docs.
        self.region_creation_type = {  # Data formats — The region creation type is one of the following
            "-1": "No type",
            "0": "Created from a selection made using the horizontal band tool horizontal selection tool",
            "1": "Created from a selection made using the parallelogram tool parallelogram tool",
            "2": "Created from a selection made using the polygon tool polygon selection tool",
            "3": "Created from a selection made using the rectangle tool rectangle tool",
            "4": "Created from a selection made using the vertical band tool vertical selection tool",
            "5": "Created as a bottom-relative region or line-relative region",
            "6": "Created or assigned as Marker region.",
            "7": "Created using the Detect Schools command",
            "8": "Invalid or unknown region type",
            "9": "Created as a fish track region",
        }
        # Region type codes from the trailing field of the points line.
        self.region_type = {
            "0": "bad (no data)",
            "1": "analysis",
            "2": "marker",
            "3": "fishtracks",
            "4": "bad (empty water)",
        }
        # Whitelist of region classification strings accepted by
        # process_evr_record; anything else raises there.
        self.evr_region_classifications = [
            "possible_herring",
            "atlantic_herring",
            "fish_school",
            "Unclassified regions",  # TODO: per CWB continue to include this
            "krill_schools",  # excluding this field because of unknowns
            "AH_School",
        ]
        # Accumulator for one row per processed (non-krill) region record.
        self.all_records_df = pd.DataFrame()  # columns=["filename", "start_time"])
        #
        # Helper managers used by process_evr_record for derived context
        # (coordinates, local time, solar geometry, coastline distance).
        self.astronomical_manager = AstronomicalManager()
        self.cruise_manager = CruiseManager()
        self.geospatial_manager = GeospatialManager()
|
|
71
|
+
|
|
72
|
+
def __enter__(self):
|
|
73
|
+
print("__enter__ called")
|
|
74
|
+
return self
|
|
75
|
+
|
|
76
|
+
def __exit__(self, *a):
|
|
77
|
+
print("__exit__ called")
|
|
78
|
+
|
|
79
|
+
"""
|
|
80
|
+
# evr_region_structure_version = bbox_split[0] # "13" (will be incremented if the region structure changes in future versions)
|
|
81
|
+
# evr_point_count = bbox_split[1] # Number of points in the region
|
|
82
|
+
# evr_region_id = # Unique number for each region. Specify sequential numbers starting at 1 if creating a new file
|
|
83
|
+
# evr_selected = # "0" (always)
|
|
84
|
+
# evr_region_creation_type = # See "Data formats" definition
|
|
85
|
+
# evr_dummy = # Should always be "-1"
|
|
86
|
+
# evr_bounding_rectangle_calculated = # "1" if the next four fields are valid; "0" otherwise
|
|
87
|
+
# evr_left_x_value_of_bounding_rectangle = # Date and time of left boundary of bounding rectangle – ignored when importing into Echoview. See "Point 1" in table below.
|
|
88
|
+
# evr_top_y_value_of_bounding_rectangle = # Upper depth coordinate of bounding rectangle – ignored when importing into Echoview. See "Point 1" in table below.
|
|
89
|
+
# evr_right_x_value_of_bounding_rectangle = # Date and time of right boundary of bounding rectangle – ignored when importing into Echoview. See "Point 1" in table below.
|
|
90
|
+
# evr_bottom_y_value_of_bounding_rectangle = # Lower depth coordinate of bounding rectangle – ignored when importing into Echoview. See "Point 1" in table below.
|
|
91
|
+
# evr_number_of_lines_of_notes = # The number of lines of region notes to follow.
|
|
92
|
+
# evr_region_notes = # Notes associated with the region. Maximum length is 2048 characters. Embedded CR characters are encoded as hexadecimal FF. Embedded LF characters are encoded as hexadecimal FE.
|
|
93
|
+
# evr_number_of_lines_of_detection_settings = # The number of lines of detection settings to follow.
|
|
94
|
+
# evr_region_detection_settings = # The detection settings as defined in the Fish Track Detection Properties dialog box or Detect Schools dialog box.
|
|
95
|
+
# evr_region_classification = # Region classification (string). Default value is "Unclassified regions"
|
|
96
|
+
# evr_points = # Data for first point – See Data formats below. These data are used to bound the region when importing into Echoview
|
|
97
|
+
# evr_region_type = # "0" = bad (no data); "1" = analysis; "2" = marker, "3" = fishtracks; "4" = bad (empty water);
|
|
98
|
+
# evr_region_name = # String
|
|
99
|
+
"""
|
|
100
|
+
|
|
101
|
+
# TODO:
|
|
102
|
+
# [2] write df to parquet and tag as github resource
|
|
103
|
+
|
|
104
|
+
@staticmethod
|
|
105
|
+
def process_datetime_string(
|
|
106
|
+
date_string: str,
|
|
107
|
+
time_string: str,
|
|
108
|
+
):
|
|
109
|
+
"""Returns time in UTC from strings '20190925' and '2053458953'"""
|
|
110
|
+
# np.datetime64()
|
|
111
|
+
return pd.to_datetime(f"{date_string} {time_string}", format="%Y%m%d %H%M%S%f")
|
|
112
|
+
|
|
113
|
+
def process_vertice(
|
|
114
|
+
self,
|
|
115
|
+
date_string: str,
|
|
116
|
+
time_string: str,
|
|
117
|
+
depth: float,
|
|
118
|
+
) -> tuple:
|
|
119
|
+
dt = self.process_datetime_string(date_string, time_string)
|
|
120
|
+
# print(dt.value) # is epoch time in nanoseconds
|
|
121
|
+
return dt, dt.value, np.round(depth, 2)
|
|
122
|
+
|
|
123
|
+
    def process_evr_record(
        self,
        evr_record: str,
        filename: str,
    ):
        """Parse one EVR region record and append a row to self.all_records_df.

        Validates the header/bounding-box line, reads notes, detection
        settings, classification, vertices, region type, and name, then
        derives geospatial/astronomical context and builds one
        GraphRecordManager row. Krill regions are skipped. All failures
        are caught and printed, not raised to the caller.

        :param evr_record: one blank-line-delimited record from a .evr file,
            starting with the "13 ..." bounding-box line.
        :param filename: source .evr filename, kept for provenance.
        """
        try:
            #########################################################
            # Drop empty lines; record_lines[0] is the bbox/header line.
            record_lines = [x for x in evr_record.split("\n") if x]
            ############# get bbox #############
            bbox_split = record_lines[0].split()  # [x for x in record.split() if x]
            #########################################################
            # Field layout per the Echoview 2D region definition file format:
            # https://support.echoview.com/WebHelp/Reference/File_Formats/Export_File_Formats/2D_Region_definition_file_format.htm
            #########################################################
            evr_region_structure_version = bbox_split[0]
            if evr_region_structure_version != "13":
                raise Exception("EVR Region Structure Version must be 13")
            #
            evr_point_count = int(bbox_split[1])
            print(f"EVR Point Count: {evr_point_count}")
            #
            evr_region_id = int(bbox_split[2])
            print(f"EVR Region: {evr_region_id}")
            #
            evr_selected = bbox_split[3]
            if evr_selected != "0":
                # NOTE(review): the message says "must be 13" but the check is
                # for "0" (per the format docs, Selected is always "0") — the
                # message text looks like a copy-paste error; confirm and fix.
                raise Exception("EVR Selected must be 13")
            #
            evr_region_creation_type = bbox_split[4]  # See "Data formats" definition
            print(
                f"EVR region creation type: {self.region_creation_type[evr_region_creation_type]}"
            )
            #
            evr_dummy = bbox_split[5]  # Should always be "-1"
            if evr_dummy != "-1":
                raise Exception("EVR Dummy Should always be -1")
            #
            ### "1" if the next four fields are valid; "0" otherwise ###
            evr_bounding_rectangle_calculated = bbox_split[6]
            evr_left_x_value_of_bounding_rectangle = None
            evr_top_y_value_of_bounding_rectangle = None
            evr_right_x_value_of_bounding_rectangle = None
            evr_bottom_y_value_of_bounding_rectangle = None
            if evr_bounding_rectangle_calculated == "1":
                # Date and time of left boundary of bounding rectangle, e.g.
                # '20190925 2053458953'.
                evr_left_x_value_of_bounding_rectangle = self.process_datetime_string(
                    bbox_split[7], bbox_split[8]
                )
                # Upper depth coordinate of bounding rectangle.
                evr_top_y_value_of_bounding_rectangle = float(bbox_split[9])
                # Date and time of right boundary of bounding rectangle.
                evr_right_x_value_of_bounding_rectangle = self.process_datetime_string(
                    bbox_split[10], bbox_split[11]
                )
                # Lower depth coordinate of bounding rectangle.
                evr_bottom_y_value_of_bounding_rectangle = float(bbox_split[12])
            # NOTE(review): everything below assumes the bounding rectangle was
            # calculated — if the flag was "0" the .isoformat() calls raise
            # AttributeError on None (caught by the blanket except below).
            print(
                f"{evr_left_x_value_of_bounding_rectangle.isoformat()}, {evr_top_y_value_of_bounding_rectangle}, {evr_right_x_value_of_bounding_rectangle.isoformat()}, {evr_bottom_y_value_of_bounding_rectangle}"
            )
            # making sure times are in-order
            if (
                evr_left_x_value_of_bounding_rectangle
                > evr_right_x_value_of_bounding_rectangle
            ):
                raise Exception("Timestamps out of order!")
            #
            offset_index = 0
            ### The number of lines of region notes to follow. ###
            evr_number_of_lines_of_notes = int(record_lines[1])
            print(f"Number of region notes: {evr_number_of_lines_of_notes}")
            ### Notes associated with the region. Maximum length is 2048 characters. Embedded CR characters are encoded as hexadecimal FF. Embedded LF characters are encoded as hexadecimal FE. ###
            if evr_number_of_lines_of_notes > 0:
                offset_index = offset_index + evr_number_of_lines_of_notes + 1
                # NOTE(review): this slice starts at index 1 (the count line
                # itself), so it captures the count plus notes-1 note lines —
                # verify against a record that actually carries notes.
                evr_region_notes = record_lines[1:offset_index]
                print(f"Region notes: {evr_region_notes}")
            #
            ### The number of lines of detection settings to follow. ###
            evr_number_of_lines_of_detection_settings = int(
                record_lines[2 + offset_index]
            )
            print(
                f"Number of lines of detection settings: {evr_number_of_lines_of_detection_settings}"
            )
            ### The detection settings as defined in the Fish Track Detection Properties dialog box or Detect Schools dialog box. ###
            if evr_number_of_lines_of_detection_settings > 0:
                offset_index = (
                    evr_number_of_lines_of_notes
                    + evr_number_of_lines_of_detection_settings
                    + 3
                )
                evr_region_detection_settings = record_lines[3:offset_index]
                print(f"Region detection settings: {evr_region_detection_settings}")
            #
            ### Region classification (string). Default value is "Unclassified regions" ###
            evr_region_classification = record_lines[-3]
            if evr_region_classification not in self.evr_region_classifications:
                raise Exception(
                    f"Problem, unknown region classification: {evr_region_classification}"
                )
            print(f"Region classification: {evr_region_classification}")
            #
            # Krill regions are deliberately not recorded (unknowns in the data).
            if evr_region_classification == "krill_schools":
                print("Krill, skipping!!!")
                return
            #
            # Vertex data: (date, time, depth) triples on the second-to-last
            # line; the trailing token is the region type, stripped with [:-1].
            evr_points = [x for x in record_lines[-2].split(" ") if x][:-1]
            # print(f"EVR points: {evr_points}")  # TODO: strip last entry
            #
            # itertools.batched requires Python 3.12+.
            evr_point_chunks = list(itertools.batched(evr_points, 3))
            for evr_point_chunk in evr_point_chunks:
                processed_point = self.process_vertice(
                    date_string=evr_point_chunk[0],
                    time_string=evr_point_chunk[1],
                    depth=float(evr_point_chunk[2]),
                )
                print(processed_point)
            #
            # Sanity check: three tokens per declared point.
            if len(evr_points) != evr_point_count * 3:
                raise Exception("EVR point count does not match expected.")
            #
            # "0" = bad (no data); "1" = analysis; "2" = marker, "3" = fishtracks; "4" = bad (empty water);
            evr_region_type = [x for x in record_lines[-2].split(" ") if x][-1]
            print(f"Region type: {self.region_type[evr_region_type]}")
            # Region name (string), last line of the record.
            evr_region_name = record_lines[-1]
            print(f"Region name: {evr_region_name}")
            #
            print("get lat lon")
            (latitude, longitude) = self.cruise_manager.get_coordinates(
                start_time=evr_left_x_value_of_bounding_rectangle.isoformat(),
                end_time=evr_right_x_value_of_bounding_rectangle.isoformat(),
            )
            print("get local time")
            local_time = self.geospatial_manager.get_local_time(
                iso_time=evr_left_x_value_of_bounding_rectangle.isoformat(),
                latitude=latitude,
                longitude=longitude,
            )
            print("get solar")
            # NOTE(review): variable is named solar_altitude but the call is
            # get_solar_azimuth — confirm which quantity is intended.
            solar_altitude = self.astronomical_manager.get_solar_azimuth(
                iso_time=evr_left_x_value_of_bounding_rectangle.isoformat(),
                latitude=latitude,
                longitude=longitude,
            )
            print("phase_of_day")
            phase_of_day = self.astronomical_manager.phase_of_day(
                iso_time=evr_left_x_value_of_bounding_rectangle.isoformat(),
                latitude=latitude,
                longitude=longitude,
            )
            print("distance")
            distance_from_coastline = (  # Note this takes about 14 seconds each, very slow
                self.geospatial_manager.check_distance_from_coastline(
                    latitude=latitude,
                    longitude=longitude,
                )
            )
            print("altitude")
            evr_altitude = self.cruise_manager.get_altitude(
                start_time=evr_left_x_value_of_bounding_rectangle.isoformat(),
                end_time=evr_right_x_value_of_bounding_rectangle.isoformat(),
                bbox_max=evr_bottom_y_value_of_bounding_rectangle,
            )
            # #
            # # print("%5.2f, %5.2f, {2}, {3}, {4]" % (latitude, longitude, local_time, solar_altitude, is_daytime, distance_from_coastline, evr_altitude))
            # print(
            #     f"{latitude}, {longitude}, {local_time}, {solar_altitude}, {is_daytime}, {distance_from_coastline}, {evr_altitude}"
            # )
            #
            # TODO: need additional info for provenance --> need to create a unique key for each
            # region; region_id alone recycles from file to file, so a hash of
            # the geometry line is used as the key.
            ### provenance ###
            geometry_string = record_lines[-2]  # inclusive of evr_region_type
            # geometry_string = evr_record  # TODO: should i hash the entire record or the geometry?
            geometry_hash = (
                f"{hashlib.sha256(geometry_string.encode('utf-8')).hexdigest()}"
                # f"sha256:{hashlib.sha256(geometry_string.encode('utf-8')).hexdigest()}"
            )
            #
            # parquet_record_manager = ParquetRecordManager()
            # print(parquet_record_manager)
            graph_record_manager = GraphRecordManager(
                classification=evr_region_classification,
                point_count=evr_point_count,
                time_start=evr_left_x_value_of_bounding_rectangle.isoformat(),
                time_end=evr_right_x_value_of_bounding_rectangle.isoformat(),
                depth_min=np.round(evr_top_y_value_of_bounding_rectangle, 2),
                depth_max=np.round(evr_bottom_y_value_of_bounding_rectangle, 2),
                month=evr_left_x_value_of_bounding_rectangle.month,  # TODO: UTC Month, maybe change to localtime
                latitude=float(latitude),  # TODO: too many digits
                longitude=float(longitude),
                local_time=local_time,
                solar_altitude=solar_altitude,
                phase_of_day=phase_of_day,
                #
                distance_from_coastline=distance_from_coastline,
                altitude=evr_altitude,
                # geometry="P(0, 1)",  # TODO: https://hvplot.holoviz.org/en/docs/latest/ref/api/manual/hvplot.hvPlot.polygons.html
                #
                filename=filename,  # how do i find in parquet
                region_id=evr_region_id,
                geometry_hash=geometry_hash,
            )
            # print(graph_record_manager.to_json())
            #
            # Append this record as one new row of the accumulator DataFrame.
            update_df = pd.DataFrame([graph_record_manager.to_dict()])
            self.all_records_df = pd.concat(
                [self.all_records_df, update_df],
                ignore_index=True,
            )
        except Exception as process_evr_record_exception:
            # Best-effort: a bad record is reported and skipped, not fatal.
            print(f"Problem with process_evr_record: {process_evr_record_exception}")
        finally:
            print("______________________________________done reading_+_+_+_+_+_+_+_+")
|
|
341
|
+
|
|
342
|
+
def process_evr_file(
|
|
343
|
+
self,
|
|
344
|
+
evr_file_path: str = None,
|
|
345
|
+
evr_filename: str = None,
|
|
346
|
+
):
|
|
347
|
+
try:
|
|
348
|
+
print(f"Filename: {evr_filename}")
|
|
349
|
+
with open(evr_file_path + evr_filename, "r") as file:
|
|
350
|
+
lines = file.read()
|
|
351
|
+
|
|
352
|
+
records = lines.split("\n\n")
|
|
353
|
+
records = [i for i in records if i.startswith("13 ")] # filter
|
|
354
|
+
for evr_record in records:
|
|
355
|
+
self.process_evr_record(evr_record=evr_record, filename=evr_filename)
|
|
356
|
+
except Exception as process_evr_file_exception:
|
|
357
|
+
print(
|
|
358
|
+
f"Problem processing file {evr_filename}: {process_evr_file_exception}"
|
|
359
|
+
)
|
|
360
|
+
|
|
361
|
+
    def process_evr_directory(self, evr_directory_path="../../data/HB201906/"):
        """Parse the .evr files in a directory and write the combined records.

        Collects all *.evr files (sorted by name), processes them into
        self.all_records_df, indexes the frame by geometry_hash, sorts by
        time_start, and writes graph_record.parquet and graph_record.csv to
        the current working directory. Failures are printed, not raised.

        :param evr_directory_path: directory to scan for .evr files.
        """
        try:
            all_evr_files = [
                f
                for f in listdir(evr_directory_path)
                if isfile(join(evr_directory_path, f)) and Path(f).suffix == ".evr"
            ]
            all_evr_files.sort()
            print(f"Found {len(all_evr_files)} EVR files.")
            # NOTE(review): [:1] means only the FIRST file is processed — the
            # TODO below marks this as a temporary development limit.
            for evr_file in all_evr_files[:1]:  # TODO: fix this
                self.process_evr_file(
                    evr_file_path=evr_directory_path, evr_filename=evr_file
                )
            # I don't have the lat/lon information to draw here... need to query the zarr store...
            print(self.all_records_df)
            # Index by the per-region geometry hash (kept as a column too).
            self.all_records_df.set_index(
                keys="geometry_hash", drop=False, inplace=True
            )
            # sort by time
            self.all_records_df.sort_values(
                by="time_start",
                axis=0,
                ascending=True,
                inplace=True,
                ignore_index=False,
            )
            print("writing files")
            # Parquet output requires the pyarrow engine to be installed.
            self.all_records_df.to_parquet(
                path="graph_record.parquet",
                engine="pyarrow",
                compression="snappy",
                index=True,
                partition_cols=None,
            )
            self.all_records_df.to_csv(
                path_or_buf="graph_record.csv",
                header=True,
                index=True,
                mode="w",
            )
            print("done writing files")
            #
        except Exception as process_evr_directory_exception:
            print(
                f"Problem processing evr directory: {process_evr_directory_exception}"
            )
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
# Script entry point: process the default development data directory.
if __name__ == "__main__":
    try:
        echoview_record_manager = EchoviewRecordManager()
        # Relative path assumes the script is run from its package directory.
        echoview_record_manager.process_evr_directory(
            evr_directory_path="../../data/HB201906/"
        )
        print("done processing everything")
    except Exception as e:
        # Top-level boundary: report and exit cleanly.
        print(e)
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
# Example of polygon
|
|
422
|
+
# 20191106 1314583780 25.4929369108 # top-left
|
|
423
|
+
# 20191106 1314583780 30.2941528987 # bottom-left
|
|
424
|
+
# 20191106 1314593790 30.2941528987 # bottom-right
|
|
425
|
+
# 20191106 1314593790 25.3008882713 # top-right
|
|
426
|
+
# 20191106 1314583780 25.3008882713 1 # top-left'ish, ends with '1' ...goes counter-clockwise
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
from json import dumps
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Format for export and bulk ingest into neo4j
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class GraphRecordManager:
    """One EVR region annotation shaped for export and bulk ingest into neo4j.

    Holds the region classification, bounding extents, derived
    geospatial/astronomical context, and provenance fields. ``to_dict``
    and ``to_json`` expose the record for DataFrame rows and JSON export.
    """

    def __init__(
        self,
        classification,
        point_count,
        # geometry,
        time_start,
        time_end,
        depth_min,
        depth_max,
        month,
        altitude,
        latitude: float,
        longitude: float,
        local_time,
        distance_from_coastline,
        solar_altitude,
        phase_of_day,
        filename,
        region_id,
        geometry_hash,  # sha256 hash
        ship: str = "Henry_B._Bigelow",
        cruise: str = "HB1906",
        instrument: str = "EK60",
    ):
        """Store all fields as instance attributes (no validation performed)."""
        print("__init__ called")
        self.classification: str = classification
        self.point_count: int = point_count
        # self.geometry: str = geometry  # Do not want for neo4j
        ### geospatial ###
        self.time_start: str = time_start
        self.time_end: str = time_end
        self.depth_min: float = depth_min
        self.depth_max: float = depth_max
        self.month: int = month
        self.altitude: float = altitude
        self.latitude: float = latitude
        self.longitude: float = longitude
        self.local_time: str = local_time
        self.distance_from_coastline: float = distance_from_coastline
        ### astronomical ###
        self.solar_altitude: float = solar_altitude
        # NOTE(review): was annotated bool, but the value comes from
        # AstronomicalManager.phase_of_day — confirm the actual type.
        self.phase_of_day = phase_of_day
        ### provenance ###
        self.filename: str = filename
        self.region_id: str = region_id
        self.geometry_hash: str = geometry_hash
        self.ship: str = ship
        self.cruise: str = cruise
        self.instrument: str = instrument

    def to_dict(
        self,
    ):
        """Return the record's fields as a dict (the instance ``__dict__``)."""
        try:
            return self.__dict__
        except Exception as knowledge_graph_record_exception:
            print(
                f"Problem with knowledge graph record: {knowledge_graph_record_exception}"
            )

    def to_json(
        self,
    ):
        """Return the record serialized as a JSON string."""
        try:
            return dumps(self.__dict__)
        except Exception as knowledge_graph_record_exception:
            # Fixed message: previously said "echofish_record", a copy-paste
            # remnant from another record manager in this package.
            print(
                f"Problem with knowledge graph record: {knowledge_graph_record_exception}"
            )
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
from json import dumps
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Format for export to parquet and bulk ingest into neo4j:
|
|
5
|
+
"""
|
|
6
|
+
# TODO:
|
|
7
|
+
# [1] write the records to a pandas dataframe
|
|
8
|
+
# [2] write df to parquet and tag as github resource
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ParquetRecordManager:
    """One EVR region annotation shaped for parquet export / neo4j bulk ingest.

    The ``geometry`` argument is accepted but not stored (kept for
    interface compatibility; the commented assignment documents intent).
    ``to_dict`` and ``to_json`` expose the stored fields.
    """

    def __init__(
        self,
        classification,
        point_count,
        geometry,
        time_start,
        time_end,
        depth_min,
        depth_max,
        month,
        # altitude,
        # latitude: float,
        # longitude: float,
        # local_time,
        # distance_from_coastline,
        # solar_altitude,
        # is_daytime,
        filename,
        region_id,
        geometry_hash,  # sha256 hash
        ship: str = "Henry_B._Bigelow",
        cruise: str = "HB1906",
        instrument: str = "EK60",
    ):
        """Store the record's fields as instance attributes (no validation)."""
        print("__init__ called")
        self.classification = classification
        self.point_count = point_count
        # self.geometry = geometry  # deliberately not stored
        ### geospatial extent of the region ###
        self.time_start = time_start
        self.time_end = time_end
        self.depth_min = depth_min
        self.depth_max = depth_max
        self.month = month
        # self.altitude = altitude
        # self.latitude = latitude
        # self.longitude = longitude
        # self.local_time = local_time
        # self.distance_from_coastline = distance_from_coastline
        # ### astronomical ###
        # self.solar_altitude = solar_altitude
        # self.is_daytime = is_daytime
        ### provenance: where the region came from ###
        self.filename = filename
        self.region_id = region_id
        self.geometry_hash = geometry_hash
        self.ship = ship
        self.cruise = cruise
        self.instrument = instrument

    def to_dict(
        self,
    ):
        """Return the record's fields as a dict (the instance ``__dict__``)."""
        try:
            return self.__dict__
        except Exception as parquet_record_exception:
            print(f"Problem with parquet record: {parquet_record_exception}")

    def to_json(
        self,
    ):
        """Return the record serialized as a JSON string."""
        try:
            return dumps(self.__dict__)
        except Exception as parquet_record_exception:
            print(f"Problem with parquet record: {parquet_record_exception}")
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
class ShapeManager:
    """Placeholder builder for annotation shapes.

    All shape constructors are unimplemented stubs; each accepts the raw
    EVR date/time/depth strings it will eventually convert.
    """

    def __init__(
        self,
    ):
        # Intended number of decimal places for coordinate rounding
        # (not yet used by any method).
        self.DECIMAL_PRECISION = 4

    def point(
        self,
        date_string,
        time_string,
        depth_string,
    ):  # -> returntype # TODO:
        """Stub: build a point from one date/time/depth triple (not implemented)."""
        pass

    def polygon(
        self,
        date_string,
        time_string,
        depth_string,
    ):  # -> type # TODO:
        """Stub: build a polygon from vertex strings (not implemented)."""
        pass

    def bounding_box(
        self,
        date_string,
        time_string,
        depth_string,
    ):  # -> returntype # TODO:
        """Stub: build a bounding box from extent strings (not implemented)."""
        pass
|