water_column_sonar_annotation-26.1.8-py3-none-any.whl → water_column_sonar_annotation-26.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/astronomical/test_astronomical_manager.py +10 -10
- tests/conftest.py +60 -60
- tests/cruise/test_cruise_manager.py +4 -5
- tests/geospatial/test_geospatial_manager.py +8 -12
- tests/record/test_echoview_record_manager.py +135 -13
- water_column_sonar_annotation/__init__.py +2 -2
- water_column_sonar_annotation/astronomical/astronomical_manager.py +5 -5
- water_column_sonar_annotation/geospatial/geospatial_manager.py +6 -9
- water_column_sonar_annotation/record/echoview_record_manager.py +103 -84
- water_column_sonar_annotation/record/graph_record_manager.py +1 -0
- water_column_sonar_annotation/record/parquet_record_manager.py +108 -36
- water_column_sonar_annotation/shape/shape_manager.py +29 -29
- {water_column_sonar_annotation-26.1.8.dist-info → water_column_sonar_annotation-26.2.0.dist-info}/METADATA +4 -3
- {water_column_sonar_annotation-26.1.8.dist-info → water_column_sonar_annotation-26.2.0.dist-info}/RECORD +17 -17
- {water_column_sonar_annotation-26.1.8.dist-info → water_column_sonar_annotation-26.2.0.dist-info}/WHEEL +0 -0
- {water_column_sonar_annotation-26.1.8.dist-info → water_column_sonar_annotation-26.2.0.dist-info}/licenses/LICENSE +0 -0
- {water_column_sonar_annotation-26.1.8.dist-info → water_column_sonar_annotation-26.2.0.dist-info}/top_level.txt +0 -0
tests/astronomical/test_astronomical_manager.py CHANGED

@@ -51,7 +51,7 @@ def test_phase_of_day_at_noon():
 latitude=39.9674884, # Boulder
 longitude=-105.2532602,
 )
-assert phase ==
+assert phase == "day"


 def test_phase_of_day_at_midnight():
@@ -61,7 +61,7 @@ def test_phase_of_day_at_midnight():
 latitude=39.9674884, # Boulder
 longitude=-105.2532602,
 )
-assert phase ==
+assert phase == "night"


 def test_phase_of_day_before_sunset():
@@ -72,7 +72,7 @@ def test_phase_of_day_before_sunset():
 latitude=39.9674884, # Boulder
 longitude=-105.2532602,
 )
-assert phase ==
+assert phase == "day" # day


 def test_phase_of_day_after_sunset():
@@ -83,7 +83,7 @@ def test_phase_of_day_after_sunset():
 latitude=39.9674884, # Boulder
 longitude=-105.2532602,
 )
-assert phase ==
+assert phase == "dusk" # dusk


 def test_phase_of_day_before_nautical_sunset():
@@ -94,7 +94,7 @@ def test_phase_of_day_before_nautical_sunset():
 latitude=39.9674884, # Boulder
 longitude=-105.2532602,
 )
-assert phase_before_nautical_sunset ==
+assert phase_before_nautical_sunset == "dusk" # dusk


 def test_phase_of_day_after_nautical_sunset():
@@ -104,7 +104,7 @@ def test_phase_of_day_after_nautical_sunset():
 latitude=39.9674884, # Boulder
 longitude=-105.2532602,
 )
-assert phase_after_nautical_sunset ==
+assert phase_after_nautical_sunset == "night" # night


 def test_phase_of_day_before_sunrise():
@@ -114,7 +114,7 @@ def test_phase_of_day_before_sunrise():
 latitude=39.9674884, # Boulder
 longitude=-105.2532602,
 )
-assert phase_at_sunrise ==
+assert phase_at_sunrise == "dawn" # dawn


 def test_phase_of_day_after_sunrise():
@@ -124,7 +124,7 @@ def test_phase_of_day_after_sunrise():
 latitude=39.9674884, # Boulder
 longitude=-105.2532602,
 )
-assert phase_at_sunrise ==
+assert phase_at_sunrise == "day" # day


 def test_phase_of_day_before_nautical_sunrise():
@@ -135,7 +135,7 @@ def test_phase_of_day_before_nautical_sunrise():
 latitude=39.9674884, # Boulder
 longitude=-105.2532602,
 )
-assert phase_before_nautical_sunrise ==
+assert phase_before_nautical_sunrise == "night" # night


 def test_phase_of_day_after_nautical_sunrise():
@@ -145,4 +145,4 @@ def test_phase_of_day_after_nautical_sunrise():
 latitude=39.9674884, # Boulder
 longitude=-105.2532602,
 )
-assert phase ==
+assert phase == "dawn" # dawn
tests/conftest.py CHANGED

@@ -1,60 +1,60 @@
-from pathlib import Path
-
-import pooch
-import pytest
-
-HERE = Path(__file__).parent.absolute()
-TEST_DATA_FOLDER = HERE / "test_resources"
-
-HB1906_DATA = pooch.create(
-
-
-
-
-
-
-
-
-
-
-
-
-
-)
-
-
-def fetch_raw_files():
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-@pytest.fixture(scope="session")
-def test_path():
-
-
-
-
-
-# """
-# Folder locations in mac and windows:
-#
-# Windows
-# C:\Users\<user>\AppData\Local\echopype\Cache\2024.12.23.10.10
-#
-# MacOS
-# /Users//Library/Caches/echopype/2024.12.23.10.10
-# """
+# from pathlib import Path
+#
+# import pooch
+# import pytest
+#
+# HERE = Path(__file__).parent.absolute()
+# TEST_DATA_FOLDER = HERE / "test_resources"
+#
+# HB1906_DATA = pooch.create(
+# path=pooch.os_cache("water-column-sonar-annotation"),
+# base_url="https://github.com/CI-CMG/water-column-sonar-annotation/releases/download/v26.1.0/",
+# retry_if_failed=1,
+# registry={
+# "HB201906_BOTTOMS.zip": "sha256:20609581493ea3326c1084b6868e02aafbb6c0eae871d946f30b8b5f0e7ba059",
+# "HB201906_EVR.zip": "sha256:256778122e9c8b05884f5f2c5cf5cdad5502aa3dae61d417ec5e4278262d937a",
+# #
+# # "ne_50m_coastline.shp": "sha256:797d675af9613f80b51ab6049fa32e589974d7a97c6497ca56772965f179ed26",
+# # "ne_50m_coastline.shx": "sha256:0ff1792f2d16b58246d074215edd9d12fa280880ecaad61a91b9382fee854065",
+# #
+# "ne_10m_coastline.shp": "sha256:459a4a97c09db19aadf5244026612de9d43748be27f83a360242b99f7fabb3c1",
+# "ne_10m_coastline.shx": "sha256:f873afee7f56779ce52253f740ec251c2f12244aea911dc40f0a85d75de8d5f2",
+# },
+# )
+#
+#
+# def fetch_raw_files():
+# # HB1906_DATA.fetch(fname="HB201906_BOTTOMS.zip", progressbar=True)
+# # HB1906_DATA.fetch(fname="HB201906_EVR.zip", progressbar=True)
+#
+# # HB1906_DATA.fetch(fname="ne_50m_coastline.shp", progressbar=True)
+# # HB1906_DATA.fetch(fname="ne_50m_coastline.shx", progressbar=True)
+#
+# # HB1906_DATA.fetch(fname="ne_10m_coastline.shp", progressbar=True)
+# # HB1906_DATA.fetch(fname="ne_10m_coastline.shx", progressbar=True)
+#
+# file_name = HB1906_DATA.fetch(fname="HB201906_EVR.zip", progressbar=True)
+#
+# """
+# water-column-sonar-annotation user$ ls /Users/user/Library/Caches/water-column-sonar-annotation
+# HB201906_BOTTOMS.zip HB201906_EVR.zip ne_10m_coastline.shp ne_10m_coastline.shx
+# """
+# return Path(file_name).parent
+#
+#
+# @pytest.fixture(scope="session")
+# def test_path():
+# return {
+# "DATA_TEST_PATH": fetch_raw_files(),
+# }
+#
+#
+# # """
+# # Folder locations in mac and windows:
+# #
+# # Windows
+# # C:\Users\<user>\AppData\Local\echopype\Cache\2024.12.23.10.10
+# #
+# # MacOS
+# # /Users//Library/Caches/echopype/2024.12.23.10.10
+# # """
tests/cruise/test_cruise_manager.py CHANGED

@@ -1,5 +1,4 @@
 import numpy as np
-import pytest

 from water_column_sonar_annotation.cruise import CruiseManager

@@ -13,9 +12,9 @@ def teardown_module():
 print("teardown")


-@pytest.fixture
-def process_cruise_path(test_path):
-
+# @pytest.fixture
+# def process_cruise_path(test_path):
+# return test_path["DATA_TEST_PATH"]


 #######################################################
@@ -26,7 +25,7 @@ def process_cruise_path(test_path):
 # assert len(cruise.Sv.shape) == 3


-def test_get_cruise(
+def test_get_cruise():
 cruise_manager = CruiseManager()
 cruise = cruise_manager.get_cruise()
 assert len(cruise.Sv.shape) == 3
tests/geospatial/test_geospatial_manager.py CHANGED

@@ -1,5 +1,4 @@
 import numpy as np
-import pytest

 from water_column_sonar_annotation.geospatial import GeospatialManager

@@ -13,33 +12,30 @@ def teardown_module():
 print("teardown")


-@pytest.fixture
-def process_check_distance_from_coastline(test_path):
-
+# @pytest.fixture
+# def process_check_distance_from_coastline(test_path):
+# return test_path["DATA_TEST_PATH"]


 #######################################################
-def test_check_distance_from_coastline(process_check_distance_from_coastline, tmp_path):
+# def test_check_distance_from_coastline(process_check_distance_from_coastline, tmp_path):
+def test_check_distance_from_coastline():
 geospatial_manager = GeospatialManager()
 # Point in middle of atlantic https://wktmap.com/?ab28cbae
 distance = geospatial_manager.check_distance_from_coastline(
 latitude=51.508742,
 longitude=-30.410156,
-shapefile_path=process_check_distance_from_coastline,
 )
-#
-assert np.isclose(distance, 1_233_910.720702243
+# distance should be ~1,200 km away
+assert np.isclose(distance, 1_233_911) # 1_233_910.720702243


-def test_check_distance_from_coastline_woods_hole(
-process_check_distance_from_coastline, tmp_path
-):
+def test_check_distance_from_coastline_woods_hole():
 geospatial_manager = GeospatialManager()
 # Point in middle of woods hole vineyard sound: https://wktmap.com/?9b405aa9
 distance = geospatial_manager.check_distance_from_coastline(
 latitude=41.494692,
 longitude=-70.647926,
-shapefile_path=process_check_distance_from_coastline,
 )
 # The sound is 5 km across
 # assert np.isclose(distance, 4_457.0347) # 4.5 km --> should be 2.5 km?
tests/record/test_echoview_record_manager.py CHANGED

@@ -1,3 +1,4 @@
+import numpy as np
 import pytest

 from water_column_sonar_annotation.record import EchoviewRecordManager
@@ -92,7 +93,6 @@ AH_School
 20191106 1314583780 25.4929369108 20191106 1314583780 30.2941528987 20191106 1314593790 30.2941528987 20191106 1314593790 25.3008882713 20191106 1314583780 25.3008882713 1
 Region 23"""

-
 ah_school_example2 = """13 16 28 0 7 -1 1 20191106 1317305715 31.8305420148 20191106 1317335745 35.8635634446
 0
 10
@@ -120,41 +120,163 @@ atlantic_herring

 def test_process_evr_record_possible_herring():
 echoview_record_manager = EchoviewRecordManager()
-echoview_record_manager.
-evr_record=possible_herring_example,
+output = echoview_record_manager.process_evr_record_full_geometry(
+evr_record=possible_herring_example, file_name="test.evr"
+)
+assert output.time_start == np.datetime64("2019-09-25 22:47:24.213000")
+assert output.time_end == np.datetime64("2019-09-25 22:47:36.246000")
+assert output.depth_min == 24.13
+assert output.depth_max == 35.17
+assert output.altitude == 1.62
+assert output.latitude == 41.29728
+assert output.longitude == -70.96354
+assert output.distance_from_coastline == 11249
+assert output.local_time == "2019-09-25T18:47:24.213000-04:00"
+assert output.month == 9
+assert output.solar_altitude == -2.92
+assert output.phase_of_day == "dusk"
+assert output.classification == "possible_herring"
+assert output.filename == "test.evr"
+assert output.region_id == 7
+assert output.ship == "Henry_B._Bigelow"
+assert output.cruise == "HB1906"
+assert output.instrument == "EK60"
+assert output.point_count == 4
+assert (
+output.geometry_hash
+== "a5032c1ae6a14cb534ae2dfcfcf15056a9dfc23c04270f7e7f1e44f3d23beb7c"
 )
+assert len(output.geometry) == 120 # TODO: elaborate


 def test_process_evr_record_fish_school():
 echoview_record_manager = EchoviewRecordManager()
-echoview_record_manager.
-evr_record=fish_school_example,
+output = echoview_record_manager.process_evr_record_full_geometry(
+evr_record=fish_school_example, file_name="test.evr"
+)
+assert output.time_start == np.datetime64("2019-09-25 17:49:45.160500")
+assert output.time_end == np.datetime64("2019-09-25 17:49:50.164500")
+assert output.depth_min == 20.28
+assert output.depth_max == 26.3
+assert output.altitude == 6.83
+assert output.latitude == 41.38583
+assert output.longitude == -71.31283
+assert output.distance_from_coastline == 8145
+assert output.local_time == "2019-09-25T13:49:45.160500-04:00"
+assert output.month == 9
+assert output.solar_altitude == 44.57
+assert output.phase_of_day == "day"
+assert output.classification == "fish_school"
+assert output.filename == "test.evr"
+assert output.region_id == 8
+assert output.ship == "Henry_B._Bigelow"
+assert output.cruise == "HB1906"
+assert output.instrument == "EK60"
+assert output.point_count == 30
+assert (
+output.geometry_hash
+== "e8f26554fa6ade664adc7c2b60896cf86d47092636ed8f2152c4cfa45dd962a9"
 )
+assert len(output.geometry) == 898 # TODO: elaborate


 def test_process_evr_record_unclassified_regions():
 echoview_record_manager = EchoviewRecordManager()
-echoview_record_manager.
-evr_record=unclassified_regions_example,
+output = echoview_record_manager.process_evr_record_full_geometry(
+evr_record=unclassified_regions_example, file_name="test.evr"
 )
+# print(output)
+assert output.time_start == np.datetime64("2019-09-25 20:53:45.895300")
+assert output.time_end == np.datetime64("2019-09-25 20:54:11.931800")
+assert output.depth_min == 9.28
+assert output.depth_max == 11.53
+assert output.altitude == 21.68
+assert output.latitude == 41.346
+assert output.longitude == -70.90754
+assert output.distance_from_coastline == 5132
+assert output.local_time == "2019-09-25T16:53:45.895300-04:00"
+assert output.month == 9
+assert output.solar_altitude == 18.07
+assert output.phase_of_day == "day"
+assert output.classification == "Unclassified regions"
+assert output.filename == "test.evr"
+assert output.region_id == 1
+assert output.ship == "Henry_B._Bigelow"
+assert output.cruise == "HB1906"
+assert output.instrument == "EK60"
+assert output.point_count == 12
+assert (
+output.geometry_hash
+== "521b9994ebcc00cb4640a9e9cfddf7470f3903e3eab8c918604269c48e723d8a"
+)
+assert len(output.geometry) == 352 # TODO: elaborate


 def test_process_evr_record_krill_schools():
 echoview_record_manager = EchoviewRecordManager()
-echoview_record_manager.
-evr_record=krill_schools_example,
+output = echoview_record_manager.process_evr_record_full_geometry(
+evr_record=krill_schools_example, file_name="test.evr"
 )
+assert output is None


 def test_process_evr_record_ah_school_1():
 echoview_record_manager = EchoviewRecordManager()
-echoview_record_manager.
-evr_record=ah_school_example1,
+output = echoview_record_manager.process_evr_record_full_geometry(
+evr_record=ah_school_example1, file_name="test.evr"
+)
+assert output.time_start == np.datetime64("2019-11-06 13:14:58.378000")
+assert output.time_end == np.datetime64("2019-11-06 13:14:59.379000")
+assert output.depth_min == 25.3
+assert output.depth_max == 30.29
+assert output.altitude == 70.28
+assert output.latitude == 44.13652
+assert output.longitude == -67.0766
+assert output.distance_from_coastline == 51279
+assert output.local_time == "2019-11-06T09:14:58.378000-04:00"
+assert output.month == 11
+assert output.solar_altitude == 17.61
+assert output.phase_of_day == "day"
+assert output.classification == "AH_School"
+assert output.filename == "test.evr"
+assert output.region_id == 23
+assert output.ship == "Henry_B._Bigelow"
+assert output.cruise == "HB1906"
+assert output.instrument == "EK60"
+assert output.point_count == 5
+assert (
+output.geometry_hash
+== "6b976f373a3e035a3d0df62984cbdacf9e9df81e36369352637c7ae5458924a2"
 )
+assert len(output.geometry) == 148 # TODO: elaborate


 def test_process_evr_record_ah_school_2():
 echoview_record_manager = EchoviewRecordManager()
-echoview_record_manager.
-evr_record=ah_school_example2,
+output = echoview_record_manager.process_evr_record_full_geometry(
+evr_record=ah_school_example2, file_name="test.evr"
+)
+assert output.time_start == np.datetime64("2019-11-06 13:17:30.571500")
+assert output.time_end == np.datetime64("2019-11-06 13:17:33.574500")
+assert output.depth_min == 31.83
+assert output.depth_max == 35.86
+assert output.altitude == 70.93
+assert output.latitude == 44.1376
+assert output.longitude == -67.06638
+assert output.distance_from_coastline == 50464
+assert output.local_time == "2019-11-06T09:17:30.571500-04:00"
+assert output.month == 11
+assert output.solar_altitude == 17.93
+assert output.phase_of_day == "day"
+assert output.classification == "AH_School"
+assert output.filename == "test.evr"
+assert output.region_id == 28
+assert output.ship == "Henry_B._Bigelow"
+assert output.cruise == "HB1906"
+assert output.instrument == "EK60"
+assert output.point_count == 16
+assert (
+output.geometry_hash
+== "038c6e57dd95bf9c836ae8ef6145ae53a4fa64bd6f836050c8fa8f2601ba2a41"
 )
+assert len(output.geometry) == 480 # TODO: elaborate
water_column_sonar_annotation/astronomical/astronomical_manager.py CHANGED

@@ -47,7 +47,7 @@ class AstronomicalManager:
 iso_time: str,
 latitude: float,
 longitude: float,
-) ->
+) -> str:
 """
 Returns whether the time/gps references a Nautical Daylight time
 Going to need to verify the az is correctly computed
@@ -62,12 +62,12 @@ class AstronomicalManager:
 longitude=longitude,
 )
 if solar_azimuth < self.NAUTICAL_TWILIGHT_DEGREES:
-return
+return "night" # night
 if solar_azimuth >= 0.0:
-return
+return "day" # day
 if local_hour < 12:
-return
-return
+return "dawn" # dawn
+return "dusk" # dusk

 # def get_moon_phase(self):
 # # TODO: add method for getting the moon phase
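For reference, a minimal sketch of the branch order the hunk above implies. The threshold value here is an assumption (the conventional nautical-twilight angle of -12 degrees); the package's actual NAUTICAL_TWILIGHT_DEGREES constant is not visible in this diff, and the diff's variable is named solar_azimuth even though it is compared like an altitude.

```python
# Sketch only: mirrors the branch order shown in the hunk above. The -12.0
# threshold is an assumed value (conventional nautical twilight); the real
# NAUTICAL_TWILIGHT_DEGREES constant is not shown in this diff.
NAUTICAL_TWILIGHT_DEGREES = -12.0


def classify_phase_of_day(solar_altitude: float, local_hour: int) -> str:
    if solar_altitude < NAUTICAL_TWILIGHT_DEGREES:
        return "night"
    if solar_altitude >= 0.0:
        return "day"
    # Sun between nautical twilight and the horizon: mornings are dawn, evenings dusk.
    return "dawn" if local_hour < 12 else "dusk"


# The possible_herring fixture earlier in this diff (solar altitude -2.92, 18:47 local)
# classifies as "dusk" under this scheme.
print(classify_phase_of_day(-2.92, 18))
```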
water_column_sonar_annotation/geospatial/geospatial_manager.py CHANGED

@@ -58,30 +58,27 @@ class GeospatialManager:
 self, # -30.410156 51.508742)
 latitude: float = 51.508742, # 42.682435,
 longitude: float = -30.410156, # -68.741455,
-
-) -> np.float32 | None:
+) -> int | None:
 """
 # Note this takes about 14 seconds each, very slow!!!
 """
 try:
 # requires the shape file too
-geometry_one = gpd.read_file(
+geometry_one = gpd.read_file(
+f"{data_path()['DATA_PATH']}/ne_10m_coastline.shp"
+)
 geometry_one = geometry_one.set_crs(self.crs)
 geometry_two = Point([longitude, latitude])
 gdf_p = gpd.GeoDataFrame(geometry=[geometry_two], crs=self.crs)
 gdf_l = geometry_one
 gdf_p = gdf_p.to_crs(gdf_p.estimate_utm_crs())
-# print(gdf_p.to_string())
 gdf_l = gdf_l.to_crs(gdf_p.crs)
-# TODO: index 1399 has inf values, investigate
-# RuntimeWarning: invalid value encountered in distance
-# return lib.distance(a, b, **kwargs)
 all_distances = [
 gdf_p.geometry.distance(gdf_l.get_geometry(0)[i])[0]
 for i in range(len(gdf_l.get_geometry(0)))
 if gdf_l.get_geometry(0)[i].is_valid
 ]
-return
+return int(np.min(all_distances))
 except Exception as e:
 print(f"Could not process the distance: {e}")

@@ -102,8 +99,8 @@ class GeospatialManager:
 local_time = utc.astimezone(to_zone)
 return local_time.isoformat() # [:19]

+@staticmethod
 def get_local_hour_of_day(
-self,
 iso_time: str = "2026-01-26T20:35:00Z",
 latitude: float = 51.508742,
 longitude: float = -30.410156,
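The distance check above reads the Natural Earth ne_10m_coastline shapefile, projects the query point to its estimated UTM zone, and takes the distance to the coastline geometries in metres. A hedged, self-contained sketch of the same idea against a single made-up coastline segment (the segment coordinates are illustrative only, not from the real shapefile):

```python
# Sketch only: same project-to-UTM-then-measure idea as the hunk above, but
# against one invented coastline segment instead of ne_10m_coastline.shp.
import geopandas as gpd
from shapely.geometry import LineString, Point

coastline = gpd.GeoDataFrame(
    geometry=[LineString([(-70.7, 41.3), (-70.5, 41.6)])], crs="EPSG:4326"
)
point = gpd.GeoDataFrame(geometry=[Point(-70.647926, 41.494692)], crs="EPSG:4326")

point_utm = point.to_crs(point.estimate_utm_crs())
coastline_utm = coastline.to_crs(point_utm.crs)

# Distance in metres, truncated to int like the new return value in the diff.
print(int(point_utm.geometry.iloc[0].distance(coastline_utm.geometry.iloc[0])))
```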
water_column_sonar_annotation/record/echoview_record_manager.py CHANGED

@@ -1,27 +1,50 @@
 import hashlib
 import itertools
-
-
+import os
+import zipfile
 from pathlib import Path

-import numpy as np
 import pandas as pd
+import pooch

 from water_column_sonar_annotation.astronomical import AstronomicalManager
 from water_column_sonar_annotation.cruise import CruiseManager
 from water_column_sonar_annotation.geospatial import GeospatialManager
-from water_column_sonar_annotation.record.
-
+from water_column_sonar_annotation.record.parquet_record_manager import (
+ParquetRecordManager,
 )

-# from water_column_sonar_annotation.record import EchofishRecordManager
-# from water_column_sonar_annotation.record import GRecordManager
-
 """
 Documentation for echoview record files in EVR format:
 https://support.echoview.com/WebHelp/Reference/File_Formats/Export_File_Formats/2D_Region_definition_file_format.htm
 """

+HB201906_EVR = pooch.create(
+path=pooch.os_cache("water-column-sonar-annotation"),
+base_url="https://github.com/CI-CMG/water-column-sonar-annotation/releases/download/v26.1.0/",
+retry_if_failed=1,
+registry={
+# "HB201906_BOTTOMS.zip": "sha256:20609581493ea3326c1084b6868e02aafbb6c0eae871d946f30b8b5f0e7ba059",
+"HB201906_EVR.zip": "sha256:256778122e9c8b05884f5f2c5cf5cdad5502aa3dae61d417ec5e4278262d937a",
+},
+)
+
+
+def fetch_raw_files():
+# HB201906_EVR.fetch(fname="HB201906_BOTTOMS.zip", progressbar=True)
+file_path = HB201906_EVR.fetch(fname="HB201906_EVR.zip", progressbar=True)
+if not os.path.isdir(os.path.join(Path(file_path).parent, "HB201906_EVR")):
+with zipfile.ZipFile(file_path, "r") as zip_ref:
+zip_ref.extractall(os.path.join(Path(file_path).parent, "HB201906_EVR"))
+
+return os.path.join(Path(file_path).parent, "HB201906_EVR")
+
+
+def data_path():
+return {
+"DATA_PATH": fetch_raw_files(),
+}
+

 def chunks(lst, n):
 """Yield strings from n-sized chunks from lst."""
@@ -64,7 +87,6 @@ class EchoviewRecordManager:
 "AH_School",
 ]
 self.all_records_df = pd.DataFrame() # columns=["filename", "start_time"])
-#
 self.astronomical_manager = AstronomicalManager()
 self.cruise_manager = CruiseManager()
 self.geospatial_manager = GeospatialManager()
@@ -98,9 +120,6 @@ class EchoviewRecordManager:
 # evr_region_name = # String
 """

-# TODO:
-# [2] write df to parquet and tag as github resource
-
 @staticmethod
 def process_datetime_string(
 date_string: str,
@@ -117,13 +136,13 @@ class EchoviewRecordManager:
 depth: float,
 ) -> tuple:
 dt = self.process_datetime_string(date_string, time_string)
-# print(dt.
-return dt
+# print(dt.isoformat()) # is epoch time in nanoseconds
+return dt.value, round(depth, 2)

-def
+def process_evr_record_full_geometry(
 self,
 evr_record: str,
-
+file_name: str,
 ):
 try:
 #########################################################
@@ -224,20 +243,23 @@ class EchoviewRecordManager:
 # TODO: If the data has krill, skip creating a record of it
 if evr_region_classification == "krill_schools":
 print("Krill, skipping!!!")
-return
+return None
 #
 # Data for first point – See Data formats below. These data are used to bound the region when importing into Echoview
 evr_points = [x for x in record_lines[-2].split(" ") if x][:-1]
 # print(f"EVR points: {evr_points}") # TODO: strip last entry
 #
 evr_point_chunks = list(itertools.batched(evr_points, 3))
+processed_points = []
 for evr_point_chunk in evr_point_chunks:
 processed_point = self.process_vertice(
 date_string=evr_point_chunk[0],
 time_string=evr_point_chunk[1],
 depth=float(evr_point_chunk[2]),
 )
-
+processed_points.append(processed_point)
+#
+geometry = processed_points
 #
 if len(evr_points) != evr_point_count * 3:
 raise Exception("EVR point count does not match expected.")
@@ -285,55 +307,41 @@ class EchoviewRecordManager:
 end_time=evr_right_x_value_of_bounding_rectangle.isoformat(),
 bbox_max=evr_bottom_y_value_of_bounding_rectangle,
 )
-# #
-# # print("%5.2f, %5.2f, {2}, {3}, {4]" % (latitude, longitude, local_time, solar_altitude, is_daytime, distance_from_coastline, evr_altitude))
-# print(
-# f"{latitude}, {longitude}, {local_time}, {solar_altitude}, {is_daytime}, {distance_from_coastline}, {evr_altitude}"
-# )
-#
-# TODO: need additional infor for provenance --> need to create a unique key for each
-# want a hash of some sort
-# add the region_id (which will recycle from file to file)
 #
 ### provenance ###
 geometry_string = record_lines[-2] # inclusive of evr_region_type
-
-geometry_hash = (
-f"{hashlib.sha256(geometry_string.encode('utf-8')).hexdigest()}"
-# f"sha256:{hashlib.sha256(geometry_string.encode('utf-8')).hexdigest()}"
-)
+geometry_hash = hashlib.sha256(geometry_string.encode("utf-8")).hexdigest()
 #
-
-
-
-
-
-
-
-
-
-
-latitude=float(latitude), # TODO: too many digits
-longitude=float(longitude),
+parquet_record_manager = ParquetRecordManager(
+###
+time_start=evr_left_x_value_of_bounding_rectangle,
+time_end=evr_right_x_value_of_bounding_rectangle,
+depth_min=evr_top_y_value_of_bounding_rectangle,
+depth_max=evr_bottom_y_value_of_bounding_rectangle,
+altitude=evr_altitude,
+latitude=latitude,
+longitude=longitude,
+distance_from_coastline=distance_from_coastline,
 local_time=local_time,
+month=evr_left_x_value_of_bounding_rectangle.month,
 solar_altitude=solar_altitude,
 phase_of_day=phase_of_day,
-
-
-altitude=evr_altitude,
-# geometry="P(0, 1)", # TODO: https://hvplot.holoviz.org/en/docs/latest/ref/api/manual/hvplot.hvPlot.polygons.html
-#
-filename=filename, # how do i find in parquet
+classification=evr_region_classification,
+filename=file_name,
 region_id=evr_region_id,
+ship="Henry_B._Bigelow",
+cruise="HB1906",
+instrument="EK60",
+point_count=evr_point_count,
 geometry_hash=geometry_hash,
+geometry=str(geometry),
 )
-
-#
-update_df = pd.DataFrame([graph_record_manager.to_dict()])
+update_df = pd.DataFrame([parquet_record_manager.to_dict()])
 self.all_records_df = pd.concat(
 [self.all_records_df, update_df],
 ignore_index=True,
 )
+return parquet_record_manager
 except Exception as process_evr_record_exception:
 print(f"Problem with process_evr_record: {process_evr_record_exception}")
 finally:
@@ -342,41 +350,51 @@ class EchoviewRecordManager:
 def process_evr_file(
 self,
 evr_file_path: str = None,
-
+evr_file_name: str = None,
 ):
 try:
-print(f"Filename: {
-with open(evr_file_path
+print(f"Filename: {evr_file_name}")
+with open(os.path.join(evr_file_path, evr_file_name), "r") as file:
 lines = file.read()

 records = lines.split("\n\n")
 records = [i for i in records if i.startswith("13 ")] # filter
 for evr_record in records:
-self.
+self.process_evr_record_full_geometry(
+evr_record=evr_record, file_name=evr_file_name
+)
 except Exception as process_evr_file_exception:
 print(
-f"Problem processing file {
+f"Problem processing file {evr_file_name}: {process_evr_file_exception}"
 )

-def process_evr_directory(
+def process_evr_directory(
+self,
+evr_directory_path: str = data_path()["DATA_PATH"],
+):
 """Open evr directory and start to parse files"""
 try:
 all_evr_files = [
-f
-for
-
+os.path.join(dp, f)
+for dp, dn, filenames in os.walk(evr_directory_path)
+for f in filenames
+if (os.path.splitext(f)[1] == ".evr")
 ]
 all_evr_files.sort()
 print(f"Found {len(all_evr_files)} EVR files.")
-
+#
+# TODO: only processing two files right now
+#
+for evr_file in all_evr_files[0]:
 self.process_evr_file(
-evr_file_path=
+evr_file_path=os.path.dirname(evr_file),
+evr_file_name=os.path.basename(evr_file),
 )
 # I don't have the lat/lon information to draw here... need to query the zarr store...
 print(self.all_records_df)
-self.all_records_df.set_index(
-
-)
+# self.all_records_df.set_index(
+# keys="geometry_hash", drop=False, inplace=True
+# )
 # sort by time
 self.all_records_df.sort_values(
 by="time_start",
@@ -386,19 +404,22 @@ class EchoviewRecordManager:
 ignore_index=False,
 )
 print("writing files")
+# TODO: write files to tmp directory???
+# for front-end visualization
 self.all_records_df.to_parquet(
-path="
+path="parquet_record_full.parquet",
 engine="pyarrow",
 compression="snappy",
 index=True,
-partition_cols=None,
-)
-self.all_records_df.to_csv(
-path_or_buf="graph_record.csv",
-header=True,
-index=True,
-mode="w",
+# partition_cols=None,
 )
+# TODO: omit geometry and write to csv for upload to neo4j
+# self.all_records_df.to_csv(
+# path_or_buf="parquet_record_full_geometry.csv",
+# header=True,
+# index=False,
+# mode="w",
+# )
 print("done writing files")
 #
 except Exception as process_evr_directory_exception:
@@ -407,15 +428,13 @@ class EchoviewRecordManager:
 )


-if __name__ == "__main__":
-
-
-
-
-
-
-except Exception as e:
-print(e)
+# if __name__ == "__main__":
+# try:
+# echoview_record_manager = EchoviewRecordManager()
+# echoview_record_manager.process_evr_directory()
+# print("done processing everything")
+# except Exception as e:
+# print(e)


 # Example of polygon
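The refactor above keys every region record by a SHA-256 digest of its raw geometry line (record_lines[-2]), which is the geometry_hash the tests assert on. A small sketch of recomputing such a digest; the input string here is illustrative, not a complete EVR geometry line:

```python
import hashlib

# Illustrative geometry line; a real one comes from record_lines[-2] of an EVR record.
geometry_string = "20191106 1314583780 25.4929369108 20191106 1314583780 30.2941528987 1"
geometry_hash = hashlib.sha256(geometry_string.encode("utf-8")).hexdigest()
print(geometry_hash)  # 64-character hex digest used as the record's provenance key
```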
water_column_sonar_annotation/record/parquet_record_manager.py CHANGED

@@ -1,63 +1,62 @@
 from json import dumps

+
+# from pyspark.sql.functions import *
+
 """
 Format for export to parquet and bulk ingest into neo4j:
 """
-# TODO:
-# [1] write the records to a pandas dataframe
-# [2] write df to parquet and tag as github resource


 class ParquetRecordManager:
 def __init__(
 self,
-
-point_count,
-geometry,
+###
 time_start,
 time_end,
 depth_min,
 depth_max,
+altitude,
+latitude,
+longitude,
+distance_from_coastline,
+local_time,
 month,
-
-
-
-# local_time,
-# distance_from_coastline,
-# solar_altitude,
-# is_daytime,
+solar_altitude,
+phase_of_day,
+classification,
 filename,
 region_id,
-
-
-
-
+ship,
+cruise,
+instrument,
+point_count,
+geometry_hash,
+geometry,
+###
 ):
 print("__init__ called")
+self.time_start = time_start
+self.time_end = time_end
+self.depth_min = round(depth_min, 2)
+self.depth_max = round(depth_max, 2)
+self.altitude = round(altitude, 2)
+self.latitude = round(latitude, 5)
+self.longitude = round(longitude, 5)
+self.distance_from_coastline: int = int(distance_from_coastline)
+self.local_time: str = local_time
+self.month: int = int(month)
+self.solar_altitude = round(solar_altitude, 2)
+self.phase_of_day: str = phase_of_day
 self.classification: str = classification
-self.point_count: int = point_count
-# self.geometry: str = geometry
-### geospatial ###
-self.time_start: str = time_start
-self.time_end: str = time_end
-self.depth_min: float = depth_min
-self.depth_max: float = depth_max
-self.month: int = month
-# self.altitude: float = altitude
-# self.latitude: float = latitude
-# self.longitude: float = longitude
-# self.local_time: str = local_time
-# self.distance_from_coastline: float = distance_from_coastline
-# ### astronomical ###
-# self.solar_altitude: float = solar_altitude
-# self.is_daytime: bool = is_daytime
-### provenance ###
 self.filename: str = filename
-self.region_id:
-self.geometry_hash: str = geometry_hash
+self.region_id: int = int(region_id)
 self.ship: str = ship
 self.cruise: str = cruise
 self.instrument: str = instrument
+self.point_count: int = int(point_count)
+self.geometry_hash: str = geometry_hash
+self.geometry: str = str(geometry)

 # def __enter__(self):
 # print("__enter__ called")
@@ -66,6 +65,60 @@ class ParquetRecordManager:
 # def __exit__(self, *a):
 # print("__exit__ called")

+# def save_test_data(self):
+# # test_data = "geometry_hash,classification,point_count,time_start,time_end,depth_min,depth_max,month,altitude,latitude,longitude,local_time,distance_from_coastline,solar_altitude,filename,region_id,ship,cruise,instrument,phase_of_day
+# # [e78ee8839c5bd4931b0a790dabe334d5f9200e80b9e4057e5e1a62f60a14e5cf,Unclassified regions,15,2019-09-25T14:02:06.601000,2019-09-25T14:02:57.165800,8.85,14.37,9,-2.88,41.5303955078125,-71.318603515625,2019-09-25T10:02:06.601000-04:00,250.0,35.02,d20190925_t135327-t233118_Zsc-DWBA-Schools_All-RegionDefs.evr,2,Henry_B._Bigelow,HB1906,EK60,2]
+# # [46909f534985668542b6437224f0a533a8960619d93247fca0477995e559d9c0,possible_herring,8,2019-09-25T17:49:38.647000,2019-09-25T17:49:57.674000,18.75,29.31,9,3.4,41.38581466674805,-71.3131332397461,2019-09-25T13:49:38.647000-04:00,8139.0,44.57,d20190925_t135327-t233118_Zsc-DWBA-Schools_All-RegionDefs.evr,3,Henry_B._Bigelow,HB1906,EK60,2]
+# # [20d22a2da4b120ba925abe0eb39aabfa29dc9b6990888e268d0ea8a3c76511bc,fish_school,30,2019-09-25T17:49:45.160500,2019-09-25T17:49:50.164500,20.28,26.3,9,6.83,41.38582992553711,-71.31282806396484,2019-09-25T13:49:45.160500-04:00,8146.0,44.57,d20190925_t135327-t233118_Zsc-DWBA-Schools_All-RegionDefs.evr,8,Henry_B._Bigelow,HB1906,EK60,2]
+# times_start = pd.to_datetime(
+# [ # dfp["time_start"] >= np.datetime64("2019-09-25T17:49:38.647000")
+# "2019-09-25T14:02:06.601000",
+# "2019-09-25T17:49:38.647000",
+# "2019-09-25T17:49:45.160500",
+# ]
+# )
+# times_end = pd.to_datetime(
+# [
+# "2019-09-25T14:02:57.165800",
+# "2019-09-25T17:49:57.674000",
+# "2019-09-25T17:49:50.164500",
+# ]
+# )
+# geometry_hashes = [
+# "e78ee8839c5bd4931b0a790dabe334d5f9200e80b9e4057e5e1a62f60a14e5cf",
+# "46909f534985668542b6437224f0a533a8960619d93247fca0477995e559d9c0",
+# "20d22a2da4b120ba925abe0eb39aabfa29dc9b6990888e268d0ea8a3c76511bc",
+# ]
+# df = pd.DataFrame(
+# data={
+# "time_start": times_start,
+# "time_end": times_end,
+# "geometry_hash": geometry_hashes,
+# },
+# # index=["geometry_hashes"],
+# )
+# # df.set_index("geometry_hash", drop=True, inplace=True)
+# df.to_parquet(
+# path="test.parquet",
+# engine="pyarrow",
+# compression="snappy",
+# index=True,
+# # partition_cols=df.columns,
+# )
+# print("done")
+# # df = pd.read_csv("Henry_B._Bigelow_HB1906_annotations.parquet")
+# #
+# ### now test reading ###
+# #
+# dfp = pq.read_table(source="test.parquet")
+# print(dfp.shape)
+# dfp_select = dfp.filter(
+# (dfp["time_start"] >= np.datetime64("2019-09-25T17:49:40"))
+# & (dfp["time_end"] <= np.datetime64("2019-09-25T17:49:51"))
+# )
+# print(dfp_select["geometry_hash"])
+# # df = pq.read_table(source="Henry_B._Bigelow_HB1906_annotations.parquet").to_pandas()
+
 def to_dict(
 self,
 ):
@@ -81,3 +134,22 @@ class ParquetRecordManager:
 return dumps(self.__dict__)
 except Exception as parquet_record_exception:
 print(f"Problem with parquet record: {parquet_record_exception}")
+
+
+# if __name__ == "__main__":
+# try:
+# parquet_record_manager = ParquetRecordManager()
+# parquet_record_manager.save_test_data()
+# except Exception as e:
+# print(e)
+
+
+"""
+with pandas:
+pyarrow.Table
+time_start: timestamp[us]
+time_end: timestamp[us]
+geometry_hash: large_string
+with numpy datetime64 -> doesn't work
+'>=' not supported between instances of 'pyarrow.lib.ChunkedArray' and 'Timestamp'
+"""
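After process_evr_directory writes parquet_record_full.parquet, the records can be read back and windowed by time. A hedged sketch with pandas: the file name comes from the diff above, the time bounds mirror the commented-out pyarrow experiment, and the column selection assumes the fields defined on ParquetRecordManager.

```python
import pandas as pd

# Read the annotation records written by process_evr_directory (file name from the diff).
df = pd.read_parquet("parquet_record_full.parquet")

# Select regions overlapping a short time window; bounds mirror the commented example above.
window = df[
    (df["time_start"] >= pd.Timestamp("2019-09-25T17:49:40"))
    & (df["time_end"] <= pd.Timestamp("2019-09-25T17:49:51"))
]
print(window[["geometry_hash", "classification", "phase_of_day"]])
```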
water_column_sonar_annotation/shape/shape_manager.py CHANGED

@@ -1,29 +1,29 @@
-class ShapeManager:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+# class ShapeManager:
+# def __init__(
+# self,
+# ):
+# self.DECIMAL_PRECISION = 4
+#
+# def point(
+# self,
+# date_string,
+# time_string,
+# depth_string,
+# ): # -> returntype # TODO:
+# pass
+#
+# def polygon(
+# self,
+# date_string,
+# time_string,
+# depth_string,
+# ): # -> type # TODO:
+# pass
+#
+# def bounding_box(
+# self,
+# date_string,
+# time_string,
+# depth_string,
+# ): # -> returntype # TODO:
+# pass
{water_column_sonar_annotation-26.1.8.dist-info → water_column_sonar_annotation-26.2.0.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: water-column-sonar-annotation
-Version: 26.
+Version: 26.2.0
 Summary: Processing Tool for Working with Water Column Sonar Annotations
 Author-email: Rudy Klucik <rudy.klucik@noaa.gov>
 Maintainer-email: Rudy Klucik <rudy.klucik@noaa.gov>
@@ -25,6 +25,7 @@ Requires-Dist: pooch
 Requires-Dist: pvlib
 Requires-Dist: pyarrow
 Requires-Dist: pyogrio
+Requires-Dist: pyspark
 Requires-Dist: python-dotenv
 Requires-Dist: s3fs
 Requires-Dist: scipy
@@ -40,7 +41,7 @@ Dynamic: license-file
 Tool for converting EVR files to annotated regions of interest in parquet format

 
-

 # Setting up the Python Environment

@@ -82,7 +83,7 @@ TODO
 Step 1 --> increment the semantic version in the zarr_manager.py "metadata" & the "pyproject.toml"

 ```commandline
-git tag -a v26.1.
+git tag -a v26.1.8 -m "Releasing v26.1.8"
 git push origin --tags
 gh release create v26.1.0
 ```
{water_column_sonar_annotation-26.1.8.dist-info → water_column_sonar_annotation-26.2.0.dist-info}/RECORD CHANGED

@@ -1,27 +1,27 @@
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/conftest.py,sha256=
+tests/conftest.py,sha256=qlztU9v4bk4zkUkUY55Oz4YUAxKptyQBK5Jfycgj2zA,2214
 tests/astronomical/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/astronomical/test_astronomical_manager.py,sha256=
+tests/astronomical/test_astronomical_manager.py,sha256=BNVj_xlem-qXIfvOttaxN_X-b7m64kFYEhoi4_HN3-s,5127
 tests/cruise/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/cruise/test_cruise_manager.py,sha256=
+tests/cruise/test_cruise_manager.py,sha256=ulQ8Sog4Lz8ETFI7SXvBpaSoXyEeD_EZJnFs5hnFMHo,2289
 tests/geospatial/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/geospatial/test_geospatial_manager.py,sha256=
-tests/record/test_echoview_record_manager.py,sha256=
-water_column_sonar_annotation/__init__.py,sha256
+tests/geospatial/test_geospatial_manager.py,sha256=zsEy4sbUdsrRkAo95nD7UZ9QWNUS9V-w2E0LU50MP9I,2501
+tests/record/test_echoview_record_manager.py,sha256=qVkiCGLIdY7YPN8NztKiqtyFnEBfTCb3sJlUkE71LJk,15372
+water_column_sonar_annotation/__init__.py,sha256=mLrO0242C3iksRuXJBDXrwQ5mz2zyzJQ0fWWVkz21sk,139
 water_column_sonar_annotation/astronomical/__init__.py,sha256=iWJIwLDD0uK9ZGBH1EkKFKXmAdtD-mYAbrWlibXAJFU,94
-water_column_sonar_annotation/astronomical/astronomical_manager.py,sha256=
+water_column_sonar_annotation/astronomical/astronomical_manager.py,sha256=igzisj3_n05kVAmd1nBj5Q73jTpjJXdDyEydaJNj7WA,3080
 water_column_sonar_annotation/cruise/__init__.py,sha256=G2lZmW1UaX2AmznB7qEFH4j6mNiNQxLsQmTeRFTenOM,76
 water_column_sonar_annotation/cruise/cruise_manager.py,sha256=Y-6eeHobHor9ZY33nSyk_iTz-cGzVgoWXgwPkNI2JMM,3546
 water_column_sonar_annotation/geospatial/__init__.py,sha256=wy7fP2g5vic7L3bHcpFzlMsW811ca2WfReTCeOzeRKk,88
-water_column_sonar_annotation/geospatial/geospatial_manager.py,sha256=
+water_column_sonar_annotation/geospatial/geospatial_manager.py,sha256=yBtsMY0cQ2Sw27xS5FmJIfUgAN5jjN4H1Q3C7LnNVj4,4967
 water_column_sonar_annotation/record/__init__.py,sha256=2OKbUE6xbLSY6LFSiEgqV9hTT6piOKzWHZiKXKaGqmw,261
-water_column_sonar_annotation/record/echoview_record_manager.py,sha256=
-water_column_sonar_annotation/record/graph_record_manager.py,sha256=
-water_column_sonar_annotation/record/parquet_record_manager.py,sha256=
+water_column_sonar_annotation/record/echoview_record_manager.py,sha256=YE1NqWJNVJ-1d3xOlElsRizpqkfCDlrJF-HwHv7Vhkk,20992
+water_column_sonar_annotation/record/graph_record_manager.py,sha256=owqcpCEmwt4gCltEtVWzRHHDE0RUwb1MNT8UB5vzcXc,2363
+water_column_sonar_annotation/record/parquet_record_manager.py,sha256=Rmdq3fge8iWfXd9nX3teHiulOkSGKRuAIMY8YFejZnI,5880
 water_column_sonar_annotation/shape/__init__.py,sha256=Ssg9_cz1zkU42X-oy9FmnfcFtkl2ic5eCHRryG2hZoU,75
-water_column_sonar_annotation/shape/shape_manager.py,sha256=
-water_column_sonar_annotation-26.
-water_column_sonar_annotation-26.
-water_column_sonar_annotation-26.
-water_column_sonar_annotation-26.
-water_column_sonar_annotation-26.
+water_column_sonar_annotation/shape/shape_manager.py,sha256=h7nsknPpcGiHS8l4KiXBVPmpQVix4FjdArkc2U7Tg4s,569
+water_column_sonar_annotation-26.2.0.dist-info/licenses/LICENSE,sha256=lz4IpJ5_adG3S0ali-WaIpQFVTnEAOucMDQPECUVEYw,1110
+water_column_sonar_annotation-26.2.0.dist-info/METADATA,sha256=FzuhwUAAqE1HwY6BrR8YY031K-k0pXsutqknaN227g0,2747
+water_column_sonar_annotation-26.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+water_column_sonar_annotation-26.2.0.dist-info/top_level.txt,sha256=WmW_0S58c1A0pgWNXbozKC-ahrBAfhiDSGv68kzC-VE,36
+water_column_sonar_annotation-26.2.0.dist-info/RECORD,,

{water_column_sonar_annotation-26.1.8.dist-info → water_column_sonar_annotation-26.2.0.dist-info}/WHEEL: file without changes
{water_column_sonar_annotation-26.1.8.dist-info → water_column_sonar_annotation-26.2.0.dist-info}/licenses/LICENSE: file without changes
{water_column_sonar_annotation-26.1.8.dist-info → water_column_sonar_annotation-26.2.0.dist-info}/top_level.txt: file without changes