windborne 1.0.9-py3-none-any.whl → 1.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- windborne/__init__.py +6 -15
- windborne/api_request.py +227 -0
- windborne/cli.py +53 -60
- windborne/cyclone_formatting.py +210 -0
- windborne/data_api.py +429 -1051
- windborne/forecasts_api.py +186 -305
- windborne/observation_formatting.py +456 -0
- windborne/utils.py +15 -887
- {windborne-1.0.9.dist-info → windborne-1.1.1.dist-info}/METADATA +1 -2
- windborne-1.1.1.dist-info/RECORD +13 -0
- windborne/config.py +0 -42
- windborne-1.0.9.dist-info/RECORD +0 -11
- {windborne-1.0.9.dist-info → windborne-1.1.1.dist-info}/WHEEL +0 -0
- {windborne-1.0.9.dist-info → windborne-1.1.1.dist-info}/entry_points.txt +0 -0
- {windborne-1.0.9.dist-info → windborne-1.1.1.dist-info}/top_level.txt +0 -0
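For orientation, the data_api.py diff below renames the `save_to_file` argument of the page functions to `output_file` and replaces the old `observations()` / `poll_observations()` implementations with `get_observations()` / `get_super_observations()` built on a shared fetching core. A minimal usage sketch follows; it assumes windborne 1.1.1 is installed with API credentials configured, imports directly from `windborne.data_api` (top-level re-exports may differ), and infers behavior from the signatures and docstrings in the diff rather than from documentation.

```python
# Illustrative sketch only: names come from the diff below; behavior of the live
# API and the package's top-level exports are assumptions, not documented facts.
from windborne.data_api import get_observations_page, get_super_observations

# Single page of observations; `output_file` replaces 1.0.9's `save_to_file`.
page = get_observations_page(
    since="2024-01-01 00:00:00",
    include_mission_name=True,
    output_file="observations_page.csv",
)

# Bulk fetch of super observations, bucketed into 6-hour NetCDF files.
get_super_observations(
    start_time="2024-01-01 00:00:00",
    end_time="2024-01-02 00:00:00",
    output_format="netcdf",
    output_dir="./superobs",
)
```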
windborne/data_api.py
CHANGED
@@ -1,23 +1,20 @@
-from .config import DATA_API_BASE_URL, LAUNCH_SITES
-from .utils import make_api_request, to_unix_timestamp, save_csv_json, format_little_r, convert_to_netcdf
-
 import time
 import os
-from math import floor
 from datetime import datetime, timezone, timedelta
 import csv
 import json
-import hashlib

-
+from .api_request import make_api_request
+from .observation_formatting import format_little_r, convert_to_netcdf
+from .utils import to_unix_timestamp, save_arbitrary_response
+
+DATA_API_BASE_URL = "https://sensor-data.windbornesystems.com/api/v1"

 # ------------
 # CORE RESOURCES
 # ------------

-
-# ------------
-def get_observations_page(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=True, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, save_to_file=None):
+def get_observations_page(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=True, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, output_file=None):
     """
     Retrieves observations page based on specified filters including geographical bounds.

@@ -35,7 +32,7 @@ def get_observations_page(since=None, min_time=None, max_time=None, include_ids=
         min_longitude (float): Minimum longitude boundary.
         max_longitude (float): Maximum longitude boundary.

-
+        output_file (str): Optional path to save the response data.
             If provided, saves the data in CSV format.

     Returns:
@@ -51,7 +48,7 @@ def get_observations_page(since=None, min_time=None, max_time=None, include_ids=
     if min_time:
         params["min_time"] = to_unix_timestamp(min_time)
     if max_time:
-        params["max_time"] = to_unix_timestamp(
+        params["max_time"] = to_unix_timestamp(max_time)
     if mission_id:
         params["mission_id"] = mission_id
     if min_latitude:
@@ -69,551 +66,17 @@ def get_observations_page(since=None, min_time=None, max_time=None, include_ids=
     if include_updated_at:
         params["include_updated_at"] = True

-    # Remove any keys where the value is None to avoid sending unnecessary parameters
     params = {k: v for k, v in params.items() if v is not None}

     response = make_api_request(url, params=params)

-    if
-
+    if output_file:
+        save_arbitrary_response(output_file, response, csv_data_key='observations')

     return response

-def observations(start_time, end_time=None, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
-    """
-    Fetches observations between a start time and an optional end time and saves to files in specified format.
-    Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
-    For example, for 6-hour buckets centered on 00 UTC, the start time should be 21 UTC of the previous day.
-
-    Args:
-        start_time (str): A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
-            representing the starting time of fetching data.
-        end_time (str): Optional. A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
-            representing the end time of fetching data. If not provided, current time is used as end time.
-
-        include_ids (bool): Include observation IDs in response.
-        include_updated_at (bool): Include update timestamps in response.
-        mission_id (str): Filter observations by mission ID.
-        min_latitude (float): Minimum latitude boundary.
-        max_latitude (float): Maximum latitude boundary.
-        min_longitude (float): Minimum longitude boundary.
-        max_longitude (float): Maximum longitude boundary.

-
-        save_to_file (str): Saves all data to a single file instead of bucketing.
-            Supported formats are '.csv', '.json', '.little_r' and '.nc'
-        bucket_hours (int): Optional. Size of time buckets in hours. Defaults to 6 hours.
-        output_format (str): Optional. Format to save data in separate files. Supported formats are 'json, 'csv', 'little_r' and 'netcdf'.
-        output_dir (str): Optional. Directory path where the separate files should be saved. If not provided, files will be saved in current directory.
-        callback (callable): Optional callback function that receives (super observations, metadata) before saving.
-            This allows custom processing or saving in custom formats.
-    """
-
-    start_time = to_unix_timestamp(start_time)
-
-    if end_time:
-        end_time = to_unix_timestamp(end_time)
-    else:
-        end_time = int(datetime.now().timestamp())
-
-    # Supported formats for saving into separate files:
-    # - csv (default)
-    # - little_r
-    # - json
-    # - netcdf
-    if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
-        print("Please use one of the following formats:")
-        print(" - json")
-        print(" - csv")
-        print(" - little_r")
-        print(" - netcdf")
-        return
-
-    # Supported formats for saving into a single file:
-    # NOTE: for observations we handle .csv saving within observations and not using save_csv_json
-    # - .csv
-    # - .json
-    # - .little_r
-    # - .nc
-    if save_to_file and not save_to_file.endswith(('.json', '.csv', '.little_r', '.nc')):
-        print("Please use one of the following formats:")
-        print(" - .json")
-        print(" - .csv")
-        print(" - .little_r")
-        print(" - .nc")
-        return
-
-    # Convert start_time to datetime
-    start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
-
-    # Calculate first center time that's after start_time
-    hours_since_day_start = start_dt.hour + start_dt.minute / 60
-    bucket_number = hours_since_day_start // bucket_hours
-    first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
-
-
-    # Headers for CSV files
-    headers = [
-        "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
-        "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
-    ]
-
-    if save_to_file:
-        all_observations = {}
-    else:
-        buckets = {}
-
-    # Initialize the polling loop
-    current_timestamp = start_time
-    has_next_page = True
-    fetced_so_far = 0
-
-    print(f"Starting polling observations\nfrom {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC to {datetime.fromtimestamp(end_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
-    print("-----------------------------------------------------")
-
-
-    while has_next_page:
-        try:
-            # Fetch observations
-            observations_page = get_observations_page(
-                since=current_timestamp,
-                min_latitude=min_latitude,
-                max_latitude=max_latitude,
-                min_longitude=min_longitude,
-                max_longitude=max_longitude,
-                include_updated_at=include_updated_at,
-                mission_id=mission_id,
-                include_ids=include_ids,
-                include_mission_name=True
-            )
-
-            if observations_page is None:
-                print("\n----------------------------------------------------------------------")
-                print(f"Received null response from API. Retrying in {interval} seconds ...")
-                print("----------------------------------------------------------------------")
-                time.sleep(interval)
-                continue
-
-            observations = observations_page.get('observations', [])
-            fetced_so_far = fetced_so_far + len(observations)
-            print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
-            print(f"Fetched {fetced_so_far} observations")
-            print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
-            print("-----------------------------------------------------")
-
-            # Invoke the callback with fetched observations
-            if callback:
-                print("--------\nCallback\n--------")
-                callback(observations)
-
-            for obs in observations:
-                if 'mission_name' not in obs:
-                    print("Warning: got an observation without a mission name")
-                    continue
-
-                timestamp = obs.get('timestamp')
-                if not timestamp:
-                    continue
-
-                try:
-                    obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
-                except (OSError, ValueError, TypeError, OverflowError):
-                    continue
-
-                mission_name = obs.get('mission_name', 'Unknown')
-                obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
-
-                processed_obs = {}
-                for header in headers:
-                    value = obs.get(header)
-                    if value is None or value == '' or (isinstance(value, str) and not value.strip()):
-                        processed_obs[header] = 'None'
-                    else:
-                        processed_obs[header] = value
-
-                obs_id = f"{timestamp}_{mission_name}"
-
-                if save_to_file:
-                    all_observations[obs_id] = processed_obs
-                else:
-                    if obs_time >= start_dt:  # Only process observations after start time
-                        hours_diff = (obs_time - first_center).total_seconds() / 3600
-                        bucket_index = floor(hours_diff / bucket_hours)
-                        bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
-                        bucket_end = bucket_center + timedelta(hours=bucket_hours)
-
-                        if obs_time <= bucket_end:  # Include observations up to the end of the bucket
-                            bucket_key = (bucket_center, mission_name)
-                            if bucket_key not in buckets:
-                                buckets[bucket_key] = {}
-                            buckets[bucket_key][obs_id] = processed_obs
-
-            # Update pagination
-            next_timestamp = observations_page.get('next_since')
-            has_next_page = observations_page.get('has_next_page', False)
-
-            if not has_next_page or not next_timestamp or next_timestamp <= current_timestamp:
-                print("-----------------------------------------------------\n")
-                print("Fetching complete.")
-                print("\n-----------------------------------------------------")
-                break
-
-            current_timestamp = next_timestamp
-
-        except KeyboardInterrupt:
-            print("\n\n\U0001F6D1 Received interrupt, stopping...")
-            print("-----------------------------------------------------")
-            print("Requested data was not saved!\nRun again and do not interrupt the run to save data.")
-            print("-----------------------------------------------------")
-            exit(3)
-        except Exception as e:
-            print(f"Error occurred: {e}")
-            exit(1001)
-
-    # Save data to a single file
-    if save_to_file:
-        # Create directory path if it doesn't exist
-        directory = os.path.dirname(save_to_file)
-        if directory and not os.path.isdir(directory):
-            os.makedirs(directory, exist_ok=True)
-        filtered_observations = {obs_id: obs for obs_id, obs in all_observations.items()
-                                 if float(obs['timestamp']) >= start_time}
-        # Sort by timestamp
-        sorted_observations = dict(sorted(filtered_observations.items(),
-                                          key=lambda x: float(x[1]['timestamp'])))
-
-        print(f"Saving {len(sorted_observations)} {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
-        print("This may take a while...")
-        print("-----------------------------------------------------\n")
-
-        if save_to_file.endswith('.nc'):
-            first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
-            convert_to_netcdf(sorted_observations, first_obs_timestamp, save_to_file)
-
-        elif save_to_file.endswith('.json'):
-            with open(save_to_file, 'w', encoding='utf-8') as f:
-                json.dump(sorted_observations, f, indent=4)
-
-        elif save_to_file.endswith('.csv'):
-            with open(save_to_file, mode='w', newline='') as file:
-                writer = csv.DictWriter(file, fieldnames=headers)
-                writer.writeheader()
-                writer.writerows(sorted_observations.values())
-
-        elif save_to_file.endswith('.little_r'):
-            little_r_records = format_little_r(list(sorted_observations.items()))
-            with open(save_to_file, 'w') as file:
-                file.write('\n'.join(little_r_records))
-
-        print(f"Saved {len(sorted_observations)} {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
-
-    # Save data to multiple file
-    elif output_format:
-        # Create output directory if specified
-        if output_dir:
-            os.makedirs(output_dir, exist_ok=True)
-            print(f"Files will be saved to {output_dir}")
-        else:
-            print(f"Files will be saved to {os.getcwd()}")
-        print(f"Processing {fetced_so_far} {'observation' if fetced_so_far == 1 else 'observations'} and save them over multiple files.")
-        print("This may take a while...")
-        print("-----------------------------------------------------\n")
-        # Track statistics per mission
-        mission_stats = {}  # {mission_name: {'files': 0, 'observations': 0}}
-        total_observations_written = 0
-
-        # Save bucketed data
-        for (bucket_center, mission_name), observations in buckets.items():
-            if observations:
-                # Format hour to be the actual bucket center
-                bucket_hour = int((bucket_center.hour + bucket_hours / 2) % 24)
-
-                # Generate file name based on output format
-                file_name_format = {
-                    'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
-                    'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
-                    'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
-                    'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
-                }
-                file_name = file_name_format[output_format] % (
-                    bucket_center.year, bucket_center.month, bucket_center.day,
-                    bucket_hour, bucket_hours)
-
-                output_file = os.path.join(output_dir or '.', file_name)
-
-                # Sort observations by timestamp within each bucket
-                sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
-
-                if output_format == 'netcdf':
-                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
-
-                elif output_format == 'csv':
-                    with open(output_file, mode='w', newline='') as file:
-                        writer = csv.DictWriter(file, fieldnames=headers)
-                        writer.writeheader()
-                        writer.writerows(sorted_obs)
-
-                elif output_format == 'json':
-                    sorted_obs_dict = {k: v for k, v in sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))}
-                    with open(output_file, 'w', encoding='utf-8') as file:
-                        json.dump(sorted_obs_dict, file, indent=4)
-
-                elif output_format == 'little_r':
-                    little_r_records = format_little_r(sorted_obs)
-                    with open(output_file, 'w') as file:
-                        file.write('\n'.join(little_r_records))
-                total_observations_written += len(observations)
-
-                # Update statistics
-                if mission_name not in mission_stats:
-                    mission_stats[mission_name] = {'files': 0, 'observations': 0}
-                mission_stats[mission_name]['files'] += 1
-                mission_stats[mission_name]['observations'] += len(observations)
-        # Print total observations written
-        print(f"Saved {total_observations_written} {'observation.' if total_observations_written == 1 else 'observations.'}")
-        print("-----------------------------------------------------")
-
-        # Print summary for each mission
-        for mission_name, stats in mission_stats.items():
-            print(f"Mission {mission_name}: Saved {stats['observations']} {'observation' if stats['observations'] == 1 else 'observations'} across {stats['files']} {'file' if stats['files'] == 1 else 'files'}")
-
-        print("-----------------------------------------------------")
-        print("All observations have been processed and saved.")
-
-def poll_observations(start_time, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
-    """
-    Continuously polls for observations and saves to files in specified format.
-    Will run indefinitely until interrupted.
-
-    Args:
-        start_time (str): Starting time in YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM or ISO format
-        include_ids (bool): Include observation IDs in response.
-        include_updated_at (bool): Include update timestamps in response.
-        mission_id (str): Filter observations by mission ID.
-        min_latitude (float): Minimum latitude boundary.
-        max_latitude (float): Maximum latitude boundary.
-        min_longitude (float): Minimum longitude boundary.
-        max_longitude (float): Maximum longitude boundary.
-        interval (int): Polling interval in seconds when no data is received (default: 60)
-        bucket_hours (float): Size of time buckets in hours (default: 6.0)
-        output_format (str): Format for bucket files ('json', 'csv', 'little_r', 'netcdf')
-        output_dir (str): Directory for bucket files (default: current directory)
-        callback (callable): Optional callback for data processing
-    """
-    # Print warning about infinite loop
-    print(" ___________________________________________________________________")
-    print("| WARNING \U000026A0\U0000FE0F |")
-    print("| You are entering an endless loop. |")
-    print("| |")
-    print("| Press Ctrl + C anytime to exit. |")
-    print("|___________________________________________________________________|\n\n")
-    time.sleep(4)
-
-    start_time = to_unix_timestamp(start_time)
-
-    if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
-        print("Please use one of the following formats:")
-        print(" - json\n - csv\n - little_r\n - netcdf")
-        return
-
-    if output_dir:
-        os.makedirs(output_dir, exist_ok=True)
-        print(f"\U0001F4C1 Files will be saved to {output_dir}")
-    else:
-        print(f"\U0001F4C1 Files will be saved to {os.getcwd()}")
-
-    # Convert start_time to datetime
-    start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
-
-    # Calculate first center time that's after start_time
-    hours_since_day_start = start_dt.hour + start_dt.minute / 60
-    bucket_number = hours_since_day_start // bucket_hours
-    first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
-
-    headers = [
-        "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
-        "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
-    ]
-
-    buckets = {}  # {(bucket_center, mission_name): {'data': {}, 'last_write': timestamp, 'data_hash': str}}
-    current_timestamp = start_time
-    fetched_so_far = 0
-    mission_stats = {}
-
-    print(f"Starting continuous observations polling from {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
-    print(f"Polling interval: {interval} seconds")
-    print("-----------------------------------------------------")
-
-    try:
-        while True:
-            observations_page = get_observations_page(
-                since=current_timestamp,
-                min_latitude=min_latitude,
-                max_latitude=max_latitude,
-                min_longitude=min_longitude,
-                max_longitude=max_longitude,
-                include_updated_at=include_updated_at,
-                mission_id=mission_id,
-                include_ids=include_ids,
-                include_mission_name=True
-            )
-
-            if observations_page is None:
-                print(f"\nNull response from API. Retrying in {interval} seconds ...")
-                time.sleep(interval)
-                continue
-
-            observations = observations_page.get('observations', [])
-
-            # Invoke the callback with fetched super observations
-            if callback:
-                print("--------\nCallback\n--------")
-                callback(observations)
-
-            if observations:
-                fetched_so_far += len(observations)
-                print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
-                print(f"Fetched {fetched_so_far} observations")
-                print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
-                print("-----------------------------------------------------")
-
-            for obs in observations:
-                if 'mission_name' not in obs:
-                    continue
-
-                timestamp = obs.get('timestamp')
-                if not timestamp:
-                    continue
-
-                try:
-                    obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
-                except (OSError, ValueError, TypeError, OverflowError):
-                    continue
-
-                mission_name = obs.get('mission_name', 'Unknown')
-                obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
-
-                processed_obs = {
-                    header: obs.get(header) if obs.get(header) not in [None, '', ' '] else 'None'
-                    for header in headers
-                }
-
-                obs_id = f"{timestamp}_{mission_name}"
-
-                if obs_time >= start_dt:
-                    hours_diff = (obs_time - first_center).total_seconds() / 3600
-                    bucket_index = floor(hours_diff / bucket_hours)
-                    bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
-                    bucket_end = bucket_center + timedelta(hours=bucket_hours)
-
-                    if obs_time <= bucket_end:
-                        bucket_key = (bucket_center, mission_name)
-
-                        # Initialize bucket if needed
-                        if bucket_key not in buckets:
-                            buckets[bucket_key] = {
-                                'data': {},
-                                'last_write': 0,
-                                'data_hash': ''
-                            }
-
-                        # Update bucket data
-                        buckets[bucket_key]['data'][obs_id] = processed_obs
-
-                        # Track statistics
-                        if mission_name not in mission_stats:
-                            mission_stats[mission_name] = {'files': set(), 'observations': 0}
-                        mission_stats[mission_name]['observations'] += 1
-
-                        # Calculate new data hash
-                        sorted_data = sorted(buckets[bucket_key]['data'].items(), key=lambda x: int(x[1]['timestamp']))
-                        data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
-
-                        # Check if we should write the bucket
-                        current_time = datetime.now(timezone.utc)
-                        time_since_last_write = current_time.timestamp() - buckets[bucket_key]['last_write']
-                        data_changed = data_hash != buckets[bucket_key]['data_hash']
-
-                        # Write if it's been more than interval seconds since last write OR if data has changed
-                        if (time_since_last_write >= interval or data_changed) and output_format:
-                            bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
-
-                            file_name_format = {
-                                'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
-                                'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
-                                'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
-                                'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
-                            }
-
-                            file_name = file_name_format[output_format] % (
-                                bucket_center.year, bucket_center.month, bucket_center.day,
-                                bucket_hour, bucket_hours)
-
-                            output_file = os.path.join(output_dir or '.', file_name)
-                            sorted_obs = [obs for _, obs in sorted_data]
-
-                            # Write the file based on format
-                            try:
-                                if output_format == 'netcdf':
-                                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
-                                elif output_format == 'csv':
-                                    with open(output_file, mode='w', newline='') as file:
-                                        writer = csv.DictWriter(file, fieldnames=headers)
-                                        writer.writeheader()
-                                        writer.writerows(sorted_obs)
-                                elif output_format == 'json':
-                                    sorted_obs_dict = {k: v for k, v in sorted_data}
-                                    with open(output_file, 'w', encoding='utf-8') as file:
-                                        json.dump(sorted_obs_dict, file, indent=4)
-                                elif output_format == 'little_r':
-                                    little_r_records = format_little_r(sorted_obs)
-                                    with open(output_file, 'w') as file:
-                                        file.write('\n'.join(little_r_records))
-
-                                buckets[bucket_key]['last_write'] = current_time.timestamp()
-                                buckets[bucket_key]['data_hash'] = data_hash
-                                mission_stats[mission_name]['files'].add(output_file)
-                            except Exception as e:
-                                print(f"Error writing bucket file {file_name}: {str(e)}")
-
-            # Clean up old buckets
-            current_time = datetime.now(timezone.utc)
-            buckets = {
-                k: v for k, v in buckets.items()
-                if current_time - k[0] <= timedelta(hours=bucket_hours * 2)  # Keep slightly longer for potential updates
-            }
-
-            next_timestamp = observations_page.get('next_since')
-            has_next_page = observations_page.get('has_next_page', False)
-
-            if next_timestamp and next_timestamp > current_timestamp:
-                current_timestamp = next_timestamp
-            elif not has_next_page:
-                print("-----------------------------------------------------")
-                print(f"\U0001F503 Latest super observations data have been processed.\nRetrying getting new observations data in {interval} seconds...")
-                print("-----------------------------------------------------")
-                time.sleep(interval)
-                continue
-
-            if not observations:
-                print(f"\U0001F503 No new super observations data available.\n Retrying getting new observations data in {interval} seconds...")
-                print("-----------------------------------------------------")
-                time.sleep(interval)
-
-    except KeyboardInterrupt:
-        print("\n\n\U0001F6D1 Received interrupt, stopping...")
-        print("-----------------------------------------------------")
-        for mission_name, stats in mission_stats.items():
-            print(f"Mission {mission_name}: {stats['observations']} observations across {len(stats['files'])} files")
-    except Exception as e:
-        print(f"Error occurred: {str(e)}")
-        exit(1001)
-    finally:
-        print("-----------------------------------------------------")
-        print("Finished processing observations.")
-
-# Super Observations
-# ------------
-def get_super_observations_page(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=None, include_updated_at=None, mission_id=None, save_to_file=None):
+def get_super_observations_page(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=None, include_updated_at=None, mission_id=None, output_file=None):
     """
     Retrieves super observations page based on specified filters.

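The removed `observations()` body above assigned each record to a time bucket measured from the first bucket center after the start time. A minimal worked example of that arithmetic, with values chosen purely for illustration (this is not part of the package):

```python
# Simplified illustration of the bucket arithmetic in the removed code above.
from datetime import datetime, timedelta, timezone
from math import floor

bucket_hours = 6.0
start_dt = datetime(2024, 1, 1, 21, 30, tzinfo=timezone.utc)

# First bucket center strictly after start_dt, measured from midnight.
hours_since_day_start = start_dt.hour + start_dt.minute / 60          # 21.5
bucket_number = hours_since_day_start // bucket_hours                  # 3.0
first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) \
    + timedelta(hours=(bucket_number + 1) * bucket_hours)              # 2024-01-02 00:00 UTC

# An observation at 01:10 UTC the next day falls in bucket index 0,
# i.e. the 6-hour bucket centered on 00 UTC.
obs_time = datetime(2024, 1, 2, 1, 10, tzinfo=timezone.utc)
hours_diff = (obs_time - first_center).total_seconds() / 3600
bucket_index = floor(hours_diff / bucket_hours)                        # 0
bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
print(bucket_center)  # 2024-01-02 00:00:00+00:00
```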
@@ -625,7 +88,7 @@ def get_super_observations_page(since=None, min_time=None, max_time=None, includ
         include_mission_name (bool): Include mission names in response.
         include_updated_at (bool): Include update timestamps in response.
         mission_id (str): Filter observations by mission ID.
-
+        output_file (str): Optional path to save the response data.
             If provided, saves the data in CSV format.

     Returns:
@@ -633,7 +96,7 @@ def get_super_observations_page(since=None, min_time=None, max_time=None, includ
     """

     url = f"{DATA_API_BASE_URL}/super_observations.json"
-
+
     params = {}
     if since:
         params["since"] = to_unix_timestamp(since)
@@ -649,316 +112,358 @@ def get_super_observations_page(since=None, min_time=None, max_time=None, includ
|
|
649
112
|
params["include_mission_name"] = True
|
650
113
|
if include_updated_at:
|
651
114
|
params["include_updated_at"] = True
|
652
|
-
|
115
|
+
|
653
116
|
params = {k: v for k, v in params.items() if v is not None}
|
654
|
-
|
117
|
+
|
655
118
|
response = make_api_request(url, params=params)
|
656
|
-
if
|
657
|
-
|
658
|
-
|
119
|
+
if output_file:
|
120
|
+
save_arbitrary_response(output_file, response, csv_data_key='observations')
|
121
|
+
|
659
122
|
return response
|
660
123
|
|
661
|
-
|
124
|
+
|
125
|
+
def save_observations_batch(observations, output_file, output_format, output_dir, start_time=None, end_time=None, bucket_hours=6.0, csv_headers=None, custom_save=None, prevent_overwrites=False):
|
126
|
+
filtered_observations = observations
|
127
|
+
if start_time is not None:
|
128
|
+
filtered_observations = [obs for obs in observations if float(obs['timestamp']) >= start_time]
|
129
|
+
|
130
|
+
if end_time is not None:
|
131
|
+
filtered_observations = [obs for obs in observations if float(obs['timestamp']) <= end_time]
|
132
|
+
|
133
|
+
# Sort by timestamp
|
134
|
+
sorted_observations = sorted(filtered_observations, key=lambda x: float(x['timestamp']))
|
135
|
+
|
136
|
+
if output_file:
|
137
|
+
if custom_save is not None:
|
138
|
+
custom_save(sorted_observations, output_file)
|
139
|
+
else:
|
140
|
+
save_observations_to_file(sorted_observations, output_file, csv_headers=csv_headers, prevent_overwrites=prevent_overwrites)
|
141
|
+
else:
|
142
|
+
save_observations_batch_in_buckets(sorted_observations, output_format, output_dir, bucket_hours=bucket_hours, csv_headers=csv_headers, custom_save=custom_save, prevent_overwrites=prevent_overwrites)
|
143
|
+
|
144
|
+
|
145
|
+
def save_observations_to_file(sorted_observations, output_file, csv_headers=None, prevent_overwrites=False):
|
146
|
+
if len(sorted_observations) == 0:
|
147
|
+
print(f"Skipping empty file {output_file}")
|
148
|
+
return
|
149
|
+
|
150
|
+
directory = os.path.dirname(output_file)
|
151
|
+
if directory and not os.path.isdir(directory):
|
152
|
+
os.makedirs(directory, exist_ok=True)
|
153
|
+
|
154
|
+
if prevent_overwrites and os.path.exists(output_file):
|
155
|
+
# save to outputfile.0.ext, outputfile.1.ext, etc.
|
156
|
+
base, ext = os.path.splitext(output_file)
|
157
|
+
if ext[0] == '.':
|
158
|
+
ext = ext[1:]
|
159
|
+
|
160
|
+
# if ext is already a .0.ext, we need to split it again
|
161
|
+
i = 1
|
162
|
+
if '.' in ext and ext.split('.')[0].isdigit():
|
163
|
+
i = int(ext.split('.')[0]) + 1
|
164
|
+
ext = '.'.join(ext.split('.')[1:])
|
165
|
+
|
166
|
+
while os.path.exists(f"{base}.{i}.{ext}"):
|
167
|
+
i += 1
|
168
|
+
|
169
|
+
output_file = f"{base}.{i}.{ext}"
|
170
|
+
|
171
|
+
print(f"Saving {len(sorted_observations)} {'observation' if len(sorted_observations) == 1 else 'observations'} to {output_file}")
|
172
|
+
if len(sorted_observations) > 10_000:
|
173
|
+
print("This may take a while...")
|
174
|
+
print("-----------------------------------------------------\n")
|
175
|
+
|
176
|
+
if output_file.endswith('.nc'):
|
177
|
+
first_obs_timestamp = float(sorted_observations[0]['timestamp'])
|
178
|
+
convert_to_netcdf(sorted_observations, first_obs_timestamp, output_file)
|
179
|
+
|
180
|
+
elif output_file.endswith('.json'):
|
181
|
+
with open(output_file, 'w', encoding='utf-8') as f:
|
182
|
+
json.dump(sorted_observations, f, indent=4)
|
183
|
+
|
184
|
+
elif output_file.endswith('.csv'):
|
185
|
+
with open(output_file, mode='w', newline='') as file:
|
186
|
+
writer = csv.DictWriter(file, fieldnames=csv_headers)
|
187
|
+
writer.writeheader()
|
188
|
+
writer.writerows(sorted_observations)
|
189
|
+
|
190
|
+
elif output_file.endswith('.little_r'):
|
191
|
+
little_r_records = format_little_r(sorted_observations)
|
192
|
+
with open(output_file, 'w') as file:
|
193
|
+
file.write('\n'.join(little_r_records))
|
194
|
+
|
195
|
+
print(f"Saved {len(sorted_observations)} {'observation' if len(sorted_observations) == 1 else 'observations'} to {output_file}")
|
196
|
+
|
197
|
+
|
198
|
+
def save_observations_batch_in_buckets(sorted_observations, output_format, output_dir, bucket_hours=6.0, csv_headers=None, custom_save=None, prevent_overwrites=False):
|
199
|
+
if output_dir:
|
200
|
+
os.makedirs(output_dir, exist_ok=True)
|
201
|
+
print(f"Files will be saved to {output_dir}")
|
202
|
+
else:
|
203
|
+
print(f"Files will be saved to {os.getcwd()}")
|
204
|
+
|
205
|
+
|
206
|
+
by_mission = {}
|
207
|
+
mission_names = {}
|
208
|
+
for observation in sorted_observations:
|
209
|
+
mission_id = observation['mission_id']
|
210
|
+
if mission_id not in by_mission:
|
211
|
+
by_mission[mission_id] = []
|
212
|
+
mission_names[mission_id] = observation.get('mission_name', mission_id)
|
213
|
+
|
214
|
+
by_mission[mission_id].append(observation)
|
215
|
+
|
216
|
+
for mission_id, accumulated_observations in by_mission.items():
|
217
|
+
mission_name = mission_names[mission_id]
|
218
|
+
start_index = 0
|
219
|
+
earliest_time = accumulated_observations[0]['timestamp']
|
220
|
+
curtime = earliest_time - earliest_time % (bucket_hours * 60 * 60)
|
221
|
+
|
222
|
+
for i in range(len(accumulated_observations)):
|
223
|
+
segment = None
|
224
|
+
if accumulated_observations[i]['timestamp'] - curtime > bucket_hours * 60 * 60:
|
225
|
+
segment = accumulated_observations[start_index:i]
|
226
|
+
|
227
|
+
if i == len(accumulated_observations) - 1:
|
228
|
+
segment = accumulated_observations[start_index:]
|
229
|
+
|
230
|
+
if segment is None:
|
231
|
+
continue
|
232
|
+
|
233
|
+
bucket_start = datetime.fromtimestamp(curtime, tz=timezone.utc)
|
234
|
+
|
235
|
+
file_name = f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh" % (
|
236
|
+
bucket_start.year, bucket_start.month, bucket_start.day,
|
237
|
+
bucket_start.hour, bucket_hours)
|
238
|
+
|
239
|
+
extension = f".{output_format}"
|
240
|
+
if output_format == 'netcdf':
|
241
|
+
extension = '.nc'
|
242
|
+
|
243
|
+
output_file = os.path.join(output_dir or '.', file_name + extension)
|
244
|
+
if custom_save is not None:
|
245
|
+
custom_save(segment, output_file)
|
246
|
+
else:
|
247
|
+
save_observations_to_file(segment, output_file, csv_headers=csv_headers, prevent_overwrites=prevent_overwrites)
|
248
|
+
|
249
|
+
start_index = i
|
250
|
+
curtime += timedelta(hours=bucket_hours).seconds
|
251
|
+
|
252
|
+
|
253
|
+
def get_observations_core(api_args, csv_headers, get_page, start_time=None, end_time=None, output_file=None, bucket_hours=6.0, output_format=None, output_dir=None, callback=None, custom_save=None, exit_at_end=True):
|
662
254
|
"""
|
663
|
-
Fetches
|
255
|
+
Fetches observations or superobservations between a start time and an optional end time and saves to files in specified format.
|
664
256
|
Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
|
665
257
|
For example, for 6-hour buckets centered on 00 UTC, the start time should be 21 UTC of the previous day.
|
666
258
|
|
667
259
|
Args:
|
260
|
+
api_args (dict): Arguments to pass to the API endpoint.
|
261
|
+
csv_headers (list): Headers for CSV files.
|
262
|
+
get_page (callable): Function to fetch a page of observations.
|
668
263
|
start_time (str): A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
|
669
264
|
representing the starting time of fetching data.
|
670
265
|
end_time (str): Optional. A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
|
671
266
|
representing the end time of fetching data. If not provided, current time is used as end time.
|
672
|
-
|
673
|
-
|
267
|
+
|
268
|
+
|
269
|
+
|
270
|
+
output_file (str): Saves all data to a single file instead of bucketing.
|
674
271
|
Supported formats are '.csv', '.json', '.little_r' and '.nc'
|
675
272
|
bucket_hours (int): Optional. Size of time buckets in hours. Defaults to 6 hours.
|
676
273
|
output_format (str): Optional. Format to save data in separate files. Supported formats are 'json, 'csv', 'little_r' and 'netcdf'.
|
677
274
|
output_dir (str): Optional. Directory path where the separate files should be saved. If not provided, files will be saved in current directory.
|
678
275
|
callback (callable): Optional callback function that receives (super observations, metadata) before saving.
|
679
276
|
This allows custom processing or saving in custom formats.
|
277
|
+
custom_save (callable): Optional function to save observations in a custom format.
|
278
|
+
exit_at_end (bool): Whether to exit after fetching all observations or keep polling.
|
680
279
|
"""
|
280
|
+
if output_format and not custom_save:
|
281
|
+
verify_observations_output_format(output_format)
|
282
|
+
|
283
|
+
if output_file and not custom_save:
|
284
|
+
verify_observations_output_format(output_file.split('.')[-1])
|
681
285
|
|
682
|
-
|
286
|
+
# When we don't clear batches, we can safely overwrite the output files; this is nice
|
287
|
+
# However, it also holds everything in memory, so we should only do this when we're not going to run indefinitely
|
288
|
+
clear_batches = not exit_at_end
|
289
|
+
batch_size = 10_000
|
290
|
+
if not batch_size: # save less frequently
|
291
|
+
batch_size = 100_000
|
683
292
|
|
684
|
-
if
|
293
|
+
if start_time is not None:
|
294
|
+
start_time = to_unix_timestamp(start_time)
|
295
|
+
|
296
|
+
if end_time is not None:
|
685
297
|
end_time = to_unix_timestamp(end_time)
|
686
|
-
else:
|
687
|
-
end_time = int(datetime.now().timestamp())
|
688
|
-
|
689
|
-
# Supported formats for saving into separate files:
|
690
|
-
# - csv (default)
|
691
|
-
# - little_r
|
692
|
-
# - json
|
693
|
-
# - netcdf
|
694
|
-
if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
|
695
|
-
print("Please use one of the following formats:")
|
696
|
-
print(" - json")
|
697
|
-
print(" - csv")
|
698
|
-
print(" - little_r")
|
699
|
-
print(" - netcdf")
|
700
|
-
return
|
701
298
|
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
299
|
+
def save_with_context(observations_batch):
|
300
|
+
save_observations_batch(
|
301
|
+
observations_batch,
|
302
|
+
output_file=output_file,
|
303
|
+
output_format=output_format,
|
304
|
+
output_dir=output_dir,
|
305
|
+
start_time=start_time,
|
306
|
+
end_time=end_time,
|
307
|
+
bucket_hours=bucket_hours,
|
308
|
+
csv_headers=csv_headers,
|
309
|
+
custom_save=custom_save,
|
310
|
+
prevent_overwrites=clear_batches
|
311
|
+
)
|
715
312
|
|
716
|
-
|
717
|
-
|
313
|
+
result = iterate_through_observations(get_page, api_args, callback=callback, batch_callback=save_with_context, exit_at_end=exit_at_end, clear_batches=clear_batches, batch_size=batch_size)
|
314
|
+
if isinstance(result, int):
|
315
|
+
print(f"Processed {result} observations")
|
718
316
|
|
719
|
-
|
720
|
-
hours_since_day_start = start_dt.hour + start_dt.minute / 60
|
721
|
-
bucket_number = hours_since_day_start // bucket_hours
|
722
|
-
first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
|
317
|
+
return result
|
723
318
|
|
724
319
|
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
320
|
+
def iterate_through_observations(get_page, args, callback=None, batch_callback=None, exit_at_end=True, batch_size=10_000, clear_batches=True):
|
321
|
+
"""
|
322
|
+
Repeatedly calls `get_page` with `args`
|
323
|
+
For each page fetched, it calls `callback` with the full response
|
324
|
+
Every `batch_size` observations fetched, it calls `batch_callback` with the batched observations (if provided)
|
325
|
+
Returns an array of all observations fetched if no batch_callback is provided
|
730
326
|
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
print(f"Starting polling super observations\nfrom {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC to {datetime.fromtimestamp(end_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
|
742
|
-
print("-----------------------------------------------------")
|
743
|
-
|
744
|
-
while has_next_page:
|
745
|
-
try:
|
746
|
-
# Fetch observations
|
747
|
-
observations_page = get_super_observations_page(
|
748
|
-
since=current_timestamp,
|
749
|
-
min_time=start_time,
|
750
|
-
max_time=end_time,
|
751
|
-
include_ids=True,
|
752
|
-
include_mission_name=True
|
753
|
-
)
|
754
|
-
|
755
|
-
if observations_page is None:
|
756
|
-
print("\n----------------------------------------------------------------------")
|
757
|
-
print(f"Received null response from API. Retrying in {interval} seconds ...")
|
758
|
-
print("----------------------------------------------------------------------")
|
759
|
-
time.sleep(interval)
|
760
|
-
continue
|
327
|
+
Args:
|
328
|
+
get_page (callable): Function to fetch a page of observations
|
329
|
+
args (dict): Arguments to pass to `get_page`
|
330
|
+
callback (callable): Function to call with each page of observations
|
331
|
+
batch_callback (callable): Function to call with a batch of observations
|
332
|
+
exit_at_end (bool): Whether to exit after fetching all observations or keep polling
|
333
|
+
batch_size (int): Number of observations to accumulate before calling `batch_callback`
|
334
|
+
clear_batches (bool): Whether to clear the batched observations after calling `batch_callback`
|
335
|
+
"""
|
761
336
|
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
print(f"Fetched {fetced_so_far} super observations")
|
766
|
-
print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
|
767
|
-
print("-----------------------------------------------------")
|
768
|
-
|
769
|
-
# Invoke the callback with fetched super observations
|
770
|
-
if callback:
|
771
|
-
print("--------\nCallback\n--------")
|
772
|
-
callback(observations)
|
773
|
-
|
774
|
-
for obs in observations:
|
775
|
-
if 'mission_name' not in obs:
|
776
|
-
print("Warning: got an super observation without a mission name")
|
777
|
-
continue
|
778
|
-
|
779
|
-
timestamp = obs.get('timestamp')
|
780
|
-
if not timestamp:
|
781
|
-
continue
|
782
|
-
|
783
|
-
try:
|
784
|
-
obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
|
785
|
-
except (OSError, ValueError, TypeError, OverflowError):
|
786
|
-
continue
|
787
|
-
|
788
|
-
mission_name = obs.get('mission_name', 'Unknown')
|
789
|
-
obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
|
790
|
-
|
791
|
-
processed_obs = {}
|
792
|
-
for header in headers:
|
793
|
-
value = obs.get(header)
|
794
|
-
if value is None or value == '' or (isinstance(value, str) and not value.strip()):
|
795
|
-
processed_obs[header] = 'None'
|
796
|
-
else:
|
797
|
-
processed_obs[header] = value
|
798
|
-
|
799
|
-
obs_id = f"{timestamp}_{mission_name}"
|
800
|
-
|
801
|
-
if save_to_file:
|
802
|
-
all_observations[obs_id] = processed_obs
|
803
|
-
else:
|
804
|
-
if obs_time >= start_dt: # Only process observations after start time
|
805
|
-
hours_diff = (obs_time - first_center).total_seconds() / 3600
|
806
|
-
bucket_index = floor(hours_diff / bucket_hours)
|
807
|
-
bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
|
808
|
-
bucket_end = bucket_center + timedelta(hours=bucket_hours)
|
809
|
-
|
810
|
-
if obs_time <= bucket_end: # Include observations up to the end of the bucket
|
811
|
-
bucket_key = (bucket_center, mission_name)
|
812
|
-
if bucket_key not in buckets:
|
813
|
-
buckets[bucket_key] = {}
|
814
|
-
buckets[bucket_key][obs_id] = processed_obs
|
815
|
-
|
816
|
-
# Update pagination
|
817
|
-
next_timestamp = observations_page.get('next_since')
|
818
|
-
has_next_page = observations_page.get('has_next_page', False)
|
819
|
-
|
820
|
-
if not has_next_page or not next_timestamp or next_timestamp <= current_timestamp:
|
821
|
-
print("-----------------------------------------------------\n")
|
822
|
-
print("Fetching complete.")
|
823
|
-
print("\n-----------------------------------------------------")
|
824
|
-
break
|
337
|
+
batched_observations = []
|
338
|
+
since = args.get('since', 0)
|
339
|
+
processed_count = 0
|
825
340
|
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
# Sort by timestamp
|
847
|
-
sorted_observations = dict(sorted(filtered_observations.items(),
|
848
|
-
key=lambda x: float(x[1]['timestamp'])))
|
849
|
-
|
850
|
-
print(f"Saving {len(sorted_observations)} super {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
|
851
|
-
print("This may take a while...")
|
852
|
-
print("-----------------------------------------------------\n")
|
853
|
-
|
854
|
-
if save_to_file.endswith('.nc'):
|
855
|
-
first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
|
856
|
-
convert_to_netcdf(sorted_observations, first_obs_timestamp, save_to_file)
|
857
|
-
|
858
|
-
elif save_to_file.endswith('.json'):
|
859
|
-
with open(save_to_file, 'w', encoding='utf-8') as f:
|
860
|
-
json.dump(sorted_observations, f, indent=4)
|
861
|
-
|
862
|
-
elif save_to_file.endswith('.csv'):
|
863
|
-
with open(save_to_file, mode='w', newline='') as file:
|
864
|
-
writer = csv.DictWriter(file, fieldnames=headers)
|
865
|
-
writer.writeheader()
|
866
|
-
writer.writerows(sorted_observations.values())
|
867
|
-
|
868
|
-
elif save_to_file.endswith('.little_r'):
|
869
|
-
little_r_records = format_little_r(list(sorted_observations.items()))
|
870
|
-
with open(save_to_file, 'w') as file:
|
871
|
-
file.write('\n'.join(little_r_records))
|
872
|
-
|
873
|
-
print(f"Saved {len(sorted_observations)} super {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
|
874
|
-
|
875
|
-
# Save data to multiple file
|
876
|
-
elif output_format:
|
877
|
-
# Create output directory if specified
|
878
|
-
if output_dir:
|
879
|
-
os.makedirs(output_dir, exist_ok=True)
|
880
|
-
print(f"Files will be saved to {output_dir}")
|
341
|
+
if args.get('min_time') is not None:
|
342
|
+
args['min_time'] = to_unix_timestamp(args['min_time'])
|
343
|
+
if since == 0:
|
344
|
+
since = args['min_time']
|
345
|
+
|
346
|
+
if args.get('max_time') is not None:
|
347
|
+
args['max_time'] = to_unix_timestamp(args['max_time'])
|
348
|
+
|
349
|
+
while True:
|
350
|
+
args = {**args, 'since': since}
|
351
|
+
response = get_page(**args)
|
352
|
+
if not response:
|
353
|
+
print("Received null response from API. Retrying in 10 seconds...")
|
354
|
+
time.sleep(10)
|
355
|
+
continue
|
356
|
+
|
357
|
+
observations = response.get('observations', [])
|
358
|
+
|
359
|
+
if callback:
|
360
|
+
callback(response)
|
881
361
|
else:
|
882
|
-
|
362
|
+
since_timestamp = since
|
363
|
+
if since_timestamp > 4_000_000_000: # in nanoseconds rather than seconds
|
364
|
+
since_timestamp /= 1_000_000_000
|
365
|
+
since_dt = datetime.fromtimestamp(since_timestamp, timezone.utc)
|
366
|
+
print(f"Fetched page with {len(observations)} observation(s) updated {since_dt} or later")
|
883
367
|
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
901
|
-
|
902
|
-
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
elif output_format == 'little_r':
|
928
|
-
little_r_records = format_little_r(sorted_obs)
|
929
|
-
with open(output_file, 'w') as file:
|
930
|
-
file.write('\n'.join(little_r_records))
|
931
|
-
total_observations_written += len(observations)
|
932
|
-
|
933
|
-
# Update statistics
|
934
|
-
if mission_name not in mission_stats:
|
935
|
-
mission_stats[mission_name] = {'files': 0, 'observations': 0}
|
936
|
-
mission_stats[mission_name]['files'] += 1
|
937
|
-
mission_stats[mission_name]['observations'] += len(observations)
|
938
|
-
# Print total super observations written
|
939
|
-
print(f"Total super {'observation' if total_observations_written == 1 else 'observations'} written: {total_observations_written}")
|
940
|
-
print("-----------------------------------------------------")
|
941
|
-
|
942
|
-
# Print summary for each mission
|
943
|
-
for mission_name, stats in mission_stats.items():
|
944
|
-
print(f"Mission {mission_name}: Saved {stats['observations']} super {'observation' if stats['observations'] == 1 else 'observations'} across {stats['files']} {'file' if stats['files'] == 1 else 'files'}")
|
945
|
-
|
946
|
-
print("-----------------------------------------------------")
|
947
|
-
print("All super observations have been processed and saved.")
|
948
|
-
|
949
|
-
def poll_super_observations(start_time, interval=60, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
|
368
|
+
batched_observations.extend(observations)
|
369
|
+
|
370
|
+
processed_count += len(observations)
|
371
|
+
|
372
|
+
if batch_callback and (len(batched_observations) >= batch_size or not response['has_next_page']):
|
373
|
+
batch_callback(batched_observations)
|
374
|
+
if clear_batches:
|
375
|
+
batched_observations = []
|
376
|
+
|
377
|
+
if not response['has_next_page']:
|
378
|
+
print("No more data available.")
|
379
|
+
if exit_at_end:
|
380
|
+
break
|
381
|
+
|
382
|
+
time.sleep(60)
|
383
|
+
continue
|
384
|
+
|
385
|
+
since = response['next_since']
|
386
|
+
|
387
|
+
if batch_callback and len(batched_observations) > 0:
|
388
|
+
batch_callback(batched_observations)
|
389
|
+
if clear_batches:
|
390
|
+
batched_observations = []
|
391
|
+
|
392
|
+
if batch_callback:
|
393
|
+
return processed_count
|
394
|
+
else:
|
395
|
+
return batched_observations
|
396
|
+
|
397
|
+
|
398
|
+
def verify_observations_output_format(output_format):
|
399
|
+
valid_formats = ['json', 'csv', 'little_r', 'netcdf', 'nc']
|
400
|
+
if output_format in valid_formats:
|
401
|
+
return True
|
402
|
+
|
403
|
+
print("Please use one of the following formats:")
|
404
|
+
for fmt in valid_formats:
|
405
|
+
print(f" - {fmt}")
|
406
|
+
|
407
|
+
exit(1)
|
408
|
+
+def get_observations(start_time, end_time=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, output_file=None, bucket_hours=6.0, output_format=None, output_dir=None, callback=None, custom_save=None, exit_at_end=True):
    """
-
-
+    Fetches observations between a start time and an optional end time and saves to files in specified format.
+    Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
+    For example, for 6-hour buckets centered on 00 UTC, the start time should be 21 UTC of the previous day.

    Args:
-        start_time (str):
-
-
-
-
-
+        start_time (str): A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
+            representing the starting time of fetching data.
+        end_time (str): Optional. A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
+            representing the end time of fetching data. If not provided, current time is used as end time.
+
+        include_updated_at (bool): Include update timestamps in response.
+        mission_id (str): Filter observations by mission ID.
+        min_latitude (float): Minimum latitude boundary.
+        max_latitude (float): Maximum latitude boundary.
+        min_longitude (float): Minimum longitude boundary.
+        max_longitude (float): Maximum longitude boundary.
+
+        output_file (str): Saves all data to a single file instead of bucketing.
+            Supported formats are '.csv', '.json', '.little_r' and '.nc'
+        bucket_hours (int): Optional. Size of time buckets in hours. Defaults to 6 hours.
+        output_format (str): Optional. Format to save data in separate files. Supported formats are 'json, 'csv', 'little_r' and 'netcdf'.
+        output_dir (str): Optional. Directory path where the separate files should be saved. If not provided, files will be saved in current directory.
+        callback (callable): Optional callback function that receives (super observations, metadata) before saving.
+            This allows custom processing or saving in custom formats.
+        custom_save (callable): Optional function to save observations in a custom format.
+        exit_at_end (bool): Whether to exit after fetching all observations or keep polling.
+    """
+
+    # Headers for CSV files
+    csv_headers = [
+        "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
+        "pressure", "specific_humidity", "speed_u", "speed_v", "temperature", "mission_name", "mission_id"
+    ]
+
+    api_args = {
+        'min_time': start_time,
+        'max_time': end_time,
+        'min_latitude': min_latitude,
+        'max_latitude': max_latitude,
+        'min_longitude': min_longitude,
+        'max_longitude': max_longitude,
+        'include_updated_at': include_updated_at,
+        'mission_id': mission_id,
+        'include_ids': True,
+        'include_mission_name': True
+    }
+
+    return get_observations_core(api_args, csv_headers, get_page=get_observations_page, start_time=start_time, end_time=end_time, output_file=output_file, bucket_hours=bucket_hours, output_format=output_format, output_dir=output_dir, callback=callback, custom_save=custom_save, exit_at_end=exit_at_end)
+
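A usage sketch for the new get_observations entry point follows; it is not part of the diff itself and assumes the function is importable from the package's top level and that API credentials are configured the way the package expects.

# Illustrative only: import path, times, and file names are assumptions, not taken from this diff.
from windborne import get_observations

# One combined CSV file for a fixed time window
get_observations(
    start_time="2024-01-01 00:00:00",
    end_time="2024-01-01 06:00:00",
    output_file="observations.csv",
)

# Or bucketed NetCDF files, one per mission and 6-hour bucket, written under ./out
get_observations(
    start_time="2024-01-01 00:00:00",
    end_time="2024-01-01 12:00:00",
    output_format="netcdf",
    output_dir="out",
    bucket_hours=6.0,
)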
+def poll_observations(**kwargs):
+    """
+    Continuously polls for observations and saves to files in specified format.
+    Will run indefinitely until interrupted.
+    Same as get_observations, but runs in an infinite loop.
    """
+
    # Print warning about infinite loop
    print(" ___________________________________________________________________")
    print("| WARNING \U000026A0\U0000FE0F |")
@@ -966,217 +471,78 @@ def poll_super_observations(start_time, interval=60, bucket_hours=6.0, output_fo
    print("| |")
    print("| Press Ctrl + C anytime to exit. |")
    print("|___________________________________________________________________|\n\n")
-    time.sleep(4)
-
-    start_time = to_unix_timestamp(start_time)

-
-        print("Please use one of the following formats:")
-        print(" - json\n - csv\n - little_r\n - netcdf")
-        return
+    get_observations(**kwargs, exit_at_end=False)

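Since poll_observations now simply forwards its keyword arguments to get_observations with exit_at_end=False, a polling call can mirror the sketch above (again illustrative, with an assumed import path):

# Illustrative only: runs until interrupted with Ctrl + C, writing bucketed CSV files as new data arrives.
from windborne import poll_observations

poll_observations(
    start_time="2024-01-01 00:00:00",
    output_format="csv",
    output_dir="live_observations",
)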
-
-
-
-
-
-
-    # Convert start_time to datetime
-    start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
-
-    # Calculate first center time that's after start_time
-    hours_since_day_start = start_dt.hour + start_dt.minute / 60
-    bucket_number = hours_since_day_start // bucket_hours
-    first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
+def get_super_observations(start_time, end_time=None, mission_id=None, include_updated_at=True, output_file=None, bucket_hours=6.0, output_format=None, output_dir=None, callback=None, custom_save=None, exit_at_end=True):
+    """
+    Fetches super observations between a start time and an optional end time and saves to files in specified format.
+    Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
+    For example, for 6-hour buckets centered on 00 UTC, the start time should be 21 UTC of the previous day.

-
+    Args:
+        start_time (str): A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
+            representing the starting time of fetching data.
+        end_time (str): Optional. A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
+            representing the end time of fetching data. If not provided, current time is used as end time.
+        mission_id (str): Filter observations by mission ID.
+        include_updated_at (bool): Include update timestamps in response.
+        output_file (str): Saves all data to a single file instead of bucketing.
+            Supported formats are '.csv', '.json', '.little_r' and '.nc'
+        bucket_hours (int): Optional. Size of time buckets in hours. Defaults to 6 hours.
+        output_format (str): Optional. Format to save data in separate files. Supported formats are 'json, 'csv', 'little_r' and 'netcdf'.
+        output_dir (str): Optional. Directory path where the separate files should be saved. If not provided, files will be saved in current directory.
+        callback (callable): Optional callback function that receives (super observations, metadata) before saving.
+            This allows custom processing or saving in custom formats.
+        custom_save (callable): Optional function to save observations in a custom format.
+        exit_at_end (bool): Whether to exit after fetching all observations or keep polling.
+    """
+    csv_headers = [
        "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
        "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
    ]

-
-
-
-
-
-
-
-
-
-    try:
-        while True:
-            observations_page = get_super_observations_page(
-                since=current_timestamp,
-                min_time=start_time,
-                include_ids=True,
-                include_mission_name=True
-            )
-
-            if observations_page is None:
-                print(f"\nNull response from API. Retrying in {interval} seconds ...")
-                time.sleep(interval)
-                continue
+    api_args = {
+        'min_time': start_time,
+        'max_time': end_time,
+        'mission_id': mission_id,
+        'include_updated_at': include_updated_at,
+        'include_ids': True,
+        'include_mission_name': True
+    }

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                timestamp = obs.get('timestamp')
-                if not timestamp:
-                    continue
-
-                try:
-                    obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
-                except (OSError, ValueError, TypeError, OverflowError):
-                    continue
-
-                mission_name = obs.get('mission_name', 'Unknown')
-                obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
-
-                processed_obs = {
-                    header: obs.get(header) if obs.get(header) not in [None, '', ' '] else 'None'
-                    for header in headers
-                }
-
-                obs_id = f"{timestamp}_{mission_name}"
-
-                if obs_time >= start_dt:
-                    hours_diff = (obs_time - first_center).total_seconds() / 3600
-                    bucket_index = floor(hours_diff / bucket_hours)
-                    bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
-                    bucket_end = bucket_center + timedelta(hours=bucket_hours)
-
-                    if obs_time <= bucket_end:
-                        bucket_key = (bucket_center, mission_name)
-
-                        # Initialize bucket if needed
-                        if bucket_key not in buckets:
-                            buckets[bucket_key] = {
-                                'data': {},
-                                'last_write': 0,
-                                'data_hash': ''
-                            }
-
-                        # Update bucket data
-                        buckets[bucket_key]['data'][obs_id] = processed_obs
-
-                        # Track statistics
-                        if mission_name not in mission_stats:
-                            mission_stats[mission_name] = {'files': set(), 'observations': 0}
-                        mission_stats[mission_name]['observations'] += 1
-
-                        # Calculate new data hash
-                        sorted_data = sorted(buckets[bucket_key]['data'].items(), key=lambda x: int(x[1]['timestamp']))
-                        data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
-
-                        # Check if we should write the bucket
-                        current_time = datetime.now(timezone.utc)
-                        time_since_last_write = current_time.timestamp() - buckets[bucket_key]['last_write']
-                        data_changed = data_hash != buckets[bucket_key]['data_hash']
-
-                        # Write if it's been more than interval seconds since last write OR if data has changed
-                        if (time_since_last_write >= interval or data_changed) and output_format:
-                            bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
-
-                            file_name_format = {
-                                'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
-                                'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
-                                'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
-                                'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
-                            }
-
-                            file_name = file_name_format[output_format] % (
-                                bucket_center.year, bucket_center.month, bucket_center.day,
-                                bucket_hour, bucket_hours)
-
-                            output_file = os.path.join(output_dir or '.', file_name)
-                            sorted_obs = [obs for _, obs in sorted_data]
-
-                            # Write the file based on format
-                            try:
-                                if output_format == 'netcdf':
-                                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
-                                elif output_format == 'csv':
-                                    with open(output_file, mode='w', newline='') as file:
-                                        writer = csv.DictWriter(file, fieldnames=headers)
-                                        writer.writeheader()
-                                        writer.writerows(sorted_obs)
-                                elif output_format == 'json':
-                                    sorted_obs_dict = {k: v for k, v in sorted_data}
-                                    with open(output_file, 'w', encoding='utf-8') as file:
-                                        json.dump(sorted_obs_dict, file, indent=4)
-                                elif output_format == 'little_r':
-                                    little_r_records = format_little_r(sorted_obs)
-                                    with open(output_file, 'w') as file:
-                                        file.write('\n'.join(little_r_records))
-
-                                buckets[bucket_key]['last_write'] = current_time.timestamp()
-                                buckets[bucket_key]['data_hash'] = data_hash
-                                mission_stats[mission_name]['files'].add(output_file)
-                            except Exception as e:
-                                print(f"Error writing bucket file {file_name}: {str(e)}")
-
-            # Clean up old buckets
-            current_time = datetime.now(timezone.utc)
-            buckets = {
-                k: v for k, v in buckets.items()
-                if current_time - k[0] <= timedelta(hours=bucket_hours * 2) # Keep slightly longer for potential updates
-            }
-
-            next_timestamp = observations_page.get('next_since')
-            has_next_page = observations_page.get('has_next_page', False)
-
-            if next_timestamp and next_timestamp > current_timestamp:
-                current_timestamp = next_timestamp
-            elif not has_next_page:
-                print("-----------------------------------------------------")
-                print(f"\U0001F503 Latest super observations data have been processed.\nRetrying getting new super observations data in {interval} seconds...")
-                print("-----------------------------------------------------")
-                time.sleep(interval)
-                continue
+    return get_observations_core(api_args, csv_headers, get_page=get_super_observations_page, start_time=start_time, end_time=end_time, output_file=output_file, bucket_hours=bucket_hours, output_format=output_format, output_dir=output_dir, callback=callback, custom_save=custom_save, exit_at_end=exit_at_end)
+
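A matching sketch for get_super_observations, with the same caveats (illustrative values, assumed import path):

# Illustrative only: super observations accept the same bucketing options as regular observations.
from windborne import get_super_observations

get_super_observations(
    start_time="2024-01-01 00:00:00",
    end_time="2024-01-02 00:00:00",
    output_format="little_r",
    output_dir="super_obs",
)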
+def poll_super_observations(**kwargs):
+    """
+    Continuously polls for super observations and saves to files in specified format.
+    Will run indefinitely until interrupted.
+    Same as get_super_observations, but runs in an infinite loop.
+    """
+
+    # Print warning about infinite loop
+    print(" ___________________________________________________________________")
+    print("| WARNING \U000026A0\U0000FE0F |")
+    print("| You are entering an endless loop. |")
+    print("| |")
+    print("| Press Ctrl + C anytime to exit. |")
+    print("|___________________________________________________________________|\n\n")
+
+    get_super_observations(**kwargs, exit_at_end=False)

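And the polling variant, which forwards to get_super_observations with exit_at_end=False (illustrative, assumed import path):

# Illustrative only: runs until interrupted with Ctrl + C.
from windborne import poll_super_observations

poll_super_observations(start_time="2024-01-01 00:00:00", output_format="json", output_dir="super_obs")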
-            if not observations:
-                print(f"\U0001F503 No new super observations data available.\n Retrying getting new super observations data in {interval} seconds...")
-                print("-----------------------------------------------------")
-                time.sleep(interval)
-
-    except KeyboardInterrupt:
-        print("\n\U0001F6D1 Received interrupt, stopping...")
-        print("-----------------------------------------------------")
-        for mission_name, stats in mission_stats.items():
-            print(f"Mission {mission_name}: {stats['observations']} super observations across {len(stats['files'])} files")
-    except Exception as e:
-        print(f"Error occurred: {str(e)}")
-        exit(1001)
-    finally:
-        print("-----------------------------------------------------")
-        print("Finished processing super observations.")

# ------------
# METADATA
# ------------
-def get_flying_missions(
+def get_flying_missions(output_file=None, print_results=False):
    """
    Retrieves a list of currently flying missions.
    In CLI mode, displays missions in a formatted table.

    Args:
-
+        output_file (str): Optional path to save the response data.
            If provided, saves the data in CSV or JSON format.
+        print_results (bool): Whether to print the results in the CLI.

    Returns:
        dict: The API response containing list of flying missions.
@@ -1187,34 +553,47 @@ def get_flying_missions(cli=None, save_to_file=None):
    flying_missions = flying_missions_response.get("missions", [])

    # Display currently flying missions only if we are in cli and we don't save info in file
-    if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if print_results:
+        if flying_missions:
+            print("Currently flying missions:\n")
+
+            # Define headers and data
+            headers = ["Index", "Mission ID", "Mission Name"]
+            rows = [
+                [str(i), mission.get("id", "N/A"), mission.get("name", "Unnamed Mission")]
+                for i, mission in enumerate(flying_missions, start=1)
+            ]
+
+            # Kinda overkill | but it's a good practice if we ever change missions naming convention
+            # Calculate column widths
+            col_widths = [max(len(cell) for cell in col) + 2 for col in zip(headers, *rows)]
+
+            # Display table
+            print("".join(f"{headers[i]:<{col_widths[i]}}" for i in range(len(headers))))
+            print("".join("-" * col_width for col_width in col_widths))
+            for row in rows:
+                print("".join(f"{row[i]:<{col_widths[i]}}" for i in range(len(row))))
+        else:
+            print("No missions are currently flying.")
+
+    if output_file:
+        save_arbitrary_response(output_file, flying_missions_response, csv_data_key='missions')

-    return
+    return flying_missions
+
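With the reworked signature, callers now get the mission list back directly; a hedged sketch (assumed import path):

# Illustrative only: the return value is the list under the "missions" key of the API response.
from windborne import get_flying_missions

missions = get_flying_missions(print_results=True, output_file="missions.json")
for mission in missions:
    print(mission.get("id"), mission.get("name"))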

-def get_mission_launch_site(mission_id=None,
+def get_mission_launch_site(mission_id=None, output_file=None, print_result=False):
    """
    Retrieves launch site information for a specified mission.
+
+    Args:
+        mission_id (str): The ID of the mission to fetch the launch site for.
+        output_file (str): Optional path to save the response data.
+            If provided, saves the data in CSV format.
+        print_result (bool): Whether to print the results in the CLI.
+
+    Returns:
+        dict: The API response containing the launch site information.
    """
    if not mission_id:
        print("Must provide mission ID")
@@ -1223,34 +602,33 @@ def get_mission_launch_site(mission_id=None, save_to_file=None):
    url = f"{DATA_API_BASE_URL}/missions/{mission_id}/launch_site.json"
    response = make_api_request(url)

-    if response and
+    if response and print_result:
        launch_site = response.get('launch_site')
        if isinstance(launch_site, dict):
-            site_name = LAUNCH_SITES.get(launch_site.get('id'), 'N/A')
            print("Mission launch site\n")
-            print(f"{'
+            print(f"{'ID':<12} {launch_site.get('id')}")
            print(f"{'Latitude':<12} {launch_site.get('latitude', 'N/A')}")
            print(f"{'Longitude':<12} {launch_site.get('longitude', 'N/A')}")
        else:
            print("Unable to display launch site details - unexpected format")

-    if
-
+    if output_file:
+        save_arbitrary_response(output_file, response, csv_data_key='launch_site')

-    return response
+    return response.get('launch_site')

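A corresponding sketch for the reworked get_mission_launch_site, using a placeholder mission ID and an assumed import path:

# Illustrative only: the function now returns the 'launch_site' dict rather than the full response.
from windborne import get_mission_launch_site

site = get_mission_launch_site(mission_id="<mission-id>", print_result=True)
if site:
    print(site.get("latitude"), site.get("longitude"))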
-def get_predicted_path(mission_id=None,
+def get_predicted_path(mission_id=None, output_file=None):
    """
    Fetches the predicted flight path for a given mission.
    Displays currently flying missions if the provided mission ID is invalid.

    Args:
        mission_id (str): The ID of the mission to fetch the prediction for.
-
+        output_file (str): Optional path to save the response data.
            If provided, saves the data in CSV format.

    Returns:
-
+        list: The API response containing the predicted flight path data.
    """
    if not mission_id:
        print("To get the predicted flight path for a given mission you must provide a mission ID.")
@@ -1290,7 +668,7 @@ def get_predicted_path(mission_id=None, save_to_file=None):
    url = f"{DATA_API_BASE_URL}/missions/{mission_id}/prediction.json"
    response = make_api_request(url)

-    if
-
+    if output_file:
+        save_arbitrary_response(output_file, response, csv_data_key='prediction')

-    return response
+    return response.get('prediction')
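Finally, a sketch for the reworked get_predicted_path, again with a placeholder mission ID and an assumed import path:

# Illustrative only: the function now returns the 'prediction' list rather than the full response.
from windborne import get_predicted_path

path = get_predicted_path(mission_id="<mission-id>", output_file="prediction.csv")
if path:
    print(f"{len(path)} predicted points")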