turbx-1.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
turbx/rgd_xpln_ccor.py ADDED
@@ -0,0 +1,701 @@
+ import os
+ import re
+ import sys
+ import timeit
+ from concurrent.futures import ThreadPoolExecutor
+ from pathlib import Path, PurePosixPath
+
+ import h5py
+ import numpy as np
+ import psutil
+ from mpi4py import MPI
+ from tqdm import tqdm
+
+ from .h5 import h5_print_contents
+ from .signal import ccor
+ from .utils import even_print, format_time_string
+
+ # ======================================================================
+
+ def _calc_ccor_xpln(self, **kwargs):
+     '''
+     calculate cross-correlation in [z,t] at every [x,y]
+     - designed for analyzing unsteady, thin planes in [x]
+     - multithreaded with ThreadPoolExecutor()
+     - scipy.signal.csd() automatically tries to run multithreaded
+     - set OMP_NUM_THREADS=1 and pass 'n_threads' as a kwarg manually
+     '''
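+
+     ## usage sketch (illustrative, not part of this file): the banner printed below
+     ## suggests this function is bound to the rgd class as rgd.calc_ccor_xpln();
+     ## the filename and parameter values here are hypothetical
+     ##
+     ##   $ export OMP_NUM_THREADS=1
+     ##   $ mpiexec -n 4 python run_ccor.py
+     ##
+     ##   ## run_ccor.py
+     ##   from mpi4py import MPI
+     ##   import turbx
+     ##   with turbx.rgd('ioXY_unsteady_mpi_0.h5','r',driver='mpio',comm=MPI.COMM_WORLD) as f:
+     ##       f.calc_ccor_xpln(ry=4, sy=2, n_threads=8)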
+
+     if (self.rank==0):
+         verbose = True
+     else:
+         verbose = False
+
+     if verbose: print('\n'+'rgd.calc_ccor_xpln()'+'\n'+72*'-')
+     t_start_func = timeit.default_timer()
+
+     ## assert that the opened RGD has fsubtype 'unsteady' (i.e. is NOT a prime file)
+     if (self.fsubtype!='unsteady'):
+         raise ValueError(f"fsubtype='{self.fsubtype}' but must be 'unsteady'")
+     if not self.usingmpi:
+         raise NotImplementedError('function is not implemented for non-MPI usage')
+
+     h5py_is_mpi_build = h5py.h5.get_config().mpi
+     if not h5py_is_mpi_build:
+         if verbose: print('h5py was not compiled for parallel usage! exiting.')
+         sys.exit(1)
+
+     rx = kwargs.get('rx',1)
+     ry = kwargs.get('ry',1)
+     rz = kwargs.get('rz',1)
+     rt = kwargs.get('rt',1)
+
+     # cy = kwargs.get('cy',1) ## number of subdivisions per rank [y] range
+     # if not isinstance(cy,int):
+     #     raise TypeError('cy should be an int')
+     # if (cy<1):
+     #     raise TypeError('cy should be an int')
+
+     sy = kwargs.get('sy',1) ## number of [y] layers to read at a time
+     if not isinstance(sy,int) or (sy<1):
+         raise TypeError('sy should be a positive non-zero int')
+
+     n_threads = kwargs.get('n_threads',1)
+     #try:
+     #    n_threads = int(os.environ.get('OMP_NUM_THREADS'))
+     #except TypeError: ## not set
+     #    n_threads = os.cpu_count()
+
+     fn_h5_out = kwargs.get('fn_h5_out',None) ## filename for output HDF5 (.h5) file
+
+     ## Debug Rank:Proc Affinity
+     #pp = psutil.Process()
+     #print(f"[Rank {self.rank}] sees CPUs: {pp.cpu_affinity()} | n_threads={n_threads} | OMP_NUM_THREADS={os.environ.get('OMP_NUM_THREADS')}")
+
+     ## only distribute data across [y]
+     if (rx!=1):
+         raise AssertionError('rx!=1')
+     if (rz!=1):
+         raise AssertionError('rz!=1')
+     if (rt!=1):
+         raise AssertionError('rt!=1')
+
+     if not isinstance(ry,int) or (ry<1):
+         raise ValueError('ry should be a positive non-zero int')
+
+     ## check the choice of ranks per dimension
+     if (rx*ry*rz*rt != self.n_ranks):
+         raise AssertionError('rx*ry*rz*rt != self.n_ranks')
+     if (rx>self.nx):
+         raise AssertionError('rx>self.nx')
+     if (ry>self.ny):
+         raise AssertionError('ry>self.ny')
+     if (rz>self.nz):
+         raise AssertionError('rz>self.nz')
+     if (rt>self.nt):
+         raise AssertionError('rt>self.nt')
+
+     if (self.ny%ry!=0):
+         raise ValueError('ny not divisible by ry')
+
+     ## distribute 4D data over ranks --> here only in [y]
+     ryl_ = np.array_split(np.arange(self.ny,dtype=np.int64),min(ry,self.ny))
+     ryl  = [[b[0],b[-1]+1] for b in ryl_ ]
+     ry1,ry2 = ryl[self.rank]
+     nyr = ry2 - ry1
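+
+     ## worked example (hypothetical numbers): with ny=96 and ry=4,
+     ## ryl = [[0,24],[24,48],[48,72],[72,96]], so rank 2 owns global
+     ## y-rows 48..71 and nyr==24 on every rank (checked below)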
+
+     ## check all [y] ranges have same size
+     for ryl_ in ryl:
+         if not (ryl_[1]-ryl_[0]==nyr):
+             raise ValueError('[y] chunks are not even in size')
+
+     # ## [y] sub chunk range --> cyl = list of ranges in ry1:ry2
+     # ## cy is the NUMBER of chunks for the rank sub-range
+     # cyl_ = np.array_split( np.arange(ry1,ry2) , min(cy,nyr) )
+     # cyl  = [[b[0],b[-1]+1] for b in cyl_ ]
+     #
+     # for nyc_ in [ cyl_[1]-cyl_[0] for cyl_ in cyl ]:
+     #     if (nyc_ < 1):
+     #         #raise ValueError
+     #         print(f'rank {self.rank:d}: sub-range is <1')
+     #         self.comm.Abort(1)
+     #
+     # if 1: ## assert that [y] sub-chunk ranges are correct
+     #
+     #     yi = np.arange(self.ny, dtype=np.int32)
+     #
+     #     local_indices = []
+     #     for cyl_ in cyl:
+     #         cy1, cy2 = cyl_
+     #         local_indices += [ yi_ for yi_ in yi[cy1:cy2] ]
+     #
+     #     G = self.comm.gather([ self.rank , local_indices ], root=0)
+     #     G = self.comm.bcast(G, root=0)
+     #
+     #     all_indices = []
+     #     for G_ in G:
+     #         all_indices += G_[1]
+     #     all_indices = np.array( sorted(all_indices), dtype=np.int32 )
+     #
+     #     if not np.array_equal( all_indices , yi ):
+     #         raise AssertionError
+
+     if (nyr%sy!=0):
+         raise ValueError('nyr not divisible by sy')
+
+     ## output filename : HDF5 (.h5)
+     if (fn_h5_out is None): ## automatically determine name
+         fname_path = os.path.dirname(self.fname)
+         fname_base = os.path.basename(self.fname)
+         fname_root, fname_ext = os.path.splitext(fname_base)
+         fname_root = re.findall(r'io\S+_mpi_[0-9]+', fname_root)[0]
+         fn_h5_out_base = fname_root+'_ccor.h5'
+         fn_h5_out = str(PurePosixPath(fname_path, fn_h5_out_base))
+     if (Path(fn_h5_out).suffix != '.h5'):
+         raise ValueError(f"fn_h5_out='{str(fn_h5_out)}' must end in .h5")
+     if os.path.isfile(fn_h5_out):
+         #if (os.path.getsize(fn_h5_out) > 8*1024**3):
+         #    raise ValueError(f"fn_h5_out='{str(fn_h5_out)}' exists and is >8 [GB]. exiting for your own safety.")
+         if (fn_h5_out == self.fname):
+             raise ValueError(f"fn_h5_out='{str(fn_h5_out)}' cannot be same as input filename.")
+
+     if verbose: even_print( 'fn_h5'     , self.fname )
+     if verbose: even_print( 'fn_h5_out' , fn_h5_out  )
+     if verbose: print(72*'-')
+     self.comm.Barrier()
+
+     ## dictionary of metadata: scalars get written as HDF5 attributes, arrays as datasets
+     data = {}
+
+     ## infile
+     fsize = os.path.getsize(self.fname)/1024**3
+     if verbose: even_print(os.path.basename(self.fname),'%0.1f [GB]'%fsize)
+     if verbose: even_print('nx',f'{self.nx:d}')
+     if verbose: even_print('ny',f'{self.ny:d}')
+     if verbose: even_print('nz',f'{self.nz:d}')
+     if verbose: even_print('nt',f'{self.nt:d}')
+     if verbose: even_print('ngp',f'{self.ngp/1e6:0.1f} [M]')
+     #if verbose: even_print('cy',f'{cy:d}')
+     if verbose: even_print('sy',f'{sy:d}')
+     if verbose: even_print('n_ranks',f'{self.n_ranks:d}')
+     if verbose: even_print('n_threads',f'{n_threads:d}')
+     if verbose: print(72*'-')
+
+     ## 0D freestream scalars
+     lchar   = self.lchar   ; data['lchar']   = lchar
+     U_inf   = self.U_inf   ; data['U_inf']   = U_inf
+     rho_inf = self.rho_inf ; data['rho_inf'] = rho_inf
+     T_inf   = self.T_inf   ; data['T_inf']   = T_inf
+
+     #data['M_inf'] = self.M_inf
+     data['Ma'] = self.Ma
+     data['Pr'] = self.Pr
+
+     ## read in 1D coordinate arrays & re-dimensionalize
+     x = np.copy( self['dims/x'][()] * self.lchar )
+     y = np.copy( self['dims/y'][()] * self.lchar )
+     z = np.copy( self['dims/z'][()] * self.lchar )
+     t = np.copy( self['dims/t'][()] * self.tchar )
+
+     nx = self.nx ; data['nx'] = nx
+     ny = self.ny ; data['ny'] = ny
+     nz = self.nz ; data['nz'] = nz
+     nt = self.nt ; data['nt'] = nt
+
+     ## assert constant Δz
+     dz0 = np.diff(z)[0]
+     if not np.all(np.isclose(np.diff(z), dz0, rtol=1e-6)):
+         raise NotImplementedError('Δz not constant')
+     dz = np.diff(z)[0]
+
+     ## dimensional [s]
+     dt = self.dt * self.tchar
+     np.testing.assert_allclose(dt, t[1]-t[0], rtol=1e-12, atol=1e-12)
+
+     t_meas = self.duration * self.tchar
+     np.testing.assert_allclose(t_meas, t.max()-t.min(), rtol=1e-12, atol=1e-12)
+
+     zrange = z.max() - z.min()
+
+     data['x'] = x
+     data['y'] = y
+     data['z'] = z
+
+     data['t']      = t
+     data['t_meas'] = t_meas
+     data['dt']     = dt
+     data['dz']     = dz
+     data['zrange'] = zrange
+
+     if verbose: even_print( 'Δt/tchar'       , f'{dt/self.tchar:0.8f}' )
+     if verbose: even_print( 'Δt'             , f'{dt:0.3e} [s]' )
+     if verbose: even_print( 'duration/tchar' , f'{self.duration:0.1f}' )
+     if verbose: even_print( 'duration'       , f'{self.duration*self.tchar:0.3e} [s]' )
+     if verbose: print(72*'-')
+
+     ## report
+     if verbose:
+         even_print('Δt'     , f'{dt :0.5e} [s]' )
+         even_print('t_meas' , f'{t_meas:0.5e} [s]' )
+         even_print('Δz'     , f'{dz0 :0.5e} [m]' )
+         even_print('zrange' , f'{zrange:0.5e} [m]' )
+         print(72*'-')
+
+     ## get lags [t]
+     #lags_t,_ = ccor( np.ones(win_len,dtype=np.float32) , np.ones(win_len,dtype=np.float32), get_lags=True )
+     #n_lags_t_ = win_len*2-1
+     lags_t,_ = ccor( np.ones(nt,dtype=np.float32) , np.ones(nt,dtype=np.float32), get_lags=True )
+     n_lags_t_ = nt*2-1
+     n_lags_t  = lags_t.shape[0]
+     if (n_lags_t!=n_lags_t_):
+         raise AssertionError('check lags [t]')
+
+     data['lags_t']   = lags_t
+     data['n_lags_t'] = n_lags_t
+
+     if verbose:
+         even_print('n lags (Δt)' , '%i'%(n_lags_t,))
+
+     ## get lags [z]
+     lags_z,_ = ccor( np.ones(nz,dtype=np.float32) , np.ones(nz,dtype=np.float32), get_lags=True )
+     n_lags_z_ = nz*2-1
+     n_lags_z  = lags_z.shape[0]
+     if (n_lags_z!=n_lags_z_):
+         raise AssertionError('check lags [z]')
+
+     data['lags_z']   = lags_z
+     data['n_lags_z'] = n_lags_z
+
+     if verbose:
+         even_print('n lags (Δz)' , '%i'%(n_lags_z,))
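+
+     ## e.g. (implied by the 2*n-1 checks above): a full cross-correlation of two
+     ## length-n signals has sample lags -(n-1)..(n-1), so nt=3 would give
+     ## lags_t = [-2,-1,0,1,2] and n_lags_t = 5; physical lags follow as
+     ## lags_t*dt and lags_z*dz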
+
+     # ===
+
+     ## cross-correlation pairs
+     ## [ str:var1, str:var2, bool:do_density_weighting ]
+     ccor_combis = [
+         [ 'u' , 'v' , True  ], ## [ ρ·u″ , ρ·v″ ]
+         [ 'u' , 'u' , True  ], ## [ ρ·u″ , ρ·u″ ]
+         [ 'v' , 'v' , True  ], ## [ ρ·v″ , ρ·v″ ]
+         [ 'w' , 'w' , True  ], ## [ ρ·w″ , ρ·w″ ]
+         [ 'u' , 'v' , False ], ## [ u′ , v′ ]
+         [ 'u' , 'u' , False ], ## [ u′ , u′ ]
+         [ 'v' , 'v' , False ], ## [ v′ , v′ ]
+         [ 'w' , 'w' , False ], ## [ w′ , w′ ]
+         ]
+
+     ## generate cross-correlation scalar names
+     scalars = []
+     for ccor_combi in ccor_combis:
+         s1,s2,do_density_weighting = ccor_combi
+         if do_density_weighting:
+             scalars.append(f'r{s1}II_r{s2}II')
+         else:
+             scalars.append(f'{s1}I_{s2}I')
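+
+     ## e.g. ('u','v',True) --> 'ruII_rvII' (ρ·u″,ρ·v″) and ('u','v',False) --> 'uI_vI' (u′,v′),
+     ## so the full list above yields: ruII_rvII, ruII_ruII, rvII_rvII, rwII_rwII,
+     ## uI_vI, uI_uI, vI_vI, wI_wI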
+
+     ## generate avg scalar names
+     scalars_Re_avg = []
+     scalars_Fv_avg = []
+     for ccor_combi in ccor_combis:
+         s1,s2,do_density_weighting = ccor_combi
+         if do_density_weighting and ('rho' not in scalars_Re_avg):
+             scalars_Re_avg.append('rho')
+         if do_density_weighting:
+             if (s1 not in scalars_Fv_avg):
+                 scalars_Fv_avg.append(s1)
+             if (s2 not in scalars_Fv_avg):
+                 scalars_Fv_avg.append(s2)
+         else:
+             if (s1 not in scalars_Re_avg):
+                 scalars_Re_avg.append(s1)
+             if (s2 not in scalars_Re_avg):
+                 scalars_Re_avg.append(s2)
+
+     ## numpy formatted arrays: buffers for ccor & other data (rank-local)
+     Rz         = np.zeros(shape=(nyr,n_lags_z) , dtype={'names':scalars        , 'formats':[ np.dtype(np.float64) for s in scalars        ]})
+     Rt         = np.zeros(shape=(nyr,n_lags_t) , dtype={'names':scalars        , 'formats':[ np.dtype(np.float64) for s in scalars        ]})
+     covariance = np.zeros(shape=(nyr,)         , dtype={'names':scalars        , 'formats':[ np.dtype(np.float64) for s in scalars        ]})
+     avg_Re     = np.zeros(shape=(nyr,)         , dtype={'names':scalars_Re_avg , 'formats':[ np.dtype(np.float64) for s in scalars_Re_avg ]})
+     avg_Fv     = np.zeros(shape=(nyr,)         , dtype={'names':scalars_Fv_avg , 'formats':[ np.dtype(np.float64) for s in scalars_Fv_avg ]})
+
+     if verbose:
+         even_print('n cross-correlation scalar combinations' , f'{len(ccor_combis):d}')
+
+     # ==============================================================
+     # check memory
+     # ==============================================================
+
+     hostname = MPI.Get_processor_name()
+     mem_free_gb = psutil.virtual_memory().free / 1024**3
+     G = self.comm.gather([ self.rank , hostname , mem_free_gb ], root=0)
+     G = self.comm.bcast(G, root=0)
+
+     host_mem = {}
+     for rank, host, mem in G:
+         if host not in host_mem or mem < host_mem[host]:
+             host_mem[host] = mem
+     total_free = sum(host_mem.values())
+
+     if verbose:
+         print(72*'-')
+         for key,value in host_mem.items():
+             even_print(f'RAM free {key}', f'{int(np.floor(value)):d} [GB]')
+         even_print('RAM free (local,min)', f'{int(np.floor(min(host_mem.values()))):d} [GB]')
+         even_print('RAM free (global)', f'{int(np.floor(total_free)):d} [GB]')
+
+     shape_read = (nx,sy,nz,nt) ## local
+     if verbose: even_print('read shape (local)', f'[{nx:d},{sy:d},{nz:d},{nt:d}]')
+     data_gb = np.dtype(np.float64).itemsize * np.prod(shape_read) / 1024**3
+     if verbose: even_print('read size (global)', f'{int(np.ceil(data_gb*ry)):d} [GB]')
+
+     if verbose: even_print('read size (global) ×6', f'{int(np.ceil(data_gb*ry*6)):d} [GB]')
+     ram_usage_est = data_gb*ry*6/total_free
+     if verbose: even_print('RAM usage estimate', f'{100*ram_usage_est:0.1f} [%]')
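+
+     ## worked example (hypothetical numbers): nx=1, sy=2, nz=512, nt=4096 gives
+     ## 8 B * 1*2*512*4096 / 1024**3 ≈ 0.031 [GB] per scalar per rank; with ry=32
+     ## ranks and the ×6 safety factor (up to 3 float64 fields plus intermediates)
+     ## the estimate is ≈ 6 [GB] against the pooled free RAM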
+
+     self.comm.Barrier()
+     if (ram_usage_est>0.80):
+         print('RAM consumption might be too high. exiting.')
+         self.comm.Abort(1)
+
+     # ==============================================================
+     # main loop
+     # ==============================================================
+
+     if verbose:
+         progress_bar = tqdm(
+             #total=len(ccor_combis)*cy,
+             total=len(ccor_combis)*(nyr//sy),
+             ncols=100,
+             desc='ccor',
+             leave=True,
+             file=sys.stdout,
+             mininterval=0.1,
+             smoothing=0.,
+             #bar_format="\033[B{l_bar}{bar}| {n}/{total} [{percentage:.1f}%] {elapsed}/{remaining}\033[A\n\b",
+             bar_format="{l_bar}{bar}| {n}/{total} [{percentage:.1f}%] {elapsed}/{remaining}",
+             ascii="░█",
+             colour='#FF6600',
+             )
+
+     for cci,cc in enumerate(ccor_combis): ## ccor pairs
+
+         if verbose: tqdm.write(72*'-')
+
+         scalar_L, scalar_R, do_density_weighting = cc
+
+         if do_density_weighting:
+             msg = f'ccor[ρ·{scalar_L}″,ρ·{scalar_R}″]'
+         else:
+             msg = f'ccor[{scalar_L}′,{scalar_R}′]'
+         if verbose:
+             tqdm.write(even_print('computing',msg,s=True,))
+
+         dset_L   = self[f'data/{scalar_L}']
+         dset_R   = self[f'data/{scalar_R}']
+         dset_rho = self['data/rho']
+
+         scalar = scalars[cci]
+
+         ## assert scalar name
+         if do_density_weighting:
+             if (f'r{scalar_L}II_r{scalar_R}II' != scalar ):
+                 raise ValueError('scalar name mismatch')
+         else:
+             if (f'{scalar_L}I_{scalar_R}I' != scalar ):
+                 raise ValueError('scalar name mismatch')
+
+         # ## [y] loop outer (chunks within rank)
+         # for cyl_ in cyl:
+         #     cy1, cy2 = cyl_
+         #     nyc = cy2 - cy1
+
+         for ci in range(nyr//sy):
+             cy1 = ry1 + ci*sy
+             cy2 = cy1 + sy
+             nyc = cy2 - cy1
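+
+             ## e.g. (hypothetical numbers): nyr=24, sy=2, ry1=48 --> 12 chunks per rank;
+             ## chunk ci=0 reads global y-rows [48,50), ci=1 reads [50,52), etc.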
+
+             self.comm.Barrier()
+             t_start = timeit.default_timer()
+
+             ## read data L
+             n_scalars_read = 1 ## initialize
+             scalar_str = scalar_L
+             with dset_L.collective:
+                 data_L = np.copy( dset_L[:,:,cy1:cy2,:].T ).astype(np.float64)
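+
+             ## note (implied by the shape check below): the transpose maps the on-disk
+             ## layout [t,z,y,x] to in-memory [x,y,z,t], so axis 2 of the file slice is [y]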
+
+             ## read data R (if != data L)
+             if (scalar_L==scalar_R):
+                 data_R = np.copy( data_L )
+             else:
+                 n_scalars_read += 1
+                 scalar_str += f',{scalar_R}'
+                 with dset_R.collective:
+                     data_R = np.copy( dset_R[:,:,cy1:cy2,:].T ).astype(np.float64)
+
+             ## read ρ
+             if do_density_weighting:
+                 n_scalars_read += 1
+                 scalar_str += ',ρ'
+                 with dset_rho.collective:
+                     rho = np.copy( dset_rho[:,:,cy1:cy2,:].T ).astype(np.float64)
+             else:
+                 rho = None
+
+             self.comm.Barrier()
+             t_delta = timeit.default_timer() - t_start
+             data_gb = n_scalars_read * ( self.nx * ry * (cy2-cy1) * self.nz * self.nt * dset_L.dtype.itemsize ) / 1024**3
+             if verbose:
+                 tqdm.write(even_print(f'read: {scalar_str}', '%0.3f [GB] %0.3f [s] %0.3f [GB/s]'%(data_gb,t_delta,(data_gb/t_delta)), s=True))
+
+             ## data_L and data_R should be [nx,nyc,nz,nt] where nyc is the chunk [y] range
+             if ( data_L.shape != (nx,nyc,nz,nt) ) or ( data_R.shape != (nx,nyc,nz,nt) ):
+                 print(f'rank {self.rank:d}: shape violation')
+                 self.comm.Abort(1)
+             if (rho is not None) and ( rho.shape != (nx,nyc,nz,nt) ):
+                 print(f'rank {self.rank:d}: shape violation')
+                 self.comm.Abort(1)
+
+             # === redimensionalize
+
+             if scalar_L in ['u','v','w',]:
+                 data_L *= U_inf
+             else:
+                 raise ValueError(f"no redimensionalization rule for scalar '{scalar_L}'")
+
+             if scalar_R in ['u','v','w',]:
+                 data_R *= U_inf
+             else:
+                 raise ValueError(f"no redimensionalization rule for scalar '{scalar_R}'")
+
+             if (rho is not None): ## i.e. if do_density_weighting
+                 rho *= rho_inf
+
+             # === compute mean-removed data
+
+             ## avg(□) or avg(ρ·□)/avg(ρ) in [t]
+             if do_density_weighting:
+                 rho_avg     = np.mean( rho        , axis=3, dtype=np.float64, keepdims=True) ## [x,y,z,1]
+                 data_L_avg  = np.mean( rho*data_L , axis=3, dtype=np.float64, keepdims=True) ## [x,y,z,1]
+                 data_L_avg /= rho_avg
+                 data_R_avg  = np.mean( rho*data_R , axis=3, dtype=np.float64, keepdims=True) ## [x,y,z,1]
+                 data_R_avg /= rho_avg
+             else:
+                 data_L_avg = np.mean( data_L , axis=3, dtype=np.float64, keepdims=True) ## [x,y,z,1]
+                 data_R_avg = np.mean( data_R , axis=3, dtype=np.float64, keepdims=True) ## [x,y,z,1]
+
+             ## Reynolds prime □′ or Favre prime □″
+             data_L -= data_L_avg
+             data_R -= data_R_avg
+
+             ## assert stationarity / definition of averaging
+             ## avg(□′)==0 or avg(ρ·□″)==0
+             if do_density_weighting:
+                 a_ = np.mean(rho*data_L, axis=3, dtype=np.float64, keepdims=True)
+                 b_ = np.mean(rho*data_R, axis=3, dtype=np.float64, keepdims=True)
+             else:
+                 a_ = np.mean(data_L, axis=3, dtype=np.float64, keepdims=True)
+                 b_ = np.mean(data_R, axis=3, dtype=np.float64, keepdims=True)
+             if not np.allclose( a_, np.zeros_like(a_), atol=1e-6 ) or not np.allclose( b_, np.zeros_like(b_), atol=1e-6 ):
+                 print(f'rank {self.rank:d}: avg(□′)!=0 or avg(ρ·□″)!=0')
+                 self.comm.Abort(1)
+
+             ## covariance: <□′·□′> OR <ρ□″·ρ□″> --> note that this is NOT the typical Favre <ρ·□″□″>
+             if do_density_weighting:
+                 covariance_ = np.mean( rho*data_L * rho*data_R , axis=3 , dtype=np.float64, keepdims=True)
+             else:
+                 covariance_ = np.mean( data_L*data_R , axis=3 , dtype=np.float64, keepdims=True)
+
+             ## write this chunk/scalar's covariance to covariance buffer
+             ## avg over [x,z] : [x,y,z,1] --> [y]
+             yiA = cy1 - ry1
+             yiB = cy2 - ry1
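+
+             ## e.g. (continuing the hypothetical numbers above): ry1=48, cy1=50, cy2=52
+             ## --> yiA=2, yiB=4, i.e. global y-rows [50,52) land in rank-local buffer rows 2..3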
+             covariance[scalar][yiA:yiB] = np.squeeze( np.mean( covariance_ , axis=(0,2,3) , dtype=np.float64) )
+
+             ## write (rank-local) 1D [y] averages
+             if do_density_weighting:
+                 avg_Fv[scalar_L][yiA:yiB] = np.squeeze( np.mean( data_L_avg , axis=(0,2,3) , dtype=np.float64) )
+                 avg_Fv[scalar_R][yiA:yiB] = np.squeeze( np.mean( data_R_avg , axis=(0,2,3) , dtype=np.float64) )
+                 avg_Re['rho'][yiA:yiB]    = np.squeeze( np.mean( rho_avg    , axis=(0,2,3) , dtype=np.float64) )
+             else:
+                 avg_Re[scalar_L][yiA:yiB] = np.squeeze( np.mean( data_L_avg , axis=(0,2,3) , dtype=np.float64) )
+                 avg_Re[scalar_R][yiA:yiB] = np.squeeze( np.mean( data_R_avg , axis=(0,2,3) , dtype=np.float64) )
+
+             # ===============================================================================
+             # At this point you have 4D [x,y,z,t] [□′,□′] or [ρ·□″,ρ·□″] data
+             # ===============================================================================
+
+             def __ccor_z_thread_kernel(xi,ti,yii,do_density_weighting):
+                 if do_density_weighting:
+                     uL = rho[xi,yii,:,ti] * data_L[xi,yii,:,ti]
+                     uR = rho[xi,yii,:,ti] * data_R[xi,yii,:,ti]
+                 else:
+                     uL = data_L[xi,yii,:,ti]
+                     uR = data_R[xi,yii,:,ti]
+                 return xi,ti,ccor(uL,uR)
+
+             def __ccor_t_thread_kernel(xi,zi,yii,do_density_weighting):
+                 if do_density_weighting:
+                     uL = rho[xi,yii,zi,:] * data_L[xi,yii,zi,:]
+                     uR = rho[xi,yii,zi,:] * data_R[xi,yii,zi,:]
+                 else:
+                     uL = data_L[xi,yii,zi,:]
+                     uR = data_R[xi,yii,zi,:]
+                 return xi,zi,ccor(uL,uR)
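+
+             ## note: the two kernels close over the data_L/data_R/rho arrays read above;
+             ## each call extracts one 1D [z] or [t] signal pair and returns its full
+             ## cross-correlation, so the thread pools below fan out over all (xi,ti)
+             ## resp. (xi,zi) index tuples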
+
+             self.comm.Barrier()
+             t_start = timeit.default_timer()
+
+             ## [y] loop inner (indices within chunk)
+             for yi in range(cy1,cy2):
+
+                 yii  = yi - cy1 ## chunk local
+                 yiii = yi - ry1 ## rank local
+
+                 ## ccor buffers for [y] loop inner
+                 R_xt = np.zeros((nx,nt,n_lags_z) , dtype=np.float64) ## [x,t] range for ccor(z)
+                 R_xz = np.zeros((nx,nz,n_lags_t) , dtype=np.float64) ## [x,z] range for ccor(t)
+
+                 # ===========================================================================
+                 # ccor(z) : loop over [x,t]
+                 # ===========================================================================
+
+                 ## concurrent/threaded execution for ccor(z)
+                 tasks = [(xi,ti,yii,do_density_weighting) for xi in range(nx) for ti in range(nt)]
+                 with ThreadPoolExecutor(max_workers=n_threads) as executor:
+                     results = executor.map(lambda args: __ccor_z_thread_kernel(*args), tasks) ## 'args' avoids shadowing the time vector 't'
+                     for xi,ti,result in results:
+                         R_xt[xi,ti,:] = result
+
+                 # for xi in range(nx):
+                 #     for ti in range(nt):
+                 #
+                 #         ## 1D [z] □′ or ρ·□″ vectors
+                 #         if do_density_weighting:
+                 #             uL = np.copy( rho[xi,yii,:,ti] * data_L[xi,yii,:,ti] )
+                 #             uR = np.copy( rho[xi,yii,:,ti] * data_R[xi,yii,:,ti] )
+                 #         else:
+                 #             uL = np.copy( data_L[xi,yii,:,ti] )
+                 #             uR = np.copy( data_R[xi,yii,:,ti] )
+                 #
+                 #         R_xt[xi,ti,:] = ccor( uL , uR )
+
+                 ## avg in [x,t] & write in rank context
+                 Rz[scalar][yiii,:] = np.mean(R_xt, axis=(0,1), dtype=np.float64)
+
+                 # ===========================================================================
+                 # ccor(t) : loop over [x,z]
+                 # ===========================================================================
+
+                 ## concurrent/threaded execution for ccor(t)
+                 tasks = [(xi,zi,yii,do_density_weighting) for xi in range(nx) for zi in range(nz)]
+                 with ThreadPoolExecutor(max_workers=n_threads) as executor:
+                     results = executor.map(lambda args: __ccor_t_thread_kernel(*args), tasks)
+                     for xi,zi,result in results:
+                         R_xz[xi,zi,:] = result
+
+                 # for xi in range(nx):
+                 #     for zi in range(nz):
+                 #
+                 #         ## 1D [t] □′ or ρ·□″ vectors
+                 #         if do_density_weighting:
+                 #             uL = np.copy( rho[xi,yii,zi,:] * data_L[xi,yii,zi,:] )
+                 #             uR = np.copy( rho[xi,yii,zi,:] * data_R[xi,yii,zi,:] )
+                 #         else:
+                 #             uL = np.copy( data_L[xi,yii,zi,:] )
+                 #             uR = np.copy( data_R[xi,yii,zi,:] )
+                 #
+                 #         R_xz[xi,zi,:] = ccor( uL , uR )
+
+                 ## avg in [x,z] & write in rank context
+                 Rt[scalar][yiii,:] = np.mean(R_xz, axis=(0,1), dtype=np.float64)
+
+             self.comm.Barrier()
+             t_delta = timeit.default_timer() - t_start
+             if verbose: tqdm.write(even_print(msg, format_time_string(t_delta), s=True))
+             if verbose: progress_bar.update() ## (scalar, [y] chunk) progress
+             #break ## debug
+         #break ## debug
+     if verbose: progress_bar.close()
+     self.comm.Barrier()
+     if verbose: print(72*'-')
+
+     # ==============================================================
+     # write HDF5 (.h5) file
+     # ==============================================================
+
+     ## overwrite outfile!
+     ## open on rank 0 and write attributes, dimensions, etc.
+     if (self.rank==0):
+         with h5py.File(fn_h5_out, 'w') as hfw:
+
+             ## write floats,ints as top-level attributes
+             for key,val in data.items():
+                 if isinstance(data[key], (int,np.int32,np.int64)):
+                     hfw.attrs[key] = val
+                 elif isinstance(data[key], (float,np.float32,np.float64)):
+                     hfw.attrs[key] = val
+                 elif isinstance(data[key], np.ndarray):
+                     pass
+                 else:
+                     print(f'key {key} is type {str(type(data[key]))}')
+                     self.comm.Abort(1)
+
+             ## write numpy arrays
+             hfw.create_dataset( 'dims/x'      , data=x      ) ## [m]
+             hfw.create_dataset( 'dims/y'      , data=y      ) ## [m]
+             hfw.create_dataset( 'dims/z'      , data=z      ) ## [m]
+             hfw.create_dataset( 'dims/t'      , data=t      ) ## [s]
+             hfw.create_dataset( 'dims/lags_z' , data=lags_z ) ## sample lags (×dz --> [m])
+             hfw.create_dataset( 'dims/lags_t' , data=lags_t ) ## sample lags (×dt --> [s])
+
+             ## initialize datasets
+             for scalar in scalars:
+                 hfw.create_dataset( f'covariance/{scalar}' , shape=(ny,)         , dtype=np.float64, chunks=None         , data=np.full((ny,),0.,np.float64)         )
+                 hfw.create_dataset( f'Rz/{scalar}'         , shape=(ny,n_lags_z) , dtype=np.float64, chunks=(1,n_lags_z) , data=np.full((ny,n_lags_z),0.,np.float64) )
+                 hfw.create_dataset( f'Rt/{scalar}'         , shape=(ny,n_lags_t) , dtype=np.float64, chunks=(1,n_lags_t) , data=np.full((ny,n_lags_t),0.,np.float64) )
+
+             ## initialize datasets 1D [y] mean
+             for scalar in avg_Re.dtype.names:
+                 hfw.create_dataset( f'avg/Re/{scalar}', shape=(ny,), dtype=np.float64, chunks=None, data=np.full((ny,),0.,np.float64) )
+             for scalar in avg_Fv.dtype.names:
+                 hfw.create_dataset( f'avg/Fv/{scalar}', shape=(ny,), dtype=np.float64, chunks=None, data=np.full((ny,),0.,np.float64) )
+
+     self.comm.Barrier()
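+
+     ## note: the file is created serially on rank 0 above so that attributes and
+     ## dataset layouts exist before every rank re-opens it with the mpio driver
+     ## below for collective, non-overlapping [y]-slab writes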
+
+     with h5py.File(fn_h5_out, 'a', driver='mpio', comm=self.comm) as hfw:
+
+         ## collectively write covariance,Rz,Rt
+         for scalar in scalars:
+             dset = hfw[f'covariance/{scalar}']
+             with dset.collective:
+                 dset[ry1:ry2] = covariance[scalar][:]
+             dset = hfw[f'Rz/{scalar}']
+             with dset.collective:
+                 dset[ry1:ry2,:] = Rz[scalar][:,:]
+             dset = hfw[f'Rt/{scalar}']
+             with dset.collective:
+                 dset[ry1:ry2,:] = Rt[scalar][:,:]
+
+         ## collectively write 1D [y] avgs (Reynolds,Favre)
+         for scalar in avg_Re.dtype.names:
+             dset = hfw[f'avg/Re/{scalar}']
+             with dset.collective:
+                 dset[ry1:ry2] = avg_Re[scalar][:]
+         for scalar in avg_Fv.dtype.names:
+             dset = hfw[f'avg/Fv/{scalar}']
+             with dset.collective:
+                 dset[ry1:ry2] = avg_Fv[scalar][:]
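+
+     ## read-back sketch (illustrative; dataset names are those created above, the
+     ## filename is hypothetical): the output can be inspected serially, e.g.
+     ##
+     ##   with h5py.File('ioXY_unsteady_mpi_0_ccor.h5','r') as hf:
+     ##       lags_t = hf['dims/lags_t'][()]          ## (n_lags_t,)
+     ##       Rt_uv  = hf['Rt/uI_vI'][()]             ## (ny, n_lags_t)
+     ##       cov_uv = hf['covariance/uI_vI'][()]     ## (ny,)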
+
+     ## report file contents
+     self.comm.Barrier()
+     if (self.rank==0):
+         even_print( os.path.basename(fn_h5_out) , f'{(os.path.getsize(fn_h5_out)/1024**2):0.1f} [MB]' )
+         print(72*'-')
+         with h5py.File(fn_h5_out,'r') as hfr:
+             h5_print_contents(hfr)
+     self.comm.Barrier()
+
+     if verbose: print(72*'-')
+     if verbose: print('total time : rgd.calc_ccor_xpln() : %s'%format_time_string((timeit.default_timer() - t_start_func)))
+     if verbose: print(72*'-')
+     return