ssb-sgis 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- sgis/__init__.py +107 -121
- sgis/exceptions.py +5 -3
- sgis/geopandas_tools/__init__.py +1 -0
- sgis/geopandas_tools/bounds.py +86 -47
- sgis/geopandas_tools/buffer_dissolve_explode.py +62 -39
- sgis/geopandas_tools/centerlines.py +53 -44
- sgis/geopandas_tools/cleaning.py +87 -104
- sgis/geopandas_tools/conversion.py +164 -107
- sgis/geopandas_tools/duplicates.py +33 -19
- sgis/geopandas_tools/general.py +84 -52
- sgis/geopandas_tools/geometry_types.py +24 -10
- sgis/geopandas_tools/neighbors.py +23 -11
- sgis/geopandas_tools/overlay.py +136 -53
- sgis/geopandas_tools/point_operations.py +11 -10
- sgis/geopandas_tools/polygon_operations.py +53 -61
- sgis/geopandas_tools/polygons_as_rings.py +121 -78
- sgis/geopandas_tools/sfilter.py +17 -17
- sgis/helpers.py +116 -58
- sgis/io/dapla_functions.py +32 -23
- sgis/io/opener.py +13 -6
- sgis/io/read_parquet.py +2 -2
- sgis/maps/examine.py +55 -28
- sgis/maps/explore.py +471 -112
- sgis/maps/httpserver.py +12 -12
- sgis/maps/legend.py +285 -134
- sgis/maps/map.py +248 -129
- sgis/maps/maps.py +123 -119
- sgis/maps/thematicmap.py +260 -94
- sgis/maps/tilesources.py +3 -8
- sgis/networkanalysis/_get_route.py +5 -4
- sgis/networkanalysis/_od_cost_matrix.py +44 -1
- sgis/networkanalysis/_points.py +10 -4
- sgis/networkanalysis/_service_area.py +5 -2
- sgis/networkanalysis/closing_network_holes.py +22 -64
- sgis/networkanalysis/cutting_lines.py +58 -46
- sgis/networkanalysis/directednetwork.py +16 -8
- sgis/networkanalysis/finding_isolated_networks.py +6 -5
- sgis/networkanalysis/network.py +15 -13
- sgis/networkanalysis/networkanalysis.py +79 -61
- sgis/networkanalysis/networkanalysisrules.py +21 -17
- sgis/networkanalysis/nodes.py +2 -3
- sgis/networkanalysis/traveling_salesman.py +6 -3
- sgis/parallel/parallel.py +372 -142
- sgis/raster/base.py +9 -3
- sgis/raster/cube.py +331 -213
- sgis/raster/cubebase.py +15 -29
- sgis/raster/image_collection.py +2560 -0
- sgis/raster/indices.py +17 -12
- sgis/raster/raster.py +356 -275
- sgis/raster/sentinel_config.py +104 -0
- sgis/raster/zonal.py +38 -14
- {ssb_sgis-1.0.1.dist-info → ssb_sgis-1.0.3.dist-info}/LICENSE +1 -1
- {ssb_sgis-1.0.1.dist-info → ssb_sgis-1.0.3.dist-info}/METADATA +87 -16
- ssb_sgis-1.0.3.dist-info/RECORD +61 -0
- {ssb_sgis-1.0.1.dist-info → ssb_sgis-1.0.3.dist-info}/WHEEL +1 -1
- sgis/raster/bands.py +0 -48
- sgis/raster/gradient.py +0 -78
- sgis/raster/methods_as_functions.py +0 -124
- sgis/raster/torchgeo.py +0 -150
- ssb_sgis-1.0.1.dist-info/RECORD +0 -63
sgis/parallel/parallel.py
CHANGED
```diff
@@ -2,12 +2,14 @@ import functools
 import inspect
 import itertools
 import multiprocessing
+import pickle
 import warnings
-from collections.abc import Callable
+from collections.abc import Callable
+from collections.abc import Collection
+from collections.abc import Iterable
 from pathlib import Path
 from typing import Any
 
-
 try:
     import dapla as dp
 except ImportError:
@@ -16,18 +18,19 @@ except ImportError:
 import joblib
 import numpy as np
 import pandas as pd
-from geopandas import GeoDataFrame
+from geopandas import GeoDataFrame
 from pandas import DataFrame
-from
+from pandas import Series
 
-from ..geopandas_tools.general import clean_clip, clean_geoms
 from ..geopandas_tools.neighbors import get_neighbor_indices
 from ..geopandas_tools.overlay import clean_overlay
-from ..helpers import LocalFunctionError
-
+from ..helpers import LocalFunctionError
+from ..helpers import dict_zip_union
+from ..helpers import in_jupyter
 
 try:
-    from ..io.dapla_functions import
+    from ..io.dapla_functions import read_geopandas
+    from ..io.dapla_functions import write_geopandas
 
     # from ..io.write_municipality_data import write_municipality_data
 except ImportError:
@@ -35,16 +38,13 @@ except ImportError:
 
 
 try:
-    from dapla import read_pandas
+    from dapla import read_pandas
+    from dapla import write_pandas
+    from dapla.gcs import GCSFileSystem
 except ImportError:
-    pass
-
 
-
-
-    raise TypeError("args should be a tuple (it should not be unpacked with *)")
-    argnames = inspect.getfullargspec(func).args[index_start:]
-    return {name: value for value, name in zip(args, argnames, strict=False)}
+
+    class GCSFileSystem:
+        """Placeholder."""
 
 
 class Parallel:
@@ -87,10 +87,23 @@ class Parallel:
         backend: str = "multiprocessing",
         context: str = "spawn",
         maxtasksperchild: int = 10,
+        chunksize: int = 1,
         **kwargs,
-    ):
+    ) -> None:
+        """Initialize a Parallel instance with specified settings for parallel execution.
+
+        Args:
+            processes: Number of parallel processes. Set to 1 to run without parallelization.
+            backend: The backend to use for parallel execution. Defaults to 'multiprocessing'.
+            context: The context setting for multiprocessing. Defaults to 'spawn'.
+            maxtasksperchild: The maximum number of tasks a worker process can complete
+                before it is replaced. Defaults to 10.
+            chunksize: The size of the chunks of the iterable to distribute to workers.
+            **kwargs: Additional keyword arguments passed to the underlying parallel execution backend.
+        """
         self.processes = int(processes)
         self.maxtasksperchild = maxtasksperchild
+        self.chunksize = chunksize
        self.backend = backend
         self.context = context
         self.kwargs = kwargs
```
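The chunk size becomes instance state in 1.0.3: it is set once in the constructor and reused by map() and starmap(), whose per-call chunksize parameter is removed in the hunks below. A minimal usage sketch, assuming Parallel is re-exported at the package root as in earlier releases (the worker function is a made-up stand-in):

```python
import sgis as sg


def double(x: int) -> int:
    # Stand-in worker; defined at module level so the spawned
    # worker processes can pickle it.
    return x * 2


if __name__ == "__main__":
    # 1.0.1 passed chunksize per call: .map(double, [1, 2, 3], chunksize=1)
    # 1.0.3 fixes it when the instance is created:
    p = sg.Parallel(processes=3, backend="multiprocessing", chunksize=1)
    print(p.map(double, [1, 2, 3]))  # [2, 4, 6]
```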
```diff
@@ -103,7 +116,6 @@ class Parallel:
         iterable: Collection,
         args: tuple | None = None,
         kwargs: dict | None = None,
-        chunksize: int = 1,
     ) -> list[Any]:
         """Run functions in parallel with items of an iterable as 0th arguemnt.
 
@@ -111,7 +123,7 @@ class Parallel:
             func: Function to be run.
             iterable: An iterable where each item will be passed to func as
                 0th positional argument.
-
+            args: Positional arguments passed to 'func' starting from the 1st argument.
                 The 0th argument will be reserved for the values of 'iterable'.
             kwargs: Keyword arguments passed to 'func'. Must be passed as a dict,
                 not unpacked into separate keyword arguments.
@@ -120,8 +132,8 @@ class Parallel:
             A list of the return values of the function, one for each item in
             'iterable'.
 
-        Examples
-
+        Examples:
+        ---------
         Multiply each list element by 2.
 
         >>> iterable = [1, 2, 3]
@@ -159,21 +171,20 @@ class Parallel:
         ... print(results)
         [2, 4, 6]
         """
-
         if args:
             # start at index 1, meaning the 0th argument (the iterable) is still available
-            args_as_kwargs =
+            args_as_kwargs = _turn_args_into_kwargs(func, args, index_start=1)
         else:
             args_as_kwargs = {}
 
-        self.
+        self._validate_execution(func)
 
         kwargs = self._validate_kwargs(kwargs) | args_as_kwargs
 
         func_with_kwargs = functools.partial(func, **kwargs)
 
         if self.processes == 1:
-            return
+            return [func_with_kwargs(item) for item in iterable]
 
         iterable = list(iterable)
 
@@ -182,21 +193,42 @@ class Parallel:
 
         if not processes:
             return []
+        elif processes == 1:
+            return [func_with_kwargs(item) for item in iterable]
 
-
-
-
-
-
-
-
-
-
+        try:
+            if self.backend == "multiprocessing":
+                with multiprocessing.get_context(self.context).Pool(
+                    processes, maxtasksperchild=self.maxtasksperchild, **self.kwargs
+                ) as pool:
+                    try:
+                        return pool.map(
+                            func_with_kwargs, iterable, chunksize=self.chunksize
+                        )
+                    except Exception as e:
+                        pool.terminate()
+                        raise e
 
-
-
-
-
+            with joblib.Parallel(
+                n_jobs=processes, backend=self.backend, **self.kwargs
+            ) as parallel:
+                return parallel(
+                    joblib.delayed(func)(item, **kwargs) for item in iterable
+                )
+        except pickle.PickleError as e:
+            unpicklable = []
+            for k, v in locals().items():
+                try:
+                    pickle.dumps(v)
+                except pickle.PickleError:
+                    unpicklable.append(k)
+                except TypeError:
+                    pass
+            if unpicklable:
+                raise pickle.PickleError(
+                    f"Cannot unpickle objects: {unpicklable}"
+                ) from e
+            raise e
 
     def starmap(
         self,
```
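The new except-branch above is a diagnostic aid: when the pool raises a PickleError, the method probes every object in locals() with pickle.dumps and reports the names that fail to serialize, instead of surfacing the bare pool error. The same probing technique in isolation, as a standalone sketch (not the library's public API):

```python
import pickle


def find_unpicklable(namespace: dict) -> list[str]:
    """Return the names in a namespace whose values cannot be pickled."""
    unpicklable = []
    for name, value in namespace.items():
        try:
            pickle.dumps(value)
        except pickle.PickleError:
            unpicklable.append(name)
        except TypeError:
            # some objects (e.g. modules) raise TypeError instead
            pass
    return unpicklable


print(find_unpicklable({"n": 1, "f": lambda x: x}))  # ['f']
```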
```diff
@@ -204,7 +236,6 @@ class Parallel:
         iterable: Collection[Iterable[Any]],
         args: tuple | None = None,
         kwargs: dict | None = None,
-        chunksize: int = 1,
     ) -> list[Any]:
         """Run functions in parallel where items of the iterable are unpacked.
 
@@ -215,7 +246,7 @@ class Parallel:
             func: Function to be run.
             iterable: An iterable of iterables, where each item will be
                 unpacked as positional argument to the function.
-
+            args: Positional arguments passed to 'func' starting at argument position
                 n + 1, where n is the length of the iterables inside the iterable.
             kwargs: Keyword arguments passed to 'func'. Must be passed as a dict,
                 not unpacked into separate keyword arguments.
@@ -224,8 +255,8 @@ class Parallel:
             A list of the return values of the function, one for each item in
             'iterable'.
 
-        Examples
-
+        Examples:
+        ---------
         Multiply each list element by 2.
 
         >>> iterable = [(1, 2), (2, 3), (3, 4)]
@@ -262,13 +293,13 @@ class Parallel:
         if args:
             # starting the count at the length of the iterables inside the iterables
             iterable = list(iterable)
-            args_as_kwargs =
+            args_as_kwargs = _turn_args_into_kwargs(
                 func, args, index_start=len(iterable[0])
             )
         else:
             args_as_kwargs = {}
 
-        self.
+        self._validate_execution(func)
 
         kwargs = self._validate_kwargs(kwargs) | args_as_kwargs
 
@@ -290,7 +321,9 @@ class Parallel:
                     processes, maxtasksperchild=self.maxtasksperchild, **self.kwargs
                 ) as pool:
                     try:
-                        return pool.starmap(
+                        return pool.starmap(
+                            func_with_kwargs, iterable, chunksize=self.chunksize
+                        )
                     except Exception as e:
                         pool.terminate()
                         raise e
@@ -320,10 +353,10 @@ class Parallel:
         Returns:
             A DataFrame, or a list of DataFrames if concat is False.
         """
-        if
-
-
-
+        if strict:
+            res = self.map(read_pandas, files, kwargs=kwargs)
+        else:
+            res = self.map(_try_to_read_pandas, files, kwargs=kwargs)
 
         return pd.concat(res, ignore_index=ignore_index) if concat else res
 
@@ -342,14 +375,19 @@ class Parallel:
             concat: Whether to concat the results to a GeoDataFrame.
             ignore_index: Defaults to True.
             strict: If True (default), all files must exist.
+            chunksize: The size of the chunks of the iterable to distribute to workers.
             **kwargs: Keyword arguments passed to sgis.read_geopandas.
 
         Returns:
             A GeoDataFrame, or a list of GeoDataFrames if concat is False.
         """
-        if not
-
-
+        if "file_system" not in kwargs:
+            kwargs["file_system"] = dp.FileClient.get_gcs_file_system()
+
+        if strict:
+            res = self.map(read_geopandas, files, kwargs=kwargs)
+        else:
+            res = self.map(_try_to_read_geopandas, files, kwargs=kwargs)
 
         return pd.concat(res, ignore_index=ignore_index) if concat else res
 
```
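With strict=False, read_geopandas() now swaps in the _try_to_read_geopandas helper (added at the bottom of the module), which returns None for missing files; pd.concat drops the Nones, so missing paths simply vanish from the concatenated result. A hedged sketch, assuming a Dapla/GCS environment; the bucket paths are placeholders:

```python
import sgis as sg

# Placeholder paths; any file that does not exist is read as None
# and silently dropped when the results are concatenated.
paths = [
    "gs://bucket/data/0301.parquet",
    "gs://bucket/data/1103.parquet",
]
gdf = sg.Parallel(2, backend="threading").read_geopandas(paths, strict=False)
```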
```diff
@@ -367,10 +405,14 @@ class Parallel:
         clip: bool = True,
         max_rows_per_chunk: int = 150_000,
         processes_in_clip: int = 1,
-
+        verbose: bool = True,
+    ) -> None:
         """Split multiple datasets into municipalities and write as separate files.
 
         The files will be named as the municipality number.
+        Each dataset in 'in_data' is intersected with 'municipalities'
+        in parallel. The intersections themselves can also be run in parallel
+        with the 'processes_in_clip' argument.
 
         Args:
             in_data: Dictionary with dataset names as keys and file paths or
@@ -397,7 +439,12 @@ class Parallel:
                 not have to have the same length as 'in_data'.
             write_empty: If False (default), municipalities with no data will be skipped.
                 If True, an empty parquet file will be written.
-            clip: If True (default), the data will be clipped.
+            clip: If True (default), the data will be clipped. If False, the data will
+                be spatial joined.
+            max_rows_per_chunk: Number of rows per data chunk for processing.
+            processes_in_clip: Number of parallel processes for data clipping.
+            verbose: Whether to print during execution.
+
         """
         shared_kwds = {
             "municipalities": municipalities,
@@ -409,6 +456,7 @@ class Parallel:
             "max_rows_per_chunk": max_rows_per_chunk,
             "processes_in_clip": processes_in_clip,
             "strict": strict,
+            "verbose": verbose,
         }
 
         if isinstance(out_data, (str, Path)):
@@ -417,10 +465,12 @@ class Parallel:
         if funcdict is None:
             funcdict = {}
 
-
+        fs = dp.FileClient.get_gcs_file_system()
 
-        for _, data, folder, postfunc in
-            if data is None
+        for _, data, folder, postfunc in dict_zip_union(in_data, out_data, funcdict):
+            if data is None or (
+                not strict and isinstance(data, (str | Path)) and not fs.exists(data)
+            ):
                 continue
 
             kwds = shared_kwds | {
@@ -439,15 +489,33 @@ class Parallel:
         df: GeoDataFrame,
         args: tuple | None = None,
         kwargs: dict | None = None,
-
-
-        concat: bool =
+        n_chunks: int | None = None,
+        max_rows_per_chunk: int | None = None,
+        concat: bool = True,
     ) -> GeoDataFrame:
-
-        return func(df, *args, **kwargs)
+        """Run a function in parallel on chunks of a (Geo)DataFrame.
 
-
-
+        Args:
+            func: Function to run chunkwise. It should take
+                a (Geo)DataFrame as first argument.
+            df: (Geo)DataFrame to split in n_chunks and passed
+                as first argument to 'func'.
+            args: Positional arguments in 'func' after the DataFrame.
+            kwargs: Additional keyword arguments in 'func'.
+            n_chunks: Optionally set number of chunks to split
+                'df' into. Defaults to the 'processes' attribute
+                of the Parallel instance.
+            max_rows_per_chunk: Alternatively decide number of chunks
+                by a maximum number of rows per chunk.
+            concat: Whether to use pd.concat on the results.
+                Defaults to True.
+        """
+        if max_rows_per_chunk is None and n_chunks is None:
+            n_chunks: int = self.processes
+        elif n_chunks is None:
+            n_chunks: int = len(df) // max_rows_per_chunk
+        elif max_rows_per_chunk is not None and len(df) < max_rows_per_chunk:
+            return func(df, *args, **kwargs)
 
         chunks = np.array_split(np.arange(len(df)), n_chunks)
 
```
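In the rewritten chunkwise() above, the chunk count is resolved in priority order: an explicit n_chunks, otherwise len(df) // max_rows_per_chunk, otherwise the instance's processes attribute; a frame smaller than max_rows_per_chunk is processed directly without splitting. A sketch with synthetic data (the buffer function is illustrative):

```python
import geopandas as gpd
import sgis as sg


def buffer_10m(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    # Any DataFrame-in, DataFrame-out function can run chunkwise.
    return df.assign(geometry=df.buffer(10))


# Synthetic layer of 1000 points (coordinates are arbitrary).
points = gpd.GeoDataFrame(
    geometry=gpd.GeoSeries.from_xy(range(1000), range(1000), crs=25833)
)

p = sg.Parallel(4, backend="loky")
# 1000 rows // 250 max rows per chunk -> 4 chunks, run in parallel, then concatenated.
out = p.chunkwise(buffer_10m, points, max_rows_per_chunk=250)
```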
```diff
@@ -464,7 +532,7 @@ class Parallel:
         else:
             return out
 
-    def
+    def _validate_execution(self, func: Callable) -> None:
         """Multiprocessing doesn't work with local variables in interactive interpreter.
 
         Raising Exception to avoid confusion.
@@ -478,8 +546,8 @@ class Parallel:
             raise LocalFunctionError(func)
 
     @staticmethod
-    def _validate_kwargs(kwargs) -> dict:
-        """Make sure kwargs is a dict (not ** unpacked or None)"""
+    def _validate_kwargs(kwargs: dict) -> dict:
+        """Make sure kwargs is a dict (not ** unpacked or None)."""
         if kwargs is None:
             kwargs = {}
         elif not isinstance(kwargs, dict):
@@ -487,7 +555,7 @@ class Parallel:
         return kwargs
 
     def _execute(self) -> list[Any]:
-        [self.
+        [self._validate_execution(func) for func in self.funcs]
 
         if self.processes == 1:
             return [func() for func in self.funcs]
@@ -513,7 +581,8 @@ class Parallel:
             results = [pool.apply_async(func) for func in self.funcs]
             return [result.get() for result in results]
 
-    def __repr__(self):
+    def __repr__(self) -> str:
+        """String representation."""
         return (
             f"{self.__class__.__name__}(processes={self.processes}, "
             f"backend='{self.backend}', context='{self.context}')"
@@ -523,7 +592,7 @@
 def write_municipality_data(
     data: str | GeoDataFrame | DataFrame,
     out_folder: str,
-    municipalities: GeoDataFrame,
+    municipalities: GeoDataFrame | list[str] | None = None,
     with_neighbors: bool = False,
     muni_number_col: str = "KOMMUNENR",
     file_type: str = "parquet",
@@ -533,13 +602,39 @@ def write_municipality_data(
     max_rows_per_chunk: int = 150_000,
     processes_in_clip: int = 1,
     strict: bool = True,
+    verbose: bool = True,
 ) -> None:
+    """Splits and writes data into municipality-specific files.
+
+    Args:
+        data: Path to the data file or a GeoDataFrame.
+        out_folder: Path to the output directory where the municipality data
+            is written.
+        municipalities: Either a sequence of municipality numbers or a GeoDataFrame
+            of municipality polygons and municipality numbers in the column 'muni_number_col'.
+            Defaults to None.
+        with_neighbors: If True, include data from neighboring municipalities
+            for each municipality.
+        muni_number_col: Column name for municipality codes in 'municipalities'.
+        file_type: Format of the output file.
+        func: Function to process data before writing.
+        write_empty: If True, write empty files for municipalities without data.
+        clip: If True, clip the data to municipality boundaries. If False
+            the data is spatial joined.
+        max_rows_per_chunk: Maximum number of rows in each processed chunk.
+        processes_in_clip: Number of processes to use for clipping.
+        strict: If True (default) and the data has a municipality column,
+            all municipality numbers in 'data' must be present in 'municipalities'.
+        verbose: Whether to print during execution.
+
+    Returns:
+        None. The function writes files directly.
+    """
     write_func = (
         _write_neighbor_municipality_data
         if with_neighbors
         else _write_municipality_data
     )
-
     return write_func(
         data=data,
         out_folder=out_folder,
```
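Besides the full docstring, municipalities is now optional and polymorphic: a GeoDataFrame of polygons when geometries must be clipped or joined, a plain sequence of municipality numbers when the rows already carry them, or None to split on the existing muni_number_col. A usage sketch, assuming the function is exposed at the package root; the paths are placeholders:

```python
import sgis as sg

sg.write_municipality_data(
    data="gs://bucket/roads.parquet",
    out_folder="gs://bucket/roads_by_muni",
    municipalities=["0301", "1103"],  # plain numbers: no polygon layer needed
    muni_number_col="KOMMUNENR",
    file_type="parquet",
)
```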
```diff
@@ -552,27 +647,34 @@ def write_municipality_data(
         max_rows_per_chunk=max_rows_per_chunk,
         processes_in_clip=processes_in_clip,
         strict=strict,
+        verbose=verbose,
     )
 
 
-def _validate_data(
-
-
+def _validate_data(
+    data: str | list[str] | DataFrame | GeoDataFrame,
+) -> DataFrame | GeoDataFrame:
     if hasattr(data, "__iter__") and len(data) == 1:
-
-
-
+        data = data[0]
+    if isinstance(data, (str, Path)):
+        try:
+            return read_geopandas(str(data))
+        except ValueError as e:
+            try:
+                return read_pandas(str(data))
+            except ValueError as e2:
+                raise e.__class__(e, data) from e2
     return data
 
 
-def _get_out_path(out_folder, muni, file_type):
+def _get_out_path(out_folder: str | Path, muni: str, file_type: str) -> str:
     return str(Path(out_folder) / f"{muni}.{file_type.strip('.')}")
 
 
 def _write_municipality_data(
     data: str | GeoDataFrame | DataFrame,
     out_folder: str,
-    municipalities: GeoDataFrame,
+    municipalities: GeoDataFrame | list[str] | None = None,
     muni_number_col: str = "KOMMUNENR",
     file_type: str = "parquet",
     func: Callable | None = None,
@@ -581,21 +683,15 @@ def _write_municipality_data(
     max_rows_per_chunk: int = 150_000,
     processes_in_clip: int = 1,
     strict: bool = True,
+    verbose: bool = True,
 ) -> None:
-
-
-
-        try:
-            gdf = read_geopandas(str(data))
-        except ValueError as e:
-            try:
-                gdf = read_pandas(str(data))
-            except ValueError:
-                raise e.__class__(e, data)
-    elif isinstance(data, DataFrame):
-        gdf = data
+    if verbose:
+        to_print = out_folder
+        print(to_print)
     else:
-
+        to_print = None
+
+    gdf = _validate_data(data)
 
     if func is not None:
         gdf = func(gdf)
@@ -608,22 +704,29 @@ def _write_municipality_data(
         max_rows_per_chunk,
         processes_in_clip=processes_in_clip,
         strict=strict,
+        to_print=to_print,
     )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if municipalities is None:
+        muni_numbers = gdf[muni_number_col]
+    elif not isinstance(municipalities, DataFrame):
+        muni_numbers = municipalities
+    else:
+        muni_numbers = municipalities[muni_number_col]
+
+    # hardcode this to threading for efficiency in io bound task
+    Parallel(processes_in_clip, backend="threading").map(
+        _write_one_muni,
+        muni_numbers,
+        kwargs=dict(
+            gdf=gdf,
+            out_folder=out_folder,
+            muni_number_col=muni_number_col,
+            file_type=file_type,
+            write_empty=write_empty,
+            to_print=to_print,
+        ),
+    )
 
 
 def _write_neighbor_municipality_data(
```
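A design note on the hunk above: the per-municipality writes are fanned out with a hardcoded backend="threading" because each task is dominated by file I/O against GCS; threads share the in-memory GeoDataFrame, where a process backend would have to pickle it into every worker. The same pattern in isolation (the layer and output path are illustrative):

```python
import geopandas as gpd
import sgis as sg
from shapely.geometry import Point

# Tiny illustrative layer; the output folder is a placeholder.
gdf = gpd.GeoDataFrame(
    {"KOMMUNENR": ["0301", "1103"], "geometry": [Point(0, 0), Point(1, 1)]},
    crs=25833,
)


def write_subset(muni: str, gdf: gpd.GeoDataFrame, out_folder: str) -> None:
    # I/O-bound task: the filter is cheap, the remote write dominates.
    sg.write_geopandas(gdf[gdf["KOMMUNENR"] == muni], f"{out_folder}/{muni}.parquet")


sg.Parallel(2, backend="threading").map(
    write_subset,
    gdf["KOMMUNENR"],
    kwargs={"gdf": gdf, "out_folder": "gs://bucket/out"},
)
```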
```diff
@@ -638,11 +741,15 @@ def _write_neighbor_municipality_data(
     max_rows_per_chunk: int = 150_000,
     processes_in_clip: int = 1,
     strict: bool = True,
+    verbose: bool = True,
 ) -> None:
-
+    if verbose:
+        to_print = out_folder
+        print("out_folder:", to_print)
+    else:
+        to_print = None
 
-
-    gdf = read_geopandas(str(data))
+    gdf = _validate_data(data)
 
     if func is not None:
         gdf = func(gdf)
@@ -655,6 +762,7 @@ def _write_neighbor_municipality_data(
         max_rows_per_chunk,
         processes_in_clip,
         strict=strict,
+        to_print=to_print,
     )
 
     if municipalities.index.name != muni_number_col:
@@ -664,43 +772,97 @@ def _write_neighbor_municipality_data(
         municipalities, municipalities, max_distance=1
     )
 
-    for
-
+    # hardcode this to threading for efficiency in io bound task
+    Parallel(processes_in_clip, backend="threading").map(
+        _write_one_muni_with_neighbors,
+        municipalities.index,
+        kwargs=dict(
+            gdf=gdf,
+            neighbor_munis=neighbor_munis,
+            out_folder=out_folder,
+            muni_number_col=muni_number_col,
+            file_type=file_type,
+            write_empty=write_empty,
+            to_print=to_print,
+        ),
+    )
 
-        muni_and_neighbors = neighbor_munis.loc[[muni]]
-        gdf_neighbor = gdf.loc[gdf[muni_number_col].isin(muni_and_neighbors)]
 
-
-
-
-
-
+def _write_one_muni(
+    muni_number: Any,
+    gdf: GeoDataFrame | DataFrame,
+    out_folder: str | Path,
+    muni_number_col: str,
+    file_type: str,
+    write_empty: bool,
+    to_print: str | None = None,
+) -> None:
+    out = _get_out_path(out_folder, muni_number, file_type)
+
+    if to_print:
+        print("writing:", out)
+
+    gdf_muni = gdf.loc[gdf[muni_number_col] == muni_number]
+
+    if not len(gdf_muni):
+        if write_empty:
+            gdf_muni = gdf_muni.drop(columns="geometry", errors="ignore")
+            gdf_muni["geometry"] = None
+            write_pandas(gdf_muni, out)
+        return
 
-
+    write_geopandas(gdf_muni, out)
+
+
+def _write_one_muni_with_neighbors(
+    muni_number: Any,
+    gdf: GeoDataFrame | DataFrame,
+    neighbor_munis: Series,
+    out_folder: str | Path,
+    muni_number_col: str,
+    file_type: str,
+    write_empty: bool,
+    to_print: str | None = None,
+) -> None:
+    out = _get_out_path(out_folder, muni_number, file_type)
+
+    if to_print:
+        print("writing:", out)
+
+    muni_and_neighbors: Series = neighbor_munis.loc[[muni_number]]
+    gdf_neighbor = gdf.loc[gdf[muni_number_col].isin(muni_and_neighbors)]
+
+    if not len(gdf_neighbor):
+        if write_empty:
+            gdf_neighbor = gdf_neighbor.drop(columns="geometry", errors="ignore")
+            gdf_neighbor["geometry"] = None
+            write_pandas(gdf_neighbor, out)
+        return
+
+    write_geopandas(gdf_neighbor, out)
 
 
 def _fix_missing_muni_numbers(
-    gdf,
-    municipalities,
-    muni_number_col,
-    clip,
-    max_rows_per_chunk,
-    processes_in_clip,
-    strict,
-
+    gdf: GeoDataFrame,
+    municipalities: GeoDataFrame,
+    muni_number_col: str,
+    clip: bool,
+    max_rows_per_chunk: int,
+    processes_in_clip: int,
+    strict: bool,
+    to_print: str,
+) -> GeoDataFrame:
     if muni_number_col in gdf and gdf[muni_number_col].notna().all():
         if municipalities is None:
             return gdf
         if diffs := set(gdf[muni_number_col].values).difference(
             set(municipalities[muni_number_col].values)
         ):
-            message =
-                f"Different municipality numbers: {diffs}. Set 'strict=False' to ignore"
-            )
+            message = f"Different municipality numbers: {diffs}. Set 'strict=False' to ignore."
             if strict:
                 raise ValueError(message)
             else:
-                warnings.warn(message)
+                warnings.warn(message, stacklevel=1)
             return gdf
 
     if municipalities is None:
@@ -717,7 +879,10 @@ def _fix_missing_muni_numbers(
             "GeoDataFrame to clip the geometries by."
         )
 
-
+    try:
+        municipalities = municipalities[[muni_number_col, "geometry"]].to_crs(gdf.crs)
+    except Exception as e:
+        raise e.__class__(e, to_print) from e
 
     if muni_number_col in gdf and gdf[muni_number_col].isna().any():
         notna = gdf[gdf[muni_number_col].notna()]
@@ -732,6 +897,7 @@ def _fix_missing_muni_numbers(
         municipalities[[muni_number_col, municipalities._geometry_column_name]],
         processes=processes_in_clip,
         max_rows_per_chunk=max_rows_per_chunk,
+        to_print=to_print,
     )
 
     return pd.concat([notna, notna_anymore], ignore_index=True)
@@ -744,25 +910,42 @@ def _fix_missing_muni_numbers(
         municipalities[[muni_number_col, municipalities._geometry_column_name]],
         processes=processes_in_clip,
         max_rows_per_chunk=max_rows_per_chunk,
+        to_print=to_print,
     )
 
 
 def parallel_overlay(
     df1: GeoDataFrame,
     df2: GeoDataFrame,
-    # muni_number_col: str,
     processes: int,
     max_rows_per_chunk: int,
     backend: str = "loky",
+    to_print: str | None = None,
     **kwargs,
 ) -> GeoDataFrame:
-
+    """Perform spatial overlay operations on two GeoDataFrames in parallel.
+
+    This function splits the first GeoDataFrame into chunks, processes each chunk in parallel using the specified
+    overlay operation with the second GeoDataFrame, and then concatenates the results.
 
+    Note that this function is most useful if df2 has few and simple geometries.
+
+    Args:
+        df1: The first GeoDataFrame for the overlay operation.
+        df2: The second GeoDataFrame for the overlay operation.
+        how: Type of overlay operation ('intersection', 'union', etc.).
+        processes: Number of parallel processes to use.
+        max_rows_per_chunk: Maximum number of rows per chunk for processing. This helps manage memory usage.
+        backend: The parallelization backend to use ('loky', 'multiprocessing', 'threading').
+        to_print: Optional text to print to see progression.
+        **kwargs: Additional keyword arguments to pass to the overlay function.
+
+    Returns:
+        A GeoDataFrame containing the result of the overlay operation.
+    """
     if len(df1) < max_rows_per_chunk:
         return clean_overlay(df1, df2, **kwargs)
 
-    # df2 = df2.dissolve(by=muni_number_col, as_index=False)
-
     n_chunks = len(df1) // max_rows_per_chunk
     chunks = np.array_split(np.arange(len(df1)), n_chunks)
 
@@ -778,26 +961,50 @@ def parallel_overlay(
     out = Parallel(processes, backend=backend).map(
         _clean_intersection,
         df1_chunked,
-        args=(df2,),
+        args=(df2, to_print) if to_print else (df2,),
     )
     return pd.concat(out, ignore_index=True)
 
 
-def _clean_intersection(
-
+def _clean_intersection(
+    df1: GeoDataFrame, df2: GeoDataFrame, to_print: str = ""
+) -> GeoDataFrame:
+    print(to_print, "- intersection chunk len:", len(df1))
     return clean_overlay(df1, df2, how="intersection")
 
 
 def chunkwise(
     func: Callable,
-    df: GeoDataFrame,
+    df: GeoDataFrame | pd.DataFrame,
     max_rows_per_chunk: int = 150_000,
-    n_chunks: int = None,
+    n_chunks: int | None = None,
     args: tuple | None = None,
     kwargs: dict | None = None,
     n_jobs: int = 1,
     backend: str = "loky",
-) -> GeoDataFrame:
+) -> GeoDataFrame | pd.DataFrame:
+    """Run a function in parallel on chunks of a DataFrame.
+
+    This method is used to process large (Geo)DataFrames in manageable pieces,
+    optionally in parallel.
+
+    Args:
+        func: The function to apply to each chunk. This function must accept a DataFrame as
+            its first argument and return a DataFrame.
+        df: The DataFrame to be chunked and processed.
+        max_rows_per_chunk: The maximum number of rows each chunk should contain.
+        n_chunks: The exact number of chunks to divide the dataframe into. If None, it will be
+            calculated based on 'max_rows_per_chunk'.
+        args: Additional positional arguments to pass to 'func'.
+        kwargs: Keyword arguments to pass to 'func'.
+        n_jobs: The number of parallel jobs to run. Defaults to 1 (no parallel execution).
+        backend: The backend to use for parallel execution (e.g., 'loky', 'multiprocessing').
+
+    Returns:
+        GeoDataFrame: A GeoDataFrame resulting from concatenating the results of applying 'func'
+            to each chunk of the original GeoDataFrame.
+
+    """
     if len(df) < max_rows_per_chunk:
         return func(df, *args, **kwargs)
 
@@ -815,3 +1022,26 @@ def chunkwise(
         kwargs=kwargs,
     )
     return pd.concat(out, ignore_index=True)
+
+
+def _turn_args_into_kwargs(func: Callable, args: tuple, index_start: int) -> dict:
+    if not isinstance(args, tuple):
+        raise TypeError("args should be a tuple (it should not be unpacked with *)")
+    argnames = inspect.getfullargspec(func).args[index_start:]
+    return {name: value for value, name in zip(args, argnames, strict=False)}
+
+
+def _try_to_read_geopandas(path: str, **kwargs) -> GeoDataFrame | DataFrame | None:
+    """Read with try/except because it's faster than checking exists first."""
+    try:
+        return read_geopandas(path, **kwargs)
+    except FileNotFoundError:
+        return None
+
+
+def _try_to_read_pandas(path: str, **kwargs) -> DataFrame | None:
+    """Read with try/except because it's faster than checking exists first."""
+    try:
+        return read_pandas(path, **kwargs)
+    except FileNotFoundError:
+        return None
```