swak 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swak/__init__.py +0 -0
- swak/cli/__init__.py +14 -0
- swak/cli/argparser.py +159 -0
- swak/cli/envparser.py +67 -0
- swak/cli/exceptions.py +6 -0
- swak/cli/importer.py +68 -0
- swak/cloud/__init__.py +14 -0
- swak/cloud/gcp/__init__.py +31 -0
- swak/cloud/gcp/bucket.py +161 -0
- swak/cloud/gcp/dataset.py +231 -0
- swak/cloud/gcp/df2gbq.py +122 -0
- swak/cloud/gcp/exceptions.py +6 -0
- swak/cloud/gcp/gcs2df.py +121 -0
- swak/cloud/gcp/gcs2local.py +170 -0
- swak/cloud/gcp/query.py +86 -0
- swak/cloud/gcp/query2df.py +54 -0
- swak/cloud/gcp/query2gcs.py +171 -0
- swak/dictionary/__init__.py +13 -0
- swak/dictionary/valuesgetter.py +139 -0
- swak/funcflow/__init__.py +51 -0
- swak/funcflow/concurrent/__init__.py +16 -0
- swak/funcflow/concurrent/processfork.py +189 -0
- swak/funcflow/concurrent/processmap.py +147 -0
- swak/funcflow/concurrent/threadfork.py +187 -0
- swak/funcflow/concurrent/threadmap.py +137 -0
- swak/funcflow/curry.py +57 -0
- swak/funcflow/exceptions.py +79 -0
- swak/funcflow/filter.py +86 -0
- swak/funcflow/fork.py +141 -0
- swak/funcflow/loggers/__init__.py +16 -0
- swak/funcflow/loggers/stdout.py +231 -0
- swak/funcflow/map.py +87 -0
- swak/funcflow/misc.py +67 -0
- swak/funcflow/partial.py +56 -0
- swak/funcflow/pipe.py +139 -0
- swak/funcflow/reduce.py +60 -0
- swak/funcflow/route.py +207 -0
- swak/funcflow/safe.py +62 -0
- swak/funcflow/split.py +87 -0
- swak/funcflow/sum.py +50 -0
- swak/jsonobject/__init__.py +9 -0
- swak/jsonobject/exceptions.py +18 -0
- swak/jsonobject/fields/__init__.py +13 -0
- swak/jsonobject/fields/custom.py +22 -0
- swak/jsonobject/fields/flexidate.py +80 -0
- swak/jsonobject/fields/flexitime.py +71 -0
- swak/jsonobject/fields/maybe.py +43 -0
- swak/jsonobject/jsonobject.py +523 -0
- swak/jsonobject/jsonobjects.py +246 -0
- swak/misc/__init__.py +12 -0
- swak/misc/loggers.py +187 -0
- swak/misc/repr.py +159 -0
- swak/pd/__init__.py +25 -0
- swak/pd/frame.py +248 -0
- swak/pd/read.py +44 -0
- swak/pt/__init__.py +29 -0
- swak/pt/blocks.py +406 -0
- swak/pt/create.py +179 -0
- swak/pt/dists.py +88 -0
- swak/pt/embed/__init__.py +25 -0
- swak/pt/embed/activated.py +109 -0
- swak/pt/embed/categorical.py +145 -0
- swak/pt/embed/feature.py +114 -0
- swak/pt/embed/gated.py +109 -0
- swak/pt/embed/gated_residual.py +162 -0
- swak/pt/embed/numerical.py +144 -0
- swak/pt/exceptions.py +10 -0
- swak/pt/io.py +219 -0
- swak/pt/losses.py +503 -0
- swak/pt/misc.py +312 -0
- swak/pt/mix/__init__.py +21 -0
- swak/pt/mix/activated.py +116 -0
- swak/pt/mix/gated.py +117 -0
- swak/pt/mix/gated_residual.py +162 -0
- swak/pt/mix/weighted/__init__.py +21 -0
- swak/pt/mix/weighted/activated.py +141 -0
- swak/pt/mix/weighted/constant.py +86 -0
- swak/pt/mix/weighted/gated.py +146 -0
- swak/pt/mix/weighted/gated_residual.py +190 -0
- swak/pt/mix/weighted/variable.py +91 -0
- swak/pt/train/__init__.py +33 -0
- swak/pt/train/callbacks.py +151 -0
- swak/pt/train/checkpoints.py +233 -0
- swak/pt/train/data.py +81 -0
- swak/pt/train/schedulers.py +51 -0
- swak/pt/train/trainer.py +310 -0
- swak/pt/types.py +38 -0
- swak/text/__init__.py +23 -0
- swak/text/interpolate.py +171 -0
- swak/text/misc.py +14 -0
- swak/text/parse.py +37 -0
- swak/text/read.py +171 -0
- swak/text/resource.py +81 -0
- swak-0.0.3.dist-info/LICENSE +21 -0
- swak-0.0.3.dist-info/METADATA +48 -0
- swak-0.0.3.dist-info/RECORD +98 -0
- swak-0.0.3.dist-info/WHEEL +5 -0
- swak-0.0.3.dist-info/top_level.txt +1 -0
swak/__init__.py
ADDED
|
File without changes
|
swak/cli/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Tools to assist in consolidating sources of project configurations."""
|
|
2
|
+
|
|
3
|
+
from .importer import Importer
|
|
4
|
+
from .envparser import EnvParser
|
|
5
|
+
from .argparser import ArgParser, USAGE, DESCRIPTION, EPILOG
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
'Importer',
|
|
9
|
+
'EnvParser',
|
|
10
|
+
'ArgParser',
|
|
11
|
+
'USAGE',
|
|
12
|
+
'DESCRIPTION',
|
|
13
|
+
'EPILOG'
|
|
14
|
+
]
|
swak/cli/argparser.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import json
|
|
3
|
+
from json import JSONDecodeError
|
|
4
|
+
from ast import literal_eval
|
|
5
|
+
from argparse import ArgumentParser, RawTextHelpFormatter
|
|
6
|
+
from typing import Any
|
|
7
|
+
from itertools import takewhile, dropwhile, chain
|
|
8
|
+
from .exceptions import ArgParseError
|
|
9
|
+
|
|
10
|
+
type Parsed = tuple[list[str], dict[str, Any]]
|
|
11
|
+
|
|
12
|
+
USAGE = '%(prog)s [action(s)] [-h]'
|
|
13
|
+
|
|
14
|
+
DESCRIPTION = 'Refer to the README.md for the available actions!'
|
|
15
|
+
|
|
16
|
+
EPILOG = """
|
|
17
|
+
Additionally, all fields of the program's config can be set via
|
|
18
|
+
long-format options. Nested fields can be set by dot-separating
|
|
19
|
+
levels, e.g., "--root.level1.level2 value"
|
|
20
|
+
|
|
21
|
+
{!r}
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ArgParser:
|
|
26
|
+
"""Parse the command line for actions and any long-format options.
|
|
27
|
+
|
|
28
|
+
Using this command-line argument parser alleviates the need for
|
|
29
|
+
defining any groups or options beforehand. Arguments immediately
|
|
30
|
+
following the program call are interpreted as actions to perform as long
|
|
31
|
+
as they do not start with a hyphen. Starting with the first argument
|
|
32
|
+
that starts with a hyphen, command-line arguments will be interpreted as
|
|
33
|
+
``--key value`` pairs and this long format is the only one allowed.
|
|
34
|
+
Abbreviated options (``-k value``) right after actions (and before any
|
|
35
|
+
long-format options) are ignored.
|
|
36
|
+
|
|
37
|
+
Parameters
|
|
38
|
+
----------
|
|
39
|
+
default_action: str, optional
|
|
40
|
+
Default action to return if none is found in the command-line
|
|
41
|
+
arguments. Defaults to no action.
|
|
42
|
+
usage: str, optional
|
|
43
|
+
Program usage message.
|
|
44
|
+
description: str, optional
|
|
45
|
+
Program description.
|
|
46
|
+
epilog: str, optional
|
|
47
|
+
Text displayed after the help on command-line options.
|
|
48
|
+
fmt_cls: type, optional
|
|
49
|
+
Option passed on to the underlying ``argparse.ArgumentParser``.
|
|
50
|
+
Defaults to ``argparse.RawTextHelpFormatter``
|
|
51
|
+
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
def __init__(
|
|
55
|
+
self,
|
|
56
|
+
default_action: str | None = None,
|
|
57
|
+
usage: str = USAGE,
|
|
58
|
+
description: str = DESCRIPTION,
|
|
59
|
+
epilog: str = EPILOG,
|
|
60
|
+
fmt_cls: type = RawTextHelpFormatter
|
|
61
|
+
) -> None:
|
|
62
|
+
self.default_action = default_action
|
|
63
|
+
self.usage = usage
|
|
64
|
+
self.description = description
|
|
65
|
+
self.epilog = epilog
|
|
66
|
+
self.fmt_cls = fmt_cls
|
|
67
|
+
self.__parse = ArgumentParser(
|
|
68
|
+
usage=usage,
|
|
69
|
+
description=description,
|
|
70
|
+
epilog=epilog,
|
|
71
|
+
formatter_class=fmt_cls
|
|
72
|
+
).parse_known_args
|
|
73
|
+
|
|
74
|
+
def __repr__(self) -> str:
|
|
75
|
+
cls = self.__class__.__name__
|
|
76
|
+
return f'{cls}({self.default_action}, ...)'
|
|
77
|
+
|
|
78
|
+
def __call__(self, args: list[str] | None = None) -> Parsed:
|
|
79
|
+
"""Parse the command-line arguments into actions and options.
|
|
80
|
+
|
|
81
|
+
Parameters
|
|
82
|
+
----------
|
|
83
|
+
args: list of str, optional
|
|
84
|
+
The command-line arguments to parse. Mainly a debugging feature.
|
|
85
|
+
If none is given, ``sys.argv[1:]`` will be parsed.
|
|
86
|
+
|
|
87
|
+
Returns
|
|
88
|
+
-------
|
|
89
|
+
actions: list of str
|
|
90
|
+
A list with the actions (as strings) to perform. If none are
|
|
91
|
+
found on the command line and no `default_action` is specified,
|
|
92
|
+
that list will be empty.
|
|
93
|
+
options: dict
|
|
94
|
+
Dictionary with keys and values parsed from long-format
|
|
95
|
+
command line arguments.
|
|
96
|
+
|
|
97
|
+
"""
|
|
98
|
+
args = sys.argv[1:] if args is None else args
|
|
99
|
+
_, unknowns = self.__parse(args)
|
|
100
|
+
actions, args = self.__split(unknowns)
|
|
101
|
+
actions = self.__valid(actions)
|
|
102
|
+
args = self.__compatible(args)
|
|
103
|
+
options = self.__mapped(args)
|
|
104
|
+
return actions, options
|
|
105
|
+
|
|
106
|
+
def __split(self, args: list[str]) -> tuple[list[str], list[str]]:
|
|
107
|
+
"""Split command-line options into actions and config settings."""
|
|
108
|
+
# Everything before the first option (starting with "-") are actions.
|
|
109
|
+
actions = tuple(takewhile(lambda arg: not arg.startswith('-'), args))
|
|
110
|
+
# If there are none, fall back onto the default action (if specified).
|
|
111
|
+
if not actions and self.default_action is not None:
|
|
112
|
+
actions = self.default_action,
|
|
113
|
+
# Either way, replace dashes with underscores
|
|
114
|
+
actions = (action.lower().replace('-', '_') for action in actions)
|
|
115
|
+
|
|
116
|
+
# Everything after the action(s) are options (starting with "--").
|
|
117
|
+
args = dropwhile(lambda arg: not arg.startswith('--'), args)
|
|
118
|
+
# In case options are given as "--key=value", we split.
|
|
119
|
+
args = chain.from_iterable(arg.split('=') for arg in args)
|
|
120
|
+
return list(actions), list(args)
|
|
121
|
+
|
|
122
|
+
@staticmethod
|
|
123
|
+
def __valid(actions: list[str]) -> list[str]:
|
|
124
|
+
"""Raise if any action string is not a valid python identifier."""
|
|
125
|
+
for action in actions:
|
|
126
|
+
if not action.isidentifier():
|
|
127
|
+
msg = 'Actions must be valid identifiers, unlike "{}"!'
|
|
128
|
+
raise ArgParseError(msg.format(action))
|
|
129
|
+
return actions
|
|
130
|
+
|
|
131
|
+
@staticmethod
|
|
132
|
+
def __compatible(args: list[str]) -> list[str]:
|
|
133
|
+
"""Raise if a command-line option is not in long format with value."""
|
|
134
|
+
long_form = all(arg.startswith('--') for arg in args[::2])
|
|
135
|
+
alternating = all(not arg.startswith('-') for arg in args[1::2])
|
|
136
|
+
even_number = len(args) % 2 == 0
|
|
137
|
+
if not (long_form and alternating and even_number):
|
|
138
|
+
msg = ('Command-line arguments must be passed in long format (i.e.'
|
|
139
|
+
', as "--key value"), and a value must always be present!')
|
|
140
|
+
raise ArgParseError(msg)
|
|
141
|
+
return args
|
|
142
|
+
|
|
143
|
+
def __mapped(self, args: list[str]) -> dict[str, Any]:
|
|
144
|
+
"""Create dictionary wih config keys and (string) values from args."""
|
|
145
|
+
zipped = list(zip(args[::2], args[1::2]))
|
|
146
|
+
# Drop the first two characters (always "--") and replace "-" with "_".
|
|
147
|
+
return {k[2:].replace('-', '_'): self.__parsed(v) for k, v in zipped}
|
|
148
|
+
|
|
149
|
+
@staticmethod
|
|
150
|
+
def __parsed(value: str) -> Any:
|
|
151
|
+
"""Try to parse (string) command-line options into python objects."""
|
|
152
|
+
try:
|
|
153
|
+
parsed = json.loads(value)
|
|
154
|
+
except (TypeError, JSONDecodeError):
|
|
155
|
+
try:
|
|
156
|
+
parsed = literal_eval(value)
|
|
157
|
+
except (TypeError, ValueError, SyntaxError):
|
|
158
|
+
parsed = value
|
|
159
|
+
return parsed
|
swak/cli/envparser.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
from json import JSONDecodeError
|
|
4
|
+
from ast import literal_eval
|
|
5
|
+
from typing import Any
|
|
6
|
+
from ..misc import ArgRepr
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class EnvParser(ArgRepr):
    """Read environment variables, letting prefixed ones win over plain ones.

    A variable name wanted for a project may already be taken by the
    operating system or some other component. Setting a prefixed variant
    instead avoids the clash. Instances are created with that prefix and,
    when called, return the environment as a dictionary in which the prefix
    has been stripped from matching keys (overriding any un-prefixed key of
    the same name) and every value has been parsed into a python object
    where possible.

    Parameters
    ----------
    prefix: str, optional
        Prefix of environment variables that would otherwise be shadowed
        by existing ones. Defaults to empty string.

    """

    def __init__(self, prefix: str = '') -> None:
        super().__init__(prefix)
        self.prefix = prefix

    def __call__(self, env: dict[str, str] | None = None) -> dict[str, Any]:
        """Resolve prefixed variables and parse all values.

        Parameters
        ----------
        env: dict, optional
            Dictionary to be parsed and resolved. Defaults to ``os.environ``.

        Returns
        -------
        dict
            Environment with prefixed keys removed and the values of their
            non-prefixed counterparts updated accordingly.

        """
        source = os.environ if env is None else env
        # Keys carrying the prefix, stored under their stripped name.
        stripped = {
            name.removeprefix(self.prefix): value
            for name, value in source.items()
            if name.startswith(self.prefix)
        }
        # All remaining keys, stored as they are.
        untouched = {
            name: value
            for name, value in source.items()
            if not name.startswith(self.prefix)
        }
        # Right-hand side of the union wins, so prefixed values take priority.
        return {
            name: self.__parsed(value)
            for name, value in (untouched | stripped).items()
        }

    @staticmethod
    def __parsed(value: str) -> Any:
        """Parse a string as JSON, then as python literal, else return it."""
        try:
            return json.loads(value)
        except (TypeError, JSONDecodeError):
            pass
        try:
            return literal_eval(value)
        except (TypeError, ValueError, SyntaxError):
            return value
|
swak/cli/exceptions.py
ADDED
swak/cli/importer.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from importlib import import_module
|
|
2
|
+
from ..misc import ArgRepr
|
|
3
|
+
from .exceptions import ImporterError
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Importer(ArgRepr):
    """Import named objects from one module below a top-level package.

    Relative imports are deliberately not supported to keep the API simple.
    An instance is configured with the location to import from and is then
    called with the names of the objects to import.

    Parameters
    ----------
    package: str
        Name of the top-level package to import from. Must not start with dots
        but can contain any number of dots to indicate sub-packages.
    module: str, optional
        The specific module to import objects from. May contain dots to
        indicate that it is located further down within some sub-package.
        Defaults to "steps".

    """

    def __init__(self, package: str, module: str = 'steps') -> None:
        # Surrounding spaces, dots, and slashes are removed from both parts.
        self.package = package.strip(' ./')
        self.module = module.strip(' ./')
        super().__init__(self.package, self.module)

    @property
    def path(self) -> str:
        """Dot-separated concatenation of (sub-)package and module."""
        return '.'.join((self.package, self.module))

    def __call__(self, *names: str) -> list:
        """Import any number of objects from the specified package.module.

        Parameters
        ----------
        *names: str
            Name(s) of object(s) to import from ``package.module``.

        Returns
        -------
        list
            Imported objects, in the order their names were given.

        Raises
        ------
        ImporterError
            When the ``package.module`` is mis-specified, can't be found, or
            when the specified object(s) can't be found in it.

        """
        try:
            source = import_module(self.path)
        except (TypeError, ModuleNotFoundError) as error:
            msg = 'Could not import module "{}"!'
            raise ImporterError(msg.format(self.path)) from error

        def fetch(name: str):
            """Look up a single name on the imported module."""
            try:
                return getattr(source, name)
            except AttributeError as err:
                msg = 'Could not import "{}" from module "{}"!'
                raise ImporterError(msg.format(name, self.path)) from err

        return [fetch(name) for name in names]
|
swak/cloud/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Tools to interact with hosted cloud services.
|
|
2
|
+
|
|
3
|
+
Currently only supports elements of the Google Cloud Platform (GCP) but, in
|
|
4
|
+
the future, Amazon Web Services (AWS) might get implemented as well.
|
|
5
|
+
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from importlib.util import find_spec
|
|
9
|
+
|
|
10
|
+
# Importable module names (not distribution names) of the optional
# dependencies that the [cloud] extra is expected to provide.
required = 'google.cloud.bigquery', 'google.cloud.storage', 'pandas_gbq'


def _missing(module: str) -> bool:
    """Return ``True`` if the given module cannot be located."""
    try:
        return find_spec(module) is None
    except (ImportError, ValueError):
        # For dotted names, find_spec imports the parent package first and
        # raises if that parent is not installed.
        return True


# Fail early, with a helpful message, if any optional dependency is absent.
if any(_missing(package) for package in required):
    msg = 'Install {} with the [cloud] extra to unlock this subpackage!'
    raise ImportError(msg.format(__package__.split('.')[0]))
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Tools to interact with elements of the Google Cloud Platform (GCP).
|
|
2
|
+
|
|
3
|
+
Specifically, data scientists tend to interact mostly with Google's BigQuery
|
|
4
|
+
(BQ) data-warehouse solution and the Google Cloud Storage (GCS).
|
|
5
|
+
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .dataset import Collation, Rounding, Billing, GbqDataset
|
|
9
|
+
from .bucket import Storage, GcsBucket
|
|
10
|
+
from .query import GbqQuery
|
|
11
|
+
from .query2gcs import GbqQuery2GcsParquet
|
|
12
|
+
from .gcs2local import GcsDir2LocalDir
|
|
13
|
+
from .gcs2df import GcsParquet2DataFrame
|
|
14
|
+
from .query2df import GbqQuery2DataFrame
|
|
15
|
+
from .df2gbq import IfExists, DataFrame2Gbq
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
'Collation',
|
|
19
|
+
'Rounding',
|
|
20
|
+
'Billing',
|
|
21
|
+
'GbqDataset',
|
|
22
|
+
'Storage',
|
|
23
|
+
'GcsBucket',
|
|
24
|
+
'GbqQuery',
|
|
25
|
+
'GbqQuery2GcsParquet',
|
|
26
|
+
'GcsDir2LocalDir',
|
|
27
|
+
'GcsParquet2DataFrame',
|
|
28
|
+
'GbqQuery2DataFrame',
|
|
29
|
+
'IfExists',
|
|
30
|
+
'DataFrame2Gbq'
|
|
31
|
+
]
|
swak/cloud/gcp/bucket.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
from enum import StrEnum
|
|
2
|
+
from typing import Any, Literal
|
|
3
|
+
from google.cloud.storage import Client, Bucket
|
|
4
|
+
from google.api_core.retry import Retry
|
|
5
|
+
from ...misc import ArgRepr
|
|
6
|
+
|
|
7
|
+
type StorageType = Literal['STANDARD', 'NEARLINE', 'COLDLINE', 'ARCHIVE']
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Storage(StrEnum):
|
|
11
|
+
"""Specify storage class for blobs in a Google Cloud Storage bucket."""
|
|
12
|
+
STANDARD = 'STANDARD'
|
|
13
|
+
NEARLINE = 'NEARLINE'
|
|
14
|
+
COLDLINE = 'COLDLINE'
|
|
15
|
+
ARCHIVE = 'ARCHIVE'
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class GcsBucket(ArgRepr):
    """Create a new bucket on Google Cloud Storage.

    Instances only hold the configuration. The bucket is created (or an
    existing one is fetched) when the instance is called.

    Parameters
    ----------
    project: str
        The project to create the bucket in.
    bucket: str
        The name of the bucket to create.
    location: str
        The physical datacenter location to create the bucket in. See the
        Google Cloud Platform `documentation <https://cloud.google.com/storage/
        docs/locations>`__ for options.
    blob_expire_days: int, optional
        If set, blobs older than the specified number of days will be
        automatically deleted. Defaults to ``None``.
    labels: dict, optional
        Any number of string-valued labels of the bucket. Defaults to none.
    user_project: str, optional
        The project billed for interacting with the bucket. Defaults to the
        `project`
    storage_class: str, optional
        Default storage class for blobs in this bucket. Defaults to ``None``,
        which results in "STANDARD". Use the ``Storage`` enum to specify
        explicitly.
    requester_pays: bool, optional
        Whether the requester will be billed for interacting with the bucket.
        Defaults to ``False``, which means that the (`user_`)`project` will be
        billed.
    **kwargs
        Additional keyword arguments are passed to the constructor of the
        Google Storage ``Client`` (see `documentation <https://cloud.google.
        com/python/docs/reference/storage/latest/google.cloud.storage.
        client.Client#parameters>`__ for options).

    Notes
    -----
    There are a lot more options to set, which have been deliberately omitted
    because of the complexity involved.

    See Also
    --------
    Storage

    """

    def __init__(
            self,
            project: str,
            bucket: str,
            location: str,
            blob_expire_days: int | None = None,
            labels: dict[str, str] | None = None,
            user_project: str | None = None,
            storage_class: StorageType | None = None,
            requester_pays: bool = False,
            **kwargs: Any
    ) -> None:
        # Names are stripped of surrounding spaces, slashes, and dots.
        self.project = project.strip(' /.')
        self.bucket = bucket.strip(' /.')
        self.location = location.strip().upper()
        self.blob_expire_days = blob_expire_days
        self.labels = {} if labels is None else labels
        # Without an explicit billing project, fall back onto the project.
        self.user_project = (
            self.project if user_project is None else user_project.strip(' /.')
        )
        self.storage_class = storage_class
        self.requester_pays = requester_pays
        self.kwargs = kwargs
        super().__init__(
            self.project,
            self.bucket,
            self.location,
            blob_expire_days=self.blob_expire_days,
            labels=self.labels,
            user_project=self.user_project,
            storage_class=self.storage_class,
            requester_pays=self.requester_pays,
            **kwargs
        )

    def __call__(
            self,
            exists_ok: bool = True,
            retry: Retry | None = None,
            timeout: float | tuple[float, float] | None = None,
    ) -> tuple[Bucket, bool]:
        """Create a new bucket on Google Cloud Storage.

        Parameters
        ----------
        exists_ok: bool, optional
            Whether to raise a ``Conflict`` exception if the targeted bucket
            already exists or not. Defaults to ``True``.
        retry: Retry, optional
            Retry policy for the request. Defaults to ``None``, which disables
            retries. See the Google Cloud Platform `guide
            <https://cloud.google.com/python/docs/reference/storage/1.39.0/
            retry_timeout#configuring-retries>`__ and `reference
            <https://googleapis.dev/python/google-api-core/latest/retry.html>`__
            for options.
        timeout: float, optional
            The number of seconds to wait for the HTTP response to the API call
            before using `retry` or a tuple with separate values for connection
            and request timeouts. Defaults to ``None``, meaning wait forever.

        Raises
        ------
        Conflict
            If `exists_ok` is set to ``False`` and the bucket already exists.

        Returns
        -------
        Bucket
            The existing or newly created bucket. If existing, then the bucket
            is returned unchanged, that is, none of the specified options are
            applied.
        bool
            ``True`` if the requested bucket is newly created and ``False``
            if an existing bucket is returned.

        """
        client = Client(self.project, **self.kwargs)
        requested = Bucket(client, self.bucket, self.user_project)

        # Configure the local bucket object before anything is sent off.
        requested.requester_pays = self.requester_pays
        requested.storage_class = self.storage_class
        requested.labels = self.labels
        if self.blob_expire_days:
            requested.add_lifecycle_delete_rule(age=self.blob_expire_days)

        # An existing bucket is only handed back if the caller permits it.
        already_there = requested.exists()
        if already_there and exists_ok:
            return (
                client.get_bucket(requested, retry=retry, timeout=timeout),
                False
            )

        requested.create(
            client,
            self.project,
            self.location,
            retry=retry,
            timeout=timeout
        )
        return requested, True
|