splitlog 4.1.6__tar.gz → 5.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {splitlog-4.1.6 → splitlog-5.0.0}/PKG-INFO +3 -2
- {splitlog-4.1.6 → splitlog-5.0.0}/pyproject.toml +6 -5
- {splitlog-4.1.6 → splitlog-5.0.0}/splitlog/__init__.py +9 -10
- {splitlog-4.1.6 → splitlog-5.0.0}/splitlog/__main__.py +8 -3
- splitlog-4.1.6/splitlog/outputfolder.py +0 -262
- {splitlog-4.1.6 → splitlog-5.0.0}/LICENSE +0 -0
- {splitlog-4.1.6 → splitlog-5.0.0}/README.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: splitlog
|
|
3
|
-
Version: 4.1.6
|
|
3
|
+
Version: 5.0.0
|
|
4
4
|
Summary: Utility to split aggregated logs from Apache Hadoop Yarn applications into a folder hierarchy
|
|
5
5
|
License-Expression: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -15,8 +15,9 @@ Classifier: Programming Language :: Python :: 3 :: Only
|
|
|
15
15
|
Classifier: Topic :: System :: Distributed Computing
|
|
16
16
|
Classifier: Topic :: System :: Logging
|
|
17
17
|
Classifier: Topic :: Utilities
|
|
18
|
+
Requires-Dist: fsspec (>=2026.2.0)
|
|
18
19
|
Requires-Dist: python-dateutil (>=2.9.0,<3.0.0)
|
|
19
|
-
Requires-Dist: pytz (>=
|
|
20
|
+
Requires-Dist: pytz (>=2026.1)
|
|
20
21
|
Project-URL: Bug Tracker, https://github.com/splitlog/splitlog/issues
|
|
21
22
|
Project-URL: Repository, https://github.com/splitlog/splitlog.git
|
|
22
23
|
Description-Content-Type: text/markdown
|
|
@@ -24,9 +24,10 @@ classifiers = [
|
|
|
24
24
|
]
|
|
25
25
|
dependencies = [
|
|
26
26
|
"python-dateutil (>=2.9.0,<3.0.0)",
|
|
27
|
-
"pytz (>=
|
|
27
|
+
"pytz (>=2026.1)",
|
|
28
|
+
"fsspec (>=2026.2.0)",
|
|
28
29
|
]
|
|
29
|
-
version = "4.1.6"
|
|
30
|
+
version = "5.0.0"
|
|
30
31
|
|
|
31
32
|
|
|
32
33
|
[project.urls]
|
|
@@ -38,11 +39,11 @@ splitlog = 'splitlog.__main__:main'
|
|
|
38
39
|
|
|
39
40
|
[dependency-groups]
|
|
40
41
|
dev = [
|
|
41
|
-
"mypy (>=1.
|
|
42
|
+
"mypy (>=1.19.1,<2.0.0)",
|
|
42
43
|
"black (>=26.3.1,<27.0.0)",
|
|
43
44
|
"types-python-dateutil (>=2.9.0.20251108,<3.0.0.0)",
|
|
44
|
-
"pytest (>=9.0.
|
|
45
|
-
"pytest-cov (>=7.
|
|
45
|
+
"pytest (>=9.0.2,<10.0.0)",
|
|
46
|
+
"pytest-cov (>=7.1.0,<8.0.0)",
|
|
46
47
|
]
|
|
47
48
|
|
|
48
49
|
[tool.poetry]
|
|
@@ -6,8 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
from typing import BinaryIO, Optional, Set
|
|
7
7
|
|
|
8
8
|
from dateutil.parser import parse as parse_date
|
|
9
|
-
|
|
10
|
-
from splitlog.outputfolder import OutputFolder
|
|
9
|
+
from fsspec import AbstractFileSystem # type: ignore
|
|
11
10
|
|
|
12
11
|
logger = logging.getLogger(__name__)
|
|
13
12
|
|
|
@@ -45,7 +44,7 @@ class _Splitter(object):
|
|
|
45
44
|
)
|
|
46
45
|
|
|
47
46
|
def __init__(
|
|
48
|
-
self: "_Splitter", infile: BinaryIO, output_folder: OutputFolder
|
|
47
|
+
self: "_Splitter", infile: BinaryIO, output_folder: AbstractFileSystem
|
|
49
48
|
) -> None:
|
|
50
49
|
self.container: Optional[str] = None
|
|
51
50
|
self.host: Optional[str] = None
|
|
@@ -55,7 +54,7 @@ class _Splitter(object):
|
|
|
55
54
|
self.length: Optional[int] = None
|
|
56
55
|
self.dirs_created: Set[Path] = set()
|
|
57
56
|
self.infile: BinaryIO = infile
|
|
58
|
-
self.output_folder: OutputFolder = output_folder
|
|
57
|
+
self.output_folder: AbstractFileSystem = output_folder
|
|
59
58
|
self.offset: int = 0
|
|
60
59
|
self.line: Optional[str] = None
|
|
61
60
|
self.eof: bool = False
|
|
@@ -247,14 +246,14 @@ class _Splitter(object):
|
|
|
247
246
|
|
|
248
247
|
def _create_hierarchy(self: "_Splitter") -> Path:
|
|
249
248
|
assert self.host is not None, "host must be present"
|
|
250
|
-
host_dir = self.output_folder.root / self.host
|
|
249
|
+
host_dir = Path(self.host)
|
|
251
250
|
if host_dir not in self.dirs_created:
|
|
252
|
-
self.output_folder.mkdir(host_dir)
|
|
251
|
+
self.output_folder.makedirs(host_dir.as_posix(), exist_ok=True)
|
|
253
252
|
self.dirs_created.add(host_dir)
|
|
254
253
|
assert self.container is not None, "container must be present"
|
|
255
254
|
container_dir = host_dir / self.container
|
|
256
255
|
if container_dir not in self.dirs_created:
|
|
257
|
-
self.output_folder.mkdir(container_dir)
|
|
256
|
+
self.output_folder.makedirs(container_dir.as_posix(), exist_ok=True)
|
|
258
257
|
self.dirs_created.add(container_dir)
|
|
259
258
|
return container_dir
|
|
260
259
|
|
|
@@ -263,14 +262,14 @@ class _Splitter(object):
|
|
|
263
262
|
assert self.filename, "filename must be present"
|
|
264
263
|
log_path = container_dir / self.filename
|
|
265
264
|
|
|
266
|
-
with self.output_folder.create(log_path) as outfile:
|
|
265
|
+
with self.output_folder.open(log_path.as_posix(), "wb") as outfile:
|
|
267
266
|
logger.debug("Created empty log file %s", log_path)
|
|
268
267
|
|
|
269
268
|
def _copy(self: "_Splitter") -> None:
|
|
270
269
|
container_dir = self._create_hierarchy()
|
|
271
270
|
assert self.filename, "filename must be present"
|
|
272
271
|
log_path = container_dir / self.filename
|
|
273
|
-
with self.output_folder.create(log_path) as outfile:
|
|
272
|
+
with self.output_folder.open(log_path.as_posix(), "wb") as outfile:
|
|
274
273
|
assert self.length is not None, "length must be present"
|
|
275
274
|
logger.debug("Created log file %s, size: %d", log_path, self.length)
|
|
276
275
|
remaining = self.length
|
|
@@ -321,7 +320,7 @@ class _Splitter(object):
|
|
|
321
320
|
return parse_error
|
|
322
321
|
|
|
323
322
|
|
|
324
|
-
def split(infile: BinaryIO, output_folder: OutputFolder) -> None:
|
|
323
|
+
def split(infile: BinaryIO, output_folder: AbstractFileSystem) -> None:
|
|
325
324
|
splitter = _Splitter(infile=infile, output_folder=output_folder)
|
|
326
325
|
|
|
327
326
|
splitter.split()
|
|
@@ -126,12 +126,17 @@ def main(cli_args: Optional[List[str]] = None) -> None:
|
|
|
126
126
|
assert args.output_folder is not None, "output_folder argument must be present"
|
|
127
127
|
|
|
128
128
|
from splitlog import split, ParseError
|
|
129
|
-
from
|
|
129
|
+
from fsspec.implementations.dirfs import DirFileSystem # type: ignore
|
|
130
130
|
|
|
131
131
|
with _open_input(args.input_file) as infile:
|
|
132
132
|
try:
|
|
133
|
-
|
|
134
|
-
|
|
133
|
+
# Atomic directory creation. Fails if it already exists or if parent is missing.
|
|
134
|
+
args.output_folder.mkdir()
|
|
135
|
+
# use fsspec DirFileSystem to handle output folder
|
|
136
|
+
output_folder = DirFileSystem(
|
|
137
|
+
path=str(args.output_folder), auto_mkdir=False
|
|
138
|
+
)
|
|
139
|
+
split(infile, output_folder)
|
|
135
140
|
except FileNotFoundError as e:
|
|
136
141
|
_error_exit(e)
|
|
137
142
|
except FileExistsError as e:
|
|
@@ -1,262 +0,0 @@
|
|
|
1
|
-
import abc
|
|
2
|
-
import contextlib
|
|
3
|
-
import logging
|
|
4
|
-
import os
|
|
5
|
-
import stat
|
|
6
|
-
import types
|
|
7
|
-
import typing as t
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
|
|
10
|
-
_logger = logging.getLogger(__name__)
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class BinWriter(contextlib.AbstractContextManager, metaclass=abc.ABCMeta):
|
|
14
|
-
"""Abstract base class for writing binary data into an output file."""
|
|
15
|
-
|
|
16
|
-
@abc.abstractmethod
|
|
17
|
-
def write(self, b: bytes) -> int:
|
|
18
|
-
"""Writes bytes into a file and returns how many bytes were written successfully.
|
|
19
|
-
|
|
20
|
-
:returns: number of bytes written successfully
|
|
21
|
-
"""
|
|
22
|
-
raise NotImplementedError()
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class OutputFolder(contextlib.AbstractContextManager, metaclass=abc.ABCMeta):
|
|
26
|
-
"""Abstract base class for output folder IO."""
|
|
27
|
-
|
|
28
|
-
@property
|
|
29
|
-
@abc.abstractmethod
|
|
30
|
-
def root(self) -> Path:
|
|
31
|
-
"""Root path of the output folder to construct paths inside the output folder.
|
|
32
|
-
|
|
33
|
-
:returns: root path
|
|
34
|
-
"""
|
|
35
|
-
raise NotImplementedError()
|
|
36
|
-
|
|
37
|
-
@abc.abstractmethod
|
|
38
|
-
def mkdir(self, path: Path) -> None:
|
|
39
|
-
"""Creates a subdirectory."""
|
|
40
|
-
raise NotImplementedError()
|
|
41
|
-
|
|
42
|
-
@abc.abstractmethod
|
|
43
|
-
def create(self, path: Path) -> BinWriter:
|
|
44
|
-
"""Creates a new file and returns a writer to write to it.
|
|
45
|
-
|
|
46
|
-
:returns: writer to write binary data into the file
|
|
47
|
-
"""
|
|
48
|
-
raise NotImplementedError()
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
class FileWrapper(BinWriter):
|
|
52
|
-
def __init__(self, file: t.BinaryIO):
|
|
53
|
-
self._file = file
|
|
54
|
-
|
|
55
|
-
def write(self, b: bytes) -> int:
|
|
56
|
-
return self._file.write(b)
|
|
57
|
-
|
|
58
|
-
def __enter__(self) -> "FileWrapper":
|
|
59
|
-
return self
|
|
60
|
-
|
|
61
|
-
def __exit__(
|
|
62
|
-
self,
|
|
63
|
-
exc: t.Union[t.Type[BaseException], None],
|
|
64
|
-
value: t.Union[BaseException, None],
|
|
65
|
-
tb: t.Union[types.TracebackType, None],
|
|
66
|
-
) -> t.Union[bool, None]:
|
|
67
|
-
return self._file.__exit__(exc, value, tb)
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
def _is_relative_to(path: Path, parent: Path) -> bool:
|
|
71
|
-
try:
|
|
72
|
-
path.relative_to(parent)
|
|
73
|
-
except ValueError:
|
|
74
|
-
return False
|
|
75
|
-
else:
|
|
76
|
-
return True
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
class DefaultLocalFilesystemOutputFolder(OutputFolder):
|
|
80
|
-
"""Encapsulates filesystem IO on output folders.
|
|
81
|
-
|
|
82
|
-
This implementation is portable but unsafe to use when invoking splitlog with privileges.
|
|
83
|
-
"""
|
|
84
|
-
|
|
85
|
-
FILE_MODE = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
|
|
86
|
-
DIR_MODE = (
|
|
87
|
-
stat.S_IRUSR
|
|
88
|
-
| stat.S_IWUSR
|
|
89
|
-
| stat.S_IXUSR
|
|
90
|
-
| stat.S_IRGRP
|
|
91
|
-
| stat.S_IXGRP
|
|
92
|
-
| stat.S_IROTH
|
|
93
|
-
| stat.S_IXOTH
|
|
94
|
-
)
|
|
95
|
-
|
|
96
|
-
def __init__(self, path: Path):
|
|
97
|
-
self._path: Path = path.resolve()
|
|
98
|
-
|
|
99
|
-
def __enter__(self) -> OutputFolder:
|
|
100
|
-
self._path.mkdir(mode=self.DIR_MODE, exist_ok=False)
|
|
101
|
-
return self
|
|
102
|
-
|
|
103
|
-
def __exit__(
|
|
104
|
-
self,
|
|
105
|
-
exc: t.Union[t.Type[BaseException], None],
|
|
106
|
-
value: t.Union[BaseException, None],
|
|
107
|
-
tb: t.Union[types.TracebackType, None],
|
|
108
|
-
) -> t.Union[bool, None]:
|
|
109
|
-
return None
|
|
110
|
-
|
|
111
|
-
def mkdir(self, path: Path) -> None:
|
|
112
|
-
real_path = self._check_paths(path)
|
|
113
|
-
os.mkdir(real_path, mode=self.DIR_MODE)
|
|
114
|
-
|
|
115
|
-
def _check_paths(self, path: Path) -> Path:
|
|
116
|
-
if path.is_absolute():
|
|
117
|
-
raise ValueError(f"Path {path} must be relative")
|
|
118
|
-
real_path = Path(os.path.normpath(self._path / path))
|
|
119
|
-
if not _is_relative_to(real_path, self._path):
|
|
120
|
-
raise ValueError(f"Path {path} outside {self._path}")
|
|
121
|
-
return real_path
|
|
122
|
-
|
|
123
|
-
def create(self, path: Path) -> BinWriter:
|
|
124
|
-
real_path = self._check_paths(path)
|
|
125
|
-
f = open(real_path, "xb")
|
|
126
|
-
try:
|
|
127
|
-
return FileWrapper(f)
|
|
128
|
-
except Exception:
|
|
129
|
-
f.close()
|
|
130
|
-
raise
|
|
131
|
-
|
|
132
|
-
@property
|
|
133
|
-
def root(self) -> Path:
|
|
134
|
-
return Path()
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
class LinuxLocalFilesystemOutputFolder(OutputFolder):
|
|
138
|
-
"""Encapsulates filesystem IO on output folders.
|
|
139
|
-
|
|
140
|
-
This implementation avoids TOCTTOU attacks using modern Linux APIs.
|
|
141
|
-
"""
|
|
142
|
-
|
|
143
|
-
DIR_MODE = (
|
|
144
|
-
stat.S_IRUSR
|
|
145
|
-
| stat.S_IWUSR
|
|
146
|
-
| stat.S_IXUSR
|
|
147
|
-
| stat.S_IRGRP
|
|
148
|
-
| stat.S_IXGRP
|
|
149
|
-
| stat.S_IROTH
|
|
150
|
-
| stat.S_IXOTH
|
|
151
|
-
)
|
|
152
|
-
FILE_MODE = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
|
|
153
|
-
|
|
154
|
-
@staticmethod
|
|
155
|
-
def is_supported() -> bool:
|
|
156
|
-
for flag in ("O_PATH", "O_DIRECTORY", "O_NOFOLLOW", "O_CLOEXEC"):
|
|
157
|
-
if not hasattr(os, flag):
|
|
158
|
-
_logger.debug(f"os.{flag} not supported")
|
|
159
|
-
return False
|
|
160
|
-
|
|
161
|
-
for needs_dir_fd_support in (os.open, os.mkdir):
|
|
162
|
-
if needs_dir_fd_support not in os.supports_dir_fd:
|
|
163
|
-
_logger.debug(f"{needs_dir_fd_support} does not support dir fds")
|
|
164
|
-
return False
|
|
165
|
-
|
|
166
|
-
return True
|
|
167
|
-
|
|
168
|
-
def __init__(self, path: Path):
|
|
169
|
-
if not self.is_supported():
|
|
170
|
-
raise RuntimeError(
|
|
171
|
-
"File system semantics are not supported by runtime environment"
|
|
172
|
-
)
|
|
173
|
-
self._path: Path = path.resolve()
|
|
174
|
-
self._dir_fd: t.Union[int, None] = None
|
|
175
|
-
|
|
176
|
-
def __enter__(
|
|
177
|
-
self,
|
|
178
|
-
) -> OutputFolder:
|
|
179
|
-
# split path
|
|
180
|
-
parent = self._path.parent
|
|
181
|
-
name = self._path.name
|
|
182
|
-
|
|
183
|
-
# allow parent folder to be a symlink
|
|
184
|
-
parent_dir_fd = self._open_dir_fd(parent, no_follow=False)
|
|
185
|
-
|
|
186
|
-
try:
|
|
187
|
-
os.mkdir(name, mode=self.DIR_MODE, dir_fd=parent_dir_fd)
|
|
188
|
-
self._dir_fd = self._open_dir_fd(name, no_follow=True, dir_fd=parent_dir_fd)
|
|
189
|
-
return self
|
|
190
|
-
finally:
|
|
191
|
-
os.close(parent_dir_fd)
|
|
192
|
-
|
|
193
|
-
def __exit__(
|
|
194
|
-
self,
|
|
195
|
-
exc: t.Union[t.Type[BaseException], None],
|
|
196
|
-
value: t.Union[BaseException, None],
|
|
197
|
-
tb: t.Union[types.TracebackType, None],
|
|
198
|
-
) -> t.Union[bool, None]:
|
|
199
|
-
if self._dir_fd is not None:
|
|
200
|
-
saved, self._dir_fd = self._dir_fd, None
|
|
201
|
-
os.close(saved)
|
|
202
|
-
return None
|
|
203
|
-
|
|
204
|
-
@property
|
|
205
|
-
def root(self) -> Path:
|
|
206
|
-
return Path()
|
|
207
|
-
|
|
208
|
-
def mkdir(self, path: Path) -> None:
|
|
209
|
-
real_path = self._ensure_path_under_root(path)
|
|
210
|
-
os.mkdir(real_path, mode=self.DIR_MODE, dir_fd=self._dir_fd)
|
|
211
|
-
|
|
212
|
-
def _opener(self, path: str, flags: int) -> int:
|
|
213
|
-
return os.open(
|
|
214
|
-
path,
|
|
215
|
-
flags | getattr(os, "O_NOFOLLOW") | getattr(os, "O_CLOEXEC"),
|
|
216
|
-
mode=self.FILE_MODE,
|
|
217
|
-
dir_fd=self._dir_fd,
|
|
218
|
-
)
|
|
219
|
-
|
|
220
|
-
def create(self, path: Path) -> BinWriter:
|
|
221
|
-
real_path = self._ensure_path_under_root(path)
|
|
222
|
-
f = open(real_path, "xb", opener=self._opener)
|
|
223
|
-
try:
|
|
224
|
-
return FileWrapper(f)
|
|
225
|
-
except Exception:
|
|
226
|
-
f.close()
|
|
227
|
-
raise
|
|
228
|
-
|
|
229
|
-
def _ensure_path_under_root(self, path: Path) -> Path:
|
|
230
|
-
if path.is_absolute():
|
|
231
|
-
raise ValueError(f"Path {path} must be relative")
|
|
232
|
-
|
|
233
|
-
# remove all ".." and "." components
|
|
234
|
-
real_path = Path(os.path.normpath(self._path / path))
|
|
235
|
-
|
|
236
|
-
# ensure resulting absolute path is still inside self._path
|
|
237
|
-
if not _is_relative_to(real_path, self._path):
|
|
238
|
-
raise ValueError(f"Path {path} outside {self._path}")
|
|
239
|
-
return real_path.relative_to(self._path)
|
|
240
|
-
|
|
241
|
-
@staticmethod
|
|
242
|
-
def _open_dir_fd(
|
|
243
|
-
path: t.Union[Path, str], no_follow: bool, dir_fd: t.Optional[int] = None
|
|
244
|
-
) -> int:
|
|
245
|
-
flags = (
|
|
246
|
-
getattr(os, "O_PATH")
|
|
247
|
-
| getattr(os, "O_DIRECTORY")
|
|
248
|
-
| getattr(os, "O_CLOEXEC")
|
|
249
|
-
)
|
|
250
|
-
|
|
251
|
-
if no_follow:
|
|
252
|
-
flags |= getattr(os, "O_NOFOLLOW")
|
|
253
|
-
|
|
254
|
-
return os.open(path, flags, dir_fd=dir_fd)
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
def new_output_folder(path: Path) -> OutputFolder:
|
|
258
|
-
"""Chooses an output folder implementation and creates an instance."""
|
|
259
|
-
if LinuxLocalFilesystemOutputFolder.is_supported():
|
|
260
|
-
return LinuxLocalFilesystemOutputFolder(path=path)
|
|
261
|
-
else:
|
|
262
|
-
return DefaultLocalFilesystemOutputFolder(path=path)
|
|
File without changes
|
|
File without changes
|