splitlog 4.1.6__tar.gz → 5.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: splitlog
3
- Version: 4.1.6
3
+ Version: 5.0.0
4
4
  Summary: Utility to split aggregated logs from Apache Hadoop Yarn applications into a folder hierarchy
5
5
  License-Expression: MIT
6
6
  License-File: LICENSE
@@ -15,8 +15,9 @@ Classifier: Programming Language :: Python :: 3 :: Only
15
15
  Classifier: Topic :: System :: Distributed Computing
16
16
  Classifier: Topic :: System :: Logging
17
17
  Classifier: Topic :: Utilities
18
+ Requires-Dist: fsspec (>=2026.2.0)
18
19
  Requires-Dist: python-dateutil (>=2.9.0,<3.0.0)
19
- Requires-Dist: pytz (>=2025.2)
20
+ Requires-Dist: pytz (>=2026.1)
20
21
  Project-URL: Bug Tracker, https://github.com/splitlog/splitlog/issues
21
22
  Project-URL: Repository, https://github.com/splitlog/splitlog.git
22
23
  Description-Content-Type: text/markdown
@@ -24,9 +24,10 @@ classifiers = [
24
24
  ]
25
25
  dependencies = [
26
26
  "python-dateutil (>=2.9.0,<3.0.0)",
27
- "pytz (>=2025.2)",
27
+ "pytz (>=2026.1)",
28
+ "fsspec (>=2026.2.0)",
28
29
  ]
29
- version = "4.1.6"
30
+ version = "5.0.0"
30
31
 
31
32
 
32
33
  [project.urls]
@@ -38,11 +39,11 @@ splitlog = 'splitlog.__main__:main'
38
39
 
39
40
  [dependency-groups]
40
41
  dev = [
41
- "mypy (>=1.18.2,<2.0.0)",
42
+ "mypy (>=1.19.1,<2.0.0)",
42
43
  "black (>=26.3.1,<27.0.0)",
43
44
  "types-python-dateutil (>=2.9.0.20251108,<3.0.0.0)",
44
- "pytest (>=9.0.0,<10.0.0)",
45
- "pytest-cov (>=7.0.0,<8.0.0)",
45
+ "pytest (>=9.0.2,<10.0.0)",
46
+ "pytest-cov (>=7.1.0,<8.0.0)",
46
47
  ]
47
48
 
48
49
  [tool.poetry]
@@ -6,8 +6,7 @@ from pathlib import Path
6
6
  from typing import BinaryIO, Optional, Set
7
7
 
8
8
  from dateutil.parser import parse as parse_date
9
-
10
- from splitlog.outputfolder import OutputFolder
9
+ from fsspec import AbstractFileSystem # type: ignore
11
10
 
12
11
  logger = logging.getLogger(__name__)
13
12
 
@@ -45,7 +44,7 @@ class _Splitter(object):
45
44
  )
46
45
 
47
46
  def __init__(
48
- self: "_Splitter", infile: BinaryIO, output_folder: OutputFolder
47
+ self: "_Splitter", infile: BinaryIO, output_folder: AbstractFileSystem
49
48
  ) -> None:
50
49
  self.container: Optional[str] = None
51
50
  self.host: Optional[str] = None
@@ -55,7 +54,7 @@ class _Splitter(object):
55
54
  self.length: Optional[int] = None
56
55
  self.dirs_created: Set[Path] = set()
57
56
  self.infile: BinaryIO = infile
58
- self.output_folder: OutputFolder = output_folder
57
+ self.output_folder: AbstractFileSystem = output_folder
59
58
  self.offset: int = 0
60
59
  self.line: Optional[str] = None
61
60
  self.eof: bool = False
@@ -247,14 +246,14 @@ class _Splitter(object):
247
246
 
248
247
  def _create_hierarchy(self: "_Splitter") -> Path:
249
248
  assert self.host is not None, "host must be present"
250
- host_dir = self.output_folder.root / self.host
249
+ host_dir = Path(self.host)
251
250
  if host_dir not in self.dirs_created:
252
- self.output_folder.mkdir(host_dir)
251
+ self.output_folder.makedirs(host_dir.as_posix(), exist_ok=True)
253
252
  self.dirs_created.add(host_dir)
254
253
  assert self.container is not None, "container must be present"
255
254
  container_dir = host_dir / self.container
256
255
  if container_dir not in self.dirs_created:
257
- self.output_folder.mkdir(container_dir)
256
+ self.output_folder.makedirs(container_dir.as_posix(), exist_ok=True)
258
257
  self.dirs_created.add(container_dir)
259
258
  return container_dir
260
259
 
@@ -263,14 +262,14 @@ class _Splitter(object):
263
262
  assert self.filename, "filename must be present"
264
263
  log_path = container_dir / self.filename
265
264
 
266
- with self.output_folder.create(log_path) as outfile:
265
+ with self.output_folder.open(log_path.as_posix(), "wb") as outfile:
267
266
  logger.debug("Created empty log file %s", log_path)
268
267
 
269
268
  def _copy(self: "_Splitter") -> None:
270
269
  container_dir = self._create_hierarchy()
271
270
  assert self.filename, "filename must be present"
272
271
  log_path = container_dir / self.filename
273
- with self.output_folder.create(log_path) as outfile:
272
+ with self.output_folder.open(log_path.as_posix(), "wb") as outfile:
274
273
  assert self.length is not None, "length must be present"
275
274
  logger.debug("Created log file %s, size: %d", log_path, self.length)
276
275
  remaining = self.length
@@ -321,7 +320,7 @@ class _Splitter(object):
321
320
  return parse_error
322
321
 
323
322
 
324
- def split(infile: BinaryIO, output_folder: OutputFolder) -> None:
323
+ def split(infile: BinaryIO, output_folder: AbstractFileSystem) -> None:
325
324
  splitter = _Splitter(infile=infile, output_folder=output_folder)
326
325
 
327
326
  splitter.split()
@@ -126,12 +126,17 @@ def main(cli_args: Optional[List[str]] = None) -> None:
126
126
  assert args.output_folder is not None, "output_folder argument must be present"
127
127
 
128
128
  from splitlog import split, ParseError
129
- from splitlog.outputfolder import new_output_folder
129
+ from fsspec.implementations.dirfs import DirFileSystem # type: ignore
130
130
 
131
131
  with _open_input(args.input_file) as infile:
132
132
  try:
133
- with new_output_folder(args.output_folder) as output_folder:
134
- split(infile, output_folder)
133
+ # Atomic directory creation. Fails if it already exists or if parent is missing.
134
+ args.output_folder.mkdir()
135
+ # use fsspec DirFileSystem to handle output folder
136
+ output_folder = DirFileSystem(
137
+ path=str(args.output_folder), auto_mkdir=False
138
+ )
139
+ split(infile, output_folder)
135
140
  except FileNotFoundError as e:
136
141
  _error_exit(e)
137
142
  except FileExistsError as e:
@@ -1,262 +0,0 @@
1
- import abc
2
- import contextlib
3
- import logging
4
- import os
5
- import stat
6
- import types
7
- import typing as t
8
- from pathlib import Path
9
-
10
- _logger = logging.getLogger(__name__)
11
-
12
-
13
- class BinWriter(contextlib.AbstractContextManager, metaclass=abc.ABCMeta):
14
- """Abstract base class for writing binary data into an output file."""
15
-
16
- @abc.abstractmethod
17
- def write(self, b: bytes) -> int:
18
- """Writes bytes into a file and returns how many bytes were written successfully.
19
-
20
- :returns: number of bytes written successfully
21
- """
22
- raise NotImplementedError()
23
-
24
-
25
- class OutputFolder(contextlib.AbstractContextManager, metaclass=abc.ABCMeta):
26
- """Abstract base class for output folder IO."""
27
-
28
- @property
29
- @abc.abstractmethod
30
- def root(self) -> Path:
31
- """Root path of the output folder to construct paths inside the output folder.
32
-
33
- :returns: root path
34
- """
35
- raise NotImplementedError()
36
-
37
- @abc.abstractmethod
38
- def mkdir(self, path: Path) -> None:
39
- """Creates a subdirectory."""
40
- raise NotImplementedError()
41
-
42
- @abc.abstractmethod
43
- def create(self, path: Path) -> BinWriter:
44
- """Creates a new file and returns a writer to write to it.
45
-
46
- :returns: writer to write binary data into the file
47
- """
48
- raise NotImplementedError()
49
-
50
-
51
- class FileWrapper(BinWriter):
52
- def __init__(self, file: t.BinaryIO):
53
- self._file = file
54
-
55
- def write(self, b: bytes) -> int:
56
- return self._file.write(b)
57
-
58
- def __enter__(self) -> "FileWrapper":
59
- return self
60
-
61
- def __exit__(
62
- self,
63
- exc: t.Union[t.Type[BaseException], None],
64
- value: t.Union[BaseException, None],
65
- tb: t.Union[types.TracebackType, None],
66
- ) -> t.Union[bool, None]:
67
- return self._file.__exit__(exc, value, tb)
68
-
69
-
70
- def _is_relative_to(path: Path, parent: Path) -> bool:
71
- try:
72
- path.relative_to(parent)
73
- except ValueError:
74
- return False
75
- else:
76
- return True
77
-
78
-
79
- class DefaultLocalFilesystemOutputFolder(OutputFolder):
80
- """Encapsulates filesystem IO on output folders.
81
-
82
- This implementation is portable but unsafe to use when invoking splitlog with privileges.
83
- """
84
-
85
- FILE_MODE = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
86
- DIR_MODE = (
87
- stat.S_IRUSR
88
- | stat.S_IWUSR
89
- | stat.S_IXUSR
90
- | stat.S_IRGRP
91
- | stat.S_IXGRP
92
- | stat.S_IROTH
93
- | stat.S_IXOTH
94
- )
95
-
96
- def __init__(self, path: Path):
97
- self._path: Path = path.resolve()
98
-
99
- def __enter__(self) -> OutputFolder:
100
- self._path.mkdir(mode=self.DIR_MODE, exist_ok=False)
101
- return self
102
-
103
- def __exit__(
104
- self,
105
- exc: t.Union[t.Type[BaseException], None],
106
- value: t.Union[BaseException, None],
107
- tb: t.Union[types.TracebackType, None],
108
- ) -> t.Union[bool, None]:
109
- return None
110
-
111
- def mkdir(self, path: Path) -> None:
112
- real_path = self._check_paths(path)
113
- os.mkdir(real_path, mode=self.DIR_MODE)
114
-
115
- def _check_paths(self, path: Path) -> Path:
116
- if path.is_absolute():
117
- raise ValueError(f"Path {path} must be relative")
118
- real_path = Path(os.path.normpath(self._path / path))
119
- if not _is_relative_to(real_path, self._path):
120
- raise ValueError(f"Path {path} outside {self._path}")
121
- return real_path
122
-
123
- def create(self, path: Path) -> BinWriter:
124
- real_path = self._check_paths(path)
125
- f = open(real_path, "xb")
126
- try:
127
- return FileWrapper(f)
128
- except Exception:
129
- f.close()
130
- raise
131
-
132
- @property
133
- def root(self) -> Path:
134
- return Path()
135
-
136
-
137
- class LinuxLocalFilesystemOutputFolder(OutputFolder):
138
- """Encapsulates filesystem IO on output folders.
139
-
140
- This implementation avoids TOCTTOU attacks using modern Linux APIs.
141
- """
142
-
143
- DIR_MODE = (
144
- stat.S_IRUSR
145
- | stat.S_IWUSR
146
- | stat.S_IXUSR
147
- | stat.S_IRGRP
148
- | stat.S_IXGRP
149
- | stat.S_IROTH
150
- | stat.S_IXOTH
151
- )
152
- FILE_MODE = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
153
-
154
- @staticmethod
155
- def is_supported() -> bool:
156
- for flag in ("O_PATH", "O_DIRECTORY", "O_NOFOLLOW", "O_CLOEXEC"):
157
- if not hasattr(os, flag):
158
- _logger.debug(f"os.{flag} not supported")
159
- return False
160
-
161
- for needs_dir_fd_support in (os.open, os.mkdir):
162
- if needs_dir_fd_support not in os.supports_dir_fd:
163
- _logger.debug(f"{needs_dir_fd_support} does not support dir fds")
164
- return False
165
-
166
- return True
167
-
168
- def __init__(self, path: Path):
169
- if not self.is_supported():
170
- raise RuntimeError(
171
- "File system semantics are not supported by runtime environment"
172
- )
173
- self._path: Path = path.resolve()
174
- self._dir_fd: t.Union[int, None] = None
175
-
176
- def __enter__(
177
- self,
178
- ) -> OutputFolder:
179
- # split path
180
- parent = self._path.parent
181
- name = self._path.name
182
-
183
- # allow parent folder to be a symlink
184
- parent_dir_fd = self._open_dir_fd(parent, no_follow=False)
185
-
186
- try:
187
- os.mkdir(name, mode=self.DIR_MODE, dir_fd=parent_dir_fd)
188
- self._dir_fd = self._open_dir_fd(name, no_follow=True, dir_fd=parent_dir_fd)
189
- return self
190
- finally:
191
- os.close(parent_dir_fd)
192
-
193
- def __exit__(
194
- self,
195
- exc: t.Union[t.Type[BaseException], None],
196
- value: t.Union[BaseException, None],
197
- tb: t.Union[types.TracebackType, None],
198
- ) -> t.Union[bool, None]:
199
- if self._dir_fd is not None:
200
- saved, self._dir_fd = self._dir_fd, None
201
- os.close(saved)
202
- return None
203
-
204
- @property
205
- def root(self) -> Path:
206
- return Path()
207
-
208
- def mkdir(self, path: Path) -> None:
209
- real_path = self._ensure_path_under_root(path)
210
- os.mkdir(real_path, mode=self.DIR_MODE, dir_fd=self._dir_fd)
211
-
212
- def _opener(self, path: str, flags: int) -> int:
213
- return os.open(
214
- path,
215
- flags | getattr(os, "O_NOFOLLOW") | getattr(os, "O_CLOEXEC"),
216
- mode=self.FILE_MODE,
217
- dir_fd=self._dir_fd,
218
- )
219
-
220
- def create(self, path: Path) -> BinWriter:
221
- real_path = self._ensure_path_under_root(path)
222
- f = open(real_path, "xb", opener=self._opener)
223
- try:
224
- return FileWrapper(f)
225
- except Exception:
226
- f.close()
227
- raise
228
-
229
- def _ensure_path_under_root(self, path: Path) -> Path:
230
- if path.is_absolute():
231
- raise ValueError(f"Path {path} must be relative")
232
-
233
- # remove all ".." and "." components
234
- real_path = Path(os.path.normpath(self._path / path))
235
-
236
- # ensure resulting absolute path is still inside self._path
237
- if not _is_relative_to(real_path, self._path):
238
- raise ValueError(f"Path {path} outside {self._path}")
239
- return real_path.relative_to(self._path)
240
-
241
- @staticmethod
242
- def _open_dir_fd(
243
- path: t.Union[Path, str], no_follow: bool, dir_fd: t.Optional[int] = None
244
- ) -> int:
245
- flags = (
246
- getattr(os, "O_PATH")
247
- | getattr(os, "O_DIRECTORY")
248
- | getattr(os, "O_CLOEXEC")
249
- )
250
-
251
- if no_follow:
252
- flags |= getattr(os, "O_NOFOLLOW")
253
-
254
- return os.open(path, flags, dir_fd=dir_fd)
255
-
256
-
257
- def new_output_folder(path: Path) -> OutputFolder:
258
- """Chooses an output folder implementation and creates an instance."""
259
- if LinuxLocalFilesystemOutputFolder.is_supported():
260
- return LinuxLocalFilesystemOutputFolder(path=path)
261
- else:
262
- return DefaultLocalFilesystemOutputFolder(path=path)
File without changes
File without changes