turboloader 2.2.0__tar.gz → 2.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {turboloader-2.2.0 → turboloader-2.3.2}/MANIFEST.in +2 -7
- {turboloader-2.2.0 → turboloader-2.3.2}/PKG-INFO +2 -2
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/01_pil_baseline.py +56 -49
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/02_pytorch_naive.py +61 -59
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/03_pytorch_optimized.py +62 -60
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/04_pytorch_cached.py +77 -67
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/05_turboloader.py +54 -47
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/06_ffcv.py +79 -63
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/07_dali.py +91 -67
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/08_tensorflow.py +72 -50
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/09_resnet50_training.py +83 -86
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/benchmark_advanced_transforms.py +23 -10
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/comprehensive_benchmark.py +198 -127
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/final_comprehensive_benchmark.py +81 -73
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/format_converter_benchmark.py +31 -21
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/generate_web_data.py +84 -95
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/run_all_benchmarks.py +123 -85
- {turboloader-2.2.0 → turboloader-2.3.2}/examples/avx512_performance.py +13 -6
- {turboloader-2.2.0 → turboloader-2.3.2}/examples/complete_v110_workflow.py +36 -25
- {turboloader-2.2.0 → turboloader-2.3.2}/examples/distributed_ddp.py +60 -109
- {turboloader-2.2.0 → turboloader-2.3.2}/examples/imagenet_resnet50.py +96 -92
- {turboloader-2.2.0 → turboloader-2.3.2}/examples/pytorch_lightning_example.py +75 -77
- {turboloader-2.2.0 → turboloader-2.3.2}/examples/tbl_conversion.py +10 -3
- {turboloader-2.2.0 → turboloader-2.3.2}/examples/transform_example.py +4 -10
- {turboloader-2.2.0 → turboloader-2.3.2}/pyproject.toml +2 -2
- turboloader-2.3.2/setup.py +321 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/pipeline/pipeline.hpp +107 -17
- {turboloader-2.2.0 → turboloader-2.3.2}/src/python/turboloader_bindings.cpp +36 -14
- {turboloader-2.2.0 → turboloader-2.3.2}/src/readers/http_reader.hpp +5 -5
- {turboloader-2.2.0 → turboloader-2.3.2}/src/readers/tbl_v2_reader.hpp +3 -1
- turboloader-2.3.2/tests/create_test_dataset.py +83 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_integrations.py +54 -55
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_jax_integration.py +44 -50
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_prefetch_pipeline.cpp +19 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_pytorch_transforms.py +13 -25
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_tensorflow_integration.py +45 -51
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_transforms_tensorflow.py +14 -8
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_v180_features.py +104 -45
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_v190_features.py +85 -43
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_webdataset_integration.py +54 -76
- {turboloader-2.2.0 → turboloader-2.3.2}/turboloader/__init__.py +90 -31
- {turboloader-2.2.0 → turboloader-2.3.2}/turboloader.egg-info/SOURCES.txt +1 -0
- turboloader-2.2.0/setup.py +0 -236
- {turboloader-2.2.0 → turboloader-2.3.2}/AUTHORS.md +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/CMakeLists.txt +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/CONTRIBUTING.md +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/LICENSE +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/README.md +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/BENCHMARK_PLAN.md +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/datasets/generate_synthetic.py +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/memory/bench_memory.py +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/throughput/bench_pytorch.py +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/throughput/bench_tbl_v2.py +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/throughput/bench_turboloader.py +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/throughput/bench_webdataset.py +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/transforms/bench_transforms.py +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/benchmarks/visualization/plot_results.py +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/examples/README.md +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/setup.cfg +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/cache/cache_key.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/cache/disk_cache.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/cache/lru_cache.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/cache/tiered_cache.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/core/object_pool.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/core/sample.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/core/spsc_ring_buffer.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/decode/bmp_decoder.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/decode/csv_decoder.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/decode/image_decoder.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/decode/jpeg_decoder.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/decode/nvjpeg_decoder.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/decode/parquet_decoder.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/decode/png_decoder.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/decode/tiff_decoder.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/decode/video_decoder.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/decode/webp_decoder.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/distributed/distributed_dataloader.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/formats/coco_voc_parser.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/formats/tbl_v2_format.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/gpu/multi_gpu_pipeline.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/gpu/multi_gpu_pipeline.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/io/io_uring_reader.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/pipeline/error_recovery.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/pipeline/prefetch_pipeline.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/pipeline/smart_batching.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/readers/azure_blob_reader.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/readers/gcs_reader.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/readers/hdf5_reader.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/readers/reader_orchestrator.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/readers/s3_reader.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/readers/tar_reader.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/readers/tfrecord_reader.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/readers/zarr_reader.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/affine_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/autoaugment_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/blur_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/color_jitter_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/crop_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/erasing_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/flip_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/gpu/gpu_transforms.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/grayscale_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/modern_augment_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/normalize_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/pad_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/perspective_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/posterize_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/resize_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/rotation_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/simd_utils.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/solarize_transform.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/tensor_conversion.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/transform_base.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/transforms/transforms.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/utils/image_dimensions.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/src/writers/tbl_v2_writer.hpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_advanced_transforms.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_avx512_simd.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_cache.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_csv_decoder.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_distributed.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_gcs_reader.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_http_reader.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_image_decoder.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_multi_gpu.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_nvjpeg_decoder.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_parquet_decoder.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_pipeline_gpu_decode.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_reader_orchestrator.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_s3_reader.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_smart_batching.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_tar_reader.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_tbl_v2.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_transforms.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_unified_pipeline.cpp +0 -0
- {turboloader-2.2.0 → turboloader-2.3.2}/tests/test_video_decoder.cpp +0 -0
|
@@ -3,18 +3,13 @@ include README.md
|
|
|
3
3
|
include LICENSE
|
|
4
4
|
include AUTHORS.md
|
|
5
5
|
include CONTRIBUTING.md
|
|
6
|
-
include ARCHITECTURE.md
|
|
7
6
|
|
|
8
7
|
# Include ALL C++ source files and headers
|
|
9
8
|
graft src
|
|
10
|
-
recursive-include src *.hpp *.
|
|
9
|
+
recursive-include src *.hpp *.cpp
|
|
11
10
|
|
|
12
11
|
# Include CMake files
|
|
13
12
|
include CMakeLists.txt
|
|
14
|
-
recursive-include cmake *.cmake
|
|
15
|
-
|
|
16
|
-
# Include Python bindings
|
|
17
|
-
recursive-include python *.cpp *.hpp CMakeLists.txt
|
|
18
13
|
|
|
19
14
|
# Include benchmarks
|
|
20
15
|
recursive-include benchmarks *.py *.md
|
|
@@ -23,7 +18,7 @@ recursive-include benchmarks *.py *.md
|
|
|
23
18
|
recursive-include examples *.py *.md
|
|
24
19
|
|
|
25
20
|
# Include tests
|
|
26
|
-
recursive-include tests *.cpp *.
|
|
21
|
+
recursive-include tests *.cpp *.py
|
|
27
22
|
|
|
28
23
|
# Exclude build artifacts
|
|
29
24
|
global-exclude *.pyc
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: turboloader
|
|
3
|
-
Version: 2.2
|
|
3
|
+
Version: 2.3.2
|
|
4
4
|
Summary: Production-ready ML data loading library with distributed training support, SIMD-accelerated transforms, pipe operator composition, HDF5/TFRecord/Zarr support, and GPU transforms. Built with C++20 for maximum performance.
|
|
5
5
|
Author: TurboLoader Contributors
|
|
6
6
|
Author-email: Arnav Jain <arnav@example.com>
|
|
7
7
|
Maintainer-email: Arnav Jain <arnav@example.com>
|
|
8
|
-
License: MIT
|
|
8
|
+
License-Expression: MIT
|
|
9
9
|
Project-URL: Homepage, https://github.com/arnavjain/turboloader
|
|
10
10
|
Project-URL: Documentation, https://github.com/arnavjain/turboloader/blob/main/README.md
|
|
11
11
|
Project-URL: Repository, https://github.com/arnavjain/turboloader
|
|
@@ -42,12 +42,14 @@ class PILDataLoader:
|
|
|
42
42
|
and batches them for training.
|
|
43
43
|
"""
|
|
44
44
|
|
|
45
|
-
def __init__(
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
45
|
+
def __init__(
|
|
46
|
+
self,
|
|
47
|
+
image_dir: str,
|
|
48
|
+
batch_size: int = 32,
|
|
49
|
+
shuffle: bool = False,
|
|
50
|
+
num_epochs: int = 1,
|
|
51
|
+
transform: bool = True,
|
|
52
|
+
):
|
|
51
53
|
"""
|
|
52
54
|
Initialize PIL DataLoader.
|
|
53
55
|
|
|
@@ -65,7 +67,7 @@ class PILDataLoader:
|
|
|
65
67
|
self.transform = transform
|
|
66
68
|
|
|
67
69
|
# Get all image files
|
|
68
|
-
self.image_files = sorted(self.image_dir.glob(
|
|
70
|
+
self.image_files = sorted(self.image_dir.glob("*.jpg"))
|
|
69
71
|
self.num_images = len(self.image_files)
|
|
70
72
|
|
|
71
73
|
if self.num_images == 0:
|
|
@@ -90,7 +92,7 @@ class PILDataLoader:
|
|
|
90
92
|
NumPy array (H, W, 3) with transformed image
|
|
91
93
|
"""
|
|
92
94
|
# Load image
|
|
93
|
-
img = Image.open(img_path).convert(
|
|
95
|
+
img = Image.open(img_path).convert("RGB")
|
|
94
96
|
|
|
95
97
|
if self.transform:
|
|
96
98
|
# Resize to 224x224 (standard ImageNet size)
|
|
@@ -116,6 +118,7 @@ class PILDataLoader:
|
|
|
116
118
|
# Shuffle if requested
|
|
117
119
|
if self.shuffle:
|
|
118
120
|
import random
|
|
121
|
+
|
|
119
122
|
image_files = list(self.image_files)
|
|
120
123
|
random.shuffle(image_files)
|
|
121
124
|
else:
|
|
@@ -123,7 +126,7 @@ class PILDataLoader:
|
|
|
123
126
|
|
|
124
127
|
# Create batches
|
|
125
128
|
for i in range(0, len(image_files), self.batch_size):
|
|
126
|
-
batch_files = image_files[i:i + self.batch_size]
|
|
129
|
+
batch_files = image_files[i : i + self.batch_size]
|
|
127
130
|
|
|
128
131
|
# Load and transform images
|
|
129
132
|
batch_images = []
|
|
@@ -137,10 +140,9 @@ class PILDataLoader:
|
|
|
137
140
|
yield batch
|
|
138
141
|
|
|
139
142
|
|
|
140
|
-
def run_benchmark(
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
shuffle: bool = False) -> Dict[str, Any]:
|
|
143
|
+
def run_benchmark(
|
|
144
|
+
image_dir: str, batch_size: int = 32, num_epochs: int = 3, shuffle: bool = False
|
|
145
|
+
) -> Dict[str, Any]:
|
|
144
146
|
"""
|
|
145
147
|
Run PIL baseline benchmark.
|
|
146
148
|
|
|
@@ -153,14 +155,14 @@ def run_benchmark(image_dir: str,
|
|
|
153
155
|
Returns:
|
|
154
156
|
Dictionary with benchmark results
|
|
155
157
|
"""
|
|
156
|
-
print("="*80)
|
|
158
|
+
print("=" * 80)
|
|
157
159
|
print("PIL BASELINE BENCHMARK")
|
|
158
|
-
print("="*80)
|
|
160
|
+
print("=" * 80)
|
|
159
161
|
print(f"Dataset: {image_dir}")
|
|
160
162
|
print(f"Batch size: {batch_size}")
|
|
161
163
|
print(f"Epochs: {num_epochs}")
|
|
162
164
|
print(f"Shuffle: {shuffle}")
|
|
163
|
-
print("="*80)
|
|
165
|
+
print("=" * 80)
|
|
164
166
|
|
|
165
167
|
# Create dataloader
|
|
166
168
|
loader = PILDataLoader(
|
|
@@ -168,7 +170,7 @@ def run_benchmark(image_dir: str,
|
|
|
168
170
|
batch_size=batch_size,
|
|
169
171
|
shuffle=shuffle,
|
|
170
172
|
num_epochs=num_epochs,
|
|
171
|
-
transform=True
|
|
173
|
+
transform=True,
|
|
172
174
|
)
|
|
173
175
|
|
|
174
176
|
# Track metrics
|
|
@@ -216,48 +218,53 @@ def run_benchmark(image_dir: str,
|
|
|
216
218
|
|
|
217
219
|
# Calculate statistics
|
|
218
220
|
results = {
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
221
|
+
"framework": "PIL Baseline",
|
|
222
|
+
"batch_size": batch_size,
|
|
223
|
+
"num_epochs": num_epochs,
|
|
224
|
+
"shuffle": shuffle,
|
|
225
|
+
"total_time": total_time,
|
|
226
|
+
"epoch_times": epoch_times,
|
|
227
|
+
"avg_epoch_time": np.mean(epoch_times),
|
|
228
|
+
"std_epoch_time": np.std(epoch_times),
|
|
229
|
+
"avg_batch_time": np.mean(batch_times),
|
|
230
|
+
"std_batch_time": np.std(batch_times),
|
|
231
|
+
"throughput": loader.num_images * num_epochs / total_time,
|
|
232
|
+
"peak_memory_mb": max(memory_usage) if memory_usage else 0,
|
|
233
|
+
"avg_memory_mb": np.mean(memory_usage) if memory_usage else 0,
|
|
232
234
|
}
|
|
233
235
|
|
|
234
236
|
# Print summary
|
|
235
|
-
print("\n" + "="*80)
|
|
237
|
+
print("\n" + "=" * 80)
|
|
236
238
|
print("BENCHMARK RESULTS")
|
|
237
|
-
print("="*80)
|
|
239
|
+
print("=" * 80)
|
|
238
240
|
print(f"Total time: {total_time:.2f}s")
|
|
239
|
-
print(
|
|
240
|
-
|
|
241
|
+
print(
|
|
242
|
+
f"Average epoch time: {results['avg_epoch_time']:.2f}s ± {results['std_epoch_time']:.2f}s"
|
|
243
|
+
)
|
|
244
|
+
print(
|
|
245
|
+
f"Average batch time: {results['avg_batch_time']*1000:.2f}ms ± {results['std_batch_time']*1000:.2f}ms"
|
|
246
|
+
)
|
|
241
247
|
print(f"Throughput: {results['throughput']:.1f} images/sec")
|
|
242
248
|
print(f"Peak memory: {results['peak_memory_mb']:.1f} MB")
|
|
243
249
|
print(f"Average memory: {results['avg_memory_mb']:.1f} MB")
|
|
244
|
-
print("="*80)
|
|
250
|
+
print("=" * 80)
|
|
245
251
|
|
|
246
252
|
return results
|
|
247
253
|
|
|
248
254
|
|
|
249
255
|
def main():
|
|
250
|
-
parser = argparse.ArgumentParser(description=
|
|
251
|
-
parser.add_argument(
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
parser.add_argument(
|
|
260
|
-
|
|
256
|
+
parser = argparse.ArgumentParser(description="PIL baseline benchmark")
|
|
257
|
+
parser.add_argument(
|
|
258
|
+
"--image-dir",
|
|
259
|
+
"-dir",
|
|
260
|
+
type=str,
|
|
261
|
+
default="/private/tmp/benchmark_datasets/bench_2k/images/",
|
|
262
|
+
help="Directory containing JPEG images",
|
|
263
|
+
)
|
|
264
|
+
parser.add_argument("--batch-size", "-b", type=int, default=32, help="Batch size (default: 32)")
|
|
265
|
+
parser.add_argument("--epochs", "-e", type=int, default=3, help="Number of epochs (default: 3)")
|
|
266
|
+
parser.add_argument("--shuffle", "-s", action="store_true", help="Shuffle data")
|
|
267
|
+
parser.add_argument("--output", "-o", type=str, help="Output JSON file for results")
|
|
261
268
|
|
|
262
269
|
args = parser.parse_args()
|
|
263
270
|
|
|
@@ -266,15 +273,15 @@ def main():
|
|
|
266
273
|
image_dir=args.image_dir,
|
|
267
274
|
batch_size=args.batch_size,
|
|
268
275
|
num_epochs=args.epochs,
|
|
269
|
-
shuffle=args.shuffle
|
|
276
|
+
shuffle=args.shuffle,
|
|
270
277
|
)
|
|
271
278
|
|
|
272
279
|
# Save results if requested
|
|
273
280
|
if args.output:
|
|
274
|
-
with open(args.output,
|
|
281
|
+
with open(args.output, "w") as f:
|
|
275
282
|
json.dump(results, f, indent=2)
|
|
276
283
|
print(f"\nResults saved to {args.output}")
|
|
277
284
|
|
|
278
285
|
|
|
279
|
-
if __name__ ==
|
|
286
|
+
if __name__ == "__main__":
|
|
280
287
|
main()
|
|
@@ -54,7 +54,7 @@ class ImageDataset(data.Dataset):
|
|
|
54
54
|
transform: Optional torchvision transforms
|
|
55
55
|
"""
|
|
56
56
|
self.image_dir = Path(image_dir)
|
|
57
|
-
self.image_files = sorted(self.image_dir.glob(
|
|
57
|
+
self.image_files = sorted(self.image_dir.glob("*.jpg"))
|
|
58
58
|
self.transform = transform
|
|
59
59
|
|
|
60
60
|
if len(self.image_files) == 0:
|
|
@@ -68,7 +68,7 @@ class ImageDataset(data.Dataset):
|
|
|
68
68
|
img_path = self.image_files[idx]
|
|
69
69
|
|
|
70
70
|
# Load image with PIL
|
|
71
|
-
image = Image.open(img_path).convert(
|
|
71
|
+
image = Image.open(img_path).convert("RGB")
|
|
72
72
|
|
|
73
73
|
# Apply transforms
|
|
74
74
|
if self.transform is not None:
|
|
@@ -87,21 +87,19 @@ def get_transforms():
|
|
|
87
87
|
Returns:
|
|
88
88
|
torchvision.transforms.Compose
|
|
89
89
|
"""
|
|
90
|
-
return transforms.Compose(
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
mean=[0.485, 0.456, 0.406],
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
num_workers: int = 4,
|
|
104
|
-
num_epochs: int = 3) -> Dict[str, Any]:
|
|
90
|
+
return transforms.Compose(
|
|
91
|
+
[
|
|
92
|
+
transforms.Resize(256),
|
|
93
|
+
transforms.CenterCrop(224),
|
|
94
|
+
transforms.ToTensor(),
|
|
95
|
+
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
|
|
96
|
+
]
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def run_benchmark(
|
|
101
|
+
image_dir: str, batch_size: int = 32, num_workers: int = 4, num_epochs: int = 3
|
|
102
|
+
) -> Dict[str, Any]:
|
|
105
103
|
"""
|
|
106
104
|
Run naive PyTorch DataLoader benchmark.
|
|
107
105
|
|
|
@@ -114,20 +112,17 @@ def run_benchmark(image_dir: str,
|
|
|
114
112
|
Returns:
|
|
115
113
|
Dictionary with benchmark results
|
|
116
114
|
"""
|
|
117
|
-
print("="*80)
|
|
115
|
+
print("=" * 80)
|
|
118
116
|
print("NAIVE PYTORCH DATALOADER BENCHMARK")
|
|
119
|
-
print("="*80)
|
|
117
|
+
print("=" * 80)
|
|
120
118
|
print(f"Dataset: {image_dir}")
|
|
121
119
|
print(f"Batch size: {batch_size}")
|
|
122
120
|
print(f"Num workers: {num_workers}")
|
|
123
121
|
print(f"Epochs: {num_epochs}")
|
|
124
|
-
print("="*80)
|
|
122
|
+
print("=" * 80)
|
|
125
123
|
|
|
126
124
|
# Create dataset
|
|
127
|
-
dataset = ImageDataset(
|
|
128
|
-
image_dir=image_dir,
|
|
129
|
-
transform=get_transforms()
|
|
130
|
-
)
|
|
125
|
+
dataset = ImageDataset(image_dir=image_dir, transform=get_transforms())
|
|
131
126
|
|
|
132
127
|
print(f"\nDataset initialized:")
|
|
133
128
|
print(f" Total images: {len(dataset)}")
|
|
@@ -141,7 +136,7 @@ def run_benchmark(image_dir: str,
|
|
|
141
136
|
num_workers=num_workers,
|
|
142
137
|
pin_memory=False, # Naive: no pin_memory
|
|
143
138
|
prefetch_factor=2, # Default PyTorch prefetch
|
|
144
|
-
persistent_workers=False # Naive: restart workers each epoch
|
|
139
|
+
persistent_workers=False, # Naive: restart workers each epoch
|
|
145
140
|
)
|
|
146
141
|
|
|
147
142
|
# Track metrics
|
|
@@ -189,50 +184,57 @@ def run_benchmark(image_dir: str,
|
|
|
189
184
|
|
|
190
185
|
# Calculate statistics
|
|
191
186
|
results = {
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
187
|
+
"framework": "PyTorch Naive DataLoader",
|
|
188
|
+
"batch_size": batch_size,
|
|
189
|
+
"num_workers": num_workers,
|
|
190
|
+
"num_epochs": num_epochs,
|
|
191
|
+
"persistent_workers": False,
|
|
192
|
+
"pin_memory": False,
|
|
193
|
+
"total_time": total_time,
|
|
194
|
+
"epoch_times": epoch_times,
|
|
195
|
+
"avg_epoch_time": np.mean(epoch_times),
|
|
196
|
+
"std_epoch_time": np.std(epoch_times),
|
|
197
|
+
"avg_batch_time": np.mean(batch_times),
|
|
198
|
+
"std_batch_time": np.std(batch_times),
|
|
199
|
+
"throughput": len(dataset) * num_epochs / total_time,
|
|
200
|
+
"peak_memory_mb": max(memory_usage) if memory_usage else 0,
|
|
201
|
+
"avg_memory_mb": np.mean(memory_usage) if memory_usage else 0,
|
|
207
202
|
}
|
|
208
203
|
|
|
209
204
|
# Print summary
|
|
210
|
-
print("\n" + "="*80)
|
|
205
|
+
print("\n" + "=" * 80)
|
|
211
206
|
print("BENCHMARK RESULTS")
|
|
212
|
-
print("="*80)
|
|
207
|
+
print("=" * 80)
|
|
213
208
|
print(f"Total time: {total_time:.2f}s")
|
|
214
|
-
print(
|
|
215
|
-
|
|
209
|
+
print(
|
|
210
|
+
f"Average epoch time: {results['avg_epoch_time']:.2f}s ± {results['std_epoch_time']:.2f}s"
|
|
211
|
+
)
|
|
212
|
+
print(
|
|
213
|
+
f"Average batch time: {results['avg_batch_time']*1000:.2f}ms ± {results['std_batch_time']*1000:.2f}ms"
|
|
214
|
+
)
|
|
216
215
|
print(f"Throughput: {results['throughput']:.1f} images/sec")
|
|
217
216
|
print(f"Peak memory: {results['peak_memory_mb']:.1f} MB")
|
|
218
217
|
print(f"Average memory: {results['avg_memory_mb']:.1f} MB")
|
|
219
|
-
print("="*80)
|
|
218
|
+
print("=" * 80)
|
|
220
219
|
|
|
221
220
|
return results
|
|
222
221
|
|
|
223
222
|
|
|
224
223
|
def main():
|
|
225
|
-
parser = argparse.ArgumentParser(description=
|
|
226
|
-
parser.add_argument(
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
parser.add_argument(
|
|
235
|
-
|
|
224
|
+
parser = argparse.ArgumentParser(description="Naive PyTorch DataLoader benchmark")
|
|
225
|
+
parser.add_argument(
|
|
226
|
+
"--image-dir",
|
|
227
|
+
"-dir",
|
|
228
|
+
type=str,
|
|
229
|
+
default="/private/tmp/benchmark_datasets/bench_2k/images/",
|
|
230
|
+
help="Directory containing JPEG images",
|
|
231
|
+
)
|
|
232
|
+
parser.add_argument("--batch-size", "-b", type=int, default=32, help="Batch size (default: 32)")
|
|
233
|
+
parser.add_argument(
|
|
234
|
+
"--num-workers", "-w", type=int, default=4, help="Number of worker processes (default: 4)"
|
|
235
|
+
)
|
|
236
|
+
parser.add_argument("--epochs", "-e", type=int, default=3, help="Number of epochs (default: 3)")
|
|
237
|
+
parser.add_argument("--output", "-o", type=str, help="Output JSON file for results")
|
|
236
238
|
|
|
237
239
|
args = parser.parse_args()
|
|
238
240
|
|
|
@@ -241,15 +243,15 @@ def main():
|
|
|
241
243
|
image_dir=args.image_dir,
|
|
242
244
|
batch_size=args.batch_size,
|
|
243
245
|
num_workers=args.num_workers,
|
|
244
|
-
num_epochs=args.epochs
|
|
246
|
+
num_epochs=args.epochs,
|
|
245
247
|
)
|
|
246
248
|
|
|
247
249
|
# Save results if requested
|
|
248
250
|
if args.output:
|
|
249
|
-
with open(args.output,
|
|
251
|
+
with open(args.output, "w") as f:
|
|
250
252
|
json.dump(results, f, indent=2)
|
|
251
253
|
print(f"\nResults saved to {args.output}")
|
|
252
254
|
|
|
253
255
|
|
|
254
|
-
if __name__ ==
|
|
256
|
+
if __name__ == "__main__":
|
|
255
257
|
main()
|
|
@@ -41,7 +41,7 @@ class ImageDataset(data.Dataset):
|
|
|
41
41
|
|
|
42
42
|
def __init__(self, image_dir: str, transform=None):
|
|
43
43
|
self.image_dir = Path(image_dir)
|
|
44
|
-
self.image_files = sorted(self.image_dir.glob(
|
|
44
|
+
self.image_files = sorted(self.image_dir.glob("*.jpg"))
|
|
45
45
|
self.transform = transform
|
|
46
46
|
|
|
47
47
|
if len(self.image_files) == 0:
|
|
@@ -52,7 +52,7 @@ class ImageDataset(data.Dataset):
|
|
|
52
52
|
|
|
53
53
|
def __getitem__(self, idx):
|
|
54
54
|
img_path = self.image_files[idx]
|
|
55
|
-
image = Image.open(img_path).convert(
|
|
55
|
+
image = Image.open(img_path).convert("RGB")
|
|
56
56
|
|
|
57
57
|
if self.transform is not None:
|
|
58
58
|
image = self.transform(image)
|
|
@@ -63,21 +63,19 @@ class ImageDataset(data.Dataset):
|
|
|
63
63
|
|
|
64
64
|
def get_transforms():
|
|
65
65
|
"""Get optimized ImageNet-style transforms"""
|
|
66
|
-
return transforms.Compose(
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
mean=[0.485, 0.456, 0.406],
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
num_workers: int = 8,
|
|
80
|
-
num_epochs: int = 3) -> Dict[str, Any]:
|
|
66
|
+
return transforms.Compose(
|
|
67
|
+
[
|
|
68
|
+
transforms.Resize(256),
|
|
69
|
+
transforms.CenterCrop(224),
|
|
70
|
+
transforms.ToTensor(),
|
|
71
|
+
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
|
|
72
|
+
]
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def run_benchmark(
|
|
77
|
+
image_dir: str, batch_size: int = 32, num_workers: int = 8, num_epochs: int = 3
|
|
78
|
+
) -> Dict[str, Any]:
|
|
81
79
|
"""
|
|
82
80
|
Run optimized PyTorch DataLoader benchmark.
|
|
83
81
|
|
|
@@ -90,21 +88,18 @@ def run_benchmark(image_dir: str,
|
|
|
90
88
|
Returns:
|
|
91
89
|
Dictionary with benchmark results
|
|
92
90
|
"""
|
|
93
|
-
print("="*80)
|
|
91
|
+
print("=" * 80)
|
|
94
92
|
print("OPTIMIZED PYTORCH DATALOADER BENCHMARK")
|
|
95
|
-
print("="*80)
|
|
93
|
+
print("=" * 80)
|
|
96
94
|
print(f"Dataset: {image_dir}")
|
|
97
95
|
print(f"Batch size: {batch_size}")
|
|
98
96
|
print(f"Num workers: {num_workers}")
|
|
99
97
|
print(f"Epochs: {num_epochs}")
|
|
100
98
|
print(f"Optimizations: pin_memory=True, persistent_workers=True, prefetch_factor=4")
|
|
101
|
-
print("="*80)
|
|
99
|
+
print("=" * 80)
|
|
102
100
|
|
|
103
101
|
# Create dataset
|
|
104
|
-
dataset = ImageDataset(
|
|
105
|
-
image_dir=image_dir,
|
|
106
|
-
transform=get_transforms()
|
|
107
|
-
)
|
|
102
|
+
dataset = ImageDataset(image_dir=image_dir, transform=get_transforms())
|
|
108
103
|
|
|
109
104
|
print(f"\nDataset initialized:")
|
|
110
105
|
print(f" Total images: {len(dataset)}")
|
|
@@ -118,7 +113,7 @@ def run_benchmark(image_dir: str,
|
|
|
118
113
|
num_workers=num_workers,
|
|
119
114
|
pin_memory=True, # Optimized: pin memory for faster GPU transfer
|
|
120
115
|
prefetch_factor=4, # Optimized: increased prefetching
|
|
121
|
-
persistent_workers=True # Optimized: keep workers alive between epochs
|
|
116
|
+
persistent_workers=True, # Optimized: keep workers alive between epochs
|
|
122
117
|
)
|
|
123
118
|
|
|
124
119
|
# Track metrics
|
|
@@ -166,51 +161,58 @@ def run_benchmark(image_dir: str,
|
|
|
166
161
|
|
|
167
162
|
# Calculate statistics
|
|
168
163
|
results = {
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
164
|
+
"framework": "PyTorch Optimized DataLoader",
|
|
165
|
+
"batch_size": batch_size,
|
|
166
|
+
"num_workers": num_workers,
|
|
167
|
+
"num_epochs": num_epochs,
|
|
168
|
+
"persistent_workers": True,
|
|
169
|
+
"pin_memory": True,
|
|
170
|
+
"prefetch_factor": 4,
|
|
171
|
+
"total_time": total_time,
|
|
172
|
+
"epoch_times": epoch_times,
|
|
173
|
+
"avg_epoch_time": np.mean(epoch_times),
|
|
174
|
+
"std_epoch_time": np.std(epoch_times),
|
|
175
|
+
"avg_batch_time": np.mean(batch_times),
|
|
176
|
+
"std_batch_time": np.std(batch_times),
|
|
177
|
+
"throughput": len(dataset) * num_epochs / total_time,
|
|
178
|
+
"peak_memory_mb": max(memory_usage) if memory_usage else 0,
|
|
179
|
+
"avg_memory_mb": np.mean(memory_usage) if memory_usage else 0,
|
|
185
180
|
}
|
|
186
181
|
|
|
187
182
|
# Print summary
|
|
188
|
-
print("\n" + "="*80)
|
|
183
|
+
print("\n" + "=" * 80)
|
|
189
184
|
print("BENCHMARK RESULTS")
|
|
190
|
-
print("="*80)
|
|
185
|
+
print("=" * 80)
|
|
191
186
|
print(f"Total time: {total_time:.2f}s")
|
|
192
|
-
print(
|
|
193
|
-
|
|
187
|
+
print(
|
|
188
|
+
f"Average epoch time: {results['avg_epoch_time']:.2f}s ± {results['std_epoch_time']:.2f}s"
|
|
189
|
+
)
|
|
190
|
+
print(
|
|
191
|
+
f"Average batch time: {results['avg_batch_time']*1000:.2f}ms ± {results['std_batch_time']*1000:.2f}ms"
|
|
192
|
+
)
|
|
194
193
|
print(f"Throughput: {results['throughput']:.1f} images/sec")
|
|
195
194
|
print(f"Peak memory: {results['peak_memory_mb']:.1f} MB")
|
|
196
195
|
print(f"Average memory: {results['avg_memory_mb']:.1f} MB")
|
|
197
|
-
print("="*80)
|
|
196
|
+
print("=" * 80)
|
|
198
197
|
|
|
199
198
|
return results
|
|
200
199
|
|
|
201
200
|
|
|
202
201
|
def main():
|
|
203
|
-
parser = argparse.ArgumentParser(description=
|
|
204
|
-
parser.add_argument(
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
parser.add_argument(
|
|
213
|
-
|
|
202
|
+
parser = argparse.ArgumentParser(description="Optimized PyTorch DataLoader benchmark")
|
|
203
|
+
parser.add_argument(
|
|
204
|
+
"--image-dir",
|
|
205
|
+
"-dir",
|
|
206
|
+
type=str,
|
|
207
|
+
default="/private/tmp/benchmark_datasets/bench_2k/images/",
|
|
208
|
+
help="Directory containing JPEG images",
|
|
209
|
+
)
|
|
210
|
+
parser.add_argument("--batch-size", "-b", type=int, default=32, help="Batch size (default: 32)")
|
|
211
|
+
parser.add_argument(
|
|
212
|
+
"--num-workers", "-w", type=int, default=8, help="Number of worker processes (default: 8)"
|
|
213
|
+
)
|
|
214
|
+
parser.add_argument("--epochs", "-e", type=int, default=3, help="Number of epochs (default: 3)")
|
|
215
|
+
parser.add_argument("--output", "-o", type=str, help="Output JSON file for results")
|
|
214
216
|
|
|
215
217
|
args = parser.parse_args()
|
|
216
218
|
|
|
@@ -219,15 +221,15 @@ def main():
|
|
|
219
221
|
image_dir=args.image_dir,
|
|
220
222
|
batch_size=args.batch_size,
|
|
221
223
|
num_workers=args.num_workers,
|
|
222
|
-
num_epochs=args.epochs
|
|
224
|
+
num_epochs=args.epochs,
|
|
223
225
|
)
|
|
224
226
|
|
|
225
227
|
# Save results if requested
|
|
226
228
|
if args.output:
|
|
227
|
-
with open(args.output,
|
|
229
|
+
with open(args.output, "w") as f:
|
|
228
230
|
json.dump(results, f, indent=2)
|
|
229
231
|
print(f"\nResults saved to {args.output}")
|
|
230
232
|
|
|
231
233
|
|
|
232
|
-
if __name__ ==
|
|
234
|
+
if __name__ == "__main__":
|
|
233
235
|
main()
|