soundhub-utils 0.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- soundhub_utils-0.0.2/LICENSE.md +28 -0
- soundhub_utils-0.0.2/PKG-INFO +138 -0
- soundhub_utils-0.0.2/README.md +128 -0
- soundhub_utils-0.0.2/pyproject.toml +66 -0
- soundhub_utils-0.0.2/setup.cfg +8 -0
- soundhub_utils-0.0.2/soundhub_utils/__init__.py +7 -0
- soundhub_utils-0.0.2/soundhub_utils/audio/__init__.py +1 -0
- soundhub_utils-0.0.2/soundhub_utils/audio/_shared.py +1032 -0
- soundhub_utils-0.0.2/soundhub_utils/audio/audio.py +221 -0
- soundhub_utils-0.0.2/soundhub_utils/audio/librosa_audio.py +379 -0
- soundhub_utils-0.0.2/soundhub_utils/audio/pytorch_audio.py +412 -0
- soundhub_utils-0.0.2/soundhub_utils/audio/tensorflow_audio.py +664 -0
- soundhub_utils-0.0.2/soundhub_utils/constants.py +10 -0
- soundhub_utils-0.0.2/soundhub_utils/inference/__init__.py +1 -0
- soundhub_utils-0.0.2/soundhub_utils/inference/audio_to_spectrogram_model.py +229 -0
- soundhub_utils-0.0.2/soundhub_utils/io/__init__.py +7 -0
- soundhub_utils-0.0.2/soundhub_utils/io/_shared.py +128 -0
- soundhub_utils-0.0.2/soundhub_utils/io/aws.py +259 -0
- soundhub_utils-0.0.2/soundhub_utils/io/gcs.py +302 -0
- soundhub_utils-0.0.2/soundhub_utils/io/io.py +225 -0
- soundhub_utils-0.0.2/soundhub_utils/io/local.py +210 -0
- soundhub_utils-0.0.2/soundhub_utils/io/url.py +293 -0
- soundhub_utils-0.0.2/soundhub_utils/names.py +52 -0
- soundhub_utils-0.0.2/soundhub_utils/utils/__init__.py +7 -0
- soundhub_utils-0.0.2/soundhub_utils/utils/flac.py +253 -0
- soundhub_utils-0.0.2/soundhub_utils/utils/lbrsa_pydub_audio.py +407 -0
- soundhub_utils-0.0.2/soundhub_utils.egg-info/PKG-INFO +138 -0
- soundhub_utils-0.0.2/soundhub_utils.egg-info/SOURCES.txt +30 -0
- soundhub_utils-0.0.2/soundhub_utils.egg-info/dependency_links.txt +1 -0
- soundhub_utils-0.0.2/soundhub_utils.egg-info/requires.txt +3 -0
- soundhub_utils-0.0.2/soundhub_utils.egg-info/top_level.txt +7 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025, Regents of the University of California (Schmidt Center for Data Science and Environment at UC Berkeley)
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
16
|
+
contributors may be used to endorse or promote products derived from
|
|
17
|
+
this software without specific prior written permission.
|
|
18
|
+
|
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: soundhub_utils
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Description-Content-Type: text/markdown
|
|
5
|
+
License-File: LICENSE.md
|
|
6
|
+
Requires-Dist: tf_keras
|
|
7
|
+
Requires-Dist: cocina
|
|
8
|
+
Requires-Dist: sox_tensorflow
|
|
9
|
+
Dynamic: license-file
|
|
10
|
+
|
|
11
|
+
# Soundhub Utils
|
|
12
|
+
|
|
13
|
+
Audio processing utilities library for SoundHub model integration. This package provides core functionality for:
|
|
14
|
+
|
|
15
|
+
1. Reading audio files from AWS S3, Google Cloud Storage, HTTPS URLs, and local filesystems
|
|
16
|
+
2. Standard audio preprocessing (resampling, segmentation, format conversion)
|
|
17
|
+
3. Spectrogram generation using multiple backends (TensorFlow, PyTorch, librosa)
|
|
18
|
+
4. Unified I/O interface across different storage platforms
|
|
19
|
+
|
|
20
|
+
**Note**: This is a utilities library. For running models (like OWL), see [`soundhub_model_runner`](https://github.com/your-org/soundhub_model_runner).
|
|
21
|
+
|
|
22
|
+
## Table of Contents
|
|
23
|
+
|
|
24
|
+
- [Installation](#installation)
|
|
25
|
+
- [Core Modules](#core-modules)
|
|
26
|
+
- [Usage Examples](#usage-examples)
|
|
27
|
+
- [Style Guide](#style-guide)
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Installation
|
|
32
|
+
|
|
33
|
+
### Using Pixi (Recommended for Development)
|
|
34
|
+
|
|
35
|
+
Requirements are managed through [Pixi](https://pixi.sh/latest). Install pixi, then:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
# Run commands in the pixi environment
|
|
39
|
+
pixi run python -c "import soundhub_utils; print(soundhub_utils.__version__)"
|
|
40
|
+
|
|
41
|
+
# Launch jupyter for development
|
|
42
|
+
pixi run jupyter lab .
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
The `pyproject.toml` includes `soundhub_utils = { path = ".", editable = true }`, so no separate installation is needed.
|
|
46
|
+
|
|
47
|
+
### Using pip
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
# Install from local directory
|
|
51
|
+
pip install -e .
|
|
52
|
+
|
|
53
|
+
# Or install the published release from PyPI (append ==<version> to pin a specific version)
|
|
54
|
+
pip install soundhub-utils
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Core Modules
|
|
60
|
+
|
|
61
|
+
### I/O (soundhub_utils.io)
|
|
62
|
+
|
|
63
|
+
Unified interface for reading/writing audio files across multiple storage platforms:
|
|
64
|
+
|
|
65
|
+
- **`soundhub_utils.io.aws`**: AWS S3 integration with partial FLAC download support
|
|
66
|
+
- **`soundhub_utils.io.gcs`**: Google Cloud Storage operations
|
|
67
|
+
- **`soundhub_utils.io.local`**: Local filesystem operations
|
|
68
|
+
- **`soundhub_utils.io.url`**: HTTP/HTTPS streaming and downloads
|
|
69
|
+
- **`soundhub_utils.io.io`**: Unified interface that auto-routes based on URI scheme
|
|
70
|
+
|
|
71
|
+
### Audio Processing (soundhub_utils.audio)
|
|
72
|
+
|
|
73
|
+
Audio processing backends for spectrogram generation and preprocessing:
|
|
74
|
+
|
|
75
|
+
- **`soundhub_utils.audio.tensorflow_audio`**: TensorFlow-native audio processing
|
|
76
|
+
|
|
77
|
+
### Utilities (soundhub_utils.utils)
|
|
78
|
+
|
|
79
|
+
Helper functions for audio processing:
|
|
80
|
+
|
|
81
|
+
- **`soundhub_utils.utils.lbrsa_pydub_audio`**: Format conversion and validation
|
|
82
|
+
- **`soundhub_utils.utils.flac`**: FLAC metadata extraction and time-range processing
|
|
83
|
+
- **`soundhub_utils.names`**: File naming conventions for audio and spectrograms
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## Usage Examples
|
|
88
|
+
|
|
89
|
+
### Reading Audio from Different Sources
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from soundhub_utils.io import io
|
|
93
|
+
|
|
94
|
+
# Read from S3
|
|
95
|
+
audio_data = io.read_flac("s3://bucket/path/audio.flac")
|
|
96
|
+
|
|
97
|
+
# Read from Google Cloud Storage
|
|
98
|
+
audio_data = io.read_flac("gs://bucket/path/audio.flac")
|
|
99
|
+
|
|
100
|
+
# Read from HTTPS URL
|
|
101
|
+
audio_data = io.read_flac("https://example.com/audio.flac")
|
|
102
|
+
|
|
103
|
+
# Read from local file
|
|
104
|
+
audio_data = io.read_flac("/path/to/audio.flac")
|
|
105
|
+
|
|
106
|
+
# Read partial FLAC (time range)
|
|
107
|
+
audio_data = io.read_partial_flac(
|
|
108
|
+
"s3://bucket/audio.flac",
|
|
109
|
+
start_time=10.0, # seconds
|
|
110
|
+
duration=30.0 # seconds
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Generating Spectrograms
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from soundhub_utils.audio import tensorflow_audio
|
|
118
|
+
|
|
119
|
+
# Generate spectrograms from audio file
|
|
120
|
+
spectrograms = tensorflow_audio.generate_spectrograms(
|
|
121
|
+
audio_path="/path/to/audio.flac",
|
|
122
|
+
sample_rate=8000,
|
|
123
|
+
segment_duration=12.0,
|
|
124
|
+
spectrogram_shape=[257, 1000]
|
|
125
|
+
)
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## Style Guide
|
|
131
|
+
|
|
132
|
+
This project follows PEP 8. See [setup.cfg](./setup.cfg) for exceptions. Run `pycodestyle .` to check compliance.
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## License
|
|
137
|
+
|
|
138
|
+
BSD 3-Clause
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# Soundhub Utils
|
|
2
|
+
|
|
3
|
+
Audio processing utilities library for SoundHub model integration. This package provides core functionality for:
|
|
4
|
+
|
|
5
|
+
1. Reading audio files from AWS S3, Google Cloud Storage, HTTPS URLs, and local filesystems
|
|
6
|
+
2. Standard audio preprocessing (resampling, segmentation, format conversion)
|
|
7
|
+
3. Spectrogram generation using multiple backends (TensorFlow, PyTorch, librosa)
|
|
8
|
+
4. Unified I/O interface across different storage platforms
|
|
9
|
+
|
|
10
|
+
**Note**: This is a utilities library. For running models (like OWL), see [`soundhub_model_runner`](https://github.com/your-org/soundhub_model_runner).
|
|
11
|
+
|
|
12
|
+
## Table of Contents
|
|
13
|
+
|
|
14
|
+
- [Installation](#installation)
|
|
15
|
+
- [Core Modules](#core-modules)
|
|
16
|
+
- [Usage Examples](#usage-examples)
|
|
17
|
+
- [Style Guide](#style-guide)
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
### Using Pixi (Recommended for Development)
|
|
24
|
+
|
|
25
|
+
Requirements are managed through [Pixi](https://pixi.sh/latest). Install pixi, then:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
# Run commands in the pixi environment
|
|
29
|
+
pixi run python -c "import soundhub_utils; print(soundhub_utils.__version__)"
|
|
30
|
+
|
|
31
|
+
# Launch jupyter for development
|
|
32
|
+
pixi run jupyter lab .
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
The `pyproject.toml` includes `soundhub_utils = { path = ".", editable = true }`, so no separate installation is needed.
|
|
36
|
+
|
|
37
|
+
### Using pip
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
# Install from local directory
|
|
41
|
+
pip install -e .
|
|
42
|
+
|
|
43
|
+
# Or install the published release from PyPI (append ==<version> to pin a specific version)
|
|
44
|
+
pip install soundhub-utils
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Core Modules
|
|
50
|
+
|
|
51
|
+
### I/O (soundhub_utils.io)
|
|
52
|
+
|
|
53
|
+
Unified interface for reading/writing audio files across multiple storage platforms:
|
|
54
|
+
|
|
55
|
+
- **`soundhub_utils.io.aws`**: AWS S3 integration with partial FLAC download support
|
|
56
|
+
- **`soundhub_utils.io.gcs`**: Google Cloud Storage operations
|
|
57
|
+
- **`soundhub_utils.io.local`**: Local filesystem operations
|
|
58
|
+
- **`soundhub_utils.io.url`**: HTTP/HTTPS streaming and downloads
|
|
59
|
+
- **`soundhub_utils.io.io`**: Unified interface that auto-routes based on URI scheme
|
|
60
|
+
|
|
61
|
+
### Audio Processing (soundhub_utils.audio)
|
|
62
|
+
|
|
63
|
+
Audio processing backends for spectrogram generation and preprocessing:
|
|
64
|
+
|
|
65
|
+
- **`soundhub_utils.audio.tensorflow_audio`**: TensorFlow-native audio processing
|
|
66
|
+
|
|
67
|
+
### Utilities (soundhub_utils.utils)
|
|
68
|
+
|
|
69
|
+
Helper functions for audio processing:
|
|
70
|
+
|
|
71
|
+
- **`soundhub_utils.utils.lbrsa_pydub_audio`**: Format conversion and validation
|
|
72
|
+
- **`soundhub_utils.utils.flac`**: FLAC metadata extraction and time-range processing
|
|
73
|
+
- **`soundhub_utils.names`**: File naming conventions for audio and spectrograms
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## Usage Examples
|
|
78
|
+
|
|
79
|
+
### Reading Audio from Different Sources
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from soundhub_utils.io import io
|
|
83
|
+
|
|
84
|
+
# Read from S3
|
|
85
|
+
audio_data = io.read_flac("s3://bucket/path/audio.flac")
|
|
86
|
+
|
|
87
|
+
# Read from Google Cloud Storage
|
|
88
|
+
audio_data = io.read_flac("gs://bucket/path/audio.flac")
|
|
89
|
+
|
|
90
|
+
# Read from HTTPS URL
|
|
91
|
+
audio_data = io.read_flac("https://example.com/audio.flac")
|
|
92
|
+
|
|
93
|
+
# Read from local file
|
|
94
|
+
audio_data = io.read_flac("/path/to/audio.flac")
|
|
95
|
+
|
|
96
|
+
# Read partial FLAC (time range)
|
|
97
|
+
audio_data = io.read_partial_flac(
|
|
98
|
+
"s3://bucket/audio.flac",
|
|
99
|
+
start_time=10.0, # seconds
|
|
100
|
+
duration=30.0 # seconds
|
|
101
|
+
)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Generating Spectrograms
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from soundhub_utils.audio import tensorflow_audio
|
|
108
|
+
|
|
109
|
+
# Generate spectrograms from audio file
|
|
110
|
+
spectrograms = tensorflow_audio.generate_spectrograms(
|
|
111
|
+
audio_path="/path/to/audio.flac",
|
|
112
|
+
sample_rate=8000,
|
|
113
|
+
segment_duration=12.0,
|
|
114
|
+
spectrogram_shape=[257, 1000]
|
|
115
|
+
)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Style Guide
|
|
121
|
+
|
|
122
|
+
This project follows PEP 8. See [setup.cfg](./setup.cfg) for exceptions. Run `pycodestyle .` to check compliance.
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## License
|
|
127
|
+
|
|
128
|
+
BSD 3-Clause
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "soundhub_utils"
|
|
7
|
+
version = "0.0.2"
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
dependencies = [
|
|
10
|
+
'tf_keras',
|
|
11
|
+
'cocina',
|
|
12
|
+
'sox_tensorflow']
|
|
13
|
+
|
|
14
|
+
[tool.setuptools]
|
|
15
|
+
packages = { find = {} } # Auto-discover all packages including subpackages
|
|
16
|
+
|
|
17
|
+
[tool.pixi.project]
|
|
18
|
+
channels = ["conda-forge", "pytorch"]
|
|
19
|
+
platforms = ["osx-arm64", "linux-64", "osx-64", "linux-aarch64"]
|
|
20
|
+
|
|
21
|
+
[tool.pixi.pypi-dependencies]
|
|
22
|
+
soundhub_utils = { path = ".", editable = true }
|
|
23
|
+
|
|
24
|
+
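# Feature for local cocina / sox_tensorflow development (the paths below are machine-specific absolute paths; adjust them to your own checkout)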
|
|
25
|
+
[tool.pixi.feature.local-cocina.pypi-dependencies]
|
|
26
|
+
cocina = { path = "/Users/brookie/code/dse/cocina/cocina", editable = true }
|
|
27
|
+
sox_tensorflow = { path = "/Users/brookie/code/dse/soundhub/sox_gpu/codebase/sox_tensorflow", editable = true }
|
|
28
|
+
|
|
29
|
+
[tool.pixi.dependencies]
|
|
30
|
+
pycodestyle = ">=2.14.0,<3"
|
|
31
|
+
ipykernel = ">=7.1.0,<8"
|
|
32
|
+
jupyterlab = ">=4.4.10,<5"
|
|
33
|
+
numpy = ">=2.3.4,<3"
|
|
34
|
+
pandas = ">=2.3.3,<3"
|
|
35
|
+
pydub = ">=0.25.1,<0.26"
|
|
36
|
+
boto3 = ">=1.40.65,<2"
|
|
37
|
+
audioop-lts = ">=0.2.2,<0.3"
|
|
38
|
+
pysoundfile = ">=0.13.1,<0.14"
|
|
39
|
+
matplotlib = ">=3.10.7,<4"
|
|
40
|
+
scipy = ">=1.16.3,<2"
|
|
41
|
+
google-cloud-storage = ">=3.4.1,<4"
|
|
42
|
+
librosa = ">=0.11.0,<0.12"
|
|
43
|
+
typing_extensions = ">=4.15.0,<5"
|
|
44
|
+
referencing = ">=0.37.0,<0.38"
|
|
45
|
+
pillow = ">=12.0.0,<13"
|
|
46
|
+
pyarrow = ">=19.0.1,<23"
|
|
47
|
+
tensorflow = ">=2.18.0,<3"
|
|
48
|
+
pytorch = ">=2.6.0,<3"
|
|
49
|
+
torchaudio = ">=2.7.0,<3"
|
|
50
|
+
duckdb = ">=1.4.1,<2"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Environments
|
|
54
|
+
[tool.pixi.environments]
|
|
55
|
+
default = { features = [] }
|
|
56
|
+
local = { features = ["local-cocina"] }
|
|
57
|
+
dev = { features = ["dev"] }
|
|
58
|
+
|
|
59
|
+
[tool.pixi.feature.dev.dependencies]
|
|
60
|
+
twine = "*"
|
|
61
|
+
|
|
62
|
+
[tool.pixi.feature.dev.pypi-dependencies]
|
|
63
|
+
build = "*"
|
|
64
|
+
h5py = ">=3.15.1,<4"
|
|
65
|
+
|
|
66
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# __init__.py
|