sumit-sdk 1.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sumit_sdk-1.2.4/.gitignore +10 -0
- sumit_sdk-1.2.4/CHANGELOG.md +0 -0
- sumit_sdk-1.2.4/LICENSE +13 -0
- sumit_sdk-1.2.4/PKG-INFO +67 -0
- sumit_sdk-1.2.4/README.md +37 -0
- sumit_sdk-1.2.4/pyproject.toml +47 -0
- sumit_sdk-1.2.4/requirements.txt +5 -0
- sumit_sdk-1.2.4/samples/local_file_transcript_curl.sh +60 -0
- sumit_sdk-1.2.4/samples/realtime_transcript.py +42 -0
- sumit_sdk-1.2.4/samples/realtime_transcript_flask.py +69 -0
- sumit_sdk-1.2.4/samples/realtime_transcript_from_file.py +73 -0
- sumit_sdk-1.2.4/samples/realtime_transcript_gui.py +86 -0
- sumit_sdk-1.2.4/samples/storage_test.py +37 -0
- sumit_sdk-1.2.4/samples/stream_local_file_loopback.sh +5 -0
- sumit_sdk-1.2.4/samples/stream_stt.py +46 -0
- sumit_sdk-1.2.4/samples/streamlink_transcript.py +35 -0
- sumit_sdk-1.2.4/samples/summary_api.py +71 -0
- sumit_sdk-1.2.4/samples/transcript_from_storage.py +39 -0
- sumit_sdk-1.2.4/samples/translate_api.py +50 -0
- sumit_sdk-1.2.4/samples/translate_subtitles.py +21 -0
- sumit_sdk-1.2.4/samples/usage_report_example.py +18 -0
- sumit_sdk-1.2.4/samples/webui/index.html +55 -0
- sumit_sdk-1.2.4/samples/webui/index_old.html +37 -0
- sumit_sdk-1.2.4/samples/webui/transcript.html +5 -0
- sumit_sdk-1.2.4/setup.cfg +7 -0
- sumit_sdk-1.2.4/setup.py +49 -0
- sumit_sdk-1.2.4/sumit_sdk/__init__.py +0 -0
- sumit_sdk-1.2.4/sumit_sdk/api.py +41 -0
- sumit_sdk-1.2.4/sumit_sdk/api_helper.py +109 -0
- sumit_sdk-1.2.4/sumit_sdk/base_task.py +89 -0
- sumit_sdk-1.2.4/sumit_sdk/exceptions.py +0 -0
- sumit_sdk-1.2.4/sumit_sdk/realtime_stt.py +181 -0
- sumit_sdk-1.2.4/sumit_sdk/storage.py +278 -0
- sumit_sdk-1.2.4/sumit_sdk/stream_stt.py +214 -0
- sumit_sdk-1.2.4/sumit_sdk/sumit_reckit.py +84 -0
- sumit_sdk-1.2.4/sumit_sdk/summary_api.py +45 -0
- sumit_sdk-1.2.4/sumit_sdk/transcript.py +104 -0
- sumit_sdk-1.2.4/sumit_sdk/translate_api.py +49 -0
- sumit_sdk-1.2.4/sumit_sdk/translate_subtitles.py +52 -0
- sumit_sdk-1.2.4/sumit_sdk/users.py +26 -0
- sumit_sdk-1.2.4/sumit_sdk/utils/__init__.py +0 -0
- sumit_sdk-1.2.4/sumit_sdk/utils/alsa_recorder.py +74 -0
- sumit_sdk-1.2.4/sumit_sdk/utils/audio_helper.py +98 -0
- sumit_sdk-1.2.4/sumit_sdk/utils/downsample_helper.py +29 -0
- sumit_sdk-1.2.4/sumit_sdk/utils/socketio_client.py +45 -0
- sumit_sdk-1.2.4/sumit_sdk/utils/streamlink_helper.py +130 -0
- sumit_sdk-1.2.4/sumit_sdk/utils.py +0 -0
- sumit_sdk-1.2.4/sumit_sdk.egg-info/PKG-INFO +67 -0
- sumit_sdk-1.2.4/sumit_sdk.egg-info/SOURCES.txt +56 -0
- sumit_sdk-1.2.4/sumit_sdk.egg-info/dependency_links.txt +1 -0
- sumit_sdk-1.2.4/sumit_sdk.egg-info/requires.txt +9 -0
- sumit_sdk-1.2.4/sumit_sdk.egg-info/top_level.txt +1 -0
- sumit_sdk-1.2.4/test_docker/Dockerfile +7 -0
- sumit_sdk-1.2.4/test_docker/build.sh +2 -0
- sumit_sdk-1.2.4/test_docker/onprem_stt.py +49 -0
- sumit_sdk-1.2.4/test_docker/run.sh +1 -0
- sumit_sdk-1.2.4/tests/__init__.py +0 -0
|
File without changes
|
sumit_sdk-1.2.4/LICENSE
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
BSD-3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright 2025 Sumit-X
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
|
6
|
+
|
|
7
|
+
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
|
8
|
+
|
|
9
|
+
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
|
10
|
+
|
|
11
|
+
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
|
|
12
|
+
|
|
13
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
sumit_sdk-1.2.4/PKG-INFO
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sumit_sdk
|
|
3
|
+
Version: 1.2.4
|
|
4
|
+
Summary: SDK to communicate with sumit API
|
|
5
|
+
Author-email: Sumit-AI <shlomi@sumit-ai.com>
|
|
6
|
+
License-Expression: BSD-3-Clause
|
|
7
|
+
Project-URL: Homepage, https://www.sumit-x.com
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
|
+
Requires-Python: >=3.8
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: requests>=2.25.0
|
|
22
|
+
Requires-Dist: websocket-client>=1.5.0
|
|
23
|
+
Requires-Dist: retry>=0.9.2
|
|
24
|
+
Requires-Dist: python-socketio>=5.10.0
|
|
25
|
+
Requires-Dist: numpy<2.0.0,>=1.24.0
|
|
26
|
+
Provides-Extra: audio
|
|
27
|
+
Requires-Dist: PyAudio; extra == "audio"
|
|
28
|
+
Requires-Dist: pyalsaaudio; extra == "audio"
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
|
|
31
|
+
# Sumit-sdk
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
## Getting started
|
|
36
|
+
|
|
37
|
+
## Description
|
|
38
|
+
|
|
39
|
+
Sumit SDK is a Python package that allows users to interact with Sumit-AI API. This document describes how to install the SDK from a zip file and how to use its main features.
|
|
40
|
+
|
|
41
|
+
## Installation
|
|
42
|
+
|
|
43
|
+
### Step 1: Download the SDK
|
|
44
|
+
|
|
45
|
+
1. Navigate to the "Tags" section of the GitLab repository.
|
|
46
|
+
2. Find the desired version of the SDK and click on it.
|
|
47
|
+
3. Click on the "Download" button and select "ZIP" to download a zip file of the SDK.
|
|
48
|
+
|
|
49
|
+
### Step 2: Extract the ZIP File
|
|
50
|
+
|
|
51
|
+
Extract the downloaded zip file to a location of your choice. You can do this by right-clicking the zip file and selecting "Extract All..." (the exact option might vary depending on your operating system).
|
|
52
|
+
|
|
53
|
+
### Step 3: Install the SDK
|
|
54
|
+
|
|
55
|
+
Open a terminal and navigate to the directory where the zip file was extracted. The SDK can be installed using _pip_ by running the following command:
|
|
56
|
+
|
|
57
|
+
```sh
|
|
58
|
+
cd path/to/extracted/folder
|
|
59
|
+
pip install .
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
_deprecated:
|
|
63
|
+
installing with `python setup.py install` is deprecated and is not supported in newer Python versions_
|
|
64
|
+
|
|
65
|
+
#### Install on Windows
|
|
66
|
+
Installation of PyAudio and pyalsaaudio may fail on Windows, in which case the installation of all the required libraries fails. If that happens, install the required packages via pip:
|
|
67
|
+
```pip install -r requirements.txt```
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Sumit-sdk
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
## Getting started
|
|
6
|
+
|
|
7
|
+
## Description
|
|
8
|
+
|
|
9
|
+
Sumit SDK is a Python package that allows users to interact with Sumit-AI API. This document describes how to install the SDK from a zip file and how to use its main features.
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
### Step 1: Download the SDK
|
|
14
|
+
|
|
15
|
+
1. Navigate to the "Tags" section of the GitLab repository.
|
|
16
|
+
2. Find the desired version of the SDK and click on it.
|
|
17
|
+
3. Click on the "Download" button and select "ZIP" to download a zip file of the SDK.
|
|
18
|
+
|
|
19
|
+
### Step 2: Extract the ZIP File
|
|
20
|
+
|
|
21
|
+
Extract the downloaded zip file to a location of your choice. You can do this by right-clicking the zip file and selecting "Extract All..." (the exact option might vary depending on your operating system).
|
|
22
|
+
|
|
23
|
+
### Step 3: Install the SDK
|
|
24
|
+
|
|
25
|
+
Open a terminal and navigate to the directory where the zip file was extracted. The SDK can be installed using _pip_ by running the following command:
|
|
26
|
+
|
|
27
|
+
```sh
|
|
28
|
+
cd path/to/extracted/folder
|
|
29
|
+
pip install .
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
_deprecated:
|
|
33
|
+
installing with `python setup.py install` is deprecated and is not supported in newer Python versions_
|
|
34
|
+
|
|
35
|
+
#### Install on Windows
|
|
36
|
+
Installation of PyAudio and pyalsaaudio may fail on Windows, in which case the installation of all the required libraries fails. If that happens, install the required packages via pip:
|
|
37
|
+
```pip install -r requirements.txt```
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "setuptools_scm"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "sumit_sdk"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
authors = [
|
|
9
|
+
{ name="Sumit-AI", email="shlomi@sumit-ai.com" },
|
|
10
|
+
]
|
|
11
|
+
description = "SDK to communicate with sumit API"
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.8"
|
|
14
|
+
license = "BSD-3-Clause"
|
|
15
|
+
license-files = ["LICENSE"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.8",
|
|
21
|
+
"Programming Language :: Python :: 3.9",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Programming Language :: Python :: 3.13",
|
|
26
|
+
"Programming Language :: Python :: 3.14",
|
|
27
|
+
]
|
|
28
|
+
dependencies = [
|
|
29
|
+
"requests>=2.25.0",
|
|
30
|
+
"websocket-client>=1.5.0",
|
|
31
|
+
"retry>=0.9.2",
|
|
32
|
+
"python-socketio>=5.10.0",
|
|
33
|
+
"numpy>=1.24.0,<2.0.0",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.urls]
|
|
37
|
+
Homepage = "https://www.sumit-x.com"
|
|
38
|
+
|
|
39
|
+
[project.optional-dependencies]
|
|
40
|
+
audio = ["PyAudio", "pyalsaaudio"]
|
|
41
|
+
|
|
42
|
+
[tool.setuptools]
|
|
43
|
+
packages = ["sumit_sdk"] # Or use find:
|
|
44
|
+
# packages = { find = {} }
|
|
45
|
+
|
|
46
|
+
[tool.setuptools_scm]
|
|
47
|
+
local_scheme = "no-local-version"
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# End-to-end sample: log in, upload a local audio file, request a transcript,
# query the job status once and download the resulting transcript.
# Requires: curl, jq.

# API endpoints:
api_url=https://api.sumit-labs.com
login_ep=login
upload_ep=storage/upload
download_ep=storage/download
transcript=v3/transcript
transcript_status=get_status

credential_file=api-sa.json # your API credentials file

# files:
local_file=test.wav
remote_file=test.wav
transcript_file=test.json
flat_transcript_file=test_flat.json
language="he-IL"

# STEP 1:

# login and get token
resp=$(curl -X POST -H "Content-Type: application/json" -d "@$credential_file" "$api_url/$login_ep")
# return:
# {"request": {"company": "xxx", "domain": "xxx", "email": "xxx", "user": "xxx"}, "success": true, "token": "YOUR.SECRET.TOKEN"}
# take the token from the response ("jq -r" prints the raw string, without the JSON quotes)
token=$(printf '%s\n' "$resp" | jq -r ".token")
headers="Authorization: Bearer $token"

# get upload link
payload="{\"filename\": \"$remote_file\"}"
resp=$(curl -X POST -H "Content-Type: application/json" -H "$headers" -d "$payload" "$api_url/$upload_ep")
# return:
# {"request": {"filename": "test.wav"}, "signed_url": "https://xxxxx/....d267dd0c", "success": true}
# take the url
dest_url=$(printf '%s\n' "$resp" | jq -r ".signed_url")

# upload - "--upload-file" is PUT request for a binary file
# (the URL is quoted so characters such as "?" and "*" are never glob-expanded or word-split)
curl "$dest_url" --upload-file "$local_file"

# send to transcript:
payload="{\"path\": \"$remote_file\", \"output_path\": \"$transcript_file\", \"lang\": \"$language\", \"model\": \"he_gen_v2\", \"flat_output_path\": \"$flat_transcript_file\"}"
resp=$(curl -X POST -H "Content-Type: application/json" -H "$headers" -d "$payload" "$api_url/$transcript")
printf '%s\n' "$resp"
task_id=$(printf '%s\n' "$resp" | jq -r ".response.job_id")
echo "follow task_id: $task_id"

# STEP 2:

# check for status...
payload="{\"id\": \"$task_id\"}"
resp=$(curl -X POST -H "Content-Type: application/json" -H "$headers" -d "$payload" "$api_url/$transcript_status")
printf '%s\n' "$resp"

# STEP 3:

# download transcription:
# get download link
payload="{\"filename\": \"$transcript_file\"}" # replace to $flat_transcript_file for the flat version of the transcript
resp=$(curl -X POST -H "Content-Type: application/json" -H "$headers" -d "$payload" "$api_url/$download_ep")
src_url=$(printf '%s\n' "$resp" | jq -r ".url")
curl -o "$transcript_file" "$src_url"
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from sumit_sdk.api import APIClient
|
|
3
|
+
from sumit_sdk.realtime_stt import RealtimeSTT
|
|
4
|
+
from sumit_sdk.realtime_stt import Profiles, VadProfile
|
|
5
|
+
from sumit_sdk.realtime_stt import BufferMode
|
|
6
|
+
from sumit_sdk.utils.audio_helper import Recorder # helper class to async record from microphone
|
|
7
|
+
|
|
8
|
+
# initialize API
|
|
9
|
+
api = APIClient("api-sa.json") # create client
|
|
10
|
+
rt_mgr = RealtimeSTT(api) # create realtime manager
|
|
11
|
+
|
|
12
|
+
# start session
|
|
13
|
+
def callback(data):
    """Print one transcript segment wrapped in right-to-left embedding marks.

    The text itself is not reversed: it is surrounded by U+202B
    (RIGHT-TO-LEFT EMBEDDING) and U+202C (POP DIRECTIONAL FORMATTING) so a
    bidi-aware terminal can lay out Hebrew text in the right direction.

    Args:
        data: mapping holding the segment text under the ``'txt'`` key.
    """
    rtl_open = '\u202b'
    rtl_close = '\u202c'
    print(rtl_open + data['txt'] + rtl_close)
|
|
15
|
+
|
|
16
|
+
def write_segments_callbcak(data):
    """Append a transcript segment to ``out.txt`` and echo it to the terminal.

    One line is appended per call: the current ``time.time()`` value, two tab
    characters, then the segment text. The text is also printed reversed,
    wrapped in U+202B / U+202C directional marks.

    Args:
        data: mapping holding the segment text under the ``'txt'`` key.
    """
    # Explicit UTF-8: the platform default codec (e.g. cp1252 on Windows)
    # cannot encode Hebrew text.
    with open('out.txt', 'a', encoding='utf-8') as fd:
        # The f-string is double-quoted because reusing the outer quote inside
        # a replacement field is a SyntaxError before Python 3.12, while the
        # package declares support for Python >= 3.8.
        fd.write(f"{time.time()}\t\t{data['txt']}\n")
    print('\u202b' + data['txt'][::-1] + '\u202c')
|
|
20
|
+
|
|
21
|
+
rt_mgr.start_session(callback, profile=Profiles.accurate, vad_profile=VadProfile.low, buffer_mode=BufferMode.default)
|
|
22
|
+
|
|
23
|
+
sock = rt_mgr.connect()
|
|
24
|
+
|
|
25
|
+
# create recorder
|
|
26
|
+
rec = Recorder(as_base64=True, buffer_sec=2.5) # encode samples as base64, to send the chunks over web-socket
|
|
27
|
+
rec.start()
|
|
28
|
+
stop_sig = False
|
|
29
|
+
while sock.connected and not stop_sig:
|
|
30
|
+
try:
|
|
31
|
+
data = rec.safe_get()
|
|
32
|
+
if data is None:
|
|
33
|
+
time.sleep(0.1)
|
|
34
|
+
continue
|
|
35
|
+
rt_mgr.send(sock, data)
|
|
36
|
+
except KeyboardInterrupt:
|
|
37
|
+
stop_sig = True
|
|
38
|
+
print("keyboard interupt. stop streaming")
|
|
39
|
+
|
|
40
|
+
# cleanup
|
|
41
|
+
rt_mgr.stop_session()
|
|
42
|
+
rec.stop()
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from sumit_sdk.api import APIClient
|
|
3
|
+
from sumit_sdk.realtime_stt import RealtimeSTT
|
|
4
|
+
from sumit_sdk.realtime_stt import Profiles, VadProfile
|
|
5
|
+
from sumit_sdk.utils.audio_helper import Recorder # helper class to async record from microphone
|
|
6
|
+
from flask import Flask, request, Response, json, render_template
|
|
7
|
+
from threading import Thread
|
|
8
|
+
from flask_socketio import SocketIO, emit
|
|
9
|
+
import asyncio
|
|
10
|
+
|
|
11
|
+
app = Flask(__name__, template_folder='webui')
|
|
12
|
+
app.config['SECRET_KEY'] = 'secret!'
|
|
13
|
+
socketio = SocketIO(app)
|
|
14
|
+
|
|
15
|
+
# initialize API
|
|
16
|
+
api = APIClient("api-sa.json") # create client
|
|
17
|
+
rt_mgr = RealtimeSTT(api) # create realtime manager
|
|
18
|
+
transcriptions = []
|
|
19
|
+
|
|
20
|
+
@app.route('/', methods=['POST', 'GET'])
|
|
21
|
+
def index(**kwargs):
|
|
22
|
+
return render_template('index.html')
|
|
23
|
+
|
|
24
|
+
@socketio.on('connect')
|
|
25
|
+
def handle_connect():
|
|
26
|
+
print('Client connected')
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@app.route('/transcription')
|
|
30
|
+
def suggestions():
|
|
31
|
+
return render_template('transcript.html', transcriptions=transcriptions)
|
|
32
|
+
|
|
33
|
+
last_two_texts = []
|
|
34
|
+
# start session
|
|
35
|
+
def callback(data):
|
|
36
|
+
# global transcriptions
|
|
37
|
+
print(data['txt'][::-1]) # Reverse for proper view of Hebrew in terminal.
|
|
38
|
+
# transcriptions.append(data['txt'])
|
|
39
|
+
global last_two_texts
|
|
40
|
+
last_two_texts.append(data['txt'])
|
|
41
|
+
last_two_texts = last_two_texts[-2:]
|
|
42
|
+
socketio.emit('update_texts', {'texts': last_two_texts})
|
|
43
|
+
|
|
44
|
+
rt_mgr.start_session(callback, profile=Profiles.default, vad_profile=VadProfile.default)
|
|
45
|
+
|
|
46
|
+
sock = rt_mgr.connect()
|
|
47
|
+
|
|
48
|
+
# create recorder
|
|
49
|
+
rec = Recorder(as_base64=True, buffer_sec=3) # encode samples as base64, to send the chunks over web-socket
|
|
50
|
+
rec.start()
|
|
51
|
+
|
|
52
|
+
Thread(target=lambda: socketio.run(app, host="127.0.0.1", port=5000, debug=False)).start()
|
|
53
|
+
|
|
54
|
+
stop_sig = False
|
|
55
|
+
while sock.connected and not stop_sig:
|
|
56
|
+
try:
|
|
57
|
+
data = rec.safe_get()
|
|
58
|
+
if data is None:
|
|
59
|
+
time.sleep(0.1)
|
|
60
|
+
continue
|
|
61
|
+
rt_mgr.send(sock, data)
|
|
62
|
+
except KeyboardInterrupt:
|
|
63
|
+
stop_sig = True
|
|
64
|
+
print("keyboard interupt. stop streaming")
|
|
65
|
+
|
|
66
|
+
# cleanup
|
|
67
|
+
rt_mgr.stop_session()
|
|
68
|
+
rec.stop()
|
|
69
|
+
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from sumit_sdk.api import APIClient
|
|
3
|
+
from sumit_sdk.realtime_stt import RealtimeSTT
|
|
4
|
+
from sumit_sdk.realtime_stt import Profiles, VadProfile
|
|
5
|
+
from sumit_sdk.realtime_stt import BufferMode
|
|
6
|
+
from sumit_sdk.utils.audio_helper import Recorder # helper class to async record from microphone
|
|
7
|
+
from scipy.io import wavfile as wf
|
|
8
|
+
from threading import Thread, Event
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
class FileRecorder(Recorder):
    """Recorder variant that feeds samples read from a WAV file.

    The whole file is loaded in ``__init__``; a worker thread then hands
    consecutive slices of ``CHUNK`` samples to ``_fill_buffer``, sleeping
    ``TIME_BUFFER`` before each slice.
    NOTE(review): ``CHUNK``, ``TIME_BUFFER`` and ``_fill_buffer`` are
    presumably provided by ``Recorder`` - confirm against the base class.
    """

    def __init__(self, filename: str, as_base64=False, buffer_sec: float = 1) -> None:
        """Load ``filename`` and prepare the (not yet started) reader state."""
        super().__init__(as_base64, buffer_sec)
        self.filename = filename
        # scipy's wavfile.read returns (sample_rate, samples)
        self.sr, self.data = wf.read(self.filename)
        self.stop_sig = Event()
        self._ix = 0  # index of the next sample to hand out
        self._thread = None

    def _start(self):
        """Worker loop: push one chunk per tick until stopped or out of data."""
        while True:
            if self.stop_sig.is_set():
                break
            time.sleep(self.TIME_BUFFER)
            begin = self._ix
            end = begin + self.CHUNK
            piece = self.data[begin:end]
            self._fill_buffer(piece.tobytes(), None, None, None)
            self._ix += self.CHUNK
            # the file is exhausted: raise the stop flag so the loop ends
            if self._ix >= self.data.shape[0]:
                self.stop()

    def start(self):
        """Clear the stop flag and launch the worker thread."""
        self.stop_sig.clear()
        worker = Thread(target=self._start)
        self._thread = worker
        worker.start()

    def stop(self):
        """Ask the worker loop to finish (the thread is not joined)."""
        self.stop_sig.set()
|
|
36
|
+
|
|
37
|
+
# initialize API
|
|
38
|
+
api = APIClient("api-sa.json") # create client
|
|
39
|
+
rt_mgr = RealtimeSTT(api) # create realtime manager
|
|
40
|
+
|
|
41
|
+
# start session
|
|
42
|
+
def callback(data):
    """Print one transcript segment wrapped in right-to-left embedding marks.

    The text itself is not reversed: it is surrounded by U+202B
    (RIGHT-TO-LEFT EMBEDDING) and U+202C (POP DIRECTIONAL FORMATTING) so a
    bidi-aware terminal can lay out Hebrew text in the right direction.

    Args:
        data: mapping holding the segment text under the ``'txt'`` key.
    """
    rtl_open = '\u202b'
    rtl_close = '\u202c'
    print(rtl_open + data['txt'] + rtl_close)
|
|
44
|
+
|
|
45
|
+
def write_segments_callbcak(data):
    """Append a transcript segment to ``out.txt`` and echo it to the terminal.

    One line is appended per call: the current ``time.time()`` value, two tab
    characters, then the segment text. The text is also printed reversed,
    wrapped in U+202B / U+202C directional marks.

    Args:
        data: mapping holding the segment text under the ``'txt'`` key.
    """
    # Explicit UTF-8: the platform default codec (e.g. cp1252 on Windows)
    # cannot encode Hebrew text.
    with open('out.txt', 'a', encoding='utf-8') as fd:
        # The f-string is double-quoted because reusing the outer quote inside
        # a replacement field is a SyntaxError before Python 3.12, while the
        # package declares support for Python >= 3.8.
        fd.write(f"{time.time()}\t\t{data['txt']}\n")
    print('\u202b' + data['txt'][::-1] + '\u202c')
|
|
49
|
+
|
|
50
|
+
rt_mgr.start_session(write_segments_callbcak, profile=Profiles.accurate, vad_profile=VadProfile.low, buffer_mode=BufferMode.default)
|
|
51
|
+
|
|
52
|
+
sock = rt_mgr.connect()
|
|
53
|
+
|
|
54
|
+
# create recorder
|
|
55
|
+
rec = FileRecorder(sys.argv[1], as_base64=True, buffer_sec=2.5) # encode samples as base64, to send the chunks over web-socket
|
|
56
|
+
rec.start()
|
|
57
|
+
stop_sig = False
|
|
58
|
+
with open('out.txt', 'a') as fd:
|
|
59
|
+
fd.write(f'{time.time()}\t\tSTART_NEW_SESSION\n')
|
|
60
|
+
while sock.connected and not stop_sig:
|
|
61
|
+
try:
|
|
62
|
+
data = rec.safe_get()
|
|
63
|
+
if data is None:
|
|
64
|
+
time.sleep(0.1)
|
|
65
|
+
continue
|
|
66
|
+
rt_mgr.send(sock, data)
|
|
67
|
+
except KeyboardInterrupt:
|
|
68
|
+
stop_sig = True
|
|
69
|
+
print("keyboard interupt. stop streaming")
|
|
70
|
+
|
|
71
|
+
# cleanup
|
|
72
|
+
rt_mgr.stop_session()
|
|
73
|
+
rec.stop()
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from sumit_sdk.api import APIClient
|
|
3
|
+
from sumit_sdk.realtime_stt import RealtimeSTT
|
|
4
|
+
from sumit_sdk.utils.audio_helper import Recorder # helper class to async record from microphone
|
|
5
|
+
import gradio as gr # for GUI
|
|
6
|
+
import numpy as np
|
|
7
|
+
import queue
|
|
8
|
+
import base64
|
|
9
|
+
import scipy.signal as sps
|
|
10
|
+
from scipy import signal
|
|
11
|
+
|
|
12
|
+
# initialize API
|
|
13
|
+
api = APIClient("api-sa-prod.json") # create client
|
|
14
|
+
rt_mgr = RealtimeSTT(api) # create realtime manager
|
|
15
|
+
|
|
16
|
+
# start session
|
|
17
|
+
txt_buf = queue.Queue()
|
|
18
|
+
def callback(data):
|
|
19
|
+
txt_buf.put(data)
|
|
20
|
+
|
|
21
|
+
rt_mgr.start_session(callback)
|
|
22
|
+
sock = rt_mgr.connect()
|
|
23
|
+
|
|
24
|
+
# create recorder GUI
|
|
25
|
+
def butter_lowpass(lowcut, fs, order=10):
    """Design a digital Butterworth low-pass filter.

    Args:
        lowcut: cutoff frequency, in the same unit as ``fs``.
        fs: sampling rate of the signal to be filtered.
        order: filter order.

    Returns:
        The filter as second-order sections, as returned by
        ``scipy.signal.butter(..., output='sos')``.
    """
    # scipy expects the cutoff as a fraction of the Nyquist frequency (fs / 2)
    normalized_cutoff = lowcut / (0.5 * fs)
    return signal.butter(order, normalized_cutoff, analog=False, btype='low', output='sos')
|
|
30
|
+
|
|
31
|
+
sos = None # butter_lowpass(15680, 44100, 5)
|
|
32
|
+
def reformat_freq(sr, y):
    """Down-mix ``y`` to mono and resample it from ``sr`` Hz to 16 kHz int16.

    Args:
        sr: sampling rate of ``y`` in Hz.
        y: samples, either 1-D (mono) or 2-D with one column per channel.

    Returns:
        ``(sr, samples)``: the *input* rate, unchanged (kept for backward
        compatibility), and the resampled mono signal as ``np.int16``.
    """
    target_sr = 16000
    cutoff_hz = 15680
    # Work in float64: adding two int16 channels directly wraps around
    # whenever the sum leaves the int16 range.
    samples = np.asarray(y, dtype=np.float64)
    if samples.ndim > 1:
        samples = samples.mean(axis=1)  # average every channel into one
    nyquist = 0.5 * sr
    # A digital Butterworth design needs 0 < cutoff / nyquist < 1, so the
    # low-pass stage is skipped for inputs whose Nyquist is at or below the cutoff.
    if cutoff_hz < nyquist:
        # One filter per sample rate: a design made for one rate is wrong
        # for a stream that arrives at another rate.
        cache = reformat_freq.__dict__.setdefault('_sos_by_rate', {})
        if sr not in cache:
            cache[sr] = signal.butter(5, cutoff_hz / nyquist, analog=False, btype='low', output='sos')
        samples = signal.sosfilt(cache[sr], samples)
    number_of_samples = round(len(samples) * float(target_sr) / sr)
    samples = sps.resample(samples, number_of_samples)
    # Clip before casting so filter overshoot saturates instead of wrapping.
    return sr, np.clip(samples, -32768, 32767).astype(np.int16)
|
|
42
|
+
|
|
43
|
+
audio_buf = np.array([], dtype=np.int16)
|
|
44
|
+
def transcribe(speech, state=""):
    """Gradio streaming handler: buffer audio, send it, collect new text.

    Incoming audio is converted with ``reformat_freq`` and accumulated in the
    module-level ``audio_buf``. Once at least 46000 samples are buffered they
    are base64-encoded and sent through ``rt_mgr``; every result currently
    waiting in ``txt_buf`` is then appended to ``state``.

    Args:
        speech: ``(sample_rate, samples)`` pair for the latest audio chunk.
        state: transcript text accumulated so far.

    Returns:
        ``(state, state)``: the transcript for the textbox and for the state.
    """
    global audio_buf
    try:
        _, y = reformat_freq(*speech)
    except Exception:
        # Skip a chunk that cannot be parsed; falling through would use an
        # unbound ``y`` and raise NameError.
        print("failed to parse data")
        return state, state
    audio_buf = np.concatenate([audio_buf, y])
    if len(audio_buf) < 46000:
        return state, state
    data = base64.b64encode(audio_buf.tobytes())
    audio_buf = np.array([], dtype=np.int16)
    rt_mgr.send(sock, data)
    # Drain whatever results are already waiting, without blocking.
    while True:
        try:
            ready = txt_buf.get(block=False)
        except queue.Empty:
            break
        if ready is None:
            break
        if ready:
            try:
                state += ready["txt"] + " "
            except (KeyError, TypeError):
                # best effort: ignore a message that carries no usable text
                continue
    return state, state
|
|
69
|
+
|
|
70
|
+
gr.Interface(
|
|
71
|
+
fn=transcribe,
|
|
72
|
+
inputs=[
|
|
73
|
+
gr.Audio(source="microphone", type="numpy", streaming=True, label="Speech"),
|
|
74
|
+
"state"
|
|
75
|
+
],
|
|
76
|
+
outputs=[
|
|
77
|
+
gr.Textbox(label="Transcript"),
|
|
78
|
+
"state"
|
|
79
|
+
],
|
|
80
|
+
theme=gr.themes.Soft(),
|
|
81
|
+
title="Sumit Realtime Example",
|
|
82
|
+
allow_flagging="never",
|
|
83
|
+
live=True).launch()
|
|
84
|
+
|
|
85
|
+
# cleanup
|
|
86
|
+
rt_mgr.stop_session()
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from sumit_sdk.api import APIClient
|
|
2
|
+
from sumit_sdk.storage import Storage
|
|
3
|
+
|
|
4
|
+
api = APIClient("api-sa.json") # create client
|
|
5
|
+
storage = Storage(api)
|
|
6
|
+
|
|
7
|
+
# An example of uploading a file to storage
|
|
8
|
+
|
|
9
|
+
filename = "<PATH_TO_FILE_IN_STORAGE>" # The path + and the name of the file that will appear in the storage.
|
|
10
|
+
|
|
11
|
+
path = "<YOUR_FILE_NAME>" # path from my computer
|
|
12
|
+
|
|
13
|
+
exp = 10 # hours
|
|
14
|
+
|
|
15
|
+
res = storage.upload(filename, path, exp)
|
|
16
|
+
print(res)
|
|
17
|
+
|
|
18
|
+
# An example of uploading a multi files to storage
|
|
19
|
+
|
|
20
|
+
remote_to_local_map = {"<PATH_TO_FILE_IN_STORAGE_1>": "<YOUR_LOCAL_FILE_PATH_1>",
|
|
21
|
+
"<PATH_TO_FILE_IN_STORAGE_2>": "<YOUR_LOCAL_FILE_PATH_2>",
|
|
22
|
+
"<PATH_TO_FILE_IN_STORAGE_3>": "<YOUR_LOCAL_FILE_PATH_3>"}
|
|
23
|
+
|
|
24
|
+
res = storage.upload_multi(remote_to_local_map)
|
|
25
|
+
print(res)
|
|
26
|
+
|
|
27
|
+
# An example of deleting a file from storage
|
|
28
|
+
|
|
29
|
+
filename = "<FULL_PATH_TO_FILE_IN_STORAGE>"
|
|
30
|
+
res = storage.delete_file(filename)
|
|
31
|
+
print(res)
|
|
32
|
+
|
|
33
|
+
# An example NO. 1 of getting list of files from storage
|
|
34
|
+
|
|
35
|
+
folder_name = "<PATH_TO_FOLDER_NAME>"
|
|
36
|
+
res = storage.list_files(folder_name)
|
|
37
|
+
print(res)
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import time
|
|
2
|
+
|
|
3
|
+
from sumit_sdk.api import APIClient # API core client
|
|
4
|
+
from sumit_sdk.stream_stt import StreamSTT
|
|
5
|
+
import librosa
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
def print_stt(data):
    """Print the first transcript entry of a streaming-STT response.

    Output is two lines: ``<id>: <segment>`` followed by the entry's
    ``start`` and ``end`` values (``None`` when a key is absent).

    Args:
        data: response mapping with an ``'id'`` and a non-empty
            ``'transcript'`` list of mappings.
    """
    first = data["transcript"][0]
    headline = f"{data['id']}: {first['segment']}"
    timing = f"start:{first.get('start')}\tend:{first.get('end')}"
    print(f"{headline}\n{timing}")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# initialize API
|
|
15
|
+
api = APIClient("api-sa-prod.json", env='frankfurt') # create client
|
|
16
|
+
|
|
17
|
+
stt = StreamSTT(api, print_stt)
|
|
18
|
+
response = stt.start()
|
|
19
|
+
if response.get('status_code') != 0:
|
|
20
|
+
print(response)
|
|
21
|
+
raise Exception("failed to retrieve session token. it's probably due to: incorrect user, missing permissions or networking issue.")
|
|
22
|
+
|
|
23
|
+
stt.listen()
|
|
24
|
+
|
|
25
|
+
print("send files:")
|
|
26
|
+
for i in range(5):
|
|
27
|
+
stt.send_audio(f"audio_id_{i}", audio_path="/tmp/1.wav") # send short audio file
|
|
28
|
+
time.sleep(1)
|
|
29
|
+
|
|
30
|
+
# example 2:
|
|
31
|
+
# chunk long file:
|
|
32
|
+
|
|
33
|
+
# print("send chunks:")
|
|
34
|
+
# audio, _ = librosa.load("/tmp/2.wav", sr=16000, mono=True) # for this sample, use audio file longer than 50 seconds (or change the chunk size/number of chunks)
|
|
35
|
+
# # librosa load data as float, convert to int16
|
|
36
|
+
# audio *= (2 ** 15) - 1
|
|
37
|
+
# audio = audio.astype(np.int16)
|
|
38
|
+
# chunk_size = 10 * 16000 # 10 seconds length
|
|
39
|
+
# for i in range(5):
|
|
40
|
+
# audio_data = audio[int(i * chunk_size):int((i + 1) * chunk_size)]
|
|
41
|
+
# stt.send_audio(f"audio_id2_{i}", audio_data=audio_data)
|
|
42
|
+
# time.sleep(1)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
time.sleep(10)
|
|
46
|
+
stt.stop_listening()
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from sumit_sdk.api import APIClient
|
|
3
|
+
from sumit_sdk.realtime_stt import RealtimeSTT, Profiles
|
|
4
|
+
from sumit_sdk.utils.streamlink_helper import StreamlinkHelper # helper class to async record from URLs
|
|
5
|
+
|
|
6
|
+
# initialize API
|
|
7
|
+
api = APIClient("api-sa.json") # create client
|
|
8
|
+
rt_mgr = RealtimeSTT(api) # create realtime manager
|
|
9
|
+
|
|
10
|
+
# start session
|
|
11
|
+
def callback(data):
    """Print the segment's ``'st'`` value followed by its reversed text.

    The text is reversed character by character so Hebrew reads correctly in
    a terminal that prints strictly left to right.

    Args:
        data: mapping with the keys ``'st'`` and ``'txt'``.
    """
    started = data['st']
    text = data['txt']
    print(started, text[::-1])
|
|
13
|
+
|
|
14
|
+
rt_mgr.start_session(callback, profile=Profiles.very_accurate)
|
|
15
|
+
sock = rt_mgr.connect()
|
|
16
|
+
|
|
17
|
+
# create recorder
|
|
18
|
+
rec = StreamlinkHelper(chunk_len=5, as_base64=True)
|
|
19
|
+
rec.open_stream("https://www.youtube.com/watch?v=1IMi74Ybg8s")
|
|
20
|
+
rec.async_read()
|
|
21
|
+
stop_sig = False
|
|
22
|
+
while sock.connected and not stop_sig:
|
|
23
|
+
try:
|
|
24
|
+
data = rec.safe_get()
|
|
25
|
+
if data is None:
|
|
26
|
+
time.sleep(0.1)
|
|
27
|
+
continue
|
|
28
|
+
rt_mgr.send(sock, data)
|
|
29
|
+
except KeyboardInterrupt:
|
|
30
|
+
stop_sig = True
|
|
31
|
+
print("keyboard interupt. stop streaming")
|
|
32
|
+
|
|
33
|
+
# cleanup
|
|
34
|
+
rt_mgr.stop_session()
|
|
35
|
+
rec.stop()
|