sumit-sdk 1.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. sumit_sdk-1.2.4/.gitignore +10 -0
  2. sumit_sdk-1.2.4/CHANGELOG.md +0 -0
  3. sumit_sdk-1.2.4/LICENSE +13 -0
  4. sumit_sdk-1.2.4/PKG-INFO +67 -0
  5. sumit_sdk-1.2.4/README.md +37 -0
  6. sumit_sdk-1.2.4/pyproject.toml +47 -0
  7. sumit_sdk-1.2.4/requirements.txt +5 -0
  8. sumit_sdk-1.2.4/samples/local_file_transcript_curl.sh +60 -0
  9. sumit_sdk-1.2.4/samples/realtime_transcript.py +42 -0
  10. sumit_sdk-1.2.4/samples/realtime_transcript_flask.py +69 -0
  11. sumit_sdk-1.2.4/samples/realtime_transcript_from_file.py +73 -0
  12. sumit_sdk-1.2.4/samples/realtime_transcript_gui.py +86 -0
  13. sumit_sdk-1.2.4/samples/storage_test.py +37 -0
  14. sumit_sdk-1.2.4/samples/stream_local_file_loopback.sh +5 -0
  15. sumit_sdk-1.2.4/samples/stream_stt.py +46 -0
  16. sumit_sdk-1.2.4/samples/streamlink_transcript.py +35 -0
  17. sumit_sdk-1.2.4/samples/summary_api.py +71 -0
  18. sumit_sdk-1.2.4/samples/transcript_from_storage.py +39 -0
  19. sumit_sdk-1.2.4/samples/translate_api.py +50 -0
  20. sumit_sdk-1.2.4/samples/translate_subtitles.py +21 -0
  21. sumit_sdk-1.2.4/samples/usage_report_example.py +18 -0
  22. sumit_sdk-1.2.4/samples/webui/index.html +55 -0
  23. sumit_sdk-1.2.4/samples/webui/index_old.html +37 -0
  24. sumit_sdk-1.2.4/samples/webui/transcript.html +5 -0
  25. sumit_sdk-1.2.4/setup.cfg +7 -0
  26. sumit_sdk-1.2.4/setup.py +49 -0
  27. sumit_sdk-1.2.4/sumit_sdk/__init__.py +0 -0
  28. sumit_sdk-1.2.4/sumit_sdk/api.py +41 -0
  29. sumit_sdk-1.2.4/sumit_sdk/api_helper.py +109 -0
  30. sumit_sdk-1.2.4/sumit_sdk/base_task.py +89 -0
  31. sumit_sdk-1.2.4/sumit_sdk/exceptions.py +0 -0
  32. sumit_sdk-1.2.4/sumit_sdk/realtime_stt.py +181 -0
  33. sumit_sdk-1.2.4/sumit_sdk/storage.py +278 -0
  34. sumit_sdk-1.2.4/sumit_sdk/stream_stt.py +214 -0
  35. sumit_sdk-1.2.4/sumit_sdk/sumit_reckit.py +84 -0
  36. sumit_sdk-1.2.4/sumit_sdk/summary_api.py +45 -0
  37. sumit_sdk-1.2.4/sumit_sdk/transcript.py +104 -0
  38. sumit_sdk-1.2.4/sumit_sdk/translate_api.py +49 -0
  39. sumit_sdk-1.2.4/sumit_sdk/translate_subtitles.py +52 -0
  40. sumit_sdk-1.2.4/sumit_sdk/users.py +26 -0
  41. sumit_sdk-1.2.4/sumit_sdk/utils/__init__.py +0 -0
  42. sumit_sdk-1.2.4/sumit_sdk/utils/alsa_recorder.py +74 -0
  43. sumit_sdk-1.2.4/sumit_sdk/utils/audio_helper.py +98 -0
  44. sumit_sdk-1.2.4/sumit_sdk/utils/downsample_helper.py +29 -0
  45. sumit_sdk-1.2.4/sumit_sdk/utils/socketio_client.py +45 -0
  46. sumit_sdk-1.2.4/sumit_sdk/utils/streamlink_helper.py +130 -0
  47. sumit_sdk-1.2.4/sumit_sdk/utils.py +0 -0
  48. sumit_sdk-1.2.4/sumit_sdk.egg-info/PKG-INFO +67 -0
  49. sumit_sdk-1.2.4/sumit_sdk.egg-info/SOURCES.txt +56 -0
  50. sumit_sdk-1.2.4/sumit_sdk.egg-info/dependency_links.txt +1 -0
  51. sumit_sdk-1.2.4/sumit_sdk.egg-info/requires.txt +9 -0
  52. sumit_sdk-1.2.4/sumit_sdk.egg-info/top_level.txt +1 -0
  53. sumit_sdk-1.2.4/test_docker/Dockerfile +7 -0
  54. sumit_sdk-1.2.4/test_docker/build.sh +2 -0
  55. sumit_sdk-1.2.4/test_docker/onprem_stt.py +49 -0
  56. sumit_sdk-1.2.4/test_docker/run.sh +1 -0
  57. sumit_sdk-1.2.4/tests/__init__.py +0 -0
@@ -0,0 +1,10 @@
1
+ build/
2
+ dist/
3
+ */api-sa.json
4
+ */api-sa*.json
5
+ *.egg-info/
6
+ __pycache__/
7
+ venv
8
+ releases
9
+ samples/out_*.txt
10
+ samples/*.srt
File without changes
@@ -0,0 +1,13 @@
1
+ BSD-3-Clause License
2
+
3
+ Copyright 2025 Sumit-X
4
+
5
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6
+
7
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
8
+
9
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10
+
11
+ 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
12
+
13
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,67 @@
1
+ Metadata-Version: 2.4
2
+ Name: sumit_sdk
3
+ Version: 1.2.4
4
+ Summary: SDK to communicate with sumit API
5
+ Author-email: Sumit-AI <shlomi@sumit-ai.com>
6
+ License-Expression: BSD-3-Clause
7
+ Project-URL: Homepage, https://www.sumit-x.com
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.8
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Programming Language :: Python :: 3.14
18
+ Requires-Python: >=3.8
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: requests>=2.25.0
22
+ Requires-Dist: websocket-client>=1.5.0
23
+ Requires-Dist: retry>=0.9.2
24
+ Requires-Dist: python-socketio>=5.10.0
25
+ Requires-Dist: numpy<2.0.0,>=1.24.0
26
+ Provides-Extra: audio
27
+ Requires-Dist: PyAudio; extra == "audio"
28
+ Requires-Dist: pyalsaaudio; extra == "audio"
29
+ Dynamic: license-file
30
+
31
+ # Sumit-sdk
32
+
33
+
34
+
35
+ ## Getting started
36
+
37
+ ## Description
38
+
39
+ Sumit SDK is a Python package that allows users to interact with Sumit-AI API. This document describes how to install the SDK from a zip file and how to use its main features.
40
+
41
+ ## Installation
42
+
43
+ ### Step 1: Download the SDK
44
+
45
+ 1. Navigate to the "Tags" section of the GitLab repository.
46
+ 2. Find the desired version of the SDK and click on it.
47
+ 3. Click on the "Download" button and select "ZIP" to download a zip file of the SDK.
48
+
49
+ ### Step 2: Extract the ZIP File
50
+
51
+ Extract the downloaded zip file to a location of your choice. You can do this by right-clicking the zip file and selecting "Extract All..." (the exact option might vary depending on your operating system).
52
+
53
+ ### Step 3: Install the SDK
54
+
55
+ Open a terminal and navigate to the directory where the zip file was extracted. The SDK can be installed using _pip_ by running the following command:
56
+
57
+ ```sh
58
+ cd path/to/extracted/folder
59
+ pip install .
60
+ ```
61
+
62
+ _Deprecated:
63
+ installing with `python setup.py install` is deprecated and is not supported by newer Python versions._
64
+
65
+ #### Install on Windows
66
+ Installation of PyAudio and pyalsaaudio may fail on Windows, in which case the installation of all required libraries fails as well. If that happens, install the required packages with pip:
67
+ ```pip install -r requirements.txt```
@@ -0,0 +1,37 @@
1
+ # Sumit-sdk
2
+
3
+
4
+
5
+ ## Getting started
6
+
7
+ ## Description
8
+
9
+ Sumit SDK is a Python package that allows users to interact with Sumit-AI API. This document describes how to install the SDK from a zip file and how to use its main features.
10
+
11
+ ## Installation
12
+
13
+ ### Step 1: Download the SDK
14
+
15
+ 1. Navigate to the "Tags" section of the GitLab repository.
16
+ 2. Find the desired version of the SDK and click on it.
17
+ 3. Click on the "Download" button and select "ZIP" to download a zip file of the SDK.
18
+
19
+ ### Step 2: Extract the ZIP File
20
+
21
+ Extract the downloaded zip file to a location of your choice. You can do this by right-clicking the zip file and selecting "Extract All..." (the exact option might vary depending on your operating system).
22
+
23
+ ### Step 3: Install the SDK
24
+
25
+ Open a terminal and navigate to the directory where the zip file was extracted. The SDK can be installed using _pip_ by running the following command:
26
+
27
+ ```sh
28
+ cd path/to/extracted/folder
29
+ pip install .
30
+ ```
31
+
32
+ _Deprecated:
33
+ installing with `python setup.py install` is deprecated and is not supported by newer Python versions._
34
+
35
+ #### Install on Windows
36
+ Installation of PyAudio and pyalsaaudio may fail on Windows, in which case the installation of all required libraries fails as well. If that happens, install the required packages with pip:
37
+ ```pip install -r requirements.txt```
@@ -0,0 +1,47 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "setuptools_scm"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "sumit_sdk"
7
+ dynamic = ["version"]
8
+ authors = [
9
+ { name="Sumit-AI", email="shlomi@sumit-ai.com" },
10
+ ]
11
+ description = "SDK to communicate with sumit API"
12
+ readme = "README.md"
13
+ requires-python = ">=3.8"
14
+ license = "BSD-3-Clause"
15
+ license-files = ["LICENSE"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.8",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Programming Language :: Python :: 3.14",
27
+ ]
28
+ dependencies = [
29
+ "requests>=2.25.0",
30
+ "websocket-client>=1.5.0",
31
+ "retry>=0.9.2",
32
+ "python-socketio>=5.10.0",
33
+ "numpy>=1.24.0,<2.0.0",
34
+ ]
35
+
36
+ [project.urls]
37
+ Homepage = "https://www.sumit-x.com"
38
+
39
+ [project.optional-dependencies]
40
+ audio = ["PyAudio", "pyalsaaudio"]
41
+
42
[tool.setuptools]
# An explicit package list must name every subpackage: listing only
# "sumit_sdk" leaves sumit_sdk.utils (audio_helper, streamlink_helper, ...)
# out of built wheels, which breaks `import sumit_sdk.utils.*`.
packages = ["sumit_sdk", "sumit_sdk.utils"]
# Alternatively, let setuptools discover the packages automatically:
# packages = { find = {} }
45
+
46
+ [tool.setuptools_scm]
47
+ local_scheme = "no-local-version"
@@ -0,0 +1,5 @@
1
+ requests>=2.25.0
2
+ websocket-client>=1.5.0
3
+ retry>=0.9.2
4
+ python-socketio>=5.10.0
5
+ numpy>=1.24.0,<2.0.0
@@ -0,0 +1,60 @@
1
# Sample: transcribe a local audio file through the REST API using curl and jq.
# Flow: login -> upload the audio -> start a transcription job -> query its
# status -> download the transcript.
# Requires: curl, jq and an API credentials file.

# Stop at the first failing step: every later step needs the output of the
# previous one (token, signed URL, job id).
set -eu

# API endpoints:
api_url=https://api.sumit-labs.com
login_ep=login
upload_ep=storage/upload
download_ep=storage/download
transcript=v3/transcript
transcript_status=get_status

credential_file=api-sa.json # your API credentials file

# files:
local_file=test.wav
remote_file=test.wav
transcript_file=test.json
flat_transcript_file=test_flat.json
language="he-IL"

# STEP 1:

# login and get token
resp=$(curl -X POST -H "Content-Type: application/json" -d "@$credential_file" "$api_url/$login_ep")
# return:
# {"request": {"company": "xxx", "domain": "xxx", "email": "xxx", "user": "xxx"}, "success": true, "token": "YOUR.SECRET.TOKEN"}
# take the token from the response (-r prints the raw string, without the JSON quotes)
token=$(printf '%s' "$resp" | jq -r '.token')
headers="Authorization: Bearer $token"

# get upload link
# Payloads are built with jq so that file names are JSON-escaped correctly.
payload=$(jq -cn --arg filename "$remote_file" '{filename: $filename}')
resp=$(curl -X POST -H "Content-Type: application/json" -H "$headers" -d "$payload" "$api_url/$upload_ep")
# return:
# {"request": {"filename": "test.wav"}, "signed_url": "https://xxxxx/....d267dd0c", "success": true}
# take the url
dest_url=$(printf '%s' "$resp" | jq -r '.signed_url')

# upload - "--upload-file" is PUT request for a binary file
# The URL is quoted: signed URLs contain '?' which the shell would otherwise
# treat as a glob character.
curl "$dest_url" --upload-file "$local_file"

# send to transcript:
payload=$(jq -cn \
    --arg path "$remote_file" \
    --arg output_path "$transcript_file" \
    --arg lang "$language" \
    --arg flat_output_path "$flat_transcript_file" \
    '{path: $path, output_path: $output_path, lang: $lang, model: "he_gen_v2", flat_output_path: $flat_output_path}')
resp=$(curl -X POST -H "Content-Type: application/json" -H "$headers" -d "$payload" "$api_url/$transcript")
printf '%s\n' "$resp"
task_id=$(printf '%s' "$resp" | jq -r '.response.job_id')
echo "follow task_id: $task_id"

# STEP 2:

# check for status...
payload=$(jq -cn --arg id "$task_id" '{id: $id}')
resp=$(curl -X POST -H "Content-Type: application/json" -H "$headers" -d "$payload" "$api_url/$transcript_status")
printf '%s\n' "$resp"

# STEP 3:

# download transcription:
# get download link
# replace $transcript_file with $flat_transcript_file for the flat version of the transcript
payload=$(jq -cn --arg filename "$transcript_file" '{filename: $filename}')
resp=$(curl -X POST -H "Content-Type: application/json" -H "$headers" -d "$payload" "$api_url/$download_ep")
src_url=$(printf '%s' "$resp" | jq -r '.url')
curl -o "$transcript_file" "$src_url"
@@ -0,0 +1,42 @@
1
+ import time
2
+ from sumit_sdk.api import APIClient
3
+ from sumit_sdk.realtime_stt import RealtimeSTT
4
+ from sumit_sdk.realtime_stt import Profiles, VadProfile
5
+ from sumit_sdk.realtime_stt import BufferMode
6
+ from sumit_sdk.utils.audio_helper import Recorder # helper class to async record from microphone
7
+
8
+ # initialize API
9
+ api = APIClient("api-sa.json") # create client
10
+ rt_mgr = RealtimeSTT(api) # create realtime manager
11
+
12
+ # start session
13
+ def callback(data):
14
+ print('\u202b' + data['txt'] + '\u202c' ) # Reverse for proper view of Hebrew in terminal.
15
+
16
def write_segments_callbcak(data):
    """Append a transcribed segment to ``out.txt`` and echo it to the terminal.

    Each line written is ``<unix time>\\t\\t<text>``. The text is also printed
    reversed, wrapped in right-to-left embedding marks (U+202B ... U+202C).

    Args:
        data: Segment payload passed to the session callback; only the
            ``'txt'`` key is read.
    """
    text = data['txt']
    # UTF-8 is set explicitly: the transcripts are Hebrew, which a platform
    # default encoding such as cp1252 cannot represent.
    with open('out.txt', 'a', encoding='utf-8') as fd:
        # The text is bound to a local first: reusing the f-string's own quote
        # character inside the braces is a SyntaxError before Python 3.12.
        fd.write(f'{time.time()}\t\t{text}\n')
    print('\u202b' + text[::-1] + '\u202c')
20
+
21
+ rt_mgr.start_session(callback, profile=Profiles.accurate, vad_profile=VadProfile.low, buffer_mode=BufferMode.default)
22
+
23
+ sock = rt_mgr.connect()
24
+
25
+ # create recorder
26
+ rec = Recorder(as_base64=True, buffer_sec=2.5) # encode samples as base64, to send the chunks over web-socket
27
+ rec.start()
28
+ stop_sig = False
29
+ while sock.connected and not stop_sig:
30
+ try:
31
+ data = rec.safe_get()
32
+ if data is None:
33
+ time.sleep(0.1)
34
+ continue
35
+ rt_mgr.send(sock, data)
36
+ except KeyboardInterrupt:
37
+ stop_sig = True
38
+ print("keyboard interupt. stop streaming")
39
+
40
+ # cleanup
41
+ rt_mgr.stop_session()
42
+ rec.stop()
@@ -0,0 +1,69 @@
1
+ import time
2
+ from sumit_sdk.api import APIClient
3
+ from sumit_sdk.realtime_stt import RealtimeSTT
4
+ from sumit_sdk.realtime_stt import Profiles, VadProfile
5
+ from sumit_sdk.utils.audio_helper import Recorder # helper class to async record from microphone
6
+ from flask import Flask, request, Response, json, render_template
7
+ from threading import Thread
8
+ from flask_socketio import SocketIO, emit
9
+ import asyncio
10
+
11
+ app = Flask(__name__, template_folder='webui')
12
+ app.config['SECRET_KEY'] = 'secret!'
13
+ socketio = SocketIO(app)
14
+
15
+ # initialize API
16
+ api = APIClient("api-sa.json") # create client
17
+ rt_mgr = RealtimeSTT(api) # create realtime manager
18
+ transcriptions = []
19
+
20
+ @app.route('/', methods=['POST', 'GET'])
21
+ def index(**kwargs):
22
+ return render_template('index.html')
23
+
24
+ @socketio.on('connect')
25
+ def handle_connect():
26
+ print('Client connected')
27
+
28
+
29
+ @app.route('/transcription')
30
+ def suggestions():
31
+ return render_template('transcript.html', transcriptions=transcriptions)
32
+
33
+ last_two_texts = []
34
+ # start session
35
+ def callback(data):
36
+ # global transcriptions
37
+ print(data['txt'][::-1]) # Reverse for proper view of Hebrew in terminal.
38
+ # transcriptions.append(data['txt'])
39
+ global last_two_texts
40
+ last_two_texts.append(data['txt'])
41
+ last_two_texts = last_two_texts[-2:]
42
+ socketio.emit('update_texts', {'texts': last_two_texts})
43
+
44
+ rt_mgr.start_session(callback, profile=Profiles.default, vad_profile=VadProfile.default)
45
+
46
+ sock = rt_mgr.connect()
47
+
48
+ # create recorder
49
+ rec = Recorder(as_base64=True, buffer_sec=3) # encode samples as base64, to send the chunks over web-socket
50
+ rec.start()
51
+
52
+ Thread(target=lambda: socketio.run(app, host="127.0.0.1", port=5000, debug=False)).start()
53
+
54
+ stop_sig = False
55
+ while sock.connected and not stop_sig:
56
+ try:
57
+ data = rec.safe_get()
58
+ if data is None:
59
+ time.sleep(0.1)
60
+ continue
61
+ rt_mgr.send(sock, data)
62
+ except KeyboardInterrupt:
63
+ stop_sig = True
64
+ print("keyboard interupt. stop streaming")
65
+
66
+ # cleanup
67
+ rt_mgr.stop_session()
68
+ rec.stop()
69
+
@@ -0,0 +1,73 @@
1
+ import time
2
+ from sumit_sdk.api import APIClient
3
+ from sumit_sdk.realtime_stt import RealtimeSTT
4
+ from sumit_sdk.realtime_stt import Profiles, VadProfile
5
+ from sumit_sdk.realtime_stt import BufferMode
6
+ from sumit_sdk.utils.audio_helper import Recorder # helper class to async record from microphone
7
+ from scipy.io import wavfile as wf
8
+ from threading import Thread, Event
9
+ import sys
10
+
11
class FileRecorder(Recorder):
    """Recorder that replays a WAV file instead of capturing the microphone.

    A background thread hands ``CHUNK`` samples at a time to
    ``_fill_buffer`` every ``TIME_BUFFER`` seconds (``CHUNK``,
    ``TIME_BUFFER`` and ``_fill_buffer`` come from ``Recorder``), and stops
    by itself once the whole file has been consumed.
    """

    def __init__(self, filename: str, as_base64=False, buffer_sec: float = 1) -> None:
        """Load ``filename`` into memory and prepare the replay state.

        Args:
            filename: Path of the WAV file to replay.
            as_base64: Forwarded to ``Recorder``.
            buffer_sec: Forwarded to ``Recorder``.
        """
        super().__init__(as_base64, buffer_sec)
        self.filename = filename
        # NOTE(review): no resampling or channel mixing is done here, so the
        # file presumably has to match the format Recorder expects - confirm.
        self.sr, self.data = wf.read(self.filename)
        self.stop_sig = Event()
        self._ix = 0  # index of the next sample to emit
        self._thread = None

    def _start(self):
        """Worker loop: emit one chunk per ``TIME_BUFFER`` until stopped."""
        total = self.data.shape[0]
        # Event.wait doubles as the pacing delay and returns True as soon as
        # stop() is called, so a stop request is honoured without first
        # sleeping out the interval and emitting one more chunk.
        while not self.stop_sig.wait(self.TIME_BUFFER):
            chunk = self.data[self._ix:self._ix + self.CHUNK]
            self._fill_buffer(chunk.tobytes(), None, None, None)
            self._ix += self.CHUNK
            if self._ix >= total:
                # End of file reached: end the loop on the next check.
                self.stop()

    def start(self):
        """Start (or resume) replaying on a background thread."""
        self.stop_sig.clear()
        # Daemon thread: an unhandled error in the main script must not leave
        # the process hanging until the rest of the file has been replayed.
        self._thread = Thread(target=self._start, daemon=True)
        self._thread.start()

    def stop(self):
        """Ask the worker thread to stop; does not wait for it to finish."""
        self.stop_sig.set()
36
+
37
+ # initialize API
38
+ api = APIClient("api-sa.json") # create client
39
+ rt_mgr = RealtimeSTT(api) # create realtime manager
40
+
41
+ # start session
42
+ def callback(data):
43
+ print('\u202b' + data['txt'] + '\u202c' ) # Reverse for proper view of Hebrew in terminal.
44
+
45
def write_segments_callbcak(data):
    """Append a transcribed segment to ``out.txt`` and echo it to the terminal.

    Each line written is ``<unix time>\\t\\t<text>``. The text is also printed
    reversed, wrapped in right-to-left embedding marks (U+202B ... U+202C).

    Args:
        data: Segment payload passed to the session callback; only the
            ``'txt'`` key is read.
    """
    text = data['txt']
    # UTF-8 is set explicitly: the transcripts are Hebrew, which a platform
    # default encoding such as cp1252 cannot represent.
    with open('out.txt', 'a', encoding='utf-8') as fd:
        # The text is bound to a local first: reusing the f-string's own quote
        # character inside the braces is a SyntaxError before Python 3.12.
        fd.write(f'{time.time()}\t\t{text}\n')
    print('\u202b' + text[::-1] + '\u202c')
49
+
50
+ rt_mgr.start_session(write_segments_callbcak, profile=Profiles.accurate, vad_profile=VadProfile.low, buffer_mode=BufferMode.default)
51
+
52
+ sock = rt_mgr.connect()
53
+
54
+ # create recorder
55
+ rec = FileRecorder(sys.argv[1], as_base64=True, buffer_sec=2.5) # encode samples as base64, to send the chunks over web-socket
56
+ rec.start()
57
+ stop_sig = False
58
+ with open('out.txt', 'a') as fd:
59
+ fd.write(f'{time.time()}\t\tSTART_NEW_SESSION\n')
60
+ while sock.connected and not stop_sig:
61
+ try:
62
+ data = rec.safe_get()
63
+ if data is None:
64
+ time.sleep(0.1)
65
+ continue
66
+ rt_mgr.send(sock, data)
67
+ except KeyboardInterrupt:
68
+ stop_sig = True
69
+ print("keyboard interupt. stop streaming")
70
+
71
+ # cleanup
72
+ rt_mgr.stop_session()
73
+ rec.stop()
@@ -0,0 +1,86 @@
1
+ import time
2
+ from sumit_sdk.api import APIClient
3
+ from sumit_sdk.realtime_stt import RealtimeSTT
4
+ from sumit_sdk.utils.audio_helper import Recorder # helper class to async record from microphone
5
+ import gradio as gr # for GUI
6
+ import numpy as np
7
+ import queue
8
+ import base64
9
+ import scipy.signal as sps
10
+ from scipy import signal
11
+
12
+ # initialize API
13
+ api = APIClient("api-sa-prod.json") # create client
14
+ rt_mgr = RealtimeSTT(api) # create realtime manager
15
+
16
+ # start session
17
+ txt_buf = queue.Queue()
18
+ def callback(data):
19
+ txt_buf.put(data)
20
+
21
+ rt_mgr.start_session(callback)
22
+ sock = rt_mgr.connect()
23
+
24
+ # create recorder GUI
25
def butter_lowpass(lowcut, fs, order=10):
    """Design a digital Butterworth low-pass filter.

    Args:
        lowcut: Cut-off frequency in Hz; must be below ``fs / 2``.
        fs: Sampling rate in Hz of the signal to be filtered.
        order: Filter order.

    Returns:
        The filter as second-order sections, usable with ``signal.sosfilt``.
    """
    nyq = 0.5 * fs
    low = lowcut / nyq
    return signal.butter(order, low, analog=False, btype='low', output='sos')


TARGET_RATE = 16000  # sample rate (Hz) of the audio produced by reformat_freq
LOWPASS_HZ = 15680   # cut-off (Hz) of the low-pass applied before resampling

sos = None        # cached filter, designed lazily on first use
_sos_rate = None  # sampling rate the cached filter was designed for


def reformat_freq(sr, y):
    """Convert an audio chunk to mono int16 resampled to ``TARGET_RATE``.

    Args:
        sr: Sampling rate of ``y`` in Hz.
        y: Samples, either 1-D (mono) or 2-D ``(n_samples, n_channels)``.

    Returns:
        ``(sr, samples)``: the *input* rate, passed through unchanged, and
        the converted int16 samples (which are at ``TARGET_RATE``).
    """
    global sos, _sos_rate
    y = np.asarray(y)
    if y.size == 0:
        return sr, np.array([], dtype=np.int16)
    # Mix down in float: adding int16 channels directly wraps around on loud
    # input, and averaging every column also covers a (n, 1) array.
    y = y.astype(np.float64)
    if y.ndim > 1:
        y = y.mean(axis=1)
    # A cut-off at or above the Nyquist frequency cannot be designed (butter
    # raises ValueError), so the filter is skipped for low-rate input.
    if LOWPASS_HZ < 0.5 * sr:
        if sos is None or _sos_rate != sr:
            # Redesign when the rate changes; a filter built for another
            # rate would have the wrong cut-off.
            sos = butter_lowpass(LOWPASS_HZ, sr, 5)
            _sos_rate = sr
        y = signal.sosfilt(sos, y)
    number_of_samples = round(len(y) * float(TARGET_RATE) / sr)
    data = sps.resample(y, number_of_samples)
    # Clip before the cast so filter/resampling overshoot saturates instead
    # of wrapping to the opposite sign.
    limits = np.iinfo(np.int16)
    return sr, np.clip(data, limits.min, limits.max).astype(np.int16)
42
+
43
MIN_SEND_SAMPLES = 46000  # samples to accumulate before a chunk is sent

audio_buf = np.array([], dtype=np.int16)


def transcribe(speech, state=""):
    """Streaming callback: buffer audio, send it, and collect new text.

    Incoming audio is converted with ``reformat_freq`` and accumulated in the
    module-level ``audio_buf``. Once at least ``MIN_SEND_SAMPLES`` samples are
    buffered they are sent base64-encoded through ``rt_mgr`` and every text
    segment waiting in ``txt_buf`` is appended to ``state``.

    Args:
        speech: ``(sample_rate, samples)`` pair; unpacked into ``reformat_freq``.
        state: Transcript accumulated so far.

    Returns:
        ``(state, state)``: the transcript twice, once for the text box and
        once as the carried-over state.
    """
    global audio_buf
    if state is None:
        state = ""
    try:
        _, samples = reformat_freq(*speech)
    except Exception:
        # An unusable chunk is skipped; previously execution fell through to
        # an undefined variable and raised NameError.
        print("failed to parse data")
        return state, state
    audio_buf = np.concatenate([audio_buf, samples])
    if len(audio_buf) < MIN_SEND_SAMPLES:
        return state, state
    data = base64.b64encode(audio_buf.tobytes())
    audio_buf = np.array([], dtype=np.int16)
    rt_mgr.send(sock, data)
    # Drain whatever the session callback has queued, without blocking.
    while True:
        try:
            ready = txt_buf.get(block=False)
        except queue.Empty:
            break
        if ready is None:
            break
        if not ready:
            continue
        try:
            state += ready["txt"] + " "
        except (KeyError, TypeError):
            # Malformed payload: skip it rather than break the live UI.
            continue
    return state, state
+
70
+ gr.Interface(
71
+ fn=transcribe,
72
+ inputs=[
73
+ gr.Audio(source="microphone", type="numpy", streaming=True, label="Speech"),
74
+ "state"
75
+ ],
76
+ outputs=[
77
+ gr.Textbox(label="Transcript"),
78
+ "state"
79
+ ],
80
+ theme=gr.themes.Soft(),
81
+ title="Sumit Realtime Example",
82
+ allow_flagging="never",
83
+ live=True).launch()
84
+
85
+ # cleanup
86
+ rt_mgr.stop_session()
@@ -0,0 +1,37 @@
1
+ from sumit_sdk.api import APIClient
2
+ from sumit_sdk.storage import Storage
3
+
4
+ api = APIClient("api-sa.json") # create client
5
+ storage = Storage(api)
6
+
7
+ # An example of uploading a file to storage
8
+
9
+ filename = "<PATH_TO_FILE_IN_STORAGE>" # The path + and the name of the file that will appear in the storage.
10
+
11
+ path = "<YOUR_FILE_NAME>" # path from my computer
12
+
13
+ exp = 10 # hours
14
+
15
+ res = storage.upload(filename, path, exp)
16
+ print(res)
17
+
18
+ # An example of uploading a multi files to storage
19
+
20
+ remote_to_local_map = {"<PATH_TO_FILE_IN_STORAGE_1>": "<YOUR_LOCAL_FILE_PATH_1>",
21
+ "<PATH_TO_FILE_IN_STORAGE_2>": "<YOUR_LOCAL_FILE_PATH_2>",
22
+ "<PATH_TO_FILE_IN_STORAGE_3>": "<YOUR_LOCAL_FILE_PATH_3>"}
23
+
24
+ res = storage.upload_multi(remote_to_local_map)
25
+ print(res)
26
+
27
+ # An example of deleting a file from storage
28
+
29
+ filename = "<FULL_PATH_TO_FILE_IN_STORAGE>"
30
+ res = storage.delete_file(filename)
31
+ print(res)
32
+
33
+ # An example NO. 1 of getting list of files from storage
34
+
35
+ folder_name = "<PATH_TO_FOLDER_NAME>"
36
+ res = storage.list_files(folder_name)
37
+ print(res)
@@ -0,0 +1,5 @@
1
# Play an audio file into the ALSA loopback device at its native rate,
# as 16 kHz mono 16-bit PCM.
# Usage: stream_local_file_loopback.sh <audio-file>
# The loopback kernel module has to be loaded first:
#sudo modprobe snd-aloop
fn=${1:?usage: $0 <audio-file>}
# Print the start time (seconds since the epoch).
date "+%s"
# "$fn" is quoted so paths containing spaces reach ffmpeg as one argument.
ffmpeg -re -i "$fn" -f alsa -ac 1 -ar 16000 -acodec pcm_s16le hw:Loopback
# ffmpeg -re -i "$fn" -f alsa -ac 2 -ar 44100 -b:a 128k -bufsize 128k -acodec pcm_s16le hw:Loopback,1,0
@@ -0,0 +1,46 @@
1
+ import time
2
+
3
+ from sumit_sdk.api import APIClient # API core client
4
+ from sumit_sdk.stream_stt import StreamSTT
5
+ import librosa
6
+ import numpy as np
7
+
8
def print_stt(data):
    """Print the first transcript segment of a streaming-STT response.

    Args:
        data: Response payload. Reads ``data['id']`` and the first entry of
            ``data['transcript']``: its ``'segment'`` text plus the optional
            ``'start'`` / ``'end'`` values.
    """
    # print(f"response: {data}")
    segments = data.get("transcript") or []
    if not segments:
        # A response without segments would otherwise raise inside the
        # callback; report it and carry on.
        print(f"{data.get('id')}: <no transcript>")
        return
    transcript = segments[0]
    print(f"{data['id']}: {transcript['segment']}\nstart:{transcript.get('start')}\tend:{transcript.get('end')}")
12
+
13
+
14
+ # initialize API
15
+ api = APIClient("api-sa-prod.json", env='frankfurt') # create client
16
+
17
+ stt = StreamSTT(api, print_stt)
18
+ response = stt.start()
19
+ if response.get('status_code') != 0:
20
+ print(response)
21
+ raise Exception("failed to retrieve session token. it's probably due to: incorrect user, missing permissions or networking issue.")
22
+
23
+ stt.listen()
24
+
25
+ print("send files:")
26
+ for i in range(5):
27
+ stt.send_audio(f"audio_id_{i}", audio_path="/tmp/1.wav") # send short audio file
28
+ time.sleep(1)
29
+
30
+ # example 2:
31
+ # chunk long file:
32
+
33
+ # print("send chunks:")
34
+ # audio, _ = librosa.load("/tmp/2.wav", sr=16000, mono=True) # for this sample, use audio file longer than 50 seconds (or change the chunk size/number of chunks)
35
+ # # librosa load data as float, convert to int16
36
+ # audio *= (2 ** 15) - 1
37
+ # audio = audio.astype(np.int16)
38
+ # chunk_size = 10 * 16000 # 10 seconds length
39
+ # for i in range(5):
40
+ # audio_data = audio[int(i * chunk_size):int((i + 1) * chunk_size)]
41
+ # stt.send_audio(f"audio_id2_{i}", audio_data=audio_data)
42
+ # time.sleep(1)
43
+
44
+
45
+ time.sleep(10)
46
+ stt.stop_listening()
@@ -0,0 +1,35 @@
1
+ import time
2
+ from sumit_sdk.api import APIClient
3
+ from sumit_sdk.realtime_stt import RealtimeSTT, Profiles
4
+ from sumit_sdk.utils.streamlink_helper import StreamlinkHelper # helper class to async record from URLs
5
+
6
+ # initialize API
7
+ api = APIClient("api-sa.json") # create client
8
+ rt_mgr = RealtimeSTT(api) # create realtime manager
9
+
10
+ # start session
11
+ def callback(data):
12
+ print(data['st'], data['txt'][::-1]) # Reverse for proper view of Hebrew in terminal.
13
+
14
+ rt_mgr.start_session(callback, profile=Profiles.very_accurate)
15
+ sock = rt_mgr.connect()
16
+
17
+ # create recorder
18
+ rec = StreamlinkHelper(chunk_len=5, as_base64=True)
19
+ rec.open_stream("https://www.youtube.com/watch?v=1IMi74Ybg8s")
20
+ rec.async_read()
21
+ stop_sig = False
22
+ while sock.connected and not stop_sig:
23
+ try:
24
+ data = rec.safe_get()
25
+ if data is None:
26
+ time.sleep(0.1)
27
+ continue
28
+ rt_mgr.send(sock, data)
29
+ except KeyboardInterrupt:
30
+ stop_sig = True
31
+ print("keyboard interupt. stop streaming")
32
+
33
+ # cleanup
34
+ rt_mgr.stop_session()
35
+ rec.stop()