thinhost 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thinhost/__init__.py ADDED
File without changes
@@ -0,0 +1,93 @@
1
+
2
# Prefer the installed top-level module; fall back to the package-relative
# copy when running from inside the package. Catch only ImportError so that
# real errors inside simple_upload_host are not silently masked.
try:
    from simple_upload_host import UploadService
except ImportError:
    from .simple_upload_host import UploadService
6
+
7
+ import os, sys
8
+ import httpx
9
+ from flask import Response, request
10
+ from processing_graph.ProcessingNode import ProcessingNode
11
class LLMProxyService(UploadService):
    """Upload service that also proxies OpenAI-compatible LLM endpoints.

    After launch it registers ``/v1`` (health check), ``/v1/models``,
    ``/v1/chat/completions`` and ``/v1/completions`` on the Flask app and
    forwards them to ``api.openai.com`` using the key from the host spec.
    """

    @classmethod
    def _after_launch_endpoints(cls, app, hostSpec):
        """Register the proxy routes on *app*.

        Args:
            app: Flask application created by the base UploadService.
            hostSpec: host specification; must provide ``open_ai_key``.

        Raises:
            RuntimeError: if no OpenAI API key is configured.
        """
        hostSpec = cls.tHostSpec(hostSpec)
        openai_key = hostSpec.get(hostSpec.f_open_ai_key, None)
        if not openai_key:
            raise RuntimeError('openai_api_key is not set in hostSpec')

        # Single shared upstream client; the real API key is injected here
        # so callers never have to supply it themselves.
        client = httpx.Client(
            base_url='https://api.openai.com',
            headers={'Authorization': f'Bearer {openai_key}'}
        )

        def _proxy(path: str):
            # Forward the raw request body upstream. If the JSON payload
            # asks for streaming, relay the upstream bytes as SSE.
            body = request.get_data()
            payload = request.get_json(silent=True) or {}

            if payload.get("stream", False):
                def generate():
                    with client.stream(
                        "POST", path,
                        content=body,
                        headers={"Content-Type":"application/json"},
                        timeout=None
                    ) as resp:
                        for chunk in resp.iter_bytes():
                            yield chunk
                return Response(generate(), content_type="text/event-stream")

            # timeout=None: completions routinely exceed httpx's default
            # 5-second timeout, which previously aborted long generations;
            # the streaming branch above already disables the timeout.
            resp = client.post(path, content=body,
                               headers={"Content-Type":"application/json"},
                               timeout=None)
            return Response(resp.content, status=resp.status_code,
                            content_type=resp.headers.get("content-type"))

        def _proxy_models():
            # Simple pass-through of the upstream model catalogue.
            resp = client.get('/v1/models')
            return Response(
                resp.content,
                status=resp.status_code,
                content_type=resp.headers.get("content-type")
            )

        app.add_url_rule(
            '/v1/models',
            endpoint='proxy_models',
            view_func=_proxy_models,
            methods=['GET']
        )

        # Lightweight liveness probe; never touches the upstream API.
        app.add_url_rule(
            '/v1',
            endpoint='health',
            view_func=lambda: Response(
                '{"status":"ok"}',
                status=200,
                content_type='application/json'
            ),
            methods=['GET']
        )

        # register both endpoints
        app.add_url_rule(
            '/v1/chat/completions',
            endpoint='proxy_chat',
            view_func=lambda: _proxy('/v1/chat/completions'),
            methods=['POST']
        )
        app.add_url_rule(
            '/v1/completions',
            endpoint='proxy_completions',
            view_func=lambda: _proxy('/v1/completions'),
            methods=['POST']
        )
86
if __name__ == "__main__":
    # Load OPENAI_API_KEY etc. from a local .env before starting the CLI.
    from dotenv import load_dotenv
    load_dotenv()
    LLMProxyService.run_cli()
    # python llm_base_service.py launch [[server=./test_specs/hostSpec.json]] -- For a quick test
    # python llm_base_service.py start "[[server=./test_specs/hostSpec.json]]" -- deploy job to nodejobs
    # open-webui serve UI layer (rests on service)
    # TODO wrap up with retry and uptime monitor
@@ -0,0 +1,264 @@
1
+ #!/usr/bin/env python3
2
+ import unittest
3
+ import os
4
+ import shutil
5
+ import time
6
+ import requests
7
+ from dotenv import load_dotenv
8
+ from llm_base_service import LLMProxyService # your subclass with proxy support
9
+ from pathlib import Path
10
+ import json
11
+ from typing import Any, Dict
12
+ from decelium_wallet.commands.BaseData import BaseData
13
+
14
+ from proxy_service_model import ProxyServiceModel
15
+ '''
16
+ some proxy_service_model content:
17
+ ---
18
+
19
+ class ProxyServiceModel(BaseModel):
20
+ endpoint: str # e.g. "http://localhost:8003"
21
+ api_key: str
22
+ model_id: str
23
+
24
+ def generate(self,
25
+ messages: List[Dict[str, str]],
26
+ **kwargs: Any) -> str:
27
+ url = f"{self.endpoint}/v1/chat/completions"
28
+ assert self.endpoint != None
29
+ assert self.model_id != None
30
+ assert messages != None
31
+
32
+ headers = {
33
+ "Authorization": f"Bearer {self.api_key}",
34
+ "Content-Type": "application/json",
35
+ }
36
+ payload = {"model": self.model_id, "messages": messages, **kwargs}
37
+ print(f"URL OUT : {type(url)}----{url}")
38
+ r = requests.post(url, json=payload, headers=headers, timeout=60)
39
+ r.raise_for_status()
40
+ return r.json()["choices"][0]["message"]["content"]
41
+
42
+ def stream(self,
43
+ messages: List[Dict[str, str]],
44
+ **kwargs: Any):
45
+ url = f"{self.endpoint}/v1/chat/completions"
46
+ headers = {
47
+ "Authorization": f"Bearer {self.api_key}",
48
+ "Content-Type": "application/json",
49
+ }
50
+ payload = {"model": self.model_id,
51
+ "messages": messages,
52
+ "stream": True,
53
+ **kwargs}
54
+ r = requests.post(url, json=payload,
55
+ headers=headers,
56
+ stream=True,
57
+ timeout=None)
58
+ r.raise_for_status()
59
+ for line in r.iter_lines():
60
+ if line.startswith(b"data:"):
61
+ data = line.removeprefix(b"data:").strip()
62
+ if data == b"[DONE]":
63
+ break
64
+ yield data.decode()
65
+
66
+
67
+ '''
68
class LMStudioSchemaEntry(BaseData):
    # One field of an LM Studio config schema.
    # type: JSON-schema style type name, e.g. "string".
    # default: default value; the (Any, None) annotation presumably marks the
    #   field as optional in BaseData — TODO confirm against BaseData's rules.
    type: str
    default: (Any, None)
71
+
72
class LMStudioConfigSchema(BaseData):
    # Schema of the user-configurable fields for the proxy model entry.
    endpoint: LMStudioSchemaEntry
    api_key: LMStudioSchemaEntry
    model_id: LMStudioSchemaEntry
    def get_keys(self): # Make sure all other keys are entries as well.
        # Replace the required-key map with a wildcard so that ANY key is
        # accepted as long as its value validates as an LMStudioSchemaEntry.
        # NOTE(review): assumes BaseData treats "*" as a wildcard key — confirm.
        req,opt = super().get_keys()
        req = {"*":LMStudioSchemaEntry}
        return req,opt
80
+
81
class LMStudioModel(BaseData):
    # Top-level LM Studio model manifest (serialized to model.json).
    name: str                            # registry folder / model identifier
    display_name: str                    # human-readable label
    entrypoint: str                      # script implementing the model, e.g. "model.py"
    config_schema: LMStudioConfigSchema  # schema of configurable fields
86
+
87
class TestLLMProxyService(unittest.TestCase):
    """End-to-end tests for LLMProxyService and its LM Studio model shim.

    NOTE(review): these tests require a working OpenAI key (the
    "<<OPENAI_API_KEY>>" placeholders are presumably substituted by the spec
    loader — confirm) and start a real local server on port 8003.
    """

    @staticmethod
    def get_model_config():
        # Build the LM Studio model descriptor used by the registration and
        # via-model tests. BaseData classes are keyed by their class
        # attributes (e.g. LMStudioModel.name).
        model = LMStudioModel({
            LMStudioModel.name: "lama-service",
            LMStudioModel.display_name:"LamaService Proxy",
            LMStudioModel.entrypoint: "model.py",
            LMStudioModel.config_schema: LMStudioConfigSchema({
                LMStudioConfigSchema.endpoint: {
                    LMStudioSchemaEntry.type: "string",
                    LMStudioSchemaEntry.default: "http://localhost:8003"
                },
                LMStudioConfigSchema.api_key: {
                    LMStudioSchemaEntry.type: "string",
                    LMStudioSchemaEntry.default: "<<OPENAI_API_KEY>>"
                },
                LMStudioConfigSchema.model_id: {
                    LMStudioSchemaEntry.type: "string",
                    LMStudioSchemaEntry.default: "gpt-3.5-turbo"
                }
            })
        })
        return model

    def test_register(self):
        # Write model.json into ~/.lmstudio/models/<name> and copy the
        # entrypoint script next to it, then verify both landed on disk.
        model = TestLLMProxyService.get_model_config()

        base_dir = Path.home() / ".lmstudio" / "models" / model[LMStudioModel.name]
        base_dir.mkdir(parents=True, exist_ok=True)

        model_json = base_dir / "model.json"
        with model_json.open("w") as f:
            json.dump(dict(model), f, indent=2)

        # verify registration
        self.assertTrue(model_json.exists())
        data = json.loads(model_json.read_text())
        self.assertEqual(data.get("name"), model[LMStudioModel.name])
        self.assertEqual(data.get("entrypoint"), model[LMStudioModel.entrypoint])
        # from proxy_service_model import ProxyServiceModel copy tp model.py
        src = Path(__file__).parent / "proxy_service_model.py"
        dst = base_dir / model[LMStudioModel.entrypoint]
        shutil.copy(src, dst)
        self.assertTrue(dst.exists())

    def test_chat_completions_via_model(self):
        # Start the proxy, then exercise it through ProxyServiceModel
        # (the LM Studio entrypoint) in both non-streaming and streaming
        # modes, and finally stop the server.

        # set up and start the proxy server
        hostSpec = {
            "protocol": "http",
            "host": "127.0.0.1",
            "port": "8003",
            "url": "/do_upload",
            "command_uri": "/cmd",
            "upload_dir": "./tmp/upload",
            "settings_dir": "./tmp/settings",
            "tokens": "<<DSH_SERVER_TOKEN_LIST>>",
            "open_ai_key": "<<OPENAI_API_KEY>>",
            "published": []
        }
        hostSpec = LLMProxyService.tHostSpec(hostSpec)
        # Fresh working directories so stale state cannot leak between runs.
        shutil.rmtree(hostSpec["upload_dir"], ignore_errors=True)
        shutil.rmtree(hostSpec["settings_dir"], ignore_errors=True)

        # stop any leftover instance, then start; sleeps give the process
        # time to bind the port before requests arrive.
        LLMProxyService.run(__command=["stop"], **hostSpec)
        time.sleep(0.2)
        LLMProxyService.run(__command=["start"], **hostSpec)
        time.sleep(0.5)

        # build config for the ProxyServiceModel from our schema
        model_cfg = TestLLMProxyService.get_model_config()
        entries = dict(model_cfg.config_schema)
        print(json.dumps(entries,indent=4))
        #return
        sc = LMStudioConfigSchema
        # Pull each field's default out of the schema entries.
        conf = {
            sc.endpoint: LMStudioSchemaEntry(entries[sc.endpoint]).default,
            sc.api_key: LMStudioSchemaEntry(entries[sc.api_key]).default,
            sc.model_id: LMStudioSchemaEntry(entries[sc.model_id]).default
        }
        assert conf["endpoint"]
        assert conf["api_key"]
        assert conf["model_id"]
        proxy_model = ProxyServiceModel(**conf)

        # non-streaming check
        reply = proxy_model.generate([{"role": "user", "content": "Hello, world!"}])
        self.assertIsInstance(reply, str)
        self.assertTrue(reply.strip())

        # streaming check: accumulate delta content from each SSE JSON chunk.
        stream_iter = proxy_model.stream([{"role": "user", "content": "Hello again!"}])
        collected = ""
        for chunk in stream_iter:
            try:
                data = json.loads(chunk)
            except json.JSONDecodeError:
                # Non-JSON keep-alive/partial frames are skipped.
                continue
            for choice in data.get("choices", []):
                text = choice.get("delta", {}).get("content")
                if text:
                    collected += text

        self.assertTrue(len(collected.strip()) > 3, f"Stream did not yield enough content ->[{collected}]")
        print(f"!!! {collected}")
        # The generator must be exhausted after the [DONE] sentinel.
        with self.assertRaises(StopIteration):
            next(stream_iter)

        # shut down
        LLMProxyService.run(__command=["stop"], **hostSpec)

    def test_chat_completions_direct(self):
        # Hit the proxy's HTTP endpoints directly with requests, bypassing
        # the ProxyServiceModel shim entirely.
        load_dotenv()

        hostSpec = {
            "protocol": "http",
            "host": "127.0.0.1",
            "port": "8003",
            "url": "/do_upload",
            "command_uri": "/cmd",
            "upload_dir": "./tmp/upload",
            "settings_dir": "./tmp/settings",
            "tokens": "<<DSH_SERVER_TOKEN_LIST>>",
            "open_ai_key": "<<OPENAI_API_KEY>>",
            "published": []
        }
        hostSpec = LLMProxyService.tHostSpec(hostSpec)
        # clean before start
        shutil.rmtree(hostSpec["upload_dir"], ignore_errors=True)
        shutil.rmtree(hostSpec["settings_dir"], ignore_errors=True)

        # start server
        LLMProxyService.run(__command=["stop"], **hostSpec)
        time.sleep(1)
        LLMProxyService.run(__command=["start"], **hostSpec)
        time.sleep(0.2)

        # hit chat completions proxy
        resp = requests.post(
            f"http://{hostSpec['host']}:{hostSpec['port']}/v1/chat/completions",
            json={
                "model": "gpt-3.5-turbo",
                "messages": [{"role": "user", "content": "Hello, world!"}]
            }
        )
        print(resp.json())
        self.assertEqual(resp.status_code, 200)
        data = resp.json()
        self.assertIn("choices", data)

        # hit models proxy
        resp = requests.get(f"http://{hostSpec['host']}:{hostSpec['port']}/v1/models")
        self.assertEqual(resp.status_code, 200)
        models = resp.json()
        self.assertIsInstance(models, dict)
        self.assertIn("data", models)
        self.assertIsInstance(models["data"], list)

        # hit health endpoint
        resp = requests.get(f"http://{hostSpec['host']}:{hostSpec['port']}/v1")
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(resp.json(), {"status": "ok"})
255
+
256
+
257
if __name__ == "__main__":
    #unittest.main()
    # Load OPENAI_API_KEY / DSH_SERVER_TOKEN_LIST from .env before running.
    from dotenv import load_dotenv
    load_dotenv()
    # Commented alternatives select other individual tests during development.
    #unittest.main(defaultTest="TestLLMProxyService.test_chat_completions_via_model")
    unittest.main(defaultTest="TestLLMProxyService.test_chat_completions_direct")

    #unittest.main(defaultTest="TestLLMProxyService.test_register")
@@ -0,0 +1,50 @@
1
+ from lmstudio import BaseModel
2
+ import requests
3
+ from typing import List, Dict, Any
4
+
5
class ProxyServiceModel(BaseModel):
    """LM Studio model shim that relays chat requests to an OpenAI-style proxy.

    Attributes:
        endpoint: base URL of the proxy service, e.g. "http://localhost:8003".
        api_key: bearer token forwarded in the Authorization header.
        model_id: model name placed in the request payload.
    """

    endpoint: str  # e.g. "http://localhost:8003"
    api_key: str
    model_id: str

    def _headers(self) -> Dict[str, str]:
        # Shared request headers for generate() and stream().
        return {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def generate(self,
                 messages: List[Dict[str, str]],
                 **kwargs: Any) -> str:
        """Return the assistant reply for *messages* (non-streaming).

        Extra keyword arguments are merged into the request payload
        (temperature, max_tokens, ...).

        Raises:
            ValueError: if endpoint, model_id or messages is missing.
            requests.HTTPError: if the proxy responds with an error status.
        """
        # Explicit validation instead of `assert`, which is stripped under -O.
        if self.endpoint is None or self.model_id is None or messages is None:
            raise ValueError("endpoint, model_id and messages are required")
        url = f"{self.endpoint}/v1/chat/completions"
        payload = {"model": self.model_id, "messages": messages, **kwargs}
        r = requests.post(url, json=payload, headers=self._headers(), timeout=60)
        r.raise_for_status()
        return r.json()["choices"][0]["message"]["content"]

    def stream(self,
               messages: List[Dict[str, str]],
               **kwargs: Any):
        """Yield decoded SSE data payloads for a streaming chat completion.

        Stops (without yielding it) at the "[DONE]" sentinel.
        """
        url = f"{self.endpoint}/v1/chat/completions"
        payload = {"model": self.model_id,
                   "messages": messages,
                   "stream": True,
                   **kwargs}
        r = requests.post(url, json=payload,
                          headers=self._headers(),
                          stream=True,
                          timeout=None)
        r.raise_for_status()
        for line in r.iter_lines():
            # SSE frames look like b"data: {...}"; anything else is ignored.
            if line.startswith(b"data:"):
                data = line.removeprefix(b"data:").strip()
                if data == b"[DONE]":
                    break
                yield data.decode()