tiny-lfm-builtin 0.0.1__cp38-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tiny_lfm.py ADDED
@@ -0,0 +1,185 @@
+ import os, sys
+ import tiny_lfm_builtin
+ from typing import List, Dict, Optional, Union, Generator
+ import urllib.request
+
+ # Configuration
+ MODEL_URL = "https://huggingface.co/cnmoro/LFM2-350M-Q4_0-GGUF/resolve/main/model-q4.gguf"
+ CACHE_DIR = os.path.join(os.path.expanduser("~"), ".cache", "tiny_lfm_builtin")
+ MODEL_FILENAME = "model-q4.gguf"
+
+ class TinyLFM:
+     def __init__(self):
+         """
+         Initialize the Liquid LFM model.
+
+         Downloads the GGUF model to the local cache directory on first use and
+         loads it from there on subsequent runs.
+         """
+         os.makedirs(CACHE_DIR, exist_ok=True)
+         model_path = os.path.join(CACHE_DIR, MODEL_FILENAME)
+
+         if not os.path.exists(model_path):
+             self._download_model(model_path)
+
+         if not os.path.exists(model_path):
+             raise FileNotFoundError(f"Model file not found at: {model_path}")
+
+         print(f"Loading LFM Engine from {model_path}...")
+         self._engine = tiny_lfm_builtin.LiquidLFM(model_path)
+         print("Engine loaded. KV Cache is active.")
+
+     def _download_model(self, dest_path: str):
+         print("Model not found locally.")
+         print(f"Downloading LFM2-350M (approx 200MB) to {dest_path}...")
+
+         def _progress(count, block_size, total_size):
+             # total_size can be -1 when the server does not send Content-Length
+             if total_size > 0:
+                 percent = min(100, int(count * block_size * 100 / total_size))
+                 sys.stdout.write(f"\rDownload: {percent}%")
+                 sys.stdout.flush()
+
+         try:
+             urllib.request.urlretrieve(MODEL_URL, dest_path, reporthook=_progress)
+             print("\nDownload complete.")
+         except KeyboardInterrupt:
+             print("\nDownload cancelled.")
+             if os.path.exists(dest_path):
+                 os.remove(dest_path)
+             sys.exit(1)
+         except Exception as e:
+             print(f"\nError downloading model: {e}")
+             if os.path.exists(dest_path):
+                 os.remove(dest_path)
+             raise
+
+     def chat(self,
+              messages: List[Dict[str, str]],
+              max_tokens: Optional[int] = None,
+              stream: bool = True) -> Union[str, Generator[str, None, None]]:
+         """
+         Regular chat generation. Maintains history automatically via the input list.
+         KV caching is handled automatically by the Rust engine based on prefix matching.
+
+         Args:
+             messages: List of dicts, e.g. [{"role": "user", "content": "..."}]
+             max_tokens: Maximum number of new tokens to generate.
+             stream: If True, returns a generator. If False, returns the full string.
+         """
+         streamer = self._engine.generate(messages, max_tokens) if max_tokens else self._engine.generate(messages)
+
+         if stream:
+             return self._stream_wrapper(streamer)
+         else:
+             return "".join(list(streamer))
+
+     def completion(self,
+                    prompt: str,
+                    system_prompt: Optional[str] = None,
+                    assistant_start: Optional[str] = None,
+                    stop: Optional[Union[str, List[str]]] = None,
+                    max_tokens: Optional[int] = None,
+                    stream: bool = True) -> Union[str, Generator[str, None, None]]:
+         """
+         Raw completion with "prompt hacking" capabilities.
+         Allows pre-filling the assistant's response to guide the output (e.g. forcing JSON).
+
+         Args:
+             prompt: The user's input/query.
+             system_prompt: Optional system instruction.
+             assistant_start: Text to pre-fill the assistant's response with.
+                 The model will continue generating from this point.
+             stop: A string or list of strings that should stop generation.
+             max_tokens: Maximum number of new tokens to generate.
+             stream: If True, yield tokens as they arrive.
+         """
+         # 1. Construct the raw prompt manually to allow template hacking
+         full_prompt = ""
+
+         if system_prompt:
+             full_prompt += f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
+
+         full_prompt += f"<|im_start|>user\n{prompt}<|im_end|>\n"
+         full_prompt += "<|im_start|>assistant\n"
+
+         if assistant_start:
+             # Append the pre-fill without an EOS token, so the model continues it
+             full_prompt += assistant_start
+
+         # 2. Call the Rust engine.
+         # The engine automatically checks whether 'full_prompt' shares a prefix
+         # with the previous generation and reuses the KV cache if it does.
+         streamer = self._engine.completion(full_prompt, max_tokens) if max_tokens else self._engine.completion(full_prompt)
+
+         # 3. Handle Python-side stop sequences
+         stop_sequences = []
+         if stop:
+             stop_sequences = [stop] if isinstance(stop, str) else stop
+
+         generator = self._stop_aware_iterator(streamer, stop_sequences)
+
+         if stream:
+             return generator
+         else:
+             return "".join(list(generator))
+
+     def save_cache(self, session_name: str):
+         """Saves the current KV cache to disk."""
+         self._engine.save_session(session_name)
+
+     def load_cache(self, session_name: str):
+         """Loads a KV cache from disk."""
+         self._engine.load_session(session_name)
+
+     def _stream_wrapper(self, rust_streamer) -> Generator[str, None, None]:
+         """Simple wrapper that yields tokens from the Rust streamer."""
+         for token in rust_streamer:
+             yield token
+
+     def _stop_aware_iterator(self, rust_streamer, stop_sequences: List[str]) -> Generator[str, None, None]:
+         """
+         Wraps the Rust streamer to implement custom stop sequences in Python.
+         Note: the Rust engine handles the standard EOS token (<|im_end|>) internally.
+         The token that completes a stop sequence is still yielded before generation stops.
+         """
+         generated_text = ""
+
+         for token in rust_streamer:
+             yield token
+             generated_text += token
+
+             # Check for stop sequences
+             if stop_sequences:
+                 for seq in stop_sequences:
+                     if seq in generated_text:
+                         return  # Stop generation immediately
+
+ if __name__ == "__main__":
+     try:
+         lfm = TinyLFM()
+
+         print("\n--- 1. Regular Chat Streaming ---")
+         history = [{"role": "user", "content": "What is 2+2?"}]
+         for token in lfm.chat(history):
+             print(token, end="", flush=True)
+         print("\n")
+
+         print("--- 2. Prompt Hacking (JSON Mode) ---")
+         # Scenario: We want to extract keywords as a JSON list.
+
+         sys_p = "You are a data extraction tool. Output only JSON."
+         user_p = "Extract keywords from: 'Liquid AI released LFM2, a powerful edge model.'"
+         pre_fill = "Sure, here are the keywords in JSON format:\n```json\n[\n"
+
+         stream = lfm.completion(
+             prompt=user_p,
+             system_prompt=sys_p,
+             assistant_start=pre_fill,
+             stop="]",  # Stop when it tries to close the block
+             stream=True
+         )
+
+         for token in stream:
+             print(token, end="", flush=True)
+         print("\n")
+
+     except FileNotFoundError as e:
+         print(e)
+     except Exception as e:
+         print(f"An error occurred: {e}")
tiny_lfm_builtin/__init__.py ADDED
@@ -0,0 +1,5 @@
+ from .tiny_lfm_builtin import *
+
+ __doc__ = tiny_lfm_builtin.__doc__
+ if hasattr(tiny_lfm_builtin, "__all__"):
+     __all__ = tiny_lfm_builtin.__all__
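
This __init__.py simply re-exports the compiled Rust extension, so the engine that tiny_lfm.py wraps can also be driven directly. A minimal sketch, assuming the GGUF model has already been downloaded to the cache path used by tiny_lfm.py:

    import os
    import tiny_lfm_builtin

    model_path = os.path.join(os.path.expanduser("~"), ".cache",
                              "tiny_lfm_builtin", "model-q4.gguf")
    engine = tiny_lfm_builtin.LiquidLFM(model_path)

    # generate() takes the same chat-style message list as TinyLFM.chat()
    for token in engine.generate([{"role": "user", "content": "Hello!"}]):
        print(token, end="", flush=True)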
tiny_lfm_builtin/tiny_lfm_builtin.pyd ADDED
Binary file
tiny_lfm_builtin-0.0.1.dist-info/METADATA ADDED
@@ -0,0 +1,15 @@
+ Metadata-Version: 2.4
+ Name: tiny_lfm_builtin
+ Version: 0.0.1
+ Classifier: Programming Language :: Rust
+ Classifier: Programming Language :: Python :: Implementation :: CPython
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: POSIX :: Linux
+ Classifier: Operating System :: Microsoft :: Windows
+ Classifier: Operating System :: MacOS
+ Summary: LiquidAI-LFM2-350M embedded in a python package (200mb); inference with rust; completely encapsulated
+ Author-email: Carlo Moro <cnmoro@gmail.com>
+ Requires-Python: >=3.8
+ Project-URL: Repository, https://github.com/cnmoro/tiny-lfm-builtin
tiny_lfm_builtin-0.0.1.dist-info/RECORD ADDED
@@ -0,0 +1,6 @@
+ tiny_lfm.py,sha256=O9a2Ov-Lo6jYez3WgUAITylnFVH7rad8amTsBTY9Gxs,7374
+ tiny_lfm_builtin\__init__.py,sha256=AC4g_i8_eQZutzQZXoyUQqtAaiC3zw1Z5SEuGjbF3kY,147
+ tiny_lfm_builtin\tiny_lfm_builtin.pyd,sha256=7gzLftdDhpsbofu6NZrhCLvA0FSAsABfc4ff617IlB0,10788352
+ tiny_lfm_builtin-0.0.1.dist-info\METADATA,sha256=O2ul0RlrjaJD-tFMsyQiRr6kLLDE5Yble2xt754U7oQ,725
+ tiny_lfm_builtin-0.0.1.dist-info\WHEEL,sha256=gPqN4EsdiAyGvmfrYy_ONrF276O8o0hPitI2CKZrEFA,95
+ tiny_lfm_builtin-0.0.1.dist-info\RECORD,,
tiny_lfm_builtin-0.0.1.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: maturin (1.11.5)
+ Root-Is-Purelib: false
+ Tag: cp38-abi3-win_amd64