truss 0.10.0rc1__py3-none-any.whl → 0.60.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of truss has been flagged as potentially problematic; see the release advisory in the package registry for details.
- truss/__init__.py +10 -3
- truss/api/__init__.py +123 -0
- truss/api/definitions.py +51 -0
- truss/base/constants.py +116 -0
- truss/base/custom_types.py +29 -0
- truss/{errors.py → base/errors.py} +4 -0
- truss/base/trt_llm_config.py +310 -0
- truss/{truss_config.py → base/truss_config.py} +344 -31
- truss/{truss_spec.py → base/truss_spec.py} +20 -6
- truss/{validation.py → base/validation.py} +60 -11
- truss/cli/cli.py +841 -88
- truss/{remote → cli}/remote_cli.py +2 -7
- truss/contexts/docker_build_setup.py +67 -0
- truss/contexts/image_builder/cache_warmer.py +2 -8
- truss/contexts/image_builder/image_builder.py +1 -1
- truss/contexts/image_builder/serving_image_builder.py +292 -46
- truss/contexts/image_builder/util.py +1 -3
- truss/contexts/local_loader/docker_build_emulator.py +58 -0
- truss/contexts/local_loader/load_model_local.py +2 -2
- truss/contexts/local_loader/truss_module_loader.py +1 -1
- truss/contexts/local_loader/utils.py +1 -1
- truss/local/local_config.py +2 -6
- truss/local/local_config_handler.py +20 -5
- truss/patch/__init__.py +1 -0
- truss/patch/hash.py +4 -70
- truss/patch/signature.py +4 -16
- truss/patch/truss_dir_patch_applier.py +3 -78
- truss/remote/baseten/api.py +308 -23
- truss/remote/baseten/auth.py +3 -3
- truss/remote/baseten/core.py +257 -50
- truss/remote/baseten/custom_types.py +44 -0
- truss/remote/baseten/error.py +4 -0
- truss/remote/baseten/remote.py +369 -118
- truss/remote/baseten/service.py +118 -11
- truss/remote/baseten/utils/status.py +29 -0
- truss/remote/baseten/utils/tar.py +34 -22
- truss/remote/baseten/utils/transfer.py +36 -23
- truss/remote/remote_factory.py +14 -5
- truss/remote/truss_remote.py +72 -45
- truss/templates/base.Dockerfile.jinja +18 -16
- truss/templates/cache.Dockerfile.jinja +3 -3
- truss/{server → templates/control}/control/application.py +14 -35
- truss/{server → templates/control}/control/endpoints.py +39 -9
- truss/{server/control/patch/types.py → templates/control/control/helpers/custom_types.py} +13 -52
- truss/{server → templates/control}/control/helpers/inference_server_controller.py +4 -8
- truss/{server → templates/control}/control/helpers/inference_server_process_controller.py +2 -4
- truss/{server → templates/control}/control/helpers/inference_server_starter.py +5 -10
- truss/{server/control → templates/control/control/helpers}/truss_patch/model_code_patch_applier.py +8 -6
- truss/{server/control/patch → templates/control/control/helpers/truss_patch}/model_container_patch_applier.py +18 -26
- truss/templates/control/control/helpers/truss_patch/requirement_name_identifier.py +66 -0
- truss/{server → templates/control}/control/server.py +11 -6
- truss/templates/control/requirements.txt +9 -0
- truss/templates/custom_python_dx/my_model.py +28 -0
- truss/templates/docker_server/proxy.conf.jinja +42 -0
- truss/templates/docker_server/supervisord.conf.jinja +27 -0
- truss/templates/docker_server_requirements.txt +1 -0
- truss/templates/server/common/errors.py +231 -0
- truss/{server → templates/server}/common/patches/whisper/patch.py +1 -0
- truss/{server/common/patches/__init__.py → templates/server/common/patches.py} +1 -3
- truss/{server → templates/server}/common/retry.py +1 -0
- truss/{server → templates/server}/common/schema.py +11 -9
- truss/templates/server/common/tracing.py +157 -0
- truss/templates/server/main.py +9 -0
- truss/templates/server/model_wrapper.py +961 -0
- truss/templates/server/requirements.txt +21 -0
- truss/templates/server/truss_server.py +447 -0
- truss/templates/server.Dockerfile.jinja +62 -14
- truss/templates/shared/dynamic_config_resolver.py +28 -0
- truss/templates/shared/lazy_data_resolver.py +164 -0
- truss/templates/shared/log_config.py +125 -0
- truss/{server → templates}/shared/secrets_resolver.py +1 -2
- truss/{server → templates}/shared/serialization.py +31 -9
- truss/{server → templates}/shared/util.py +3 -13
- truss/templates/trtllm-audio/model/model.py +49 -0
- truss/templates/trtllm-audio/packages/sigint_patch.py +14 -0
- truss/templates/trtllm-audio/packages/whisper_trt/__init__.py +215 -0
- truss/templates/trtllm-audio/packages/whisper_trt/assets.py +25 -0
- truss/templates/trtllm-audio/packages/whisper_trt/batching.py +52 -0
- truss/templates/trtllm-audio/packages/whisper_trt/custom_types.py +26 -0
- truss/templates/trtllm-audio/packages/whisper_trt/modeling.py +184 -0
- truss/templates/trtllm-audio/packages/whisper_trt/tokenizer.py +185 -0
- truss/templates/trtllm-audio/packages/whisper_trt/utils.py +245 -0
- truss/templates/trtllm-briton/src/extension.py +64 -0
- truss/tests/conftest.py +302 -94
- truss/tests/contexts/image_builder/test_serving_image_builder.py +74 -31
- truss/tests/contexts/local_loader/test_load_local.py +2 -2
- truss/tests/contexts/local_loader/test_truss_module_finder.py +1 -1
- truss/tests/patch/test_calc_patch.py +439 -127
- truss/tests/patch/test_dir_signature.py +3 -12
- truss/tests/patch/test_hash.py +1 -1
- truss/tests/patch/test_signature.py +1 -1
- truss/tests/patch/test_truss_dir_patch_applier.py +23 -11
- truss/tests/patch/test_types.py +2 -2
- truss/tests/remote/baseten/test_api.py +153 -58
- truss/tests/remote/baseten/test_auth.py +2 -1
- truss/tests/remote/baseten/test_core.py +160 -12
- truss/tests/remote/baseten/test_remote.py +489 -77
- truss/tests/remote/baseten/test_service.py +55 -0
- truss/tests/remote/test_remote_factory.py +16 -18
- truss/tests/remote/test_truss_remote.py +26 -17
- truss/tests/templates/control/control/helpers/test_context_managers.py +11 -0
- truss/tests/templates/control/control/helpers/test_model_container_patch_applier.py +184 -0
- truss/tests/templates/control/control/helpers/test_requirement_name_identifier.py +89 -0
- truss/tests/{server → templates/control}/control/test_server.py +79 -24
- truss/tests/{server → templates/control}/control/test_server_integration.py +24 -16
- truss/tests/templates/core/server/test_dynamic_config_resolver.py +108 -0
- truss/tests/templates/core/server/test_lazy_data_resolver.py +329 -0
- truss/tests/templates/core/server/test_lazy_data_resolver_v2.py +79 -0
- truss/tests/{server → templates}/core/server/test_secrets_resolver.py +1 -1
- truss/tests/{server → templates/server}/common/test_retry.py +3 -3
- truss/tests/templates/server/test_model_wrapper.py +248 -0
- truss/tests/{server → templates/server}/test_schema.py +3 -5
- truss/tests/{server/core/server/common → templates/server}/test_truss_server.py +8 -5
- truss/tests/test_build.py +9 -52
- truss/tests/test_config.py +336 -77
- truss/tests/test_context_builder_image.py +3 -11
- truss/tests/test_control_truss_patching.py +7 -12
- truss/tests/test_custom_server.py +38 -0
- truss/tests/test_data/context_builder_image_test/test.py +3 -0
- truss/tests/test_data/happy.ipynb +56 -0
- truss/tests/test_data/model_load_failure_test/config.yaml +2 -0
- truss/tests/test_data/model_load_failure_test/model/__init__.py +0 -0
- truss/tests/test_data/patch_ping_test_server/__init__.py +0 -0
- truss/{test_data → tests/test_data}/patch_ping_test_server/app.py +3 -9
- truss/{test_data → tests/test_data}/server.Dockerfile +20 -21
- truss/tests/test_data/server_conformance_test_truss/__init__.py +0 -0
- truss/tests/test_data/server_conformance_test_truss/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/server_conformance_test_truss/model/model.py +1 -3
- truss/tests/test_data/test_async_truss/__init__.py +0 -0
- truss/tests/test_data/test_async_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_basic_truss/__init__.py +0 -0
- truss/tests/test_data/test_basic_truss/config.yaml +16 -0
- truss/tests/test_data/test_basic_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_build_commands/__init__.py +0 -0
- truss/tests/test_data/test_build_commands/config.yaml +13 -0
- truss/tests/test_data/test_build_commands/model/__init__.py +0 -0
- truss/{test_data/test_streaming_async_generator_truss → tests/test_data/test_build_commands}/model/model.py +2 -3
- truss/tests/test_data/test_build_commands_failure/__init__.py +0 -0
- truss/tests/test_data/test_build_commands_failure/config.yaml +14 -0
- truss/tests/test_data/test_build_commands_failure/model/__init__.py +0 -0
- truss/tests/test_data/test_build_commands_failure/model/model.py +17 -0
- truss/tests/test_data/test_concurrency_truss/__init__.py +0 -0
- truss/tests/test_data/test_concurrency_truss/config.yaml +4 -0
- truss/tests/test_data/test_concurrency_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_custom_server_truss/__init__.py +0 -0
- truss/tests/test_data/test_custom_server_truss/config.yaml +20 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/Dockerfile +17 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/README.md +10 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/VERSION +1 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/__init__.py +0 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/app.py +19 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/build_upload_new_image.sh +6 -0
- truss/tests/test_data/test_openai/__init__.py +0 -0
- truss/{test_data/test_basic_truss → tests/test_data/test_openai}/config.yaml +1 -2
- truss/tests/test_data/test_openai/model/__init__.py +0 -0
- truss/tests/test_data/test_openai/model/model.py +15 -0
- truss/tests/test_data/test_pyantic_v1/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v1/model/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v1/model/model.py +28 -0
- truss/tests/test_data/test_pyantic_v1/requirements.txt +1 -0
- truss/tests/test_data/test_pyantic_v2/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v2/config.yaml +13 -0
- truss/tests/test_data/test_pyantic_v2/model/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v2/model/model.py +30 -0
- truss/tests/test_data/test_pyantic_v2/requirements.txt +1 -0
- truss/tests/test_data/test_requirements_file_truss/__init__.py +0 -0
- truss/tests/test_data/test_requirements_file_truss/config.yaml +13 -0
- truss/tests/test_data/test_requirements_file_truss/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/test_requirements_file_truss/model/model.py +1 -0
- truss/tests/test_data/test_streaming_async_generator_truss/__init__.py +0 -0
- truss/tests/test_data/test_streaming_async_generator_truss/config.yaml +4 -0
- truss/tests/test_data/test_streaming_async_generator_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_async_generator_truss/model/model.py +7 -0
- truss/tests/test_data/test_streaming_read_timeout/__init__.py +0 -0
- truss/tests/test_data/test_streaming_read_timeout/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss/config.yaml +4 -0
- truss/tests/test_data/test_streaming_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_error/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_error/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/test_streaming_truss_with_error/model/model.py +3 -11
- truss/tests/test_data/test_streaming_truss_with_error/packages/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_error/packages/helpers_1.py +5 -0
- truss/tests/test_data/test_streaming_truss_with_error/packages/helpers_2.py +2 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/config.yaml +43 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/model/model.py +65 -0
- truss/tests/test_data/test_trt_llm_truss/__init__.py +0 -0
- truss/tests/test_data/test_trt_llm_truss/config.yaml +15 -0
- truss/tests/test_data/test_trt_llm_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_trt_llm_truss/model/model.py +15 -0
- truss/tests/test_data/test_truss/__init__.py +0 -0
- truss/tests/test_data/test_truss/config.yaml +4 -0
- truss/tests/test_data/test_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_truss/model/dummy +0 -0
- truss/tests/test_data/test_truss/packages/__init__.py +0 -0
- truss/tests/test_data/test_truss/packages/test_package/__init__.py +0 -0
- truss/tests/test_data/test_truss_server_caching_truss/__init__.py +0 -0
- truss/tests/test_data/test_truss_server_caching_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/config.yaml +4 -0
- truss/tests/test_data/test_truss_with_error/model/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/model/model.py +8 -0
- truss/tests/test_data/test_truss_with_error/packages/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/packages/helpers_1.py +5 -0
- truss/tests/test_data/test_truss_with_error/packages/helpers_2.py +2 -0
- truss/tests/test_docker.py +2 -1
- truss/tests/test_model_inference.py +1340 -292
- truss/tests/test_model_schema.py +33 -26
- truss/tests/test_testing_utilities_for_other_tests.py +50 -5
- truss/tests/test_truss_gatherer.py +3 -5
- truss/tests/test_truss_handle.py +62 -59
- truss/tests/test_util.py +2 -1
- truss/tests/test_validation.py +15 -13
- truss/tests/trt_llm/test_trt_llm_config.py +41 -0
- truss/tests/trt_llm/test_validation.py +91 -0
- truss/tests/util/test_config_checks.py +40 -0
- truss/tests/util/test_env_vars.py +14 -0
- truss/tests/util/test_path.py +10 -23
- truss/trt_llm/config_checks.py +43 -0
- truss/trt_llm/validation.py +42 -0
- truss/truss_handle/__init__.py +0 -0
- truss/truss_handle/build.py +122 -0
- truss/{decorators.py → truss_handle/decorators.py} +1 -1
- truss/truss_handle/patch/__init__.py +0 -0
- truss/{patch → truss_handle/patch}/calc_patch.py +146 -92
- truss/{types.py → truss_handle/patch/custom_types.py} +35 -27
- truss/{patch → truss_handle/patch}/dir_signature.py +1 -1
- truss/truss_handle/patch/hash.py +71 -0
- truss/{patch → truss_handle/patch}/local_truss_patch_applier.py +6 -4
- truss/truss_handle/patch/signature.py +22 -0
- truss/truss_handle/patch/truss_dir_patch_applier.py +87 -0
- truss/{readme_generator.py → truss_handle/readme_generator.py} +3 -2
- truss/{truss_gatherer.py → truss_handle/truss_gatherer.py} +3 -2
- truss/{truss_handle.py → truss_handle/truss_handle.py} +174 -78
- truss/util/.truss_ignore +3 -0
- truss/{docker.py → util/docker.py} +6 -2
- truss/util/download.py +6 -15
- truss/util/env_vars.py +41 -0
- truss/util/log_utils.py +52 -0
- truss/util/path.py +20 -20
- truss/util/requirements.py +11 -0
- {truss-0.10.0rc1.dist-info → truss-0.60.0.dist-info}/METADATA +18 -16
- truss-0.60.0.dist-info/RECORD +324 -0
- {truss-0.10.0rc1.dist-info → truss-0.60.0.dist-info}/WHEEL +1 -1
- truss-0.60.0.dist-info/entry_points.txt +4 -0
- truss_chains/__init__.py +71 -0
- truss_chains/definitions.py +756 -0
- truss_chains/deployment/__init__.py +0 -0
- truss_chains/deployment/code_gen.py +816 -0
- truss_chains/deployment/deployment_client.py +871 -0
- truss_chains/framework.py +1480 -0
- truss_chains/public_api.py +231 -0
- truss_chains/py.typed +0 -0
- truss_chains/pydantic_numpy.py +131 -0
- truss_chains/reference_code/reference_chainlet.py +34 -0
- truss_chains/reference_code/reference_model.py +10 -0
- truss_chains/remote_chainlet/__init__.py +0 -0
- truss_chains/remote_chainlet/model_skeleton.py +60 -0
- truss_chains/remote_chainlet/stub.py +380 -0
- truss_chains/remote_chainlet/utils.py +332 -0
- truss_chains/streaming.py +378 -0
- truss_chains/utils.py +178 -0
- CODE_OF_CONDUCT.md +0 -131
- CONTRIBUTING.md +0 -48
- README.md +0 -137
- context_builder.Dockerfile +0 -24
- truss/blob/blob_backend.py +0 -10
- truss/blob/blob_backend_registry.py +0 -23
- truss/blob/http_public_blob_backend.py +0 -23
- truss/build/__init__.py +0 -2
- truss/build/build.py +0 -143
- truss/build/configure.py +0 -63
- truss/cli/__init__.py +0 -2
- truss/cli/console.py +0 -5
- truss/cli/create.py +0 -5
- truss/config/trt_llm.py +0 -81
- truss/constants.py +0 -61
- truss/model_inference.py +0 -123
- truss/patch/types.py +0 -30
- truss/pytest.ini +0 -7
- truss/server/common/errors.py +0 -100
- truss/server/common/termination_handler_middleware.py +0 -64
- truss/server/common/truss_server.py +0 -389
- truss/server/control/patch/model_code_patch_applier.py +0 -46
- truss/server/control/patch/requirement_name_identifier.py +0 -17
- truss/server/inference_server.py +0 -29
- truss/server/model_wrapper.py +0 -434
- truss/server/shared/logging.py +0 -81
- truss/templates/trtllm/model/model.py +0 -97
- truss/templates/trtllm/packages/build_engine_utils.py +0 -34
- truss/templates/trtllm/packages/constants.py +0 -11
- truss/templates/trtllm/packages/schema.py +0 -216
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/ensemble/config.pbtxt +0 -246
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/postprocessing/1/model.py +0 -181
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/postprocessing/config.pbtxt +0 -64
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/preprocessing/1/model.py +0 -260
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/preprocessing/config.pbtxt +0 -99
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/tensorrt_llm/config.pbtxt +0 -208
- truss/templates/trtllm/packages/triton_client.py +0 -150
- truss/templates/trtllm/packages/utils.py +0 -43
- truss/test_data/context_builder_image_test/test.py +0 -4
- truss/test_data/happy.ipynb +0 -54
- truss/test_data/model_load_failure_test/config.yaml +0 -2
- truss/test_data/test_concurrency_truss/config.yaml +0 -2
- truss/test_data/test_streaming_async_generator_truss/config.yaml +0 -2
- truss/test_data/test_streaming_truss/config.yaml +0 -3
- truss/test_data/test_truss/config.yaml +0 -2
- truss/tests/server/common/test_termination_handler_middleware.py +0 -93
- truss/tests/server/control/test_model_container_patch_applier.py +0 -203
- truss/tests/server/core/server/common/test_util.py +0 -19
- truss/tests/server/test_model_wrapper.py +0 -87
- truss/util/data_structures.py +0 -16
- truss-0.10.0rc1.dist-info/RECORD +0 -216
- truss-0.10.0rc1.dist-info/entry_points.txt +0 -3
- truss/{server/shared → base}/__init__.py +0 -0
- truss/{server → templates/control}/control/helpers/context_managers.py +0 -0
- truss/{server/control → templates/control/control/helpers}/errors.py +0 -0
- truss/{server/control/patch → templates/control/control/helpers/truss_patch}/__init__.py +0 -0
- truss/{server/control/patch → templates/control/control/helpers/truss_patch}/system_packages.py +0 -0
- truss/{test_data/annotated_types_truss/model → templates/server}/__init__.py +0 -0
- truss/{server → templates/server}/common/__init__.py +0 -0
- truss/{test_data/gcs_fix/model → templates/shared}/__init__.py +0 -0
- truss/templates/{trtllm → trtllm-briton}/README.md +0 -0
- truss/{test_data/server_conformance_test_truss/model → tests/test_data}/__init__.py +0 -0
- truss/{test_data/test_basic_truss/model → tests/test_data/annotated_types_truss}/__init__.py +0 -0
- truss/{test_data → tests/test_data}/annotated_types_truss/config.yaml +0 -0
- truss/{test_data/test_requirements_file_truss → tests/test_data/annotated_types_truss}/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/annotated_types_truss/model/model.py +0 -0
- truss/{test_data → tests/test_data}/auto-mpg.data +0 -0
- truss/{test_data → tests/test_data}/context_builder_image_test/Dockerfile +0 -0
- truss/{test_data/test_truss/model → tests/test_data/context_builder_image_test}/__init__.py +0 -0
- truss/{test_data/test_truss_server_caching_truss/model → tests/test_data/gcs_fix}/__init__.py +0 -0
- truss/{test_data → tests/test_data}/gcs_fix/config.yaml +0 -0
- truss/tests/{local → test_data/gcs_fix/model}/__init__.py +0 -0
- truss/{test_data → tests/test_data}/gcs_fix/model/model.py +0 -0
- truss/{test_data/test_truss/model/dummy → tests/test_data/model_load_failure_test/__init__.py} +0 -0
- truss/{test_data → tests/test_data}/model_load_failure_test/model/model.py +0 -0
- truss/{test_data → tests/test_data}/pima-indians-diabetes.csv +0 -0
- truss/{test_data → tests/test_data}/readme_int_example.md +0 -0
- truss/{test_data → tests/test_data}/readme_no_example.md +0 -0
- truss/{test_data → tests/test_data}/readme_str_example.md +0 -0
- truss/{test_data → tests/test_data}/server_conformance_test_truss/config.yaml +0 -0
- truss/{test_data → tests/test_data}/test_async_truss/config.yaml +0 -0
- truss/{test_data → tests/test_data}/test_async_truss/model/model.py +3 -3
- /truss/{test_data → tests/test_data}/test_basic_truss/model/model.py +0 -0
- /truss/{test_data → tests/test_data}/test_concurrency_truss/model/model.py +0 -0
- /truss/{test_data/test_requirements_file_truss → tests/test_data/test_pyantic_v1}/config.yaml +0 -0
- /truss/{test_data → tests/test_data}/test_requirements_file_truss/requirements.txt +0 -0
- /truss/{test_data → tests/test_data}/test_streaming_read_timeout/config.yaml +0 -0
- /truss/{test_data → tests/test_data}/test_streaming_read_timeout/model/model.py +0 -0
- /truss/{test_data → tests/test_data}/test_streaming_truss/model/model.py +0 -0
- /truss/{test_data → tests/test_data}/test_streaming_truss_with_error/config.yaml +0 -0
- /truss/{test_data → tests/test_data}/test_truss/examples.yaml +0 -0
- /truss/{test_data → tests/test_data}/test_truss/model/model.py +0 -0
- /truss/{test_data → tests/test_data}/test_truss/packages/test_package/test.py +0 -0
- /truss/{test_data → tests/test_data}/test_truss_server_caching_truss/config.yaml +0 -0
- /truss/{test_data → tests/test_data}/test_truss_server_caching_truss/model/model.py +0 -0
- /truss/{patch → truss_handle/patch}/constants.py +0 -0
- /truss/{notebook.py → util/notebook.py} +0 -0
- {truss-0.10.0rc1.dist-info → truss-0.60.0.dist-info}/LICENSE +0 -0
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import re
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
import tensorrt_llm
|
|
7
|
+
import torch
|
|
8
|
+
import torchaudio
|
|
9
|
+
from torch import Tensor
|
|
10
|
+
|
|
11
|
+
from whisper_trt.assets import download_assets
|
|
12
|
+
from whisper_trt.batching import WhisperBatchProcessor
|
|
13
|
+
from whisper_trt.custom_types import (
|
|
14
|
+
DEFAULT_MAX_NEW_TOKENS,
|
|
15
|
+
DEFAULT_NUM_BEAMS,
|
|
16
|
+
SUPPORTED_SAMPLE_RATE,
|
|
17
|
+
BatchWhisperItem,
|
|
18
|
+
Segment,
|
|
19
|
+
WhisperResult,
|
|
20
|
+
)
|
|
21
|
+
from whisper_trt.modeling import WhisperDecoding, WhisperEncoding
|
|
22
|
+
from whisper_trt.tokenizer import REVERSED_LANGUAGES, get_tokenizer
|
|
23
|
+
from whisper_trt.utils import log_mel_spectrogram
|
|
24
|
+
|
|
25
|
+
# Matches one timestamped segment in Whisper's decoded output, e.g.
# "<|0.00|>Hello world.<|2.40|>" -> groups: (start time, text, end time).
SEGMENTS_PATTERN = re.compile(r"<\|([\d.]+)\|>([^<]+)<\|([\d.]+)\|>")
# Matches a two-letter language token such as "<|en|>".
LANG_CODE_PATTERN = re.compile(r"<\|([a-z]{2})\|>")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class WhisperModel(object):
    """TensorRT-LLM-backed Whisper speech-to-text model.

    Wraps a compiled encoder/decoder engine pair plus the Whisper tokenizer,
    and funnels all inference through a ``WhisperBatchProcessor`` so that
    concurrent requests are micro-batched up to the engine's
    ``max_batch_size``.
    """

    def __init__(
        self,
        engine_dir,  # path to the compiled TensorRT-LLM engine directory
        tokenizer_name="multilingual",
        debug_mode=False,
        assets_dir=None,  # tokenizer/mel-filter assets; downloaded when None
        max_queue_time=0.01,  # 10 ms by default
    ):
        # Single-process setup: pin this rank's GPU before loading engines.
        world_size = 1
        runtime_rank = tensorrt_llm.mpi_rank()
        runtime_mapping = tensorrt_llm.Mapping(world_size, runtime_rank)
        torch.cuda.set_device(runtime_rank % runtime_mapping.gpus_per_node)

        engine_dir = Path(engine_dir)

        # Fetch tokenizer + mel-filter assets into the local cache if the
        # caller did not supply a directory.
        self.assets_dir = assets_dir
        if self.assets_dir is None:
            self.assets_dir = download_assets()

        self.encoder = WhisperEncoding(engine_dir)
        self.decoder = WhisperDecoding(
            engine_dir, runtime_mapping, debug_mode=debug_mode
        )
        # Batch size is dictated by the compiled decoder engine.
        self.batch_size = self.decoder.decoder_config["max_batch_size"]
        self.n_mels = self.encoder.n_mels
        self.tokenizer = get_tokenizer(
            name=tokenizer_name,
            num_languages=self.encoder.num_languages,
            tokenizer_dir=self.assets_dir,
        )
        # Token id used to stop generation.
        self.eot_id = self.tokenizer.encode(
            "<|endoftext|>", allowed_special=self.tokenizer.special_tokens_set
        )[0]

        # All requests go through this batcher (see detect_audio_and_language
        # and transcribe below).
        self.batch_processor = WhisperBatchProcessor(
            self, max_batch_size=self.batch_size, max_queue_time=max_queue_time
        )

    def preprocess_audio(self, binary_data) -> Tensor:
        """Decode raw audio bytes and resample to ``SUPPORTED_SAMPLE_RATE``.

        Returns the waveform tensor produced by ``torchaudio.load`` (the
        original ``-> dict`` annotation was wrong — a waveform is returned).
        """
        audio_stream = io.BytesIO(binary_data)
        waveform, sample_rate = torchaudio.load(audio_stream)

        # Resample audio to rate compatible with what the model was trained at
        if sample_rate != SUPPORTED_SAMPLE_RATE:
            waveform = torchaudio.transforms.Resample(
                orig_freq=sample_rate, new_freq=SUPPORTED_SAMPLE_RATE
            )(waveform)
            sample_rate = SUPPORTED_SAMPLE_RATE

        return waveform

    def _get_text_prefix(
        self,
        language: str = "english",
        prompt: Optional[str] = None,
        timestamps: bool = False,
        task: str = "transcribe",
        prefix: Optional[str] = None,
    ):
        """Build the Whisper special-token prompt string for decoding.

        ``language`` may be a full language name (mapped via
        ``REVERSED_LANGUAGES``) or already a language code; unknown names fall
        through and are used verbatim as the code.
        """
        try:
            language_code = REVERSED_LANGUAGES[language]
        except KeyError:
            # Assume the caller already passed a language code.
            language_code = language
        text_prefix = f"<|startoftranscript|><|{language_code}|><|{task}|>"
        if prompt is not None:
            text_prefix = f"<|startofprev|> {prompt}" + text_prefix
        if timestamps:
            text_prefix += "<|0.00|>"
        else:
            text_prefix += "<|notimestamps|>"
        if prefix is not None:
            text_prefix += prefix
        return text_prefix

    def process_batch(
        self,
        mel_batch,
        decoder_input_ids,
        num_beams=DEFAULT_NUM_BEAMS,
        max_new_tokens=DEFAULT_MAX_NEW_TOKENS,
    ) -> Tensor:
        """Run the encoder + decoder over one padded batch and return output ids."""
        encoder_output = self.encoder.get_audio_features(mel_batch)
        output_ids = self.decoder.generate(
            decoder_input_ids,
            encoder_output,
            self.eot_id,
            max_new_tokens=max_new_tokens,
            num_beams=num_beams,
        )
        return output_ids

    def decode_output_ids(self, output_ids, text_prefix):
        """Decode the first row of *output_ids* to text.

        NOTE(review): the ``text.replace`` below is a no-op — ``str.replace``
        returns a new string and the result is discarded, so ``text_prefix``
        is NOT stripped here. Callers currently depend on the prefix tokens
        surviving (``transcribe`` feeds this text to
        ``_postprocess_transcript``, which extracts the language code from
        those tokens), so "fixing" this line would change behavior — confirm
        intent before touching it.
        """
        text = self.tokenizer.decode(output_ids[0]).strip()
        text.replace(text_prefix, "")
        return text

    async def detect_audio_and_language(self, mel) -> Optional[str]:
        """
        Detects the audio and language from the given mel spectrogram.

        Args:
            mel: The mel spectrogram of the audio.

        Returns:
            The detected language code, or None if no speech is detected.
        """
        text_prefix = "<|startoftranscript|>"

        prompt_ids = self.tokenizer.encode(
            text_prefix, allowed_special=self.tokenizer.special_tokens_set
        )

        # A single new token is enough: the model's first prediction after
        # <|startoftranscript|> is the language (or <|nospeech|>) token.
        output_ids = await self.batch_processor.process(
            item=BatchWhisperItem(mel=mel, prompt_ids=prompt_ids, max_new_tokens=1)
        )
        text = self.decode_output_ids(output_ids, text_prefix)
        if text == "<|nospeech|>":
            return None
        # Strip the prompt and the "<|" / "|>" delimiters, leaving the code.
        return text.replace(text_prefix, "").replace("<|", "").replace("|>", "")

    async def transcribe(
        self,
        waveform,
        prompt: Optional[str] = None,
        language: Optional[str] = None,
        timestamps: bool = False,
        num_beams: int = DEFAULT_NUM_BEAMS,
        prefix: Optional[str] = None,
        task: str = "transcribe",
        max_new_tokens=128,
    ):
        """Transcribe *waveform* and return a ``WhisperResult``.

        Args:
            waveform: Audio tensor at ``SUPPORTED_SAMPLE_RATE`` (as produced
                by ``preprocess_audio``).
            prompt: Optional conditioning text inserted via ``<|startofprev|>``.
            language: Language name/code; auto-detected when ``None``.
            timestamps: Whether to request timestamp tokens. NOTE(review):
                with ``timestamps=False`` the decoded text contains no
                timestamp tokens, so the returned segments list will be empty
                — presumably intended; confirm with callers.
            num_beams: NOTE(review): accepted but never forwarded — the batch
                item only carries mel/prompt_ids/max_new_tokens and the batch
                processor always decodes with ``DEFAULT_NUM_BEAMS``.
            prefix: Optional decoder text prefix appended to the prompt.
            task: "transcribe" or "translate".
            max_new_tokens: Cap on generated tokens for this request.

        Returns:
            A ``WhisperResult`` with parsed segments and the language code;
            empty segments and ``language_code=None`` when no speech is
            detected.
        """
        mel = await log_mel_spectrogram(
            waveform.numpy(),
            self.n_mels,
            device="cuda",
            mel_filters_dir=self.assets_dir,
        )
        mel = mel.type(torch.float16)
        if language is None:
            language = await self.detect_audio_and_language(mel)
            if language is None:
                # No speech was detected. Can result empty segments
                return WhisperResult(segments=[], language_code=None)
        text_prefix = self._get_text_prefix(
            language=language,
            prompt=prompt,
            timestamps=timestamps,
            prefix=prefix,
            task=task,
        )

        prompt_ids = self.tokenizer.encode(
            text_prefix, allowed_special=self.tokenizer.special_tokens_set
        )

        output_ids: Tensor = await self.batch_processor.process(
            item=BatchWhisperItem(
                mel=mel, prompt_ids=prompt_ids, max_new_tokens=max_new_tokens
            )
        )

        return self._postprocess_transcript(
            self.decode_output_ids(output_ids, text_prefix)
        )

    def _postprocess_transcript(self, transcribed_text: str) -> WhisperResult:
        """
        Post-process the output of the transcription model.

        Extracts the language code from the first ``<|xx|>`` token and parses
        ``<|start|>text<|end|>`` pairs into ``Segment`` objects.
        NOTE(review): ``[0]`` assumes a language token is always present in
        the decoded text (it comes from the prompt prefix, which
        ``decode_output_ids`` does not strip); raises ``IndexError`` otherwise.
        """
        language_code = LANG_CODE_PATTERN.findall(transcribed_text)[0]

        # Find all matches in the input string
        matches = SEGMENTS_PATTERN.findall(transcribed_text)

        # Process matches to create the desired output format
        segments = []
        for match in matches:
            start, text, end = match

            segments.append(
                Segment(
                    **{"start": float(start), "end": float(end), "text": text.strip()}
                )
            )

        return WhisperResult(segments=segments, language_code=language_code)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import urllib.request
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def _download_files(urls):
|
|
6
|
+
ASSETS_DIR = os.path.join(
|
|
7
|
+
os.path.expanduser("~"), ".cache", "whisper-trt", "assets"
|
|
8
|
+
)
|
|
9
|
+
os.makedirs(ASSETS_DIR, exist_ok=True)
|
|
10
|
+
|
|
11
|
+
for url in urls:
|
|
12
|
+
file_name = os.path.basename(url)
|
|
13
|
+
file_path = os.path.join(ASSETS_DIR, file_name)
|
|
14
|
+
if not os.path.exists(file_path):
|
|
15
|
+
urllib.request.urlretrieve(url, file_path)
|
|
16
|
+
return ASSETS_DIR
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def download_assets():
    """Fetch the tokenizer and mel-filter assets whisper-trt needs.

    Downloads OpenAI's multilingual tiktoken vocabulary and mel filter bank
    into the local asset cache (skipping files already present) and returns
    the cache directory path.
    """
    asset_urls = [
        "https://raw.githubusercontent.com/openai/whisper/main/whisper/assets/multilingual.tiktoken",
        "https://raw.githubusercontent.com/openai/whisper/main/whisper/assets/mel_filters.npz",
    ]
    return _download_files(asset_urls)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import TYPE_CHECKING, List
|
|
3
|
+
|
|
4
|
+
if TYPE_CHECKING:
|
|
5
|
+
from whisper_trt import WhisperModel
|
|
6
|
+
|
|
7
|
+
import torch
|
|
8
|
+
from async_batcher.batcher import AsyncBatcher
|
|
9
|
+
from torch import Tensor
|
|
10
|
+
|
|
11
|
+
from whisper_trt.custom_types import DEFAULT_NUM_BEAMS, BatchWhisperItem
|
|
12
|
+
|
|
13
|
+
# Canonical English-transcription prompt prefix. Not referenced in this module;
# NOTE(review): the name misspells "PREFIX", but it is module-level and may be
# imported elsewhere, so the typo is preserved for compatibility.
FIXED_TEXT_PRFIX = "<|startoftranscript|><|en|><|transcribe|><|0.00|>"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class WhisperBatchProcessor(AsyncBatcher[List[BatchWhisperItem], List[str]]):
    """Micro-batches concurrent Whisper requests into a single model call.

    ``AsyncBatcher`` queues incoming items for up to ``max_queue_time`` and
    hands them to :meth:`process_batch` as one list; the sliced results are
    routed back to the individual awaiting callers.
    """

    def __init__(self, model, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Back-reference to the owning model that performs the actual inference.
        self.model: "WhisperModel" = model

    def concat_and_pad_mels(self, tensors: List[Tensor]) -> Tensor:
        """Concatenate mel spectrograms along dim 0, padded to ``max_batch_size``.

        The last spectrogram is repeated as padding. Works on a copy so the
        caller's list is not mutated (the original appended to it in place).
        """
        padded = list(tensors)
        while len(padded) < self.max_batch_size:
            padded.append(padded[-1])
        return torch.cat(padded, dim=0).type(torch.float16)

    def concat_and_pad_prompts(self, prompts: List[List]) -> Tensor:
        """Stack prompt-id lists into a tensor, padded to ``max_batch_size``.

        The last prompt is repeated as padding. Works on a copy so the
        caller's list is not mutated.
        """
        padded = list(prompts)
        while len(padded) < self.max_batch_size:
            padded.append(padded[-1])
        return Tensor(padded)

    def process_batch(self, batch: List[BatchWhisperItem]) -> Tensor:
        """Run one padded batch through the model and return per-item outputs.

        Returns the model's output ids for the first ``len(batch)`` rows
        (the original ``List[float]`` annotation did not match the tensor
        slice actually returned).
        """
        # ``logging.warn`` is a deprecated alias for ``logging.warning``; use
        # the real name with lazy %-args instead of an eager f-string.
        logging.warning("Processing batch of size %d", len(batch))

        # Need to pad the batch up to the maximum batch size.
        decoder_input_ids = self.concat_and_pad_prompts(
            [item.prompt_ids for item in batch]
        )
        mel_batch = self.concat_and_pad_mels([item.mel for item in batch])

        max_new_tokens = max(item.max_new_tokens for item in batch)
        batch_result = self.model.process_batch(
            mel_batch,
            decoder_input_ids,
            max_new_tokens=max_new_tokens,
            num_beams=DEFAULT_NUM_BEAMS,
        )
        # Splicing to len(batch) removes the padding rows added by
        # `concat_and_pad_mels` and `concat_and_pad_prompts`.
        return batch_result[: len(batch)]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from typing import List, NamedTuple
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
from torch import Tensor
|
|
5
|
+
|
|
6
|
+
# Audio sample rate (Hz) expected by the pipeline.
SUPPORTED_SAMPLE_RATE = 16_000
# Default beam width for decoding (1 == greedy).
DEFAULT_NUM_BEAMS = 1
# Default cap on the number of tokens generated per request.
DEFAULT_MAX_NEW_TOKENS = 128
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BatchWhisperItem(NamedTuple):
    """One transcription request queued for batched inference."""

    # Mel spectrogram of the audio clip.
    mel: Tensor
    # Decoder prompt token ids for this request.
    prompt_ids: Tensor
    # Per-request cap on generated tokens.
    max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS
    # Requested beam width for this item.
    num_beams: int = DEFAULT_NUM_BEAMS
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Segment(BaseModel):
    """A single span of transcribed text with its start/end offsets.

    NOTE(review): start/end are parsed from Whisper timestamp tokens by the
    caller; presumably seconds — confirm against the producing regex.
    """

    start: float
    end: float
    text: str
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class WhisperResult(BaseModel):
    """Full transcription result: ordered segments plus the detected or
    requested language code."""

    segments: List[Segment]
    language_code: str
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
from collections import OrderedDict
|
|
2
|
+
|
|
3
|
+
import tensorrt_llm
|
|
4
|
+
import tensorrt_llm.logger as logger
|
|
5
|
+
import torch
|
|
6
|
+
from tensorrt_llm._utils import str_dtype_to_trt, trt_dtype_to_torch
|
|
7
|
+
from tensorrt_llm.runtime import ModelConfig, SamplingConfig
|
|
8
|
+
from tensorrt_llm.runtime.session import Session, TensorInfo
|
|
9
|
+
|
|
10
|
+
from whisper_trt.custom_types import DEFAULT_NUM_BEAMS
|
|
11
|
+
from whisper_trt.utils import read_config, remove_tensor_padding
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class WhisperEncoding:
    """Wrapper around the serialized TensorRT-LLM Whisper *encoder* engine.

    Loads the rank-0 encoder engine from ``engine_dir`` and exposes
    ``get_audio_features`` to run mel spectrograms through it on the GPU.
    """

    def __init__(self, engine_dir):
        # `engine_dir` behaves like a pathlib.Path (the `/` operator is used
        # below to locate the engine file).
        self.session = self.get_session(engine_dir)

    def get_session(self, engine_dir):
        """Read the encoder config and deserialize the rank-0 engine.

        Side effect: caches the parsed config and a few frequently used
        fields (dtype, n_mels, num_languages) on the instance.
        """
        config = read_config("encoder", engine_dir)
        self.encoder_config = config

        self.dtype = config["dtype"]
        self.n_mels = config["n_mels"]
        self.num_languages = config["num_languages"]

        serialize_path = engine_dir / "encoder" / "rank0.engine"

        with open(serialize_path, "rb") as f:
            session = Session.from_serialized_engine(f.read())

        return session

    def get_audio_features(self, mel):
        """Run a batch of mel spectrograms through the encoder.

        Args:
            mel: batched mel tensor; indexing below implies shape
                (batch, n_mels, time) — TODO confirm.

        Returns:
            The engine's "encoder_output" tensor (on CUDA).
        """
        # One length per batch element; time dim halved — presumably matches
        # the encoder's downsampling, confirm against the engine build.
        input_lengths = torch.tensor(
            [mel.shape[2] // 2 for _ in range(mel.shape[0])],
            dtype=torch.int32,
            device=mel.device,
        )
        if self.encoder_config["plugin_config"]["remove_input_padding"]:
            mel_input_lengths = torch.full(
                (mel.shape[0],), mel.shape[2], dtype=torch.int32, device="cuda"
            )
            # mel B,D,T -> B,T,D -> BxT, D
            mel = mel.transpose(1, 2)
            mel = remove_tensor_padding(mel, mel_input_lengths)

        inputs = OrderedDict()
        inputs["input_features"] = mel
        inputs["input_lengths"] = input_lengths

        output_list = [
            TensorInfo("input_features", str_dtype_to_trt(self.dtype), mel.shape),
            TensorInfo("input_lengths", str_dtype_to_trt("int32"), input_lengths.shape),
        ]

        # Ask TRT what output shapes these inputs produce, then pre-allocate
        # matching CUDA buffers for the engine to write into.
        output_info = (self.session).infer_shapes(output_list)

        logger.debug(f"output info {output_info}")
        outputs = {
            t.name: torch.empty(
                tuple(t.shape), dtype=trt_dtype_to_torch(t.dtype), device="cuda"
            )
            for t in output_info
        }
        stream = torch.cuda.current_stream()
        ok = self.session.run(inputs=inputs, outputs=outputs, stream=stream.cuda_stream)
        assert ok, "Engine execution failed"
        # The run is async on the stream; wait before reading the outputs.
        stream.synchronize()
        audio_features = outputs["encoder_output"]
        return audio_features
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class WhisperDecoding:
    """Wrapper around the serialized TensorRT-LLM Whisper *decoder* engine.

    Builds a GenerationSession from the rank-0 decoder engine and exposes
    ``generate`` for cross-attention decoding over encoder outputs.
    """

    def __init__(self, engine_dir, runtime_mapping, debug_mode=False):
        self.decoder_config = read_config("decoder", engine_dir)
        self.decoder_generation_session = self.get_session(
            engine_dir, runtime_mapping, debug_mode
        )

    def get_session(self, engine_dir, runtime_mapping, debug_mode=False):
        """Deserialize the decoder engine and build its GenerationSession.

        The ModelConfig fields mirror the decoder build config stored next
        to the engine.
        """
        serialize_path = engine_dir / "decoder" / "rank0.engine"
        with open(serialize_path, "rb") as f:
            decoder_engine_buffer = f.read()

        decoder_model_config = ModelConfig(
            max_batch_size=self.decoder_config["max_batch_size"],
            max_beam_width=self.decoder_config["max_beam_width"],
            num_heads=self.decoder_config["num_attention_heads"],
            # Whisper uses full multi-head attention: kv heads == heads.
            num_kv_heads=self.decoder_config["num_attention_heads"],
            hidden_size=self.decoder_config["hidden_size"],
            vocab_size=self.decoder_config["vocab_size"],
            # Decoder attends over the encoder output.
            cross_attention=True,
            num_layers=self.decoder_config["num_hidden_layers"],
            gpt_attention_plugin=self.decoder_config["plugin_config"][
                "gpt_attention_plugin"
            ],
            remove_input_padding=self.decoder_config["plugin_config"][
                "remove_input_padding"
            ],
            has_position_embedding=self.decoder_config["has_position_embedding"],
            dtype=self.decoder_config["dtype"],
            has_token_type_embedding=False,
        )
        decoder_generation_session = tensorrt_llm.runtime.GenerationSession(
            decoder_model_config,
            decoder_engine_buffer,
            runtime_mapping,
            debug_mode=debug_mode,
        )

        return decoder_generation_session

    def generate(
        self,
        decoder_input_ids,
        encoder_outputs,
        eot_id,
        max_new_tokens=40,
        num_beams=DEFAULT_NUM_BEAMS,
    ):
        """Decode token ids for a batch of encoder outputs.

        Args:
            decoder_input_ids: per-item decoder prompt token ids.
            encoder_outputs: encoder features; indexing below implies shape
                (batch, enc_len, hidden) — TODO confirm.
            eot_id: end-of-transcript token id, used as both end and pad id.
            max_new_tokens: generation cap.
            num_beams: beam width for sampling.

        Returns:
            Nested Python list of output token ids (moved to CPU).
        """
        # Every batch element is assigned the full encoder length — the
        # attention mask below is likewise all-ones (no per-item masking).
        encoder_input_lengths = torch.tensor(
            [encoder_outputs.shape[1] for x in range(encoder_outputs.shape[0])],
            dtype=torch.int32,
            device="cuda",
        )

        decoder_input_lengths = torch.tensor(
            [decoder_input_ids.shape[-1] for _ in range(decoder_input_ids.shape[0])],
            dtype=torch.int32,
            device="cuda",
        )
        decoder_max_input_length = torch.max(decoder_input_lengths).item()

        cross_attention_mask = (
            torch.ones([encoder_outputs.shape[0], 1, encoder_outputs.shape[1]])
            .int()
            .cuda()
        )

        # generation config
        sampling_config = SamplingConfig(
            end_id=eot_id, pad_id=eot_id, num_beams=num_beams
        )
        # Session setup must precede decode; sizes here bound the buffers
        # the session allocates.
        self.decoder_generation_session.setup(
            decoder_input_lengths.size(0),
            decoder_max_input_length,
            max_new_tokens,
            beam_width=num_beams,
            encoder_max_input_length=encoder_outputs.shape[1],
        )

        torch.cuda.synchronize()

        decoder_input_ids = decoder_input_ids.type(torch.int32).cuda()
        if self.decoder_config["plugin_config"]["remove_input_padding"]:
            # 50256 is the index of <pad> for all whisper models' decoder
            WHISPER_PAD_TOKEN_ID = 50256
            decoder_input_ids = remove_tensor_padding(
                decoder_input_ids, pad_value=WHISPER_PAD_TOKEN_ID
            )
            if encoder_outputs.dim() == 3:
                encoder_output_lens = torch.full(
                    (encoder_outputs.shape[0],),
                    encoder_outputs.shape[1],
                    dtype=torch.int32,
                    device="cuda",
                )

                encoder_outputs = remove_tensor_padding(
                    encoder_outputs, encoder_output_lens
                )
        output_ids = self.decoder_generation_session.decode(
            decoder_input_ids,
            decoder_input_lengths,
            sampling_config,
            encoder_output=encoder_outputs,
            encoder_input_lengths=encoder_input_lengths,
            cross_attention_mask=cross_attention_mask,
        )
        torch.cuda.synchronize()

        # get the list of int from output_ids tensor
        output_ids = output_ids.cpu().numpy().tolist()
        return output_ids
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# Modified from https://github.com/openai/whisper/blob/main/whisper/tokenizer.py
|
|
16
|
+
import base64
|
|
17
|
+
import os
|
|
18
|
+
from typing import Optional
|
|
19
|
+
|
|
20
|
+
import tiktoken
|
|
21
|
+
|
|
22
|
+
# Language code -> English name for every language Whisper supports.
# Order matters: get_tokenizer() turns the first `num_languages` keys into
# the `<|xx|>` special tokens, so entries must stay in this exact order.
LANGUAGES = {
    "en": "english",
    "zh": "chinese",
    "de": "german",
    "es": "spanish",
    "ru": "russian",
    "ko": "korean",
    "fr": "french",
    "ja": "japanese",
    "pt": "portuguese",
    "tr": "turkish",
    "pl": "polish",
    "ca": "catalan",
    "nl": "dutch",
    "ar": "arabic",
    "sv": "swedish",
    "it": "italian",
    "id": "indonesian",
    "hi": "hindi",
    "fi": "finnish",
    "vi": "vietnamese",
    "he": "hebrew",
    "uk": "ukrainian",
    "el": "greek",
    "ms": "malay",
    "cs": "czech",
    "ro": "romanian",
    "da": "danish",
    "hu": "hungarian",
    "ta": "tamil",
    "no": "norwegian",
    "th": "thai",
    "ur": "urdu",
    "hr": "croatian",
    "bg": "bulgarian",
    "lt": "lithuanian",
    "la": "latin",
    "mi": "maori",
    "ml": "malayalam",
    "cy": "welsh",
    "sk": "slovak",
    "te": "telugu",
    "fa": "persian",
    "lv": "latvian",
    "bn": "bengali",
    "sr": "serbian",
    "az": "azerbaijani",
    "sl": "slovenian",
    "kn": "kannada",
    "et": "estonian",
    "mk": "macedonian",
    "br": "breton",
    "eu": "basque",
    "is": "icelandic",
    "hy": "armenian",
    "ne": "nepali",
    "mn": "mongolian",
    "bs": "bosnian",
    "kk": "kazakh",
    "sq": "albanian",
    "sw": "swahili",
    "gl": "galician",
    "mr": "marathi",
    "pa": "punjabi",
    "si": "sinhala",
    "km": "khmer",
    "sn": "shona",
    "yo": "yoruba",
    "so": "somali",
    "af": "afrikaans",
    "oc": "occitan",
    "ka": "georgian",
    "be": "belarusian",
    "tg": "tajik",
    "sd": "sindhi",
    "gu": "gujarati",
    "am": "amharic",
    "yi": "yiddish",
    "lo": "lao",
    "uz": "uzbek",
    "fo": "faroese",
    "ht": "haitian creole",
    "ps": "pashto",
    "tk": "turkmen",
    "nn": "nynorsk",
    "mt": "maltese",
    "sa": "sanskrit",
    "lb": "luxembourgish",
    "my": "myanmar",
    "bo": "tibetan",
    "tl": "tagalog",
    "mg": "malagasy",
    "as": "assamese",
    "tt": "tatar",
    "haw": "hawaiian",
    "ln": "lingala",
    "ha": "hausa",
    "ba": "bashkir",
    "jw": "javanese",
    "su": "sundanese",
    "yue": "cantonese",
}

# Inverse mapping: English language name -> code.
REVERSED_LANGUAGES = {v: k for k, v in LANGUAGES.items()}
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def get_tokenizer(
    name: str = "multilingual",
    num_languages: int = 99,
    tokenizer_dir: Optional[str] = None,
):
    """Build the tiktoken Encoding for a Whisper tokenizer vocabulary.

    Args:
        name: vocab file stem, e.g. "multilingual" -> "multilingual.tiktoken".
        num_languages: how many entries of LANGUAGES get `<|xx|>` tokens.
        tokenizer_dir: directory holding the vocab file; defaults to the
            package-local ``assets`` directory.

    Returns:
        tiktoken.Encoding with Whisper's special tokens appended after the
        base vocabulary.
    """
    if tokenizer_dir is None:
        vocab_path = os.path.join(os.path.dirname(__file__), f"assets/{name}.tiktoken")
    else:
        vocab_path = os.path.join(tokenizer_dir, f"{name}.tiktoken")
    # Fix: open via a context manager so the vocab file handle is closed
    # deterministically (the original left it to the garbage collector).
    with open(vocab_path) as vocab_file:
        ranks = {
            base64.b64decode(token): int(rank)
            for token, rank in (line.split() for line in vocab_file if line)
        }
    n_vocab = len(ranks)
    special_tokens = {}

    # Special tokens are appended after the base vocab, in this exact order,
    # so their ids match the ids the Whisper models were trained with.
    specials = [
        "<|endoftext|>",
        "<|startoftranscript|>",
        *[f"<|{lang}|>" for lang in list(LANGUAGES.keys())[:num_languages]],
        "<|translate|>",
        "<|transcribe|>",
        "<|startoflm|>",
        "<|startofprev|>",
        "<|nospeech|>",
        "<|notimestamps|>",
        # 1501 timestamp tokens: <|0.00|> .. <|30.00|> in 0.02 s steps.
        *[f"<|{i * 0.02:.2f}|>" for i in range(1501)],
    ]

    for token in specials:
        special_tokens[token] = n_vocab
        n_vocab += 1

    return tiktoken.Encoding(
        name=os.path.basename(vocab_path),
        explicit_n_vocab=n_vocab,
        pat_str=r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""",
        mergeable_ranks=ranks,
        special_tokens=special_tokens,
    )
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
if __name__ == "__main__":
    # Manual smoke test: round-trip a prompt with special tokens, decode two
    # hard-coded id sequences, and round-trip a Chinese string.
    enc = get_tokenizer()
    mytest_str = "<|startofprev|> Nvidia<|startoftranscript|><|en|><|transcribe|>"
    encoding = enc.encode(mytest_str, allowed_special=enc.special_tokens_set)
    mystr = enc.decode([50361, 45, 43021, 50258, 50259, 50359])
    mystr2 = enc.decode([50361, 46284, 50258, 50259, 50359])
    print(encoding, mystr, mystr2)
    # Print the id assigned to the start-of-transcript special token.
    print(
        enc.encode("<|startoftranscript|>", allowed_special=enc.special_tokens_set)[0]
    )

    my_zh_str = "好好学习"
    encoding = enc.encode(my_zh_str, allowed_special=enc.special_tokens_set)
    decoding = enc.decode(encoding)
    print(type(decoding))
    print(encoding, decoding)