xinference 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +400 -3
- xinference/client/restful/async_restful_client.py +20 -3
- xinference/client/restful/restful_client.py +20 -3
- xinference/constants.py +2 -0
- xinference/core/supervisor.py +111 -49
- xinference/core/worker.py +10 -0
- xinference/deploy/cmdline.py +15 -0
- xinference/model/audio/core.py +26 -6
- xinference/model/audio/indextts2.py +166 -0
- xinference/model/audio/kokoro.py +1 -1
- xinference/model/audio/kokoro_zh.py +124 -0
- xinference/model/audio/model_spec.json +58 -1
- xinference/model/embedding/sentence_transformers/core.py +4 -4
- xinference/model/embedding/vllm/core.py +7 -1
- xinference/model/image/model_spec.json +71 -3
- xinference/model/image/stable_diffusion/core.py +13 -4
- xinference/model/llm/__init__.py +4 -0
- xinference/model/llm/core.py +10 -0
- xinference/model/llm/llama_cpp/core.py +1 -0
- xinference/model/llm/llm_family.json +503 -21
- xinference/model/llm/llm_family.py +1 -0
- xinference/model/llm/mlx/core.py +52 -33
- xinference/model/llm/sglang/core.py +32 -55
- xinference/model/llm/tool_parsers/__init__.py +58 -0
- xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
- xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +190 -0
- xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
- xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
- xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
- xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
- xinference/model/llm/transformers/core.py +1 -1
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
- xinference/model/llm/utils.py +138 -53
- xinference/model/llm/vllm/core.py +95 -78
- xinference/thirdparty/audiotools/__init__.py +10 -0
- xinference/thirdparty/audiotools/core/__init__.py +4 -0
- xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
- xinference/thirdparty/audiotools/core/display.py +194 -0
- xinference/thirdparty/audiotools/core/dsp.py +390 -0
- xinference/thirdparty/audiotools/core/effects.py +647 -0
- xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
- xinference/thirdparty/audiotools/core/loudness.py +320 -0
- xinference/thirdparty/audiotools/core/playback.py +252 -0
- xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
- xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
- xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
- xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
- xinference/thirdparty/audiotools/core/util.py +671 -0
- xinference/thirdparty/audiotools/core/whisper.py +97 -0
- xinference/thirdparty/audiotools/data/__init__.py +3 -0
- xinference/thirdparty/audiotools/data/datasets.py +517 -0
- xinference/thirdparty/audiotools/data/preprocess.py +81 -0
- xinference/thirdparty/audiotools/data/transforms.py +1592 -0
- xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
- xinference/thirdparty/audiotools/metrics/distance.py +131 -0
- xinference/thirdparty/audiotools/metrics/quality.py +159 -0
- xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
- xinference/thirdparty/audiotools/ml/__init__.py +5 -0
- xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
- xinference/thirdparty/audiotools/ml/decorators.py +440 -0
- xinference/thirdparty/audiotools/ml/experiment.py +90 -0
- xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
- xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
- xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
- xinference/thirdparty/audiotools/post.py +140 -0
- xinference/thirdparty/audiotools/preference.py +600 -0
- xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
- xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
- xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
- xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
- xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
- xinference/thirdparty/indextts/__init__.py +0 -0
- xinference/thirdparty/indextts/cli.py +65 -0
- xinference/thirdparty/indextts/gpt/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
- xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
- xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
- xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
- xinference/thirdparty/indextts/gpt/model.py +713 -0
- xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
- xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
- xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
- xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
- xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
- xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
- xinference/thirdparty/indextts/infer.py +690 -0
- xinference/thirdparty/indextts/infer_v2.py +739 -0
- xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
- xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
- xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
- xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
- xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
- xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
- xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
- xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
- xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
- xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
- xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
- xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
- xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
- xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
- xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
- xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
- xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
- xinference/thirdparty/indextts/utils/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/arch_util.py +120 -0
- xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
- xinference/thirdparty/indextts/utils/common.py +121 -0
- xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
- xinference/thirdparty/indextts/utils/front.py +536 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
- xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
- xinference/thirdparty/indextts/utils/text_utils.py +41 -0
- xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
- xinference/thirdparty/indextts/utils/utils.py +93 -0
- xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
- xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
- xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
- xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
- xinference/types.py +105 -2
- xinference/ui/gradio/media_interface.py +66 -8
- xinference/ui/web/ui/build/asset-manifest.json +6 -6
- xinference/ui/web/ui/build/index.html +1 -1
- xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
- xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
- xinference/ui/web/ui/build/static/js/main.d192c4f3.js +3 -0
- xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.d192c4f3.js.LICENSE.txt} +0 -7
- xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +1 -0
- xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
- xinference/ui/web/ui/package-lock.json +0 -34
- xinference/ui/web/ui/package.json +0 -1
- xinference/ui/web/ui/src/locales/en.json +9 -3
- xinference/ui/web/ui/src/locales/ja.json +9 -3
- xinference/ui/web/ui/src/locales/ko.json +9 -3
- xinference/ui/web/ui/src/locales/zh.json +9 -3
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/METADATA +24 -4
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/RECORD +302 -76
- xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
- xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
- xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
- xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
- xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
- xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
- xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
- xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
- xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
- xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
- xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
- xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
- xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
- xinference/ui/web/ui/node_modules/select/bower.json +0 -13
- xinference/ui/web/ui/node_modules/select/package.json +0 -29
- xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/WHEEL +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED

@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2025-
+ "date": "2025-09-30T23:23:16+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "
- "version": "1.
+ "full-revisionid": "98a3c8ad4a6debd97ef29cc05aad3514f4ba488c",
+ "version": "1.10.1"
 }
 '''  # END VERSION_JSON
 
xinference/api/restful_api.py
CHANGED

@@ -14,6 +14,7 @@
 
 import asyncio
 import inspect
+import ipaddress
 import json
 import logging
 import multiprocessing
@@ -21,6 +22,7 @@ import os
 import pprint
 import sys
 import time
+import uuid
 import warnings
 from typing import Any, Dict, List, Optional, Union
 
@@ -53,6 +55,7 @@ from xoscar.utils import get_next_port
 from .._compat import BaseModel, Field
 from .._version import get_versions
 from ..constants import (
+    XINFERENCE_ALLOWED_IPS,
     XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
     XINFERENCE_DEFAULT_ENDPOINT_PORT,
     XINFERENCE_DISABLE_METRICS,
@@ -61,11 +64,16 @@ from ..constants import (
 from ..core.event import Event, EventCollectorActor, EventType
 from ..core.supervisor import SupervisorActor
 from ..core.utils import CancelMixin, json_dumps
+
+# Import Anthropic-related types and availability flag
 from ..types import (
+    ANTHROPIC_AVAILABLE,
+    AnthropicMessage,
     ChatCompletion,
     Completion,
     CreateChatCompletion,
     CreateCompletion,
+    CreateMessage,
     ImageList,
     PeftModelConfig,
     SDAPIResult,
@@ -213,6 +221,9 @@ class BuildGradioMediaInterfaceRequest(BaseModel):
 
 
 class RESTfulAPI(CancelMixin):
+    # Add new class attributes
+    _allowed_ip_list: Optional[List[ipaddress.IPv4Network]] = None
+
     def __init__(
         self,
         supervisor_address: str,
@@ -229,6 +240,45 @@ class RESTfulAPI(CancelMixin):
         self._auth_service = AuthService(auth_config_file)
         self._router = APIRouter()
         self._app = FastAPI()
+        # Initialize allowed IP list once
+        self._init_allowed_ip_list()
+
+    def _init_allowed_ip_list(self):
+        """Initialize the allowed IP list from environment variable."""
+        if RESTfulAPI._allowed_ip_list is None:
+            # ie: export XINFERENCE_ALLOWED_IPS=192.168.1.0/24
+            allowed_ips = XINFERENCE_ALLOWED_IPS
+            if allowed_ips:
+                RESTfulAPI._allowed_ip_list = []
+                for ip in allowed_ips.split(","):
+                    ip = ip.strip()
+                    try:
+                        # Try parsing as network/CIDR
+                        if "/" in ip:
+                            RESTfulAPI._allowed_ip_list.append(ipaddress.ip_network(ip))
+                        else:
+                            # Parse as single IP
+                            RESTfulAPI._allowed_ip_list.append(
+                                ipaddress.ip_network(f"{ip}/32")
+                            )
+                    except ValueError:
+                        logger.error(
+                            f"Invalid IP address or network: {ip}", exc_info=True
+                        )
+                        continue
+
+    def _is_ip_allowed(self, ip: str) -> bool:
+        """Check if an IP is allowed based on configured rules."""
+        if not RESTfulAPI._allowed_ip_list:
+            return True
+
+        try:
+            client_ip = ipaddress.ip_address(ip)
+            return any(
+                client_ip in allowed_net for allowed_net in RESTfulAPI._allowed_ip_list
+            )
+        except ValueError:
+            return False
 
     def is_authenticated(self):
         return False if self._auth_service.config is None else True
@@ -287,6 +337,16 @@ class RESTfulAPI(CancelMixin):
             allow_headers=["*"],
         )
 
+        @self._app.middleware("http")
+        async def ip_restriction_middleware(request: Request, call_next):
+            client_ip = request.client.host
+            if not self._is_ip_allowed(client_ip):
+                return PlainTextResponse(
+                    status_code=403, content=f"Access denied for IP: {client_ip}\n"
+                )
+            response = await call_next(request)
+            return response
+
         @self._app.exception_handler(500)
         async def internal_exception_handler(request: Request, exc: Exception):
             logger.exception("Handling request %s failed: %s", request.url, exc)
@@ -532,6 +592,40 @@ class RESTfulAPI(CancelMixin):
                 else None
             ),
         )
+        # Register messages endpoint only if Anthropic is available
+        if ANTHROPIC_AVAILABLE:
+            self._router.add_api_route(
+                "/anthropic/v1/messages",
+                self.create_message,
+                methods=["POST"],
+                response_model=AnthropicMessage,
+                dependencies=(
+                    [Security(self._auth_service, scopes=["models:read"])]
+                    if self.is_authenticated()
+                    else None
+                ),
+            )
+            # Register Anthropic models endpoints
+            self._router.add_api_route(
+                "/anthropic/v1/models",
+                self.anthropic_list_models,
+                methods=["GET"],
+                dependencies=(
+                    [Security(self._auth_service, scopes=["models:list"])]
+                    if self.is_authenticated()
+                    else None
+                ),
+            )
+            self._router.add_api_route(
+                "/anthropic/v1/models/{model_id}",
+                self.anthropic_get_model,
+                methods=["GET"],
+                dependencies=(
+                    [Security(self._auth_service, scopes=["models:list"])]
+                    if self.is_authenticated()
+                    else None
+                ),
+            )
         self._router.add_api_route(
             "/v1/embeddings",
             self.create_embedding,
@@ -994,6 +1088,58 @@ class RESTfulAPI(CancelMixin):
             logger.error(e, exc_info=True)
             raise HTTPException(status_code=500, detail=str(e))
 
+    async def anthropic_list_models(self) -> JSONResponse:
+        """Anthropic-compatible models endpoint"""
+        try:
+
+            # Get running models from xinference
+            running_models = await (await self._get_supervisor_ref()).list_models()
+
+            # For backward compatibility with tests, only return running models by default
+            model_list = []
+
+            # Add running models to the list
+            for model_id, model_info in running_models.items():
+                anthropic_model = {
+                    "id": model_id,
+                    "object": "model",
+                    "created": 0,
+                    "display_name": model_info.get("model_name", model_id),
+                    "type": model_info.get("model_type", "model"),
+                    "max_tokens": model_info.get("context_length", 4096),
+                }
+                model_list.append(anthropic_model)
+
+            return JSONResponse(content=model_list)
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+
+    async def anthropic_get_model(self, model_id: str) -> JSONResponse:
+        """Anthropic-compatible model retrieval endpoint"""
+        try:
+            models = await (await self._get_supervisor_ref()).list_models()
+
+            model_info = models[model_id]
+
+            # Convert to Anthropic format
+            anthropic_model = {
+                "id": model_id,  # Return the original requested ID
+                "object": "model",
+                "created": 0,
+                "display_name": model_info.get("model_name", model_id),
+                "type": model_info.get("model_type", "model"),
+                "max_tokens": model_info.get("context_length", 4096),
+                **model_info,
+            }
+
+            return JSONResponse(content=anthropic_model)
+        except HTTPException:
+            raise
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+
     async def describe_model(self, model_uid: str) -> JSONResponse:
         try:
             data = await (await self._get_supervisor_ref()).describe_model(model_uid)
@@ -1417,6 +1563,151 @@ class RESTfulAPI(CancelMixin):
             self.handle_request_limit_error(e)
             raise HTTPException(status_code=500, detail=str(e))
 
+    async def create_message(self, request: Request) -> Response:
+        raw_body = await request.json()
+        body = CreateMessage.parse_obj(raw_body)
+
+        exclude = {
+            "model",
+            "messages",
+            "stream",
+            "stop_sequences",
+            "metadata",
+            "tool_choice",
+            "tools",
+        }
+        raw_kwargs = {k: v for k, v in raw_body.items() if k not in exclude}
+        kwargs = body.dict(exclude_unset=True, exclude=exclude)
+
+        # guided_decoding params
+        kwargs.update(self.extract_guided_params(raw_body=raw_body))
+
+        # TODO: Decide if this default value override is necessary #1061
+        if body.max_tokens is None:
+            kwargs["max_tokens"] = max_tokens_field.default
+
+        messages = body.messages and list(body.messages)
+
+        if not messages or messages[-1].get("role") not in ["user", "assistant"]:
+            raise HTTPException(
+                status_code=400, detail="Invalid input. Please specify the prompt."
+            )
+
+        # Handle tools parameter
+        if hasattr(body, "tools") and body.tools:
+            kwargs["tools"] = body.tools
+
+        # Handle tool_choice parameter
+        if hasattr(body, "tool_choice") and body.tool_choice:
+            kwargs["tool_choice"] = body.tool_choice
+
+        # Get model mapping
+        try:
+            running_models = await (await self._get_supervisor_ref()).list_models()
+        except Exception as e:
+            logger.error(f"Failed to get model mapping: {e}", exc_info=True)
+            raise HTTPException(status_code=500, detail="Failed to get model mapping")
+
+        if not running_models:
+            raise HTTPException(
+                status_code=400,
+                detail=f"No running models available. Please start a model in xinference first.",
+            )
+
+        requested_model_id = body.model
+        if "claude" in requested_model_id:
+            requested_model_id = list(running_models.keys())[0]
+
+        if requested_model_id not in running_models:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Model '{requested_model_id}' is not available. Available models: {list(running_models.keys())}",
+            )
+        else:
+            model_uid = requested_model_id
+
+        try:
+            model = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+        if body.stream:
+
+            async def stream_results():
+                iterator = None
+                try:
+                    try:
+                        iterator = await model.chat(
+                            messages, kwargs, raw_params=raw_kwargs
+                        )
+                    except RuntimeError as re:
+                        self.handle_request_limit_error(re)
+
+                    # Check if iterator is actually an async iterator
+                    if hasattr(iterator, "__aiter__"):
+                        async for item in iterator:
+                            yield item
+                    elif isinstance(iterator, (str, bytes)):
+                        # Handle case where chat returns bytes/string instead of iterator
+                        if isinstance(iterator, bytes):
+                            try:
+                                content = iterator.decode("utf-8")
+                            except UnicodeDecodeError:
+                                content = str(iterator)
+                        else:
+                            content = iterator
+                        yield dict(data=json.dumps({"content": content}))
+                    else:
+                        # Fallback: try to iterate normally
+                        try:
+                            for item in iterator:
+                                yield item
+                        except TypeError:
+                            # If not iterable, yield as single result
+                            yield dict(data=json.dumps({"content": str(iterator)}))
+
+                    yield "[DONE]"
+                except asyncio.CancelledError:
+                    logger.info(
+                        f"Disconnected from client (via refresh/close) {request.client} during chat."
+                    )
+                    return
+                except Exception as ex:
+                    ex = await self._get_model_last_error(model.uid, ex)
+                    logger.exception("Message stream got an error: %s", ex)
+                    await self._report_error_event(model_uid, str(ex))
+                    yield dict(data=json.dumps({"error": str(ex)}))
+                    return
+                finally:
+                    await model.decrease_serve_count()
+
+            return EventSourceResponse(
+                stream_results(), ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS
+            )
+        else:
+            try:
+                data = await model.chat(messages, kwargs, raw_params=raw_kwargs)
+                # Convert OpenAI format to Anthropic format
+                openai_response = json.loads(data)
+                anthropic_response = self._convert_openai_to_anthropic(
+                    openai_response, body.model
+                )
+                return Response(
+                    json.dumps(anthropic_response), media_type="application/json"
+                )
+            except Exception as e:
+                e = await self._get_model_last_error(model.uid, e)
+                logger.error(e, exc_info=True)
+                await self._report_error_event(model_uid, str(e))
+                self.handle_request_limit_error(e)
+                raise HTTPException(status_code=500, detail=str(e))
+
     async def create_embedding(self, request: Request) -> Response:
         payload = await request.json()
         body = CreateEmbeddingRequest.parse_obj(payload)
@@ -1845,7 +2136,7 @@ class RESTfulAPI(CancelMixin):
     async def create_variations(
         self,
         model: str = Form(...),
-        image: UploadFile = File(media_type="application/octet-stream"),
+        image: List[UploadFile] = File(media_type="application/octet-stream"),
         prompt: Optional[Union[str, List[str]]] = Form(None),
         negative_prompt: Optional[Union[str, List[str]]] = Form(None),
         n: Optional[int] = Form(1),
@@ -1873,8 +2164,17 @@ class RESTfulAPI(CancelMixin):
             parsed_kwargs = {}
         request_id = parsed_kwargs.get("request_id")
         self._add_running_task(request_id)
+
+        # Handle single image or multiple images
+        if len(image) == 1:
+            # Single image
+            image_data = Image.open(image[0].file)
+        else:
+            # Multiple images - convert to list of PIL Images
+            image_data = [Image.open(img.file) for img in image]
+
         image_list = await model_ref.image_to_image(
-            image=
+            image=image_data,
             prompt=prompt,
             negative_prompt=negative_prompt,
             n=n,
@@ -2371,7 +2671,14 @@ class RESTfulAPI(CancelMixin):
             data = await (await self._get_supervisor_ref()).list_model_registrations(
                 model_type, detailed=detailed
             )
-
+            # Remove duplicate model names.
+            model_names = set()
+            final_data = []
+            for item in data:
+                if item["model_name"] not in model_names:
+                    model_names.add(item["model_name"])
+                    final_data.append(item)
+            return JSONResponse(content=final_data)
         except ValueError as re:
             logger.error(re, exc_info=True)
             raise HTTPException(status_code=400, detail=str(re))
@@ -2603,6 +2910,96 @@ class RESTfulAPI(CancelMixin):
 
         return kwargs
 
+    def _convert_openai_to_anthropic(self, openai_response: dict, model: str) -> dict:
+        """
+        Convert OpenAI response format to Anthropic response format.
+
+        Args:
+            openai_response: OpenAI format response
+            model: Model name
+
+        Returns:
+            Anthropic format response
+        """
+
+        # Extract content and tool calls from OpenAI response
+        content_blocks = []
+        stop_reason = "stop"
+
+        if "choices" in openai_response and len(openai_response["choices"]) > 0:
+            choice = openai_response["choices"][0]
+            message = choice.get("message", {})
+
+            # Handle content text
+            content = message.get("content", "")
+            if content:
+                if isinstance(content, str):
+                    # If content is a string, use it directly
+                    content_blocks.append({"type": "text", "text": content})
+                elif isinstance(content, list):
+                    # If content is a list, extract text from each content block
+                    for content_block in content:
+                        if isinstance(content_block, dict):
+                            if content_block.get("type") == "text":
+                                text = content_block.get("text", "")
+                                if text:
+                                    content_blocks.append(
+                                        {"type": "text", "text": text}
+                                    )
+                            elif "text" in content_block:
+                                # Handle different content block format
+                                text = content_block.get("text", "")
+                                if text:
+                                    content_blocks.append(
+                                        {"type": "text", "text": text}
+                                    )
+
+            # Handle tool calls
+            tool_calls = message.get("tool_calls", [])
+            for tool_call in tool_calls:
+                function = tool_call.get("function", {})
+                arguments = function.get("arguments", "{}")
+                try:
+                    input_data = json.loads(arguments)
+                except json.JSONDecodeError:
+                    input_data = {}
+                tool_use_block = {
+                    "type": "tool_use",
+                    "cache_control": {"type": "ephemeral"},
+                    "id": tool_call.get("id", str(uuid.uuid4())),
+                    "name": function.get("name", ""),
+                    "input": input_data,
+                }
+                content_blocks.append(tool_use_block)
+
+            # Set stop reason based on finish reason
+            finish_reason = choice.get("finish_reason", "stop")
+            if finish_reason == "tool_calls":
+                stop_reason = "tool_use"
+
+        # Build Anthropic response
+        anthropic_response = {
+            "id": str(uuid.uuid4()),
+            "type": "message",
+            "role": "assistant",
+            "content": content_blocks,
+            "model": model,
+            "stop_reason": stop_reason,
+            "stop_sequence": None,
+            "usage": {
+                "input_tokens": openai_response.get("usage", {}).get(
+                    "prompt_tokens", 0
+                ),
+                "output_tokens": openai_response.get("usage", {}).get(
+                    "completion_tokens", 0
+                ),
+                "cache_creation_input_tokens": 0,
+                "cache_read_input_tokens": 0,
+            },
+        }
+
+        return anthropic_response
+
 
 def run(
     supervisor_address: str,
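
The hunks above register Anthropic-compatible routes (/anthropic/v1/messages, /anthropic/v1/models, /anthropic/v1/models/{model_id}) when ANTHROPIC_AVAILABLE is true, and translate OpenAI-style chat completions into Anthropic-style message responses. A minimal sketch of calling the new messages route with plain requests; the server address (default port 9997 assumed) and the model UID "my-chat-model" are placeholders, not names from this diff:

# Hedged sketch, not part of the package: exercise the new Anthropic-compatible route.
import requests

resp = requests.post(
    "http://127.0.0.1:9997/anthropic/v1/messages",
    json={
        "model": "my-chat-model",  # per the handler, names containing "claude" are remapped to the first running model
        "max_tokens": 256,
        "messages": [{"role": "user", "content": "Hello!"}],
    },
    timeout=60,
)
resp.raise_for_status()
msg = resp.json()
# The non-streaming handler returns Anthropic-style fields:
# id, type, role, content, model, stop_reason, stop_sequence, usage.
print(msg["stop_reason"], msg["content"][0]["text"])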
xinference/client/restful/async_restful_client.py
CHANGED

@@ -285,7 +285,7 @@ class AsyncRESTfulImageModelHandle(AsyncRESTfulModelHandle):
 
     async def image_to_image(
         self,
-        image: Union[str, bytes],
+        image: Union[str, bytes, List[Union[str, bytes]]],
         prompt: str,
         negative_prompt: Optional[str] = None,
         n: int = 1,
@@ -298,7 +298,7 @@ class AsyncRESTfulImageModelHandle(AsyncRESTfulModelHandle):
 
         Parameters
         ----------
-        image: `Union[str, bytes]`
+        image: `Union[str, bytes, List[Union[str, bytes]]]`
            The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
            specified as `torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be
            accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If height
@@ -338,7 +338,24 @@ class AsyncRESTfulImageModelHandle(AsyncRESTfulModelHandle):
         files: List[Any] = []
         for key, value in params.items():
             files.append((key, (None, value)))
-
+
+        # Handle both single image and list of images
+        if isinstance(image, list):
+            if len(image) == 0:
+                raise ValueError("Image list cannot be empty")
+            elif len(image) == 1:
+                # Single image in list, use it directly
+                files.append(("image", ("image", image[0], "application/octet-stream")))
+            else:
+                # Multiple images - send all images with same field name
+                # FastAPI will collect them into a list
+                for img_data in image:
+                    files.append(
+                        ("image", ("image", img_data, "application/octet-stream"))
+                    )
+        else:
+            # Single image
+            files.append(("image", ("image", image, "application/octet-stream")))
         response = await self.session.post(url, files=files, headers=self.auth_headers)
         if response.status != 200:
             raise RuntimeError(
xinference/client/restful/restful_client.py
CHANGED

@@ -250,7 +250,7 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
 
     def image_to_image(
         self,
-        image: Union[str, bytes],
+        image: Union[str, bytes, List[Union[str, bytes]]],
         prompt: str,
         negative_prompt: Optional[str] = None,
         n: int = 1,
@@ -263,7 +263,7 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
 
         Parameters
         ----------
-        image: `Union[str, bytes]`
+        image: `Union[str, bytes, List[Union[str, bytes]]]`
            The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
            specified as `torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be
            accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If height
@@ -302,7 +302,24 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
         files: List[Any] = []
         for key, value in params.items():
             files.append((key, (None, value)))
-
+
+        # Handle both single image and list of images
+        if isinstance(image, list):
+            if len(image) == 0:
+                raise ValueError("Image list cannot be empty")
+            elif len(image) == 1:
+                # Single image in list, use it directly
+                files.append(("image", ("image", image[0], "application/octet-stream")))
+            else:
+                # Multiple images - send all images with same field name
+                # FastAPI will collect them into a list
+                for img_data in image:
+                    files.append(
+                        ("image", ("image", img_data, "application/octet-stream"))
+                    )
+        else:
+            # Single image
+            files.append(("image", ("image", image, "application/octet-stream")))
        response = self.session.post(url, files=files, headers=self.auth_headers)
        if response.status_code != 200:
            raise RuntimeError(
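
Both image handles (the async one in the previous section and the sync one above) now accept a list of images for image_to_image in addition to a single str/bytes value; each image is sent under the same "image" form field so the server side (see the create_variations change in restful_api.py) collects them into a List[UploadFile]. A minimal usage sketch against the sync client; the server address, file names, and model UID are placeholders:

# Hedged sketch, not part of the package: pass several reference images at once.
from xinference.client import Client

client = Client("http://127.0.0.1:9997")
model = client.get_model("my-image-model")  # an image model launched beforehand

with open("ref_a.png", "rb") as fa, open("ref_b.png", "rb") as fb:
    result = model.image_to_image(
        image=[fa.read(), fb.read()],  # multiple images in one request
        prompt="blend the two references into a single scene",
        n=1,
    )
print(result)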
xinference/constants.py
CHANGED

@@ -33,6 +33,7 @@ XINFERENCE_ENV_VIRTUAL_ENV = "XINFERENCE_ENABLE_VIRTUAL_ENV"
 XINFERENCE_ENV_VIRTUAL_ENV_SKIP_INSTALLED = "XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED"
 XINFERENCE_ENV_SSE_PING_ATTEMPTS_SECONDS = "XINFERENCE_SSE_PING_ATTEMPTS_SECONDS"
 XINFERENCE_ENV_MAX_TOKENS = "XINFERENCE_MAX_TOKENS"
+XINFERENCE_ENV_ALLOWED_IPS = "XINFERENCE_ALLOWED_IPS"
 
 
 def get_xinference_home() -> str:
@@ -110,3 +111,4 @@ XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED = (
     if os.getenv(XINFERENCE_ENV_VIRTUAL_ENV_SKIP_INSTALLED)
     else None
 )
+XINFERENCE_ALLOWED_IPS = os.getenv(XINFERENCE_ENV_ALLOWED_IPS)
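
The new XINFERENCE_ALLOWED_IPS variable feeds the IP allow-list middleware added in restful_api.py: a comma-separated mix of CIDR networks and bare addresses, where bare addresses are treated as /32 hosts and non-matching clients receive HTTP 403. A small sketch mirroring that parsing and matching logic with the standard ipaddress module; the example value and client addresses are illustrative only:

# Hedged sketch, not part of the package: how allow-list entries are interpreted.
import ipaddress

allowed_ips = "192.168.1.0/24, 10.0.0.5"  # e.g. export XINFERENCE_ALLOWED_IPS=192.168.1.0/24,10.0.0.5

allowed = [
    ipaddress.ip_network(e.strip() if "/" in e else f"{e.strip()}/32")
    for e in allowed_ips.split(",")
]

for client in ("192.168.1.42", "10.0.0.5", "203.0.113.7"):
    ok = any(ipaddress.ip_address(client) in net for net in allowed)
    print(client, "allowed" if ok else "rejected with HTTP 403")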