PyPI - valor-lite - Versions diffs - 0.33.14__py3-none-any.whl → 0.33.16__py3-none-any.whl - Mend

valor-lite 0.33.14__py3-none-any.whl → 0.33.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of valor-lite might be problematic. Click here for more details.

Files changed (24) hide show

valor_lite/classification/metric.py +20 -0
valor_lite/object_detection/computation.py +5 -2
valor_lite/object_detection/manager.py +1 -1
valor_lite/object_detection/metric.py +20 -0
valor_lite/schemas.py +0 -6
valor_lite/semantic_segmentation/metric.py +20 -0
valor_lite/text_generation/__init__.py +15 -0
valor_lite/text_generation/annotation.py +56 -0
valor_lite/text_generation/computation.py +609 -0
valor_lite/text_generation/llm/__init__.py +0 -0
valor_lite/text_generation/llm/exceptions.py +14 -0
valor_lite/text_generation/llm/generation.py +903 -0
valor_lite/text_generation/llm/instructions.py +814 -0
valor_lite/text_generation/llm/integrations.py +226 -0
valor_lite/text_generation/llm/utilities.py +43 -0
valor_lite/text_generation/llm/validators.py +68 -0
valor_lite/text_generation/manager.py +697 -0
valor_lite/text_generation/metric.py +381 -0
{valor_lite-0.33.14.dist-info → valor_lite-0.33.16.dist-info}/METADATA +11 -3
valor_lite-0.33.16.dist-info/RECORD +38 -0
{valor_lite-0.33.14.dist-info → valor_lite-0.33.16.dist-info}/WHEEL +1 -1
valor_lite-0.33.14.dist-info/RECORD +0 -27
{valor_lite-0.33.14.dist-info → valor_lite-0.33.16.dist-info}/LICENSE +0 -0
{valor_lite-0.33.14.dist-info → valor_lite-0.33.16.dist-info}/top_level.txt +0 -0

valor_lite/text_generation/metric.py ADDED Viewed

@@ -0,0 +1,381 @@
+from dataclasses import dataclass
+from enum import Enum
+from valor_lite.schemas import BaseMetric
+class MetricType(str, Enum):
+    """
+    Names of the text generation metrics.
+    Members mix in `str`, so each one compares equal to its string value and passes
+    an `isinstance(..., str)` check. Every value is identical to its member name.
+    """
+    # Members are kept in declaration order; iterating the enum yields them in this order.
+    AnswerCorrectness = "AnswerCorrectness"
+    AnswerRelevance = "AnswerRelevance"
+    Bias = "Bias"
+    BLEU = "BLEU"
+    ContextPrecision = "ContextPrecision"
+    ContextRecall = "ContextRecall"
+    ContextRelevance = "ContextRelevance"
+    Faithfulness = "Faithfulness"
+    Hallucination = "Hallucination"
+    ROUGE = "ROUGE"
+    SummaryCoherence = "SummaryCoherence"
+    Toxicity = "Toxicity"
+@dataclass
+class Metric(BaseMetric):
+    """
+    Text Generation Metric.
+    Field types are validated in `__post_init__`. The classmethods below are
+    convenience constructors that fill in `type` and `parameters` for each metric.
+    Attributes
+    ----------
+    type : str
+        The metric type.
+    value : int | float | dict
+        The metric value.
+    parameters : dict[str, Any]
+        A dictionary containing metric parameters.
+    """
+    def __post_init__(self):
+        """
+        Validates the field types once the dataclass has been initialized.
+        Raises
+        ------
+        TypeError
+            If `type` is not a string, `value` is not an int, float or dict,
+            `parameters` is not a dict, or a key of `parameters` is not a string.
+        """
+        if not isinstance(self.type, str):
+            raise TypeError(
+                f"Metric type should be of type 'str': {self.type}"
+            )
+        if not isinstance(self.value, (int, float, dict)):
+            raise TypeError(
+                f"Metric value must be of type 'int', 'float' or 'dict': {self.value}"
+            )
+        if not isinstance(self.parameters, dict):
+            raise TypeError(
+                f"Metric parameters must be of type 'dict[str, Any]': {self.parameters}"
+            )
+        # Iterating a dict yields its keys; a generator lets `all` stop at the first bad key.
+        if not all(isinstance(key, str) for key in self.parameters):
+            raise TypeError(
+                f"Metric parameter dictionary should only have keys with type 'str': {self.parameters}"
+            )
+    @classmethod
+    def _evaluated(
+        cls,
+        metric_type: str,
+        value: int | float | dict,
+        model_name: str,
+        retries: int,
+    ) -> "Metric":
+        """
+        Builds a metric whose parameters record the evaluator and retry count.
+        Shared by every constructor that takes `model_name` and `retries`, so the
+        parameter layout is defined in exactly one place.
+        Parameters
+        ----------
+        metric_type : str
+            The value stored in `type`.
+        value : int | float | dict
+            The value stored in `value`.
+        model_name : str
+            Stored under the `evaluator` parameter.
+        retries : int
+            Stored under the `retries` parameter.
+        """
+        return cls(
+            type=metric_type,
+            value=value,
+            parameters={
+                "evaluator": model_name,
+                "retries": retries,
+            },
+        )
+    @classmethod
+    def error(
+        cls,
+        error_type: str,
+        error_message: str,
+        model_name: str,
+        retries: int,
+    ) -> "Metric":
+        """
+        Defines an error metric.
+        The resulting metric has type "Error", which is not a member of `MetricType`.
+        Parameters
+        ----------
+        error_type : str
+            Stored under the `type` key of the metric value.
+        error_message : str
+            Stored under the `message` key of the metric value.
+        model_name : str
+            Stored under the `evaluator` parameter.
+        retries : int
+            Stored under the `retries` parameter.
+        """
+        return cls._evaluated(
+            "Error",
+            {
+                "type": error_type,
+                "message": error_message,
+            },
+            model_name,
+            retries,
+        )
+    @classmethod
+    def answer_correctness(
+        cls,
+        value: float,
+        model_name: str,
+        retries: int,
+    ) -> "Metric":
+        """
+        Defines an answer correctness metric.
+        Parameters
+        ----------
+        value : float
+            The answer correctness score between 0 and 1, with higher values indicating that the answer
+            is more correct. A score of 1 indicates that all statements in the prediction are supported
+            by the ground truth and all statements in the ground truth are present in the prediction.
+        model_name : str
+            Stored under the `evaluator` parameter.
+        retries : int
+            Stored under the `retries` parameter.
+        """
+        return cls._evaluated(
+            MetricType.AnswerCorrectness, value, model_name, retries
+        )
+    @classmethod
+    def answer_relevance(
+        cls,
+        value: float,
+        model_name: str,
+        retries: int,
+    ) -> "Metric":
+        """
+        Defines an answer relevance metric.
+        Parameters
+        ----------
+        value : float
+            The number of statements in the answer that are relevant to the query divided by the total
+            number of statements in the answer.
+        model_name : str
+            Stored under the `evaluator` parameter.
+        retries : int
+            Stored under the `retries` parameter.
+        """
+        return cls._evaluated(
+            MetricType.AnswerRelevance, value, model_name, retries
+        )
+    @classmethod
+    def bleu(
+        cls,
+        value: float,
+        weights: list[float],
+    ) -> "Metric":
+        """
+        Defines a BLEU metric.
+        Parameters
+        ----------
+        value : float
+            The BLEU score for an individual datapoint.
+        weights : list[float]
+            The list of weights that the score was calculated with.
+        """
+        return cls(
+            type=MetricType.BLEU,
+            value=value,
+            parameters={
+                "weights": weights,
+            },
+        )
+    @classmethod
+    def bias(
+        cls,
+        value: float,
+        model_name: str,
+        retries: int,
+    ) -> "Metric":
+        """
+        Defines a bias metric.
+        Parameters
+        ----------
+        value : float
+            The bias score for a datum. This is a float between 0 and 1, with 1 indicating that all
+            opinions in the datum text are biased and 0 indicating that there is no bias.
+        model_name : str
+            Stored under the `evaluator` parameter.
+        retries : int
+            Stored under the `retries` parameter.
+        """
+        return cls._evaluated(MetricType.Bias, value, model_name, retries)
+    @classmethod
+    def context_precision(
+        cls,
+        value: float,
+        model_name: str,
+        retries: int,
+    ) -> "Metric":
+        """
+        Defines a context precision metric.
+        Parameters
+        ----------
+        value : float
+            The context precision score for a datum. This is a float between 0 and 1, with 0 indicating
+            that none of the contexts are useful to arrive at the ground truth answer to the query
+            and 1 indicating that all contexts are useful to arrive at the ground truth answer to the
+            query. The score is more heavily influenced by earlier contexts in the list of contexts
+            than later contexts.
+        model_name : str
+            Stored under the `evaluator` parameter.
+        retries : int
+            Stored under the `retries` parameter.
+        """
+        return cls._evaluated(
+            MetricType.ContextPrecision, value, model_name, retries
+        )
+    @classmethod
+    def context_recall(
+        cls,
+        value: float,
+        model_name: str,
+        retries: int,
+    ) -> "Metric":
+        """
+        Defines a context recall metric.
+        Parameters
+        ----------
+        value : float
+            The context recall score for a datum. This is a float between 0 and 1, with 1 indicating
+            that all ground truth statements are attributable to the context list.
+        model_name : str
+            Stored under the `evaluator` parameter.
+        retries : int
+            Stored under the `retries` parameter.
+        """
+        return cls._evaluated(
+            MetricType.ContextRecall, value, model_name, retries
+        )
+    @classmethod
+    def context_relevance(
+        cls,
+        value: float,
+        model_name: str,
+        retries: int,
+    ) -> "Metric":
+        """
+        Defines a context relevance metric.
+        Parameters
+        ----------
+        value : float
+            The context relevance score for a datum. This is a float between 0 and 1, with 0 indicating
+            that none of the contexts are relevant and 1 indicating that all of the contexts are relevant.
+        model_name : str
+            Stored under the `evaluator` parameter.
+        retries : int
+            Stored under the `retries` parameter.
+        """
+        return cls._evaluated(
+            MetricType.ContextRelevance, value, model_name, retries
+        )
+    @classmethod
+    def faithfulness(
+        cls,
+        value: float,
+        model_name: str,
+        retries: int,
+    ) -> "Metric":
+        """
+        Defines a faithfulness metric.
+        Parameters
+        ----------
+        value : float
+            The faithfulness score for a datum. This is a float between 0 and 1, with 1 indicating that
+            all claims in the text are implied by the contexts.
+        model_name : str
+            Stored under the `evaluator` parameter.
+        retries : int
+            Stored under the `retries` parameter.
+        """
+        return cls._evaluated(
+            MetricType.Faithfulness, value, model_name, retries
+        )
+    @classmethod
+    def hallucination(
+        cls,
+        value: float,
+        model_name: str,
+        retries: int,
+    ) -> "Metric":
+        """
+        Defines a hallucination metric.
+        Parameters
+        ----------
+        value : float
+            The hallucination score for a datum. This is a float between 0 and 1, with 1 indicating that
+            all contexts are contradicted by the text.
+        model_name : str
+            Stored under the `evaluator` parameter.
+        retries : int
+            Stored under the `retries` parameter.
+        """
+        return cls._evaluated(
+            MetricType.Hallucination, value, model_name, retries
+        )
+    @classmethod
+    def rouge(
+        cls,
+        value: float,
+        rouge_type: str,
+        use_stemmer: bool,
+    ) -> "Metric":
+        """
+        Defines a ROUGE metric.
+        Parameters
+        ----------
+        value : float
+            A ROUGE score.
+        rouge_type : str
+            The ROUGE variation used to compute the value. `rouge1` is unigram-based scoring, `rouge2` is bigram-based
+            scoring, `rougeL` is scoring based on sentences (i.e., splitting on "." and ignoring "\n"), and `rougeLsum`
+            is scoring based on splitting the text using "\n".
+        use_stemmer : bool
+            Whether the Porter stemmer was used to strip word suffixes. Required; this constructor
+            defines no default.
+        """
+        return cls(
+            type=MetricType.ROUGE,
+            value=value,
+            parameters={
+                "rouge_type": rouge_type,
+                "use_stemmer": use_stemmer,
+            },
+        )
+    @classmethod
+    def summary_coherence(
+        cls,
+        value: int,
+        model_name: str,
+        retries: int,
+    ) -> "Metric":
+        """
+        Defines a summary coherence metric.
+        Parameters
+        ----------
+        value : int
+            The summary coherence score for a datum. This is an integer with 1 being the lowest summary coherence
+            and 5 the highest summary coherence.
+        model_name : str
+            Stored under the `evaluator` parameter.
+        retries : int
+            Stored under the `retries` parameter.
+        """
+        return cls._evaluated(
+            MetricType.SummaryCoherence, value, model_name, retries
+        )
+    @classmethod
+    def toxicity(
+        cls,
+        value: float,
+        model_name: str,
+        retries: int,
+    ) -> "Metric":
+        """
+        Defines a toxicity metric.
+        Parameters
+        ----------
+        value : float
+            The toxicity score for a datum. This is a value between 0 and 1, with 1 indicating that all opinions
+            in the datum text are toxic and 0 indicating that there is no toxicity.
+        model_name : str
+            Stored under the `evaluator` parameter.
+        retries : int
+            Stored under the `retries` parameter.
+        """
+        return cls._evaluated(MetricType.Toxicity, value, model_name, retries)

{valor_lite-0.33.14.dist-info → valor_lite-0.33.16.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: valor-lite
-Version: 0.33.14
+Version: 0.33.16
 Summary: Compute valor metrics locally.
 License: MIT License
@@ -28,15 +28,23 @@ Project-URL: homepage, https://www.striveworks.com
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: evaluate
+Requires-Dist: nltk
+Requires-Dist: numpy
 Requires-Dist: Pillow >=9.1.0
-Requires-Dist: tqdm
 Requires-Dist: requests
-Requires-Dist: numpy
+Requires-Dist: rouge-score
 Requires-Dist: shapely
+Requires-Dist: tqdm
 Requires-Dist: importlib-metadata ; python_version < "3.8"
+Provides-Extra: mistral
+Requires-Dist: mistralai >=1.0 ; extra == 'mistral'
+Provides-Extra: openai
+Requires-Dist: openai ; extra == 'openai'
 Provides-Extra: test
 Requires-Dist: pytest ; extra == 'test'
 Requires-Dist: coverage ; extra == 'test'
+Requires-Dist: pre-commit ; extra == 'test'
 # valor-lite: Fast, local machine learning evaluation.

valor_lite-0.33.16.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,38 @@
+valor_lite/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
+valor_lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+valor_lite/schemas.py,sha256=pB0MrPx5qFLbwBWDiOUUm-vmXdWvbJLFCBmKgbcbI5g,198
+valor_lite/classification/__init__.py,sha256=8MI8bGwCxYGqRP7KxG7ezhYv4qQ5947XGvvlF8WPM5g,392
+valor_lite/classification/annotation.py,sha256=0aUOvcwBAZgiNOJuyh-pXyNTG7vP7r8CUfnU3OmpUwQ,1113
+valor_lite/classification/computation.py,sha256=qfBhhuDYCiY8h2RdBG3shzgJbHLXDVNujkYFg9xZa6U,12116
+valor_lite/classification/manager.py,sha256=8GXZECSx4CBbG5NfPrA19BPENqmrjo-wZBmaulWHY20,16676
+valor_lite/classification/metric.py,sha256=fkAo-_3s4EIRSkyn3owBSf4_Gp6lBK9xdToDYMWmT8A,12236
+valor_lite/classification/utilities.py,sha256=PmQar06Vt-ew4Jvnn0IM63mq730QVTsdRtFdVu1HMFU,6885
+valor_lite/object_detection/__init__.py,sha256=Ql8rju2q7y0Zd9zFvtBJDRhgQFDm1RSYkTsyH3ZE6pA,648
+valor_lite/object_detection/annotation.py,sha256=o6VfiRobiB0ljqsNBLAYMXgi32RSIR7uTA-dgxq6zBI,8248
+valor_lite/object_detection/computation.py,sha256=P5ijxEBuZ3mxYjBQy24TiQpGxRmPuS40Gwn44uv0J7M,28064
+valor_lite/object_detection/manager.py,sha256=rHY6-aiPVOXKQk7e_MmKpZxn6wdLAhdlj_njaNdYG7Q,23299
+valor_lite/object_detection/metric.py,sha256=8QhdauuaRrzE39idetkFYTPxA12wrBalQDIR4IUzEbg,24794
+valor_lite/object_detection/utilities.py,sha256=98VSW-g8EYI8Cdd9KHLHdm6F4fI89jaX5I4z99zny4s,16271
+valor_lite/semantic_segmentation/__init__.py,sha256=HQQkr3iBPQfdUrsu0uvx-Uyv9SYmumU1B3slbWOnpNY,245
+valor_lite/semantic_segmentation/annotation.py,sha256=CujYFdHS3fgr4Y7mEDs_u1XBmbPJzNU2CdqvjCT_d_A,2938
+valor_lite/semantic_segmentation/computation.py,sha256=rrql3zmpqt4Zygc2BD4SyUfNW_NXC93_kHB-lGBzjXU,5122
+valor_lite/semantic_segmentation/manager.py,sha256=pMepH3zk_fApyFtC9tLrmEYuCbg1n5TLh1J8QRadE44,14287
+valor_lite/semantic_segmentation/metric.py,sha256=aJv3wPEl6USLhZ3c4yz6prnBU-EaG4Kz16f0BXcodd4,7046
+valor_lite/semantic_segmentation/utilities.py,sha256=vZM66YNMz9VJclhuKvcWp74nF65s6bscnnD5U9iDW7Q,2925
+valor_lite/text_generation/__init__.py,sha256=pGhpWCSZjLM0pPHCtPykAfos55B8ie3mi9EzbNxfj-U,356
+valor_lite/text_generation/annotation.py,sha256=O5aXiwCS4WjA-fqn4ly-O0MsTHoIOmqxqCaAp9IeI3M,1270
+valor_lite/text_generation/computation.py,sha256=cG35qMpxNPEYHXN2fz8wcanESriSHoWMl1idpm9-ous,18638
+valor_lite/text_generation/manager.py,sha256=C4QwvronGHXmYSkaRmUGy7TN0C0aeyDx9Hb-ClNYXK4,24810
+valor_lite/text_generation/metric.py,sha256=C9gbWejjOJ23JVLecuUhYW5rkx30NUCfRtgsM46uMds,10409
+valor_lite/text_generation/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+valor_lite/text_generation/llm/exceptions.py,sha256=w4eYSJIJQ_jWuCmquCB6ETr_st_LzbDRlhnlPeqwmfo,349
+valor_lite/text_generation/llm/generation.py,sha256=XKPjCxPUZHiWInQSO7wLOb0YtMFLu50s8rHZe1Yz0s0,28954
+valor_lite/text_generation/llm/instructions.py,sha256=fz2onBZZWcl5W8iy7zEWkPGU9N07ez6O7SxZA5M2xe4,34056
+valor_lite/text_generation/llm/integrations.py,sha256=-rTfdAjq1zH-4ixwYuMQEOQ80pIFzMTe0BYfroVx3Pg,6974
+valor_lite/text_generation/llm/utilities.py,sha256=bjqatGgtVTcl1PrMwiDKTYPGJXKrBrx7PDtzIblGSys,1178
+valor_lite/text_generation/llm/validators.py,sha256=Wzr5RlfF58_2wOU-uTw7C8skan_fYdhy4Gfn0jSJ8HM,2700
+valor_lite-0.33.16.dist-info/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
+valor_lite-0.33.16.dist-info/METADATA,sha256=mpXXDWKiCL8OsCLqRevVH6AkWMsYBT4Qjqdum3ZYFos,5888
+valor_lite-0.33.16.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+valor_lite-0.33.16.dist-info/top_level.txt,sha256=9ujykxSwpl2Hu0_R95UQTR_l07k9UUTSdrpiqmq6zc4,11
+valor_lite-0.33.16.dist-info/RECORD,,

{valor_lite-0.33.14.dist-info → valor_lite-0.33.16.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.2.0)
+Generator: setuptools (75.3.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

valor_lite-0.33.14.dist-info/RECORD DELETED Viewed

@@ -1,27 +0,0 @@
-valor_lite/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
-valor_lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-valor_lite/schemas.py,sha256=hcCFJ4ZywlFCqhx0om80Uf4xpASYPvs2vpP1yOUIqaE,403
-valor_lite/classification/__init__.py,sha256=8MI8bGwCxYGqRP7KxG7ezhYv4qQ5947XGvvlF8WPM5g,392
-valor_lite/classification/annotation.py,sha256=0aUOvcwBAZgiNOJuyh-pXyNTG7vP7r8CUfnU3OmpUwQ,1113
-valor_lite/classification/computation.py,sha256=qfBhhuDYCiY8h2RdBG3shzgJbHLXDVNujkYFg9xZa6U,12116
-valor_lite/classification/manager.py,sha256=8GXZECSx4CBbG5NfPrA19BPENqmrjo-wZBmaulWHY20,16676
-valor_lite/classification/metric.py,sha256=0ZGp7Wm4oc0h_EBiYfVEs39QEeL5xa-F27gig7smnq8,11409
-valor_lite/classification/utilities.py,sha256=PmQar06Vt-ew4Jvnn0IM63mq730QVTsdRtFdVu1HMFU,6885
-valor_lite/object_detection/__init__.py,sha256=Ql8rju2q7y0Zd9zFvtBJDRhgQFDm1RSYkTsyH3ZE6pA,648
-valor_lite/object_detection/annotation.py,sha256=o6VfiRobiB0ljqsNBLAYMXgi32RSIR7uTA-dgxq6zBI,8248
-valor_lite/object_detection/computation.py,sha256=xqV_KdYAGyq32VePW0pL8pO3YGRO46ZUVo0luwYD1P8,28024
-valor_lite/object_detection/manager.py,sha256=YjM9Kx3xrIt2VMjNZ-8guPchPq7YBABlams_7eZvYVY,23298
-valor_lite/object_detection/metric.py,sha256=QbxYTOykysshhpdVJjxMPnw8hvcAv4SM3sXDZj8OwnE,23967
-valor_lite/object_detection/utilities.py,sha256=98VSW-g8EYI8Cdd9KHLHdm6F4fI89jaX5I4z99zny4s,16271
-valor_lite/semantic_segmentation/__init__.py,sha256=HQQkr3iBPQfdUrsu0uvx-Uyv9SYmumU1B3slbWOnpNY,245
-valor_lite/semantic_segmentation/annotation.py,sha256=CujYFdHS3fgr4Y7mEDs_u1XBmbPJzNU2CdqvjCT_d_A,2938
-valor_lite/semantic_segmentation/computation.py,sha256=rrql3zmpqt4Zygc2BD4SyUfNW_NXC93_kHB-lGBzjXU,5122
-valor_lite/semantic_segmentation/manager.py,sha256=pMepH3zk_fApyFtC9tLrmEYuCbg1n5TLh1J8QRadE44,14287
-valor_lite/semantic_segmentation/metric.py,sha256=i8uTcalwvzK7CDHJ_8I-zplWe-qrMtXwH_5ZcTBi3M8,6219
-valor_lite/semantic_segmentation/utilities.py,sha256=vZM66YNMz9VJclhuKvcWp74nF65s6bscnnD5U9iDW7Q,2925
-valor_lite/text_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-valor_lite-0.33.14.dist-info/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
-valor_lite-0.33.14.dist-info/METADATA,sha256=G2j4SCVunrizKzJ9pBRH8h73RbzbFMbbMViHFuXG4pM,5632
-valor_lite-0.33.14.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
-valor_lite-0.33.14.dist-info/top_level.txt,sha256=9ujykxSwpl2Hu0_R95UQTR_l07k9UUTSdrpiqmq6zc4,11
-valor_lite-0.33.14.dist-info/RECORD,,

{valor_lite-0.33.14.dist-info → valor_lite-0.33.16.dist-info}/LICENSE RENAMED Viewed

File without changes

{valor_lite-0.33.14.dist-info → valor_lite-0.33.16.dist-info}/top_level.txt RENAMED Viewed

File without changes

valor-lite 0.33.14__py3-none-any.whl → 0.33.16__py3-none-any.whl

Potentially problematic release.

valor-lite 0.33.14__py3-none-any.whl → 0.33.16__py3-none-any.whl