PyPI - themefinder - Versions diffs - 0.4.1__tar.gz → 0.5.2__tar.gz - Mend

themefinder 0.4.1 (tar.gz) → 0.5.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of themefinder might be problematic. Click here for more details.

Files changed (15)

{themefinder-0.4.1 → themefinder-0.5.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: themefinder
-Version: 0.4.1
+Version: 0.5.2
 Summary: A topic modelling Python package designed for analysing one-to-many question-answer data eg free-text survey responses.
 License: MIT
 Author: i.AI

{themefinder-0.4.1 → themefinder-0.5.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "themefinder"
-version = "0.4.1"
+version = "0.5.2"
 description = "A topic modelling Python package designed for analysing one-to-many question-answer data eg free-text survey responses."
 authors = ["i.AI <packages@cabinetoffice.gov.uk>"]
 packages = [{include = "themefinder", from = "src"}]

{themefinder-0.4.1 → themefinder-0.5.2}/src/themefinder/core.py RENAMED Viewed

@@ -106,7 +106,7 @@ async def sentiment_analysis(
     responses_df: pd.DataFrame,
     llm: Runnable,
     question: str,
-    batch_size: int = 10,
+    batch_size: int = 20,
     prompt_template: str | Path | PromptTemplate = "sentiment_analysis",
     system_prompt: str = CONSULTATION_SYSTEM_PROMPT,
 ) -> pd.DataFrame:
@@ -121,7 +121,7 @@ async def sentiment_analysis(
         llm (Runnable): Language model instance to use for sentiment analysis.
         question (str): The survey question.
         batch_size (int, optional): Number of responses to process in each batch.
-            Defaults to 10.
+            Defaults to 20.
         prompt_template (str | Path | PromptTemplate, optional): Template for structuring
             the prompt to the LLM. Can be a string identifier, path to template file,
             or PromptTemplate instance. Defaults to "sentiment_analysis".

{themefinder-0.4.1 → themefinder-0.5.2}/src/themefinder/llm_batch_processor.py RENAMED Viewed

@@ -174,7 +174,6 @@ def generate_prompts(
         to the prompt template as the 'responses' variable.
     """
     batched_prompts = []
     for df in response_dfs:
         prompt = prompt_template.format(
             responses=df.to_dict(orient="records"), **kwargs
@@ -275,7 +274,7 @@ def check_response_integrity(
     if returned_ids_set != response_ids_set:
         logger.info("Failed integrity check")
         logger.info(
-            f"Present in original but not returned from LLM: {response_ids_set - returned_ids_set}. Returned in LLM but not present in original: {returned_ids_set -response_ids_set}"
+            f"Present in original but not returned from LLM: {response_ids_set - returned_ids_set}. Returned in LLM but not present in original: {returned_ids_set - response_ids_set}"
         )
         return False
     return True

{themefinder-0.4.1 → themefinder-0.5.2}/src/themefinder/prompts/sentiment_analysis.txt RENAMED Viewed

@@ -20,6 +20,9 @@ The final output should be in the following JSON format:
     ...
 ]}}
+You MUST include every response ID in the output.
+If the response can not be labelled return empty sections where appropriate but you MUST return an entry
+with the correct response ID for each input object
 ## EXAMPLE
 Example 1:

{themefinder-0.4.1 → themefinder-0.5.2}/src/themefinder/prompts/theme_mapping.txt RENAMED Viewed

@@ -10,7 +10,7 @@ You will be given:
         {{'response_id': 'free text response'}}
 Your task is to analyze each response and decide which topics are present. Guidelines:
-    - You can only assign a response to a topic in the provided TOPIC LIST
+    - You can only assign to a response to a topic in the provided TOPIC LIST
     - A response doesn't need to exactly match the language used in the TOPIC LIST, it should be considered a match if it expresses a similar sentiment.
     - You must use the alphabetic 'topic_id' to indicate which topic you have assigned.
     - Each response can be assigned to multiple topics if it matches more than one topic from the TOPIC LIST.
@@ -20,6 +20,9 @@ Your task is to analyze each response and decide which topics are present. Guide
     - If a response contains both positive and negative statements about a topic within the same response, choose the stance that receives more emphasis or appears more central to the argument
     - The order of reasons and stances must align with the order of labels (e.g., stance_a applies to topic_a)
+You MUST include every response ID in the output.
+If the response can not be labelled return empty sections where appropriate but you MUST return an entry
+with the correct response ID for each input object
 The final output should be in the following JSON format: