themefinder 0.4.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of themefinder might be problematic. Click here for more details.
- themefinder/core.py +4 -4
- themefinder/llm_batch_processor.py +1 -2
- themefinder/prompts/sentiment_analysis.txt +3 -0
- themefinder/prompts/theme_mapping.txt +6 -2
- {themefinder-0.4.1.dist-info → themefinder-0.5.3.dist-info}/METADATA +4 -3
- {themefinder-0.4.1.dist-info → themefinder-0.5.3.dist-info}/RECORD +8 -8
- {themefinder-0.4.1.dist-info → themefinder-0.5.3.dist-info}/WHEEL +1 -1
- {themefinder-0.4.1.dist-info → themefinder-0.5.3.dist-info}/LICENCE +0 -0
themefinder/core.py
CHANGED
|
@@ -95,7 +95,7 @@ async def find_themes(
|
|
|
95
95
|
return {
|
|
96
96
|
"question": question,
|
|
97
97
|
"sentiment": sentiment_df,
|
|
98
|
-
"
|
|
98
|
+
"themes": theme_df,
|
|
99
99
|
"condensed_themes": condensed_theme_df,
|
|
100
100
|
"refined_themes": refined_theme_df,
|
|
101
101
|
"mapping": mapping_df,
|
|
@@ -106,7 +106,7 @@ async def sentiment_analysis(
|
|
|
106
106
|
responses_df: pd.DataFrame,
|
|
107
107
|
llm: Runnable,
|
|
108
108
|
question: str,
|
|
109
|
-
batch_size: int =
|
|
109
|
+
batch_size: int = 20,
|
|
110
110
|
prompt_template: str | Path | PromptTemplate = "sentiment_analysis",
|
|
111
111
|
system_prompt: str = CONSULTATION_SYSTEM_PROMPT,
|
|
112
112
|
) -> pd.DataFrame:
|
|
@@ -121,7 +121,7 @@ async def sentiment_analysis(
|
|
|
121
121
|
llm (Runnable): Language model instance to use for sentiment analysis.
|
|
122
122
|
question (str): The survey question.
|
|
123
123
|
batch_size (int, optional): Number of responses to process in each batch.
|
|
124
|
-
Defaults to
|
|
124
|
+
Defaults to 20.
|
|
125
125
|
prompt_template (str | Path | PromptTemplate, optional): Template for structuring
|
|
126
126
|
the prompt to the LLM. Can be a string identifier, path to template file,
|
|
127
127
|
or PromptTemplate instance. Defaults to "sentiment_analysis".
|
|
@@ -224,7 +224,7 @@ async def theme_condensation(
|
|
|
224
224
|
pd.DataFrame: DataFrame containing the condensed themes, where similar topics
|
|
225
225
|
have been combined into broader categories.
|
|
226
226
|
"""
|
|
227
|
-
logger.info(f"Running theme condensation on {len(themes_df)}
|
|
227
|
+
logger.info(f"Running theme condensation on {len(themes_df)} themes")
|
|
228
228
|
themes_df["response_id"] = range(len(themes_df))
|
|
229
229
|
|
|
230
230
|
n_themes = themes_df.shape[0]
|
|
@@ -174,7 +174,6 @@ def generate_prompts(
|
|
|
174
174
|
to the prompt template as the 'responses' variable.
|
|
175
175
|
"""
|
|
176
176
|
batched_prompts = []
|
|
177
|
-
|
|
178
177
|
for df in response_dfs:
|
|
179
178
|
prompt = prompt_template.format(
|
|
180
179
|
responses=df.to_dict(orient="records"), **kwargs
|
|
@@ -275,7 +274,7 @@ def check_response_integrity(
|
|
|
275
274
|
if returned_ids_set != response_ids_set:
|
|
276
275
|
logger.info("Failed integrity check")
|
|
277
276
|
logger.info(
|
|
278
|
-
f"Present in original but not returned from LLM: {response_ids_set - returned_ids_set}. Returned in LLM but not present in original: {returned_ids_set -response_ids_set}"
|
|
277
|
+
f"Present in original but not returned from LLM: {response_ids_set - returned_ids_set}. Returned in LLM but not present in original: {returned_ids_set - response_ids_set}"
|
|
279
278
|
)
|
|
280
279
|
return False
|
|
281
280
|
return True
|
|
@@ -20,6 +20,9 @@ The final output should be in the following JSON format:
|
|
|
20
20
|
...
|
|
21
21
|
]}}
|
|
22
22
|
|
|
23
|
+
You MUST include every response ID in the output.
|
|
24
|
+
If the response can not be labelled return empty sections where appropriate but you MUST return an entry
|
|
25
|
+
with the correct response ID for each input object
|
|
23
26
|
|
|
24
27
|
## EXAMPLE
|
|
25
28
|
Example 1:
|
|
@@ -10,9 +10,9 @@ You will be given:
|
|
|
10
10
|
{{'response_id': 'free text response'}}
|
|
11
11
|
|
|
12
12
|
Your task is to analyze each response and decide which topics are present. Guidelines:
|
|
13
|
-
- You can only assign a response to a topic in the provided TOPIC LIST
|
|
13
|
+
- You can only assign to a response to a topic in the provided TOPIC LIST
|
|
14
14
|
- A response doesn't need to exactly match the language used in the TOPIC LIST, it should be considered a match if it expresses a similar sentiment.
|
|
15
|
-
- You must use the alphabetic 'topic_id' to indicate which topic you have assigned.
|
|
15
|
+
- You must use the alphabetic 'topic_id' to indicate which topic you have assigned. Do not use the full topic description
|
|
16
16
|
- Each response can be assigned to multiple topics if it matches more than one topic from the TOPIC LIST.
|
|
17
17
|
- There is no limit on how many topics can be assigned to a response.
|
|
18
18
|
- For each assignment provide a single rationale for why you have chosen the label.
|
|
@@ -20,6 +20,10 @@ Your task is to analyze each response and decide which topics are present. Guide
|
|
|
20
20
|
- If a response contains both positive and negative statements about a topic within the same response, choose the stance that receives more emphasis or appears more central to the argument
|
|
21
21
|
- The order of reasons and stances must align with the order of labels (e.g., stance_a applies to topic_a)
|
|
22
22
|
|
|
23
|
+
You MUST include every response ID in the output.
|
|
24
|
+
If the response can not be labelled return empty sections where appropriate but you MUST return an entry
|
|
25
|
+
with the correct response ID for each input object.
|
|
26
|
+
You must only return the alphabetic topic_ids in the labels section.
|
|
23
27
|
|
|
24
28
|
The final output should be in the following JSON format:
|
|
25
29
|
|
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: themefinder
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.5.3
|
|
4
4
|
Summary: A topic modelling Python package designed for analysing one-to-many question-answer data eg free-text survey responses.
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: i.AI
|
|
7
7
|
Author-email: packages@cabinetoffice.gov.uk
|
|
8
|
-
Requires-Python: >=3.
|
|
8
|
+
Requires-Python: >=3.10,<3.13
|
|
9
9
|
Classifier: Intended Audience :: Developers
|
|
10
10
|
Classifier: Intended Audience :: Science/Research
|
|
11
11
|
Classifier: License :: OSI Approved :: MIT License
|
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
15
|
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
15
16
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
17
|
Classifier: Topic :: Text Processing :: Linguistic
|
|
17
18
|
Requires-Dist: boto3 (>=1.29,<2.0)
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
themefinder/__init__.py,sha256=p6QoCgA-BYWljk8yPOeTgkNcN5m_gA_o3Q86Eh0QjSM,327
|
|
2
|
-
themefinder/core.py,sha256=
|
|
3
|
-
themefinder/llm_batch_processor.py,sha256=
|
|
2
|
+
themefinder/core.py,sha256=B6Du59rPsZbBcP8tkKmXQn6h5vvLN_PZIferPnF3LNY,17538
|
|
3
|
+
themefinder/llm_batch_processor.py,sha256=SDDeMJeX1J3u7FGFddRhVSxty6U8lFVXwG4eNI_0C5o,12573
|
|
4
4
|
themefinder/prompts/consultation_system_prompt.txt,sha256=_A07oY_an4hnRx-9pQ0y-TLXJz0dd8vDI-MZne7Mdb4,89
|
|
5
|
-
themefinder/prompts/sentiment_analysis.txt,sha256=
|
|
5
|
+
themefinder/prompts/sentiment_analysis.txt,sha256=e3DcUKga6pSFcfeo2TAq8x9LXk0YDV-D7P2gtymcyuc,1832
|
|
6
6
|
themefinder/prompts/theme_condensation.txt,sha256=GFwwQO_oZHhqhPnAfTn887fDzAIVxKoCyj0hXagyBIU,1645
|
|
7
7
|
themefinder/prompts/theme_generation.txt,sha256=JMXuNojxdSAcxPRU1Jg12Xunv_dX4hNvXYU2pXMWTAw,2500
|
|
8
|
-
themefinder/prompts/theme_mapping.txt,sha256=
|
|
8
|
+
themefinder/prompts/theme_mapping.txt,sha256=nb_D7gwKGd8BzrAlzSZC3mQIPYaCRXdE6XmoJaJEKZQ,2405
|
|
9
9
|
themefinder/prompts/theme_refinement.txt,sha256=HCgvWAoz-cpFgjX_QS_VVY0X06d4ds0ekBgcoWyFyfg,3360
|
|
10
10
|
themefinder/prompts/theme_target_alignment.txt,sha256=-_ghr4--KAN6Tz8ExO9s2IXvI6pjWaEA_nG5L83GV5I,1035
|
|
11
11
|
themefinder/themefinder_logging.py,sha256=n5SUQovEZLC4skEbxicjz_fOGF9mOk3S-Wpj5uXsaL8,314
|
|
12
|
-
themefinder-0.
|
|
13
|
-
themefinder-0.
|
|
14
|
-
themefinder-0.
|
|
15
|
-
themefinder-0.
|
|
12
|
+
themefinder-0.5.3.dist-info/LICENCE,sha256=C9ULIN0ctF60ZxUWH_hw1H434bDLg49Z-Qzn6BUHgqs,1060
|
|
13
|
+
themefinder-0.5.3.dist-info/METADATA,sha256=o9rzrhRK-4PMAv9wS8ZrnmTw1rTSYGU8zfPbB31r1DU,6483
|
|
14
|
+
themefinder-0.5.3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
15
|
+
themefinder-0.5.3.dist-info/RECORD,,
|
|
File without changes
|