themefinder 0.3.1__tar.gz → 0.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of themefinder might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: themefinder
3
- Version: 0.3.1
3
+ Version: 0.5.2
4
4
  Summary: A topic modelling Python package designed for analysing one-to-many question-answer data eg free-text survey responses.
5
5
  License: MIT
6
6
  Author: i.AI
@@ -128,6 +128,9 @@ ThemeFinder's pipeline consists of five distinct stages, each utilizing a specia
128
128
  - Leverages standardisation prompts to normalise theme descriptions
129
129
  - Creates clear, consistent theme definitions through structured refinement
130
130
 
131
+ ### Theme target alignment
132
+ - Optional step to consolidate themes down to a target number
133
+
131
134
  ### Theme mapping
132
135
  - Utilizes classification prompts to map individual responses to refined themes
133
136
  - Supports multiple theme assignments per response through detailed analysis
@@ -98,6 +98,9 @@ ThemeFinder's pipeline consists of five distinct stages, each utilizing a specia
98
98
  - Leverages standardisation prompts to normalise theme descriptions
99
99
  - Creates clear, consistent theme definitions through structured refinement
100
100
 
101
+ ### Theme target alignment
102
+ - Optional step to consolidate themes down to a target number
103
+
101
104
  ### Theme mapping
102
105
  - Utilizes classification prompts to map individual responses to refined themes
103
106
  - Supports multiple theme assignments per response through detailed analysis
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "themefinder"
3
- version = "0.3.1"
3
+ version = "0.5.2"
4
4
  description = "A topic modelling Python package designed for analysing one-to-many question-answer data eg free-text survey responses."
5
5
  authors = ["i.AI <packages@cabinetoffice.gov.uk>"]
6
6
  packages = [{include = "themefinder", from = "src"}]
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  from pathlib import Path
2
3
 
3
4
  import pandas as pd
@@ -7,7 +8,6 @@ from langchain_core.runnables import Runnable
7
8
  from .llm_batch_processor import batch_and_run, load_prompt_from_file
8
9
  from .themefinder_logging import logger
9
10
 
10
-
11
11
  CONSULTATION_SYSTEM_PROMPT = load_prompt_from_file("consultation_system_prompt")
12
12
 
13
13
 
@@ -15,7 +15,9 @@ async def find_themes(
15
15
  responses_df: pd.DataFrame,
16
16
  llm: Runnable,
17
17
  question: str,
18
+ target_n_themes: int | None = None,
18
19
  system_prompt: str = CONSULTATION_SYSTEM_PROMPT,
20
+ verbose: bool = True,
19
21
  ) -> dict[str, pd.DataFrame]:
20
22
  """Process survey responses through a multi-stage theme analysis pipeline.
21
23
 
@@ -24,14 +26,19 @@ async def find_themes(
24
26
  2. Initial theme generation
25
27
  3. Theme condensation (combining similar themes)
26
28
  4. Theme refinement
27
- 5. Mapping responses to refined themes
29
+ 5. Theme target alignment (optional, if target_n_themes is specified)
30
+ 6. Mapping responses to refined themes
28
31
 
29
32
  Args:
30
33
  responses_df (pd.DataFrame): DataFrame containing survey responses
31
34
  llm (Runnable): Language model instance for text analysis
32
35
  question (str): The survey question
36
+ target_n_themes (int | None, optional): Target number of themes to consolidate to.
37
+ If None, skip theme target alignment step. Defaults to None.
33
38
  system_prompt (str): System prompt to guide the LLM's behavior.
34
39
  Defaults to CONSULTATION_SYSTEM_PROMPT.
40
+ verbose (bool): Whether to show information messages during processing.
41
+ Defaults to True.
35
42
 
36
43
  Returns:
37
44
  dict[str, pd.DataFrame]: Dictionary containing results from each pipeline stage:
@@ -42,6 +49,8 @@ async def find_themes(
42
49
  - refined_themes: DataFrame with refined theme definitions
43
50
  - mapping: DataFrame mapping responses to final themes
44
51
  """
52
+ logger.setLevel(logging.INFO if verbose else logging.CRITICAL)
53
+
45
54
  sentiment_df = await sentiment_analysis(
46
55
  responses_df,
47
56
  llm,
@@ -63,6 +72,14 @@ async def find_themes(
63
72
  question=question,
64
73
  system_prompt=system_prompt,
65
74
  )
75
+ if target_n_themes is not None:
76
+ refined_theme_df = await theme_target_alignment(
77
+ refined_theme_df,
78
+ llm,
79
+ question=question,
80
+ target_n_themes=target_n_themes,
81
+ system_prompt=system_prompt,
82
+ )
66
83
  mapping_df = await theme_mapping(
67
84
  sentiment_df,
68
85
  llm,
@@ -79,8 +96,8 @@ async def find_themes(
79
96
  "question": question,
80
97
  "sentiment": sentiment_df,
81
98
  "topics": theme_df,
82
- "condensed_topics": condensed_theme_df,
83
- "refined_topics": refined_theme_df,
99
+ "condensed_themes": condensed_theme_df,
100
+ "refined_themes": refined_theme_df,
84
101
  "mapping": mapping_df,
85
102
  }
86
103
 
@@ -89,7 +106,7 @@ async def sentiment_analysis(
89
106
  responses_df: pd.DataFrame,
90
107
  llm: Runnable,
91
108
  question: str,
92
- batch_size: int = 10,
109
+ batch_size: int = 20,
93
110
  prompt_template: str | Path | PromptTemplate = "sentiment_analysis",
94
111
  system_prompt: str = CONSULTATION_SYSTEM_PROMPT,
95
112
  ) -> pd.DataFrame:
@@ -104,7 +121,7 @@ async def sentiment_analysis(
104
121
  llm (Runnable): Language model instance to use for sentiment analysis.
105
122
  question (str): The survey question.
106
123
  batch_size (int, optional): Number of responses to process in each batch.
107
- Defaults to 10.
124
+ Defaults to 20.
108
125
  prompt_template (str | Path | PromptTemplate, optional): Template for structuring
109
126
  the prompt to the LLM. Can be a string identifier, path to template file,
110
127
  or PromptTemplate instance. Defaults to "sentiment_analysis".
@@ -180,9 +197,10 @@ async def theme_condensation(
180
197
  themes_df: pd.DataFrame,
181
198
  llm: Runnable,
182
199
  question: str,
183
- batch_size: int = 10000,
200
+ batch_size: int = 100,
184
201
  prompt_template: str | Path | PromptTemplate = "theme_condensation",
185
202
  system_prompt: str = CONSULTATION_SYSTEM_PROMPT,
203
+ **kwargs,
186
204
  ) -> pd.DataFrame:
187
205
  """Condense and combine similar themes identified from survey responses.
188
206
 
@@ -195,7 +213,7 @@ async def theme_condensation(
195
213
  llm (Runnable): Language model instance to use for theme condensation.
196
214
  question (str): The survey question.
197
215
  batch_size (int, optional): Number of themes to process in each batch.
198
- Defaults to 10000.
216
+ Defaults to 100.
199
217
  prompt_template (str | Path | PromptTemplate, optional): Template for structuring
200
218
  the prompt to the LLM. Can be a string identifier, path to template file,
201
219
  or PromptTemplate instance. Defaults to "theme_condensation".
@@ -206,17 +224,43 @@ async def theme_condensation(
206
224
  pd.DataFrame: DataFrame containing the condensed themes, where similar topics
207
225
  have been combined into broader categories.
208
226
  """
209
- logger.info(f"Running theme condensation on {len(themes_df)} topics")
227
+ logger.info(f"Running theme condensation on {len(themes_df)} responses")
210
228
  themes_df["response_id"] = range(len(themes_df))
211
- return await batch_and_run(
229
+
230
+ n_themes = themes_df.shape[0]
231
+ while n_themes > batch_size:
232
+ logger.info(
233
+ f"{n_themes} larger than batch size, using recursive theme condensation"
234
+ )
235
+ themes_df = await batch_and_run(
236
+ themes_df,
237
+ prompt_template,
238
+ llm,
239
+ batch_size=batch_size,
240
+ question=question,
241
+ system_prompt=system_prompt,
242
+ **kwargs,
243
+ )
244
+ themes_df = themes_df.sample(frac=1).reset_index(drop=True)
245
+ themes_df["response_id"] = range(len(themes_df))
246
+ if len(themes_df) == n_themes:
247
+ logger.info("Themes no longer being condensed")
248
+ break
249
+ n_themes = themes_df.shape[0]
250
+
251
+ themes_df = await batch_and_run(
212
252
  themes_df,
213
253
  prompt_template,
214
254
  llm,
215
255
  batch_size=batch_size,
216
256
  question=question,
217
257
  system_prompt=system_prompt,
258
+ **kwargs,
218
259
  )
219
260
 
261
+ logger.info(f"Final number of condensed themes: {themes_df.shape[0]}")
262
+ return themes_df
263
+
220
264
 
221
265
  async def theme_refinement(
222
266
  condensed_themes_df: pd.DataFrame,
@@ -257,10 +301,10 @@ async def theme_refinement(
257
301
  transposes the output for improved readability and easier downstream
258
302
  processing.
259
303
  """
260
- logger.info(f"Running topic refinement on {len(condensed_themes_df)} responses")
304
+ logger.info(f"Running theme refinement on {len(condensed_themes_df)} responses")
261
305
  condensed_themes_df["response_id"] = range(len(condensed_themes_df))
262
306
 
263
- def transpose_refined_topics(refined_themes: pd.DataFrame):
307
+ def transpose_refined_themes(refined_themes: pd.DataFrame):
264
308
  """Transpose topics for increased legibility."""
265
309
  transposed_df = pd.DataFrame(
266
310
  [refined_themes["topic"].to_numpy()], columns=refined_themes["topic_id"]
@@ -275,7 +319,74 @@ async def theme_refinement(
275
319
  question=question,
276
320
  system_prompt=system_prompt,
277
321
  )
278
- return transpose_refined_topics(refined_themes)
322
+ return transpose_refined_themes(refined_themes)
323
+
324
+
325
+ async def theme_target_alignment(
326
+ refined_themes_df: pd.DataFrame,
327
+ llm: Runnable,
328
+ question: str,
329
+ target_n_themes: int = 10,
330
+ batch_size: int = 10000,
331
+ prompt_template: str | Path | PromptTemplate = "theme_target_alignment",
332
+ system_prompt: str = CONSULTATION_SYSTEM_PROMPT,
333
+ ) -> pd.DataFrame:
334
+ """Align themes to target number using an LLM.
335
+
336
+ This function processes refined themes to consolidate them into a target number of
337
+ distinct categories while preserving all significant details and perspectives.
338
+ It transforms the output format for improved readability by transposing the
339
+ results into a single-row DataFrame where columns represent individual themes.
340
+
341
+ Args:
342
+ refined_themes_df (pd.DataFrame): DataFrame containing the refined themes
343
+ from the previous pipeline stage.
344
+ llm (Runnable): Language model instance to use for theme alignment.
345
+ question (str): The survey question.
346
+ target_n_themes (int, optional): Target number of themes to consolidate to.
347
+ Defaults to 10.
348
+ batch_size (int, optional): Number of themes to process in each batch.
349
+ Defaults to 10000.
350
+ prompt_template (str | Path | PromptTemplate, optional): Template for structuring
351
+ the prompt to the LLM. Can be a string identifier, path to template file,
352
+ or PromptTemplate instance. Defaults to "theme_target_alignment".
353
+ system_prompt (str): System prompt to guide the LLM's behavior.
354
+ Defaults to CONSULTATION_SYSTEM_PROMPT.
355
+
356
+ Returns:
357
+ pd.DataFrame: A single-row DataFrame where:
358
+ - Each column represents a unique theme (identified by topic_id)
359
+ - The values contain the aligned theme descriptions
360
+ - The format is optimized for subsequent theme mapping operations
361
+
362
+ Note:
363
+ The function adds sequential response_ids to the input DataFrame and
364
+ transposes the output for improved readability and easier downstream
365
+ processing.
366
+ """
367
+ logger.info(
368
+ f"Running theme target alignment on {len(refined_themes_df.columns)} themes compressing to {target_n_themes} themes"
369
+ )
370
+ refined_themes_df = refined_themes_df.T.rename(columns={0: "topic"})
371
+ refined_themes_df["response_id"] = range(len(refined_themes_df))
372
+
373
+ def transpose_aligned_themes(aligned_themes: pd.DataFrame):
374
+ """Transpose topics for increased legibility."""
375
+ transposed_df = pd.DataFrame(
376
+ [aligned_themes["topic"].to_numpy()], columns=aligned_themes["topic_id"]
377
+ )
378
+ return transposed_df
379
+
380
+ aligned_themes = await batch_and_run(
381
+ refined_themes_df,
382
+ prompt_template,
383
+ llm,
384
+ batch_size=batch_size,
385
+ question=question,
386
+ system_prompt=system_prompt,
387
+ target_n_themes=target_n_themes,
388
+ )
389
+ return transpose_aligned_themes(aligned_themes)
279
390
 
280
391
 
281
392
  async def theme_mapping(
@@ -174,7 +174,6 @@ def generate_prompts(
174
174
  to the prompt template as the 'responses' variable.
175
175
  """
176
176
  batched_prompts = []
177
-
178
177
  for df in response_dfs:
179
178
  prompt = prompt_template.format(
180
179
  responses=df.to_dict(orient="records"), **kwargs
@@ -219,7 +218,7 @@ async def call_llm(
219
218
  failed_ids: set = set()
220
219
 
221
220
  @retry(
222
- wait=wait_random_exponential(min=1, max=60),
221
+ wait=wait_random_exponential(min=1, max=20),
223
222
  stop=stop_after_attempt(6),
224
223
  before=before.before_log(logger=logger, log_level=logging.DEBUG),
225
224
  reraise=True,
@@ -275,7 +274,7 @@ def check_response_integrity(
275
274
  if returned_ids_set != response_ids_set:
276
275
  logger.info("Failed integrity check")
277
276
  logger.info(
278
- f"Present in original but not returned from LLM: {response_ids_set - returned_ids_set}. Returned in LLM but not present in original: {returned_ids_set -response_ids_set}"
277
+ f"Present in original but not returned from LLM: {response_ids_set - returned_ids_set}. Returned in LLM but not present in original: {returned_ids_set - response_ids_set}"
279
278
  )
280
279
  return False
281
280
  return True
@@ -6,7 +6,7 @@ Your job is to analyze each response to the QUESTION below and decide:
6
6
  POSITION - is the response agreeing or disagreeing or is it unclear about the change being proposed in the question.
7
7
  Choose one from [agreement, disagreement, unclear]
8
8
 
9
- You should only return a response in strict json and nothing else. The final output should be in the following JSON format:
9
+ The final output should be in the following JSON format:
10
10
 
11
11
  {{"responses": [
12
12
  {{
@@ -20,6 +20,11 @@ You should only return a response in strict json and nothing else. The final out
20
20
  ...
21
21
  ]}}
22
22
 
23
+ You MUST include every response ID in the output.
24
+ If a response cannot be labelled, return empty sections where appropriate, but you MUST return an entry
25
+ with the correct response ID for each input object.
26
+
27
+ ## EXAMPLE
23
28
  Example 1:
24
29
  Question: \n What are your thoughts on the proposed government changes to the policy about reducing school holidays?
25
30
  Response: \n as a parent I have no idea why you would make this change. I guess you were thinking about increasing productivity but any productivity gains would be totally offset by the decrease in family time. \n
@@ -0,0 +1,37 @@
1
+ {system_prompt}
2
+
3
+ Below is a question and a list of topics extracted from answers to that question. Each topic has a topic_label and a topic_description.
4
+
5
+ Your task is to analyze these topics and produce a refined list that:
6
+ 1. Identifies and preserves core themes that appear frequently
7
+ 2. Combines redundant topics while maintaining nuanced differences
8
+ 3. Ensures the final list represents the full spectrum of viewpoints present in the original data
9
+
10
+ Guidelines for Topic Analysis:
11
+ - Begin by identifying distinct concept clusters in the topics
12
+ - Consider the context of the question when determining topic relevance
13
+ - Look for complementary perspectives that could enrich understanding of the same core concept
14
+ - Consider the key ideas behind themes when merging; don't simply focus on the words used in the label and description
15
+
16
+ For each topic in your output:
17
+ 1. Choose a clear, representative label that captures the essence of the combined or preserved topic
18
+ 2. Write a concise description that incorporates key insights from all constituent topics; this should be a single sentence only
19
+
20
+ Return at most 30 topics
21
+
22
+ The final output should be in the following JSON format:
23
+
24
+ {{"responses": [
25
+ {{"topic_label": "{{label for condensed topic 1}}", "topic_description": "{{description for condensed topic 1}}"}},
26
+ {{"topic_label": "{{label for condensed topic 2}}", "topic_description": "{{description for condensed topic 2}}"}},
27
+ {{"topic_label": "{{label for condensed topic 3}}", "topic_description": "{{description for condensed topic 3}}"}},
28
+ // Additional topics as necessary
29
+ ]}}
30
+
31
+ [Question]
32
+
33
+ {question}
34
+
35
+ [Themes]
36
+
37
+ {responses}
@@ -0,0 +1,70 @@
1
+ {system_prompt}
2
+
3
+ Below is a question and a list of responses to that question.
4
+
5
+ Your task is to analyze the RESPONSES below and extract TOPICS such that:
6
+ 1. Each topic summarizes a point of view expressed in the responses
7
+ 2. Every distinct and relevant point of view in the responses should be captured by a topic
8
+ 3. Each topic has a topic_label which summarizes the topic in a few words
9
+ 4. Each topic has a topic_description which gives more detail about the topic in one or two sentences
10
+ 5. The position field should just be the sentiment stated, and is either "agreement" or "disagreement" or "unclear"
11
+ 6. There should be no duplicate topics
12
+
13
+ The topics identified will be used by policy makers to understand what the public like and don't like about the proposals.
14
+
15
+ An example of how to extract topics from some responses is given in the EXAMPLE section below.
16
+
17
+ The final output should be in the following JSON format:
18
+
19
+ {{"responses": [
20
+ {{
21
+ "topic_label": "{{label_1}}",
22
+ "topic_description": "{{description_1}}",
23
+ "position": "{{position_1}}"
24
+ }},
25
+ {{
26
+ "topic_label": "{{label_2}}",
27
+ "topic_description": "{{description_2}}",
28
+ "position": "{{position_2}}"
29
+ }},
30
+ // Additional topics as necessary
31
+ ]}}
32
+
33
+ ## EXAMPLE
34
+
35
+ QUESTION
36
+ What are your views on the proposed change by the government to introduce a 2% tax on fast food meat products.
37
+
38
+ RESPONSES
39
+ [
40
+ {{"response": "I wish the government would stop interfering in the lves of its citizens. It only ever makes things worse. This change will just cost us all more money, and especially poorer people", "position": "disagreement"}},
41
+ {{"response": "Even though it will make people eat more healthier, I beleibe the government should interfer less and not more!", "position": "disagreement"}},
42
+ {{"response": "I hate grapes", "position": "disagreement"}},
43
+ ]
44
+
45
+ OUTPUTS
46
+
47
+ {{"responses": [
48
+ {{
49
+ "topic_label": "Government overreach",
50
+ "topic_description": "The proposals would result in government interfering too much with citizen's lives",
51
+ "position": "disagreement"
52
+ }},
53
+ {{
54
+ "topic_label": "Regressive change",
55
+ "topic_description": "The change would have a larger negative impact on poorer people",
56
+ "position": "disagreement"
57
+ }},
58
+ {{
59
+ "topic_label": "Health",
60
+ "topic_description": "The change would result in people eating healthier diets",
61
+ "position": "disagreement"
62
+ }},
63
+ ]}}
64
+
65
+
66
+ QUESTION:
67
+ {question}
68
+
69
+ RESPONSES:
70
+ {responses}
@@ -20,6 +20,9 @@ Your task is to analyze each response and decide which topics are present. Guide
20
20
  - If a response contains both positive and negative statements about a topic within the same response, choose the stance that receives more emphasis or appears more central to the argument
21
21
  - The order of reasons and stances must align with the order of labels (e.g., stance_a applies to topic_a)
22
22
 
23
+ You MUST include every response ID in the output.
24
+ If a response cannot be labelled, return empty sections where appropriate, but you MUST return an entry
25
+ with the correct response ID for each input object.
23
26
 
24
27
  The final output should be in the following JSON format:
25
28
 
@@ -1,6 +1,7 @@
1
1
  {system_prompt}
2
2
 
3
- You are tasked with refining and neutralizing a list of topics generated from responses to a question. Your goal is to transform opinionated topics into neutral, well-structured, and distinct topics while preserving the essential information.
3
+ You are tasked with refining and neutralizing a list of topics generated from responses to a question.
4
+ Your goal is to transform opinionated topics into neutral, well-structured, and distinct topics while preserving the essential information.
4
5
 
5
6
  ## Input
6
7
  You will receive a list of OPINIONATED TOPICS. These topics explicitly tie opinions to whether a person agrees or disagrees with the question.
@@ -60,7 +61,7 @@ Return your output in the following JSON format:
60
61
  }}
61
62
 
62
63
 
63
- ## Example
64
+ ## EXAMPLE
64
65
 
65
66
  OPINIONATED TOPIC:
66
67
  "Economic impact: Many respondents who support the policy believe it will create jobs and boost the economy, it could raise GDP by 2%."
@@ -0,0 +1,26 @@
1
+ {system_prompt}
2
+ Input: You will receive a JSON array of themes, where each theme contains a description of a topic or concept.
3
+
4
+ Goal: Consolidate these themes into approximately {target_n_themes} distinct categories by:
5
+ 1. Identifying and combining similar or overlapping themes
6
+ 2. Preserving all significant details and perspectives
7
+ 3. Creating clear, comprehensive descriptions for each merged theme
8
+
9
+ Requirements:
10
+ - Each consolidated theme should capture all relevant information from its source themes
11
+ - Final descriptions should be concise but thorough
12
+ - The merged themes should be distinct from each other with minimal overlap
13
+
14
+ Return your output in the following JSON format:
15
+
16
+ {{
17
+ "responses": [
18
+ {{"topic_id": "A", "topic": "{{topic label 1}}: {{topic description 1}}"}},
19
+ {{"topic_id": "B", "topic": "{{topic label 2}}: {{topic description 2}}"}},
20
+ {{"topic_id": "C", "topic": "{{topic label 3}}: {{topic description 3}}"}},
21
+ // Additional topics as necessary
22
+ ]
23
+ }}
24
+
25
+ Themes to analyze:
26
+ {responses}
@@ -1,42 +0,0 @@
1
- {system_prompt}
2
-
3
- Below is a question and a list of topics extracted from answers to that question. Each topic has a topic_label and a topic_description.
4
-
5
- Your task is to analyze these topics and produce a refined list that:
6
- 1. Identifies and preserves core themes that appear frequently
7
- 2. Captures unique perspectives that may only appear once but offer valuable insights
8
- 3. Combines truly redundant topics while maintaining nuanced differences
9
- 4. Ensures the final list represents the full spectrum of viewpoints present in the original data
10
-
11
- Guidelines for Topic Analysis:
12
- - Begin by identifying distinct concept clusters in the topics
13
- - When a topic appears only once, evaluate its unique contribution before deciding to merge or preserve it
14
- - Consider the context of the question when determining topic relevance
15
- - Look for complementary perspectives that could enrich understanding of the same core concept
16
- - Preserve specific examples or concrete applications that illustrate abstract concepts
17
- - Maintain granularity where different aspects of the same broader theme offer distinct insights
18
-
19
- The topics you are analyzing are all extracted from answers with the same position, where "position" means that the answer agrees ("Y") or disagrees ("N") with the question.
20
-
21
- For each topic in your output:
22
- 1. Choose a clear, representative label that captures the essence of the combined or preserved topic
23
- 2. Write a comprehensive description that incorporates key insights from all constituent topics
24
- 3. Ensure the description maintains specific examples or unique angles from the original topics
25
- 4. Include the shared position value
26
-
27
- The final output should be in the following JSON format:
28
-
29
- {{"responses": [
30
- {{"topic_label": "{{label for condensed topic 1}}", "topic_description": "{{description for condensed topic 1}}", "position": {{the position given below}}"}},
31
- {{"topic_label": "{{label for condensed topic 2}}", "topic_description": "{{description for condensed topic 2}}", "position": {{the position given below}}"}},
32
- {{"topic_label": "{{label for condensed topic 3}}", "topic_description": "{{description for condensed topic 3}}", "position": {{the position given below}}"}},
33
- // Additional topics as necessary
34
- ]}}
35
-
36
- [Question]
37
-
38
- {question}
39
-
40
- [Themes]
41
-
42
- {responses}
@@ -1,70 +0,0 @@
1
- {system_prompt}
2
-
3
- Your task is to analyse RESPONSES below and extract TOPICS such that:
4
- 1. Each topic summarises points of view expressed in the responses
5
- 2. Every distinct and relevant point of view in the responses should be captured by a topic
6
- 3. Each topic has a topic_label which summarises the topic in a few words
7
- 4. Each topic has a topic_description which gives more detail about the topic in one or two sentences
8
- 5. The position field should just be the sentiment stated, and is either "agreement" or "disagreement"
9
- 6. There should be no duplicate topics
10
-
11
- The topics identified will be used by policy makers to understand what the public like and don't like about the proposals.
12
-
13
- Here is an example of how to extract topics from some responses
14
-
15
- EXAMPLE:
16
-
17
- POSITION
18
- disagreement
19
-
20
- QUESTION
21
- What are your views on the proposed change by the government to introduce a 2% tax on fast food meat products.
22
-
23
- RESPONSES
24
- [
25
- {{"response": "I wish the government would stop interfering in the lves of its citizens. It only ever makes things worse. This change will just cost us all more money, and especially poorer people", "position": "disagreement"}},
26
- {{"response": "Even though it will make people eat more healthier, I beleibe the government should interfer less and not more!", "position": "disagreement"}},
27
- {{"response": "I hate grapes", "position": "disagreement"}},
28
- ]
29
-
30
- OUTPUTS
31
-
32
- {{"responses": [
33
- {{
34
- "topic_label": "Government overreach",
35
- "topic_description": "Some people thought the proposals would result in government interfering too much with citizen's lives",
36
- "position": "disagreement"
37
- }},
38
- {{
39
- "topic_label": "Regressive change",
40
- "topic_description": "Some people thought the change would have a larger negative impact on poorer people",
41
- "position": "disagreement"
42
- }},
43
- {{
44
- "topic_label": "Health",
45
- "topic_description": "Some people thought the change would result in people eating healthier diets",
46
- "position": "disagreement"
47
- }},
48
- ]}}
49
-
50
- You should only return a response in strict json and nothing else. The final output should be in the following JSON format:
51
-
52
- {{"responses": [
53
- {{
54
- "topic_label": "{{label_1}}",
55
- "topic_description": "{{description_1}}",
56
- "position": "{{position_1}}"
57
- }},
58
- {{
59
- "topic_label": "{{label_2}}",
60
- "topic_description": "{{description_2}}",
61
- "position": "{{position_2}}"
62
- }},
63
- // Additional topics as necessary
64
- ]}}
65
-
66
- QUESTION:
67
- {question}
68
-
69
- RESPONSES:
70
- {responses}
File without changes