themefinder 0.5.4__py3-none-any.whl → 0.6.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of themefinder might be problematic. Click here for more details.
- themefinder/__init__.py +6 -2
- themefinder/core.py +204 -97
- themefinder/llm_batch_processor.py +277 -145
- themefinder/models.py +351 -0
- themefinder/prompts/detail_detection.txt +19 -0
- themefinder/prompts/sentiment_analysis.txt +8 -19
- themefinder/prompts/theme_condensation.txt +2 -22
- themefinder/prompts/theme_generation.txt +6 -38
- themefinder/prompts/theme_mapping.txt +6 -23
- themefinder/prompts/theme_refinement.txt +14 -40
- themefinder/prompts/theme_target_alignment.txt +2 -10
- {themefinder-0.5.4.dist-info → themefinder-0.6.3.dist-info}/METADATA +25 -9
- themefinder-0.6.3.dist-info/RECORD +17 -0
- {themefinder-0.5.4.dist-info → themefinder-0.6.3.dist-info}/WHEEL +1 -1
- themefinder-0.5.4.dist-info/RECORD +0 -15
- {themefinder-0.5.4.dist-info → themefinder-0.6.3.dist-info}/LICENCE +0 -0
themefinder/models.py
ADDED
|
@@ -0,0 +1,351 @@
|
|
|
from enum import Enum
from typing import Callable, List, Optional

from pydantic import BaseModel, Field, model_validator
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Position(str, Enum):
    """Valid position values for a response toward the question's proposal.

    Subclasses ``str`` so members compare equal to their string values,
    letting pydantic accept the raw strings produced by the LLM.
    """

    AGREEMENT = "AGREEMENT"  # response agrees with the proposed change
    DISAGREEMENT = "DISAGREEMENT"  # response disagrees with the proposed change
    UNCLEAR = "UNCLEAR"  # stance cannot be determined from the response
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Stance(str, Enum):
    """Valid stance values a response can take toward an assigned topic.

    Subclasses ``str`` so members compare equal to their string values,
    letting pydantic accept the raw strings produced by the LLM.
    """

    POSITIVE = "POSITIVE"  # response is positive about the topic
    NEGATIVE = "NEGATIVE"  # response is negative about the topic
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class EvidenceRich(str, Enum):
    """Valid values for the detail-detection ``evidence_rich`` flag.

    Subclasses ``str`` so members compare equal to their string values,
    letting pydantic accept the raw strings produced by the LLM.
    """

    YES = "YES"  # response contains significant evidence (facts, figures, examples)
    NO = "NO"  # response does not
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ValidatedModel(BaseModel):
    """Base pydantic model providing reusable validation helpers.

    Subclasses override ``run_validations`` (registered as a pydantic
    ``mode="after"`` model validator) and chain whichever helpers they
    need.  Every helper returns ``self`` so calls can be chained.
    """

    def validate_non_empty_fields(self) -> "ValidatedModel":
        """
        Validate that all string fields are non-empty and all list fields are not empty.

        Raises:
            ValueError: if a string field is empty/whitespace, a list field
                is empty, or a string item inside a list is empty/whitespace.
        """
        for field_name, value in self.__dict__.items():
            if isinstance(value, str) and not value.strip():
                raise ValueError(f"{field_name} cannot be empty or only whitespace")
            if isinstance(value, list):
                # Reject empty lists first, then blank string items inside them.
                if not value:
                    raise ValueError(f"{field_name} cannot be an empty list")
                for i, item in enumerate(value):
                    if isinstance(item, str) and not item.strip():
                        raise ValueError(
                            f"Item {i} in {field_name} cannot be empty or only whitespace"
                        )
        return self

    def validate_unique_items(
        self, field_name: str, transform_func: Optional[Callable] = None
    ) -> "ValidatedModel":
        """
        Validate that a field contains unique values.

        Args:
            field_name: The name of the field to check for uniqueness
            transform_func: Optional function to transform items before checking
                uniqueness (e.g., lowercasing strings)

        Raises:
            ValueError: if the field is missing, is not a list, or contains
                duplicate (possibly transformed) values.
        """
        if not hasattr(self, field_name):
            raise ValueError(f"Field '{field_name}' does not exist")
        items = getattr(self, field_name)
        if not isinstance(items, list):
            raise ValueError(f"Field '{field_name}' is not a list")
        if transform_func:
            transformed_items = [transform_func(item) for item in items]
        else:
            transformed_items = items
        if len(transformed_items) != len(set(transformed_items)):
            raise ValueError(f"'{field_name}' must contain unique values")
        return self

    def validate_unique_attribute_in_list(
        self, list_field: str, attr_name: str
    ) -> "ValidatedModel":
        """
        Validate that an attribute across all objects in a list field is unique.

        Args:
            list_field: The name of the list field containing objects
            attr_name: The attribute within each object to check for uniqueness

        Raises:
            ValueError: if the field is missing or not a list, an item lacks
                the attribute, or the attribute values are not unique.
        """
        if not hasattr(self, list_field):
            raise ValueError(f"Field '{list_field}' does not exist")

        items = getattr(self, list_field)
        if not isinstance(items, list):
            raise ValueError(f"Field '{list_field}' is not a list")

        attr_values = []
        for item in items:
            if not hasattr(item, attr_name):
                raise ValueError(
                    f"Item in '{list_field}' does not have attribute '{attr_name}'"
                )
            attr_values.append(getattr(item, attr_name))
        if len(attr_values) != len(set(attr_values)):
            raise ValueError(
                f"'{attr_name}' must be unique across all items in '{list_field}'"
            )
        return self

    def validate_equal_lengths(self, *field_names: str) -> "ValidatedModel":
        """
        Validate that multiple list fields have the same length.

        Args:
            *field_names: Variable number of field names to check for equal lengths

        Raises:
            ValueError: if a field is missing or not a list, or the lists'
                lengths differ.
        """
        if len(field_names) < 2:
            # Nothing to compare with fewer than two fields.
            return self
        lengths = []
        for field_name in field_names:
            if not hasattr(self, field_name):
                raise ValueError(f"Field '{field_name}' does not exist")

            items = getattr(self, field_name)
            if not isinstance(items, list):
                raise ValueError(f"Field '{field_name}' is not a list")

            lengths.append(len(items))
        if len(set(lengths)) > 1:
            raise ValueError(
                f"Fields {', '.join(field_names)} must all have the same length"
            )
        return self

    @model_validator(mode="after")
    def run_validations(self) -> "ValidatedModel":
        """
        Run common validations. Override in subclasses to add specific validations.
        """
        return self.validate_non_empty_fields()
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class SentimentAnalysisOutput(ValidatedModel):
    """Sentiment-analysis result for a single consultation response.

    Pairs a response with the position (AGREEMENT, DISAGREEMENT or
    UNCLEAR) assigned to it.
    """

    # ID of the response being analysed; must be a positive integer.
    response_id: int = Field(gt=0)
    # Stance of the response toward the question's proposal.
    position: Position
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class SentimentAnalysisResponses(ValidatedModel):
    """Container for all sentiment analysis responses."""

    # One entry per analysed response.
    responses: List[SentimentAnalysisOutput]

    @model_validator(mode="after")
    def run_validations(self) -> "SentimentAnalysisResponses":
        """Validate non-empty fields and that response_ids are unique.

        Delegates the uniqueness check to the shared base-class helper
        instead of re-implementing the set-size comparison inline.

        Raises:
            ValueError: if any field is empty or response_ids repeat.
        """
        self.validate_non_empty_fields()
        self.validate_unique_attribute_in_list("responses", "response_id")
        return self
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class Theme(ValidatedModel):
    """A single theme extracted from consultation responses.

    Inherited validation rejects blank labels and descriptions.
    """

    topic_label: str = Field(
        ..., description="Short label summarizing the topic in a few words"
    )
    topic_description: str = Field(
        ..., description="More detailed description of the topic in 1-2 sentences"
    )
    position: Position = Field(
        ...,
        description="SENTIMENT ABOUT THIS TOPIC (AGREEMENT, DISAGREEMENT, OR UNCLEAR)",
    )
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class ThemeGenerationResponses(ValidatedModel):
    """Container for all extracted themes."""

    responses: List[Theme] = Field(..., description="List of extracted themes")

    @model_validator(mode="after")
    def run_validations(self) -> "ThemeGenerationResponses":
        """Reject blank fields and duplicate topic labels.

        Labels are compared case-insensitively after stripping whitespace.

        Raises:
            ValueError: if any field is empty or two labels normalise equal.
        """
        self.validate_non_empty_fields()
        seen = set()
        for theme in self.responses:
            normalised = theme.topic_label.lower().strip()
            if normalised in seen:
                raise ValueError("Duplicate topic labels detected")
            seen.add(normalised)
        return self
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class CondensedTheme(ValidatedModel):
    """A theme produced by merging closely-related generated themes.

    Tracks how many original source topics were folded into it via
    ``source_topic_count``.
    """

    topic_label: str = Field(
        ..., description="Representative label for the condensed topic"
    )
    topic_description: str = Field(
        ...,
        description="Concise description incorporating key insights from constituent topics",
    )
    # Must be strictly positive: a condensed theme always has at least one source.
    source_topic_count: int = Field(
        ..., gt=0, description="Sum of source_topic_counts from combined topics"
    )
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
class ThemeCondensationResponses(ValidatedModel):
    """Container for all condensed themes."""

    responses: List[CondensedTheme] = Field(..., description="List of condensed themes")

    @model_validator(mode="after")
    def run_validations(self) -> "ThemeCondensationResponses":
        """Reject blank fields and duplicate topic labels.

        Labels are compared case-insensitively after stripping whitespace.

        Raises:
            ValueError: if any field is empty or two labels normalise equal.
        """
        self.validate_non_empty_fields()
        seen = set()
        for theme in self.responses:
            normalised = theme.topic_label.lower().strip()
            if normalised in seen:
                raise ValueError("Duplicate topic labels detected")
            seen.add(normalised)
        return self
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
class RefinedTheme(ValidatedModel):
    """A refined theme with an alphabetic ID and combined label:description.

    ``topic`` packs the label and description into one string separated by
    a colon; the validators below enforce that structure.
    """

    topic_id: str = Field(
        ..., description="Single uppercase letter ID (A-Z, then AA, AB, etc.)"
    )
    topic: str = Field(
        ..., description="Topic label and description combined with a colon separator"
    )
    source_topic_count: int = Field(
        ..., gt=0, description="Count of source topics combined"
    )

    @model_validator(mode="after")
    def run_validations(self) -> "RefinedTheme":
        """Run all validations for RefinedTheme."""
        self.validate_non_empty_fields()
        self.validate_topic_id_format()
        self.validate_topic_format()
        return self

    def validate_topic_id_format(self) -> "RefinedTheme":
        """
        Validate that topic_id follows the expected format (A-Z, then AA, AB, etc.).

        NOTE(review): this only checks "uppercase letters" — any length is
        accepted, not just the A..Z, AA.. sequence described above.

        Raises:
            ValueError: if topic_id contains anything but uppercase letters.
        """
        topic_id = self.topic_id.strip()
        if not topic_id.isupper() or not topic_id.isalpha():
            raise ValueError(f"topic_id must be uppercase letters only: {topic_id}")
        return self

    def validate_topic_format(self) -> "RefinedTheme":
        """
        Validate that topic contains a label and description separated by a colon.

        Raises:
            ValueError: if the colon is missing, either side is blank, or
                the label exceeds 10 words.
        """
        if ":" not in self.topic:
            raise ValueError(
                "Topic must contain a label and description separated by a colon"
            )

        # Split on the first colon only; descriptions may themselves contain colons.
        label, description = self.topic.split(":", 1)
        if not label.strip() or not description.strip():
            raise ValueError("Both label and description must be non-empty")

        word_count = len(label.strip().split())
        if word_count > 10:
            # Message corrected to match the `> 10` condition: labels of
            # exactly 10 words are accepted ("under 10" was inaccurate).
            raise ValueError(
                f"Topic label must be at most 10 words (found {word_count})"
            )

        return self
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
class ThemeRefinementResponses(ValidatedModel):
    """Container for all refined themes."""

    responses: List[RefinedTheme] = Field(..., description="List of refined themes")

    @model_validator(mode="after")
    def run_validations(self) -> "ThemeRefinementResponses":
        """Reject blank fields, duplicate topic_ids and duplicate topics.

        topic_ids are compared exactly; topics are compared
        case-insensitively after stripping whitespace.

        Raises:
            ValueError: on any duplicate topic_id or normalised topic.
        """
        self.validate_non_empty_fields()
        seen_ids = set()
        for theme in self.responses:
            if theme.topic_id in seen_ids:
                raise ValueError("Duplicate topic_ids detected")
            seen_ids.add(theme.topic_id)
        seen_topics = set()
        for theme in self.responses:
            normalised = theme.topic.lower().strip()
            if normalised in seen_topics:
                raise ValueError("Duplicate topics detected")
            seen_topics.add(normalised)

        return self
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
class ThemeMappingOutput(ValidatedModel):
    """Theme-mapping result for one response.

    The three parallel lists are positionally aligned: reasons[i] and
    stances[i] explain and qualify labels[i].
    """

    response_id: int = Field(gt=0, description="Response ID, must be greater than 0")
    labels: List[str] = Field(..., description="List of theme labels")
    reasons: List[str] = Field(..., description="List of reasons for mapping")
    stances: List[Stance] = Field(
        ..., description="List of stances (POSITIVE or NEGATIVE)"
    )

    @model_validator(mode="after")
    def run_validations(self) -> "ThemeMappingOutput":
        """
        Run all validations for ThemeMappingOutput.

        Chains the base-class helpers: non-empty fields, aligned list
        lengths, unique labels.  Each helper returns ``self``.
        """
        return (
            self.validate_non_empty_fields()
            .validate_equal_lengths("stances", "labels", "reasons")
            .validate_unique_items("labels")
        )
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
class ThemeMappingResponses(ValidatedModel):
    """Container for all theme mapping responses."""

    responses: List[ThemeMappingOutput] = Field(
        ..., description="List of theme mapping outputs"
    )

    @model_validator(mode="after")
    def run_validations(self) -> "ThemeMappingResponses":
        """
        Validate non-empty fields and that response_ids are unique.

        Delegates the uniqueness check to the shared base-class helper
        instead of re-implementing the set-size comparison inline.

        Raises:
            ValueError: if any field is empty or response_ids repeat.
        """
        self.validate_non_empty_fields()
        self.validate_unique_attribute_in_list("responses", "response_id")
        return self
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
class DetailDetectionOutput(ValidatedModel):
    """Detail-detection result for a single response.

    Records whether the response was judged evidence-rich (YES/NO).
    """

    response_id: int = Field(gt=0, description="Response ID, must be greater than 0")
    evidence_rich: EvidenceRich = Field(
        ..., description="Whether the response is evidence-rich (YES or NO)"
    )
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
class DetailDetectionResponses(ValidatedModel):
    """Container for all detail detection responses."""

    responses: List[DetailDetectionOutput] = Field(
        ..., description="List of detail detection outputs"
    )

    @model_validator(mode="after")
    def run_validations(self) -> "DetailDetectionResponses":
        """
        Validate non-empty fields and that response_ids are unique.

        Delegates the uniqueness check to the shared base-class helper
        instead of re-implementing the set-size comparison inline.

        Raises:
            ValueError: if any field is empty or response_ids repeat.
        """
        self.validate_non_empty_fields()
        self.validate_unique_attribute_in_list("responses", "response_id")
        return self
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{system_prompt}
|
|
2
|
+
|
|
3
|
+
You will receive a list of RESPONSES, each containing a response_id and a response.
|
|
4
|
+
Your job is to analyze each response to the QUESTION below and decide if a response contains rich evidence.
|
|
5
|
+
You MUST include every response ID in the output.
|
|
6
|
+
|
|
7
|
+
Evidence-rich responses contain one or more of the following:
|
|
8
|
+
- Specific facts or figures that shed new light on the issue (e.g., statistics, percentages, measurements, dates)
|
|
9
|
+
- Concrete examples and specific insights that could inform decision-making
|
|
10
|
+
- Detailed personal or professional experiences with clear contextual information or specific incidents
|
|
11
|
+
In addition to the above, an evidence-rich response should answer the question and provide deeper insights than an average response.
|
|
12
|
+
|
|
13
|
+
For each response, determine:
|
|
14
|
+
EVIDENCE_RICH - does the response contain significant evidence as defined above?
|
|
15
|
+
Choose one from ['YES', 'NO']
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
QUESTION: \n {question}
|
|
19
|
+
RESPONSES: \n {responses}
|
|
@@ -3,41 +3,30 @@
|
|
|
3
3
|
You will receive a list of RESPONSES, each containing a response_id and a response.
|
|
4
4
|
Your job is to analyze each response to the QUESTION below and decide:
|
|
5
5
|
|
|
6
|
-
POSITION - is the response
|
|
7
|
-
Choose one from [
|
|
8
|
-
|
|
9
|
-
The final output should be in the following JSON format:
|
|
10
|
-
|
|
11
|
-
{{"responses": [
|
|
12
|
-
{{
|
|
13
|
-
"response_id": "{{response_id_1}}",
|
|
14
|
-
"position": {{position_1}},
|
|
15
|
-
}},
|
|
16
|
-
{{
|
|
17
|
-
"response_id": "{{response_id_2}}",
|
|
18
|
-
"position": {{position_2}},
|
|
19
|
-
}}
|
|
20
|
-
...
|
|
21
|
-
]}}
|
|
6
|
+
POSITION - is the response AGREEING or DISAGREEING or is it UNCLEAR about the change being proposed in the question.
|
|
7
|
+
Choose one from [AGREEMENT, DISAGREEMENT, UNCLEAR]
|
|
22
8
|
|
|
23
9
|
You MUST include every response ID in the output.
|
|
24
10
|
If the response can not be labelled return empty sections where appropriate but you MUST return an entry
|
|
25
11
|
with the correct response ID for each input object
|
|
26
12
|
|
|
13
|
+
You MUST pick one of the given POSITION values.
|
|
14
|
+
You MUST not return an empty value for the POSITION of a response.
|
|
15
|
+
|
|
27
16
|
## EXAMPLE
|
|
28
17
|
Example 1:
|
|
29
18
|
Question: \n What are your thoughts on the proposed government changes to the policy about reducing school holidays?
|
|
30
19
|
Response: \n as a parent I have no idea why you would make this change. I guess you were thinking about increasing productivity but any productivity gains would be totally offset by the decrease in family time. \n
|
|
31
20
|
|
|
32
21
|
Output:
|
|
33
|
-
POSITION:
|
|
22
|
+
POSITION: DISAGREEMENT
|
|
34
23
|
|
|
35
24
|
Example 2:
|
|
36
25
|
Question: \n What are your thoughts on the proposed government changes to the policy about reducing school holidays?
|
|
37
26
|
Response: \n I think this is a great idea, our children will learn more if they are in school more \n
|
|
38
27
|
|
|
39
28
|
Output:
|
|
40
|
-
POSITION:
|
|
29
|
+
POSITION: AGREEMENT
|
|
41
30
|
|
|
42
31
|
Example 3:
|
|
43
32
|
Question: \n What are your thoughts on the proposed government changes to the policy about reducing school holidays?
|
|
@@ -45,7 +34,7 @@ Response: \n it will be good for our children to be around their friends more bu
|
|
|
45
34
|
less time with their children \n
|
|
46
35
|
|
|
47
36
|
Output:
|
|
48
|
-
POSITION:
|
|
37
|
+
POSITION: UNCLEAR
|
|
49
38
|
|
|
50
39
|
|
|
51
40
|
QUESTION: \n {question}
|
|
@@ -23,28 +23,8 @@ For each topic in your output:
|
|
|
23
23
|
2. Write a concise description that incorporates key insights from all constituent topics, this should only be a single sentence
|
|
24
24
|
3. Include the total count of original topics combined by summing the source_topic_counts of merged topics (or 1 for topics without a count)
|
|
25
25
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
{{"responses": [
|
|
29
|
-
{{"topic_label": "{{label for condensed topic 1}}",
|
|
30
|
-
"topic_description": "{{description for condensed topic 1}}",
|
|
31
|
-
"source_topic_count": {{sum of source_topic_counts from combined topics}}
|
|
32
|
-
}},
|
|
33
|
-
{{"topic_label": "{{label for condensed topic 2}}",
|
|
34
|
-
"topic_description": "{{description for condensed topic 2}}",
|
|
35
|
-
"source_topic_count": {{sum of source_topic_counts from combined topics}}
|
|
36
|
-
}},
|
|
37
|
-
{{"topic_label": "{{label for condensed topic 3}}",
|
|
38
|
-
"topic_description": "{{description for condensed topic 3}}",
|
|
39
|
-
"source_topic_count": {{sum of source_topic_counts from combined topics}}
|
|
40
|
-
}},
|
|
41
|
-
// Additional topics as necessary
|
|
42
|
-
]}}
|
|
43
|
-
|
|
44
|
-
[Question]
|
|
45
|
-
|
|
26
|
+
QUESTION:
|
|
46
27
|
{question}
|
|
47
28
|
|
|
48
|
-
|
|
49
|
-
|
|
29
|
+
TOPICS:
|
|
50
30
|
{responses}
|
|
@@ -7,28 +7,12 @@ Your task is to analyze the RESPONSES below and extract TOPICS such that:
|
|
|
7
7
|
2. Every distinct and relevant point of view in the responses should be captured by a topic
|
|
8
8
|
3. Each topic has a topic_label which summarizes the topic in a few words
|
|
9
9
|
4. Each topic has a topic_description which gives more detail about the topic in one or two sentences
|
|
10
|
-
5. The position field should just be the sentiment stated, and is either "
|
|
10
|
+
5. The position field should just be the sentiment stated, and is either "AGREEMENT" or "DISAGREEMENT" or "UNCLEAR"
|
|
11
11
|
6. There should be no duplicate topics
|
|
12
12
|
|
|
13
13
|
The topics identified will be used by policy makers to understand what the public like and don't like about the proposals.
|
|
14
14
|
|
|
15
|
-
Here is an example of how to extract topics from some responses
|
|
16
|
-
|
|
17
|
-
The final output should be in the following JSON format:
|
|
18
|
-
|
|
19
|
-
{{"responses": [
|
|
20
|
-
{{
|
|
21
|
-
"topic_label": "{{label_1}}",
|
|
22
|
-
"topic_description": "{{description_1}}",
|
|
23
|
-
"position": "{{position_1}}"
|
|
24
|
-
}},
|
|
25
|
-
{{
|
|
26
|
-
"topic_label": "{{label_2}}",
|
|
27
|
-
"topic_description": "{{description_2}}",
|
|
28
|
-
"position": "{{position_2}}"
|
|
29
|
-
}},
|
|
30
|
-
// Additional topics as necessary
|
|
31
|
-
]}}
|
|
15
|
+
Here is an example of how to extract topics from some responses:
|
|
32
16
|
|
|
33
17
|
## EXAMPLE
|
|
34
18
|
|
|
@@ -42,26 +26,10 @@ RESPONSES
|
|
|
42
26
|
{{"response": "I hate grapes", "position": "disagreement"}},
|
|
43
27
|
]
|
|
44
28
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
"topic_label": "Government overreach",
|
|
50
|
-
"topic_description": "The proposals would result in government interfering too much with citizen's lives",
|
|
51
|
-
"position": "disagreement"
|
|
52
|
-
}},
|
|
53
|
-
{{
|
|
54
|
-
"topic_label": "Regressive change",
|
|
55
|
-
"topic_description": "The change would have a larger negative impact on poorer people",
|
|
56
|
-
"position": "disagreement"
|
|
57
|
-
}},
|
|
58
|
-
{{
|
|
59
|
-
"topic_label": "Health",
|
|
60
|
-
"topic_description": "The change would result in people eating healthier diets",
|
|
61
|
-
"position": "disagreement"
|
|
62
|
-
}},
|
|
63
|
-
]}}
|
|
64
|
-
|
|
29
|
+
EXAMPLE OUTPUT (showing the structure)
|
|
30
|
+
- Topic 1: Government overreach (The proposals would result in government interfering too much with citizen's lives) - DISAGREEMENT
|
|
31
|
+
- Topic 2: Regressive change (The change would have a larger negative impact on poorer people) - DISAGREEMENT
|
|
32
|
+
- Topic 3: Health (The change would result in people eating healthier diets) - DISAGREEMENT
|
|
65
33
|
|
|
66
34
|
QUESTION:
|
|
67
35
|
{question}
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{system_prompt}
|
|
2
2
|
|
|
3
|
-
Your job is to help identify which topics come up in
|
|
3
|
+
Your job is to help identify which topics come up in free_text_responses to a question.
|
|
4
4
|
|
|
5
5
|
You will be given:
|
|
6
6
|
- a QUESTION that has been asked
|
|
7
|
-
- a TOPIC LIST of topics that are known to be present in
|
|
7
|
+
- a TOPIC LIST of topics that are known to be present in free_text_responses to this question. These will be structured as follows:
|
|
8
8
|
{{'topic_id': 'topic_description'}}
|
|
9
|
-
- a list of
|
|
9
|
+
- a list of FREE_TEXT_RESPONSES to the question. These will be structured as follows:
|
|
10
10
|
{{'response_id': 'free text response'}}
|
|
11
11
|
|
|
12
12
|
Your task is to analyze each response and decide which topics are present. Guidelines:
|
|
@@ -14,10 +14,11 @@ Your task is to analyze each response and decide which topics are present. Guide
|
|
|
14
14
|
- A response doesn't need to exactly match the language used in the TOPIC LIST, it should be considered a match if it expresses a similar sentiment.
|
|
15
15
|
- You must use the alphabetic 'topic_id' to indicate which topic you have assigned. Do not use the full topic description
|
|
16
16
|
- Each response can be assigned to multiple topics if it matches more than one topic from the TOPIC LIST.
|
|
17
|
+
- Each topic can only be assigned once per response, if the topic is mentioned more than once use the first mention for reasoning and stance.
|
|
17
18
|
- There is no limit on how many topics can be assigned to a response.
|
|
18
19
|
- For each assignment provide a single rationale for why you have chosen the label.
|
|
19
20
|
- For each topic identified in a response, indicate whether the response expresses a positive or negative stance toward that topic (options: 'POSITIVE' or 'NEGATIVE')
|
|
20
|
-
-
|
|
21
|
+
- You MUST use either 'POSITIVE' or 'NEGATIVE'
|
|
21
22
|
- The order of reasons and stances must align with the order of labels (e.g., stance_a applies to topic_a)
|
|
22
23
|
|
|
23
24
|
You MUST include every response ID in the output.
|
|
@@ -25,24 +26,6 @@ If the response can not be labelled return empty sections where appropriate but
|
|
|
25
26
|
with the correct response ID for each input object.
|
|
26
27
|
You must only return the alphabetic topic_ids in the labels section.
|
|
27
28
|
|
|
28
|
-
The final output should be in the following JSON format:
|
|
29
|
-
|
|
30
|
-
{{
|
|
31
|
-
"responses": [
|
|
32
|
-
{{
|
|
33
|
-
"response_id": "response_id_1",
|
|
34
|
-
"reasons": ["reason_a", "reason_b"],
|
|
35
|
-
"labels": ["topic_a", "topic_b"],
|
|
36
|
-
"stances": ["stance_a", "stance_b"],
|
|
37
|
-
}},
|
|
38
|
-
{{
|
|
39
|
-
"response_id": "response_id_2",
|
|
40
|
-
"reasons": ["reason_c"],
|
|
41
|
-
"labels": ["topic_c"],
|
|
42
|
-
"stances": ["stance_c"],
|
|
43
|
-
}}
|
|
44
|
-
]
|
|
45
|
-
}}
|
|
46
29
|
|
|
47
30
|
QUESTION:
|
|
48
31
|
|
|
@@ -52,6 +35,6 @@ TOPIC LIST:
|
|
|
52
35
|
|
|
53
36
|
{refined_themes}
|
|
54
37
|
|
|
55
|
-
|
|
38
|
+
FREE_TEXT_RESPONSES:
|
|
56
39
|
|
|
57
40
|
{responses}
|
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
{system_prompt}
|
|
2
2
|
|
|
3
|
-
You are tasked with refining
|
|
4
|
-
Your goal is to transform opinionated topics into neutral, well-structured, and distinct topics while preserving the essential information.
|
|
3
|
+
You are tasked with refining a list of topics generated from responses to a question.
|
|
5
4
|
|
|
6
5
|
## Input
|
|
7
|
-
You will receive a list of
|
|
6
|
+
You will receive a list of TOPICS. These topics explicitly tie opinions to whether a person agrees or disagrees with the question.
|
|
8
7
|
|
|
9
8
|
## Output
|
|
10
|
-
You will produce a list of
|
|
9
|
+
You will produce a list of CLEAR STANCE TOPICS based on the input. Each topic should have two parts:
|
|
11
10
|
1. A brief, clear topic label (3-7 words)
|
|
12
11
|
2. A more detailed topic description (1-2 sentences)
|
|
12
|
+
3. The source_topic_count field should be included for each topic and should reflect the number of original source topics that were merged to create this refined topic. If multiple source topics were combined, sum their individual counts. If only one source topic was used, simply retain its original count value.
|
|
13
|
+
|
|
13
14
|
|
|
14
15
|
## Guidelines
|
|
15
16
|
|
|
@@ -17,10 +18,11 @@ You will produce a list of NEUTRAL TOPICS based on the input. Each neutral topic
|
|
|
17
18
|
- Preserve all key information, details and concepts from the original topics.
|
|
18
19
|
- Ensure no significant details are lost in the refinement process.
|
|
19
20
|
|
|
20
|
-
2.
|
|
21
|
-
-
|
|
22
|
-
-
|
|
23
|
-
- Avoid
|
|
21
|
+
2. Clear Stance Formulation:
|
|
22
|
+
- Reformulate topics to express a clear stance that can be agreed or disagreed with.
|
|
23
|
+
- Use direct language like "Increased risk of X" rather than "X"
|
|
24
|
+
- Avoid double negatives and ambiguous phrasing.
|
|
25
|
+
- Phrase topics as definitive statements.
|
|
24
26
|
|
|
25
27
|
3. Avoid Response References:
|
|
26
28
|
- Do not use language that refers to multiple responses or respondents.
|
|
@@ -39,43 +41,15 @@ You will produce a list of NEUTRAL TOPICS based on the input. Each neutral topic
|
|
|
39
41
|
|
|
40
42
|
## Process
|
|
41
43
|
|
|
42
|
-
1. Analyze the
|
|
44
|
+
1. Analyze the TOPICS to identify key themes and information.
|
|
43
45
|
2. Group closely related topics together.
|
|
44
46
|
3. For each group or individual topic:
|
|
45
47
|
a. Distill the core concept, removing any bias or opinion.
|
|
46
48
|
b. Create a neutral, concise topic label.
|
|
47
49
|
c. Write a more detailed description that provides context without taking sides.
|
|
48
50
|
4. Review the entire list to ensure distinctiveness and adjust as needed.
|
|
49
|
-
5.
|
|
50
|
-
6.
|
|
51
|
-
7. Combine the topic label and description with a colon separator
|
|
52
|
-
|
|
53
|
-
Return your output in the following JSON format:
|
|
54
|
-
{{
|
|
55
|
-
"responses": [
|
|
56
|
-
{{"topic_id": "A", "topic": "{{topic label 1}}: {{topic description 1}}", "source_topic_count": {{count1}}}},
|
|
57
|
-
{{"topic_id": "B", "topic": "{{topic label 2}}: {{topic description 2}}", "source_topic_count": {{count2}}}},
|
|
58
|
-
{{"topic_id": "C", "topic": "{{topic label 3}}: {{topic description 3}}", "source_topic_count": {{count3}}}},
|
|
59
|
-
// Additional topics as necessary
|
|
60
|
-
]
|
|
61
|
-
}}
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
## EXAMPLE
|
|
65
|
-
|
|
66
|
-
OPINIONATED TOPIC:
|
|
67
|
-
"Economic impact: Many respondents who support the policy believe it will create jobs and boost the economy, it could raise GDP by 2%. [source_topic_count: 15]"
|
|
68
|
-
|
|
69
|
-
NEUTRAL TOPIC:
|
|
70
|
-
{{
|
|
71
|
-
"topic_id": "A",
|
|
72
|
-
"topic": "Economic Impact on Employment: The policy's potential effects on job creation and overall economic growth, including potential for a 2% increase in GDP.",
|
|
73
|
-
"source_topic_count": 15
|
|
74
|
-
}}
|
|
75
|
-
|
|
76
|
-
Remember, your goal is to create a list of neutral, informative, and distinct topics that accurately represent the content of the original opinionated topics without any bias or references to responses.
|
|
77
|
-
|
|
78
|
-
|
|
51
|
+
5. Assign each output topic a topic_id of one or more uppercase letters (starting from 'A'; for the 27th topic use 'AA')
|
|
52
|
+
6. Combine the topic label and description with a colon separator
|
|
79
53
|
|
|
80
|
-
|
|
54
|
+
TOPICS:
|
|
81
55
|
{responses}
|