themefinder 0.5.4__tar.gz → 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of themefinder might be problematic. Click here for more details.
- {themefinder-0.5.4 → themefinder-0.6.3}/PKG-INFO +25 -9
- {themefinder-0.5.4 → themefinder-0.6.3}/README.md +24 -9
- {themefinder-0.5.4 → themefinder-0.6.3}/pyproject.toml +1 -1
- {themefinder-0.5.4 → themefinder-0.6.3}/src/themefinder/__init__.py +6 -2
- {themefinder-0.5.4 → themefinder-0.6.3}/src/themefinder/core.py +204 -97
- themefinder-0.6.3/src/themefinder/llm_batch_processor.py +442 -0
- themefinder-0.6.3/src/themefinder/models.py +351 -0
- themefinder-0.6.3/src/themefinder/prompts/detail_detection.txt +19 -0
- {themefinder-0.5.4 → themefinder-0.6.3}/src/themefinder/prompts/sentiment_analysis.txt +8 -19
- {themefinder-0.5.4 → themefinder-0.6.3}/src/themefinder/prompts/theme_condensation.txt +2 -22
- {themefinder-0.5.4 → themefinder-0.6.3}/src/themefinder/prompts/theme_generation.txt +6 -38
- {themefinder-0.5.4 → themefinder-0.6.3}/src/themefinder/prompts/theme_mapping.txt +6 -23
- themefinder-0.6.3/src/themefinder/prompts/theme_refinement.txt +55 -0
- {themefinder-0.5.4 → themefinder-0.6.3}/src/themefinder/prompts/theme_target_alignment.txt +2 -10
- themefinder-0.5.4/src/themefinder/llm_batch_processor.py +0 -310
- themefinder-0.5.4/src/themefinder/prompts/theme_refinement.txt +0 -81
- {themefinder-0.5.4 → themefinder-0.6.3}/LICENCE +0 -0
- {themefinder-0.5.4 → themefinder-0.6.3}/src/themefinder/prompts/consultation_system_prompt.txt +0 -0
- {themefinder-0.5.4 → themefinder-0.6.3}/src/themefinder/themefinder_logging.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: themefinder
|
|
3
|
-
Version: 0.5.4
|
|
3
|
+
Version: 0.6.3
|
|
4
4
|
Summary: A topic modelling Python package designed for analysing one-to-many question-answer data eg free-text survey responses.
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: i.AI
|
|
@@ -49,9 +49,9 @@ ThemeFinder takes as input a [pandas DataFrame](https://pandas.pydata.org/docs/r
|
|
|
49
49
|
- `response_id`: A unique identifier for each response
|
|
50
50
|
- `response`: The free text survey response
|
|
51
51
|
|
|
52
|
-
ThemeFinder
|
|
52
|
+
ThemeFinder now supports a range of language models through structured outputs.
|
|
53
53
|
|
|
54
|
-
The function `find_themes` identifies common themes in
|
|
54
|
+
The function `find_themes` identifies common themes in responses and labels them; it also outputs results from intermediate steps in the theme finding pipeline.
|
|
55
55
|
|
|
56
56
|
For this example, import the following Python packages into your virtual environment: `asyncio`, `pandas`, `langchain`. And import `themefinder` as described above.
|
|
57
57
|
|
|
@@ -81,7 +81,6 @@ load_dotenv()
|
|
|
81
81
|
llm = AzureChatOpenAI(
|
|
82
82
|
model="gpt-4o",
|
|
83
83
|
temperature=0,
|
|
84
|
-
model_kwargs={"response_format": {"type": "json_object"}},
|
|
85
84
|
)
|
|
86
85
|
|
|
87
86
|
# Set up your data
|
|
@@ -97,18 +96,15 @@ question = "What do you think of ThemeFinder?"
|
|
|
97
96
|
# Make the system prompt specific to your use case
|
|
98
97
|
system_prompt = "You are an AI evaluation tool analyzing survey responses about a Python package."
|
|
99
98
|
|
|
100
|
-
# Run the function to find themes
|
|
101
|
-
# We use asyncio to query LLM endpoints asynchronously, so we need to await our function
|
|
99
|
+
# Run the function to find themes, we use asyncio to query LLM endpoints asynchronously, so we need to await our function
|
|
102
100
|
async def main():
|
|
103
|
-
result = await find_themes(responses_df, llm, question, system_prompt)
|
|
101
|
+
result = await find_themes(responses_df, llm, question, system_prompt=system_prompt)
|
|
104
102
|
print(result)
|
|
105
103
|
|
|
106
104
|
if __name__ == "__main__":
|
|
107
105
|
asyncio.run(main())
|
|
108
|
-
|
|
109
106
|
```
|
|
110
107
|
|
|
111
|
-
|
|
112
108
|
## ThemeFinder pipeline
|
|
113
109
|
|
|
114
110
|
ThemeFinder's pipeline consists of five distinct stages, each utilizing a specialized LLM prompt:
|
|
@@ -145,6 +141,25 @@ The file `src/themefinder.core.py` contains the function `find_themes` which run
|
|
|
145
141
|
**For more detail - see the docs: [https://i-dot-ai.github.io/themefinder/](https://i-dot-ai.github.io/themefinder/).**
|
|
146
142
|
|
|
147
143
|
|
|
144
|
+
## Model Compatibility
|
|
145
|
+
|
|
146
|
+
ThemeFinder's structured output approach makes it compatible with a wide range of language models from various providers. This list is non-exhaustive, and other models may also work effectively:
|
|
147
|
+
|
|
148
|
+
### OpenAI Models
|
|
149
|
+
- GPT-4, GPT-4o, GPT-4.1
|
|
150
|
+
- All Azure OpenAI deployments
|
|
151
|
+
|
|
152
|
+
### Google Models
|
|
153
|
+
- Gemini series (1.5 Pro, 2.0 Pro, etc.)
|
|
154
|
+
|
|
155
|
+
### Anthropic Models
|
|
156
|
+
- Claude series (Claude 3 Opus, Sonnet, Haiku, etc.)
|
|
157
|
+
|
|
158
|
+
### Open Source Models
|
|
159
|
+
- Llama 2, Llama 3
|
|
160
|
+
- Mistral models (e.g., Mistral 7B, Mixtral)
|
|
161
|
+
|
|
162
|
+
|
|
148
163
|
## License
|
|
149
164
|
|
|
150
165
|
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
@@ -155,3 +170,4 @@ The documentation is [© Crown copyright](https://www.nationalarchives.gov.uk/in
|
|
|
155
170
|
## Feedback
|
|
156
171
|
|
|
157
172
|
If you have feedback on this package, please fill in our [feedback form](https://forms.gle/85xUSMvxGzSSKQ499) or contact us with questions or feedback at packages@cabinetoffice.gov.uk.
|
|
173
|
+
|
|
@@ -18,9 +18,9 @@ ThemeFinder takes as input a [pandas DataFrame](https://pandas.pydata.org/docs/r
|
|
|
18
18
|
- `response_id`: A unique identifier for each response
|
|
19
19
|
- `response`: The free text survey response
|
|
20
20
|
|
|
21
|
-
ThemeFinder
|
|
21
|
+
ThemeFinder now supports a range of language models through structured outputs.
|
|
22
22
|
|
|
23
|
-
The function `find_themes` identifies common themes in
|
|
23
|
+
The function `find_themes` identifies common themes in responses and labels them; it also outputs results from intermediate steps in the theme finding pipeline.
|
|
24
24
|
|
|
25
25
|
For this example, import the following Python packages into your virtual environment: `asyncio`, `pandas`, `langchain`. And import `themefinder` as described above.
|
|
26
26
|
|
|
@@ -50,7 +50,6 @@ load_dotenv()
|
|
|
50
50
|
llm = AzureChatOpenAI(
|
|
51
51
|
model="gpt-4o",
|
|
52
52
|
temperature=0,
|
|
53
|
-
model_kwargs={"response_format": {"type": "json_object"}},
|
|
54
53
|
)
|
|
55
54
|
|
|
56
55
|
# Set up your data
|
|
@@ -66,18 +65,15 @@ question = "What do you think of ThemeFinder?"
|
|
|
66
65
|
# Make the system prompt specific to your use case
|
|
67
66
|
system_prompt = "You are an AI evaluation tool analyzing survey responses about a Python package."
|
|
68
67
|
|
|
69
|
-
# Run the function to find themes
|
|
70
|
-
# We use asyncio to query LLM endpoints asynchronously, so we need to await our function
|
|
68
|
+
# Run the function to find themes, we use asyncio to query LLM endpoints asynchronously, so we need to await our function
|
|
71
69
|
async def main():
|
|
72
|
-
result = await find_themes(responses_df, llm, question, system_prompt)
|
|
70
|
+
result = await find_themes(responses_df, llm, question, system_prompt=system_prompt)
|
|
73
71
|
print(result)
|
|
74
72
|
|
|
75
73
|
if __name__ == "__main__":
|
|
76
74
|
asyncio.run(main())
|
|
77
|
-
|
|
78
75
|
```
|
|
79
76
|
|
|
80
|
-
|
|
81
77
|
## ThemeFinder pipeline
|
|
82
78
|
|
|
83
79
|
ThemeFinder's pipeline consists of five distinct stages, each utilizing a specialized LLM prompt:
|
|
@@ -114,6 +110,25 @@ The file `src/themefinder.core.py` contains the function `find_themes` which run
|
|
|
114
110
|
**For more detail - see the docs: [https://i-dot-ai.github.io/themefinder/](https://i-dot-ai.github.io/themefinder/).**
|
|
115
111
|
|
|
116
112
|
|
|
113
|
+
## Model Compatibility
|
|
114
|
+
|
|
115
|
+
ThemeFinder's structured output approach makes it compatible with a wide range of language models from various providers. This list is non-exhaustive, and other models may also work effectively:
|
|
116
|
+
|
|
117
|
+
### OpenAI Models
|
|
118
|
+
- GPT-4, GPT-4o, GPT-4.1
|
|
119
|
+
- All Azure OpenAI deployments
|
|
120
|
+
|
|
121
|
+
### Google Models
|
|
122
|
+
- Gemini series (1.5 Pro, 2.0 Pro, etc.)
|
|
123
|
+
|
|
124
|
+
### Anthropic Models
|
|
125
|
+
- Claude series (Claude 3 Opus, Sonnet, Haiku, etc.)
|
|
126
|
+
|
|
127
|
+
### Open Source Models
|
|
128
|
+
- Llama 2, Llama 3
|
|
129
|
+
- Mistral models (e.g., Mistral 7B, Mixtral)
|
|
130
|
+
|
|
131
|
+
|
|
117
132
|
## License
|
|
118
133
|
|
|
119
134
|
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
@@ -123,4 +138,4 @@ The documentation is [© Crown copyright](https://www.nationalarchives.gov.uk/in
|
|
|
123
138
|
|
|
124
139
|
## Feedback
|
|
125
140
|
|
|
126
|
-
If you have feedback on this package, please fill in our [feedback form](https://forms.gle/85xUSMvxGzSSKQ499) or contact us with questions or feedback at packages@cabinetoffice.gov.uk.
|
|
141
|
+
If you have feedback on this package, please fill in our [feedback form](https://forms.gle/85xUSMvxGzSSKQ499) or contact us with questions or feedback at packages@cabinetoffice.gov.uk.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "themefinder"
|
|
3
|
-
version = "0.5.4"
|
|
3
|
+
version = "0.6.3"
|
|
4
4
|
description = "A topic modelling Python package designed for analysing one-to-many question-answer data eg free-text survey responses."
|
|
5
5
|
authors = ["i.AI <packages@cabinetoffice.gov.uk>"]
|
|
6
6
|
packages = [{include = "themefinder", from = "src"}]
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
from .core import (
|
|
2
2
|
find_themes,
|
|
3
3
|
sentiment_analysis,
|
|
4
|
-
theme_generation,
|
|
5
4
|
theme_condensation,
|
|
6
|
-
|
|
5
|
+
theme_generation,
|
|
7
6
|
theme_mapping,
|
|
7
|
+
theme_refinement,
|
|
8
|
+
theme_target_alignment,
|
|
9
|
+
detail_detection,
|
|
8
10
|
)
|
|
9
11
|
|
|
10
12
|
__all__ = [
|
|
@@ -13,6 +15,8 @@ __all__ = [
|
|
|
13
15
|
"theme_generation",
|
|
14
16
|
"theme_condensation",
|
|
15
17
|
"theme_refinement",
|
|
18
|
+
"theme_target_alignment",
|
|
16
19
|
"theme_mapping",
|
|
20
|
+
"detail_detection",
|
|
17
21
|
]
|
|
18
22
|
__version__ = "0.1.0"
|