user-simulator 0.2.5__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- config/__init__.py +0 -0
- config/__pycache__/__init__.cpython-312.pyc +0 -0
- config/asr_configuration/__init__.py +0 -0
- config/asr_configuration/default_asr_config.yml +8 -0
- config/misc/__init__.py +0 -0
- config/misc/sound/__init__.py +0 -0
- config/misc/sound/c1bccaed.wav +0 -0
- config/models/__init__.py +0 -0
- config/models/models.yml +15 -0
- config/patterns/__init__.py +0 -0
- config/patterns/end_conversation_patterns.yml +40 -0
- config/patterns/fallback_patterns.yml +9 -0
- config/personalities/__init__.py +0 -0
- config/personalities/conversational-user.yml +6 -0
- config/personalities/curious-user.yml +5 -0
- config/personalities/direct-user.yml +4 -0
- config/personalities/disorganized-user.yml +5 -0
- config/personalities/elderly-user.yml +6 -0
- config/personalities/formal-user.yml +4 -0
- config/personalities/impatient-user.yml +4 -0
- config/personalities/rude-user.yml +6 -0
- config/personalities/sarcastic-user.yml +4 -0
- config/personalities/skeptical-user.yml +5 -0
- config/types/__init__.py +0 -0
- config/types/currency.yml +10 -0
- config/types/phone_number.yml +4 -0
- data/__init__.py +0 -0
- data/__pycache__/__init__.cpython-312.pyc +0 -0
- data/audio_files/__init__.py +0 -0
- data/audio_files/output.mp3 +0 -0
- data/audio_files/output.wav +0 -0
- data/cache/__init__.py +0 -0
- data/cache/image_register.json +238 -0
- data/cache/pdf_register.json +3 -0
- data/cache/webpage_register.json +67 -0
- data/connectors/__init__.py +0 -0
- data/connectors/dialogflow.yml +16 -0
- data/connectors/julie.yml +37 -0
- data/connectors/kuki.yml +16 -0
- data/connectors/millionbot_ada.yml +25 -0
- data/connectors/rasa.yml +8 -0
- data/connectors/serviceform.yml +17 -0
- data/connectors/taskyto.yml +8 -0
- data/element_lists/__init__.py +0 -0
- data/element_lists/list_of_things.yml +7 -0
- data/list_functions/__init__.py +0 -0
- data/pdfs/Normativa_TFMs_EPS.pdf +0 -0
- data/pdfs/__init__.py +0 -0
- data/readme_data/__init__.py +0 -0
- data/readme_data/img.png +0 -0
- user_sim/cli/cli.py +116 -20
- user_sim/cli/sensei_chat.py +58 -236
- user_sim/core/role_structure.py +4 -3
- user_sim/handlers/asr_module.py +5 -2
- user_sim/handlers/pdf_parser_module.py +1 -0
- user_sim/utils/config.py +10 -2
- user_sim/utils/register_management.py +2 -3
- user_sim/utils/utilities.py +60 -46
- {user_simulator-0.2.5.dist-info → user_simulator-0.3.0.dist-info}/METADATA +5 -2
- user_simulator-0.3.0.dist-info/RECORD +99 -0
- {user_simulator-0.2.5.dist-info → user_simulator-0.3.0.dist-info}/top_level.txt +1 -0
- user_simulator-0.2.5.dist-info/RECORD +0 -49
- {user_simulator-0.2.5.dist-info → user_simulator-0.3.0.dist-info}/WHEEL +0 -0
- {user_simulator-0.2.5.dist-info → user_simulator-0.3.0.dist-info}/entry_points.txt +0 -0
- {user_simulator-0.2.5.dist-info → user_simulator-0.3.0.dist-info}/licenses/LICENSE.txt +0 -0
user_sim/cli/sensei_chat.py
CHANGED
@@ -1,143 +1,64 @@
|
|
1
1
|
import timeit
|
2
|
-
import yaml
|
3
|
-
import pandas as pd
|
4
2
|
from argparse import Namespace
|
5
|
-
from
|
6
|
-
|
7
|
-
from argparse import ArgumentParser
|
8
|
-
from colorama import Fore, Style
|
9
|
-
# from technologies.chatbot_connectors import (Chatbot, ChatbotRasa, ChatbotTaskyto, ChatbotMillionBot,
|
10
|
-
# ChatbotServiceform)
|
3
|
+
from cli import parse_chat_arguments
|
4
|
+
from importlib.resources import files
|
11
5
|
from user_sim.core.data_extraction import DataExtraction
|
12
6
|
from user_sim.core.role_structure import *
|
13
7
|
from user_sim.core.user_simulator import UserSimulator
|
14
8
|
from user_sim.utils.show_logs import *
|
15
9
|
from user_sim.utils.utilities import *
|
10
|
+
from user_sim.utils import config
|
16
11
|
from user_sim.utils.token_cost_calculator import create_cost_dataset
|
17
12
|
from user_sim.utils.register_management import clean_temp_files
|
18
13
|
from chatbot_connectors.cli import ChatbotFactory, parse_connector_params
|
19
14
|
|
20
|
-
|
21
15
|
# check_keys(["OPENAI_API_KEY"])
|
22
|
-
current_script_dir = os.path.dirname(os.path.abspath(__file__))
|
23
|
-
root_path = os.path.abspath(os.path.join(current_script_dir, ".."))
|
24
|
-
|
25
|
-
def print_user(msg):
|
26
|
-
clean_text = re.sub(r'\(Web page content: [^)]*>>\)', '', msg)
|
27
|
-
clean_text = re.sub(r'\(PDF content: [^)]*>>\)', '', clean_text)
|
28
|
-
clean_text = re.sub(r'\(Image description[^)]*\)', '', clean_text)
|
29
|
-
print(f"{Fore.GREEN}User:{Style.RESET_ALL} {clean_text}")
|
30
|
-
|
31
|
-
|
32
|
-
def print_chatbot(msg):
|
33
|
-
clean_text = re.sub(r'\(Web page content:.*?\>\>\)', '', msg, flags=re.DOTALL)
|
34
|
-
clean_text = re.sub(r'\(PDF content:.*?\>\>\)', '', clean_text, flags=re.DOTALL)
|
35
|
-
clean_text = re.sub(r'\(Image description[^)]*\)', '', clean_text)
|
36
|
-
print(f"{Fore.LIGHTRED_EX}Chatbot:{Style.RESET_ALL} {clean_text}")
|
37
|
-
|
38
|
-
def load_yaml_arguments(project_path):
|
39
|
-
files = os.listdir(project_path)
|
40
|
-
|
41
|
-
run_file = next((f for f in files if f in ["run.yml", "run.yaml"]), None)
|
42
|
-
|
43
|
-
if not run_file:
|
44
|
-
raise FileNotFoundError(f"Couldn't find run.yml file.")
|
45
|
-
|
46
|
-
run_yaml_path = os.path.join(project_path, run_file)
|
47
|
-
|
48
|
-
with open(run_yaml_path, 'r', encoding='utf-8') as f:
|
49
|
-
yaml_args = yaml.safe_load(f)
|
50
|
-
|
51
|
-
if yaml_args:
|
52
|
-
if "execution_parameters" in yaml_args.keys():
|
53
|
-
parameters = yaml_args["execution_parameters"]
|
54
|
-
dict_parameters = {param: True for param in parameters}
|
55
|
-
del yaml_args["execution_parameters"]
|
56
|
-
yaml_args.update(dict_parameters)
|
57
|
-
|
58
|
-
yaml_args["project_path"] = project_path
|
59
|
-
|
60
|
-
return yaml_args or {}
|
61
16
|
|
17
|
+
def configure_project(project_path):
|
18
|
+
# sensei
|
19
|
+
config.src_path = files("src")
|
20
|
+
config.cache_path = files("data") / "cache"
|
21
|
+
config.pdfs_path = files("data") / "pdfs"
|
22
|
+
config.audio_files_path = files("data") / "audio_files"
|
23
|
+
config.default_types_path = files("config") / "types"
|
24
|
+
config.default_personalities_path = files("config") / "personalities"
|
25
|
+
|
26
|
+
# project
|
27
|
+
config.project_folder_path = project_path
|
28
|
+
config.profiles_path = os.path.join(project_path, "profiles")
|
29
|
+
config.custom_personalities_path = os.path.join(project_path, "personalities")
|
30
|
+
config.custom_types_path = os.path.join(project_path, "types")
|
31
|
+
custom_types = load_yaml_files_from_folder(config.custom_types_path)
|
62
32
|
|
63
|
-
def load_yaml_files_from_folder(folder_path, existing_keys=None):
|
64
|
-
types = {}
|
65
|
-
for filename in os.listdir(folder_path):
|
66
|
-
if filename.endswith((".yml", ".yaml")):
|
67
|
-
file_path = os.path.join(folder_path, filename)
|
68
|
-
try:
|
69
|
-
with open(file_path, "r", encoding="utf-8") as f:
|
70
|
-
data = yaml.safe_load(f)
|
71
|
-
name = data.get("name")
|
72
|
-
if name:
|
73
|
-
if not existing_keys or name not in existing_keys:
|
74
|
-
types[name] = data
|
75
|
-
except yaml.YAMLError as e:
|
76
|
-
logger.error(f"Error reading {file_path}: {e}")
|
77
|
-
return types
|
78
33
|
|
34
|
+
default_types = load_yaml_files_from_folder(config.default_types_path, existing_keys=custom_types.keys())
|
35
|
+
config.types_dict = {**default_types, **custom_types}
|
79
36
|
|
80
|
-
def configure_project(project_path):
|
81
37
|
|
82
|
-
|
83
|
-
config
|
84
|
-
config.custom_personalities_folder = os.path.join(project_path, "personalities")
|
38
|
+
def _setup_configuration() -> Namespace:
|
39
|
+
"""Parse command line arguments, validate config, and create output dir.
|
85
40
|
|
86
|
-
|
87
|
-
|
41
|
+
Returns:
|
42
|
+
The parsed and validated command line arguments
|
88
43
|
|
89
|
-
|
90
|
-
|
44
|
+
Raises:
|
45
|
+
TracerError: If the specified technology is invalid
|
46
|
+
"""
|
91
47
|
|
92
|
-
|
93
|
-
default_types = load_yaml_files_from_folder(default_types_path, existing_keys=custom_types.keys())
|
94
|
-
config.types_dict = {**default_types, **custom_types}
|
48
|
+
args = parse_chat_arguments()
|
95
49
|
|
96
|
-
|
97
|
-
|
98
|
-
# with open(connec, 'r', encoding='utf-8') as f:
|
99
|
-
# con_yaml = yaml.safe_load(f)
|
100
|
-
#
|
101
|
-
#
|
102
|
-
# if len(args)<2 or not con_yaml["parameters"]:
|
103
|
-
# logger.warning("No parameters added for connector configuration. They may not have been set as input arguments "
|
104
|
-
# "or declared as dynamic parameters in the connector file.")
|
105
|
-
# return con_yaml
|
106
|
-
#
|
107
|
-
# parameters = args[1]
|
108
|
-
# if isinstance(parameters, str):
|
109
|
-
# parameters = json.loads(parameters)
|
110
|
-
#
|
111
|
-
# param_key_list = list(parameters.keys())
|
112
|
-
# if Counter(con_yaml["parameters"]) != Counter(param_key_list):
|
113
|
-
# raise UnmachedList("Parameters in yaml don't match parameters input in execution")
|
114
|
-
#
|
115
|
-
# def replace_values(obj_dict, src_dict):
|
116
|
-
# for key in obj_dict:
|
117
|
-
# if isinstance(obj_dict[key], dict):
|
118
|
-
# replace_values(obj_dict[key], src_dict)
|
119
|
-
# elif key in src_dict:
|
120
|
-
# obj_dict[key] = src_dict[key]
|
121
|
-
#
|
122
|
-
# replace_values(con_yaml, parameters)
|
123
|
-
# return con_yaml
|
124
|
-
|
125
|
-
# def _setup_configuration() -> Namespace:
|
126
|
-
# """Parse command line arguments, validate config, and create output dir.
|
127
|
-
#
|
128
|
-
# Returns:
|
129
|
-
# The parsed and validated command line arguments
|
130
|
-
#
|
131
|
-
# Raises:
|
132
|
-
# TracerError: If the specified technology is invalid
|
133
|
-
# """
|
134
|
-
# args = parse_chat_arguments()
|
135
|
-
#
|
136
|
-
# logger = create_logger(args.verbose, 'Info Logger')
|
137
|
-
# logger.info('Logs enabled!')
|
50
|
+
logger = create_logger(args.verbose, 'Info Logger')
|
51
|
+
logger.info('Logs enabled!')
|
138
52
|
|
53
|
+
configure_project(args.project_path)
|
139
54
|
|
55
|
+
# check_keys(["OPENAI_API_KEY"])
|
56
|
+
config.test_cases_folder = args.extract
|
57
|
+
config.ignore_cache = args.ignore_cache
|
58
|
+
config.update_cache = args.update_cache
|
59
|
+
config.clean_cache = args.clean_cache
|
140
60
|
|
61
|
+
return args
|
141
62
|
|
142
63
|
|
143
64
|
def get_conversation_metadata(user_profile, the_user, serial=None):
|
@@ -162,6 +83,7 @@ def get_conversation_metadata(user_profile, the_user, serial=None):
|
|
162
83
|
|
163
84
|
return conversation_list
|
164
85
|
|
86
|
+
|
165
87
|
def ask_about_metadata(up):
|
166
88
|
if not up.ask_about.variable_list:
|
167
89
|
return up.ask_about.str_list
|
@@ -196,9 +118,9 @@ def get_conversation_metadata(user_profile, the_user, serial=None):
|
|
196
118
|
return data_list
|
197
119
|
|
198
120
|
def total_cost_calculator():
|
121
|
+
import pandas as pd
|
199
122
|
encoding = get_encoding(config.cost_ds_path)["encoding"]
|
200
123
|
cost_df = pd.read_csv(config.cost_ds_path, encoding=encoding)
|
201
|
-
|
202
124
|
total_sum_cost = cost_df[cost_df["Conversation"]==config.conversation_name]['Total Cost'].sum()
|
203
125
|
total_sum_cost = round(float(total_sum_cost), 8)
|
204
126
|
|
@@ -226,57 +148,19 @@ def get_conversation_metadata(user_profile, the_user, serial=None):
|
|
226
148
|
return metadata
|
227
149
|
|
228
150
|
|
229
|
-
def parse_profiles(user_path):
|
230
|
-
def is_yaml(file):
|
231
|
-
if not file.endswith(('.yaml', '.yml')):
|
232
|
-
return False
|
233
|
-
try:
|
234
|
-
with open(file, 'r') as f:
|
235
|
-
yaml.safe_load(f)
|
236
|
-
return True
|
237
|
-
except yaml.YAMLError:
|
238
|
-
return False
|
239
|
-
|
240
|
-
list_of_files = []
|
241
|
-
if os.path.isfile(user_path):
|
242
|
-
if is_yaml(user_path):
|
243
|
-
yaml_file = read_yaml(user_path)
|
244
|
-
return [yaml_file]
|
245
|
-
else:
|
246
|
-
raise Exception(f'The user profile file is not a yaml: {user_path}')
|
247
|
-
elif os.path.isdir(user_path):
|
248
|
-
for root, _, files in os.walk(user_path):
|
249
|
-
for file in files:
|
250
|
-
if is_yaml(os.path.join(root, file)):
|
251
|
-
path = root + '/' + file
|
252
|
-
yaml_file = read_yaml(path)
|
253
|
-
list_of_files.append(yaml_file)
|
254
|
-
|
255
|
-
return list_of_files
|
256
|
-
else:
|
257
|
-
raise Exception(f'Invalid path for user profile operation: {user_path}')
|
258
|
-
|
259
|
-
|
260
151
|
def build_chatbot(technology, connector):
|
261
|
-
# chatbot_builder = {
|
262
|
-
# 'rasa': RasaChatbot,
|
263
|
-
# 'taskyto': ChatbotTaskyto,
|
264
|
-
# # 'serviceform': ChatbotServiceform(connector),
|
265
|
-
# 'millionbot': MillionBot,
|
266
|
-
# 'custom': CustomChatbot
|
267
|
-
# }
|
268
|
-
# chatbot_class = chatbot_builder.get(technology, CustomChatbot)
|
269
152
|
parsed_connector = parse_connector_params(connector)
|
270
153
|
chatbot = ChatbotFactory.create_chatbot(chatbot_type=technology, **parsed_connector)
|
271
154
|
return chatbot
|
272
155
|
|
156
|
+
|
273
157
|
def generate_conversation(technology, connector, user,
|
274
|
-
personality,
|
158
|
+
personality, output, project_folder):
|
275
159
|
profiles = parse_profiles(user)
|
276
160
|
serial = generate_serial()
|
277
161
|
config.serial = serial
|
278
|
-
create_cost_dataset(serial,
|
279
|
-
my_execution_stat = ExecutionStats(
|
162
|
+
create_cost_dataset(serial, output)
|
163
|
+
my_execution_stat = ExecutionStats(output, serial)
|
280
164
|
the_chatbot = build_chatbot(technology, connector)
|
281
165
|
|
282
166
|
|
@@ -386,7 +270,7 @@ def generate_conversation(technology, connector, user,
|
|
386
270
|
the_user.update_history("Assistant", "Error: The server did not respond.")
|
387
271
|
break
|
388
272
|
|
389
|
-
if
|
273
|
+
if output:
|
390
274
|
end_time_conversation = timeit.default_timer()
|
391
275
|
conversation_time = end_time_conversation - start_time_conversation
|
392
276
|
formatted_time_conv = timedelta(seconds=conversation_time).total_seconds()
|
@@ -397,7 +281,7 @@ def generate_conversation(technology, connector, user,
|
|
397
281
|
dg_dataframe = the_user.data_gathering.gathering_register
|
398
282
|
csv_extraction = the_user.goal_style[1] if the_user.goal_style[0] == 'all_answered' else False
|
399
283
|
answer_validation_data = (dg_dataframe, csv_extraction)
|
400
|
-
save_test_conv(history, metadata, test_name,
|
284
|
+
save_test_conv(history, metadata, test_name, output, serial,
|
401
285
|
formatted_time_conv, response_time, answer_validation_data, counter=i)
|
402
286
|
|
403
287
|
config.total_individual_cost = 0
|
@@ -419,93 +303,31 @@ def generate_conversation(technology, connector, user,
|
|
419
303
|
if config.clean_cache:
|
420
304
|
clean_temp_files()
|
421
305
|
|
422
|
-
if
|
306
|
+
if output and len(my_execution_stat.test_names) == len(profiles):
|
423
307
|
my_execution_stat.show_global_stats()
|
424
308
|
my_execution_stat.export_stats()
|
425
|
-
elif
|
309
|
+
elif output:
|
426
310
|
logger.warning("Stats export was enabled but couldn't retrieve all stats. No stats will be exported.")
|
427
311
|
else:
|
428
312
|
pass
|
429
313
|
|
430
314
|
end_alarm()
|
431
315
|
|
432
|
-
def main():
|
433
|
-
parser = ArgumentParser(description='Conversation generator for a chatbot')
|
434
|
-
|
435
|
-
parser.add_argument('--run_from_yaml', type=str, help='Carga los argumentos desde un archivo YAML')
|
436
|
-
|
437
|
-
parser.add_argument('--technology', required=False,
|
438
|
-
choices=['rasa', 'taskyto', 'ada-uam', 'millionbot', 'genion', 'lola', 'serviceform', 'kuki', 'julie', 'rivas_catalina', 'saic_malaga'],
|
439
|
-
help='Technology the chatbot is implemented in')
|
440
|
-
# parser.add_argument('--connector', required=False, help='path to the connector configuration file')
|
441
|
-
parser.add_argument('--connector-params', required=False, help='dynamic parameters for the selected chatbot connector')
|
442
|
-
parser.add_argument('--project_path', required=False, help='Project folder PATH where all testing data is stored')
|
443
|
-
parser.add_argument('--user_profile', required=False, help='User profile file or user profile folder to test the chatbot')
|
444
|
-
parser.add_argument('--personality', required=False, help='Personality file')
|
445
|
-
parser.add_argument('--extract', default=False, help='Path to store conversation user-chatbot')
|
446
|
-
parser.add_argument('--verbose', action='store_true', help='Shows debug prints')
|
447
|
-
parser.add_argument('--clean_cache', action='store_true', help='Deletes temporary files.')
|
448
|
-
parser.add_argument('--ignore_cache', action='store_true', help='Ignores cache for temporary files')
|
449
|
-
parser.add_argument('--update_cache', action='store_true', help='Overwrites temporary files in cache')
|
450
|
-
parser_args, unknown_args = parser.parse_known_args()
|
451
|
-
|
452
|
-
if parser_args.run_from_yaml:
|
453
|
-
if len(sys.argv) > 3: # sys.argv[0] is script, sys.argv[1] is --run_from_yaml, sys.argv[2] is YAML
|
454
|
-
parser.error("No other arguments can be provided when using --run_from_yaml.")
|
455
|
-
|
456
|
-
yaml_args = load_yaml_arguments(parser_args.run_from_yaml)
|
457
|
-
|
458
|
-
default_flags = {
|
459
|
-
"connector_parameters": None,
|
460
|
-
"personality": None,
|
461
|
-
"verbose": False,
|
462
|
-
"clean_cache": False,
|
463
|
-
"ignore_cache": False,
|
464
|
-
"update_cache": False
|
465
|
-
}
|
466
|
-
for flag, default in default_flags.items():
|
467
|
-
yaml_args.setdefault(flag, default)
|
468
|
-
|
469
|
-
class ArgsNamespace:
|
470
|
-
def __init__(self, **entries):
|
471
|
-
self.__dict__.update(entries)
|
472
|
-
|
473
|
-
parser_args = ArgsNamespace(**yaml_args)
|
474
|
-
|
475
|
-
else:
|
476
|
-
required_args = ['technology', 'user_profile', 'connector_params']
|
477
|
-
missing_args = [arg for arg in required_args if getattr(parser_args, arg) is None]
|
478
|
-
|
479
|
-
if missing_args:
|
480
|
-
parser.error(f"The following arguments are required when not using --run_from_yaml: {', '.join(missing_args)}")
|
481
|
-
|
482
|
-
configure_project(parser_args.project_path)
|
483
|
-
# config.root_path = os.path.abspath(os.path.join(current_script_dir, "../../.."))
|
484
|
-
# config.src_path = os.path.abspath(os.path.join(current_script_dir, "../.."))
|
485
|
-
profile_path = os.path.join(config.profiles_path, parser_args.user_profile)
|
486
|
-
|
487
|
-
print(config.src_path)
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
# check_keys(["OPENAI_API_KEY"])
|
492
|
-
config.test_cases_folder = parser_args.extract
|
493
|
-
config.ignore_cache = parser_args.ignore_cache
|
494
|
-
config.update_cache = parser_args.update_cache
|
495
|
-
config.clean_cache = parser_args.clean_cache
|
496
|
-
|
497
|
-
# if parser_args.connector_parameters:
|
498
|
-
# connector = configure_connector(parser_args.connector, parser_args.connector_parameters)
|
499
|
-
# else:
|
500
|
-
# connector = configure_connector(parser_args.connector)
|
501
|
-
|
502
|
-
connector = parser_args.connector_params
|
503
316
|
|
317
|
+
def main():
|
318
|
+
args = _setup_configuration()
|
504
319
|
try:
|
505
|
-
generate_conversation(
|
506
|
-
|
320
|
+
generate_conversation(
|
321
|
+
technology = args.technology,
|
322
|
+
connector = args.connector_params,
|
323
|
+
user = os.path.join(config.profiles_path, args.user_profile),
|
324
|
+
personality = None, #todo: check this
|
325
|
+
output = args.output,
|
326
|
+
project_folder = args.project_path
|
327
|
+
)
|
507
328
|
except Exception as e:
|
508
329
|
logger.error(f"An error occurred while generating the conversation: {e}")
|
509
330
|
|
331
|
+
|
510
332
|
if __name__ == '__main__':
|
511
333
|
main()
|
user_sim/core/role_structure.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
import itertools
|
2
2
|
from pydantic import BaseModel, ValidationError, field_validator
|
3
3
|
from typing import List, Union, Dict, Optional
|
4
|
+
from importlib.resources import files
|
4
5
|
from user_sim.core.interaction_styles import *
|
5
6
|
from user_sim.core.ask_about import *
|
6
7
|
from user_sim.utils.exceptions import *
|
@@ -277,11 +278,11 @@ class RoleData:
|
|
277
278
|
personality = context["personality"]
|
278
279
|
|
279
280
|
path_list = []
|
280
|
-
if os.path.exists(config.
|
281
|
-
custom_personalities_path = config.
|
281
|
+
if os.path.exists(config.custom_personalities_path):
|
282
|
+
custom_personalities_path = config.custom_personalities_path
|
282
283
|
path_list.append(custom_personalities_path)
|
283
284
|
|
284
|
-
default_personalities_path =
|
285
|
+
default_personalities_path = files("config") / "personalities"
|
285
286
|
path_list.append(default_personalities_path)
|
286
287
|
|
287
288
|
try:
|
user_sim/handlers/asr_module.py
CHANGED
@@ -2,6 +2,8 @@ import speech_recognition as sr
|
|
2
2
|
from pydantic import BaseModel, ValidationError
|
3
3
|
from typing import List, Union, Dict, Optional
|
4
4
|
import time
|
5
|
+
|
6
|
+
from user_sim.utils import config
|
5
7
|
from user_sim.utils.utilities import read_yaml
|
6
8
|
from user_sim.utils.token_cost_calculator import calculate_cost, max_input_tokens_allowed
|
7
9
|
from openai import OpenAI
|
@@ -15,6 +17,7 @@ pygame.mixer.init()
|
|
15
17
|
warnings.filterwarnings("ignore", category=FutureWarning, module="whisper")
|
16
18
|
warnings.filterwarnings("ignore", category=RuntimeWarning, module="pydub")
|
17
19
|
client = OpenAI()
|
20
|
+
audio_files_path = config.audio_files_path
|
18
21
|
audio_format = "mp3"
|
19
22
|
|
20
23
|
|
@@ -116,11 +119,11 @@ class STTModule:
|
|
116
119
|
input=message,
|
117
120
|
response_format=audio_format
|
118
121
|
) as response:
|
119
|
-
response.stream_to_file("
|
122
|
+
response.stream_to_file(audio_files_path / f"output.{audio_format}")
|
120
123
|
|
121
124
|
calculate_cost(message, model=self.model, module="tts_module")
|
122
125
|
logger.info("Playing...")
|
123
|
-
audio_path = f"
|
126
|
+
audio_path = audio_files_path / f"output.{audio_format}"
|
124
127
|
with open(audio_path, 'rb') as audio_file:
|
125
128
|
pygame.mixer.music.load(audio_file)
|
126
129
|
pygame.mixer.music.play()
|
@@ -12,6 +12,7 @@ from user_sim.handlers.image_recognition_module import image_description
|
|
12
12
|
logger = logging.getLogger('Info Logger')
|
13
13
|
current_script_dir = os.path.dirname(os.path.abspath(__file__))
|
14
14
|
project_root = os.path.abspath(os.path.join(current_script_dir, "../.."))
|
15
|
+
pdfs_dir = config.pdfs_path
|
15
16
|
pdf_register_name = "pdf_register.json"
|
16
17
|
|
17
18
|
|
user_sim/utils/config.py
CHANGED
@@ -14,11 +14,19 @@ clean_cache = False
|
|
14
14
|
root_path = ""
|
15
15
|
project_folder_path = ""
|
16
16
|
src_path = ""
|
17
|
+
#data
|
18
|
+
cache_path = ""
|
19
|
+
pdfs_path = ""
|
20
|
+
audio_files_path = ""
|
21
|
+
#custom
|
17
22
|
profiles_path = ""
|
18
|
-
|
23
|
+
custom_personalities_path = ""
|
24
|
+
custom_types_path = ""
|
19
25
|
test_cases_folder = ""
|
20
26
|
types_dict = {}
|
21
|
-
|
27
|
+
#default
|
28
|
+
default_types_path = ""
|
29
|
+
default_personalities_path = ""
|
22
30
|
|
23
31
|
|
24
32
|
# cost metrics
|
@@ -2,10 +2,9 @@ import os
|
|
2
2
|
import json
|
3
3
|
import hashlib
|
4
4
|
import logging
|
5
|
+
from user_sim.utils import config
|
5
6
|
|
6
|
-
|
7
|
-
project_root = os.path.abspath(os.path.join(current_script_dir, "../..")) #change
|
8
|
-
temp_file_dir = os.path.join(project_root, "data/cache")
|
7
|
+
temp_file_dir = config.cache_path
|
9
8
|
|
10
9
|
logger = logging.getLogger('Info Logger')
|
11
10
|
|
user_sim/utils/utilities.py
CHANGED
@@ -9,7 +9,7 @@ import importlib.util
|
|
9
9
|
import logging
|
10
10
|
import platform
|
11
11
|
|
12
|
-
from
|
12
|
+
from colorama import Fore, Style
|
13
13
|
from datetime import datetime, timedelta, date
|
14
14
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
15
15
|
from sklearn.metrics.pairwise import cosine_similarity
|
@@ -36,6 +36,19 @@ logger = logging.getLogger('Info Logger')
|
|
36
36
|
# if not os.environ.get(k):
|
37
37
|
# raise Exception(f"{k} not found")
|
38
38
|
|
39
|
+
def print_user(msg):
|
40
|
+
clean_text = re.sub(r'\(Web page content: [^)]*>>\)', '', msg)
|
41
|
+
clean_text = re.sub(r'\(PDF content: [^)]*>>\)', '', clean_text)
|
42
|
+
clean_text = re.sub(r'\(Image description[^)]*\)', '', clean_text)
|
43
|
+
print(f"{Fore.GREEN}User:{Style.RESET_ALL} {clean_text}")
|
44
|
+
|
45
|
+
|
46
|
+
def print_chatbot(msg):
|
47
|
+
clean_text = re.sub(r'\(Web page content:.*?\>\>\)', '', msg, flags=re.DOTALL)
|
48
|
+
clean_text = re.sub(r'\(PDF content:.*?\>\>\)', '', clean_text, flags=re.DOTALL)
|
49
|
+
clean_text = re.sub(r'\(Image description[^)]*\)', '', clean_text)
|
50
|
+
print(f"{Fore.LIGHTRED_EX}Chatbot:{Style.RESET_ALL} {clean_text}")
|
51
|
+
|
39
52
|
|
40
53
|
def end_alarm():
|
41
54
|
os_name = platform.system()
|
@@ -64,57 +77,58 @@ def init_model():
|
|
64
77
|
return model, llm
|
65
78
|
|
66
79
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
# try:
|
83
|
-
# # Try to parse as JSON first
|
84
|
-
# if connector_params_str.strip().startswith("{"):
|
85
|
-
# params = json.loads(connector_params_str)
|
86
|
-
# else:
|
87
|
-
# # Parse as key=value pairs
|
88
|
-
# for pair in connector_params_str.split(","):
|
89
|
-
# if "=" not in pair:
|
90
|
-
# continue
|
91
|
-
# key, value = pair.split("=", 1)
|
92
|
-
# key = key.strip()
|
93
|
-
# value = value.strip()
|
94
|
-
#
|
95
|
-
# # Try to convert to appropriate types
|
96
|
-
# if value.lower() in ("true", "false"):
|
97
|
-
# params[key] = value.lower() == "true"
|
98
|
-
# elif value.isdigit():
|
99
|
-
# params[key] = int(value)
|
100
|
-
# else:
|
101
|
-
# try:
|
102
|
-
# params[key] = float(value)
|
103
|
-
# except ValueError:
|
104
|
-
# params[key] = value
|
105
|
-
#
|
106
|
-
# except (json.JSONDecodeError, ValueError) as e:
|
107
|
-
# logger.exception("Failed to parse connector parameters: %s", connector_params_str)
|
108
|
-
# msg = f"Invalid connector parameters format: {e}"
|
109
|
-
# raise ValueError(msg) from e
|
110
|
-
#
|
111
|
-
# return params
|
80
|
+
def load_yaml_files_from_folder(folder_path, existing_keys=None):
|
81
|
+
types = {}
|
82
|
+
for filename in os.listdir(folder_path):
|
83
|
+
if filename.endswith((".yml", ".yaml")):
|
84
|
+
file_path = os.path.join(folder_path, filename)
|
85
|
+
try:
|
86
|
+
with open(file_path, "r", encoding="utf-8") as f:
|
87
|
+
data = yaml.safe_load(f)
|
88
|
+
name = data.get("name")
|
89
|
+
if name:
|
90
|
+
if not existing_keys or name not in existing_keys:
|
91
|
+
types[name] = data
|
92
|
+
except yaml.YAMLError as e:
|
93
|
+
logger.error(f"Error reading {file_path}: {e}")
|
94
|
+
return types
|
112
95
|
|
113
96
|
|
114
97
|
def parse_content_to_text(messages):
|
115
98
|
return " ".join([message["content"] for message in messages if "content" in message])
|
116
99
|
|
117
100
|
|
101
|
+
def parse_profiles(user_path):
|
102
|
+
def is_yaml(file):
|
103
|
+
if not file.endswith(('.yaml', '.yml')):
|
104
|
+
return False
|
105
|
+
try:
|
106
|
+
with open(file, 'r') as f:
|
107
|
+
yaml.safe_load(f)
|
108
|
+
return True
|
109
|
+
except yaml.YAMLError:
|
110
|
+
return False
|
111
|
+
|
112
|
+
list_of_files = []
|
113
|
+
if os.path.isfile(user_path):
|
114
|
+
if is_yaml(user_path):
|
115
|
+
yaml_file = read_yaml(user_path)
|
116
|
+
return [yaml_file]
|
117
|
+
else:
|
118
|
+
raise Exception(f'The user profile file is not a yaml: {user_path}')
|
119
|
+
elif os.path.isdir(user_path):
|
120
|
+
for root, _, files in os.walk(user_path):
|
121
|
+
for file in files:
|
122
|
+
if is_yaml(os.path.join(root, file)):
|
123
|
+
path = root + '/' + file
|
124
|
+
yaml_file = read_yaml(path)
|
125
|
+
list_of_files.append(yaml_file)
|
126
|
+
|
127
|
+
return list_of_files
|
128
|
+
else:
|
129
|
+
raise Exception(f'Invalid path for user profile operation: {user_path}')
|
130
|
+
|
131
|
+
|
118
132
|
def get_encoding(encoded_file):
|
119
133
|
with open(encoded_file, 'rb') as file:
|
120
134
|
detected = detect(file.read())
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: user-simulator
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.3.0
|
4
4
|
Summary: LLM-based user simulator for chatbot testing.
|
5
5
|
Author: Alejandro Del Pozzo Escalera, Juan de Lara Jaramillo, Esther Guerra Sánchez
|
6
6
|
License: MIT License
|
@@ -41,13 +41,16 @@ Requires-Dist: openai>=1.0.0
|
|
41
41
|
Requires-Dist: pandas>=2.3.0
|
42
42
|
Requires-Dist: pillow>=11.2.1
|
43
43
|
Requires-Dist: pydantic>=2.0.0
|
44
|
-
Requires-Dist: pymupdf
|
44
|
+
Requires-Dist: pymupdf==1.26.1
|
45
45
|
Requires-Dist: pyyaml>=6.0.2
|
46
46
|
Requires-Dist: requests>=2.32.4
|
47
47
|
Requires-Dist: scikit-learn>=1.7.0
|
48
48
|
Requires-Dist: selenium>=4.33.0
|
49
|
+
Requires-Dist: sqlalchemy==2.0.41
|
49
50
|
Requires-Dist: twine>=6.1.0
|
51
|
+
Requires-Dist: typing-extensions==4.13.2
|
50
52
|
Requires-Dist: webdriver-manager>=4.0.2
|
53
|
+
Requires-Dist: zstandard==0.23.0
|
51
54
|
Dynamic: license-file
|
52
55
|
|
53
56
|
# User simulator for chatbot testing
|