user-simulator 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- config/__init__.py +0 -0
- config/__pycache__/__init__.cpython-312.pyc +0 -0
- config/asr_configuration/__init__.py +0 -0
- config/asr_configuration/default_asr_config.yml +8 -0
- config/misc/__init__.py +0 -0
- config/misc/sound/__init__.py +0 -0
- config/misc/sound/c1bccaed.wav +0 -0
- config/models/__init__.py +0 -0
- config/models/models.yml +15 -0
- config/patterns/__init__.py +0 -0
- config/patterns/end_conversation_patterns.yml +40 -0
- config/patterns/fallback_patterns.yml +9 -0
- config/personalities/__init__.py +0 -0
- config/personalities/conversational-user.yml +6 -0
- config/personalities/curious-user.yml +5 -0
- config/personalities/direct-user.yml +4 -0
- config/personalities/disorganized-user.yml +5 -0
- config/personalities/elderly-user.yml +6 -0
- config/personalities/formal-user.yml +4 -0
- config/personalities/impatient-user.yml +4 -0
- config/personalities/rude-user.yml +6 -0
- config/personalities/sarcastic-user.yml +4 -0
- config/personalities/skeptical-user.yml +5 -0
- config/types/__init__.py +0 -0
- config/types/currency.yml +10 -0
- config/types/phone_number.yml +4 -0
- data/__init__.py +0 -0
- data/__pycache__/__init__.cpython-312.pyc +0 -0
- data/audio_files/__init__.py +0 -0
- data/audio_files/output.mp3 +0 -0
- data/audio_files/output.wav +0 -0
- data/cache/__init__.py +0 -0
- data/cache/image_register.json +238 -0
- data/cache/pdf_register.json +3 -0
- data/cache/webpage_register.json +67 -0
- data/connectors/__init__.py +0 -0
- data/connectors/dialogflow.yml +16 -0
- data/connectors/julie.yml +37 -0
- data/connectors/kuki.yml +16 -0
- data/connectors/millionbot_ada.yml +25 -0
- data/connectors/rasa.yml +8 -0
- data/connectors/serviceform.yml +17 -0
- data/connectors/taskyto.yml +8 -0
- data/element_lists/__init__.py +0 -0
- data/element_lists/list_of_things.yml +7 -0
- data/list_functions/__init__.py +0 -0
- data/pdfs/Normativa_TFMs_EPS.pdf +0 -0
- data/pdfs/__init__.py +0 -0
- data/readme_data/__init__.py +0 -0
- data/readme_data/img.png +0 -0
- user_sim/cli/cli.py +116 -20
- user_sim/cli/sensei_chat.py +58 -230
- user_sim/core/role_structure.py +4 -3
- user_sim/handlers/asr_module.py +5 -2
- user_sim/handlers/pdf_parser_module.py +1 -0
- user_sim/utils/config.py +10 -2
- user_sim/utils/register_management.py +2 -3
- user_sim/utils/utilities.py +60 -46
- {user_simulator-0.2.4.dist-info → user_simulator-0.3.0.dist-info}/METADATA +5 -2
- user_simulator-0.3.0.dist-info/RECORD +99 -0
- {user_simulator-0.2.4.dist-info → user_simulator-0.3.0.dist-info}/top_level.txt +1 -0
- user_simulator-0.2.4.dist-info/RECORD +0 -49
- {user_simulator-0.2.4.dist-info → user_simulator-0.3.0.dist-info}/WHEEL +0 -0
- {user_simulator-0.2.4.dist-info → user_simulator-0.3.0.dist-info}/entry_points.txt +0 -0
- {user_simulator-0.2.4.dist-info → user_simulator-0.3.0.dist-info}/licenses/LICENSE.txt +0 -0
user_sim/cli/sensei_chat.py
CHANGED
@@ -1,139 +1,64 @@
|
|
1
1
|
import timeit
|
2
|
-
import yaml
|
3
|
-
import pandas as pd
|
4
2
|
from argparse import Namespace
|
5
|
-
from
|
6
|
-
|
7
|
-
from argparse import ArgumentParser
|
8
|
-
from colorama import Fore, Style
|
9
|
-
# from technologies.chatbot_connectors import (Chatbot, ChatbotRasa, ChatbotTaskyto, ChatbotMillionBot,
|
10
|
-
# ChatbotServiceform)
|
3
|
+
from cli import parse_chat_arguments
|
4
|
+
from importlib.resources import files
|
11
5
|
from user_sim.core.data_extraction import DataExtraction
|
12
6
|
from user_sim.core.role_structure import *
|
13
7
|
from user_sim.core.user_simulator import UserSimulator
|
14
8
|
from user_sim.utils.show_logs import *
|
15
9
|
from user_sim.utils.utilities import *
|
10
|
+
from user_sim.utils import config
|
16
11
|
from user_sim.utils.token_cost_calculator import create_cost_dataset
|
17
12
|
from user_sim.utils.register_management import clean_temp_files
|
18
13
|
from chatbot_connectors.cli import ChatbotFactory, parse_connector_params
|
19
14
|
|
20
|
-
|
21
15
|
# check_keys(["OPENAI_API_KEY"])
|
22
|
-
current_script_dir = os.path.dirname(os.path.abspath(__file__))
|
23
|
-
root_path = os.path.abspath(os.path.join(current_script_dir, ".."))
|
24
|
-
|
25
|
-
def print_user(msg):
|
26
|
-
clean_text = re.sub(r'\(Web page content: [^)]*>>\)', '', msg)
|
27
|
-
clean_text = re.sub(r'\(PDF content: [^)]*>>\)', '', clean_text)
|
28
|
-
clean_text = re.sub(r'\(Image description[^)]*\)', '', clean_text)
|
29
|
-
print(f"{Fore.GREEN}User:{Style.RESET_ALL} {clean_text}")
|
30
|
-
|
31
|
-
|
32
|
-
def print_chatbot(msg):
|
33
|
-
clean_text = re.sub(r'\(Web page content:.*?\>\>\)', '', msg, flags=re.DOTALL)
|
34
|
-
clean_text = re.sub(r'\(PDF content:.*?\>\>\)', '', clean_text, flags=re.DOTALL)
|
35
|
-
clean_text = re.sub(r'\(Image description[^)]*\)', '', clean_text)
|
36
|
-
print(f"{Fore.LIGHTRED_EX}Chatbot:{Style.RESET_ALL} {clean_text}")
|
37
|
-
|
38
|
-
def load_yaml_arguments(project_path):
|
39
|
-
files = os.listdir(project_path)
|
40
|
-
|
41
|
-
run_file = next((f for f in files if f in ["run.yml", "run.yaml"]), None)
|
42
|
-
|
43
|
-
if not run_file:
|
44
|
-
raise FileNotFoundError(f"Couldn't find run.yml file.")
|
45
|
-
|
46
|
-
run_yaml_path = os.path.join(project_path, run_file)
|
47
|
-
|
48
|
-
with open(run_yaml_path, 'r', encoding='utf-8') as f:
|
49
|
-
yaml_args = yaml.safe_load(f)
|
50
|
-
|
51
|
-
if yaml_args:
|
52
|
-
if "execution_parameters" in yaml_args.keys():
|
53
|
-
parameters = yaml_args["execution_parameters"]
|
54
|
-
dict_parameters = {param: True for param in parameters}
|
55
|
-
del yaml_args["execution_parameters"]
|
56
|
-
yaml_args.update(dict_parameters)
|
57
16
|
|
58
|
-
|
17
|
+
def configure_project(project_path):
|
18
|
+
# sensei
|
19
|
+
config.src_path = files("src")
|
20
|
+
config.cache_path = files("data") / "cache"
|
21
|
+
config.pdfs_path = files("data") / "pdfs"
|
22
|
+
config.audio_files_path = files("data") / "audio_files"
|
23
|
+
config.default_types_path = files("config") / "types"
|
24
|
+
config.default_personalities_path = files("config") / "personalities"
|
25
|
+
|
26
|
+
# project
|
27
|
+
config.project_folder_path = project_path
|
28
|
+
config.profiles_path = os.path.join(project_path, "profiles")
|
29
|
+
config.custom_personalities_path = os.path.join(project_path, "personalities")
|
30
|
+
config.custom_types_path = os.path.join(project_path, "types")
|
31
|
+
custom_types = load_yaml_files_from_folder(config.custom_types_path)
|
59
32
|
|
60
|
-
return yaml_args or {}
|
61
33
|
|
34
|
+
default_types = load_yaml_files_from_folder(config.default_types_path, existing_keys=custom_types.keys())
|
35
|
+
config.types_dict = {**default_types, **custom_types}
|
62
36
|
|
63
|
-
def load_yaml_files_from_folder(folder_path, existing_keys=None):
|
64
|
-
types = {}
|
65
|
-
for filename in os.listdir(folder_path):
|
66
|
-
if filename.endswith((".yml", ".yaml")):
|
67
|
-
file_path = os.path.join(folder_path, filename)
|
68
|
-
try:
|
69
|
-
with open(file_path, "r", encoding="utf-8") as f:
|
70
|
-
data = yaml.safe_load(f)
|
71
|
-
name = data.get("name")
|
72
|
-
if name:
|
73
|
-
if not existing_keys or name not in existing_keys:
|
74
|
-
types[name] = data
|
75
|
-
except yaml.YAMLError as e:
|
76
|
-
logger.error(f"Error reading {file_path}: {e}")
|
77
|
-
return types
|
78
37
|
|
38
|
+
def _setup_configuration() -> Namespace:
|
39
|
+
"""Parse command line arguments, validate config, and create output dir.
|
79
40
|
|
80
|
-
|
81
|
-
|
82
|
-
config.profiles_path = os.path.join(project_path, "profiles")
|
83
|
-
config.custom_personalities_folder = os.path.join(project_path, "personalities")
|
41
|
+
Returns:
|
42
|
+
The parsed and validated command line arguments
|
84
43
|
|
85
|
-
|
86
|
-
|
44
|
+
Raises:
|
45
|
+
TracerError: If the specified technology is invalid
|
46
|
+
"""
|
87
47
|
|
88
|
-
|
89
|
-
default_types = load_yaml_files_from_folder(default_types_path, existing_keys=custom_types.keys())
|
90
|
-
config.types_dict = {**default_types, **custom_types}
|
48
|
+
args = parse_chat_arguments()
|
91
49
|
|
92
|
-
|
93
|
-
|
94
|
-
# with open(connec, 'r', encoding='utf-8') as f:
|
95
|
-
# con_yaml = yaml.safe_load(f)
|
96
|
-
#
|
97
|
-
#
|
98
|
-
# if len(args)<2 or not con_yaml["parameters"]:
|
99
|
-
# logger.warning("No parameters added for connector configuration. They may not have been set as input arguments "
|
100
|
-
# "or declared as dynamic parameters in the connector file.")
|
101
|
-
# return con_yaml
|
102
|
-
#
|
103
|
-
# parameters = args[1]
|
104
|
-
# if isinstance(parameters, str):
|
105
|
-
# parameters = json.loads(parameters)
|
106
|
-
#
|
107
|
-
# param_key_list = list(parameters.keys())
|
108
|
-
# if Counter(con_yaml["parameters"]) != Counter(param_key_list):
|
109
|
-
# raise UnmachedList("Parameters in yaml don't match parameters input in execution")
|
110
|
-
#
|
111
|
-
# def replace_values(obj_dict, src_dict):
|
112
|
-
# for key in obj_dict:
|
113
|
-
# if isinstance(obj_dict[key], dict):
|
114
|
-
# replace_values(obj_dict[key], src_dict)
|
115
|
-
# elif key in src_dict:
|
116
|
-
# obj_dict[key] = src_dict[key]
|
117
|
-
#
|
118
|
-
# replace_values(con_yaml, parameters)
|
119
|
-
# return con_yaml
|
120
|
-
|
121
|
-
# def _setup_configuration() -> Namespace:
|
122
|
-
# """Parse command line arguments, validate config, and create output dir.
|
123
|
-
#
|
124
|
-
# Returns:
|
125
|
-
# The parsed and validated command line arguments
|
126
|
-
#
|
127
|
-
# Raises:
|
128
|
-
# TracerError: If the specified technology is invalid
|
129
|
-
# """
|
130
|
-
# args = parse_chat_arguments()
|
131
|
-
#
|
132
|
-
# logger = create_logger(args.verbose, 'Info Logger')
|
133
|
-
# logger.info('Logs enabled!')
|
50
|
+
logger = create_logger(args.verbose, 'Info Logger')
|
51
|
+
logger.info('Logs enabled!')
|
134
52
|
|
53
|
+
configure_project(args.project_path)
|
135
54
|
|
55
|
+
# check_keys(["OPENAI_API_KEY"])
|
56
|
+
config.test_cases_folder = args.extract
|
57
|
+
config.ignore_cache = args.ignore_cache
|
58
|
+
config.update_cache = args.update_cache
|
59
|
+
config.clean_cache = args.clean_cache
|
136
60
|
|
61
|
+
return args
|
137
62
|
|
138
63
|
|
139
64
|
def get_conversation_metadata(user_profile, the_user, serial=None):
|
@@ -158,6 +83,7 @@ def get_conversation_metadata(user_profile, the_user, serial=None):
|
|
158
83
|
|
159
84
|
return conversation_list
|
160
85
|
|
86
|
+
|
161
87
|
def ask_about_metadata(up):
|
162
88
|
if not up.ask_about.variable_list:
|
163
89
|
return up.ask_about.str_list
|
@@ -192,9 +118,9 @@ def get_conversation_metadata(user_profile, the_user, serial=None):
|
|
192
118
|
return data_list
|
193
119
|
|
194
120
|
def total_cost_calculator():
|
121
|
+
import pandas as pd
|
195
122
|
encoding = get_encoding(config.cost_ds_path)["encoding"]
|
196
123
|
cost_df = pd.read_csv(config.cost_ds_path, encoding=encoding)
|
197
|
-
|
198
124
|
total_sum_cost = cost_df[cost_df["Conversation"]==config.conversation_name]['Total Cost'].sum()
|
199
125
|
total_sum_cost = round(float(total_sum_cost), 8)
|
200
126
|
|
@@ -222,57 +148,19 @@ def get_conversation_metadata(user_profile, the_user, serial=None):
|
|
222
148
|
return metadata
|
223
149
|
|
224
150
|
|
225
|
-
def parse_profiles(user_path):
|
226
|
-
def is_yaml(file):
|
227
|
-
if not file.endswith(('.yaml', '.yml')):
|
228
|
-
return False
|
229
|
-
try:
|
230
|
-
with open(file, 'r') as f:
|
231
|
-
yaml.safe_load(f)
|
232
|
-
return True
|
233
|
-
except yaml.YAMLError:
|
234
|
-
return False
|
235
|
-
|
236
|
-
list_of_files = []
|
237
|
-
if os.path.isfile(user_path):
|
238
|
-
if is_yaml(user_path):
|
239
|
-
yaml_file = read_yaml(user_path)
|
240
|
-
return [yaml_file]
|
241
|
-
else:
|
242
|
-
raise Exception(f'The user profile file is not a yaml: {user_path}')
|
243
|
-
elif os.path.isdir(user_path):
|
244
|
-
for root, _, files in os.walk(user_path):
|
245
|
-
for file in files:
|
246
|
-
if is_yaml(os.path.join(root, file)):
|
247
|
-
path = root + '/' + file
|
248
|
-
yaml_file = read_yaml(path)
|
249
|
-
list_of_files.append(yaml_file)
|
250
|
-
|
251
|
-
return list_of_files
|
252
|
-
else:
|
253
|
-
raise Exception(f'Invalid path for user profile operation: {user_path}')
|
254
|
-
|
255
|
-
|
256
151
|
def build_chatbot(technology, connector):
|
257
|
-
# chatbot_builder = {
|
258
|
-
# 'rasa': RasaChatbot,
|
259
|
-
# 'taskyto': ChatbotTaskyto,
|
260
|
-
# # 'serviceform': ChatbotServiceform(connector),
|
261
|
-
# 'millionbot': MillionBot,
|
262
|
-
# 'custom': CustomChatbot
|
263
|
-
# }
|
264
|
-
# chatbot_class = chatbot_builder.get(technology, CustomChatbot)
|
265
152
|
parsed_connector = parse_connector_params(connector)
|
266
153
|
chatbot = ChatbotFactory.create_chatbot(chatbot_type=technology, **parsed_connector)
|
267
154
|
return chatbot
|
268
155
|
|
156
|
+
|
269
157
|
def generate_conversation(technology, connector, user,
|
270
|
-
personality,
|
158
|
+
personality, output, project_folder):
|
271
159
|
profiles = parse_profiles(user)
|
272
160
|
serial = generate_serial()
|
273
161
|
config.serial = serial
|
274
|
-
create_cost_dataset(serial,
|
275
|
-
my_execution_stat = ExecutionStats(
|
162
|
+
create_cost_dataset(serial, output)
|
163
|
+
my_execution_stat = ExecutionStats(output, serial)
|
276
164
|
the_chatbot = build_chatbot(technology, connector)
|
277
165
|
|
278
166
|
|
@@ -382,7 +270,7 @@ def generate_conversation(technology, connector, user,
|
|
382
270
|
the_user.update_history("Assistant", "Error: The server did not respond.")
|
383
271
|
break
|
384
272
|
|
385
|
-
if
|
273
|
+
if output:
|
386
274
|
end_time_conversation = timeit.default_timer()
|
387
275
|
conversation_time = end_time_conversation - start_time_conversation
|
388
276
|
formatted_time_conv = timedelta(seconds=conversation_time).total_seconds()
|
@@ -393,7 +281,7 @@ def generate_conversation(technology, connector, user,
|
|
393
281
|
dg_dataframe = the_user.data_gathering.gathering_register
|
394
282
|
csv_extraction = the_user.goal_style[1] if the_user.goal_style[0] == 'all_answered' else False
|
395
283
|
answer_validation_data = (dg_dataframe, csv_extraction)
|
396
|
-
save_test_conv(history, metadata, test_name,
|
284
|
+
save_test_conv(history, metadata, test_name, output, serial,
|
397
285
|
formatted_time_conv, response_time, answer_validation_data, counter=i)
|
398
286
|
|
399
287
|
config.total_individual_cost = 0
|
@@ -415,91 +303,31 @@ def generate_conversation(technology, connector, user,
|
|
415
303
|
if config.clean_cache:
|
416
304
|
clean_temp_files()
|
417
305
|
|
418
|
-
if
|
306
|
+
if output and len(my_execution_stat.test_names) == len(profiles):
|
419
307
|
my_execution_stat.show_global_stats()
|
420
308
|
my_execution_stat.export_stats()
|
421
|
-
elif
|
309
|
+
elif output:
|
422
310
|
logger.warning("Stats export was enabled but couldn't retrieve all stats. No stats will be exported.")
|
423
311
|
else:
|
424
312
|
pass
|
425
313
|
|
426
314
|
end_alarm()
|
427
315
|
|
428
|
-
def main():
|
429
|
-
parser = ArgumentParser(description='Conversation generator for a chatbot')
|
430
|
-
|
431
|
-
parser.add_argument('--run_from_yaml', type=str, help='Carga los argumentos desde un archivo YAML')
|
432
|
-
|
433
|
-
parser.add_argument('--technology', required=False,
|
434
|
-
choices=['rasa', 'taskyto', 'ada-uam', 'millionbot', 'genion', 'lola', 'serviceform', 'kuki', 'julie', 'rivas_catalina', 'saic_malaga'],
|
435
|
-
help='Technology the chatbot is implemented in')
|
436
|
-
# parser.add_argument('--connector', required=False, help='path to the connector configuration file')
|
437
|
-
parser.add_argument('--connector-params', required=False, help='dynamic parameters for the selected chatbot connector')
|
438
|
-
parser.add_argument('--project_path', required=False, help='Project folder PATH where all testing data is stored')
|
439
|
-
parser.add_argument('--user_profile', required=False, help='User profile file or user profile folder to test the chatbot')
|
440
|
-
parser.add_argument('--personality', required=False, help='Personality file')
|
441
|
-
parser.add_argument('--extract', default=False, help='Path to store conversation user-chatbot')
|
442
|
-
parser.add_argument('--verbose', action='store_true', help='Shows debug prints')
|
443
|
-
parser.add_argument('--clean_cache', action='store_true', help='Deletes temporary files.')
|
444
|
-
parser.add_argument('--ignore_cache', action='store_true', help='Ignores cache for temporary files')
|
445
|
-
parser.add_argument('--update_cache', action='store_true', help='Overwrites temporary files in cache')
|
446
|
-
parser_args, unknown_args = parser.parse_known_args()
|
447
|
-
|
448
|
-
if parser_args.run_from_yaml:
|
449
|
-
if len(sys.argv) > 3: # sys.argv[0] is script, sys.argv[1] is --run_from_yaml, sys.argv[2] is YAML
|
450
|
-
parser.error("No other arguments can be provided when using --run_from_yaml.")
|
451
|
-
|
452
|
-
yaml_args = load_yaml_arguments(parser_args.run_from_yaml)
|
453
|
-
|
454
|
-
default_flags = {
|
455
|
-
"connector_parameters": None,
|
456
|
-
"personality": None,
|
457
|
-
"verbose": False,
|
458
|
-
"clean_cache": False,
|
459
|
-
"ignore_cache": False,
|
460
|
-
"update_cache": False
|
461
|
-
}
|
462
|
-
for flag, default in default_flags.items():
|
463
|
-
yaml_args.setdefault(flag, default)
|
464
|
-
|
465
|
-
class ArgsNamespace:
|
466
|
-
def __init__(self, **entries):
|
467
|
-
self.__dict__.update(entries)
|
468
|
-
|
469
|
-
parser_args = ArgsNamespace(**yaml_args)
|
470
|
-
|
471
|
-
else:
|
472
|
-
required_args = ['technology', 'user_profile', 'connector_params']
|
473
|
-
missing_args = [arg for arg in required_args if getattr(parser_args, arg) is None]
|
474
|
-
|
475
|
-
if missing_args:
|
476
|
-
parser.error(f"The following arguments are required when not using --run_from_yaml: {', '.join(missing_args)}")
|
477
|
-
|
478
|
-
configure_project(parser_args.project_path)
|
479
|
-
config.root_path = os.path.abspath(os.path.join(current_script_dir, "../../.."))
|
480
|
-
config.src_path = os.path.abspath(os.path.join(current_script_dir, "../.."))
|
481
|
-
profile_path = os.path.join(config.profiles_path, parser_args.user_profile)
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
# check_keys(["OPENAI_API_KEY"])
|
486
|
-
config.test_cases_folder = parser_args.extract
|
487
|
-
config.ignore_cache = parser_args.ignore_cache
|
488
|
-
config.update_cache = parser_args.update_cache
|
489
|
-
config.clean_cache = parser_args.clean_cache
|
490
|
-
|
491
|
-
# if parser_args.connector_parameters:
|
492
|
-
# connector = configure_connector(parser_args.connector, parser_args.connector_parameters)
|
493
|
-
# else:
|
494
|
-
# connector = configure_connector(parser_args.connector)
|
495
|
-
|
496
|
-
connector = parser_args.connector_params
|
497
316
|
|
317
|
+
def main():
|
318
|
+
args = _setup_configuration()
|
498
319
|
try:
|
499
|
-
generate_conversation(
|
500
|
-
|
320
|
+
generate_conversation(
|
321
|
+
technology = args.technology,
|
322
|
+
connector = args.connector_params,
|
323
|
+
user = os.path.join(config.profiles_path, args.user_profile),
|
324
|
+
personality = None, #todo: check this
|
325
|
+
output = args.output,
|
326
|
+
project_folder = args.project_path
|
327
|
+
)
|
501
328
|
except Exception as e:
|
502
329
|
logger.error(f"An error occurred while generating the conversation: {e}")
|
503
330
|
|
331
|
+
|
504
332
|
if __name__ == '__main__':
|
505
333
|
main()
|
user_sim/core/role_structure.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
import itertools
|
2
2
|
from pydantic import BaseModel, ValidationError, field_validator
|
3
3
|
from typing import List, Union, Dict, Optional
|
4
|
+
from importlib.resources import files
|
4
5
|
from user_sim.core.interaction_styles import *
|
5
6
|
from user_sim.core.ask_about import *
|
6
7
|
from user_sim.utils.exceptions import *
|
@@ -277,11 +278,11 @@ class RoleData:
|
|
277
278
|
personality = context["personality"]
|
278
279
|
|
279
280
|
path_list = []
|
280
|
-
if os.path.exists(config.custom_personalities_folder):
|
281
|
-
custom_personalities_path = config.custom_personalities_folder
|
281
|
+
if os.path.exists(config.custom_personalities_path):
|
282
|
+
custom_personalities_path = config.custom_personalities_path
|
282
283
|
path_list.append(custom_personalities_path)
|
283
284
|
|
284
|
-
default_personalities_path =
|
285
|
+
default_personalities_path = files("config") / "personalities"
|
285
286
|
path_list.append(default_personalities_path)
|
286
287
|
|
287
288
|
try:
|
user_sim/handlers/asr_module.py
CHANGED
@@ -2,6 +2,8 @@ import speech_recognition as sr
|
|
2
2
|
from pydantic import BaseModel, ValidationError
|
3
3
|
from typing import List, Union, Dict, Optional
|
4
4
|
import time
|
5
|
+
|
6
|
+
from user_sim.utils import config
|
5
7
|
from user_sim.utils.utilities import read_yaml
|
6
8
|
from user_sim.utils.token_cost_calculator import calculate_cost, max_input_tokens_allowed
|
7
9
|
from openai import OpenAI
|
@@ -15,6 +17,7 @@ pygame.mixer.init()
|
|
15
17
|
warnings.filterwarnings("ignore", category=FutureWarning, module="whisper")
|
16
18
|
warnings.filterwarnings("ignore", category=RuntimeWarning, module="pydub")
|
17
19
|
client = OpenAI()
|
20
|
+
audio_files_path = config.audio_files_path
|
18
21
|
audio_format = "mp3"
|
19
22
|
|
20
23
|
|
@@ -116,11 +119,11 @@ class STTModule:
|
|
116
119
|
input=message,
|
117
120
|
response_format=audio_format
|
118
121
|
) as response:
|
119
|
-
response.stream_to_file("
|
122
|
+
response.stream_to_file(audio_files_path / f"output.{audio_format}")
|
120
123
|
|
121
124
|
calculate_cost(message, model=self.model, module="tts_module")
|
122
125
|
logger.info("Playing...")
|
123
|
-
audio_path = f"
|
126
|
+
audio_path = audio_files_path / f"output.{audio_format}"
|
124
127
|
with open(audio_path, 'rb') as audio_file:
|
125
128
|
pygame.mixer.music.load(audio_file)
|
126
129
|
pygame.mixer.music.play()
|
@@ -12,6 +12,7 @@ from user_sim.handlers.image_recognition_module import image_description
|
|
12
12
|
logger = logging.getLogger('Info Logger')
|
13
13
|
current_script_dir = os.path.dirname(os.path.abspath(__file__))
|
14
14
|
project_root = os.path.abspath(os.path.join(current_script_dir, "../.."))
|
15
|
+
pdfs_dir = config.pdfs_path
|
15
16
|
pdf_register_name = "pdf_register.json"
|
16
17
|
|
17
18
|
|
user_sim/utils/config.py
CHANGED
@@ -14,11 +14,19 @@ clean_cache = False
|
|
14
14
|
root_path = ""
|
15
15
|
project_folder_path = ""
|
16
16
|
src_path = ""
|
17
|
+
#data
|
18
|
+
cache_path = ""
|
19
|
+
pdfs_path = ""
|
20
|
+
audio_files_path = ""
|
21
|
+
#custom
|
17
22
|
profiles_path = ""
|
18
|
-
|
23
|
+
custom_personalities_path = ""
|
24
|
+
custom_types_path = ""
|
19
25
|
test_cases_folder = ""
|
20
26
|
types_dict = {}
|
21
|
-
|
27
|
+
#default
|
28
|
+
default_types_path = ""
|
29
|
+
default_personalities_path = ""
|
22
30
|
|
23
31
|
|
24
32
|
# cost metrics
|
@@ -2,10 +2,9 @@ import os
|
|
2
2
|
import json
|
3
3
|
import hashlib
|
4
4
|
import logging
|
5
|
+
from user_sim.utils import config
|
5
6
|
|
6
|
-
|
7
|
-
project_root = os.path.abspath(os.path.join(current_script_dir, "../..")) #change
|
8
|
-
temp_file_dir = os.path.join(project_root, "data/cache")
|
7
|
+
temp_file_dir = config.cache_path
|
9
8
|
|
10
9
|
logger = logging.getLogger('Info Logger')
|
11
10
|
|
user_sim/utils/utilities.py
CHANGED
@@ -9,7 +9,7 @@ import importlib.util
|
|
9
9
|
import logging
|
10
10
|
import platform
|
11
11
|
|
12
|
-
from
|
12
|
+
from colorama import Fore, Style
|
13
13
|
from datetime import datetime, timedelta, date
|
14
14
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
15
15
|
from sklearn.metrics.pairwise import cosine_similarity
|
@@ -36,6 +36,19 @@ logger = logging.getLogger('Info Logger')
|
|
36
36
|
# if not os.environ.get(k):
|
37
37
|
# raise Exception(f"{k} not found")
|
38
38
|
|
39
|
+
def print_user(msg):
|
40
|
+
clean_text = re.sub(r'\(Web page content: [^)]*>>\)', '', msg)
|
41
|
+
clean_text = re.sub(r'\(PDF content: [^)]*>>\)', '', clean_text)
|
42
|
+
clean_text = re.sub(r'\(Image description[^)]*\)', '', clean_text)
|
43
|
+
print(f"{Fore.GREEN}User:{Style.RESET_ALL} {clean_text}")
|
44
|
+
|
45
|
+
|
46
|
+
def print_chatbot(msg):
|
47
|
+
clean_text = re.sub(r'\(Web page content:.*?\>\>\)', '', msg, flags=re.DOTALL)
|
48
|
+
clean_text = re.sub(r'\(PDF content:.*?\>\>\)', '', clean_text, flags=re.DOTALL)
|
49
|
+
clean_text = re.sub(r'\(Image description[^)]*\)', '', clean_text)
|
50
|
+
print(f"{Fore.LIGHTRED_EX}Chatbot:{Style.RESET_ALL} {clean_text}")
|
51
|
+
|
39
52
|
|
40
53
|
def end_alarm():
|
41
54
|
os_name = platform.system()
|
@@ -64,57 +77,58 @@ def init_model():
|
|
64
77
|
return model, llm
|
65
78
|
|
66
79
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
# try:
|
83
|
-
# # Try to parse as JSON first
|
84
|
-
# if connector_params_str.strip().startswith("{"):
|
85
|
-
# params = json.loads(connector_params_str)
|
86
|
-
# else:
|
87
|
-
# # Parse as key=value pairs
|
88
|
-
# for pair in connector_params_str.split(","):
|
89
|
-
# if "=" not in pair:
|
90
|
-
# continue
|
91
|
-
# key, value = pair.split("=", 1)
|
92
|
-
# key = key.strip()
|
93
|
-
# value = value.strip()
|
94
|
-
#
|
95
|
-
# # Try to convert to appropriate types
|
96
|
-
# if value.lower() in ("true", "false"):
|
97
|
-
# params[key] = value.lower() == "true"
|
98
|
-
# elif value.isdigit():
|
99
|
-
# params[key] = int(value)
|
100
|
-
# else:
|
101
|
-
# try:
|
102
|
-
# params[key] = float(value)
|
103
|
-
# except ValueError:
|
104
|
-
# params[key] = value
|
105
|
-
#
|
106
|
-
# except (json.JSONDecodeError, ValueError) as e:
|
107
|
-
# logger.exception("Failed to parse connector parameters: %s", connector_params_str)
|
108
|
-
# msg = f"Invalid connector parameters format: {e}"
|
109
|
-
# raise ValueError(msg) from e
|
110
|
-
#
|
111
|
-
# return params
|
80
|
+
def load_yaml_files_from_folder(folder_path, existing_keys=None):
|
81
|
+
types = {}
|
82
|
+
for filename in os.listdir(folder_path):
|
83
|
+
if filename.endswith((".yml", ".yaml")):
|
84
|
+
file_path = os.path.join(folder_path, filename)
|
85
|
+
try:
|
86
|
+
with open(file_path, "r", encoding="utf-8") as f:
|
87
|
+
data = yaml.safe_load(f)
|
88
|
+
name = data.get("name")
|
89
|
+
if name:
|
90
|
+
if not existing_keys or name not in existing_keys:
|
91
|
+
types[name] = data
|
92
|
+
except yaml.YAMLError as e:
|
93
|
+
logger.error(f"Error reading {file_path}: {e}")
|
94
|
+
return types
|
112
95
|
|
113
96
|
|
114
97
|
def parse_content_to_text(messages):
|
115
98
|
return " ".join([message["content"] for message in messages if "content" in message])
|
116
99
|
|
117
100
|
|
101
|
+
def parse_profiles(user_path):
|
102
|
+
def is_yaml(file):
|
103
|
+
if not file.endswith(('.yaml', '.yml')):
|
104
|
+
return False
|
105
|
+
try:
|
106
|
+
with open(file, 'r') as f:
|
107
|
+
yaml.safe_load(f)
|
108
|
+
return True
|
109
|
+
except yaml.YAMLError:
|
110
|
+
return False
|
111
|
+
|
112
|
+
list_of_files = []
|
113
|
+
if os.path.isfile(user_path):
|
114
|
+
if is_yaml(user_path):
|
115
|
+
yaml_file = read_yaml(user_path)
|
116
|
+
return [yaml_file]
|
117
|
+
else:
|
118
|
+
raise Exception(f'The user profile file is not a yaml: {user_path}')
|
119
|
+
elif os.path.isdir(user_path):
|
120
|
+
for root, _, files in os.walk(user_path):
|
121
|
+
for file in files:
|
122
|
+
if is_yaml(os.path.join(root, file)):
|
123
|
+
path = root + '/' + file
|
124
|
+
yaml_file = read_yaml(path)
|
125
|
+
list_of_files.append(yaml_file)
|
126
|
+
|
127
|
+
return list_of_files
|
128
|
+
else:
|
129
|
+
raise Exception(f'Invalid path for user profile operation: {user_path}')
|
130
|
+
|
131
|
+
|
118
132
|
def get_encoding(encoded_file):
|
119
133
|
with open(encoded_file, 'rb') as file:
|
120
134
|
detected = detect(file.read())
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: user-simulator
|
3
|
-
Version: 0.2.4
|
3
|
+
Version: 0.3.0
|
4
4
|
Summary: LLM-based user simulator for chatbot testing.
|
5
5
|
Author: Alejandro Del Pozzo Escalera, Juan de Lara Jaramillo, Esther Guerra Sánchez
|
6
6
|
License: MIT License
|
@@ -41,13 +41,16 @@ Requires-Dist: openai>=1.0.0
|
|
41
41
|
Requires-Dist: pandas>=2.3.0
|
42
42
|
Requires-Dist: pillow>=11.2.1
|
43
43
|
Requires-Dist: pydantic>=2.0.0
|
44
|
-
Requires-Dist: pymupdf
|
44
|
+
Requires-Dist: pymupdf==1.26.1
|
45
45
|
Requires-Dist: pyyaml>=6.0.2
|
46
46
|
Requires-Dist: requests>=2.32.4
|
47
47
|
Requires-Dist: scikit-learn>=1.7.0
|
48
48
|
Requires-Dist: selenium>=4.33.0
|
49
|
+
Requires-Dist: sqlalchemy==2.0.41
|
49
50
|
Requires-Dist: twine>=6.1.0
|
51
|
+
Requires-Dist: typing-extensions==4.13.2
|
50
52
|
Requires-Dist: webdriver-manager>=4.0.2
|
53
|
+
Requires-Dist: zstandard==0.23.0
|
51
54
|
Dynamic: license-file
|
52
55
|
|
53
56
|
# User simulator for chatbot testing
|