user-simulator 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- config/__init__.py +0 -0
- config/__pycache__/__init__.cpython-312.pyc +0 -0
- config/asr_configuration/__init__.py +0 -0
- config/asr_configuration/default_asr_config.yml +8 -0
- config/misc/__init__.py +0 -0
- config/misc/sound/__init__.py +0 -0
- config/misc/sound/c1bccaed.wav +0 -0
- config/models/__init__.py +0 -0
- config/models/models.yml +15 -0
- config/patterns/__init__.py +0 -0
- config/patterns/end_conversation_patterns.yml +40 -0
- config/patterns/fallback_patterns.yml +9 -0
- config/personalities/__init__.py +0 -0
- config/personalities/conversational-user.yml +6 -0
- config/personalities/curious-user.yml +5 -0
- config/personalities/direct-user.yml +4 -0
- config/personalities/disorganized-user.yml +5 -0
- config/personalities/elderly-user.yml +6 -0
- config/personalities/formal-user.yml +4 -0
- config/personalities/impatient-user.yml +4 -0
- config/personalities/rude-user.yml +6 -0
- config/personalities/sarcastic-user.yml +4 -0
- config/personalities/skeptical-user.yml +5 -0
- config/types/__init__.py +0 -0
- config/types/currency.yml +10 -0
- config/types/phone_number.yml +4 -0
- data/__init__.py +0 -0
- data/__pycache__/__init__.cpython-312.pyc +0 -0
- data/audio_files/__init__.py +0 -0
- data/audio_files/output.mp3 +0 -0
- data/audio_files/output.wav +0 -0
- data/cache/__init__.py +0 -0
- data/cache/image_register.json +238 -0
- data/cache/pdf_register.json +3 -0
- data/cache/webpage_register.json +67 -0
- data/connectors/__init__.py +0 -0
- data/connectors/dialogflow.yml +16 -0
- data/connectors/julie.yml +37 -0
- data/connectors/kuki.yml +16 -0
- data/connectors/millionbot_ada.yml +25 -0
- data/connectors/rasa.yml +8 -0
- data/connectors/serviceform.yml +17 -0
- data/connectors/taskyto.yml +8 -0
- data/element_lists/__init__.py +0 -0
- data/element_lists/list_of_things.yml +7 -0
- data/list_functions/__init__.py +0 -0
- data/pdfs/Normativa_TFMs_EPS.pdf +0 -0
- data/pdfs/__init__.py +0 -0
- data/readme_data/__init__.py +0 -0
- data/readme_data/img.png +0 -0
- user_sim/cli/cli.py +116 -20
- user_sim/cli/sensei_chat.py +58 -237
- user_sim/core/role_structure.py +4 -3
- user_sim/handlers/asr_module.py +5 -2
- user_sim/handlers/pdf_parser_module.py +1 -0
- user_sim/utils/config.py +10 -2
- user_sim/utils/register_management.py +2 -3
- user_sim/utils/utilities.py +60 -46
- {user_simulator-0.2.5.dist-info → user_simulator-0.3.1.dist-info}/METADATA +5 -2
- user_simulator-0.3.1.dist-info/RECORD +99 -0
- {user_simulator-0.2.5.dist-info → user_simulator-0.3.1.dist-info}/top_level.txt +1 -0
- user_simulator-0.2.5.dist-info/RECORD +0 -49
- {user_simulator-0.2.5.dist-info → user_simulator-0.3.1.dist-info}/WHEEL +0 -0
- {user_simulator-0.2.5.dist-info → user_simulator-0.3.1.dist-info}/entry_points.txt +0 -0
- {user_simulator-0.2.5.dist-info → user_simulator-0.3.1.dist-info}/licenses/LICENSE.txt +0 -0
user_sim/cli/sensei_chat.py
CHANGED
@@ -1,143 +1,63 @@
|
|
1
1
|
import timeit
|
2
|
-
import yaml
|
3
|
-
import pandas as pd
|
4
2
|
from argparse import Namespace
|
5
|
-
from
|
6
|
-
# from cli import parse_chat_arguments
|
7
|
-
from argparse import ArgumentParser
|
8
|
-
from colorama import Fore, Style
|
9
|
-
# from technologies.chatbot_connectors import (Chatbot, ChatbotRasa, ChatbotTaskyto, ChatbotMillionBot,
|
10
|
-
# ChatbotServiceform)
|
3
|
+
from user_sim.cli.cli import parse_chat_arguments
|
11
4
|
from user_sim.core.data_extraction import DataExtraction
|
12
5
|
from user_sim.core.role_structure import *
|
13
6
|
from user_sim.core.user_simulator import UserSimulator
|
14
7
|
from user_sim.utils.show_logs import *
|
15
8
|
from user_sim.utils.utilities import *
|
9
|
+
from user_sim.utils import config
|
16
10
|
from user_sim.utils.token_cost_calculator import create_cost_dataset
|
17
11
|
from user_sim.utils.register_management import clean_temp_files
|
18
12
|
from chatbot_connectors.cli import ChatbotFactory, parse_connector_params
|
19
|
-
|
20
|
-
|
13
|
+
from importlib.resources import files
|
21
14
|
# check_keys(["OPENAI_API_KEY"])
|
22
|
-
current_script_dir = os.path.dirname(os.path.abspath(__file__))
|
23
|
-
root_path = os.path.abspath(os.path.join(current_script_dir, ".."))
|
24
|
-
|
25
|
-
def print_user(msg):
|
26
|
-
clean_text = re.sub(r'\(Web page content: [^)]*>>\)', '', msg)
|
27
|
-
clean_text = re.sub(r'\(PDF content: [^)]*>>\)', '', clean_text)
|
28
|
-
clean_text = re.sub(r'\(Image description[^)]*\)', '', clean_text)
|
29
|
-
print(f"{Fore.GREEN}User:{Style.RESET_ALL} {clean_text}")
|
30
|
-
|
31
|
-
|
32
|
-
def print_chatbot(msg):
|
33
|
-
clean_text = re.sub(r'\(Web page content:.*?\>\>\)', '', msg, flags=re.DOTALL)
|
34
|
-
clean_text = re.sub(r'\(PDF content:.*?\>\>\)', '', clean_text, flags=re.DOTALL)
|
35
|
-
clean_text = re.sub(r'\(Image description[^)]*\)', '', clean_text)
|
36
|
-
print(f"{Fore.LIGHTRED_EX}Chatbot:{Style.RESET_ALL} {clean_text}")
|
37
|
-
|
38
|
-
def load_yaml_arguments(project_path):
|
39
|
-
files = os.listdir(project_path)
|
40
|
-
|
41
|
-
run_file = next((f for f in files if f in ["run.yml", "run.yaml"]), None)
|
42
|
-
|
43
|
-
if not run_file:
|
44
|
-
raise FileNotFoundError(f"Couldn't find run.yml file.")
|
45
|
-
|
46
|
-
run_yaml_path = os.path.join(project_path, run_file)
|
47
|
-
|
48
|
-
with open(run_yaml_path, 'r', encoding='utf-8') as f:
|
49
|
-
yaml_args = yaml.safe_load(f)
|
50
|
-
|
51
|
-
if yaml_args:
|
52
|
-
if "execution_parameters" in yaml_args.keys():
|
53
|
-
parameters = yaml_args["execution_parameters"]
|
54
|
-
dict_parameters = {param: True for param in parameters}
|
55
|
-
del yaml_args["execution_parameters"]
|
56
|
-
yaml_args.update(dict_parameters)
|
57
15
|
|
58
|
-
|
16
|
+
def configure_project(project_path):
|
17
|
+
# sensei
|
18
|
+
config.src_path = files("src")
|
19
|
+
config.cache_path = files("data") / "cache"
|
20
|
+
config.pdfs_path = files("data") / "pdfs"
|
21
|
+
config.audio_files_path = files("data") / "audio_files"
|
22
|
+
config.default_types_path = files("config") / "types"
|
23
|
+
config.default_personalities_path = files("config") / "personalities"
|
24
|
+
|
25
|
+
# project
|
26
|
+
config.project_folder_path = project_path
|
27
|
+
config.profiles_path = os.path.join(project_path, "profiles")
|
28
|
+
config.custom_personalities_path = os.path.join(project_path, "personalities")
|
29
|
+
config.custom_types_path = os.path.join(project_path, "types")
|
30
|
+
custom_types = load_yaml_files_from_folder(config.custom_types_path)
|
59
31
|
|
60
|
-
return yaml_args or {}
|
61
32
|
|
33
|
+
default_types = load_yaml_files_from_folder(config.default_types_path, existing_keys=custom_types.keys())
|
34
|
+
config.types_dict = {**default_types, **custom_types}
|
62
35
|
|
63
|
-
def load_yaml_files_from_folder(folder_path, existing_keys=None):
|
64
|
-
types = {}
|
65
|
-
for filename in os.listdir(folder_path):
|
66
|
-
if filename.endswith((".yml", ".yaml")):
|
67
|
-
file_path = os.path.join(folder_path, filename)
|
68
|
-
try:
|
69
|
-
with open(file_path, "r", encoding="utf-8") as f:
|
70
|
-
data = yaml.safe_load(f)
|
71
|
-
name = data.get("name")
|
72
|
-
if name:
|
73
|
-
if not existing_keys or name not in existing_keys:
|
74
|
-
types[name] = data
|
75
|
-
except yaml.YAMLError as e:
|
76
|
-
logger.error(f"Error reading {file_path}: {e}")
|
77
|
-
return types
|
78
36
|
|
37
|
+
def _setup_configuration() -> Namespace:
|
38
|
+
"""Parse command line arguments, validate config, and create output dir.
|
79
39
|
|
80
|
-
|
40
|
+
Returns:
|
41
|
+
The parsed and validated command line arguments
|
81
42
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
config.root_path = os.path.abspath(os.path.join(current_script_dir, "../../.."))
|
87
|
-
config.src_path = os.path.abspath(os.path.join(current_script_dir, "../.."))
|
43
|
+
Raises:
|
44
|
+
TracerError: If the specified technology is invalid
|
45
|
+
"""
|
88
46
|
|
89
|
-
|
90
|
-
default_types_path = os.path.join(config.src_path, "config", "types")
|
47
|
+
args = parse_chat_arguments()
|
91
48
|
|
92
|
-
|
93
|
-
|
94
|
-
config.types_dict = {**default_types, **custom_types}
|
95
|
-
|
96
|
-
# def configure_connector(*args):
|
97
|
-
# connec = args[0]
|
98
|
-
# with open(connec, 'r', encoding='utf-8') as f:
|
99
|
-
# con_yaml = yaml.safe_load(f)
|
100
|
-
#
|
101
|
-
#
|
102
|
-
# if len(args)<2 or not con_yaml["parameters"]:
|
103
|
-
# logger.warning("No parameters added for connector configuration. They may not have been set as input arguments "
|
104
|
-
# "or declared as dynamic parameters in the connector file.")
|
105
|
-
# return con_yaml
|
106
|
-
#
|
107
|
-
# parameters = args[1]
|
108
|
-
# if isinstance(parameters, str):
|
109
|
-
# parameters = json.loads(parameters)
|
110
|
-
#
|
111
|
-
# param_key_list = list(parameters.keys())
|
112
|
-
# if Counter(con_yaml["parameters"]) != Counter(param_key_list):
|
113
|
-
# raise UnmachedList("Parameters in yaml don't match parameters input in execution")
|
114
|
-
#
|
115
|
-
# def replace_values(obj_dict, src_dict):
|
116
|
-
# for key in obj_dict:
|
117
|
-
# if isinstance(obj_dict[key], dict):
|
118
|
-
# replace_values(obj_dict[key], src_dict)
|
119
|
-
# elif key in src_dict:
|
120
|
-
# obj_dict[key] = src_dict[key]
|
121
|
-
#
|
122
|
-
# replace_values(con_yaml, parameters)
|
123
|
-
# return con_yaml
|
124
|
-
|
125
|
-
# def _setup_configuration() -> Namespace:
|
126
|
-
# """Parse command line arguments, validate config, and create output dir.
|
127
|
-
#
|
128
|
-
# Returns:
|
129
|
-
# The parsed and validated command line arguments
|
130
|
-
#
|
131
|
-
# Raises:
|
132
|
-
# TracerError: If the specified technology is invalid
|
133
|
-
# """
|
134
|
-
# args = parse_chat_arguments()
|
135
|
-
#
|
136
|
-
# logger = create_logger(args.verbose, 'Info Logger')
|
137
|
-
# logger.info('Logs enabled!')
|
49
|
+
logger = create_logger(args.verbose, 'Info Logger')
|
50
|
+
logger.info('Logs enabled!')
|
138
51
|
|
52
|
+
configure_project(args.project_path)
|
139
53
|
|
54
|
+
# check_keys(["OPENAI_API_KEY"])
|
55
|
+
config.test_cases_folder = args.extract
|
56
|
+
config.ignore_cache = args.ignore_cache
|
57
|
+
config.update_cache = args.update_cache
|
58
|
+
config.clean_cache = args.clean_cache
|
140
59
|
|
60
|
+
return args
|
141
61
|
|
142
62
|
|
143
63
|
def get_conversation_metadata(user_profile, the_user, serial=None):
|
@@ -162,6 +82,7 @@ def get_conversation_metadata(user_profile, the_user, serial=None):
|
|
162
82
|
|
163
83
|
return conversation_list
|
164
84
|
|
85
|
+
|
165
86
|
def ask_about_metadata(up):
|
166
87
|
if not up.ask_about.variable_list:
|
167
88
|
return up.ask_about.str_list
|
@@ -196,9 +117,9 @@ def get_conversation_metadata(user_profile, the_user, serial=None):
|
|
196
117
|
return data_list
|
197
118
|
|
198
119
|
def total_cost_calculator():
|
120
|
+
import pandas as pd
|
199
121
|
encoding = get_encoding(config.cost_ds_path)["encoding"]
|
200
122
|
cost_df = pd.read_csv(config.cost_ds_path, encoding=encoding)
|
201
|
-
|
202
123
|
total_sum_cost = cost_df[cost_df["Conversation"]==config.conversation_name]['Total Cost'].sum()
|
203
124
|
total_sum_cost = round(float(total_sum_cost), 8)
|
204
125
|
|
@@ -226,57 +147,19 @@ def get_conversation_metadata(user_profile, the_user, serial=None):
|
|
226
147
|
return metadata
|
227
148
|
|
228
149
|
|
229
|
-
def parse_profiles(user_path):
|
230
|
-
def is_yaml(file):
|
231
|
-
if not file.endswith(('.yaml', '.yml')):
|
232
|
-
return False
|
233
|
-
try:
|
234
|
-
with open(file, 'r') as f:
|
235
|
-
yaml.safe_load(f)
|
236
|
-
return True
|
237
|
-
except yaml.YAMLError:
|
238
|
-
return False
|
239
|
-
|
240
|
-
list_of_files = []
|
241
|
-
if os.path.isfile(user_path):
|
242
|
-
if is_yaml(user_path):
|
243
|
-
yaml_file = read_yaml(user_path)
|
244
|
-
return [yaml_file]
|
245
|
-
else:
|
246
|
-
raise Exception(f'The user profile file is not a yaml: {user_path}')
|
247
|
-
elif os.path.isdir(user_path):
|
248
|
-
for root, _, files in os.walk(user_path):
|
249
|
-
for file in files:
|
250
|
-
if is_yaml(os.path.join(root, file)):
|
251
|
-
path = root + '/' + file
|
252
|
-
yaml_file = read_yaml(path)
|
253
|
-
list_of_files.append(yaml_file)
|
254
|
-
|
255
|
-
return list_of_files
|
256
|
-
else:
|
257
|
-
raise Exception(f'Invalid path for user profile operation: {user_path}')
|
258
|
-
|
259
|
-
|
260
150
|
def build_chatbot(technology, connector):
|
261
|
-
# chatbot_builder = {
|
262
|
-
# 'rasa': RasaChatbot,
|
263
|
-
# 'taskyto': ChatbotTaskyto,
|
264
|
-
# # 'serviceform': ChatbotServiceform(connector),
|
265
|
-
# 'millionbot': MillionBot,
|
266
|
-
# 'custom': CustomChatbot
|
267
|
-
# }
|
268
|
-
# chatbot_class = chatbot_builder.get(technology, CustomChatbot)
|
269
151
|
parsed_connector = parse_connector_params(connector)
|
270
152
|
chatbot = ChatbotFactory.create_chatbot(chatbot_type=technology, **parsed_connector)
|
271
153
|
return chatbot
|
272
154
|
|
155
|
+
|
273
156
|
def generate_conversation(technology, connector, user,
|
274
|
-
personality,
|
157
|
+
personality, output, project_folder):
|
275
158
|
profiles = parse_profiles(user)
|
276
159
|
serial = generate_serial()
|
277
160
|
config.serial = serial
|
278
|
-
create_cost_dataset(serial,
|
279
|
-
my_execution_stat = ExecutionStats(
|
161
|
+
create_cost_dataset(serial, output)
|
162
|
+
my_execution_stat = ExecutionStats(output, serial)
|
280
163
|
the_chatbot = build_chatbot(technology, connector)
|
281
164
|
|
282
165
|
|
@@ -386,7 +269,7 @@ def generate_conversation(technology, connector, user,
|
|
386
269
|
the_user.update_history("Assistant", "Error: The server did not respond.")
|
387
270
|
break
|
388
271
|
|
389
|
-
if
|
272
|
+
if output:
|
390
273
|
end_time_conversation = timeit.default_timer()
|
391
274
|
conversation_time = end_time_conversation - start_time_conversation
|
392
275
|
formatted_time_conv = timedelta(seconds=conversation_time).total_seconds()
|
@@ -397,7 +280,7 @@ def generate_conversation(technology, connector, user,
|
|
397
280
|
dg_dataframe = the_user.data_gathering.gathering_register
|
398
281
|
csv_extraction = the_user.goal_style[1] if the_user.goal_style[0] == 'all_answered' else False
|
399
282
|
answer_validation_data = (dg_dataframe, csv_extraction)
|
400
|
-
save_test_conv(history, metadata, test_name,
|
283
|
+
save_test_conv(history, metadata, test_name, output, serial,
|
401
284
|
formatted_time_conv, response_time, answer_validation_data, counter=i)
|
402
285
|
|
403
286
|
config.total_individual_cost = 0
|
@@ -419,93 +302,31 @@ def generate_conversation(technology, connector, user,
|
|
419
302
|
if config.clean_cache:
|
420
303
|
clean_temp_files()
|
421
304
|
|
422
|
-
if
|
305
|
+
if output and len(my_execution_stat.test_names) == len(profiles):
|
423
306
|
my_execution_stat.show_global_stats()
|
424
307
|
my_execution_stat.export_stats()
|
425
|
-
elif
|
308
|
+
elif output:
|
426
309
|
logger.warning("Stats export was enabled but couldn't retrieve all stats. No stats will be exported.")
|
427
310
|
else:
|
428
311
|
pass
|
429
312
|
|
430
313
|
end_alarm()
|
431
314
|
|
432
|
-
def main():
|
433
|
-
parser = ArgumentParser(description='Conversation generator for a chatbot')
|
434
|
-
|
435
|
-
parser.add_argument('--run_from_yaml', type=str, help='Carga los argumentos desde un archivo YAML')
|
436
|
-
|
437
|
-
parser.add_argument('--technology', required=False,
|
438
|
-
choices=['rasa', 'taskyto', 'ada-uam', 'millionbot', 'genion', 'lola', 'serviceform', 'kuki', 'julie', 'rivas_catalina', 'saic_malaga'],
|
439
|
-
help='Technology the chatbot is implemented in')
|
440
|
-
# parser.add_argument('--connector', required=False, help='path to the connector configuration file')
|
441
|
-
parser.add_argument('--connector-params', required=False, help='dynamic parameters for the selected chatbot connector')
|
442
|
-
parser.add_argument('--project_path', required=False, help='Project folder PATH where all testing data is stored')
|
443
|
-
parser.add_argument('--user_profile', required=False, help='User profile file or user profile folder to test the chatbot')
|
444
|
-
parser.add_argument('--personality', required=False, help='Personality file')
|
445
|
-
parser.add_argument('--extract', default=False, help='Path to store conversation user-chatbot')
|
446
|
-
parser.add_argument('--verbose', action='store_true', help='Shows debug prints')
|
447
|
-
parser.add_argument('--clean_cache', action='store_true', help='Deletes temporary files.')
|
448
|
-
parser.add_argument('--ignore_cache', action='store_true', help='Ignores cache for temporary files')
|
449
|
-
parser.add_argument('--update_cache', action='store_true', help='Overwrites temporary files in cache')
|
450
|
-
parser_args, unknown_args = parser.parse_known_args()
|
451
|
-
|
452
|
-
if parser_args.run_from_yaml:
|
453
|
-
if len(sys.argv) > 3: # sys.argv[0] is script, sys.argv[1] is --run_from_yaml, sys.argv[2] is YAML
|
454
|
-
parser.error("No other arguments can be provided when using --run_from_yaml.")
|
455
|
-
|
456
|
-
yaml_args = load_yaml_arguments(parser_args.run_from_yaml)
|
457
|
-
|
458
|
-
default_flags = {
|
459
|
-
"connector_parameters": None,
|
460
|
-
"personality": None,
|
461
|
-
"verbose": False,
|
462
|
-
"clean_cache": False,
|
463
|
-
"ignore_cache": False,
|
464
|
-
"update_cache": False
|
465
|
-
}
|
466
|
-
for flag, default in default_flags.items():
|
467
|
-
yaml_args.setdefault(flag, default)
|
468
|
-
|
469
|
-
class ArgsNamespace:
|
470
|
-
def __init__(self, **entries):
|
471
|
-
self.__dict__.update(entries)
|
472
|
-
|
473
|
-
parser_args = ArgsNamespace(**yaml_args)
|
474
|
-
|
475
|
-
else:
|
476
|
-
required_args = ['technology', 'user_profile', 'connector_params']
|
477
|
-
missing_args = [arg for arg in required_args if getattr(parser_args, arg) is None]
|
478
|
-
|
479
|
-
if missing_args:
|
480
|
-
parser.error(f"The following arguments are required when not using --run_from_yaml: {', '.join(missing_args)}")
|
481
|
-
|
482
|
-
configure_project(parser_args.project_path)
|
483
|
-
# config.root_path = os.path.abspath(os.path.join(current_script_dir, "../../.."))
|
484
|
-
# config.src_path = os.path.abspath(os.path.join(current_script_dir, "../.."))
|
485
|
-
profile_path = os.path.join(config.profiles_path, parser_args.user_profile)
|
486
|
-
|
487
|
-
print(config.src_path)
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
# check_keys(["OPENAI_API_KEY"])
|
492
|
-
config.test_cases_folder = parser_args.extract
|
493
|
-
config.ignore_cache = parser_args.ignore_cache
|
494
|
-
config.update_cache = parser_args.update_cache
|
495
|
-
config.clean_cache = parser_args.clean_cache
|
496
|
-
|
497
|
-
# if parser_args.connector_parameters:
|
498
|
-
# connector = configure_connector(parser_args.connector, parser_args.connector_parameters)
|
499
|
-
# else:
|
500
|
-
# connector = configure_connector(parser_args.connector)
|
501
|
-
|
502
|
-
connector = parser_args.connector_params
|
503
315
|
|
316
|
+
def main():
|
317
|
+
args = _setup_configuration()
|
504
318
|
try:
|
505
|
-
generate_conversation(
|
506
|
-
|
319
|
+
generate_conversation(
|
320
|
+
technology = args.technology,
|
321
|
+
connector = args.connector_params,
|
322
|
+
user = os.path.join(config.profiles_path, args.user_profile),
|
323
|
+
personality = None, #todo: check this
|
324
|
+
output = args.output,
|
325
|
+
project_folder = args.project_path
|
326
|
+
)
|
507
327
|
except Exception as e:
|
508
328
|
logger.error(f"An error occurred while generating the conversation: {e}")
|
509
329
|
|
330
|
+
|
510
331
|
if __name__ == '__main__':
|
511
332
|
main()
|
user_sim/core/role_structure.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
import itertools
|
2
2
|
from pydantic import BaseModel, ValidationError, field_validator
|
3
3
|
from typing import List, Union, Dict, Optional
|
4
|
+
from importlib.resources import files
|
4
5
|
from user_sim.core.interaction_styles import *
|
5
6
|
from user_sim.core.ask_about import *
|
6
7
|
from user_sim.utils.exceptions import *
|
@@ -277,11 +278,11 @@ class RoleData:
|
|
277
278
|
personality = context["personality"]
|
278
279
|
|
279
280
|
path_list = []
|
280
|
-
if os.path.exists(config.
|
281
|
-
custom_personalities_path = config.
|
281
|
+
if os.path.exists(config.custom_personalities_path):
|
282
|
+
custom_personalities_path = config.custom_personalities_path
|
282
283
|
path_list.append(custom_personalities_path)
|
283
284
|
|
284
|
-
default_personalities_path =
|
285
|
+
default_personalities_path = files("config") / "personalities"
|
285
286
|
path_list.append(default_personalities_path)
|
286
287
|
|
287
288
|
try:
|
user_sim/handlers/asr_module.py
CHANGED
@@ -2,6 +2,8 @@ import speech_recognition as sr
|
|
2
2
|
from pydantic import BaseModel, ValidationError
|
3
3
|
from typing import List, Union, Dict, Optional
|
4
4
|
import time
|
5
|
+
|
6
|
+
from user_sim.utils import config
|
5
7
|
from user_sim.utils.utilities import read_yaml
|
6
8
|
from user_sim.utils.token_cost_calculator import calculate_cost, max_input_tokens_allowed
|
7
9
|
from openai import OpenAI
|
@@ -15,6 +17,7 @@ pygame.mixer.init()
|
|
15
17
|
warnings.filterwarnings("ignore", category=FutureWarning, module="whisper")
|
16
18
|
warnings.filterwarnings("ignore", category=RuntimeWarning, module="pydub")
|
17
19
|
client = OpenAI()
|
20
|
+
audio_files_path = config.audio_files_path
|
18
21
|
audio_format = "mp3"
|
19
22
|
|
20
23
|
|
@@ -116,11 +119,11 @@ class STTModule:
|
|
116
119
|
input=message,
|
117
120
|
response_format=audio_format
|
118
121
|
) as response:
|
119
|
-
response.stream_to_file("
|
122
|
+
response.stream_to_file(audio_files_path / f"output.{audio_format}")
|
120
123
|
|
121
124
|
calculate_cost(message, model=self.model, module="tts_module")
|
122
125
|
logger.info("Playing...")
|
123
|
-
audio_path = f"
|
126
|
+
audio_path = audio_files_path / f"output.{audio_format}"
|
124
127
|
with open(audio_path, 'rb') as audio_file:
|
125
128
|
pygame.mixer.music.load(audio_file)
|
126
129
|
pygame.mixer.music.play()
|
@@ -12,6 +12,7 @@ from user_sim.handlers.image_recognition_module import image_description
|
|
12
12
|
logger = logging.getLogger('Info Logger')
|
13
13
|
current_script_dir = os.path.dirname(os.path.abspath(__file__))
|
14
14
|
project_root = os.path.abspath(os.path.join(current_script_dir, "../.."))
|
15
|
+
pdfs_dir = config.pdfs_path
|
15
16
|
pdf_register_name = "pdf_register.json"
|
16
17
|
|
17
18
|
|
user_sim/utils/config.py
CHANGED
@@ -14,11 +14,19 @@ clean_cache = False
|
|
14
14
|
root_path = ""
|
15
15
|
project_folder_path = ""
|
16
16
|
src_path = ""
|
17
|
+
#data
|
18
|
+
cache_path = ""
|
19
|
+
pdfs_path = ""
|
20
|
+
audio_files_path = ""
|
21
|
+
#custom
|
17
22
|
profiles_path = ""
|
18
|
-
|
23
|
+
custom_personalities_path = ""
|
24
|
+
custom_types_path = ""
|
19
25
|
test_cases_folder = ""
|
20
26
|
types_dict = {}
|
21
|
-
|
27
|
+
#default
|
28
|
+
default_types_path = ""
|
29
|
+
default_personalities_path = ""
|
22
30
|
|
23
31
|
|
24
32
|
# cost metrics
|
@@ -2,10 +2,9 @@ import os
|
|
2
2
|
import json
|
3
3
|
import hashlib
|
4
4
|
import logging
|
5
|
+
from user_sim.utils import config
|
5
6
|
|
6
|
-
|
7
|
-
project_root = os.path.abspath(os.path.join(current_script_dir, "../..")) #change
|
8
|
-
temp_file_dir = os.path.join(project_root, "data/cache")
|
7
|
+
temp_file_dir = config.cache_path
|
9
8
|
|
10
9
|
logger = logging.getLogger('Info Logger')
|
11
10
|
|
user_sim/utils/utilities.py
CHANGED
@@ -9,7 +9,7 @@ import importlib.util
|
|
9
9
|
import logging
|
10
10
|
import platform
|
11
11
|
|
12
|
-
from
|
12
|
+
from colorama import Fore, Style
|
13
13
|
from datetime import datetime, timedelta, date
|
14
14
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
15
15
|
from sklearn.metrics.pairwise import cosine_similarity
|
@@ -36,6 +36,19 @@ logger = logging.getLogger('Info Logger')
|
|
36
36
|
# if not os.environ.get(k):
|
37
37
|
# raise Exception(f"{k} not found")
|
38
38
|
|
39
|
+
def print_user(msg):
|
40
|
+
clean_text = re.sub(r'\(Web page content: [^)]*>>\)', '', msg)
|
41
|
+
clean_text = re.sub(r'\(PDF content: [^)]*>>\)', '', clean_text)
|
42
|
+
clean_text = re.sub(r'\(Image description[^)]*\)', '', clean_text)
|
43
|
+
print(f"{Fore.GREEN}User:{Style.RESET_ALL} {clean_text}")
|
44
|
+
|
45
|
+
|
46
|
+
def print_chatbot(msg):
|
47
|
+
clean_text = re.sub(r'\(Web page content:.*?\>\>\)', '', msg, flags=re.DOTALL)
|
48
|
+
clean_text = re.sub(r'\(PDF content:.*?\>\>\)', '', clean_text, flags=re.DOTALL)
|
49
|
+
clean_text = re.sub(r'\(Image description[^)]*\)', '', clean_text)
|
50
|
+
print(f"{Fore.LIGHTRED_EX}Chatbot:{Style.RESET_ALL} {clean_text}")
|
51
|
+
|
39
52
|
|
40
53
|
def end_alarm():
|
41
54
|
os_name = platform.system()
|
@@ -64,57 +77,58 @@ def init_model():
|
|
64
77
|
return model, llm
|
65
78
|
|
66
79
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
# try:
|
83
|
-
# # Try to parse as JSON first
|
84
|
-
# if connector_params_str.strip().startswith("{"):
|
85
|
-
# params = json.loads(connector_params_str)
|
86
|
-
# else:
|
87
|
-
# # Parse as key=value pairs
|
88
|
-
# for pair in connector_params_str.split(","):
|
89
|
-
# if "=" not in pair:
|
90
|
-
# continue
|
91
|
-
# key, value = pair.split("=", 1)
|
92
|
-
# key = key.strip()
|
93
|
-
# value = value.strip()
|
94
|
-
#
|
95
|
-
# # Try to convert to appropriate types
|
96
|
-
# if value.lower() in ("true", "false"):
|
97
|
-
# params[key] = value.lower() == "true"
|
98
|
-
# elif value.isdigit():
|
99
|
-
# params[key] = int(value)
|
100
|
-
# else:
|
101
|
-
# try:
|
102
|
-
# params[key] = float(value)
|
103
|
-
# except ValueError:
|
104
|
-
# params[key] = value
|
105
|
-
#
|
106
|
-
# except (json.JSONDecodeError, ValueError) as e:
|
107
|
-
# logger.exception("Failed to parse connector parameters: %s", connector_params_str)
|
108
|
-
# msg = f"Invalid connector parameters format: {e}"
|
109
|
-
# raise ValueError(msg) from e
|
110
|
-
#
|
111
|
-
# return params
|
80
|
+
def load_yaml_files_from_folder(folder_path, existing_keys=None):
|
81
|
+
types = {}
|
82
|
+
for filename in os.listdir(folder_path):
|
83
|
+
if filename.endswith((".yml", ".yaml")):
|
84
|
+
file_path = os.path.join(folder_path, filename)
|
85
|
+
try:
|
86
|
+
with open(file_path, "r", encoding="utf-8") as f:
|
87
|
+
data = yaml.safe_load(f)
|
88
|
+
name = data.get("name")
|
89
|
+
if name:
|
90
|
+
if not existing_keys or name not in existing_keys:
|
91
|
+
types[name] = data
|
92
|
+
except yaml.YAMLError as e:
|
93
|
+
logger.error(f"Error reading {file_path}: {e}")
|
94
|
+
return types
|
112
95
|
|
113
96
|
|
114
97
|
def parse_content_to_text(messages):
|
115
98
|
return " ".join([message["content"] for message in messages if "content" in message])
|
116
99
|
|
117
100
|
|
101
|
+
def parse_profiles(user_path):
|
102
|
+
def is_yaml(file):
|
103
|
+
if not file.endswith(('.yaml', '.yml')):
|
104
|
+
return False
|
105
|
+
try:
|
106
|
+
with open(file, 'r') as f:
|
107
|
+
yaml.safe_load(f)
|
108
|
+
return True
|
109
|
+
except yaml.YAMLError:
|
110
|
+
return False
|
111
|
+
|
112
|
+
list_of_files = []
|
113
|
+
if os.path.isfile(user_path):
|
114
|
+
if is_yaml(user_path):
|
115
|
+
yaml_file = read_yaml(user_path)
|
116
|
+
return [yaml_file]
|
117
|
+
else:
|
118
|
+
raise Exception(f'The user profile file is not a yaml: {user_path}')
|
119
|
+
elif os.path.isdir(user_path):
|
120
|
+
for root, _, files in os.walk(user_path):
|
121
|
+
for file in files:
|
122
|
+
if is_yaml(os.path.join(root, file)):
|
123
|
+
path = root + '/' + file
|
124
|
+
yaml_file = read_yaml(path)
|
125
|
+
list_of_files.append(yaml_file)
|
126
|
+
|
127
|
+
return list_of_files
|
128
|
+
else:
|
129
|
+
raise Exception(f'Invalid path for user profile operation: {user_path}')
|
130
|
+
|
131
|
+
|
118
132
|
def get_encoding(encoded_file):
|
119
133
|
with open(encoded_file, 'rb') as file:
|
120
134
|
detected = detect(file.read())
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: user-simulator
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.3.1
|
4
4
|
Summary: LLM-based user simulator for chatbot testing.
|
5
5
|
Author: Alejandro Del Pozzo Escalera, Juan de Lara Jaramillo, Esther Guerra Sánchez
|
6
6
|
License: MIT License
|
@@ -41,13 +41,16 @@ Requires-Dist: openai>=1.0.0
|
|
41
41
|
Requires-Dist: pandas>=2.3.0
|
42
42
|
Requires-Dist: pillow>=11.2.1
|
43
43
|
Requires-Dist: pydantic>=2.0.0
|
44
|
-
Requires-Dist: pymupdf
|
44
|
+
Requires-Dist: pymupdf==1.26.1
|
45
45
|
Requires-Dist: pyyaml>=6.0.2
|
46
46
|
Requires-Dist: requests>=2.32.4
|
47
47
|
Requires-Dist: scikit-learn>=1.7.0
|
48
48
|
Requires-Dist: selenium>=4.33.0
|
49
|
+
Requires-Dist: sqlalchemy==2.0.41
|
49
50
|
Requires-Dist: twine>=6.1.0
|
51
|
+
Requires-Dist: typing-extensions==4.13.2
|
50
52
|
Requires-Dist: webdriver-manager>=4.0.2
|
53
|
+
Requires-Dist: zstandard==0.23.0
|
51
54
|
Dynamic: license-file
|
52
55
|
|
53
56
|
# User simulator for chatbot testing
|