teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic; see the registry's advisory page for details.

Files changed (240):
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +306 -0
  4. teradataml/__init__.py +10 -3
  5. teradataml/_version.py +1 -1
  6. teradataml/analytics/__init__.py +3 -2
  7. teradataml/analytics/analytic_function_executor.py +299 -16
  8. teradataml/analytics/analytic_query_generator.py +92 -0
  9. teradataml/analytics/byom/__init__.py +3 -2
  10. teradataml/analytics/json_parser/metadata.py +13 -3
  11. teradataml/analytics/json_parser/utils.py +13 -6
  12. teradataml/analytics/meta_class.py +40 -1
  13. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  14. teradataml/analytics/sqle/__init__.py +11 -2
  15. teradataml/analytics/table_operator/__init__.py +4 -3
  16. teradataml/analytics/uaf/__init__.py +21 -2
  17. teradataml/analytics/utils.py +66 -1
  18. teradataml/analytics/valib.py +1 -1
  19. teradataml/automl/__init__.py +1502 -323
  20. teradataml/automl/custom_json_utils.py +139 -61
  21. teradataml/automl/data_preparation.py +247 -307
  22. teradataml/automl/data_transformation.py +32 -12
  23. teradataml/automl/feature_engineering.py +325 -86
  24. teradataml/automl/model_evaluation.py +44 -35
  25. teradataml/automl/model_training.py +122 -153
  26. teradataml/catalog/byom.py +8 -8
  27. teradataml/clients/pkce_client.py +1 -1
  28. teradataml/common/__init__.py +2 -1
  29. teradataml/common/constants.py +72 -0
  30. teradataml/common/deprecations.py +13 -7
  31. teradataml/common/garbagecollector.py +152 -120
  32. teradataml/common/messagecodes.py +11 -2
  33. teradataml/common/messages.py +4 -1
  34. teradataml/common/sqlbundle.py +26 -4
  35. teradataml/common/utils.py +225 -14
  36. teradataml/common/wrapper_utils.py +1 -1
  37. teradataml/context/context.py +82 -2
  38. teradataml/data/SQL_Fundamentals.pdf +0 -0
  39. teradataml/data/complaints_test_tokenized.csv +353 -0
  40. teradataml/data/complaints_tokens_model.csv +348 -0
  41. teradataml/data/covid_confirm_sd.csv +83 -0
  42. teradataml/data/dataframe_example.json +27 -1
  43. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  44. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  45. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  46. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  47. teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
  48. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  49. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  50. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  51. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  52. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  53. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  54. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  55. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  56. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  57. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  58. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  59. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  60. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  61. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  62. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  63. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  64. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  65. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  66. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  67. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  68. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  69. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  70. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  71. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  72. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  73. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  74. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  75. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  76. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  77. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  78. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  79. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  80. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  81. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  82. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  83. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  84. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  85. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  86. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  87. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  88. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  89. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  90. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  91. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  92. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  93. teradataml/data/dwt2d_dataTable.csv +65 -0
  94. teradataml/data/dwt_dataTable.csv +8 -0
  95. teradataml/data/dwt_filterTable.csv +3 -0
  96. teradataml/data/finance_data4.csv +13 -0
  97. teradataml/data/grocery_transaction.csv +19 -0
  98. teradataml/data/idwt2d_dataTable.csv +5 -0
  99. teradataml/data/idwt_dataTable.csv +8 -0
  100. teradataml/data/idwt_filterTable.csv +3 -0
  101. teradataml/data/interval_data.csv +5 -0
  102. teradataml/data/jsons/paired_functions.json +14 -0
  103. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  104. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  105. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  106. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  107. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  108. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  109. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  110. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  111. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  112. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  113. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  114. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  115. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  116. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  117. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  118. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  119. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  120. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  121. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  122. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  123. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  124. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  125. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  126. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  127. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  128. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  129. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  130. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  131. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  132. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  133. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  134. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  135. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  136. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  137. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  138. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  139. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  140. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  141. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  142. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  143. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  144. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  145. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  146. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  147. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  148. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  149. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  150. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  151. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  152. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  153. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  154. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  155. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  156. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  157. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  158. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  159. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  160. teradataml/data/load_example_data.py +8 -2
  161. teradataml/data/medical_readings.csv +101 -0
  162. teradataml/data/naivebayestextclassifier_example.json +1 -1
  163. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  164. teradataml/data/patient_profile.csv +101 -0
  165. teradataml/data/peppers.png +0 -0
  166. teradataml/data/real_values.csv +14 -0
  167. teradataml/data/sax_example.json +8 -0
  168. teradataml/data/scripts/deploy_script.py +1 -1
  169. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  170. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  171. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  172. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  173. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
  174. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  175. teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
  176. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  177. teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
  178. teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
  179. teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
  180. teradataml/data/star_pivot.csv +8 -0
  181. teradataml/data/target_udt_data.csv +8 -0
  182. teradataml/data/templates/open_source_ml.json +3 -1
  183. teradataml/data/teradataml_example.json +20 -1
  184. teradataml/data/timestamp_data.csv +4 -0
  185. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  186. teradataml/data/uaf_example.json +55 -1
  187. teradataml/data/unpivot_example.json +15 -0
  188. teradataml/data/url_data.csv +9 -0
  189. teradataml/data/vectordistance_example.json +4 -0
  190. teradataml/data/windowdfft.csv +16 -0
  191. teradataml/dataframe/copy_to.py +1 -1
  192. teradataml/dataframe/data_transfer.py +5 -3
  193. teradataml/dataframe/dataframe.py +1002 -201
  194. teradataml/dataframe/fastload.py +3 -3
  195. teradataml/dataframe/functions.py +867 -0
  196. teradataml/dataframe/row.py +160 -0
  197. teradataml/dataframe/setop.py +2 -2
  198. teradataml/dataframe/sql.py +840 -33
  199. teradataml/dataframe/window.py +1 -1
  200. teradataml/dbutils/dbutils.py +878 -34
  201. teradataml/dbutils/filemgr.py +48 -1
  202. teradataml/geospatial/geodataframe.py +1 -1
  203. teradataml/geospatial/geodataframecolumn.py +1 -1
  204. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  205. teradataml/lib/aed_0_1.dll +0 -0
  206. teradataml/opensource/__init__.py +1 -1
  207. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  208. teradataml/opensource/_lightgbm.py +950 -0
  209. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  210. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  211. teradataml/opensource/sklearn/__init__.py +0 -1
  212. teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
  213. teradataml/options/__init__.py +9 -23
  214. teradataml/options/configure.py +42 -4
  215. teradataml/options/display.py +2 -2
  216. teradataml/plot/axis.py +4 -4
  217. teradataml/scriptmgmt/UserEnv.py +13 -9
  218. teradataml/scriptmgmt/lls_utils.py +77 -23
  219. teradataml/store/__init__.py +13 -0
  220. teradataml/store/feature_store/__init__.py +0 -0
  221. teradataml/store/feature_store/constants.py +291 -0
  222. teradataml/store/feature_store/feature_store.py +2223 -0
  223. teradataml/store/feature_store/models.py +1505 -0
  224. teradataml/store/vector_store/__init__.py +1586 -0
  225. teradataml/table_operators/Script.py +2 -2
  226. teradataml/table_operators/TableOperator.py +106 -20
  227. teradataml/table_operators/query_generator.py +3 -0
  228. teradataml/table_operators/table_operator_query_generator.py +3 -1
  229. teradataml/table_operators/table_operator_util.py +102 -56
  230. teradataml/table_operators/templates/dataframe_register.template +69 -0
  231. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  232. teradataml/telemetry_utils/__init__.py +0 -0
  233. teradataml/telemetry_utils/queryband.py +52 -0
  234. teradataml/utils/dtypes.py +4 -2
  235. teradataml/utils/validators.py +34 -2
  236. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
  237. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
  238. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  239. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  240. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
@@ -0,0 +1,1586 @@
1
+ """
2
+ Unpublished work.
3
+ Copyright (c) 2024 by Teradata Corporation. All rights reserved.
4
+ TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
5
+
6
+ Primary Owner: aanchal.kavedia@teradata.com
7
+ Secondary Owner: adithya.avvaru@teradata.com
8
+
9
+ This file implements VectorStore class along with its method.
10
+ """
11
+ import base64
12
+ import json, os, pandas as pd
13
+ from json.decoder import JSONDecodeError
14
+ from teradataml.common.constants import HTTPRequest, Action as action_enum, Permission as permission_enum
15
+ from teradataml.common.exceptions import TeradataMlException
16
+ from teradataml.common.messages import Messages
17
+ from teradataml.common.messagecodes import MessageCodes
18
+ from teradataml.common.utils import UtilFuncs
19
+ from teradataml.context.context import _get_user
20
+ from teradataml import configure
21
+ from teradataml.utils.validators import _Validators
22
+ from teradataml.utils.dtypes import _ListOf
23
+ from teradataml.scriptmgmt.UserEnv import _get_auth_token
24
+
25
+ class _SimilaritySearch:
26
+ """
27
+ Internal class to create a similarity search object which is needed
28
+ to display the results in a tabular format and at the same time store
29
+ the json object which is used in prepare response.
30
+ """
31
+ def __init__(self, response):
32
+ self.similar_objects_count = response['similar_objects_count']
33
+ self._json_obj = response['similar_objects_list']
34
+ self.similar_objects = pd.DataFrame(self._json_obj)
35
+
36
+ def __repr__(self):
37
+ return f"similar_objects_count:{self.similar_objects_count}\nsimilar_objects:\n{self.similar_objects})"
38
+
39
+ class VectorStore:
40
+ def __init__(self,
41
+ name=None,
42
+ enable_logging=False,
43
+ **kwargs):
44
+ """
45
+ DESCRIPTION:
46
+ VectorStore contains a vectorized version of data.
47
+ The vectorization typically is a result of embeddings generated by
48
+ an AI LLM.
49
+ There are two types of vector stores based on the use cases:
50
+ * Content-based vector store: A vector store built on the
51
+ contents of table/view/teradataml DataFrame.
52
+ The table can be formed from the contents of file / pdf.
53
+ Questions can be asked against the contents of the table and
54
+ top matches of relevant rows are returned based on search.
55
+ This can be followed by a textual response generated using
56
+ an LLM by manipulating the top matches.
57
+
58
+ * Metadata-based vector store: A vector store built on the
59
+ metadata of a set of tables. Questions can be asked
60
+ against a table or set of tables and top table
61
+ matches are returned.
62
+
63
+ PARAMETERS:
64
+ name:
65
+ Required Argument.
66
+ Specifies the name of the vector store either to connect to an
67
+ existing vector store or to create a new vector store.
68
+ Types: str
69
+
70
+ enable_logging:
71
+ Optional Argument.
72
+ Specifies whether logging should be enabled for vector store
73
+ methods.
74
+ Default Value: False
75
+ Types: bool
76
+
77
+ RETURNS:
78
+ None
79
+
80
+ RAISES:
81
+ TeradataMlException
82
+
83
+ EXAMPLES:
84
+ vs = VectorStore(name="vs", enable_logging=True)
85
+ """
86
+ # Initialize variables.
87
+ self.name = name
88
+ self._enable_logging = enable_logging
89
+
90
+ # Validating name and enable_logging.
91
+ arg_info_matrix = []
92
+ arg_info_matrix.append(["name", self.name, False, (str), True])
93
+ arg_info_matrix.append(["enable_logging", self._enable_logging, True, (bool)])
94
+
95
+ _Validators._validate_missing_required_arguments(arg_info_matrix)
96
+ # Validate argument types.
97
+ _Validators._validate_function_arguments(arg_info_matrix)
98
+
99
+ # Check if vector_store_base_url is set or not.
100
+ if configure._vector_store_base_url is None:
101
+ error_msg = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
102
+ 'VectorStore()', 'Auth token is not set.'
103
+ ' Set it by calling set_auth_token().')
104
+ raise RuntimeError(error_msg)
105
+
106
+ self.__base_url = f"{configure._vector_store_base_url}/api/v1/"
107
+ self.__session_url = f"{self.__base_url}session/"
108
+ self.__vectorstore_url = f"{self.__base_url}vectorstore/"
109
+
110
+ # Call connect in case of CCP enabled tenant.
111
+ # If non-ccp, connect should be explicitly called passing the required params.
112
+ self._connect()
113
+
114
    def __set_vs_index_and_vs_parameters(self, **kwargs):
        """
        Internal function to set the parameters for the vector store.
        Keeping it common, as it will be required by update and initialize
        methods.

        Splits the incoming keyword arguments into two private dicts:
        * self.__vs_index      - index-related settings (source table,
                                 columns, chunking options).
        * self.__vs_parameters - embedding/search/LLM settings.
        Only keys whose value is not None are kept in either dict.
        Also converts "document_files" paths into the multipart-upload
        tuple structure expected by the REST API.
        """
        ## Initializing vs_index params
        self._database_name = kwargs.get('database_name', None)
        self._object_name = kwargs.get('object_name', None)
        self._key_columns = kwargs.get('key_columns', None)
        self._data_columns = kwargs.get('data_columns', None)
        self._vector_columns = kwargs.get('vector_columns', None)
        self._chunk_size = kwargs.get("chunk_size", None)
        self._optimized_chunking = kwargs.get('optimized_chunking', None)
        self._header_height = kwargs.get('header_height', None)
        self._footer_height = kwargs.get('footer_height', None)

        ## Initializing vs_parameters
        self._description = kwargs.get("description", None)
        self._embeddings_model = kwargs.get("embeddings_model", None)
        self._embeddings_dims = kwargs.get("embeddings_dims", None)
        self._initial_delay_ms = kwargs.get("initial_delay_ms", None)
        self._delay_max_retries = kwargs.get("delay_max_retries", None)
        self._delay_exp_base = kwargs.get("delay_exp_base", None)
        self._delay_jitter = kwargs.get("delay_jitter", None)
        self._metric = kwargs.get("metric", None)
        self._search_algorithm = kwargs.get("search_algorithm", None)
        self._top_k = kwargs.get("top_k", None)
        self._search_threshold = kwargs.get("search_threshold", None)
        self._initial_centroids_method = kwargs.get("initial_centroids_method", None)
        self._train_numcluster = kwargs.get("train_numcluster", None)
        self._max_iternum = kwargs.get("max_iternum", None)
        self._stop_threshold = kwargs.get("stop_threshold", None)
        self._seed = kwargs.get("seed", None)
        self._num_init = kwargs.get("num_init", None)
        self._search_numcluster = kwargs.get("search_numcluster", None)
        self._prompt = kwargs.get("prompt", None)
        self._document_files = kwargs.get("document_files", None)
        self._chat_completion_model = kwargs.get("chat_completion_model", None)

        # Validating vs_index
        # NOTE(review): "vector_columns" is validated as str only, although
        # initialize()'s docstring advertises "str, list of str" — confirm
        # which is intended.
        arg_info_matrix = []
        arg_info_matrix.append(["database_name", self._database_name, True, (str), True])
        arg_info_matrix.append(["object_name", self._object_name, True, (str), True])
        arg_info_matrix.append(["key_columns", self._key_columns, True, (str, list), True])
        arg_info_matrix.append(["data_columns", self._data_columns, True, (str, list), True])
        arg_info_matrix.append(["vector_columns", self._vector_columns, True, (str), True])
        arg_info_matrix.append(["chunk_size", self._chunk_size, True, (int), True])
        arg_info_matrix.append(["optimized_chunking", self._optimized_chunking, True, (bool), True])
        arg_info_matrix.append(["header_height", self._header_height, True, (int), True])
        arg_info_matrix.append(["footer_height", self._footer_height, True, (int), True])

        # Validating vs_parameters
        arg_info_matrix.append(["description", self._description, True, (str), True])
        arg_info_matrix.append(["embeddings_model", self._embeddings_model, True, (str), True])
        arg_info_matrix.append(["embeddings_dims", self._embeddings_dims, True, (int), True])
        arg_info_matrix.append(["initial_delay_ms", self._initial_delay_ms, True, (int), True])
        arg_info_matrix.append(["delay_max_retries", self._delay_max_retries, True, (int), True])
        arg_info_matrix.append(["delay_exp_base", self._delay_exp_base, True, (int), True])
        arg_info_matrix.append(["delay_jitter", self._delay_jitter, True, (bool), True])
        arg_info_matrix.append(["metric", self._metric, True, (str), True])
        arg_info_matrix.append(["search_algorithm", self._search_algorithm, True, (str), True])
        arg_info_matrix.append(["top_k", self._top_k, True, (int), True])
        arg_info_matrix.append(["initial_centroids_method", self._initial_centroids_method, True, (str),
                                True])
        arg_info_matrix.append(["train_numcluster", self._train_numcluster, True, (int), True])
        arg_info_matrix.append(["max_iternum", self._max_iternum, True, (int), True])
        arg_info_matrix.append(["stop_threshold", self._stop_threshold, True, (float), True])
        arg_info_matrix.append(["seed", self._seed, True, (int), True])
        arg_info_matrix.append(["num_init", self._num_init, True, (int), True])
        arg_info_matrix.append(["search_threshold", self._search_threshold, True, (float), True])
        arg_info_matrix.append(["search_numcluster", self._search_numcluster, True, (int), True])
        arg_info_matrix.append(["prompt", self._prompt, True, (str), True])
        arg_info_matrix.append(["chat_completion_model", self._chat_completion_model, True, (str),
                                True])
        arg_info_matrix.append(["document_files", self._document_files, True, (str, list),
                                True])

        # Validate argument types.
        _Validators._validate_function_arguments(arg_info_matrix)

        ## Input document files structure is: [fully_qualified_file_name1,
        # fully_qualified_file_name2]
        # Forming document files structure as the API accepts:
        # document_files = [('document_files', ('file1.pdf',
        #                                       open('/location/file1.pdf', 'rb'),
        #                                       'application/pdf')),
        #                   ('document_files', ('file2.pdf',
        #                                       open('/location/file2.pdf', 'rb'),
        #                                       'application/pdf'))
        #                  ]

        if self._document_files:
            document_files = self._document_files
            self._document_files = []

            for file in document_files:
                # Get the file name from fully qualified path
                file_name = os.path.basename(file)
                # Form the string 'application/pdf' based on the file extension.
                # NOTE(review): this yields "application/<ext>" for every
                # extension (e.g. "application/png"), which is not always a
                # valid MIME type — confirm the service only accepts PDFs.
                file_type = f"application/{os.path.splitext(file_name)[1]}".replace(".", "")
                # NOTE(review): the handles opened here are intentionally
                # left open — presumably consumed when the multipart request
                # is sent; confirm they are closed after upload.
                self._document_files.append(('document_files', (file_name,
                                                                open(file, 'rb'),
                                                                file_type)))

        vs_parameters = {"description": self._description,
                         "embeddings_model": self._embeddings_model,
                         "embeddings_dims": self._embeddings_dims,
                         "initial_delay_ms": self._initial_delay_ms,
                         "delay_max_retries": self._delay_max_retries,
                         "delay_exp_base": self._delay_exp_base,
                         "delay_jitter": self._delay_jitter,
                         "metric": self._metric,
                         "search_algorithm": self._search_algorithm,
                         "top_k": self._top_k,
                         "initial_centroids_method": self._initial_centroids_method,
                         "train_numcluster": self._train_numcluster,
                         "max_iternum": self._max_iternum,
                         "stop_threshold": self._stop_threshold,
                         "seed": self._seed,
                         "num_init": self._num_init,
                         "search_threshold": self._search_threshold,
                         "search_numcluster": self._search_numcluster,
                         "prompt": self._prompt,
                         "chat_completion_model": self._chat_completion_model}
        # Only add keys with non-None values
        self.__vs_parameters = {k: v for k, v in vs_parameters.items() if v is not None}

        # NOTE(review): the kwarg is "vector_columns" (plural) but the API
        # payload key is 'vector_column' (singular) — this mapping appears
        # deliberate; confirm against the service schema.
        vs_index = {
            'database_name': self._database_name,
            'object_name': self._object_name,
            'key_columns': self._key_columns,
            'data_columns': self._data_columns,
            'vector_column': self._vector_columns,
            'chunk_size': self._chunk_size,
            'optimized_chunking': self._optimized_chunking,
            'header_height': self._header_height,
            'footer_height': self._footer_height
        }
        # Only add keys with non-None values
        self.__vs_index = {k: v for k, v in vs_index.items() if v is not None}
256
+ def __get_header(self):
257
+ """
258
+ DESCRIPTION:
259
+ Function to get the latest token if it is expired for all
260
+ the function calls.
261
+
262
+ RETURNS:
263
+ Latest header with auth_token in case of CCP enabled tenant
264
+ or the header formed by passing user_id and password.
265
+
266
+ """
267
+ if self.__ccp_enabled:
268
+ return _get_auth_token()
269
+ else:
270
+ return self.__headers
271
+
272
    def _connect(self,
                 **kwargs):
        """
        DESCRIPTION:
            Function to connect to vector store in Teradata Vantage.
            On a CCP-enabled tenant the auth token returned by
            _get_auth_token() is used directly; otherwise a Basic-auth
            header is built from "username"/"password" and SSL verification
            is disabled.

        PARAMETERS:
            host:
                Optional Argument.
                Specifies the fully qualified domain name or IP address of the
                Teradata System to connect to.
                Types: str

            username:
                Optional Argument.
                Specifies the username for connecting to/create a vector
                store in Teradata Vantage.
                Types: str

            password:
                Optional Argument.
                Specifies the password required for the username.
                Types: str

            database:
                Optional Argument.
                Specifies the initial database to use after logon,
                instead of the user's default database.
                Types: str

        RETURNS:
            None

        RAISES:
            TeradataMlException

        EXAMPLES:
            from teradataml import VectorStore
            # Example 1: Connect to the database using host, database,
            #            username and password.
            # Create an instance of the VectorStore class.
            vs = VectorStore(name="vec1")

            # Call the connect method to connect to the database.
            vs._connect(host='<host>', username='<user>', password='<password>', database='<database>')
        """
        ## Initialize connection parameters.
        self.__host = kwargs.get("host", None)
        self.__user = kwargs.get("username", None)
        self.__password = kwargs.get("password", None)
        # Fall back to the logged-on user's default database.
        self.__database = kwargs.get("database", _get_user())
        # Default is always True
        self.__ssl_verify = kwargs.get("ssl_verify", True)

        # Get the auth token; a non-empty result means the tenant is
        # CCP enabled.
        self.__headers = _get_auth_token()
        self.__ccp_enabled = True if self.__headers else False

        # Validations
        arg_info_matrix = []
        arg_info_matrix.append(["host", self.__host, True, (str), True])
        arg_info_matrix.append(["username", self.__user, True, (str), True])
        arg_info_matrix.append(["password", self.__password, True, (str), True])
        arg_info_matrix.append(["database", self.__database, True, (str), True])
        arg_info_matrix.append(["ssl_verify", self.__ssl_verify, True, (bool)])

        # Validate argument types.
        _Validators._validate_function_arguments(arg_info_matrix)

        # Triggering the 'connect' API: build the payload, dropping unset
        # entries so the service applies its own defaults.
        data = {
            'database_name': self.__database,
            'hostname': self.__host
        }
        data = {k: v for k, v in data.items() if v is not None}

        # Form the header with username and password if it is not a CCP
        # enabled tenant.
        if not self.__ccp_enabled and self.__user:
            credentials = f"{self.__user}:{self.__password}"
            # Encode the credentials string using Base64
            encoded_credentials = base64.b64encode(
                credentials.encode('utf-8')).decode('utf-8')
            # Form the Authorization header value
            self.__headers = {"Authorization": f"Basic {encoded_credentials}"}
            # NOTE(review): SSL verification is force-disabled for
            # basic-auth connections, overriding any caller-supplied
            # "ssl_verify" — confirm this is intended.
            self.__ssl_verify = False
        # Call the connect API only when the tenant is ccp enabled or header is
        # formed using user and password.

        # This check is needed so that when the tenant is non-ccp enabled,
        # the call from __init__ does not fail
        if self.__ccp_enabled or self.__headers:
            connect_url = f"{self.__session_url}connect"

            http_params = {
                "url": connect_url,
                "method_type": HTTPRequest.POST,
                "headers": self.__get_header(),
                "json": data
            }

            if self.__ssl_verify is not None:
                http_params["verify"] = self.__ssl_verify

            response = UtilFuncs._http_request(**http_params)

            # Remember the service session for subsequent calls, then let
            # the shared handler raise on any API-level error.
            self.__session_id = response.cookies.get("session_id")
            self._process_vs_response(api_name="connect", response=response)
381
+ def initialize(self, **kwargs):
382
+ """
383
+ DESCRIPTION:
384
+ Initialize a new vector store if it does not exist, otherwise
385
+ provide an instance of an existing vector store.
386
+ This call enables / configures a particular vector store to be used.
387
+
388
+ PARAMETERS:
389
+ description:
390
+ Optional Argument.
391
+ Specifies the description of the vector store.
392
+ Types: str
393
+
394
+ database_name:
395
+ Optional Argument.
396
+ Specifies the database name of the table or view to be indexed
397
+ for vector store.
398
+ When "document_files" is passed, it refers to the database where
399
+ the file content splits are stored.
400
+ Note:
401
+ The vector store is also created in this database.
402
+ Types: str
403
+
404
+ object_name:
405
+ Optional Argument.
406
+ Specifies the table name/teradataml DataFrame to be indexed for
407
+ vector store.
408
+ Note:
409
+ * Only one table name/teradataml DataFrame can be specified.
410
+ * For data residing in multiple tables, view should be
411
+ created and view name/teradataml DataFrame over the view should be specified
412
+ here.
413
+ Types: str, DataFrame
414
+
415
+ key_columns:
416
+ Optional Argument.
417
+ Specifies the names of the key columns to be used for indexing.
418
+ Types: str, list of str
419
+
420
+ data_columns:
421
+ Optional Argument.
422
+ Specifies the names of the data columns to be used for indexing.
423
+ Types: str, list of str
424
+
425
+ vector_columns:
426
+ Optional Argument.
427
+ Specifies the names of the columns to be used for storing
428
+ the embeddings.
429
+ Default Value: vector_index
430
+ Types: str, list of str
431
+
432
+ chunk_size:
433
+ Optional Argument.
434
+ Specifies the size of each chunk when dividing document files
435
+ into chunks.
436
+ Default Value: 512
437
+ Types: int
438
+
439
+ optimized_chunking:
440
+ Optional Argument.
441
+ Whether an optimized splitting mechanism supplied by Teradata
442
+ should be used. Applicable only for "document_files".
443
+ The documents are parsed internally in an intelligent fashion
444
+ based on file structure and chunks are dynamically created
445
+ based on section layout.
446
+ Note:
447
+ The "chunk_size" field is not applicable when
448
+ "optimized_chunking" is set to True.
449
+ Default Value: True
450
+ Types: bool
451
+
452
+ header_height:
453
+ Optional Argument.
454
+ Specifies the height (in points) of the header section of a PDF
455
+ document to be trimmed before processing the main content.
456
+ This is useful for removing unwanted header information
457
+ from each page of the PDF.
458
+ Recommended value is 55.
459
+ Default Value: 0
460
+ Types: int
461
+
462
+ footer_height:
463
+ Optional Argument.
464
+ Specifies the height (in points) of the footer section of a PDF
465
+ document to be trimmed before processing the main content.
466
+ This is useful for removing unwanted footer information from
467
+ each page of the PDF.
468
+ Recommended value is 55.
469
+ Default Value: 0
470
+ Types: int
471
+
472
+ embeddings_model:
473
+ Optional Argument.
474
+ Specifies the embeddings model to be used for generating the
475
+ embeddings.
476
+ Permitted Values:
477
+ * amazon.titan-embed-text-v1
478
+ * amazon.titan-embed-image-v1
479
+ * amazon.titan-embed-text-v2:0
480
+ * text-embedding-ada-002
481
+ * text-embedding-3-small
482
+ * text-embedding-3-large
483
+ Types: str
484
+
485
+ embeddings_dims:
486
+ Optional Argument.
487
+ Specifies the number of dimensions to be used for generating the embeddings.
488
+ The value depends on the "embeddings_model".
489
+ Permitted Values:
490
+ * amazon.titan-embed-text-v1: 1536 only
491
+ * amazon.titan-embed-image-v1: [256, 384, 1024]
492
+ * amazon.titan-embed-text-v2:0: [256, 512, 1024]
493
+ * text-embedding-ada-002: 1536 only
494
+ * text-embedding-3-small: 1 <= dims <= 1536
495
+ * text-embedding-3-large: 1 <= dims <= 3072
496
+ Default Value:
497
+ * amazon.titan-embed-text-v1: 1536
498
+ * amazon.titan-embed-image-v1: 1024
499
+ * amazon.titan-embed-text-v2:0: 1024
500
+ * text-embedding-ada-002: 1536
501
+ * text-embedding-3-small: 1536
502
+ * text-embedding-3-large: 3072
503
+ Types: int
504
+
505
+ initial_delay_ms:
506
+ Optional Argument.
507
+ Specifies the millisecond delay after each input table
508
+ row is sent for embeddings.
509
+ Default Value: 5000
510
+ Types: int
511
+
512
+ delay_max_retries:
513
+ Optional Argument.
514
+ Specifies the maximum number of attempts after a failed
515
+ input table row embedding request.
516
+ Default Value: 12
517
+ Types: int
518
+
519
+ delay_exp_base:
520
+ Optional Argument.
521
+ Specifies the exponential base of delay time increase.
522
+ Default Value: 1
523
+ Types: int
524
+
525
+ delay_jitter:
526
+ Optional Argument.
527
+ Specifies whether to use random sum term in exponent.
528
+ Default Value: False
529
+ Types: bool
530
+
531
+ metric:
532
+ Optional Argument.
533
+ Specifies the metric to be used for calculating the distance
534
+ between the vectors.
535
+ Permitted Values:
536
+ * EUCLIDEAN
537
+ * COSINE
538
+ * MANHATTAN
539
+ * DOTPRODUCT
540
+ * MINKOWSKI
541
+ Default Value: EUCLIDEAN
542
+ Types: str
543
+
544
+ search_algorithm:
545
+ Optional Argument.
546
+ Specifies the algorithm to be used for searching the
547
+ tables and views relevant to the question.
548
+ Permitted Values: VECTORDISTANCE, KMEANS.
549
+ Default Value: VECTORDISTANCE
550
+ Types: str
551
+
552
+ initial_centroids_method:
553
+ Optional Argument.
554
+ Specifies the algorithm to be used for initializing the
555
+ centroids when Search Algorithm is KMEANS.
556
+ Permitted Values: RANDOM, KMEANS++
557
+ Default Value: RANDOM
558
+ Types: str
559
+
560
+ train_numcluster:
561
+ Optional Argument.
562
+ Specifies the Number of clusters to be trained when
563
+ "search_algorithm" is KMEANS.
564
+ Default Value: 3
565
+ Types: int
566
+
567
+ max_iternum:
568
+ Optional Argument.
569
+ Specifies the maximum number of iterations to be run during
570
+ training when "search_algorithm" is KMEANS.
571
+ Default Value: 10
572
+ Types: int
573
+
574
+ stop_threshold:
575
+ Optional Argument.
576
+ Specifies the threshold value at which training should be
577
+ stopped when "search_algorithm" is KMEANS.
578
+ Default Value: 0.0395
579
+ Types: int
580
+
581
+ seed:
582
+ Optional Argument.
583
+ Specifies the seed value to be used for random number
584
+ generation when "search_algorithm" is KMEANS.
585
+ Default Value: 0
586
+ Types: int
587
+
588
+ num_init:
589
+ Optional Argument.
590
+ Specifies the number of times the k-means algorithm should
591
+ run with different initial centroid seeds.
592
+ Default Value: 1
593
+ Types: int
594
+
595
+ top_k:
596
+ Optional Argument.
597
+ Specifies the number of top clusters to be considered while searching.
598
+ Value should be between 1-100(both inclusive).
599
+ Default Value: 10
600
+ Types: int
601
+
602
+ search_threshold:
603
+ Optional Argument.
604
+ Specifies the threshold value to consider for matching tables
605
+ while searching.
606
+ Types: float
607
+
608
+ search_numcluster:
609
+ Optional Argument.
610
+ Specifies the number of clusters to be considered while
611
+ searching when "search_algorithm" is KMEANS.
612
+ Default Value: 3
613
+ Types: int
614
+
615
+ prompt:
616
+ Optional Argument.
617
+ Specifies the prompt to be used by language model
618
+ to generate responses using top matches.
619
+ Types: str
620
+
621
+ chat_completion_model:
622
+ Optional Argument.
623
+ Specifies the name of the chat completion model to be used for
624
+ generating text responses.
625
+ Permitted Values:
626
+ * anthropic.claude-3-haiku-20240307-v1:0
627
+ * anthropic.claude-3-opus-20240229-v1:0
628
+ * anthropic.claude-3-sonnet-20240229-v1:0
629
+ * anthropic.claude-3-5-sonnet-20240620-v1:0
630
+ Default Value: anthropic.claude-3-haiku-20240307-v1:0
631
+ Types: str
632
+
633
+ document_files:
634
+ Optional Argument.
635
+ Specifies the input dataset in document files format.
636
+ It can be used to specify input documents in file format.
637
+ The files are processed internally, converted to chunks and stored
638
+ into a database table.
639
+ Alternatively, users can choose to chunk their files themselves,
640
+ store them into a database table, create a table and specify
641
+ the details of that using "database_name", "object_name",
642
+ "data_columns" where the file content splits are stored.
643
+ Note:
644
+ * Only PDF format is currently supported.
645
+ * Multiple document files can be supplied.
646
+ * Fully qualified file name should be specified.
647
+ Examples:
648
+ document_files=['file1.pdf','file2.pdf']
649
+ Types: str, list
650
+
651
+ RETURNS:
652
+ None
653
+
654
+ RAISES:
655
+ TeradataMlException
656
+
657
+ EXAMPLES:
658
+ from teradataml import VectorStore
659
+
660
+ # Create an instance of the VectorStore class.
661
+ vs = VectorStore(name="vec1")
662
+
663
+ # Example 1: The following example initializes the Vector Store using
664
+ # data residing in table.
665
+ vs.initialize(object_name="amazon_reviews_25",
666
+ description="vector store testing",
667
+ database_name='oaf',
668
+ key_columns=['rev_id', 'aid'],
669
+ data_columns=['rev_text'],
670
+ vector_columns='VectorIndex',
671
+ embeddings_model="amazon.titan-embed-text-v1"
672
+ )
673
+
674
+ # Example 2: The following example initializes the Vector Store using
675
+ # data residing in files.
676
+
677
+ # Forming the fully qualified path for SQL_Fundamentals.pdf and
678
+ # InDb_Analytical_Functions.pdf
679
+ import teradataml
680
+ files= [os.path.join(os.path.dirname(teradataml.__file__), "data",
681
+ "SQL_Fundamentals.pdf"),
682
+ os.path.join(os.path.dirname(teradataml.__file__), "data",
683
+ "InDb_Analytical_Functions.pdf")]
684
+
685
+ vs.initialize(object_name="amazon_reviews_25",
686
+ description="vector store testing",
687
+ database_name='oaf',
688
+ key_columns=['rev_id', 'aid'],
689
+ data_columns=['rev_text'],
690
+ vector_columns='VectorIndex',
691
+ embeddings_model="amazon.titan-embed-text-v1"
692
+ document_files=files
693
+ )
694
+
695
+ # Example 3: Initialize an existing Vector Store.
696
+ vs.initialize()
697
+
698
+ """
699
+ self.__set_vs_index_and_vs_parameters(**kwargs)
700
+ # As the rest call accepts 0, 1 converting it.
701
+ self._enable_logging = 0 if not self._enable_logging else 1
702
+ initialize_url = f'{self.__vectorstore_url}initialize?vs_name={self.name}&log_level={self._enable_logging}'
703
+
704
+ data = {}
705
+ if self.__vs_parameters or self.__vs_index:
706
+ data = {}
707
+ if self.__vs_parameters:
708
+ data['vs_parameters'] = json.dumps(self.__vs_parameters)
709
+ if self.__vs_index:
710
+ data['vs_index'] = json.dumps(self.__vs_index)
711
+
712
+ http_params = {
713
+ "url": initialize_url,
714
+ "method_type": HTTPRequest.POST,
715
+ "headers": self.__get_header(),
716
+ "data": data,
717
+ "files": self._document_files,
718
+ "cookies": {'session_id': self.__session_id}
719
+
720
+ }
721
+ response = UtilFuncs._http_request(**http_params)
722
+ self._process_vs_response("initialize", response)
723
+
724
+ def create(self):
725
+ """
726
+ DESCRIPTION:
727
+ Creates a new vector store by generating the embeddings using the
728
+ configuration specified at the time of instantiation.
729
+ Note:
730
+ For "search_algorithm" KMEANS, index is also trained
731
+ because of which, it may take longer to set up compared
732
+ to vector distance.
733
+
734
+ PARAMETERS:
735
+ None
736
+
737
+ RETURNS:
738
+ None
739
+
740
+ RAISES:
741
+ TeradataMlException
742
+
743
+ EXAMPLES:
744
+ from teradataml import VectorStore
745
+
746
+ # Create an instance of the VectorStore class.
747
+ vs = VectorStore(name="vec1")
748
+
749
+ # Example 1: The following example creates the Vector Store using
750
+ # data residing in table..
751
+ vs.initialize(object_name="amazon_reviews_25",
752
+ description="vector store testing",
753
+ database_name='oaf',
754
+ key_columns=['rev_id', 'aid'],
755
+ data_columns=['rev_text'],
756
+ vector_columns='VectorIndex',
757
+ embeddings_model="amazon.titan-embed-text-v1"
758
+ )
759
+ # Create the Vector Store.
760
+ vs.create()
761
+
762
+ # Example 2: The following example creates the Vector Store using
763
+ # data residing in files.
764
+
765
+ # Forming the fully qualified path for SQL_Fundamentals.pdf and
766
+ # InDb_Analytical_Functions.pdf
767
+ files= [os.path.join(os.path.dirname(teradataml.__file__), "data",
768
+ "SQL_Fundamentals.pdf"),
769
+ os.path.join(os.path.dirname(teradataml.__file__), "data",
770
+ "InDb_Analytical_Functions.pdf")]
771
+
772
+ vs.initialize(object_name="amazon_reviews_25",
773
+ description="vector store testing",
774
+ database_name='oaf',
775
+ key_columns=['rev_id', 'aid'],
776
+ data_columns=['rev_text'],
777
+ vector_columns='VectorIndex',
778
+ embeddings_model="amazon.titan-embed-text-v1"
779
+ document_files=files
780
+ )
781
+
782
+ # Create the Vector Store.
783
+ vs.create()
784
+ """
785
+ create_url = f'{self.__vectorstore_url}create'
786
+ response = UtilFuncs._http_request(create_url, HTTPRequest.POST,
787
+ cookies={'session_id': self.__session_id},
788
+ headers=self.__headers)
789
+ self._process_vs_response("create", response)
790
+
791
+ def disconnect(self):
792
+ """
793
+ DESCRIPTION:
794
+ Logout from session and cleanup resources.
795
+
796
+ PARAMETERS:
797
+ None
798
+
799
+ RETURNS:
800
+ None
801
+
802
+ RAISES:
803
+ TeradataMlException
804
+
805
+ EXAMPLES:
806
+ from teradataml import VectorStore
807
+ # Example 1: Disconnect from the database.
808
+ # Create an instance of the VectorStore class.
809
+ vs = VectorStore(name="vec1")
810
+
811
+ # Initialize vector store.
812
+ vs.initialize(object_name="amazon_reviews_25",
813
+ description="vector store testing",
814
+ database_name='oaf',
815
+ key_columns=['rev_id', 'aid'],
816
+ data_columns=['rev_text'],
817
+ vector_columns='VectorIndex',
818
+ embeddings_model="amazon.titan-embed-text-v1"
819
+ )
820
+
821
+ # Disconnect from the database.
822
+ vs.diconnect()
823
+ """
824
+ disconnect_url = f'{self.__session_url}disconnect'
825
+
826
+ response = UtilFuncs._http_request(disconnect_url, HTTPRequest.POST,
827
+ cookies={'session_id': self.__session_id},
828
+ headers=self.__headers)
829
+ self._process_vs_response("disconnect", response)
830
+
831
+ def destroy(self):
832
+ """
833
+ DESCRIPTION:
834
+ Destroy the vector store.
835
+
836
+ PARAMETERS:
837
+ None
838
+
839
+ RETURNS:
840
+ None
841
+
842
+ RAISES:
843
+ TeradataMlException
844
+
845
+ EXAMPLES:
846
+ from teradataml import VectorStore
847
+
848
+ # Create an instance of the VectorStore class.
849
+ vs = VectorStore(name="vec1")
850
+
851
+ # Example 1: The following example destroys the Vector Store created
852
+ # by using data residing in table.
853
+ vs.initialize(object_name="amazon_reviews_25",
854
+ description="vector store testing",
855
+ database_name='oaf',
856
+ key_columns=['rev_id', 'aid'],
857
+ data_columns=['rev_text'],
858
+ vector_columns='VectorIndex',
859
+ embeddings_model="amazon.titan-embed-text-v1"
860
+ )
861
+
862
+ # Create the Vector Store.
863
+ vs.create()
864
+
865
+ # Destroy the Vector Store.
866
+ vs.destroy()
867
+
868
+ # Example 2: The following example destroys the Vector Store
869
+ # created by using data residing in files.
870
+
871
+ # Forming the fully qualified path for SQL_Fundamentals.pdf and
872
+ # InDb_Analytical_Functions.pdf
873
+ import teradataml
874
+ files = [os.path.join(os.path.dirname(teradataml.__file__), "data",
875
+ "SQL_Fundamentals.pdf"),
876
+ os.path.join(os.path.dirname(teradataml.__file__), "data",
877
+ "InDb_Analytical_Functions.pdf")]
878
+
879
+ vs.initialize(object_name="amazon_reviews_25",
880
+ description="vector store testing",
881
+ database_name= 'oaf',
882
+ key_columns=['rev_id', 'aid'],
883
+ data_columns=['rev_text'],
884
+ vector_columns='VectorIndex',
885
+ embeddings_model="amazon.titan-embed-text-v1"
886
+ document_files=files
887
+ )
888
+
889
+ # Create the Vector Store.
890
+ vs.create()
891
+
892
+ # Destroy the Vector Store.
893
+ vs.destroy()
894
+ """
895
+ destroy_url = f'{self.__vectorstore_url}destroy'
896
+ response = UtilFuncs._http_request(destroy_url, HTTPRequest.POST,
897
+ headers=self.__get_header(),
898
+ cookies={'session_id': self.__session_id})
899
+ self._process_vs_response("destroy", response)
900
+
901
+ def update(self, **kwargs):
902
+ """
903
+ DESCRIPTION:
904
+ Update an existing vector store with the specified parameters.
905
+ Note:
906
+ Currently, there is no support for updating a vector store when
907
+ input contents are updated or new data files become available.
908
+ In such cases, user will need to create a new vector store.
909
+
910
+ PARAMETERS:
911
+ embeddings_model:
912
+ Optional Argument.
913
+ Specifies the embeddings model to be used for generating the
914
+ embeddings.
915
+ Permitted Values: text-embedding-ada-002, text-embedding-3-small,
916
+ text-embedding-3-large.
917
+ Types: str
918
+
919
+ embeddings_dims:
920
+ Optional Argument.
921
+ Specifies the number of dimensions to be used for generating the embeddings.
922
+ The value depends on the "embeddings_model".
923
+ Permitted Values:
924
+ * amazon.titan-embed-text-v1: 1536 only
925
+ * amazon.titan-embed-image-v1: [256, 384, 1024]
926
+ * amazon.titan-embed-text-v2:0: [256, 512, 1024]
927
+ * text-embedding-ada-002: 1536 only
928
+ * text-embedding-3-small: 1 <= dims <= 1536
929
+ * text-embedding-3-large: 1 <= dims <= 3072
930
+ Types: int
931
+
932
+ initial_delay_ms:
933
+ Optional Argument.
934
+ Specifies the millisecond delay after each input table
935
+ row is sent for embeddings.
936
+ Types: int
937
+
938
+ delay_max_retries:
939
+ Optional Argument.
940
+ Specifies the maximum number of attempts after a failed
941
+ input table row embedding request.
942
+ Types: int
943
+
944
+ delay_exp_base:
945
+ Optional Argument.
946
+ Specifies the exponential base of delay time increase.
947
+ Types: int
948
+
949
+ delay_jitter:
950
+ Optional Argument.
951
+ Specifies the random sum term in exponent.
952
+ Types: bool
953
+
954
+ metric:
955
+ Optional Argument.
956
+ Specifies the metric to be used for calculating the distance
957
+ between the vectors.
958
+ Permitted Values:
959
+ * EUCLIDEAN
960
+ * COSINE
961
+ * MANHATTAN
962
+ * DOTPRODUCT
963
+ * MINKOWSKI
964
+ Types: str
965
+
966
+ search_algorithm:
967
+ Optional Argument.
968
+ Specifies the algorithm to be used for searching the tables and
969
+ views relevant to the question.
970
+ Permitted Values: VECTORDISTANCE, KMEANS.
971
+ Types: str
972
+
973
+ initial_centroids_method:
974
+ Optional Argument.
975
+ Specifies the Algorithm to be used for initializing the
976
+ centroids when Search Algorithm is KMEANS.
977
+ Allowed values are RANDOM and KMEANS++
978
+ Permitted Values: RANDOM, KMEANS++
979
+ Types: str
980
+
981
+ train_numcluster:
982
+ Optional Argument.
983
+ Specifies the Number of clusters to be trained when
984
+ "search_algorithm" is KMEANS.
985
+ Types: int
986
+
987
+ max_iternum:
988
+ Optional Argument.
989
+ Specifies the maximum number of iterations to be run during
990
+ training when "search_algorithm" is KMEANS.
991
+ Types: int
992
+
993
+ stop_threshold:
994
+ Optional Argument.
995
+ Specifies the threshold value at which training should be
996
+ stopped when "search_algorithm" is KMEANS.
997
+ Types: int
998
+
999
+ seed:
1000
+ Optional Argument.
1001
+ Specifies the seed value to be used for random number
1002
+ generation when "search_algorithm" is KMEANS.
1003
+ Types: int
1004
+
1005
+ num_init:
1006
+ Optional Argument.
1007
+ Specifies the number of times the k-means algorithm will
1008
+ be run with different initial centroid seeds.
1009
+ Types: int
1010
+
1011
+ top_k:
1012
+ Optional Argument.
1013
+ Specifies the number of top clusters to be considered while searching.
1014
+ Types: int
1015
+
1016
+ search_threshold:
1017
+ Optional Argument.
1018
+ Specifies the threshold value to consider matching tables/views
1019
+ while searching.
1020
+ Types: float
1021
+
1022
+ search_numcluster:
1023
+ Optional Argument.
1024
+ Specifies the number of clusters to be considered while
1025
+ searching when "search_algorithm" is KMEANS.
1026
+ Types: int
1027
+
1028
+ prompt:
1029
+ Optional Argument.
1030
+ Specifies the prompt to be used for generating answers.
1031
+ Types: str
1032
+
1033
+ document_files:
1034
+ Optional Argument.
1035
+ Specifies the list of PDF files to be divided into chunks and
1036
+ used for document embedding.
1037
+ Types: tuple, list of tuple
1038
+
1039
+ RETURNS:
1040
+ None
1041
+
1042
+ RAISES:
1043
+ TeradataMlException
1044
+
1045
+ EXAMPLES:
1046
+ from teradataml import VectorStore
1047
+
1048
+ # Create an instance of the VectorStore class.
1049
+ vs = VectorStore(name="vec1")
1050
+
1051
+ # Initialize the Vector Store.
1052
+ vs.initialize(object_name="amazon_reviews_25",
1053
+ description="vector store testing",
1054
+ database_name='oaf',
1055
+ key_columns=['rev_id', 'aid'],
1056
+ data_columns=['rev_text'],
1057
+ vector_columns='VectorIndex',
1058
+ embeddings_model="amazon.titan-embed-text-v1"
1059
+ )
1060
+ # Create the Vector Store.
1061
+ vs.create()
1062
+
1063
+ # Example 1: Update the search_algorithm, search_threshold and
1064
+ # description of the Vector Store.
1065
+ vs.update(search_algorithm='KMEANS',
1066
+ search_threshold=0.6,
1067
+ description='KMeans clustering method')
1068
+ """
1069
+ self.__set_vs_index_and_vs_parameters(**kwargs)
1070
+ update_url = f'{self.__vectorstore_url}update'
1071
+ response = UtilFuncs._http_request(update_url, HTTPRequest.POST, json=self.__vs_parameters,
1072
+ files=self._document_files,
1073
+ headers=self.__get_header(),
1074
+ cookies={'session_id': self.__session_id})
1075
+ self._process_vs_response("update", response)
1076
+
1077
+ def authenticate(self, user_name=None, action=None, permission=None, **kwargs):
1078
+ """
1079
+ DESCRIPTION:
1080
+ Grant or revoke read/write permissions for a user on the vector store.
1081
+ Every database user is permitted to create vector store according to
1082
+ existing access grants on corresponding data.
1083
+ That user becomes the owner/admin of the vector store
1084
+ and is the only one with read/write access to the vector store.
1085
+ Note:
1086
+ * The methods create(), authenticate(), update(), and destroy() need write
1087
+ permissions to the vector store.
1088
+ * vector store initialization and the methods similarity_search(),
1089
+ prepare_response() and ask() need read permissions to the
1090
+ vector store.
1091
+
1092
+ PARAMETERS:
1093
+ user_name:
1094
+ Required Argument.
1095
+ Specifies a database user to authenticate.
1096
+ Types: str
1097
+
1098
+ action:
1099
+ Optional Argument.
1100
+ Specifies the type of grant to be provided.
1101
+ Permitted Values: GRANT, REVOKE
1102
+ Default Value: GRANT
1103
+ Types: Enum
1104
+
1105
+ permission:
1106
+ Optional Argument.
1107
+ Specifies the type of permission to be provided.
1108
+ Permitted Values: READ, WRITE
1109
+ Default Value: READ
1110
+ Types: Enum
1111
+
1112
+ RETURNS:
1113
+ None
1114
+
1115
+ RAISES:
1116
+ TeradataMlException
1117
+
1118
+ EXAMPLES:
1119
+ from teradataml import VectorStore, Action, Permission
1120
+
1121
+ # Create an instance of the VectorStore class.
1122
+ vs = VectorStore(name="vec1")
1123
+
1124
+ vs.initialize(object_name="amazon_reviews_25",
1125
+ description="vector store testing",
1126
+ database_name='oaf',
1127
+ key_columns=['rev_id', 'aid'],
1128
+ data_columns=['rev_text'],
1129
+ vector_columns='VectorIndex',
1130
+ embeddings_model="amazon.titan-embed-text-v1"
1131
+ )
1132
+ # Create the Vector Store.
1133
+ vs.create()
1134
+
1135
+ # Example 1: GRANT READ access for user 'test_user1' to Vector Store 'vec1'.
1136
+ vs.authenticate(user_name='test_user1', action=Action.GRANT, permission=Permission.READ)
1137
+
1138
+ """
1139
+ ## Initializing vs_index params
1140
+ self._user_name = user_name
1141
+ self._action = action
1142
+ self._permission = permission
1143
+
1144
+ # Validating vs_index
1145
+ arg_info_matrix = []
1146
+ arg_info_matrix.append(["user_name", self._user_name, False, (str), True])
1147
+ arg_info_matrix.append(["action", self._action, True, (action_enum), True])
1148
+ arg_info_matrix.append(["permission", self._permission, True, (permission_enum), True])
1149
+
1150
+ _Validators._validate_missing_required_arguments(arg_info_matrix)
1151
+
1152
+ # Explicitly checking action and permission enum types, as correct message is not displayed.
1153
+ if self._action is not None and not isinstance(self._action, action_enum):
1154
+ raise TypeError(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
1155
+ "action", "Action Enum. Use 'Action' enum."
1156
+ " Check example"))
1157
+
1158
+ if self._permission is not None and not isinstance(self._permission, permission_enum):
1159
+ raise TypeError(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
1160
+ "permission", "Permission Enum. Use "
1161
+ "'Permission' enum."
1162
+ " Check example"))
1163
+
1164
+ # Validate argument types.
1165
+ _Validators._validate_function_arguments(arg_info_matrix)
1166
+
1167
+ authenticate_url = f"{self.__vectorstore_url}authenticate?user_name={self._user_name}" \
1168
+ f"&action={self._action.value}&permission={self._permission.value}"
1169
+ response = UtilFuncs._http_request(authenticate_url, HTTPRequest.POST,
1170
+ headers=self.__get_header(),
1171
+ cookies={'session_id': self.__session_id})
1172
+ self._process_vs_response(api_name="authenticate", response=response)
1173
+
1174
+ def similarity_search(self, question=None):
1175
+ """
1176
+ DESCRIPTION:
1177
+ Perform similarity search in the Vector Store for the input question.
1178
+ The algorithm specified in "search_algorithm" is used to perform
1179
+ the search against the vector store.
1180
+ The result contains "top_k" rows along with similarity score
1181
+ found by the "search_algorithm".
1182
+
1183
+ PARAMETERS:
1184
+ question:
1185
+ Required Argument.
1186
+ Specifies a string of text for which similarity search
1187
+ needs to be performed.
1188
+ Types: str
1189
+
1190
+ RETURNS:
1191
+ list
1192
+
1193
+ RAISES:
1194
+ TeradataMlException
1195
+
1196
+ EXAMPLES:
1197
+ from teradataml import VectorStore
1198
+
1199
+ # Create an instance of the VectorStore class.
1200
+ vs = VectorStore(name="vs")
1201
+
1202
+ # Initialize the Vector Store.
1203
+ vs.initialize(object_name="amazon_reviews_25",
1204
+ description="vector store testing",
1205
+ database_name='oaf',
1206
+ key_columns=['rev_id', 'aid'],
1207
+ data_columns=['rev_text'],
1208
+ vector_columns='VectorIndex',
1209
+ embeddings_model="amazon.titan-embed-text-v1",
1210
+ search_algorithm='VECTORDISTANCE',
1211
+ top_k=10
1212
+ )
1213
+
1214
+ # Create the Vector Store.
1215
+ vs.create()
1216
+
1217
+ # Example: Perform similarity search in the Vector Store for
1218
+ # the input question.
1219
+
1220
+ question = 'Are there any reviews about books?'
1221
+ response = vs.similarity_search(question=question)
1222
+ print(response)
1223
+
1224
+ """
1225
+ # Initializing params
1226
+ self._question = question
1227
+
1228
+ # Validating params
1229
+ arg_info_matrix = []
1230
+ arg_info_matrix.append(["question", self._question, False, (str), True])
1231
+ _Validators._validate_missing_required_arguments(arg_info_matrix)
1232
+
1233
+ # Validate argument types.
1234
+ _Validators._validate_function_arguments(arg_info_matrix)
1235
+
1236
+ similarity_search_url = f'{self.__vectorstore_url}similarity-search?question={question}'
1237
+ response = UtilFuncs._http_request(similarity_search_url, HTTPRequest.POST,
1238
+ headers=self.__get_header(),
1239
+ cookies={'session_id': self.__session_id})
1240
+ return _SimilaritySearch(self._process_vs_response(api_name="similarity-search",
1241
+ response=response))
1242
+
1243
+ def prepare_response(self, question=None, similarity_results=None, prompt=None):
1244
+ """
1245
+ DESCRIPTION:
1246
+ Prepare a natural language response to the user using the input
1247
+ question and similarity_results provided by similarity_search.
1248
+ The response is generated by a language model configured
1249
+ in the environment using a pre-configured prompt.
1250
+ An optional parameter prompt can be used to specify a customized
1251
+ prompt that replaces the internal prompt.
1252
+
1253
+ PARAMETERS:
1254
+ question:
1255
+ Required Argument.
1256
+ Specifies a string of text for which similarity search
1257
+ needs to be performed.
1258
+ Types: str
1259
+
1260
+ similarity_results:
1261
+ Required Argument.
1262
+ Specifies the similarity results obtained by similarity_search().
1263
+ Types: str
1264
+
1265
+ prompt:
1266
+ Optional Argument.
1267
+ Specifies a customized prompt that replaces the internal prompt.
1268
+ Types: str
1269
+
1270
+ RETURNS:
1271
+ HTTP Response json.
1272
+
1273
+ RAISES:
1274
+ None
1275
+
1276
+ EXAMPLES:
1277
+ # Create an instance of the VectorStore class.
1278
+ vs = VectorStore(name="vs")
1279
+
1280
+ # Initialize the Vector Store.
1281
+ vs.initialize(object_name="amazon_reviews_25",
1282
+ description="vector store testing",
1283
+ database_name='oaf',
1284
+ key_columns=['rev_id', 'aid'],
1285
+ data_columns=['rev_text'],
1286
+ vector_columns='VectorIndex',
1287
+ embeddings_model="amazon.titan-embed-text-v1",
1288
+ search_algorithm='VECTORDISTANCE',
1289
+ top_k = 10
1290
+ )
1291
+ # Create the Vector Store.
1292
+ vs.create()
1293
+
1294
+ # Perform similarity search in the Vector Store for
1295
+ # the input question.
1296
+ question = 'Are there any reviews about books?'
1297
+ response = vs.similarity_search(question=question)
1298
+
1299
+ # Example 1: Prepare a natural language response to the user
1300
+ # using the input question and similarity_results
1301
+ # provided by similarity_search().
1302
+
1303
+ question='Did any one feel the book is thin?'
1304
+ similar_objects_list = response['similar_objects_list']
1305
+ vs.prepare_response(question=question,
1306
+ similarity_results=similar_objects_list)
1307
+ """
1308
+ # Initializing params
1309
+ self._question = question
1310
+ self._similarity_results = similarity_results
1311
+ self._prompt = prompt
1312
+
1313
+ # Validating params
1314
+ arg_info_matrix = []
1315
+ arg_info_matrix.append(["question", self._question, False, (str), True])
1316
+ arg_info_matrix.append(["similarity_results", self._similarity_results, False, _SimilaritySearch, True])
1317
+ arg_info_matrix.append(["prompt", self._prompt, True, (str), True])
1318
+ _Validators._validate_missing_required_arguments(arg_info_matrix)
1319
+
1320
+ # Explicitly checking similarity search API, as correct message is not displayed.
1321
+ if not isinstance(similarity_results, _SimilaritySearch):
1322
+ raise TypeError(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
1323
+ "similarity_results", "output of similarity_search()"))
1324
+ # Validate argument types.
1325
+ _Validators._validate_function_arguments(arg_info_matrix)
1326
+
1327
+ data = {
1328
+ 'question': self._question,
1329
+ 'similar_objects_list': self._similarity_results._json_obj,
1330
+ 'prompt': self._prompt,
1331
+ }
1332
+ prepare_response_url = f'{self.__vectorstore_url}prepare-response'
1333
+ response = UtilFuncs._http_request(prepare_response_url, HTTPRequest.POST,
1334
+ headers=self.__get_header(),
1335
+ cookies={'session_id': self.__session_id},
1336
+ json=data)
1337
+ return self._process_vs_response(api_name="prepare_response", response=response)
1338
+
1339
+ def ask(self, question=None, prompt=None):
1340
+ """
1341
+ DESCRIPTION:
1342
+ Perform similarity search in the vector store for
1343
+ the input question followed by preparing a natural
1344
+ language response to the user. This method combines
1345
+ the operation of similarity_search() and prepare_response()
1346
+ into one call for faster response time.
1347
+
1348
+ PARAMETERS:
1349
+ question:
1350
+ Required Argument.
1351
+ Specifies a string of text for which similarity search
1352
+ needs to be performed.
1353
+ Types: str
1354
+
1355
+ prompt:
1356
+ Optional Argument.
1357
+ Specifies a customized prompt that replaces the internal prompt.
1358
+ Types: str
1359
+
1360
+ RETURNS:
1361
+ dict
1362
+
1363
+ RAISES:
1364
+ None
1365
+
1366
+ EXAMPLES:
1367
+ # Create an instance of the VectorStore class.
1368
+ vs = VectorStore(name="vs")
1369
+
1370
+ # Initialize the Vector Store.
1371
+ vs.initialize(object_name="amazon_reviews_25",
1372
+ description="vector store testing",
1373
+ database_name='oaf',
1374
+ key_columns=['rev_id', 'aid'],
1375
+ data_columns=['rev_text'],
1376
+ vector_columns='VectorIndex',
1377
+ embeddings_model="amazon.titan-embed-text-v1",
1378
+ search_algorithm='VECTORDISTANCE',
1379
+ top_k=10
1380
+ )
1381
+ # Create the Vector Store.
1382
+ vs.create()
1383
+
1384
+ custom_prompt = '''List good reviews about the books. Do not assume information.
1385
+ Only provide information that is present in the data.
1386
+ Format results like this:
1387
+ Review ID:
1388
+ Author ID:
1389
+ Review:
1390
+ '''
1391
+ # Example 1: Perform similarity search in the Vector Store for
1392
+ # the input question followed by preparing a natural
1393
+ # language response to the user.
1394
+
1395
+ question = 'Are there any reviews saying that the books are inspiring?'
1396
+ response = vs.ask(question=question, prompt=custom_prompt)
1397
+ print(response)
1398
+
1399
+ """
1400
+ # Initializing params
1401
+ self._question = question
1402
+ self._prompt = prompt
1403
+
1404
+ # Validating params
1405
+ arg_info_matrix = []
1406
+ arg_info_matrix.append(["question", self._question, False, (str), True])
1407
+ arg_info_matrix.append(["prompt", self._prompt, True, (str), True])
1408
+ _Validators._validate_missing_required_arguments(arg_info_matrix)
1409
+
1410
+ # Validate argument types.
1411
+ _Validators._validate_function_arguments(arg_info_matrix)
1412
+
1413
+ data = {
1414
+ 'question': self._question,
1415
+ 'prompt': self._prompt,
1416
+ }
1417
+ ask_url = f'{self.__vectorstore_url}ask'
1418
+ response = UtilFuncs._http_request(ask_url, HTTPRequest.POST,
1419
+ headers=self.__get_header(),
1420
+ cookies={'session_id': self.__session_id},
1421
+ json=data)
1422
+ return self._process_vs_response(api_name="ask", response=response)
1423
+
1424
+ def _process_vs_response(self, api_name, response, success_status_code=None):
1425
+ """
1426
+ DESCRIPTION:
1427
+ Function to process and validate the UES Response.
1428
+
1429
+ PARAMETERS:
1430
+ api_name:
1431
+ Required Argument.
1432
+ Specifies the name of the Vector Store method.
1433
+ Types: str
1434
+
1435
+ response:
1436
+ Required Argument.
1437
+ Specifies the response recieved from Vector Store service.
1438
+ Types: requests.Response
1439
+
1440
+ success_status_code:
1441
+ Optional Argument.
1442
+ Specifies the expected success status code for the corresponding
1443
+ Vector Store service.
1444
+ Default Value: None
1445
+ Types: int
1446
+
1447
+ RETURNS:
1448
+ Response object.
1449
+
1450
+ RAISES:
1451
+ TeradataMlException.
1452
+
1453
+ EXAMPLES:
1454
+ >>> _process_vs_response("create", resp)
1455
+ """
1456
+ try:
1457
+ data = response.json()
1458
+ # Success status code ranges between 200-300.
1459
+ if (success_status_code is None and 200 <= response.status_code < 300) or \
1460
+ (success_status_code == response.status_code):
1461
+ if "message" in data:
1462
+ print(data['message'])
1463
+ else:
1464
+ return data
1465
+ return
1466
+
1467
+ # teradataml API got an error response. Error response is expected as follows -
1468
+ # Success
1469
+ # Response:
1470
+ # {
1471
+ # "message": "success string"
1472
+ # }
1473
+ # Failure
1474
+ # Response:
1475
+ # {
1476
+ # "detail": "error message string"
1477
+ # }
1478
+ # Validation
1479
+ # Error:
1480
+ # {
1481
+ # "detail": [
1482
+ # {
1483
+ # "loc": [
1484
+ # "string",
1485
+ # 0
1486
+ # ],
1487
+ # "msg": "string",
1488
+ # "type": "string"
1489
+ # }
1490
+ # ]
1491
+ # }
1492
+ # Extract the fields and raise error accordingly.
1493
+ if isinstance(data['detail'], str):
1494
+ error_description = data['detail']
1495
+ else:
1496
+ error_description = []
1497
+ for dict_ele in data['detail']:
1498
+ error_msg = f"{dict_ele['msg']} for {dict_ele['loc'][1] if len(dict_ele['loc']) > 1 else dict_ele['loc'][0]}"
1499
+ error_description.append(error_msg)
1500
+ error_description = ",".join(error_description)
1501
+
1502
+ exception_message = "Request Failed - {}".format(error_description)
1503
+
1504
+ error_msg = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1505
+ api_name,
1506
+ exception_message)
1507
+ raise TeradataMlException(error_msg, MessageCodes.FUNC_EXECUTION_FAILED)
1508
+
1509
+ # teradataml API may not get a Json API response in some cases.
1510
+ # So, raise an error with the response received as it is.
1511
+ except JSONDecodeError:
1512
+ error_msg = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1513
+ api_name,
1514
+ response.text)
1515
+ raise TeradataMlException(error_msg, MessageCodes.FUNC_EXECUTION_FAILED)
1516
+
1517
+ def health(self):
1518
+ """
1519
+ DESCRIPTION:
1520
+ Perform sanity check for the service.
1521
+
1522
+ PARAMETERS:
1523
+ None
1524
+
1525
+ RETURNS:
1526
+ None
1527
+
1528
+ RAISES:
1529
+ None
1530
+
1531
+ EXAMPLES:
1532
+ # Create an instance of the VectorStore class.
1533
+ vs = VectorStore(name="vs")
1534
+ # Example 1: Check the health of the service.
1535
+ vs.health()
1536
+ """
1537
+ health_url = f'{self.__base_url}health'
1538
+ response = UtilFuncs._http_request(health_url, HTTPRequest.GET, headers=self.__get_header())
1539
+ return pd.DataFrame([self._process_vs_response("health", response)])
1540
+
1541
+ def status(self):
1542
+ """
1543
+ DESCRIPTION:
1544
+ Check the status of the below operations:
1545
+ * initialize
1546
+ * create
1547
+ * destroy
1548
+ * update
1549
+
1550
+ PARAMETERS:
1551
+ None
1552
+
1553
+ RETURNS:
1554
+ None
1555
+
1556
+ RAISES:
1557
+ None
1558
+
1559
+ EXAMPLES:
1560
+ # Create an instance of the VectorStore class.
1561
+ vs = VectorStore(name="vs")
1562
+ # Example: Check the status of initialize and create operations.
1563
+
1564
+ # Initialize VectorStore.
1565
+ vs.initialize(object_name="amazon_reviews_25",
1566
+ description="vector store testing",
1567
+ database_name='oaf',
1568
+ key_columns=['rev_id', 'aid'],
1569
+ data_columns=['rev_text'],
1570
+ vector_columns='VectorIndex',
1571
+ embeddings_model="amazon.titan-embed-text-v1"
1572
+ )
1573
+ # Check status.
1574
+ vs.status()
1575
+
1576
+ # Create VectorStore.
1577
+ vs.create()
1578
+
1579
+ # Check status.
1580
+ vs.status()
1581
+ """
1582
+ status_url = f'{self.__vectorstore_url}status'
1583
+ response = UtilFuncs._http_request(status_url, HTTPRequest.GET,
1584
+ headers=self.__get_header(),
1585
+ cookies={'session_id': self.__session_id})
1586
+ return pd.DataFrame([self._process_vs_response("status", response)])