teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +306 -0
- teradataml/__init__.py +10 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +299 -16
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +13 -3
- teradataml/analytics/json_parser/utils.py +13 -6
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +11 -2
- teradataml/analytics/table_operator/__init__.py +4 -3
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +66 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1502 -323
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +247 -307
- teradataml/automl/data_transformation.py +32 -12
- teradataml/automl/feature_engineering.py +325 -86
- teradataml/automl/model_evaluation.py +44 -35
- teradataml/automl/model_training.py +122 -153
- teradataml/catalog/byom.py +8 -8
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/__init__.py +2 -1
- teradataml/common/constants.py +72 -0
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +152 -120
- teradataml/common/messagecodes.py +11 -2
- teradataml/common/messages.py +4 -1
- teradataml/common/sqlbundle.py +26 -4
- teradataml/common/utils.py +225 -14
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +82 -2
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/dataframe_example.json +27 -1
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scripts/deploy_script.py +1 -1
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
- teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -1
- teradataml/data/teradataml_example.json +20 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/dataframe/copy_to.py +1 -1
- teradataml/dataframe/data_transfer.py +5 -3
- teradataml/dataframe/dataframe.py +1002 -201
- teradataml/dataframe/fastload.py +3 -3
- teradataml/dataframe/functions.py +867 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +840 -33
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +878 -34
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
- teradataml/opensource/_lightgbm.py +950 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
- teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
- teradataml/options/__init__.py +9 -23
- teradataml/options/configure.py +42 -4
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +13 -9
- teradataml/scriptmgmt/lls_utils.py +77 -23
- teradataml/store/__init__.py +13 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2223 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/store/vector_store/__init__.py +1586 -0
- teradataml/table_operators/Script.py +2 -2
- teradataml/table_operators/TableOperator.py +106 -20
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +102 -56
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/validators.py +34 -2
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,1586 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unpublished work.
|
|
3
|
+
Copyright (c) 2024 by Teradata Corporation. All rights reserved.
|
|
4
|
+
TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
|
|
5
|
+
|
|
6
|
+
Primary Owner: aanchal.kavedia@teradata.com
|
|
7
|
+
Secondary Owner: adithya.avvaru@teradata.com
|
|
8
|
+
|
|
9
|
+
This file implements VectorStore class along with its method.
|
|
10
|
+
"""
|
|
11
|
+
import base64
|
|
12
|
+
import json, os, pandas as pd
|
|
13
|
+
from json.decoder import JSONDecodeError
|
|
14
|
+
from teradataml.common.constants import HTTPRequest, Action as action_enum, Permission as permission_enum
|
|
15
|
+
from teradataml.common.exceptions import TeradataMlException
|
|
16
|
+
from teradataml.common.messages import Messages
|
|
17
|
+
from teradataml.common.messagecodes import MessageCodes
|
|
18
|
+
from teradataml.common.utils import UtilFuncs
|
|
19
|
+
from teradataml.context.context import _get_user
|
|
20
|
+
from teradataml import configure
|
|
21
|
+
from teradataml.utils.validators import _Validators
|
|
22
|
+
from teradataml.utils.dtypes import _ListOf
|
|
23
|
+
from teradataml.scriptmgmt.UserEnv import _get_auth_token
|
|
24
|
+
|
|
25
|
+
class _SimilaritySearch:
|
|
26
|
+
"""
|
|
27
|
+
Internal class to create a similarity search object which is needed
|
|
28
|
+
to display the results in a tabular format and at the same time store
|
|
29
|
+
the json object which is used in prepare response.
|
|
30
|
+
"""
|
|
31
|
+
def __init__(self, response):
|
|
32
|
+
self.similar_objects_count = response['similar_objects_count']
|
|
33
|
+
self._json_obj = response['similar_objects_list']
|
|
34
|
+
self.similar_objects = pd.DataFrame(self._json_obj)
|
|
35
|
+
|
|
36
|
+
def __repr__(self):
|
|
37
|
+
return f"similar_objects_count:{self.similar_objects_count}\nsimilar_objects:\n{self.similar_objects})"
|
|
38
|
+
|
|
39
|
+
class VectorStore:
|
|
40
|
+
def __init__(self,
             name=None,
             enable_logging=False,
             **kwargs):
    """
    DESCRIPTION:
        VectorStore contains a vectorized version of data.
        The vectorization typically is a result of embeddings generated by
        an AI LLM.
        There are two types of vector stores based on the use cases:
        * Content-based vector store: A vector store built on the
          contents of table/view/teradataml DataFrame.
          The table can be formed from the contents of file / pdf.
          Questions can be asked against the contents of the table and
          top matches of relevant rows are returned based on search.
          This can be followed by a textual response generated using
          an LLM by manipulating the top matches.

        * Metadata-based vector store: A vector store built on the
          metadata of a set of tables. Questions can be asked
          against a table or set of tables and top table
          matches are returned.

    PARAMETERS:
        name:
            Required Argument.
            Specifies the name of the vector store either to connect to an
            existing vector store or to create a new vector store.
            Types: str

        enable_logging:
            Optional Argument.
            Specifies whether logging should be enabled for vector store
            methods.
            Default Value: False
            Types: bool

    RETURNS:
        None

    RAISES:
        TeradataMlException

    EXAMPLES:
        vs = VectorStore(name="vs", enable_logging=True)
    """
    # Store the user supplied arguments.
    self.name = name
    self._enable_logging = enable_logging

    # Validate the arguments: "name" is mandatory, must not be an empty
    # string and both arguments must be of the expected types.
    validation_matrix = [
        ["name", self.name, False, (str), True],
        ["enable_logging", self._enable_logging, True, (bool)]
    ]
    _Validators._validate_missing_required_arguments(validation_matrix)
    # Validate argument types.
    _Validators._validate_function_arguments(validation_matrix)

    # The vector store service URL is populated by set_auth_token();
    # without it no REST call can succeed, hence fail early here.
    if configure._vector_store_base_url is None:
        raise RuntimeError(
            Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
                                 'VectorStore()', 'Auth token is not set.'
                                 ' Set it by calling set_auth_token().'))

    # Pre-compute the REST endpoints used by the other methods.
    self.__base_url = f"{configure._vector_store_base_url}/api/v1/"
    self.__session_url = f"{self.__base_url}session/"
    self.__vectorstore_url = f"{self.__base_url}vectorstore/"

    # CCP enabled tenants are connected automatically here; for a
    # non-CCP tenant, connect must be invoked explicitly with the
    # required connection parameters.
    self._connect(**kwargs)
|
|
113
|
+
|
|
114
|
+
def __set_vs_index_and_vs_parameters(self, **kwargs):
    """
    Internal helper that extracts, validates and stores the vector store
    index arguments ("vs_index") and the vector store parameters
    ("vs_parameters") from keyword arguments. Kept common because both
    update and initialize methods need it.
    """
    # (argument name, permitted types) in validation order.
    # The first table feeds vs_index, the second feeds vs_parameters.
    _index_spec = [
        ("database_name", (str)),
        ("object_name", (str)),
        ("key_columns", (str, list)),
        ("data_columns", (str, list)),
        ("vector_columns", (str)),
        ("chunk_size", (int)),
        ("optimized_chunking", (bool)),
        ("header_height", (int)),
        ("footer_height", (int)),
    ]
    # NOTE: "document_files" must stay last - it is validated with the
    # rest but is deliberately excluded from the vs_parameters payload
    # because files are sent as multipart form data instead.
    _param_spec = [
        ("description", (str)),
        ("embeddings_model", (str)),
        ("embeddings_dims", (int)),
        ("initial_delay_ms", (int)),
        ("delay_max_retries", (int)),
        ("delay_exp_base", (int)),
        ("delay_jitter", (bool)),
        ("metric", (str)),
        ("search_algorithm", (str)),
        ("top_k", (int)),
        ("initial_centroids_method", (str)),
        ("train_numcluster", (int)),
        ("max_iternum", (int)),
        ("stop_threshold", (float)),
        ("seed", (int)),
        ("num_init", (int)),
        ("search_threshold", (float)),
        ("search_numcluster", (int)),
        ("prompt", (str)),
        ("chat_completion_model", (str)),
        ("document_files", (str, list)),
    ]

    # Pull every argument out of kwargs (None when absent) and expose it
    # as an underscore-prefixed instance attribute.
    for _arg_name, _ in _index_spec + _param_spec:
        setattr(self, "_" + _arg_name, kwargs.get(_arg_name, None))

    # Every argument is optional; only type / empty-string checks apply.
    arg_info_matrix = [[_arg_name, getattr(self, "_" + _arg_name), True, _arg_types, True]
                       for _arg_name, _arg_types in _index_spec + _param_spec]
    # Validate argument types.
    _Validators._validate_function_arguments(arg_info_matrix)

    ## Input document files structure is: [fully_qualified_file_name1,
    # fully_qualified_file_name2]
    # Forming document files structure as the API accepts:
    # document_files = [('document_files', ('file1.pdf',
    #                                       open('/location/file1.pdf', 'rb'),
    #                                       'application/pdf')),
    #                   ('document_files', ('file2.pdf',
    #                                       open('/location/file2.pdf', 'rb'),
    #                                       'application/pdf'))
    #                  ]
    if self._document_files:
        _input_files, self._document_files = self._document_files, []
        for _path in _input_files:
            # File name without the directory part.
            _base_name = os.path.basename(_path)
            # Derive e.g. 'application/pdf' from the file extension.
            _mime = f"application/{os.path.splitext(_base_name)[1]}".replace(".", "")
            # NOTE: handles are intentionally left open - they are
            # consumed later by the HTTP layer when the request is sent.
            self._document_files.append(('document_files', (_base_name,
                                                            open(_path, 'rb'),
                                                            _mime)))

    # Build the payloads, keeping only keys whose value is not None.
    # document_files (last spec entry) is excluded from vs_parameters.
    self.__vs_parameters = {
        _arg_name: getattr(self, "_" + _arg_name)
        for _arg_name, _ in _param_spec[:-1]
        if getattr(self, "_" + _arg_name) is not None
    }

    # The REST API expects the singular key 'vector_column' even though
    # the user-facing argument is named 'vector_columns'.
    _key_overrides = {"vector_columns": "vector_column"}
    self.__vs_index = {
        _key_overrides.get(_arg_name, _arg_name): getattr(self, "_" + _arg_name)
        for _arg_name, _ in _index_spec
        if getattr(self, "_" + _arg_name) is not None
    }
|
|
255
|
+
|
|
256
|
+
def __get_header(self):
    """
    DESCRIPTION:
        Function to return the HTTP headers for a REST call. For CCP
        enabled tenants a fresh auth token is fetched on every call so
        an expired token is never sent.

    RETURNS:
        Latest header with auth_token in case of CCP enabled tenant,
        otherwise the stored header formed from user_id and password.

    """
    # Non-CCP tenants keep the Basic-auth header built during connect.
    return _get_auth_token() if self.__ccp_enabled else self.__headers
|
|
271
|
+
|
|
272
|
+
def _connect(self,
             **kwargs):
    """
    DESCRIPTION:
        Function to connect to vector store in Teradata Vantage.

    PARAMETERS:
        host:
            Optional Argument.
            Specifies the fully qualified domain name or IP address of the
            Teradata System to connect to.
            Types: str

        username:
            Optional Argument.
            Specifies the username for connecting to/create a vector
            store in Teradata Vantage.
            Types: str

        password:
            Optional Argument.
            Specifies the password required for the username.
            Types: str

        database:
            Optional Argument.
            Specifies the initial database to use after logon,
            instead of the user's default database.
            Types: str

        ssl_verify:
            Optional Argument.
            Specifies whether the SSL certificate of the service should
            be verified. Note that it is forced to False when Basic
            authentication (username/password) is used.
            Default Value: True
            Types: bool

    RETURNS:
        None

    RAISES:
        TeradataMlException

    EXAMPLES:
        from teradataml import VectorStore
        # Example 1: Connect to the database using host, database,
        # username and password.
        # Create an instance of the VectorStore class.
        vs = VectorStore(name="vec1")

        # Call the connect method to connect to the database.
        vs._connect(host='<host>', username='<user>', password='<password>', database='<database>')
    """
    ## Initialize connection parameters.
    self.__host = kwargs.get("host", None)
    self.__user = kwargs.get("username", None)
    self.__password = kwargs.get("password", None)
    # Fall back to the session user's default database when not given.
    self.__database = kwargs.get("database", _get_user())
    # Default is always True.
    self.__ssl_verify = kwargs.get("ssl_verify", True)

    # Get the auth token; a non-empty result means the tenant is CCP
    # (cloud) enabled and token-based auth is used instead of Basic auth.
    self.__headers = _get_auth_token()
    self.__ccp_enabled = True if self.__headers else False

    # Validations: all arguments optional, type and empty-string checks.
    arg_info_matrix = []
    arg_info_matrix.append(["host", self.__host, True, (str), True])
    arg_info_matrix.append(["username", self.__user, True, (str), True])
    arg_info_matrix.append(["password", self.__password, True, (str), True])
    arg_info_matrix.append(["database", self.__database, True, (str), True])
    arg_info_matrix.append(["ssl_verify", self.__ssl_verify, True, (bool)])


    # Validate argument types.
    _Validators._validate_function_arguments(arg_info_matrix)

    # Payload for the 'connect' API; None values are dropped.
    data = {
        'database_name': self.__database,
        'hostname': self.__host
    }
    data = {k: v for k, v in data.items() if v is not None}

    # Form the header with username and password if it is not a CCP
    # enabled tenant.
    if not self.__ccp_enabled and self.__user:
        credentials = f"{self.__user}:{self.__password}"
        # Encode the credentials string using Base64.
        encoded_credentials = base64.b64encode(
            credentials.encode('utf-8')).decode('utf-8')
        # Form the Authorization header value.
        self.__headers = {"Authorization": f"Basic {encoded_credentials}"}
        # NOTE(review): SSL verification is disabled whenever Basic auth
        # is used - presumably for on-prem installs with self-signed
        # certificates; confirm this is intended.
        self.__ssl_verify = False
    # Call the connect API only when the tenant is CCP enabled or the
    # header was formed using user and password.

    # This check is needed so that when the tenant is non-CCP enabled,
    # the call from __init__ does not fail.
    if self.__ccp_enabled or self.__headers:
        connect_url = f"{self.__session_url}connect"

        http_params = {
            "url": connect_url,
            "method_type": HTTPRequest.POST,
            "headers": self.__get_header(),
            "json": data
        }

        # ssl_verify is either a bool or None; only forward it when set.
        if self.__ssl_verify is not None:
            http_params["verify"] = self.__ssl_verify

        response = UtilFuncs._http_request(**http_params)

        # Session cookie identifies this connection on subsequent calls.
        self.__session_id = response.cookies.get("session_id")
        self._process_vs_response(api_name="connect", response=response)
|
|
380
|
+
|
|
381
|
+
def initialize(self, **kwargs):
    """
    DESCRIPTION:
        Initialize a new vector store if it does not exist, otherwise
        provide an instance of an existing vector store.
        This call enables / configures a particular vector store to be used.

    PARAMETERS:
        description:
            Optional Argument.
            Specifies the description of the vector store.
            Types: str

        database_name:
            Optional Argument.
            Specifies the database name of the table or view to be indexed
            for the vector store. When "document_files" is passed, it refers
            to the database where the file content splits are stored.
            Note:
                The vector store is also created in this database.
            Types: str

        object_name:
            Optional Argument.
            Specifies the table name/teradataml DataFrame to be indexed for
            the vector store.
            Note:
                * Only one table name/teradataml DataFrame can be specified.
                * For data residing in multiple tables, a view should be
                  created and the view name/teradataml DataFrame over the
                  view should be specified here.
            Types: str, DataFrame

        key_columns:
            Optional Argument.
            Specifies the names of the key columns to be used for indexing.
            Types: str, list of str

        data_columns:
            Optional Argument.
            Specifies the names of the data columns to be used for indexing.
            Types: str, list of str

        vector_columns:
            Optional Argument.
            Specifies the names of the columns to be used for storing the
            embeddings.
            Default Value: vector_index
            Types: str, list of str

        chunk_size:
            Optional Argument.
            Specifies the size of each chunk when dividing document files
            into chunks.
            Default Value: 512
            Types: int

        optimized_chunking:
            Optional Argument.
            Specifies whether an optimized splitting mechanism supplied by
            Teradata should be used. Applicable only for "document_files".
            The documents are parsed internally based on file structure and
            chunks are dynamically created based on section layout.
            Note:
                "chunk_size" is not applicable when "optimized_chunking"
                is set to True.
            Default Value: True
            Types: bool

        header_height:
            Optional Argument.
            Specifies the height (in points) of the header section of a PDF
            document to be trimmed before processing the main content.
            Useful for removing unwanted header information from each page.
            Recommended value is 55.
            Default Value: 0
            Types: int

        footer_height:
            Optional Argument.
            Specifies the height (in points) of the footer section of a PDF
            document to be trimmed before processing the main content.
            Useful for removing unwanted footer information from each page.
            Recommended value is 55.
            Default Value: 0
            Types: int

        embeddings_model:
            Optional Argument.
            Specifies the embeddings model to be used for generating the
            embeddings.
            Permitted Values:
                * amazon.titan-embed-text-v1
                * amazon.titan-embed-image-v1
                * amazon.titan-embed-text-v2:0
                * text-embedding-ada-002
                * text-embedding-3-small
                * text-embedding-3-large
            Types: str

        embeddings_dims:
            Optional Argument.
            Specifies the number of dimensions to be used for generating
            the embeddings. The valid range and default depend on
            "embeddings_model".
            Permitted Values:
                * amazon.titan-embed-text-v1: 1536 only
                * amazon.titan-embed-image-v1: [256, 384, 1024]
                * amazon.titan-embed-text-v2:0: [256, 512, 1024]
                * text-embedding-ada-002: 1536 only
                * text-embedding-3-small: 1 <= dims <= 1536
                * text-embedding-3-large: 1 <= dims <= 3072
            Default Value:
                * amazon.titan-embed-text-v1: 1536
                * amazon.titan-embed-image-v1: 1024
                * amazon.titan-embed-text-v2:0: 1024
                * text-embedding-ada-002: 1536
                * text-embedding-3-small: 1536
                * text-embedding-3-large: 3072
            Types: int

        initial_delay_ms:
            Optional Argument.
            Specifies the millisecond delay after each input table row is
            sent for embeddings.
            Default Value: 5000
            Types: int

        delay_max_retries:
            Optional Argument.
            Specifies the maximum number of attempts after a failed input
            table row embedding request.
            Default Value: 12
            Types: int

        delay_exp_base:
            Optional Argument.
            Specifies the exponential base of delay time increase.
            Default Value: 1
            Types: int

        delay_jitter:
            Optional Argument.
            Specifies whether to use a random sum term in the exponent.
            Default Value: False
            Types: bool

        metric:
            Optional Argument.
            Specifies the metric to be used for calculating the distance
            between the vectors.
            Permitted Values: EUCLIDEAN, COSINE, MANHATTAN, DOTPRODUCT,
                              MINKOWSKI
            Default Value: EUCLIDEAN
            Types: str

        search_algorithm:
            Optional Argument.
            Specifies the algorithm to be used for searching the tables and
            views relevant to the question.
            Permitted Values: VECTORDISTANCE, KMEANS.
            Default Value: VECTORDISTANCE
            Types: str

        initial_centroids_method:
            Optional Argument.
            Specifies the algorithm to be used for initializing the
            centroids when "search_algorithm" is KMEANS.
            Permitted Values: RANDOM, KMEANS++
            Default Value: RANDOM
            Types: str

        train_numcluster:
            Optional Argument.
            Specifies the number of clusters to be trained when
            "search_algorithm" is KMEANS.
            Default Value: 3
            Types: int

        max_iternum:
            Optional Argument.
            Specifies the maximum number of iterations to be run during
            training when "search_algorithm" is KMEANS.
            Default Value: 10
            Types: int

        stop_threshold:
            Optional Argument.
            Specifies the threshold value at which training should be
            stopped when "search_algorithm" is KMEANS.
            Default Value: 0.0395
            Types: float

        seed:
            Optional Argument.
            Specifies the seed value to be used for random number
            generation when "search_algorithm" is KMEANS.
            Default Value: 0
            Types: int

        num_init:
            Optional Argument.
            Specifies the number of times the k-means algorithm should run
            with different initial centroid seeds.
            Default Value: 1
            Types: int

        top_k:
            Optional Argument.
            Specifies the number of top clusters to be considered while
            searching. Value should be between 1-100 (both inclusive).
            Default Value: 10
            Types: int

        search_threshold:
            Optional Argument.
            Specifies the threshold value to consider for matching tables
            while searching.
            Types: float

        search_numcluster:
            Optional Argument.
            Specifies the number of clusters to be considered while
            searching when "search_algorithm" is KMEANS.
            Default Value: 3
            Types: int

        prompt:
            Optional Argument.
            Specifies the prompt to be used by the language model to
            generate responses using top matches.
            Types: str

        chat_completion_model:
            Optional Argument.
            Specifies the name of the chat completion model to be used for
            generating text responses.
            Permitted Values:
                * anthropic.claude-3-haiku-20240307-v1:0
                * anthropic.claude-3-opus-20240229-v1:0
                * anthropic.claude-3-sonnet-20240229-v1:0
                * anthropic.claude-3-5-sonnet-20240620-v1:0
            Default Value: anthropic.claude-3-haiku-20240307-v1:0
            Types: str

        document_files:
            Optional Argument.
            Specifies the input dataset in document files format. The files
            are processed internally, converted to chunks and stored into a
            database table. Alternatively, users can chunk their files
            themselves, store them into a database table and specify its
            details using "database_name", "object_name" and "data_columns".
            Note:
                * Only PDF format is currently supported.
                * Multiple document files can be supplied.
                * Fully qualified file names should be specified.
            Examples:
                document_files=['file1.pdf', 'file2.pdf']
            Types: str, list

    RETURNS:
        None

    RAISES:
        TeradataMlException

    EXAMPLES:
        from teradataml import VectorStore

        # Create an instance of the VectorStore class.
        vs = VectorStore(name="vec1")

        # Example 1: The following example initializes the vector store
        #            using data residing in a table.
        vs.initialize(object_name="amazon_reviews_25",
                      description="vector store testing",
                      database_name='oaf',
                      key_columns=['rev_id', 'aid'],
                      data_columns=['rev_text'],
                      vector_columns='VectorIndex',
                      embeddings_model="amazon.titan-embed-text-v1")

        # Example 2: The following example initializes the vector store
        #            using data residing in files.

        # Forming the fully qualified path for SQL_Fundamentals.pdf and
        # InDb_Analytical_Functions.pdf.
        import os, teradataml
        files = [os.path.join(os.path.dirname(teradataml.__file__), "data",
                              "SQL_Fundamentals.pdf"),
                 os.path.join(os.path.dirname(teradataml.__file__), "data",
                              "InDb_Analytical_Functions.pdf")]

        vs.initialize(object_name="amazon_reviews_25",
                      description="vector store testing",
                      database_name='oaf',
                      key_columns=['rev_id', 'aid'],
                      data_columns=['rev_text'],
                      vector_columns='VectorIndex',
                      embeddings_model="amazon.titan-embed-text-v1",
                      document_files=files)

        # Example 3: Initialize an existing vector store.
        vs.initialize()
    """
    self.__set_vs_index_and_vs_parameters(**kwargs)
    # The REST endpoint accepts only 0/1 for log_level, so coerce the flag.
    self._enable_logging = 0 if not self._enable_logging else 1
    initialize_url = f'{self.__vectorstore_url}initialize?vs_name={self.name}&log_level={self._enable_logging}'

    data = {}
    if self.__vs_parameters or self.__vs_index:
        data = {}
        # Serialize each configuration section to a JSON string for the
        # multipart/form-data body.
        if self.__vs_parameters:
            data['vs_parameters'] = json.dumps(self.__vs_parameters)
        if self.__vs_index:
            data['vs_index'] = json.dumps(self.__vs_index)

    http_params = {
        "url": initialize_url,
        "method_type": HTTPRequest.POST,
        "headers": self.__get_header(),
        "data": data,
        "files": self._document_files,
        "cookies": {'session_id': self.__session_id}

    }
    response = UtilFuncs._http_request(**http_params)
    self._process_vs_response("initialize", response)
|
|
723
|
+
|
|
724
|
+
def create(self):
    """
    DESCRIPTION:
        Create a new vector store by generating the embeddings using the
        configuration specified at the time of instantiation.
        Note:
            For "search_algorithm" KMEANS, the index is also trained,
            because of which setup may take longer compared to vector
            distance.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        TeradataMlException

    EXAMPLES:
        from teradataml import VectorStore

        # Create an instance of the VectorStore class.
        vs = VectorStore(name="vec1")

        # Example 1: The following example creates the vector store using
        #            data residing in a table.
        vs.initialize(object_name="amazon_reviews_25",
                      description="vector store testing",
                      database_name='oaf',
                      key_columns=['rev_id', 'aid'],
                      data_columns=['rev_text'],
                      vector_columns='VectorIndex',
                      embeddings_model="amazon.titan-embed-text-v1")
        # Create the vector store.
        vs.create()

        # Example 2: The following example creates the vector store using
        #            data residing in files.
        import os, teradataml
        files = [os.path.join(os.path.dirname(teradataml.__file__), "data",
                              "SQL_Fundamentals.pdf"),
                 os.path.join(os.path.dirname(teradataml.__file__), "data",
                              "InDb_Analytical_Functions.pdf")]

        vs.initialize(object_name="amazon_reviews_25",
                      description="vector store testing",
                      database_name='oaf',
                      key_columns=['rev_id', 'aid'],
                      data_columns=['rev_text'],
                      vector_columns='VectorIndex',
                      embeddings_model="amazon.titan-embed-text-v1",
                      document_files=files)

        # Create the vector store.
        vs.create()
    """
    # POST <vectorstore-base>/create; the target store is identified by
    # the session cookie established earlier.
    endpoint = f'{self.__vectorstore_url}create'
    session_cookie = {'session_id': self.__session_id}
    resp = UtilFuncs._http_request(endpoint,
                                   HTTPRequest.POST,
                                   cookies=session_cookie,
                                   headers=self.__headers)
    self._process_vs_response("create", resp)
|
|
790
|
+
|
|
791
|
+
def disconnect(self):
    """
    DESCRIPTION:
        Log out from the session and clean up resources.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        TeradataMlException

    EXAMPLES:
        from teradataml import VectorStore

        # Example 1: Disconnect from the database.
        # Create an instance of the VectorStore class.
        vs = VectorStore(name="vec1")

        # Initialize the vector store.
        vs.initialize(object_name="amazon_reviews_25",
                      description="vector store testing",
                      database_name='oaf',
                      key_columns=['rev_id', 'aid'],
                      data_columns=['rev_text'],
                      vector_columns='VectorIndex',
                      embeddings_model="amazon.titan-embed-text-v1")

        # Disconnect from the database.
        vs.disconnect()
    """
    # POST <session-base>/disconnect to terminate the service session
    # identified by the session cookie.
    logout_endpoint = f'{self.__session_url}disconnect'
    resp = UtilFuncs._http_request(logout_endpoint,
                                   HTTPRequest.POST,
                                   cookies={'session_id': self.__session_id},
                                   headers=self.__headers)
    self._process_vs_response("disconnect", resp)
|
|
830
|
+
|
|
831
|
+
def destroy(self):
    """
    DESCRIPTION:
        Destroy the vector store.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        TeradataMlException

    EXAMPLES:
        from teradataml import VectorStore

        # Create an instance of the VectorStore class.
        vs = VectorStore(name="vec1")

        # Example 1: The following example destroys the vector store
        #            created using data residing in a table.
        vs.initialize(object_name="amazon_reviews_25",
                      description="vector store testing",
                      database_name='oaf',
                      key_columns=['rev_id', 'aid'],
                      data_columns=['rev_text'],
                      vector_columns='VectorIndex',
                      embeddings_model="amazon.titan-embed-text-v1")

        # Create the vector store.
        vs.create()

        # Destroy the vector store.
        vs.destroy()

        # Example 2: The following example destroys the vector store
        #            created using data residing in files.
        import os, teradataml
        files = [os.path.join(os.path.dirname(teradataml.__file__), "data",
                              "SQL_Fundamentals.pdf"),
                 os.path.join(os.path.dirname(teradataml.__file__), "data",
                              "InDb_Analytical_Functions.pdf")]

        vs.initialize(object_name="amazon_reviews_25",
                      description="vector store testing",
                      database_name='oaf',
                      key_columns=['rev_id', 'aid'],
                      data_columns=['rev_text'],
                      vector_columns='VectorIndex',
                      embeddings_model="amazon.titan-embed-text-v1",
                      document_files=files)

        # Create the vector store.
        vs.create()

        # Destroy the vector store.
        vs.destroy()
    """
    # POST <vectorstore-base>/destroy; the store is identified by the
    # session cookie.
    endpoint = f'{self.__vectorstore_url}destroy'
    resp = UtilFuncs._http_request(endpoint,
                                   HTTPRequest.POST,
                                   headers=self.__get_header(),
                                   cookies={'session_id': self.__session_id})
    self._process_vs_response("destroy", resp)
|
|
900
|
+
|
|
901
|
+
def update(self, **kwargs):
    """
    DESCRIPTION:
        Update an existing vector store with the specified parameters.
        Note:
            Currently, there is no support for updating a vector store
            when input contents are updated or new data files become
            available. In such cases, a new vector store must be created.

    PARAMETERS:
        embeddings_model:
            Optional Argument.
            Specifies the embeddings model to be used for generating the
            embeddings.
            Permitted Values: text-embedding-ada-002,
                              text-embedding-3-small,
                              text-embedding-3-large.
            Types: str

        embeddings_dims:
            Optional Argument.
            Specifies the number of dimensions for generating the
            embeddings. The valid range depends on "embeddings_model".
            Permitted Values:
                * amazon.titan-embed-text-v1: 1536 only
                * amazon.titan-embed-image-v1: [256, 384, 1024]
                * amazon.titan-embed-text-v2:0: [256, 512, 1024]
                * text-embedding-ada-002: 1536 only
                * text-embedding-3-small: 1 <= dims <= 1536
                * text-embedding-3-large: 1 <= dims <= 3072
            Types: int

        initial_delay_ms:
            Optional Argument.
            Specifies the millisecond delay after each input table row is
            sent for embeddings.
            Types: int

        delay_max_retries:
            Optional Argument.
            Specifies the maximum number of attempts after a failed input
            table row embedding request.
            Types: int

        delay_exp_base:
            Optional Argument.
            Specifies the exponential base of delay time increase.
            Types: int

        delay_jitter:
            Optional Argument.
            Specifies whether to use a random sum term in the exponent.
            Types: bool

        metric:
            Optional Argument.
            Specifies the metric to be used for calculating the distance
            between the vectors.
            Permitted Values: EUCLIDEAN, COSINE, MANHATTAN, DOTPRODUCT,
                              MINKOWSKI
            Types: str

        search_algorithm:
            Optional Argument.
            Specifies the algorithm to be used for searching the tables
            and views relevant to the question.
            Permitted Values: VECTORDISTANCE, KMEANS.
            Types: str

        initial_centroids_method:
            Optional Argument.
            Specifies the algorithm to be used for initializing the
            centroids when "search_algorithm" is KMEANS.
            Permitted Values: RANDOM, KMEANS++
            Types: str

        train_numcluster:
            Optional Argument.
            Specifies the number of clusters to be trained when
            "search_algorithm" is KMEANS.
            Types: int

        max_iternum:
            Optional Argument.
            Specifies the maximum number of iterations to be run during
            training when "search_algorithm" is KMEANS.
            Types: int

        stop_threshold:
            Optional Argument.
            Specifies the threshold value at which training should be
            stopped when "search_algorithm" is KMEANS.
            Types: int

        seed:
            Optional Argument.
            Specifies the seed value to be used for random number
            generation when "search_algorithm" is KMEANS.
            Types: int

        num_init:
            Optional Argument.
            Specifies the number of times the k-means algorithm runs with
            different initial centroid seeds.
            Types: int

        top_k:
            Optional Argument.
            Specifies the number of top clusters to be considered while
            searching.
            Types: int

        search_threshold:
            Optional Argument.
            Specifies the threshold value to consider matching tables/views
            while searching.
            Types: float

        search_numcluster:
            Optional Argument.
            Specifies the number of clusters to be considered while
            searching when "search_algorithm" is KMEANS.
            Types: int

        prompt:
            Optional Argument.
            Specifies the prompt to be used for generating answers.
            Types: str

        document_files:
            Optional Argument.
            Specifies the list of PDF files to be divided into chunks and
            used for document embedding.
            Types: tuple, list of tuple

    RETURNS:
        None

    RAISES:
        TeradataMlException

    EXAMPLES:
        from teradataml import VectorStore

        # Create an instance of the VectorStore class.
        vs = VectorStore(name="vec1")

        # Initialize the vector store.
        vs.initialize(object_name="amazon_reviews_25",
                      description="vector store testing",
                      database_name='oaf',
                      key_columns=['rev_id', 'aid'],
                      data_columns=['rev_text'],
                      vector_columns='VectorIndex',
                      embeddings_model="amazon.titan-embed-text-v1")
        # Create the vector store.
        vs.create()

        # Example 1: Update the search_algorithm, search_threshold and
        #            description of the vector store.
        vs.update(search_algorithm='KMEANS',
                  search_threshold=0.6,
                  description='KMeans clustering method')
    """
    # Merge the incoming keyword arguments into the stored vector store
    # configuration, then push the updated parameters to the service.
    self.__set_vs_index_and_vs_parameters(**kwargs)

    target_url = f'{self.__vectorstore_url}update'
    request_args = {
        "json": self.__vs_parameters,
        "files": self._document_files,
        "headers": self.__get_header(),
        "cookies": {'session_id': self.__session_id},
    }
    response = UtilFuncs._http_request(target_url, HTTPRequest.POST, **request_args)
    self._process_vs_response("update", response)
|
|
1076
|
+
|
|
1077
|
+
def authenticate(self, user_name=None, action=None, permission=None, **kwargs):
    """
    DESCRIPTION:
        Grant or revoke read/write permissions for a user on the vector
        store. Every database user is permitted to create a vector store
        according to existing access grants on corresponding data. That
        user becomes the owner/admin of the vector store and is the only
        one with read/write access to the vector store.
        Note:
            * The methods create(), authenticate(), update(), and
              destroy() need write permissions to the vector store.
            * Vector store initialization and the methods
              similarity_search(), prepare_response() and ask() need read
              permissions to the vector store.

    PARAMETERS:
        user_name:
            Required Argument.
            Specifies a database user to authenticate.
            Types: str

        action:
            Optional Argument.
            Specifies the type of grant to be provided.
            Permitted Values: GRANT, REVOKE
            Default Value: GRANT
            Types: Enum

        permission:
            Optional Argument.
            Specifies the type of permission to be provided.
            Permitted Values: READ, WRITE
            Default Value: READ
            Types: Enum

    RETURNS:
        None

    RAISES:
        TeradataMlException

    EXAMPLES:
        from teradataml import VectorStore, Action, Permission

        # Create an instance of the VectorStore class.
        vs = VectorStore(name="vec1")

        vs.initialize(object_name="amazon_reviews_25",
                      description="vector store testing",
                      database_name='oaf',
                      key_columns=['rev_id', 'aid'],
                      data_columns=['rev_text'],
                      vector_columns='VectorIndex',
                      embeddings_model="amazon.titan-embed-text-v1")
        # Create the vector store.
        vs.create()

        # Example 1: GRANT READ access for user 'test_user1' to vector store 'vec1'.
        vs.authenticate(user_name='test_user1', action=Action.GRANT, permission=Permission.READ)
    """
    # Store the request parameters on the instance.
    self._user_name = user_name
    self._action = action
    self._permission = permission

    # Validate required/typed arguments.
    arg_info_matrix = []
    arg_info_matrix.append(["user_name", self._user_name, False, (str), True])
    arg_info_matrix.append(["action", self._action, True, (action_enum), True])
    arg_info_matrix.append(["permission", self._permission, True, (permission_enum), True])

    _Validators._validate_missing_required_arguments(arg_info_matrix)

    # Explicitly checking action and permission enum types, as the generic
    # validator does not display a helpful message for enums.
    if self._action is not None and not isinstance(self._action, action_enum):
        raise TypeError(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
                                             "action", "Action Enum. Use 'Action' enum."
                                             " Check example"))

    if self._permission is not None and not isinstance(self._permission, permission_enum):
        raise TypeError(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
                                             "permission", "Permission Enum. Use "
                                             "'Permission' enum."
                                             " Check example"))

    # Validate argument types.
    _Validators._validate_function_arguments(arg_info_matrix)

    # Bug fix: "action" and "permission" are documented as optional with
    # defaults GRANT and READ, but the original code dereferenced
    # `.value` on them unconditionally, raising AttributeError when either
    # was omitted. Apply the documented defaults here instead.
    if self._action is None:
        self._action = action_enum.GRANT
    if self._permission is None:
        self._permission = permission_enum.READ

    authenticate_url = f"{self.__vectorstore_url}authenticate?user_name={self._user_name}" \
                       f"&action={self._action.value}&permission={self._permission.value}"
    response = UtilFuncs._http_request(authenticate_url, HTTPRequest.POST,
                                       headers=self.__get_header(),
                                       cookies={'session_id': self.__session_id})
    self._process_vs_response(api_name="authenticate", response=response)
|
|
1173
|
+
|
|
1174
|
+
def similarity_search(self, question=None):
    """
    DESCRIPTION:
        Perform similarity search in the vector store for the input
        question. The algorithm specified in "search_algorithm" is used
        to perform the search against the vector store. The result
        contains "top_k" rows along with the similarity score found by
        the "search_algorithm".

    PARAMETERS:
        question:
            Required Argument.
            Specifies a string of text for which similarity search needs
            to be performed.
            Types: str

    RETURNS:
        _SimilaritySearch object wrapping the matched rows.

    RAISES:
        TeradataMlException

    EXAMPLES:
        from teradataml import VectorStore

        # Create an instance of the VectorStore class.
        vs = VectorStore(name="vs")

        # Initialize the vector store.
        vs.initialize(object_name="amazon_reviews_25",
                      description="vector store testing",
                      database_name='oaf',
                      key_columns=['rev_id', 'aid'],
                      data_columns=['rev_text'],
                      vector_columns='VectorIndex',
                      embeddings_model="amazon.titan-embed-text-v1",
                      search_algorithm='VECTORDISTANCE',
                      top_k=10)

        # Create the vector store.
        vs.create()

        # Example: Perform similarity search in the vector store for the
        #          input question.
        question = 'Are there any reviews about books?'
        response = vs.similarity_search(question=question)
        print(response)
    """
    # Local import keeps this fix self-contained.
    from urllib.parse import quote

    # Initializing params.
    self._question = question

    # Validating params.
    arg_info_matrix = []
    arg_info_matrix.append(["question", self._question, False, (str), True])
    _Validators._validate_missing_required_arguments(arg_info_matrix)

    # Validate argument types.
    _Validators._validate_function_arguments(arg_info_matrix)

    # Bug fix: percent-encode the question before embedding it in the
    # query string. The original interpolated the raw text, so spaces,
    # '&', '?' or '#' in the question corrupted the request URL.
    similarity_search_url = (f'{self.__vectorstore_url}similarity-search'
                             f'?question={quote(self._question)}')
    response = UtilFuncs._http_request(similarity_search_url, HTTPRequest.POST,
                                       headers=self.__get_header(),
                                       cookies={'session_id': self.__session_id})
    return _SimilaritySearch(self._process_vs_response(api_name="similarity-search",
                                                       response=response))
|
|
1242
|
+
|
|
1243
|
+
def prepare_response(self, question=None, similarity_results=None, prompt=None):
    """
    DESCRIPTION:
        Prepare a natural language response to the user using the input
        question and similarity_results provided by similarity_search.
        The response is generated by a language model configured
        in the environment using a pre-configured prompt.
        An optional parameter prompt can be used to specify a customized
        prompt that replaces the internal prompt.

    PARAMETERS:
        question:
            Required Argument.
            Specifies a string of text for which similarity search
            needs to be performed.
            Types: str

        similarity_results:
            Required Argument.
            Specifies the similarity results obtained by similarity_search().
            Types: str

        prompt:
            Optional Argument.
            Specifies a customized prompt that replaces the internal prompt.
            Types: str

    RETURNS:
        HTTP Response json.

    RAISES:
        TeradataMlException

    EXAMPLES:
        # Create an instance of the VectorStore class.
        vs = VectorStore(name="vs")

        # Initialize the Vector Store.
        vs.initialize(object_name="amazon_reviews_25",
                      description="vector store testing",
                      database_name='oaf',
                      key_columns=['rev_id', 'aid'],
                      data_columns=['rev_text'],
                      vector_columns='VectorIndex',
                      embeddings_model="amazon.titan-embed-text-v1",
                      search_algorithm='VECTORDISTANCE',
                      top_k=10
                      )
        # Create the Vector Store.
        vs.create()

        # Perform similarity search in the Vector Store for
        # the input question.
        question = 'Are there any reviews about books?'
        response = vs.similarity_search(question=question)

        # Example 1: Prepare a natural language response to the user
        #            using the input question and similarity_results
        #            provided by similarity_search().
        question = 'Did any one feel the book is thin?'
        similar_objects_list = response['similar_objects_list']
        vs.prepare_response(question=question,
                            similarity_results=similar_objects_list)
    """
    # Record the inputs on the instance.
    self._question = question
    self._similarity_results = similarity_results
    self._prompt = prompt

    # Validation matrix: [name, value, is_optional, expected types, check-empty].
    arg_info_matrix = [
        ["question", self._question, False, (str), True],
        ["similarity_results", self._similarity_results, False, _SimilaritySearch, True],
        ["prompt", self._prompt, True, (str), True],
    ]
    _Validators._validate_missing_required_arguments(arg_info_matrix)

    # Explicitly checking similarity search API, as correct message is not displayed.
    if not isinstance(similarity_results, _SimilaritySearch):
        raise TypeError(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
                                             "similarity_results",
                                             "output of similarity_search()"))

    # Validate argument types.
    _Validators._validate_function_arguments(arg_info_matrix)

    # Build the request payload from the validated inputs.
    payload = {
        'question': self._question,
        'similar_objects_list': self._similarity_results._json_obj,
        'prompt': self._prompt,
    }
    response = UtilFuncs._http_request(f'{self.__vectorstore_url}prepare-response',
                                       HTTPRequest.POST,
                                       headers=self.__get_header(),
                                       cookies={'session_id': self.__session_id},
                                       json=payload)
    return self._process_vs_response(api_name="prepare_response", response=response)
def ask(self, question=None, prompt=None):
    """
    DESCRIPTION:
        Perform similarity search in the vector store for
        the input question followed by preparing a natural
        language response to the user. This method combines
        the operation of similarity_search() and prepare_response()
        into one call for faster response time.

    PARAMETERS:
        question:
            Required Argument.
            Specifies a string of text for which similarity search
            needs to be performed.
            Types: str

        prompt:
            Optional Argument.
            Specifies a customized prompt that replaces the internal prompt.
            Types: str

    RETURNS:
        dict

    RAISES:
        TeradataMlException

    EXAMPLES:
        # Create an instance of the VectorStore class.
        vs = VectorStore(name="vs")

        # Initialize the Vector Store.
        vs.initialize(object_name="amazon_reviews_25",
                      description="vector store testing",
                      database_name='oaf',
                      key_columns=['rev_id', 'aid'],
                      data_columns=['rev_text'],
                      vector_columns='VectorIndex',
                      embeddings_model="amazon.titan-embed-text-v1",
                      search_algorithm='VECTORDISTANCE',
                      top_k=10
                      )
        # Create the Vector Store.
        vs.create()

        custom_prompt = '''List good reviews about the books. Do not assume information.
                           Only provide information that is present in the data.
                           Format results like this:
                           Review ID:
                           Author ID:
                           Review:
                        '''
        # Example 1: Perform similarity search in the Vector Store for
        #            the input question followed by preparing a natural
        #            language response to the user.
        question = 'Are there any reviews saying that the books are inspiring?'
        response = vs.ask(question=question, prompt=custom_prompt)
        print(response)
    """
    # Record the inputs on the instance.
    self._question = question
    self._prompt = prompt

    # Validation matrix: [name, value, is_optional, expected types, check-empty].
    arg_info_matrix = [
        ["question", self._question, False, (str), True],
        ["prompt", self._prompt, True, (str), True],
    ]
    _Validators._validate_missing_required_arguments(arg_info_matrix)

    # Validate argument types.
    _Validators._validate_function_arguments(arg_info_matrix)

    # The service runs search + response generation in a single round trip.
    payload = {
        'question': self._question,
        'prompt': self._prompt,
    }
    response = UtilFuncs._http_request(f'{self.__vectorstore_url}ask',
                                       HTTPRequest.POST,
                                       headers=self.__get_header(),
                                       cookies={'session_id': self.__session_id},
                                       json=payload)
    return self._process_vs_response(api_name="ask", response=response)
def _process_vs_response(self, api_name, response, success_status_code=None):
    """
    DESCRIPTION:
        Function to process and validate the UES Response.

    PARAMETERS:
        api_name:
            Required Argument.
            Specifies the name of the Vector Store method.
            Types: str

        response:
            Required Argument.
            Specifies the response received from Vector Store service.
            Types: requests.Response

        success_status_code:
            Optional Argument.
            Specifies the expected success status code for the corresponding
            Vector Store service.
            Default Value: None
            Types: int

    RETURNS:
        Response object.

    RAISES:
        TeradataMlException.

    EXAMPLES:
        >>> _process_vs_response("create", resp)
    """
    try:
        data = response.json()
        # Success status code ranges between 200-300, unless the caller
        # supplied an explicit expected status code.
        if (success_status_code is None and 200 <= response.status_code < 300) or \
                (success_status_code == response.status_code):
            if "message" in data:
                # Informational responses are surfaced to the user; nothing
                # is returned in that case (matches prior behavior).
                print(data['message'])
                return
            return data

        # teradataml API got an error response. Error response is expected as follows -
        # Success
        # Response:
        # {
        #     "message": "success string"
        # }
        # Failure
        # Response:
        # {
        #     "detail": "error message string"
        # }
        # Validation
        # Error:
        # {
        #     "detail": [
        #         {
        #             "loc": ["string", 0],
        #             "msg": "string",
        #             "type": "string"
        #         }
        #     ]
        # }
        # Extract the fields and raise error accordingly.
        # Bug fix: use .get() so an error payload without a 'detail' key
        # raises a meaningful TeradataMlException (with the raw body) instead
        # of a KeyError that masks the real service error.
        detail = data.get('detail')
        if detail is None:
            error_description = response.text
        elif isinstance(detail, str):
            error_description = detail
        else:
            # Validation errors: one "<msg> for <location>" entry per element.
            error_parts = []
            for dict_ele in detail:
                loc = dict_ele['loc'][1] if len(dict_ele['loc']) > 1 else dict_ele['loc'][0]
                error_parts.append(f"{dict_ele['msg']} for {loc}")
            error_description = ",".join(error_parts)

        exception_message = "Request Failed - {}".format(error_description)

        error_msg = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
                                         api_name,
                                         exception_message)
        raise TeradataMlException(error_msg, MessageCodes.FUNC_EXECUTION_FAILED)

    # teradataml API may not get a Json API response in some cases.
    # So, raise an error with the response received as it is.
    except JSONDecodeError:
        error_msg = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
                                         api_name,
                                         response.text)
        raise TeradataMlException(error_msg, MessageCodes.FUNC_EXECUTION_FAILED)
def health(self):
    """
    DESCRIPTION:
        Perform sanity check for the service.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        # Create an instance of the VectorStore class.
        vs = VectorStore(name="vs")
        # Example 1: Check the health of the service.
        vs.health()
    """
    # Health endpoint lives on the base service URL (no session required).
    resp = UtilFuncs._http_request(f'{self.__base_url}health',
                                   HTTPRequest.GET,
                                   headers=self.__get_header())
    return pd.DataFrame([self._process_vs_response("health", resp)])
def status(self):
    """
    DESCRIPTION:
        Check the status of the below operations:
            * initialize
            * create
            * destroy
            * update

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        # Create an instance of the VectorStore class.
        vs = VectorStore(name="vs")
        # Example: Check the status of initialize and create operations.

        # Initialize VectorStore.
        vs.initialize(object_name="amazon_reviews_25",
                      description="vector store testing",
                      database_name='oaf',
                      key_columns=['rev_id', 'aid'],
                      data_columns=['rev_text'],
                      vector_columns='VectorIndex',
                      embeddings_model="amazon.titan-embed-text-v1"
                      )
        # Check status.
        vs.status()

        # Create VectorStore.
        vs.create()

        # Check status.
        vs.status()
    """
    # Status endpoint is scoped to this vector store's session.
    resp = UtilFuncs._http_request(f'{self.__vectorstore_url}status',
                                   HTTPRequest.GET,
                                   headers=self.__get_header(),
                                   cookies={'session_id': self.__session_id})
    return pd.DataFrame([self._process_vs_response("status", resp)])