unstructured-ingest 0.7.1__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/cli/README.md +28 -0
- unstructured_ingest/embed/mixedbreadai.py +0 -1
- unstructured_ingest/interfaces/upload_stager.py +2 -2
- unstructured_ingest/interfaces/uploader.py +3 -3
- unstructured_ingest/logger.py +2 -93
- unstructured_ingest/main.py +0 -0
- unstructured_ingest/pipeline/interfaces.py +1 -1
- unstructured_ingest/pipeline/pipeline.py +1 -1
- unstructured_ingest/processes/chunker.py +4 -0
- unstructured_ingest/processes/connectors/airtable.py +4 -2
- unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +10 -0
- unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +23 -0
- unstructured_ingest/processes/connectors/astradb.py +2 -2
- unstructured_ingest/processes/connectors/azure_ai_search.py +1 -1
- unstructured_ingest/processes/connectors/confluence.py +0 -1
- unstructured_ingest/processes/connectors/databricks/volumes_aws.py +1 -1
- unstructured_ingest/processes/connectors/databricks/volumes_azure.py +2 -2
- unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +1 -1
- unstructured_ingest/processes/connectors/databricks/volumes_table.py +1 -2
- unstructured_ingest/processes/connectors/delta_table.py +1 -0
- unstructured_ingest/processes/connectors/duckdb/base.py +2 -2
- unstructured_ingest/processes/connectors/duckdb/duckdb.py +3 -3
- unstructured_ingest/processes/connectors/duckdb/motherduck.py +3 -3
- unstructured_ingest/processes/connectors/fsspec/s3.py +5 -3
- unstructured_ingest/processes/connectors/gitlab.py +1 -2
- unstructured_ingest/processes/connectors/google_drive.py +0 -2
- unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -7
- unstructured_ingest/processes/connectors/kdbai.py +1 -0
- unstructured_ingest/processes/connectors/outlook.py +1 -2
- unstructured_ingest/processes/connectors/pinecone.py +0 -1
- unstructured_ingest/processes/connectors/redisdb.py +28 -24
- unstructured_ingest/processes/connectors/salesforce.py +1 -1
- unstructured_ingest/processes/connectors/slack.py +1 -2
- unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +5 -0
- unstructured_ingest/processes/connectors/sql/postgres.py +7 -1
- unstructured_ingest/processes/connectors/sql/singlestore.py +11 -6
- unstructured_ingest/processes/connectors/sql/snowflake.py +5 -0
- unstructured_ingest/processes/connectors/sql/sql.py +3 -4
- unstructured_ingest/processes/connectors/sql/sqlite.py +5 -0
- unstructured_ingest/processes/connectors/sql/vastdb.py +7 -3
- unstructured_ingest/processes/connectors/vectara.py +0 -2
- unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -2
- unstructured_ingest/processes/embedder.py +2 -2
- unstructured_ingest/processes/filter.py +1 -1
- unstructured_ingest/processes/partitioner.py +4 -0
- unstructured_ingest/processes/utils/blob_storage.py +2 -2
- unstructured_ingest/unstructured_api.py +13 -8
- unstructured_ingest/utils/data_prep.py +8 -32
- unstructured_ingest/utils/string_and_date_utils.py +3 -3
- unstructured_ingest-1.0.1.dist-info/METADATA +226 -0
- {unstructured_ingest-0.7.1.dist-info → unstructured_ingest-1.0.1.dist-info}/RECORD +54 -187
- {unstructured_ingest-0.7.1.dist-info → unstructured_ingest-1.0.1.dist-info}/WHEEL +1 -2
- examples/__init__.py +0 -0
- examples/airtable.py +0 -44
- examples/azure_cognitive_search.py +0 -55
- examples/chroma.py +0 -54
- examples/couchbase.py +0 -55
- examples/databricks_volumes_dest.py +0 -55
- examples/databricks_volumes_source.py +0 -53
- examples/delta_table.py +0 -45
- examples/discord_example.py +0 -36
- examples/elasticsearch.py +0 -49
- examples/google_drive.py +0 -45
- examples/kdbai.py +0 -54
- examples/local.py +0 -36
- examples/milvus.py +0 -44
- examples/mongodb.py +0 -53
- examples/opensearch.py +0 -50
- examples/pinecone.py +0 -57
- examples/s3.py +0 -38
- examples/salesforce.py +0 -44
- examples/sharepoint.py +0 -47
- examples/singlestore.py +0 -49
- examples/sql.py +0 -90
- examples/vectara.py +0 -54
- examples/weaviate.py +0 -44
- test/__init__.py +0 -0
- test/integration/__init__.py +0 -0
- test/integration/chunkers/__init__.py +0 -0
- test/integration/chunkers/test_chunkers.py +0 -31
- test/integration/connectors/__init__.py +0 -0
- test/integration/connectors/conftest.py +0 -38
- test/integration/connectors/databricks/__init__.py +0 -0
- test/integration/connectors/databricks/test_volumes_native.py +0 -273
- test/integration/connectors/discord/__init__.py +0 -0
- test/integration/connectors/discord/test_discord.py +0 -90
- test/integration/connectors/duckdb/__init__.py +0 -0
- test/integration/connectors/duckdb/conftest.py +0 -14
- test/integration/connectors/duckdb/test_duckdb.py +0 -90
- test/integration/connectors/duckdb/test_motherduck.py +0 -95
- test/integration/connectors/elasticsearch/__init__.py +0 -0
- test/integration/connectors/elasticsearch/conftest.py +0 -34
- test/integration/connectors/elasticsearch/test_elasticsearch.py +0 -331
- test/integration/connectors/elasticsearch/test_opensearch.py +0 -326
- test/integration/connectors/sql/__init__.py +0 -0
- test/integration/connectors/sql/test_databricks_delta_tables.py +0 -170
- test/integration/connectors/sql/test_postgres.py +0 -201
- test/integration/connectors/sql/test_singlestore.py +0 -182
- test/integration/connectors/sql/test_snowflake.py +0 -244
- test/integration/connectors/sql/test_sqlite.py +0 -168
- test/integration/connectors/sql/test_vastdb.py +0 -34
- test/integration/connectors/test_astradb.py +0 -287
- test/integration/connectors/test_azure_ai_search.py +0 -254
- test/integration/connectors/test_chroma.py +0 -136
- test/integration/connectors/test_confluence.py +0 -111
- test/integration/connectors/test_delta_table.py +0 -183
- test/integration/connectors/test_dropbox.py +0 -151
- test/integration/connectors/test_github.py +0 -49
- test/integration/connectors/test_google_drive.py +0 -257
- test/integration/connectors/test_jira.py +0 -67
- test/integration/connectors/test_lancedb.py +0 -247
- test/integration/connectors/test_milvus.py +0 -208
- test/integration/connectors/test_mongodb.py +0 -335
- test/integration/connectors/test_neo4j.py +0 -244
- test/integration/connectors/test_notion.py +0 -152
- test/integration/connectors/test_onedrive.py +0 -163
- test/integration/connectors/test_pinecone.py +0 -387
- test/integration/connectors/test_qdrant.py +0 -216
- test/integration/connectors/test_redis.py +0 -143
- test/integration/connectors/test_s3.py +0 -184
- test/integration/connectors/test_sharepoint.py +0 -222
- test/integration/connectors/test_vectara.py +0 -282
- test/integration/connectors/test_zendesk.py +0 -120
- test/integration/connectors/utils/__init__.py +0 -0
- test/integration/connectors/utils/constants.py +0 -13
- test/integration/connectors/utils/docker.py +0 -151
- test/integration/connectors/utils/docker_compose.py +0 -59
- test/integration/connectors/utils/validation/__init__.py +0 -0
- test/integration/connectors/utils/validation/destination.py +0 -77
- test/integration/connectors/utils/validation/equality.py +0 -76
- test/integration/connectors/utils/validation/source.py +0 -331
- test/integration/connectors/utils/validation/utils.py +0 -36
- test/integration/connectors/weaviate/__init__.py +0 -0
- test/integration/connectors/weaviate/conftest.py +0 -15
- test/integration/connectors/weaviate/test_cloud.py +0 -39
- test/integration/connectors/weaviate/test_local.py +0 -152
- test/integration/embedders/__init__.py +0 -0
- test/integration/embedders/conftest.py +0 -13
- test/integration/embedders/test_azure_openai.py +0 -57
- test/integration/embedders/test_bedrock.py +0 -103
- test/integration/embedders/test_huggingface.py +0 -24
- test/integration/embedders/test_mixedbread.py +0 -71
- test/integration/embedders/test_octoai.py +0 -75
- test/integration/embedders/test_openai.py +0 -74
- test/integration/embedders/test_togetherai.py +0 -71
- test/integration/embedders/test_vertexai.py +0 -63
- test/integration/embedders/test_voyageai.py +0 -79
- test/integration/embedders/utils.py +0 -66
- test/integration/partitioners/__init__.py +0 -0
- test/integration/partitioners/test_partitioner.py +0 -76
- test/integration/utils.py +0 -15
- test/unit/__init__.py +0 -0
- test/unit/chunkers/__init__.py +0 -0
- test/unit/chunkers/test_chunkers.py +0 -49
- test/unit/connectors/__init__.py +0 -0
- test/unit/connectors/ibm_watsonx/__init__.py +0 -0
- test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py +0 -459
- test/unit/connectors/motherduck/__init__.py +0 -0
- test/unit/connectors/motherduck/test_base.py +0 -73
- test/unit/connectors/sql/__init__.py +0 -0
- test/unit/connectors/sql/test_sql.py +0 -152
- test/unit/connectors/test_confluence.py +0 -71
- test/unit/connectors/test_jira.py +0 -401
- test/unit/embed/__init__.py +0 -0
- test/unit/embed/test_mixedbreadai.py +0 -42
- test/unit/embed/test_octoai.py +0 -27
- test/unit/embed/test_openai.py +0 -28
- test/unit/embed/test_vertexai.py +0 -25
- test/unit/embed/test_voyageai.py +0 -24
- test/unit/embedders/__init__.py +0 -0
- test/unit/embedders/test_bedrock.py +0 -36
- test/unit/embedders/test_huggingface.py +0 -48
- test/unit/embedders/test_mixedbread.py +0 -37
- test/unit/embedders/test_octoai.py +0 -35
- test/unit/embedders/test_openai.py +0 -35
- test/unit/embedders/test_togetherai.py +0 -37
- test/unit/embedders/test_vertexai.py +0 -37
- test/unit/embedders/test_voyageai.py +0 -38
- test/unit/partitioners/__init__.py +0 -0
- test/unit/partitioners/test_partitioner.py +0 -63
- test/unit/test_error.py +0 -27
- test/unit/test_html.py +0 -112
- test/unit/test_interfaces.py +0 -26
- test/unit/test_logger.py +0 -78
- test/unit/test_utils.py +0 -220
- test/unit/utils/__init__.py +0 -0
- test/unit/utils/data_generator.py +0 -32
- unstructured_ingest-0.7.1.dist-info/METADATA +0 -383
- unstructured_ingest-0.7.1.dist-info/top_level.txt +0 -3
- {unstructured_ingest-0.7.1.dist-info → unstructured_ingest-1.0.1.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.7.1.dist-info → unstructured_ingest-1.0.1.dist-info/licenses}/LICENSE.md +0 -0
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: unstructured_ingest
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: Local ETL data pipeline to get data RAG ready
|
|
5
|
+
Author-email: Unstructured Technologies <devops@unstructuredai.io>
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
License-File: LICENSE.md
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Intended Audience :: Education
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Requires-Python: <3.13,>=3.9
|
|
21
|
+
Requires-Dist: click
|
|
22
|
+
Requires-Dist: dataclasses-json
|
|
23
|
+
Requires-Dist: opentelemetry-sdk
|
|
24
|
+
Requires-Dist: pydantic>=2.7
|
|
25
|
+
Requires-Dist: python-dateutil
|
|
26
|
+
Requires-Dist: tqdm
|
|
27
|
+
Provides-Extra: airtable
|
|
28
|
+
Requires-Dist: pandas; extra == 'airtable'
|
|
29
|
+
Requires-Dist: pyairtable; extra == 'airtable'
|
|
30
|
+
Provides-Extra: astradb
|
|
31
|
+
Requires-Dist: astrapy; extra == 'astradb'
|
|
32
|
+
Provides-Extra: azure
|
|
33
|
+
Requires-Dist: adlfs; extra == 'azure'
|
|
34
|
+
Requires-Dist: fsspec; extra == 'azure'
|
|
35
|
+
Provides-Extra: azure-ai-search
|
|
36
|
+
Requires-Dist: azure-search-documents; extra == 'azure-ai-search'
|
|
37
|
+
Provides-Extra: bedrock
|
|
38
|
+
Requires-Dist: aioboto3; extra == 'bedrock'
|
|
39
|
+
Requires-Dist: boto3; extra == 'bedrock'
|
|
40
|
+
Provides-Extra: biomed
|
|
41
|
+
Requires-Dist: bs4; extra == 'biomed'
|
|
42
|
+
Requires-Dist: requests; extra == 'biomed'
|
|
43
|
+
Provides-Extra: box
|
|
44
|
+
Requires-Dist: boxfs; extra == 'box'
|
|
45
|
+
Requires-Dist: fsspec; extra == 'box'
|
|
46
|
+
Provides-Extra: chroma
|
|
47
|
+
Requires-Dist: chromadb; extra == 'chroma'
|
|
48
|
+
Provides-Extra: clarifai
|
|
49
|
+
Requires-Dist: clarifai; extra == 'clarifai'
|
|
50
|
+
Provides-Extra: confluence
|
|
51
|
+
Requires-Dist: atlassian-python-api; extra == 'confluence'
|
|
52
|
+
Requires-Dist: requests; extra == 'confluence'
|
|
53
|
+
Provides-Extra: couchbase
|
|
54
|
+
Requires-Dist: couchbase; extra == 'couchbase'
|
|
55
|
+
Provides-Extra: databricks-delta-tables
|
|
56
|
+
Requires-Dist: databricks-sql-connector; extra == 'databricks-delta-tables'
|
|
57
|
+
Requires-Dist: pandas; extra == 'databricks-delta-tables'
|
|
58
|
+
Provides-Extra: databricks-volumes
|
|
59
|
+
Requires-Dist: databricks-sdk; extra == 'databricks-volumes'
|
|
60
|
+
Provides-Extra: delta-table
|
|
61
|
+
Requires-Dist: boto3; extra == 'delta-table'
|
|
62
|
+
Requires-Dist: deltalake; extra == 'delta-table'
|
|
63
|
+
Requires-Dist: pandas; extra == 'delta-table'
|
|
64
|
+
Provides-Extra: discord
|
|
65
|
+
Requires-Dist: discord-py; extra == 'discord'
|
|
66
|
+
Provides-Extra: doc
|
|
67
|
+
Requires-Dist: unstructured[doc]; extra == 'doc'
|
|
68
|
+
Provides-Extra: docx
|
|
69
|
+
Requires-Dist: unstructured[docx]; extra == 'docx'
|
|
70
|
+
Provides-Extra: dropbox
|
|
71
|
+
Requires-Dist: dropboxdrivefs; extra == 'dropbox'
|
|
72
|
+
Requires-Dist: fsspec; extra == 'dropbox'
|
|
73
|
+
Provides-Extra: duckdb
|
|
74
|
+
Requires-Dist: duckdb; extra == 'duckdb'
|
|
75
|
+
Requires-Dist: pandas; extra == 'duckdb'
|
|
76
|
+
Provides-Extra: elasticsearch
|
|
77
|
+
Requires-Dist: elasticsearch[async]; extra == 'elasticsearch'
|
|
78
|
+
Provides-Extra: epub
|
|
79
|
+
Requires-Dist: unstructured[epub]; extra == 'epub'
|
|
80
|
+
Provides-Extra: gcs
|
|
81
|
+
Requires-Dist: bs4; extra == 'gcs'
|
|
82
|
+
Requires-Dist: fsspec; extra == 'gcs'
|
|
83
|
+
Requires-Dist: gcsfs; extra == 'gcs'
|
|
84
|
+
Provides-Extra: github
|
|
85
|
+
Requires-Dist: pygithub>1.58.0; extra == 'github'
|
|
86
|
+
Requires-Dist: requests; extra == 'github'
|
|
87
|
+
Provides-Extra: gitlab
|
|
88
|
+
Requires-Dist: python-gitlab; extra == 'gitlab'
|
|
89
|
+
Provides-Extra: google-drive
|
|
90
|
+
Requires-Dist: google-api-python-client; extra == 'google-drive'
|
|
91
|
+
Provides-Extra: hubspot
|
|
92
|
+
Requires-Dist: hubspot-api-client; extra == 'hubspot'
|
|
93
|
+
Requires-Dist: urllib3; extra == 'hubspot'
|
|
94
|
+
Provides-Extra: huggingface
|
|
95
|
+
Requires-Dist: sentence-transformers; extra == 'huggingface'
|
|
96
|
+
Provides-Extra: ibm-watsonx-s3
|
|
97
|
+
Requires-Dist: httpx; extra == 'ibm-watsonx-s3'
|
|
98
|
+
Requires-Dist: pandas; extra == 'ibm-watsonx-s3'
|
|
99
|
+
Requires-Dist: pyarrow; extra == 'ibm-watsonx-s3'
|
|
100
|
+
Requires-Dist: pyiceberg; extra == 'ibm-watsonx-s3'
|
|
101
|
+
Requires-Dist: tenacity; extra == 'ibm-watsonx-s3'
|
|
102
|
+
Provides-Extra: image
|
|
103
|
+
Requires-Dist: unstructured[image]; extra == 'image'
|
|
104
|
+
Provides-Extra: jira
|
|
105
|
+
Requires-Dist: atlassian-python-api; extra == 'jira'
|
|
106
|
+
Provides-Extra: kafka
|
|
107
|
+
Requires-Dist: confluent-kafka; extra == 'kafka'
|
|
108
|
+
Provides-Extra: kdbai
|
|
109
|
+
Requires-Dist: kdbai-client>=1.4.0; extra == 'kdbai'
|
|
110
|
+
Requires-Dist: pandas; extra == 'kdbai'
|
|
111
|
+
Provides-Extra: lancedb
|
|
112
|
+
Requires-Dist: lancedb; extra == 'lancedb'
|
|
113
|
+
Provides-Extra: md
|
|
114
|
+
Requires-Dist: unstructured[md]; extra == 'md'
|
|
115
|
+
Provides-Extra: milvus
|
|
116
|
+
Requires-Dist: pymilvus; extra == 'milvus'
|
|
117
|
+
Provides-Extra: mixedbreadai
|
|
118
|
+
Requires-Dist: mixedbread-ai; extra == 'mixedbreadai'
|
|
119
|
+
Provides-Extra: mongodb
|
|
120
|
+
Requires-Dist: pymongo; extra == 'mongodb'
|
|
121
|
+
Provides-Extra: msg
|
|
122
|
+
Requires-Dist: unstructured[msg]; extra == 'msg'
|
|
123
|
+
Provides-Extra: neo4j
|
|
124
|
+
Requires-Dist: cymple; extra == 'neo4j'
|
|
125
|
+
Requires-Dist: neo4j-rust-ext; extra == 'neo4j'
|
|
126
|
+
Requires-Dist: networkx; extra == 'neo4j'
|
|
127
|
+
Provides-Extra: notion
|
|
128
|
+
Requires-Dist: backoff; extra == 'notion'
|
|
129
|
+
Requires-Dist: htmlbuilder; extra == 'notion'
|
|
130
|
+
Requires-Dist: httpx; extra == 'notion'
|
|
131
|
+
Requires-Dist: notion-client; extra == 'notion'
|
|
132
|
+
Provides-Extra: octoai
|
|
133
|
+
Requires-Dist: openai; extra == 'octoai'
|
|
134
|
+
Requires-Dist: tiktoken; extra == 'octoai'
|
|
135
|
+
Provides-Extra: odt
|
|
136
|
+
Requires-Dist: unstructured[odt]; extra == 'odt'
|
|
137
|
+
Provides-Extra: onedrive
|
|
138
|
+
Requires-Dist: msal; extra == 'onedrive'
|
|
139
|
+
Requires-Dist: office365-rest-python-client; extra == 'onedrive'
|
|
140
|
+
Requires-Dist: requests; extra == 'onedrive'
|
|
141
|
+
Provides-Extra: openai
|
|
142
|
+
Requires-Dist: openai; extra == 'openai'
|
|
143
|
+
Requires-Dist: tiktoken; extra == 'openai'
|
|
144
|
+
Provides-Extra: opensearch
|
|
145
|
+
Requires-Dist: opensearch-py; extra == 'opensearch'
|
|
146
|
+
Provides-Extra: org
|
|
147
|
+
Requires-Dist: unstructured[org]; extra == 'org'
|
|
148
|
+
Provides-Extra: outlook
|
|
149
|
+
Requires-Dist: msal; extra == 'outlook'
|
|
150
|
+
Requires-Dist: office365-rest-python-client; extra == 'outlook'
|
|
151
|
+
Provides-Extra: pdf
|
|
152
|
+
Requires-Dist: unstructured[pdf]; extra == 'pdf'
|
|
153
|
+
Provides-Extra: pinecone
|
|
154
|
+
Requires-Dist: pinecone; extra == 'pinecone'
|
|
155
|
+
Provides-Extra: postgres
|
|
156
|
+
Requires-Dist: pandas; extra == 'postgres'
|
|
157
|
+
Requires-Dist: psycopg2-binary; extra == 'postgres'
|
|
158
|
+
Provides-Extra: ppt
|
|
159
|
+
Requires-Dist: unstructured[ppt]; extra == 'ppt'
|
|
160
|
+
Provides-Extra: pptx
|
|
161
|
+
Requires-Dist: unstructured[pptx]; extra == 'pptx'
|
|
162
|
+
Provides-Extra: qdrant
|
|
163
|
+
Requires-Dist: qdrant-client; extra == 'qdrant'
|
|
164
|
+
Provides-Extra: reddit
|
|
165
|
+
Requires-Dist: praw; extra == 'reddit'
|
|
166
|
+
Provides-Extra: redis
|
|
167
|
+
Requires-Dist: redis; extra == 'redis'
|
|
168
|
+
Provides-Extra: remote
|
|
169
|
+
Requires-Dist: unstructured-client>=0.30.0; extra == 'remote'
|
|
170
|
+
Provides-Extra: rst
|
|
171
|
+
Requires-Dist: unstructured[rst]; extra == 'rst'
|
|
172
|
+
Provides-Extra: rtf
|
|
173
|
+
Requires-Dist: unstructured[rtf]; extra == 'rtf'
|
|
174
|
+
Provides-Extra: s3
|
|
175
|
+
Requires-Dist: fsspec; extra == 's3'
|
|
176
|
+
Requires-Dist: s3fs; extra == 's3'
|
|
177
|
+
Provides-Extra: salesforce
|
|
178
|
+
Requires-Dist: simple-salesforce; extra == 'salesforce'
|
|
179
|
+
Provides-Extra: sftp
|
|
180
|
+
Requires-Dist: fsspec; extra == 'sftp'
|
|
181
|
+
Requires-Dist: paramiko; extra == 'sftp'
|
|
182
|
+
Provides-Extra: sharepoint
|
|
183
|
+
Requires-Dist: msal; extra == 'sharepoint'
|
|
184
|
+
Requires-Dist: office365-rest-python-client; extra == 'sharepoint'
|
|
185
|
+
Requires-Dist: requests; extra == 'sharepoint'
|
|
186
|
+
Provides-Extra: singlestore
|
|
187
|
+
Requires-Dist: pandas; extra == 'singlestore'
|
|
188
|
+
Requires-Dist: singlestoredb; extra == 'singlestore'
|
|
189
|
+
Provides-Extra: slack
|
|
190
|
+
Requires-Dist: slack-sdk[optional]; extra == 'slack'
|
|
191
|
+
Provides-Extra: snowflake
|
|
192
|
+
Requires-Dist: pandas; extra == 'snowflake'
|
|
193
|
+
Requires-Dist: psycopg2-binary; extra == 'snowflake'
|
|
194
|
+
Requires-Dist: snowflake-connector-python; extra == 'snowflake'
|
|
195
|
+
Provides-Extra: togetherai
|
|
196
|
+
Requires-Dist: together; extra == 'togetherai'
|
|
197
|
+
Provides-Extra: tsv
|
|
198
|
+
Requires-Dist: unstructured[tsv]; extra == 'tsv'
|
|
199
|
+
Provides-Extra: vastdb
|
|
200
|
+
Requires-Dist: ibis; extra == 'vastdb'
|
|
201
|
+
Requires-Dist: pandas; extra == 'vastdb'
|
|
202
|
+
Requires-Dist: pyarrow; extra == 'vastdb'
|
|
203
|
+
Requires-Dist: vastdb; extra == 'vastdb'
|
|
204
|
+
Provides-Extra: vectara
|
|
205
|
+
Requires-Dist: aiofiles; extra == 'vectara'
|
|
206
|
+
Requires-Dist: httpx; extra == 'vectara'
|
|
207
|
+
Requires-Dist: requests; extra == 'vectara'
|
|
208
|
+
Provides-Extra: vertexai
|
|
209
|
+
Requires-Dist: vertexai; extra == 'vertexai'
|
|
210
|
+
Provides-Extra: voyageai
|
|
211
|
+
Requires-Dist: voyageai; extra == 'voyageai'
|
|
212
|
+
Provides-Extra: weaviate
|
|
213
|
+
Requires-Dist: weaviate-client; extra == 'weaviate'
|
|
214
|
+
Provides-Extra: wikipedia
|
|
215
|
+
Requires-Dist: wikipedia; extra == 'wikipedia'
|
|
216
|
+
Provides-Extra: xlsx
|
|
217
|
+
Requires-Dist: unstructured[xlsx]; extra == 'xlsx'
|
|
218
|
+
Provides-Extra: zendesk
|
|
219
|
+
Requires-Dist: aiofiles; extra == 'zendesk'
|
|
220
|
+
Requires-Dist: bs4; extra == 'zendesk'
|
|
221
|
+
Requires-Dist: httpx; extra == 'zendesk'
|
|
222
|
+
Description-Content-Type: text/markdown
|
|
223
|
+
|
|
224
|
+
# Unstructured Ingest
|
|
225
|
+
|
|
226
|
+
For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.
|
|
@@ -1,146 +1,12 @@
|
|
|
1
|
-
examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
examples/airtable.py,sha256=4n6B8l_fmnlZLdk9SIfYticOTgZGQBjBfktNwFQf9Zc,1831
|
|
3
|
-
examples/azure_cognitive_search.py,sha256=KThZyRQ2HhTndBcGBn1dlr0FARB2PmBBRRnKJU5AuQU,2279
|
|
4
|
-
examples/chroma.py,sha256=fLqcpnbMAlJEe96SCMm17ZGOtcnaDQBGusURhLDwth8,2208
|
|
5
|
-
examples/couchbase.py,sha256=u4fmZb4eBYEcBgWCaWOKSxxxV1v-YpyaGG7J4ept1w0,2261
|
|
6
|
-
examples/databricks_volumes_dest.py,sha256=xoSVgmS4dNKOKGKtin0ojB20mO7vfCJ8FQ_DFIgMW-U,2329
|
|
7
|
-
examples/databricks_volumes_source.py,sha256=DT7PXW6CTOGSH5lzuZMfasjV6vgoG28R68I9Cb98JvQ,2278
|
|
8
|
-
examples/delta_table.py,sha256=0InvWFYZZt6TolYtWlKp9KliOnRxFNubInbwqlBWMIA,1898
|
|
9
|
-
examples/discord_example.py,sha256=ZNzKVxk7hfUF0qZdOLkh7fGItaiIdo6gh7JYhwr2vyQ,1624
|
|
10
|
-
examples/elasticsearch.py,sha256=KQXIYCE44w_CRZQGanlfP9ZY9NZ5gclxKKvtZnd-IRA,2129
|
|
11
|
-
examples/google_drive.py,sha256=iOjkC8iWQ3sqM3TTsL8Ng_yZbm2C5xfws1a9MTZuV7M,1677
|
|
12
|
-
examples/kdbai.py,sha256=W7yT2lnYOPbftqTiSHQNgWVwQC4UXJ8QhkPqSdiG_Co,2225
|
|
13
|
-
examples/local.py,sha256=WuN3SqxoXRlh2xhMZPh4R9EdCNfjQUhurAlGVqGuUF8,1569
|
|
14
|
-
examples/milvus.py,sha256=TKtH1Rxrj3Hr9d1BUx7qEK468Xb0ux7Ak1RukPXSOOo,1877
|
|
15
|
-
examples/mongodb.py,sha256=eP43TY-rjOeWnVk4m5jSSWJSWXcy6xRYFuLWusBZXws,2160
|
|
16
|
-
examples/opensearch.py,sha256=8YmQpvOB9HBQqoC47ht-lX34SpkoaDlezbQOHRG82cw,2103
|
|
17
|
-
examples/pinecone.py,sha256=URXalj5-0eTVnmfzD0icCB1brGklplU4P8l0jrawCjI,2479
|
|
18
|
-
examples/s3.py,sha256=23y_lPUkPo50rDMZC7cc3kBaSOf5pP_xl_7HO0Mb3c8,1742
|
|
19
|
-
examples/salesforce.py,sha256=tiO6hdRI79H_oORPnIf1FvB0IuGTYG2KzZlnqC_J9Cw,1888
|
|
20
|
-
examples/sharepoint.py,sha256=a0h2zU28m6bW5g17b8BDrcsHzdzjSgb--gYV80bRqs0,2067
|
|
21
|
-
examples/singlestore.py,sha256=UAdBOtIcmhyRkZ-pIh7rrY7Yt_Ed8t3puulZ-MhaSfU,2060
|
|
22
|
-
examples/sql.py,sha256=YSmLD7Ri2a8CvBxRJWxPQefqV4kV8kF3W0l3TXu_iyY,2997
|
|
23
|
-
examples/vectara.py,sha256=bWSsMQL3hEEt5CLR9CZFnuplrSAeLj_EiADipRU_Gkw,2247
|
|
24
|
-
examples/weaviate.py,sha256=QUtYJ-y7eYfm69T316-aUm1imQZnoSJ09RGSVQAoTck,1906
|
|
25
|
-
test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
|
-
test/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
-
test/integration/utils.py,sha256=CWqzEGw6TA_ZoP9hRUkW64TWYssooBbufcTRmbJvod8,401
|
|
28
|
-
test/integration/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
|
-
test/integration/chunkers/test_chunkers.py,sha256=MTPVBCBvh54fBqi_53oPkrH9QJtvJeE9YEXDOZ8G0so,1059
|
|
30
|
-
test/integration/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
|
-
test/integration/connectors/conftest.py,sha256=3weYhwyXyAVVk8DsLqScDOk-PZwsrHQLs1RdYa1EYqQ,1015
|
|
32
|
-
test/integration/connectors/test_astradb.py,sha256=1hFqd9NI283t6lQZryBJMfJoRfP6PKVHTl_6X3Nk0bM,9925
|
|
33
|
-
test/integration/connectors/test_azure_ai_search.py,sha256=NMSjIbFO1EWqr8U-Owd4qdC3pDROjg7zNAXr5yGSsH4,9699
|
|
34
|
-
test/integration/connectors/test_chroma.py,sha256=_K4CUp9x5O_Uuw1uwDi1R0jIvzcaW0wI8tXfAV1hh7c,4536
|
|
35
|
-
test/integration/connectors/test_confluence.py,sha256=3sAFQxRUrS7xXIQXQBccY0r5kihnP8rc_sAs-44g4Ys,3587
|
|
36
|
-
test/integration/connectors/test_delta_table.py,sha256=-ivYySHKgFRCd6PUIK-fy6yOkku3uj4YhDDEUlqi-tM,6915
|
|
37
|
-
test/integration/connectors/test_dropbox.py,sha256=sW-NOXA0-4xtAUgnSnPGg-OhiIhKviROJPpxUt8y8s8,4939
|
|
38
|
-
test/integration/connectors/test_github.py,sha256=wFWRFhfhTUz3byk2FSe9qVv7xMxcBjHGs1FlhjtnTAQ,1500
|
|
39
|
-
test/integration/connectors/test_google_drive.py,sha256=ceiFoVnaguTsQrLHZk8jv-IZ-i_EP4wenHSX-QKHvTM,10300
|
|
40
|
-
test/integration/connectors/test_jira.py,sha256=0FnxFe42d32EGuArfxxnfINkoYNoCsgJjP7ZU6fePu8,2073
|
|
41
|
-
test/integration/connectors/test_lancedb.py,sha256=E8yFuvQMx68w1s1PXIBP8gUlNuUpCtiGNYd7YnDA6Aw,9213
|
|
42
|
-
test/integration/connectors/test_milvus.py,sha256=_cYmJMocsZuUroalT5uc9rcsHFnTIpJyJjIwK8oDDYc,7177
|
|
43
|
-
test/integration/connectors/test_mongodb.py,sha256=wt5o-7qtMtjGv0IPKlhEnD3-sJjBX8cv1acn1Mcq-TY,12450
|
|
44
|
-
test/integration/connectors/test_neo4j.py,sha256=BSOqRTY4ZV8o6TV1MOmUKQq7DzRFU_z9umjDk-yw-Jg,8450
|
|
45
|
-
test/integration/connectors/test_notion.py,sha256=3OXFcSM-jE1_E_JoGw--pz-cv3dPZvt18scACJiHjwo,5397
|
|
46
|
-
test/integration/connectors/test_onedrive.py,sha256=0SZB818cNsxYZlBJJpuvU1PqsFDxRaOiLfJTRcc9Bv0,5233
|
|
47
|
-
test/integration/connectors/test_pinecone.py,sha256=0XBK9xxZhry6Rnv0s-chIWgtN5d1p8Lx-kEEQMNuAnA,13650
|
|
48
|
-
test/integration/connectors/test_qdrant.py,sha256=z3RThQJKzCafCtfH0ocy_DNDlzmDdu_opQH3mKTn0CE,8031
|
|
49
|
-
test/integration/connectors/test_redis.py,sha256=gUU6Dv616tX9KANiqhkMrPWvmhbV2Gk1pNuIv2MvZG8,5093
|
|
50
|
-
test/integration/connectors/test_s3.py,sha256=Cd9HsPjrSB6xss1DO4YHqSORJJ2pUXNVaAlrlpqjZS8,7477
|
|
51
|
-
test/integration/connectors/test_sharepoint.py,sha256=wq4G6J5ffXhYquUySVd5UUtYWC43RoXgCSY4fdlX2z0,7643
|
|
52
|
-
test/integration/connectors/test_vectara.py,sha256=08GIh6J2QTSuupdDOJ_TiyQrYYK3vamUaEQe3_B5-WY,9278
|
|
53
|
-
test/integration/connectors/test_zendesk.py,sha256=15bl3wy0pLxS2dkBlE11yPOX71k6Vbxo0BEFQK-qcFs,3724
|
|
54
|
-
test/integration/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
|
-
test/integration/connectors/databricks/test_volumes_native.py,sha256=RbxImt0JPnvl8TAK_rAtIspaFuRmEFzjL9rLqkm6Juk,9563
|
|
56
|
-
test/integration/connectors/discord/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
57
|
-
test/integration/connectors/discord/test_discord.py,sha256=WeB0ST572GvELMlgorRMwRxYIWkleIobXn6ULhjo1rw,3173
|
|
58
|
-
test/integration/connectors/duckdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
59
|
-
test/integration/connectors/duckdb/conftest.py,sha256=rlBHMJTiJ2a5xbvIxTOyhhcuTBc9DO-yTzD6Kf8X3hY,301
|
|
60
|
-
test/integration/connectors/duckdb/test_duckdb.py,sha256=o3CKTG2T15QyWwPTh7Yyv42eJ39opm7g9b508y1o1m8,2973
|
|
61
|
-
test/integration/connectors/duckdb/test_motherduck.py,sha256=S2EtKVy4HE0ysB3OvdFOUe5O-0w5Bk4Loy2Gyyff8b0,3218
|
|
62
|
-
test/integration/connectors/elasticsearch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
63
|
-
test/integration/connectors/elasticsearch/conftest.py,sha256=-i4_7MkIxSQENz7nuD2uHuhGU9mZ33vpeTPhHtRpQfs,989
|
|
64
|
-
test/integration/connectors/elasticsearch/test_elasticsearch.py,sha256=NUUL88Z7uWfnEopZ0wkQtWMA94WDZw87v_oCkPShVM4,12076
|
|
65
|
-
test/integration/connectors/elasticsearch/test_opensearch.py,sha256=nV3gHD_tb79KRE8DqfUWpOfzem9LjojACspUHTSI7dw,11454
|
|
66
|
-
test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
67
|
-
test/integration/connectors/sql/test_databricks_delta_tables.py,sha256=hYfmT9ud6NqJk1pibaF-1LvNgrfXBGUq0FTHkcIvICE,6145
|
|
68
|
-
test/integration/connectors/sql/test_postgres.py,sha256=b0do75CAxKFEgoODx7mNQqo5vEgZbWHSifMNV_H2IgU,6974
|
|
69
|
-
test/integration/connectors/sql/test_singlestore.py,sha256=uuw_T8EefwMcRD7clzviJxwb45f9k4G2dZIYqbNhM1s,6157
|
|
70
|
-
test/integration/connectors/sql/test_snowflake.py,sha256=dNHR8fk5V1WsvN6P1CesPKCsRbKTuf6zXgjyf7vgueg,7498
|
|
71
|
-
test/integration/connectors/sql/test_sqlite.py,sha256=6By1-XKiGCA5KlR1DHlM6ArU5c_2GjM5mE2RhMoNPg8,5960
|
|
72
|
-
test/integration/connectors/sql/test_vastdb.py,sha256=A0W-kHl1GRf2zHCmTWXOJjV8HPi3xlWvCTKgjebVZUY,1066
|
|
73
|
-
test/integration/connectors/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
74
|
-
test/integration/connectors/utils/constants.py,sha256=JhTk6YNw7JVpkk-Pl8zn2YYkExeL1oE9VBWm_kMYGfo,369
|
|
75
|
-
test/integration/connectors/utils/docker.py,sha256=4g1STiSbYN5qcmDTXyPxVJgwx97O6wk7n-DJ-zgzgag,4971
|
|
76
|
-
test/integration/connectors/utils/docker_compose.py,sha256=GVTB6Cel05c0VQ2n4AwkQQx_cBfz13ZTs1HpbaYipNU,2223
|
|
77
|
-
test/integration/connectors/utils/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
78
|
-
test/integration/connectors/utils/validation/destination.py,sha256=m5RHgZ3_h6HA2SsWbg15rmKhZjzsyKqOtFlUqpy33SI,2746
|
|
79
|
-
test/integration/connectors/utils/validation/equality.py,sha256=R6d_1c-Si5518WJcBcshF_wBRnywnZ0ORQ-NL0xNmGo,2602
|
|
80
|
-
test/integration/connectors/utils/validation/source.py,sha256=WX67a1tYpyUFXvSxxZrTLEkpyVqZiUXhAsJ11RQzcqQ,13701
|
|
81
|
-
test/integration/connectors/utils/validation/utils.py,sha256=xYYvAbqP6_lZyH09_JjB4w2Sf8aQPvDVT5vZTs05ILs,1428
|
|
82
|
-
test/integration/connectors/weaviate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
83
|
-
test/integration/connectors/weaviate/conftest.py,sha256=6Q6QdrLJmGHowRFSmoVSzup2EX6qASfS2Z5tqlpTm9M,387
|
|
84
|
-
test/integration/connectors/weaviate/test_cloud.py,sha256=1r16tNUSsq8JawfjgeRWtcfw2COYma0b298mBDZU__o,1281
|
|
85
|
-
test/integration/connectors/weaviate/test_local.py,sha256=q8vSpmFeTapSoUSNChIpc6qfyMdcICo28CJSm7L7V-o,5337
|
|
86
|
-
test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
87
|
-
test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
|
|
88
|
-
test/integration/embedders/test_azure_openai.py,sha256=nQle565sZu9N4xjWHBOfiOFGtldOdIK6S1YID00hK78,1787
|
|
89
|
-
test/integration/embedders/test_bedrock.py,sha256=-hy4wPmgBpXrk7OtPbZ8IE0qip6Ht1GptGLpmpqHxiw,3550
|
|
90
|
-
test/integration/embedders/test_huggingface.py,sha256=jNNBpXUA_UpgXXVo_Q2xYGFaknIZJu8eKJ4ifq19eig,986
|
|
91
|
-
test/integration/embedders/test_mixedbread.py,sha256=0Mcq9JU9wAJ_Wz2Enwyz4qGSbivDDqmE6ZJxlZpMuWw,1993
|
|
92
|
-
test/integration/embedders/test_octoai.py,sha256=R6NcBxPL_sVJLGmVTEjsaxfqjnLWxViXwL6vaze666s,2194
|
|
93
|
-
test/integration/embedders/test_openai.py,sha256=iwk56ZK1gllFtatcp6W_PA7x9h2M2ReIg4GXIWXp5qo,2124
|
|
94
|
-
test/integration/embedders/test_togetherai.py,sha256=5PzIdnvjMpjods_rhUqQ2nbVRXgld_F7OGWssnHxa0I,2202
|
|
95
|
-
test/integration/embedders/test_vertexai.py,sha256=xpjauYnRBxOqft0HXEMFk1iRoeQJm7E2eSunZbjj_H4,1827
|
|
96
|
-
test/integration/embedders/test_voyageai.py,sha256=kcuGxhG6kR1XVqmTw7La3MXnIC06CtjOq_n48nkF-eQ,2411
|
|
97
|
-
test/integration/embedders/utils.py,sha256=Sqqg-X31ZV1hojqPQBaZgM2lb2u8cG6s6OnH9JRsFjs,2717
|
|
98
|
-
test/integration/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
99
|
-
test/integration/partitioners/test_partitioner.py,sha256=UYQd9x2-66F_FFeulC_2eg3FtjswK0Mt9Hwmg4b_pPs,2784
|
|
100
|
-
test/unit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
101
|
-
test/unit/test_error.py,sha256=RflmngCdFNKOLXVfLnUdNfY3Mfg3k7DTEzfIl0B-syU,840
|
|
102
|
-
test/unit/test_html.py,sha256=ubsck9pVOnPDFL0P8TZkko_46MIaFLlSNQcsgFDgYoE,4496
|
|
103
|
-
test/unit/test_interfaces.py,sha256=Gv3WMJsw_3xPLy3nI3dIcJuLa2WvKYszSjI_W9XLtVM,787
|
|
104
|
-
test/unit/test_logger.py,sha256=0SKndXE_VRd8XmUHkrj7zuBQHZscXx3ZQllMEOvtF9Y,2380
|
|
105
|
-
test/unit/test_utils.py,sha256=xeSM02zOChSOO3dzDOVAEiQme1rQ8drjnJF93S3BFmk,7247
|
|
106
|
-
test/unit/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
107
|
-
test/unit/chunkers/test_chunkers.py,sha256=wRxbSj7P1FwRGDyVcARkm8CQSVCBCro3nTe54UoUBzc,1769
|
|
108
|
-
test/unit/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
109
|
-
test/unit/connectors/test_confluence.py,sha256=Hr91nKw6018FEih-vSrVXFk0p0E9bSL1IeZVDxvITJ0,1916
|
|
110
|
-
test/unit/connectors/test_jira.py,sha256=sPRjoBVDmc-o2RWilcjs-VW_jkafIqSXBE9duCELfoA,12110
|
|
111
|
-
test/unit/connectors/ibm_watsonx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
112
|
-
test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py,sha256=Yj9swJ_phiam-CFBzGnAFCkd8_oqzdA3ZQJQdpV8T1E,14503
|
|
113
|
-
test/unit/connectors/motherduck/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
114
|
-
test/unit/connectors/motherduck/test_base.py,sha256=4SHI3Hx1a28eNE_VDbl8gAssNZRIUNSGPNZgrkzjYWs,2429
|
|
115
|
-
test/unit/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
116
|
-
test/unit/connectors/sql/test_sql.py,sha256=SfWYDBrR7pHFziKVEe6IAq5E3EQIz99ikQN3LnF1DrY,4622
|
|
117
|
-
test/unit/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
118
|
-
test/unit/embed/test_mixedbreadai.py,sha256=Z9A9jg5eJRF4OgYTgbIzQUI27J16uv2qj2kp_Rv0r9k,1428
|
|
119
|
-
test/unit/embed/test_octoai.py,sha256=CWVrieqJh-N40J9n3nzqQPLOH9T1_mldkpZYRiHKxrg,1055
|
|
120
|
-
test/unit/embed/test_openai.py,sha256=RQ-4QIcRvq0JSBFNit_NRcy61EsOv7xh_TcKJKHwHGM,1186
|
|
121
|
-
test/unit/embed/test_vertexai.py,sha256=k_dK-yR_yx1RAOpmAgfcPo-osRDJP9aRCMCsJmQPxYI,1050
|
|
122
|
-
test/unit/embed/test_voyageai.py,sha256=QWoDZEX8cAIkTgn4NtIyGKzOAu-GmudD4VMujnfi1Gg,983
|
|
123
|
-
test/unit/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
124
|
-
test/unit/embedders/test_bedrock.py,sha256=HMaweO_v_9Y1SE2m5QImXP73cb26vNTUfc1onTBa1-g,1074
|
|
125
|
-
test/unit/embedders/test_huggingface.py,sha256=BpMC_AMlifjNf4Y61yBNR_8UU3H_x3ut2NnpFuB4kDo,1543
|
|
126
|
-
test/unit/embedders/test_mixedbread.py,sha256=8yT942TVVXC5EkrT_ReZie1In537BaAD6esRjntgxuU,1021
|
|
127
|
-
test/unit/embedders/test_octoai.py,sha256=JMfrFz25QfEh0ieB4bJneZd4XtNcdPOnNsN1Fj7gU-Q,1012
|
|
128
|
-
test/unit/embedders/test_openai.py,sha256=HoEW95289Ijgo3PJ-pEaDOknfdkSjPXTgkXmE6jJomY,1012
|
|
129
|
-
test/unit/embedders/test_togetherai.py,sha256=s24V_geDNZzblU74sSdC_m4Lqlzjp00RMpy56ptfdx0,1009
|
|
130
|
-
test/unit/embedders/test_vertexai.py,sha256=4gLJaV9Nr2k_SgA-EyJ_sDvm8XvyGbn2zTs4F4CXU2g,1142
|
|
131
|
-
test/unit/embedders/test_voyageai.py,sha256=VaWthF64pmxc-fOBbAQsEzMw7tV4t4Nz_H_Cc5tuAYQ,1193
|
|
132
|
-
test/unit/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
133
|
-
test/unit/partitioners/test_partitioner.py,sha256=eJoUDbiKtweyU1WYfsY5KqVqoPjbx1MUsyHkbvvTNEk,2275
|
|
134
|
-
test/unit/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
135
|
-
test/unit/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
136
1
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
137
|
-
unstructured_ingest/__version__.py,sha256=
|
|
2
|
+
unstructured_ingest/__version__.py,sha256=Bkcw0TdkF4pWY_01piNW3D1XaG9Q-r4aIMSbnIeStCE,42
|
|
138
3
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
139
4
|
unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
|
|
140
|
-
unstructured_ingest/logger.py,sha256=
|
|
5
|
+
unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
|
|
141
6
|
unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
|
|
142
7
|
unstructured_ingest/otel.py,sha256=NsUqOolA0gt69eFhZLABjVpcKoM9aus-AbxIKqWqPTc,4127
|
|
143
|
-
unstructured_ingest/unstructured_api.py,sha256=
|
|
8
|
+
unstructured_ingest/unstructured_api.py,sha256=4e2ZNWIihk0eje4R3ZQ0NOYNbmMZDv_O-rnJo94kaGE,5127
|
|
9
|
+
unstructured_ingest/cli/README.md,sha256=5LfM0ys1aFyCiCjlwZsi_9Mb5Nrq3MmYt3IpmUybnCE,1507
|
|
144
10
|
unstructured_ingest/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
145
11
|
unstructured_ingest/cli/cli.py,sha256=ZeIE9jP8fe7260nE8v7xYgLdqX9OtkQXAXSGWIkHLcA,645
|
|
146
12
|
unstructured_ingest/cli/cmds.py,sha256=EhDW5UX4V-N8Svjba4w7YWnRYl26__ADwzNXrfFBxM4,483
|
|
@@ -159,7 +25,7 @@ unstructured_ingest/embed/azure_openai.py,sha256=_-I-nwd-wdCiKkSdYBL4UKrTZ2UPWsM
|
|
|
159
25
|
unstructured_ingest/embed/bedrock.py,sha256=t58V_QQjWPO62CTuP0aLFMDisPeXpxG2xSFGUhN-JvI,7726
|
|
160
26
|
unstructured_ingest/embed/huggingface.py,sha256=-ZD17O_H_UnK80fqig6y6wNKJckjx0HuAkY5vgPvk8M,2259
|
|
161
27
|
unstructured_ingest/embed/interfaces.py,sha256=SdB3t8eMPB8CbXzOYBpgwjzTvyb4T19L61Sr6Jy3_rw,5099
|
|
162
|
-
unstructured_ingest/embed/mixedbreadai.py,sha256
|
|
28
|
+
unstructured_ingest/embed/mixedbreadai.py,sha256=z8RaG1hGBL840yElvI1Dbnf7llsOGEBbZ2X_QlFflZg,4498
|
|
163
29
|
unstructured_ingest/embed/octoai.py,sha256=136UzSuQgV8Nxel2pB8Iv-4AvlFU6RRCa7N64fWFl6o,3855
|
|
164
30
|
unstructured_ingest/embed/openai.py,sha256=hK98QXb_8oN1E-QwNT6JElzYOxG1mvZCFYQW57pjv0E,3372
|
|
165
31
|
unstructured_ingest/embed/togetherai.py,sha256=T0v0_yTovy3sSeLPvk3PJccqcnmqCc_vxYs6pumjK3I,2983
|
|
@@ -171,12 +37,12 @@ unstructured_ingest/interfaces/downloader.py,sha256=xX0ZzsFRSzZb7SAeoeQph8sIbVq1
|
|
|
171
37
|
unstructured_ingest/interfaces/indexer.py,sha256=c2FwWJEQHfFD6vO-tGfYLpLiIs-TYViLAt8YmHfDbaM,824
|
|
172
38
|
unstructured_ingest/interfaces/process.py,sha256=S3A_9gkwwGC-iQxvnpj3Er6IJAjAT5npzpSgxuFAzUM,449
|
|
173
39
|
unstructured_ingest/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
|
|
174
|
-
unstructured_ingest/interfaces/upload_stager.py,sha256=
|
|
175
|
-
unstructured_ingest/interfaces/uploader.py,sha256=
|
|
40
|
+
unstructured_ingest/interfaces/upload_stager.py,sha256=eYhbdM0Dt8FValZAe41dWnxehhvfMLDOSTp7UoR5HB0,3147
|
|
41
|
+
unstructured_ingest/interfaces/uploader.py,sha256=6HyWttmosKreuWJCFp3TxKCuzDCj_RJdGEPwxhwapQk,2053
|
|
176
42
|
unstructured_ingest/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
177
|
-
unstructured_ingest/pipeline/interfaces.py,sha256=
|
|
43
|
+
unstructured_ingest/pipeline/interfaces.py,sha256=Jb62t0P52hzzRWe-zHxcySgINMnPDOWc4dfJooYUEC8,8642
|
|
178
44
|
unstructured_ingest/pipeline/otel.py,sha256=wUVmUPWIk_X3yw0MuI-5QJ2wU2rQgaapinnS98iQBxI,1082
|
|
179
|
-
unstructured_ingest/pipeline/pipeline.py,sha256=
|
|
45
|
+
unstructured_ingest/pipeline/pipeline.py,sha256=LKCY7kcTfWOYF8k9k3Rw8sYZdNNAH8Qo_qZFHNIkyEU,16781
|
|
180
46
|
unstructured_ingest/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
181
47
|
unstructured_ingest/pipeline/steps/chunk.py,sha256=LiJMzgB0ZEDnPuYz5IwuK2k2iJoBIcfftp9cVXMOlf0,3179
|
|
182
48
|
unstructured_ingest/pipeline/steps/download.py,sha256=cw8KbZ4CNZ_on4xam-VehNnLvKkUourazvcaUB-ihGY,8205
|
|
@@ -188,51 +54,53 @@ unstructured_ingest/pipeline/steps/stage.py,sha256=oobrvLtZOOPEnXQXMDUnhaaKhheuS
|
|
|
188
54
|
unstructured_ingest/pipeline/steps/uncompress.py,sha256=clyZKwKQLLKbkQDD2q98Aw1UAe3VqUY0n7_KWtGVMSw,1756
|
|
189
55
|
unstructured_ingest/pipeline/steps/upload.py,sha256=4hvh--03jzbGlxO0l1_2D5ec_EaGu04I5bFsxH0MnTg,1986
|
|
190
56
|
unstructured_ingest/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
|
|
191
|
-
unstructured_ingest/processes/chunker.py,sha256=
|
|
57
|
+
unstructured_ingest/processes/chunker.py,sha256=v6ICNdBqjyAOk8f_cQajIJRdrzGUSm1UFGnQqzihpi0,5806
|
|
192
58
|
unstructured_ingest/processes/connector_registry.py,sha256=PoNhB-qOxvalaN-ssPWWhZSQ9QDdKMa6KzskCYYebfA,2195
|
|
193
|
-
unstructured_ingest/processes/embedder.py,sha256=
|
|
194
|
-
unstructured_ingest/processes/filter.py,sha256=
|
|
195
|
-
unstructured_ingest/processes/partitioner.py,sha256=
|
|
59
|
+
unstructured_ingest/processes/embedder.py,sha256=jJcnAyRWNX5XN2WpPcT8bunbFeVlUa1P3yk4G_JIcvo,7894
|
|
60
|
+
unstructured_ingest/processes/filter.py,sha256=oc3SYukRYfzx8sdJqF3KxdwZcrA-1U8PTAipMdZkW0c,2148
|
|
61
|
+
unstructured_ingest/processes/partitioner.py,sha256=Kn_BSFYvOkwo8fqThw_cOpgD0Um-AdoSqclZplcdNBA,10109
|
|
196
62
|
unstructured_ingest/processes/uncompress.py,sha256=o9JL3Bza4KPUTmrB39-v_5SuK_fYwhwFAhjQi2Pm8h8,2426
|
|
197
63
|
unstructured_ingest/processes/connectors/__init__.py,sha256=cR4ZH2dpPod7QR6OsgMx8X9kpFcEc1TVfQndUNoKGzI,6812
|
|
198
|
-
unstructured_ingest/processes/connectors/airtable.py,sha256=
|
|
199
|
-
unstructured_ingest/processes/connectors/astradb.py,sha256=
|
|
200
|
-
unstructured_ingest/processes/connectors/azure_ai_search.py,sha256=
|
|
64
|
+
unstructured_ingest/processes/connectors/airtable.py,sha256=smx5qBSUKwM8V6Xcc7ikrf8hYQUQ94YrB1L0WVeRDv0,9024
|
|
65
|
+
unstructured_ingest/processes/connectors/astradb.py,sha256=ONt8vHv5h8B6goGba9l0YPS0y5EnSAoowtfq92-E-RY,18307
|
|
66
|
+
unstructured_ingest/processes/connectors/azure_ai_search.py,sha256=szhSRXzUHk0DE2hGFfjGc_jNFzlUwiRlCtIkuu7tmnk,11524
|
|
201
67
|
unstructured_ingest/processes/connectors/chroma.py,sha256=q5_Fu4xb6_W_NyrPxVa3-jVwZLqVdlBNlR4dFvbd7l0,7235
|
|
202
|
-
unstructured_ingest/processes/connectors/confluence.py,sha256=
|
|
68
|
+
unstructured_ingest/processes/connectors/confluence.py,sha256=BbZ-Ecdcn92X8dHQ0egEJtBoX16gM0-zMcBLdn-wQsM,12090
|
|
203
69
|
unstructured_ingest/processes/connectors/couchbase.py,sha256=KCHoYDNya9B05NIB5D78zXoizFyfpJRepcYBe1nLSOs,12298
|
|
204
|
-
unstructured_ingest/processes/connectors/delta_table.py,sha256=
|
|
70
|
+
unstructured_ingest/processes/connectors/delta_table.py,sha256=2DFox_Vzoopt_D3Jy3rCjrrTGMutG2INIrwCeoIohRY,7340
|
|
205
71
|
unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMkcd8lcLJC0uqbo4izjdZ3rU,5294
|
|
206
72
|
unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
|
|
207
|
-
unstructured_ingest/processes/connectors/gitlab.py,sha256=
|
|
208
|
-
unstructured_ingest/processes/connectors/google_drive.py,sha256=
|
|
73
|
+
unstructured_ingest/processes/connectors/gitlab.py,sha256=6h1CdqznJmzeWxGfXrFLdNdT23PExGnUMMX7usK_4Kk,10013
|
|
74
|
+
unstructured_ingest/processes/connectors/google_drive.py,sha256=CqUwtK4NhKhNfozsunVzFUsKMYBEgRS1eci2pIZLnJE,20055
|
|
209
75
|
unstructured_ingest/processes/connectors/jira.py,sha256=eG8yTn8ZVEz7rBJ-ha8i_d9hEh6VALN6QJT_vbYvbL0,17142
|
|
210
|
-
unstructured_ingest/processes/connectors/kdbai.py,sha256=
|
|
76
|
+
unstructured_ingest/processes/connectors/kdbai.py,sha256=XhxYpKSAoFPBsDQWwNuLX03DCxOVr7yquj9VYM55Rtc,5174
|
|
211
77
|
unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9VhzKZuuQUpDS-cVNAQ2g,7426
|
|
212
78
|
unstructured_ingest/processes/connectors/milvus.py,sha256=Jr9cul7By03tGAPFnFBoqncnNWwbhKd-qbmkuqnin8U,8908
|
|
213
79
|
unstructured_ingest/processes/connectors/mongodb.py,sha256=1g_5bfbS6lah3nsOXqLAanR3zNYJ47_Njw_uV-uj3_U,14324
|
|
214
80
|
unstructured_ingest/processes/connectors/neo4j.py,sha256=eAM2XWSLA5caKJmbcd7ctn2TapreIJEXRoHoxT1OZwA,18718
|
|
215
81
|
unstructured_ingest/processes/connectors/onedrive.py,sha256=VBkKlbJgR7uKlKTnjNybAw6ZawLKflDPpy2uVvgWYWw,19296
|
|
216
|
-
unstructured_ingest/processes/connectors/outlook.py,sha256=
|
|
217
|
-
unstructured_ingest/processes/connectors/pinecone.py,sha256=
|
|
218
|
-
unstructured_ingest/processes/connectors/redisdb.py,sha256=
|
|
219
|
-
unstructured_ingest/processes/connectors/salesforce.py,sha256=
|
|
82
|
+
unstructured_ingest/processes/connectors/outlook.py,sha256=zHM5frO7CqQG0-KcTyX49aZeSlsvVrl8kh_lR_ESgQw,9275
|
|
83
|
+
unstructured_ingest/processes/connectors/pinecone.py,sha256=BdO1PS_Y6FOeL-7uPl-Eh6ij1wHOwMkopOzKQGQ9Ac0,13979
|
|
84
|
+
unstructured_ingest/processes/connectors/redisdb.py,sha256=YzvSlfHs83XWsWMaIC3bV5enKfxejMQ9BQ8CtXfnJ5o,6923
|
|
85
|
+
unstructured_ingest/processes/connectors/salesforce.py,sha256=OaKEWCqZrirHqFJ650K5jSPwYlWefPOapas8Y-4D9oc,11661
|
|
220
86
|
unstructured_ingest/processes/connectors/sharepoint.py,sha256=PowaqMzWr-VCW1rnwcAeRhHyE55kJ9J9FCVlrmtzN0E,4827
|
|
221
|
-
unstructured_ingest/processes/connectors/slack.py,sha256=
|
|
87
|
+
unstructured_ingest/processes/connectors/slack.py,sha256=EkFj9PcAu5_gF2xLogikKDADLbJYq-_jvchzYrTdLO4,9224
|
|
222
88
|
unstructured_ingest/processes/connectors/utils.py,sha256=TAd0hb1f291N-q7-TUe6JKSCGkhqDyo7Ij8zmliBZUc,2071
|
|
223
|
-
unstructured_ingest/processes/connectors/vectara.py,sha256=
|
|
89
|
+
unstructured_ingest/processes/connectors/vectara.py,sha256=xrC6jkgW8BII4UjdzUelDu122xT484cpfMTK2wl-sko,12292
|
|
224
90
|
unstructured_ingest/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
91
|
+
unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=dUZZDNkyvQXKqoAThRz3ek7zaUE2l_LAQimlG5WZhH4,211
|
|
92
|
+
unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
|
|
225
93
|
unstructured_ingest/processes/connectors/databricks/__init__.py,sha256=RtKAPyNtXh6fzEsOQ08pA0-vC1uMr3KqYG6cqiBoo70,2133
|
|
226
94
|
unstructured_ingest/processes/connectors/databricks/volumes.py,sha256=OWQrne9-5hPzc-kxGa2P53M3DoksDzMDyjLhQyihdCo,8020
|
|
227
|
-
unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=
|
|
228
|
-
unstructured_ingest/processes/connectors/databricks/volumes_azure.py,sha256=
|
|
229
|
-
unstructured_ingest/processes/connectors/databricks/volumes_gcp.py,sha256=
|
|
95
|
+
unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=WhGTp6aRTLSdc4GChCL4mz2b-IanderW8j1IqezX6YA,2958
|
|
96
|
+
unstructured_ingest/processes/connectors/databricks/volumes_azure.py,sha256=pF2d6uAIbwJJUeOIG5xknUMCGc5d9Aztmc2776wp-a0,3740
|
|
97
|
+
unstructured_ingest/processes/connectors/databricks/volumes_gcp.py,sha256=y9AvVl6PtnIxlTlrPj_wyHBDBRJNq3uoTOuZwTryNg8,2994
|
|
230
98
|
unstructured_ingest/processes/connectors/databricks/volumes_native.py,sha256=pivySGMmFSsyuB42ARAWAPXFQ7qTQxO3dfEoE23pBNM,3104
|
|
231
|
-
unstructured_ingest/processes/connectors/databricks/volumes_table.py,sha256=
|
|
99
|
+
unstructured_ingest/processes/connectors/databricks/volumes_table.py,sha256=tqi6PpYpIBMTZcYZXl5Lw0YuawyDvjHI08TKPFFTTr0,8194
|
|
232
100
|
unstructured_ingest/processes/connectors/duckdb/__init__.py,sha256=Dr6BRJJGefJnnp_vn5W5gBd7vrCCXTMLweuDIqTP-fM,558
|
|
233
|
-
unstructured_ingest/processes/connectors/duckdb/base.py,sha256
|
|
234
|
-
unstructured_ingest/processes/connectors/duckdb/duckdb.py,sha256
|
|
235
|
-
unstructured_ingest/processes/connectors/duckdb/motherduck.py,sha256=
|
|
101
|
+
unstructured_ingest/processes/connectors/duckdb/base.py,sha256=bTLhilg6mgERNCpeeNNl7wxy3xkOt23O9XpCyD0WVY4,2945
|
|
102
|
+
unstructured_ingest/processes/connectors/duckdb/duckdb.py,sha256=jsmibTd_yvYzkCT05HhCJvplyobtjfNILC3zyTuCcVY,4464
|
|
103
|
+
unstructured_ingest/processes/connectors/duckdb/motherduck.py,sha256=Atr2MjJQGFGWh5aeiQsLpUbFw-aCZH-ABI1LprDh5VI,4727
|
|
236
104
|
unstructured_ingest/processes/connectors/elasticsearch/__init__.py,sha256=M8mmBWoP6J5R3hxg6BQUMexYlTUxUxdBoIcjUop8yt8,826
|
|
237
105
|
unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py,sha256=iNedi-JVkAvdF15CbKwVRwXJazyST6ha3zcNyyGwVmQ,19003
|
|
238
106
|
unstructured_ingest/processes/connectors/elasticsearch/opensearch.py,sha256=wggHvw8h-X0-3WPNxj9rt2xkrE7Pv7CV0B0KzTMzBB4,6944
|
|
@@ -242,11 +110,11 @@ unstructured_ingest/processes/connectors/fsspec/box.py,sha256=1gLS7xR2vbjgKBrQ4Z
|
|
|
242
110
|
unstructured_ingest/processes/connectors/fsspec/dropbox.py,sha256=HwwKjQmjM7yFk9Esh_F20xDisRPXGUkFduzaasByRDE,8355
|
|
243
111
|
unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=13TJmipcz9mYQT6Qi4WmqUV3veHIhbLZIW_70qY-5tI,14469
|
|
244
112
|
unstructured_ingest/processes/connectors/fsspec/gcs.py,sha256=ouxISCKpZTAj3T6pWGYbASu93wytJjl5WSICvQcrgfE,7172
|
|
245
|
-
unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=
|
|
113
|
+
unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=2ZV6b2E2pIsf_ab1Lty74FwpMnJZhpQUdamPgpwcKsQ,7141
|
|
246
114
|
unstructured_ingest/processes/connectors/fsspec/sftp.py,sha256=pR_a2SgLjt8ffNkariHrPB1E0HVSTj5h3pt7KxTU3TI,6371
|
|
247
115
|
unstructured_ingest/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
|
|
248
116
|
unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py,sha256=kf0UpgdAY2KK1R1FbAB6GEBBAIOeYQ8cZIr3bp660qM,374
|
|
249
|
-
unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py,sha256=
|
|
117
|
+
unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py,sha256=SpZIqjreXpLTpZfezhG6xkZ_h7w-QWmdjXDBG6mlddQ,11729
|
|
250
118
|
unstructured_ingest/processes/connectors/kafka/__init__.py,sha256=pFN2cWwAStiGTAsQ616GIWKi_hDv0s74ZvNqhJEp1Pc,751
|
|
251
119
|
unstructured_ingest/processes/connectors/kafka/cloud.py,sha256=Ki6iOLoZ86tYWdnLnMWYvb2hUCneKqo4mTJcfXh7YoQ,3432
|
|
252
120
|
unstructured_ingest/processes/connectors/kafka/kafka.py,sha256=7NMvWijfoliyAgnmz8TM8oJt5x7RDzC-ABPdYAm7J3w,10306
|
|
@@ -334,13 +202,13 @@ unstructured_ingest/processes/connectors/qdrant/local.py,sha256=3b43kSVoGMcFWTRi
|
|
|
334
202
|
unstructured_ingest/processes/connectors/qdrant/qdrant.py,sha256=1Y1-nfKqt4YooqKMqRKVE_ItV0S1v__PTaEhI3vvtOE,5456
|
|
335
203
|
unstructured_ingest/processes/connectors/qdrant/server.py,sha256=biyF4xr6e7CH0loj_OPt02Xrx4DMkkxqYMAsVXuJ5-Q,1607
|
|
336
204
|
unstructured_ingest/processes/connectors/sql/__init__.py,sha256=WNO7jSL1ABw7K5IxLc-eeKWGGJDk7jCp_OTLdTTkZug,2056
|
|
337
|
-
unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py,sha256=
|
|
338
|
-
unstructured_ingest/processes/connectors/sql/postgres.py,sha256=
|
|
339
|
-
unstructured_ingest/processes/connectors/sql/singlestore.py,sha256=
|
|
340
|
-
unstructured_ingest/processes/connectors/sql/snowflake.py,sha256=
|
|
341
|
-
unstructured_ingest/processes/connectors/sql/sql.py,sha256=
|
|
342
|
-
unstructured_ingest/processes/connectors/sql/sqlite.py,sha256=
|
|
343
|
-
unstructured_ingest/processes/connectors/sql/vastdb.py,sha256=
|
|
205
|
+
unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py,sha256=_IZFFGQUsHV9ScOOISrm6c5FSd4PnX91ePj_COat-gk,9320
|
|
206
|
+
unstructured_ingest/processes/connectors/sql/postgres.py,sha256=kDIL8Cj45EDpKqit1_araRpP4v3cb__QbYqoINg9f2k,5403
|
|
207
|
+
unstructured_ingest/processes/connectors/sql/singlestore.py,sha256=B46lpvyAj1AArpACi9MXbXD1-52zF6Dsj3RJtD1g4r0,5955
|
|
208
|
+
unstructured_ingest/processes/connectors/sql/snowflake.py,sha256=GSEoNrIoJM7p-Q-PrFiONamoxWzjQG8wZJG3mw5Uwdk,9589
|
|
209
|
+
unstructured_ingest/processes/connectors/sql/sql.py,sha256=yUGnv4MF_vT3VHdg7hhGiTD0be94ll-HyhHmRKQp_vQ,15712
|
|
210
|
+
unstructured_ingest/processes/connectors/sql/sqlite.py,sha256=V3OfRrXGGhTa_R2FPA-ysn95HHCv9x_VEBKVDsSGsbs,5549
|
|
211
|
+
unstructured_ingest/processes/connectors/sql/vastdb.py,sha256=trhvUBumDmj2rLjmxFBKw9L9wF6ZpssF0wfmRaG97H0,9803
|
|
344
212
|
unstructured_ingest/processes/connectors/weaviate/__init__.py,sha256=1Vnz8hm_Cf3NkQUTz5ZD4QkbLSVql4UvRoY2j2FnC9k,853
|
|
345
213
|
unstructured_ingest/processes/connectors/weaviate/cloud.py,sha256=tDQ4Vfph1RwADzS0Lk4TSoeT6TZ2gX9DNi78yXkgDw0,6245
|
|
346
214
|
unstructured_ingest/processes/connectors/weaviate/embedded.py,sha256=buizqBd6PSbd9VgRrOj43GZEorBpDFkUIkE6sN9emhw,3008
|
|
@@ -348,23 +216,22 @@ unstructured_ingest/processes/connectors/weaviate/local.py,sha256=4fgZsL9dgnWuaS
|
|
|
348
216
|
unstructured_ingest/processes/connectors/weaviate/weaviate.py,sha256=SqtGcQgejGH0N1R49tGrUtGcTB8mt7sywXmWFTIcpB8,12866
|
|
349
217
|
unstructured_ingest/processes/connectors/zendesk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
350
218
|
unstructured_ingest/processes/connectors/zendesk/client.py,sha256=GvPIpx4aYdD58-edHgvCFjFao94uR0O5Yf4dT9NCmSk,11952
|
|
351
|
-
unstructured_ingest/processes/connectors/zendesk/zendesk.py,sha256=
|
|
219
|
+
unstructured_ingest/processes/connectors/zendesk/zendesk.py,sha256=j5zS_7vJmYDEQtysz_UfwIUH65gc4r-Zjc1LocJr9FM,9033
|
|
352
220
|
unstructured_ingest/processes/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
353
|
-
unstructured_ingest/processes/utils/blob_storage.py,sha256=
|
|
221
|
+
unstructured_ingest/processes/utils/blob_storage.py,sha256=apMUmm9loxdbTRkkLH4VhG9kUVyiw9PFUJheSDxSxPk,1023
|
|
354
222
|
unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
355
223
|
unstructured_ingest/utils/chunking.py,sha256=9b3sXMA6L8RW5xAkKQbwdtVudGLAcj_sgT6Grh5tyYM,1870
|
|
356
224
|
unstructured_ingest/utils/compression.py,sha256=_BkFREoa0fkJ6z-1lY76HCmy8mLymbPCg55iMUQTd5c,2653
|
|
357
225
|
unstructured_ingest/utils/constants.py,sha256=pDspTYz-nEojHBqrZNfssGEiujmVa02pIWL63PQP9sU,103
|
|
358
|
-
unstructured_ingest/utils/data_prep.py,sha256=
|
|
226
|
+
unstructured_ingest/utils/data_prep.py,sha256=yqrv7x_nlj0y3uaN0m0Bnsekb7VIQnwABWPa24KU5QI,7426
|
|
359
227
|
unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWhmyBa7776Y,2400
|
|
360
228
|
unstructured_ingest/utils/html.py,sha256=0WduP8tI5S3nHFQi6XHNPHgsIC9j3iWwyIayX9gDLiE,6386
|
|
361
229
|
unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01qAbElH0,1201
|
|
362
230
|
unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
|
|
363
|
-
unstructured_ingest/utils/string_and_date_utils.py,sha256=
|
|
231
|
+
unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
|
|
364
232
|
unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
|
|
365
|
-
unstructured_ingest-0.
|
|
366
|
-
unstructured_ingest-0.
|
|
367
|
-
unstructured_ingest-0.
|
|
368
|
-
unstructured_ingest-0.
|
|
369
|
-
unstructured_ingest-0.
|
|
370
|
-
unstructured_ingest-0.7.1.dist-info/RECORD,,
|
|
233
|
+
unstructured_ingest-1.0.1.dist-info/METADATA,sha256=k_kEG2BSsnNaIyDSJWiciUW0Z-HDiPF_flO6kLjn8QI,8713
|
|
234
|
+
unstructured_ingest-1.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
235
|
+
unstructured_ingest-1.0.1.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
236
|
+
unstructured_ingest-1.0.1.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
237
|
+
unstructured_ingest-1.0.1.dist-info/RECORD,,
|
examples/__init__.py
DELETED
|
File without changes
|
examples/airtable.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import ProcessorConfig
|
|
5
|
-
from unstructured_ingest.logger import logger
|
|
6
|
-
from unstructured_ingest.pipeline.pipeline import Pipeline
|
|
7
|
-
from unstructured_ingest.processes.chunker import ChunkerConfig
|
|
8
|
-
from unstructured_ingest.processes.connectors.airtable import (
|
|
9
|
-
CONNECTOR_TYPE,
|
|
10
|
-
AirtableAccessConfig,
|
|
11
|
-
AirtableConnectionConfig,
|
|
12
|
-
AirtableDownloaderConfig,
|
|
13
|
-
AirtableIndexerConfig,
|
|
14
|
-
)
|
|
15
|
-
from unstructured_ingest.processes.connectors.local import (
|
|
16
|
-
LocalUploaderConfig,
|
|
17
|
-
)
|
|
18
|
-
from unstructured_ingest.processes.embedder import EmbedderConfig
|
|
19
|
-
from unstructured_ingest.processes.partitioner import PartitionerConfig
|
|
20
|
-
|
|
21
|
-
base_path = Path(__file__).parent.parent.parent.parent
|
|
22
|
-
docs_path = base_path / "example-docs"
|
|
23
|
-
work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
|
|
24
|
-
output_path = work_dir / "output"
|
|
25
|
-
download_path = work_dir / "download"
|
|
26
|
-
|
|
27
|
-
if __name__ == "__main__":
|
|
28
|
-
logger.info(f"writing all content in: {work_dir.resolve()}")
|
|
29
|
-
Pipeline.from_configs(
|
|
30
|
-
context=ProcessorConfig(work_dir=str(work_dir.resolve()), verbose=True),
|
|
31
|
-
indexer_config=AirtableIndexerConfig(
|
|
32
|
-
list_of_paths=["app5YQxSfp220fWtm", "appJ43QmP8I17zu88"]
|
|
33
|
-
),
|
|
34
|
-
downloader_config=AirtableDownloaderConfig(download_dir=download_path),
|
|
35
|
-
source_connection_config=AirtableConnectionConfig(
|
|
36
|
-
access_config=AirtableAccessConfig(
|
|
37
|
-
personal_access_token=os.getenv("AIRTABLE_PERSONAL_ACCESS_TOKEN")
|
|
38
|
-
)
|
|
39
|
-
),
|
|
40
|
-
partitioner_config=PartitionerConfig(strategy="fast"),
|
|
41
|
-
chunker_config=ChunkerConfig(chunking_strategy="by_title"),
|
|
42
|
-
embedder_config=EmbedderConfig(embedding_provider="huggingface"),
|
|
43
|
-
uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())),
|
|
44
|
-
).run()
|