unstructured-ingest 0.7.2__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/cli/README.md +28 -0
- unstructured_ingest/embed/mixedbreadai.py +0 -1
- unstructured_ingest/interfaces/upload_stager.py +2 -2
- unstructured_ingest/interfaces/uploader.py +3 -3
- unstructured_ingest/main.py +0 -0
- unstructured_ingest/pipeline/interfaces.py +1 -1
- unstructured_ingest/pipeline/pipeline.py +1 -1
- unstructured_ingest/processes/chunker.py +4 -0
- unstructured_ingest/processes/connectors/airtable.py +4 -2
- unstructured_ingest/processes/connectors/astradb.py +2 -2
- unstructured_ingest/processes/connectors/azure_ai_search.py +1 -1
- unstructured_ingest/processes/connectors/confluence.py +0 -1
- unstructured_ingest/processes/connectors/databricks/volumes_aws.py +1 -1
- unstructured_ingest/processes/connectors/databricks/volumes_azure.py +2 -2
- unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +1 -1
- unstructured_ingest/processes/connectors/databricks/volumes_table.py +1 -2
- unstructured_ingest/processes/connectors/delta_table.py +1 -0
- unstructured_ingest/processes/connectors/duckdb/base.py +2 -2
- unstructured_ingest/processes/connectors/duckdb/duckdb.py +3 -3
- unstructured_ingest/processes/connectors/duckdb/motherduck.py +3 -3
- unstructured_ingest/processes/connectors/fsspec/s3.py +5 -3
- unstructured_ingest/processes/connectors/gitlab.py +1 -2
- unstructured_ingest/processes/connectors/google_drive.py +0 -2
- unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -7
- unstructured_ingest/processes/connectors/kdbai.py +1 -0
- unstructured_ingest/processes/connectors/outlook.py +1 -2
- unstructured_ingest/processes/connectors/pinecone.py +0 -1
- unstructured_ingest/processes/connectors/redisdb.py +28 -24
- unstructured_ingest/processes/connectors/salesforce.py +1 -1
- unstructured_ingest/processes/connectors/slack.py +1 -2
- unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +5 -0
- unstructured_ingest/processes/connectors/sql/postgres.py +7 -1
- unstructured_ingest/processes/connectors/sql/singlestore.py +11 -6
- unstructured_ingest/processes/connectors/sql/snowflake.py +5 -0
- unstructured_ingest/processes/connectors/sql/sql.py +3 -4
- unstructured_ingest/processes/connectors/sql/sqlite.py +5 -0
- unstructured_ingest/processes/connectors/sql/vastdb.py +7 -3
- unstructured_ingest/processes/connectors/vectara.py +0 -2
- unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -2
- unstructured_ingest/processes/embedder.py +2 -2
- unstructured_ingest/processes/filter.py +1 -1
- unstructured_ingest/processes/partitioner.py +4 -0
- unstructured_ingest/processes/utils/blob_storage.py +2 -2
- unstructured_ingest/unstructured_api.py +13 -8
- unstructured_ingest/utils/data_prep.py +8 -32
- unstructured_ingest-1.0.1.dist-info/METADATA +226 -0
- {unstructured_ingest-0.7.2.dist-info → unstructured_ingest-1.0.1.dist-info}/RECORD +50 -184
- {unstructured_ingest-0.7.2.dist-info → unstructured_ingest-1.0.1.dist-info}/WHEEL +1 -2
- examples/__init__.py +0 -0
- examples/airtable.py +0 -44
- examples/azure_cognitive_search.py +0 -55
- examples/chroma.py +0 -54
- examples/couchbase.py +0 -55
- examples/databricks_volumes_dest.py +0 -55
- examples/databricks_volumes_source.py +0 -53
- examples/delta_table.py +0 -45
- examples/discord_example.py +0 -36
- examples/elasticsearch.py +0 -49
- examples/google_drive.py +0 -45
- examples/kdbai.py +0 -54
- examples/local.py +0 -36
- examples/milvus.py +0 -44
- examples/mongodb.py +0 -53
- examples/opensearch.py +0 -50
- examples/pinecone.py +0 -57
- examples/s3.py +0 -38
- examples/salesforce.py +0 -44
- examples/sharepoint.py +0 -47
- examples/singlestore.py +0 -49
- examples/sql.py +0 -90
- examples/vectara.py +0 -54
- examples/weaviate.py +0 -44
- test/__init__.py +0 -0
- test/integration/__init__.py +0 -0
- test/integration/chunkers/__init__.py +0 -0
- test/integration/chunkers/test_chunkers.py +0 -31
- test/integration/connectors/__init__.py +0 -0
- test/integration/connectors/conftest.py +0 -38
- test/integration/connectors/databricks/__init__.py +0 -0
- test/integration/connectors/databricks/test_volumes_native.py +0 -273
- test/integration/connectors/discord/__init__.py +0 -0
- test/integration/connectors/discord/test_discord.py +0 -90
- test/integration/connectors/duckdb/__init__.py +0 -0
- test/integration/connectors/duckdb/conftest.py +0 -14
- test/integration/connectors/duckdb/test_duckdb.py +0 -90
- test/integration/connectors/duckdb/test_motherduck.py +0 -95
- test/integration/connectors/elasticsearch/__init__.py +0 -0
- test/integration/connectors/elasticsearch/conftest.py +0 -34
- test/integration/connectors/elasticsearch/test_elasticsearch.py +0 -331
- test/integration/connectors/elasticsearch/test_opensearch.py +0 -326
- test/integration/connectors/sql/__init__.py +0 -0
- test/integration/connectors/sql/test_databricks_delta_tables.py +0 -170
- test/integration/connectors/sql/test_postgres.py +0 -201
- test/integration/connectors/sql/test_singlestore.py +0 -182
- test/integration/connectors/sql/test_snowflake.py +0 -244
- test/integration/connectors/sql/test_sqlite.py +0 -168
- test/integration/connectors/sql/test_vastdb.py +0 -34
- test/integration/connectors/test_astradb.py +0 -287
- test/integration/connectors/test_azure_ai_search.py +0 -254
- test/integration/connectors/test_chroma.py +0 -136
- test/integration/connectors/test_confluence.py +0 -111
- test/integration/connectors/test_delta_table.py +0 -183
- test/integration/connectors/test_dropbox.py +0 -151
- test/integration/connectors/test_github.py +0 -49
- test/integration/connectors/test_google_drive.py +0 -257
- test/integration/connectors/test_jira.py +0 -67
- test/integration/connectors/test_lancedb.py +0 -247
- test/integration/connectors/test_milvus.py +0 -208
- test/integration/connectors/test_mongodb.py +0 -335
- test/integration/connectors/test_neo4j.py +0 -244
- test/integration/connectors/test_notion.py +0 -152
- test/integration/connectors/test_onedrive.py +0 -163
- test/integration/connectors/test_pinecone.py +0 -387
- test/integration/connectors/test_qdrant.py +0 -216
- test/integration/connectors/test_redis.py +0 -143
- test/integration/connectors/test_s3.py +0 -184
- test/integration/connectors/test_sharepoint.py +0 -222
- test/integration/connectors/test_vectara.py +0 -282
- test/integration/connectors/test_zendesk.py +0 -120
- test/integration/connectors/utils/__init__.py +0 -0
- test/integration/connectors/utils/constants.py +0 -13
- test/integration/connectors/utils/docker.py +0 -151
- test/integration/connectors/utils/docker_compose.py +0 -59
- test/integration/connectors/utils/validation/__init__.py +0 -0
- test/integration/connectors/utils/validation/destination.py +0 -77
- test/integration/connectors/utils/validation/equality.py +0 -76
- test/integration/connectors/utils/validation/source.py +0 -331
- test/integration/connectors/utils/validation/utils.py +0 -36
- test/integration/connectors/weaviate/__init__.py +0 -0
- test/integration/connectors/weaviate/conftest.py +0 -15
- test/integration/connectors/weaviate/test_cloud.py +0 -39
- test/integration/connectors/weaviate/test_local.py +0 -152
- test/integration/embedders/__init__.py +0 -0
- test/integration/embedders/conftest.py +0 -13
- test/integration/embedders/test_azure_openai.py +0 -57
- test/integration/embedders/test_bedrock.py +0 -103
- test/integration/embedders/test_huggingface.py +0 -24
- test/integration/embedders/test_mixedbread.py +0 -71
- test/integration/embedders/test_octoai.py +0 -75
- test/integration/embedders/test_openai.py +0 -74
- test/integration/embedders/test_togetherai.py +0 -71
- test/integration/embedders/test_vertexai.py +0 -63
- test/integration/embedders/test_voyageai.py +0 -79
- test/integration/embedders/utils.py +0 -66
- test/integration/partitioners/__init__.py +0 -0
- test/integration/partitioners/test_partitioner.py +0 -76
- test/integration/utils.py +0 -15
- test/unit/__init__.py +0 -0
- test/unit/chunkers/__init__.py +0 -0
- test/unit/chunkers/test_chunkers.py +0 -49
- test/unit/connectors/__init__.py +0 -0
- test/unit/connectors/ibm_watsonx/__init__.py +0 -0
- test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py +0 -459
- test/unit/connectors/motherduck/__init__.py +0 -0
- test/unit/connectors/motherduck/test_base.py +0 -73
- test/unit/connectors/sql/__init__.py +0 -0
- test/unit/connectors/sql/test_sql.py +0 -152
- test/unit/connectors/test_confluence.py +0 -71
- test/unit/connectors/test_jira.py +0 -401
- test/unit/embed/__init__.py +0 -0
- test/unit/embed/test_mixedbreadai.py +0 -42
- test/unit/embed/test_octoai.py +0 -27
- test/unit/embed/test_openai.py +0 -28
- test/unit/embed/test_vertexai.py +0 -25
- test/unit/embed/test_voyageai.py +0 -24
- test/unit/embedders/__init__.py +0 -0
- test/unit/embedders/test_bedrock.py +0 -36
- test/unit/embedders/test_huggingface.py +0 -48
- test/unit/embedders/test_mixedbread.py +0 -37
- test/unit/embedders/test_octoai.py +0 -35
- test/unit/embedders/test_openai.py +0 -35
- test/unit/embedders/test_togetherai.py +0 -37
- test/unit/embedders/test_vertexai.py +0 -37
- test/unit/embedders/test_voyageai.py +0 -38
- test/unit/partitioners/__init__.py +0 -0
- test/unit/partitioners/test_partitioner.py +0 -63
- test/unit/test_error.py +0 -27
- test/unit/test_html.py +0 -112
- test/unit/test_interfaces.py +0 -26
- test/unit/test_utils.py +0 -220
- test/unit/utils/__init__.py +0 -0
- test/unit/utils/data_generator.py +0 -32
- unstructured_ingest-0.7.2.dist-info/METADATA +0 -383
- unstructured_ingest-0.7.2.dist-info/top_level.txt +0 -3
- {unstructured_ingest-0.7.2.dist-info → unstructured_ingest-1.0.1.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.7.2.dist-info → unstructured_ingest-1.0.1.dist-info/licenses}/LICENSE.md +0 -0
|
@@ -1,145 +1,12 @@
|
|
|
1
|
-
examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
examples/airtable.py,sha256=4n6B8l_fmnlZLdk9SIfYticOTgZGQBjBfktNwFQf9Zc,1831
|
|
3
|
-
examples/azure_cognitive_search.py,sha256=KThZyRQ2HhTndBcGBn1dlr0FARB2PmBBRRnKJU5AuQU,2279
|
|
4
|
-
examples/chroma.py,sha256=fLqcpnbMAlJEe96SCMm17ZGOtcnaDQBGusURhLDwth8,2208
|
|
5
|
-
examples/couchbase.py,sha256=u4fmZb4eBYEcBgWCaWOKSxxxV1v-YpyaGG7J4ept1w0,2261
|
|
6
|
-
examples/databricks_volumes_dest.py,sha256=xoSVgmS4dNKOKGKtin0ojB20mO7vfCJ8FQ_DFIgMW-U,2329
|
|
7
|
-
examples/databricks_volumes_source.py,sha256=DT7PXW6CTOGSH5lzuZMfasjV6vgoG28R68I9Cb98JvQ,2278
|
|
8
|
-
examples/delta_table.py,sha256=0InvWFYZZt6TolYtWlKp9KliOnRxFNubInbwqlBWMIA,1898
|
|
9
|
-
examples/discord_example.py,sha256=ZNzKVxk7hfUF0qZdOLkh7fGItaiIdo6gh7JYhwr2vyQ,1624
|
|
10
|
-
examples/elasticsearch.py,sha256=KQXIYCE44w_CRZQGanlfP9ZY9NZ5gclxKKvtZnd-IRA,2129
|
|
11
|
-
examples/google_drive.py,sha256=iOjkC8iWQ3sqM3TTsL8Ng_yZbm2C5xfws1a9MTZuV7M,1677
|
|
12
|
-
examples/kdbai.py,sha256=W7yT2lnYOPbftqTiSHQNgWVwQC4UXJ8QhkPqSdiG_Co,2225
|
|
13
|
-
examples/local.py,sha256=WuN3SqxoXRlh2xhMZPh4R9EdCNfjQUhurAlGVqGuUF8,1569
|
|
14
|
-
examples/milvus.py,sha256=TKtH1Rxrj3Hr9d1BUx7qEK468Xb0ux7Ak1RukPXSOOo,1877
|
|
15
|
-
examples/mongodb.py,sha256=eP43TY-rjOeWnVk4m5jSSWJSWXcy6xRYFuLWusBZXws,2160
|
|
16
|
-
examples/opensearch.py,sha256=8YmQpvOB9HBQqoC47ht-lX34SpkoaDlezbQOHRG82cw,2103
|
|
17
|
-
examples/pinecone.py,sha256=URXalj5-0eTVnmfzD0icCB1brGklplU4P8l0jrawCjI,2479
|
|
18
|
-
examples/s3.py,sha256=23y_lPUkPo50rDMZC7cc3kBaSOf5pP_xl_7HO0Mb3c8,1742
|
|
19
|
-
examples/salesforce.py,sha256=tiO6hdRI79H_oORPnIf1FvB0IuGTYG2KzZlnqC_J9Cw,1888
|
|
20
|
-
examples/sharepoint.py,sha256=a0h2zU28m6bW5g17b8BDrcsHzdzjSgb--gYV80bRqs0,2067
|
|
21
|
-
examples/singlestore.py,sha256=UAdBOtIcmhyRkZ-pIh7rrY7Yt_Ed8t3puulZ-MhaSfU,2060
|
|
22
|
-
examples/sql.py,sha256=YSmLD7Ri2a8CvBxRJWxPQefqV4kV8kF3W0l3TXu_iyY,2997
|
|
23
|
-
examples/vectara.py,sha256=bWSsMQL3hEEt5CLR9CZFnuplrSAeLj_EiADipRU_Gkw,2247
|
|
24
|
-
examples/weaviate.py,sha256=QUtYJ-y7eYfm69T316-aUm1imQZnoSJ09RGSVQAoTck,1906
|
|
25
|
-
test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
|
-
test/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
-
test/integration/utils.py,sha256=CWqzEGw6TA_ZoP9hRUkW64TWYssooBbufcTRmbJvod8,401
|
|
28
|
-
test/integration/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
|
-
test/integration/chunkers/test_chunkers.py,sha256=MTPVBCBvh54fBqi_53oPkrH9QJtvJeE9YEXDOZ8G0so,1059
|
|
30
|
-
test/integration/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
|
-
test/integration/connectors/conftest.py,sha256=3weYhwyXyAVVk8DsLqScDOk-PZwsrHQLs1RdYa1EYqQ,1015
|
|
32
|
-
test/integration/connectors/test_astradb.py,sha256=1hFqd9NI283t6lQZryBJMfJoRfP6PKVHTl_6X3Nk0bM,9925
|
|
33
|
-
test/integration/connectors/test_azure_ai_search.py,sha256=NMSjIbFO1EWqr8U-Owd4qdC3pDROjg7zNAXr5yGSsH4,9699
|
|
34
|
-
test/integration/connectors/test_chroma.py,sha256=_K4CUp9x5O_Uuw1uwDi1R0jIvzcaW0wI8tXfAV1hh7c,4536
|
|
35
|
-
test/integration/connectors/test_confluence.py,sha256=3sAFQxRUrS7xXIQXQBccY0r5kihnP8rc_sAs-44g4Ys,3587
|
|
36
|
-
test/integration/connectors/test_delta_table.py,sha256=-ivYySHKgFRCd6PUIK-fy6yOkku3uj4YhDDEUlqi-tM,6915
|
|
37
|
-
test/integration/connectors/test_dropbox.py,sha256=sW-NOXA0-4xtAUgnSnPGg-OhiIhKviROJPpxUt8y8s8,4939
|
|
38
|
-
test/integration/connectors/test_github.py,sha256=wFWRFhfhTUz3byk2FSe9qVv7xMxcBjHGs1FlhjtnTAQ,1500
|
|
39
|
-
test/integration/connectors/test_google_drive.py,sha256=ceiFoVnaguTsQrLHZk8jv-IZ-i_EP4wenHSX-QKHvTM,10300
|
|
40
|
-
test/integration/connectors/test_jira.py,sha256=0FnxFe42d32EGuArfxxnfINkoYNoCsgJjP7ZU6fePu8,2073
|
|
41
|
-
test/integration/connectors/test_lancedb.py,sha256=E8yFuvQMx68w1s1PXIBP8gUlNuUpCtiGNYd7YnDA6Aw,9213
|
|
42
|
-
test/integration/connectors/test_milvus.py,sha256=_cYmJMocsZuUroalT5uc9rcsHFnTIpJyJjIwK8oDDYc,7177
|
|
43
|
-
test/integration/connectors/test_mongodb.py,sha256=wt5o-7qtMtjGv0IPKlhEnD3-sJjBX8cv1acn1Mcq-TY,12450
|
|
44
|
-
test/integration/connectors/test_neo4j.py,sha256=BSOqRTY4ZV8o6TV1MOmUKQq7DzRFU_z9umjDk-yw-Jg,8450
|
|
45
|
-
test/integration/connectors/test_notion.py,sha256=3OXFcSM-jE1_E_JoGw--pz-cv3dPZvt18scACJiHjwo,5397
|
|
46
|
-
test/integration/connectors/test_onedrive.py,sha256=0SZB818cNsxYZlBJJpuvU1PqsFDxRaOiLfJTRcc9Bv0,5233
|
|
47
|
-
test/integration/connectors/test_pinecone.py,sha256=0XBK9xxZhry6Rnv0s-chIWgtN5d1p8Lx-kEEQMNuAnA,13650
|
|
48
|
-
test/integration/connectors/test_qdrant.py,sha256=z3RThQJKzCafCtfH0ocy_DNDlzmDdu_opQH3mKTn0CE,8031
|
|
49
|
-
test/integration/connectors/test_redis.py,sha256=gUU6Dv616tX9KANiqhkMrPWvmhbV2Gk1pNuIv2MvZG8,5093
|
|
50
|
-
test/integration/connectors/test_s3.py,sha256=Cd9HsPjrSB6xss1DO4YHqSORJJ2pUXNVaAlrlpqjZS8,7477
|
|
51
|
-
test/integration/connectors/test_sharepoint.py,sha256=wq4G6J5ffXhYquUySVd5UUtYWC43RoXgCSY4fdlX2z0,7643
|
|
52
|
-
test/integration/connectors/test_vectara.py,sha256=08GIh6J2QTSuupdDOJ_TiyQrYYK3vamUaEQe3_B5-WY,9278
|
|
53
|
-
test/integration/connectors/test_zendesk.py,sha256=15bl3wy0pLxS2dkBlE11yPOX71k6Vbxo0BEFQK-qcFs,3724
|
|
54
|
-
test/integration/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
|
-
test/integration/connectors/databricks/test_volumes_native.py,sha256=RbxImt0JPnvl8TAK_rAtIspaFuRmEFzjL9rLqkm6Juk,9563
|
|
56
|
-
test/integration/connectors/discord/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
57
|
-
test/integration/connectors/discord/test_discord.py,sha256=WeB0ST572GvELMlgorRMwRxYIWkleIobXn6ULhjo1rw,3173
|
|
58
|
-
test/integration/connectors/duckdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
59
|
-
test/integration/connectors/duckdb/conftest.py,sha256=rlBHMJTiJ2a5xbvIxTOyhhcuTBc9DO-yTzD6Kf8X3hY,301
|
|
60
|
-
test/integration/connectors/duckdb/test_duckdb.py,sha256=o3CKTG2T15QyWwPTh7Yyv42eJ39opm7g9b508y1o1m8,2973
|
|
61
|
-
test/integration/connectors/duckdb/test_motherduck.py,sha256=S2EtKVy4HE0ysB3OvdFOUe5O-0w5Bk4Loy2Gyyff8b0,3218
|
|
62
|
-
test/integration/connectors/elasticsearch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
63
|
-
test/integration/connectors/elasticsearch/conftest.py,sha256=-i4_7MkIxSQENz7nuD2uHuhGU9mZ33vpeTPhHtRpQfs,989
|
|
64
|
-
test/integration/connectors/elasticsearch/test_elasticsearch.py,sha256=NUUL88Z7uWfnEopZ0wkQtWMA94WDZw87v_oCkPShVM4,12076
|
|
65
|
-
test/integration/connectors/elasticsearch/test_opensearch.py,sha256=nV3gHD_tb79KRE8DqfUWpOfzem9LjojACspUHTSI7dw,11454
|
|
66
|
-
test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
67
|
-
test/integration/connectors/sql/test_databricks_delta_tables.py,sha256=hYfmT9ud6NqJk1pibaF-1LvNgrfXBGUq0FTHkcIvICE,6145
|
|
68
|
-
test/integration/connectors/sql/test_postgres.py,sha256=b0do75CAxKFEgoODx7mNQqo5vEgZbWHSifMNV_H2IgU,6974
|
|
69
|
-
test/integration/connectors/sql/test_singlestore.py,sha256=uuw_T8EefwMcRD7clzviJxwb45f9k4G2dZIYqbNhM1s,6157
|
|
70
|
-
test/integration/connectors/sql/test_snowflake.py,sha256=dNHR8fk5V1WsvN6P1CesPKCsRbKTuf6zXgjyf7vgueg,7498
|
|
71
|
-
test/integration/connectors/sql/test_sqlite.py,sha256=6By1-XKiGCA5KlR1DHlM6ArU5c_2GjM5mE2RhMoNPg8,5960
|
|
72
|
-
test/integration/connectors/sql/test_vastdb.py,sha256=A0W-kHl1GRf2zHCmTWXOJjV8HPi3xlWvCTKgjebVZUY,1066
|
|
73
|
-
test/integration/connectors/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
74
|
-
test/integration/connectors/utils/constants.py,sha256=JhTk6YNw7JVpkk-Pl8zn2YYkExeL1oE9VBWm_kMYGfo,369
|
|
75
|
-
test/integration/connectors/utils/docker.py,sha256=4g1STiSbYN5qcmDTXyPxVJgwx97O6wk7n-DJ-zgzgag,4971
|
|
76
|
-
test/integration/connectors/utils/docker_compose.py,sha256=GVTB6Cel05c0VQ2n4AwkQQx_cBfz13ZTs1HpbaYipNU,2223
|
|
77
|
-
test/integration/connectors/utils/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
78
|
-
test/integration/connectors/utils/validation/destination.py,sha256=m5RHgZ3_h6HA2SsWbg15rmKhZjzsyKqOtFlUqpy33SI,2746
|
|
79
|
-
test/integration/connectors/utils/validation/equality.py,sha256=R6d_1c-Si5518WJcBcshF_wBRnywnZ0ORQ-NL0xNmGo,2602
|
|
80
|
-
test/integration/connectors/utils/validation/source.py,sha256=WX67a1tYpyUFXvSxxZrTLEkpyVqZiUXhAsJ11RQzcqQ,13701
|
|
81
|
-
test/integration/connectors/utils/validation/utils.py,sha256=xYYvAbqP6_lZyH09_JjB4w2Sf8aQPvDVT5vZTs05ILs,1428
|
|
82
|
-
test/integration/connectors/weaviate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
83
|
-
test/integration/connectors/weaviate/conftest.py,sha256=6Q6QdrLJmGHowRFSmoVSzup2EX6qASfS2Z5tqlpTm9M,387
|
|
84
|
-
test/integration/connectors/weaviate/test_cloud.py,sha256=1r16tNUSsq8JawfjgeRWtcfw2COYma0b298mBDZU__o,1281
|
|
85
|
-
test/integration/connectors/weaviate/test_local.py,sha256=q8vSpmFeTapSoUSNChIpc6qfyMdcICo28CJSm7L7V-o,5337
|
|
86
|
-
test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
87
|
-
test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
|
|
88
|
-
test/integration/embedders/test_azure_openai.py,sha256=nQle565sZu9N4xjWHBOfiOFGtldOdIK6S1YID00hK78,1787
|
|
89
|
-
test/integration/embedders/test_bedrock.py,sha256=-hy4wPmgBpXrk7OtPbZ8IE0qip6Ht1GptGLpmpqHxiw,3550
|
|
90
|
-
test/integration/embedders/test_huggingface.py,sha256=jNNBpXUA_UpgXXVo_Q2xYGFaknIZJu8eKJ4ifq19eig,986
|
|
91
|
-
test/integration/embedders/test_mixedbread.py,sha256=0Mcq9JU9wAJ_Wz2Enwyz4qGSbivDDqmE6ZJxlZpMuWw,1993
|
|
92
|
-
test/integration/embedders/test_octoai.py,sha256=R6NcBxPL_sVJLGmVTEjsaxfqjnLWxViXwL6vaze666s,2194
|
|
93
|
-
test/integration/embedders/test_openai.py,sha256=iwk56ZK1gllFtatcp6W_PA7x9h2M2ReIg4GXIWXp5qo,2124
|
|
94
|
-
test/integration/embedders/test_togetherai.py,sha256=5PzIdnvjMpjods_rhUqQ2nbVRXgld_F7OGWssnHxa0I,2202
|
|
95
|
-
test/integration/embedders/test_vertexai.py,sha256=xpjauYnRBxOqft0HXEMFk1iRoeQJm7E2eSunZbjj_H4,1827
|
|
96
|
-
test/integration/embedders/test_voyageai.py,sha256=kcuGxhG6kR1XVqmTw7La3MXnIC06CtjOq_n48nkF-eQ,2411
|
|
97
|
-
test/integration/embedders/utils.py,sha256=Sqqg-X31ZV1hojqPQBaZgM2lb2u8cG6s6OnH9JRsFjs,2717
|
|
98
|
-
test/integration/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
99
|
-
test/integration/partitioners/test_partitioner.py,sha256=UYQd9x2-66F_FFeulC_2eg3FtjswK0Mt9Hwmg4b_pPs,2784
|
|
100
|
-
test/unit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
101
|
-
test/unit/test_error.py,sha256=RflmngCdFNKOLXVfLnUdNfY3Mfg3k7DTEzfIl0B-syU,840
|
|
102
|
-
test/unit/test_html.py,sha256=ubsck9pVOnPDFL0P8TZkko_46MIaFLlSNQcsgFDgYoE,4496
|
|
103
|
-
test/unit/test_interfaces.py,sha256=Gv3WMJsw_3xPLy3nI3dIcJuLa2WvKYszSjI_W9XLtVM,787
|
|
104
|
-
test/unit/test_utils.py,sha256=xeSM02zOChSOO3dzDOVAEiQme1rQ8drjnJF93S3BFmk,7247
|
|
105
|
-
test/unit/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
106
|
-
test/unit/chunkers/test_chunkers.py,sha256=wRxbSj7P1FwRGDyVcARkm8CQSVCBCro3nTe54UoUBzc,1769
|
|
107
|
-
test/unit/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
108
|
-
test/unit/connectors/test_confluence.py,sha256=Hr91nKw6018FEih-vSrVXFk0p0E9bSL1IeZVDxvITJ0,1916
|
|
109
|
-
test/unit/connectors/test_jira.py,sha256=sPRjoBVDmc-o2RWilcjs-VW_jkafIqSXBE9duCELfoA,12110
|
|
110
|
-
test/unit/connectors/ibm_watsonx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
111
|
-
test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py,sha256=Yj9swJ_phiam-CFBzGnAFCkd8_oqzdA3ZQJQdpV8T1E,14503
|
|
112
|
-
test/unit/connectors/motherduck/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
113
|
-
test/unit/connectors/motherduck/test_base.py,sha256=4SHI3Hx1a28eNE_VDbl8gAssNZRIUNSGPNZgrkzjYWs,2429
|
|
114
|
-
test/unit/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
115
|
-
test/unit/connectors/sql/test_sql.py,sha256=SfWYDBrR7pHFziKVEe6IAq5E3EQIz99ikQN3LnF1DrY,4622
|
|
116
|
-
test/unit/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
117
|
-
test/unit/embed/test_mixedbreadai.py,sha256=Z9A9jg5eJRF4OgYTgbIzQUI27J16uv2qj2kp_Rv0r9k,1428
|
|
118
|
-
test/unit/embed/test_octoai.py,sha256=CWVrieqJh-N40J9n3nzqQPLOH9T1_mldkpZYRiHKxrg,1055
|
|
119
|
-
test/unit/embed/test_openai.py,sha256=RQ-4QIcRvq0JSBFNit_NRcy61EsOv7xh_TcKJKHwHGM,1186
|
|
120
|
-
test/unit/embed/test_vertexai.py,sha256=k_dK-yR_yx1RAOpmAgfcPo-osRDJP9aRCMCsJmQPxYI,1050
|
|
121
|
-
test/unit/embed/test_voyageai.py,sha256=QWoDZEX8cAIkTgn4NtIyGKzOAu-GmudD4VMujnfi1Gg,983
|
|
122
|
-
test/unit/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
123
|
-
test/unit/embedders/test_bedrock.py,sha256=HMaweO_v_9Y1SE2m5QImXP73cb26vNTUfc1onTBa1-g,1074
|
|
124
|
-
test/unit/embedders/test_huggingface.py,sha256=BpMC_AMlifjNf4Y61yBNR_8UU3H_x3ut2NnpFuB4kDo,1543
|
|
125
|
-
test/unit/embedders/test_mixedbread.py,sha256=8yT942TVVXC5EkrT_ReZie1In537BaAD6esRjntgxuU,1021
|
|
126
|
-
test/unit/embedders/test_octoai.py,sha256=JMfrFz25QfEh0ieB4bJneZd4XtNcdPOnNsN1Fj7gU-Q,1012
|
|
127
|
-
test/unit/embedders/test_openai.py,sha256=HoEW95289Ijgo3PJ-pEaDOknfdkSjPXTgkXmE6jJomY,1012
|
|
128
|
-
test/unit/embedders/test_togetherai.py,sha256=s24V_geDNZzblU74sSdC_m4Lqlzjp00RMpy56ptfdx0,1009
|
|
129
|
-
test/unit/embedders/test_vertexai.py,sha256=4gLJaV9Nr2k_SgA-EyJ_sDvm8XvyGbn2zTs4F4CXU2g,1142
|
|
130
|
-
test/unit/embedders/test_voyageai.py,sha256=VaWthF64pmxc-fOBbAQsEzMw7tV4t4Nz_H_Cc5tuAYQ,1193
|
|
131
|
-
test/unit/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
132
|
-
test/unit/partitioners/test_partitioner.py,sha256=eJoUDbiKtweyU1WYfsY5KqVqoPjbx1MUsyHkbvvTNEk,2275
|
|
133
|
-
test/unit/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
134
|
-
test/unit/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
135
1
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
136
|
-
unstructured_ingest/__version__.py,sha256=
|
|
2
|
+
unstructured_ingest/__version__.py,sha256=Bkcw0TdkF4pWY_01piNW3D1XaG9Q-r4aIMSbnIeStCE,42
|
|
137
3
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
138
4
|
unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
|
|
139
5
|
unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
|
|
140
6
|
unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
|
|
141
7
|
unstructured_ingest/otel.py,sha256=NsUqOolA0gt69eFhZLABjVpcKoM9aus-AbxIKqWqPTc,4127
|
|
142
|
-
unstructured_ingest/unstructured_api.py,sha256=
|
|
8
|
+
unstructured_ingest/unstructured_api.py,sha256=4e2ZNWIihk0eje4R3ZQ0NOYNbmMZDv_O-rnJo94kaGE,5127
|
|
9
|
+
unstructured_ingest/cli/README.md,sha256=5LfM0ys1aFyCiCjlwZsi_9Mb5Nrq3MmYt3IpmUybnCE,1507
|
|
143
10
|
unstructured_ingest/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
144
11
|
unstructured_ingest/cli/cli.py,sha256=ZeIE9jP8fe7260nE8v7xYgLdqX9OtkQXAXSGWIkHLcA,645
|
|
145
12
|
unstructured_ingest/cli/cmds.py,sha256=EhDW5UX4V-N8Svjba4w7YWnRYl26__ADwzNXrfFBxM4,483
|
|
@@ -158,7 +25,7 @@ unstructured_ingest/embed/azure_openai.py,sha256=_-I-nwd-wdCiKkSdYBL4UKrTZ2UPWsM
|
|
|
158
25
|
unstructured_ingest/embed/bedrock.py,sha256=t58V_QQjWPO62CTuP0aLFMDisPeXpxG2xSFGUhN-JvI,7726
|
|
159
26
|
unstructured_ingest/embed/huggingface.py,sha256=-ZD17O_H_UnK80fqig6y6wNKJckjx0HuAkY5vgPvk8M,2259
|
|
160
27
|
unstructured_ingest/embed/interfaces.py,sha256=SdB3t8eMPB8CbXzOYBpgwjzTvyb4T19L61Sr6Jy3_rw,5099
|
|
161
|
-
unstructured_ingest/embed/mixedbreadai.py,sha256
|
|
28
|
+
unstructured_ingest/embed/mixedbreadai.py,sha256=z8RaG1hGBL840yElvI1Dbnf7llsOGEBbZ2X_QlFflZg,4498
|
|
162
29
|
unstructured_ingest/embed/octoai.py,sha256=136UzSuQgV8Nxel2pB8Iv-4AvlFU6RRCa7N64fWFl6o,3855
|
|
163
30
|
unstructured_ingest/embed/openai.py,sha256=hK98QXb_8oN1E-QwNT6JElzYOxG1mvZCFYQW57pjv0E,3372
|
|
164
31
|
unstructured_ingest/embed/togetherai.py,sha256=T0v0_yTovy3sSeLPvk3PJccqcnmqCc_vxYs6pumjK3I,2983
|
|
@@ -170,12 +37,12 @@ unstructured_ingest/interfaces/downloader.py,sha256=xX0ZzsFRSzZb7SAeoeQph8sIbVq1
|
|
|
170
37
|
unstructured_ingest/interfaces/indexer.py,sha256=c2FwWJEQHfFD6vO-tGfYLpLiIs-TYViLAt8YmHfDbaM,824
|
|
171
38
|
unstructured_ingest/interfaces/process.py,sha256=S3A_9gkwwGC-iQxvnpj3Er6IJAjAT5npzpSgxuFAzUM,449
|
|
172
39
|
unstructured_ingest/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
|
|
173
|
-
unstructured_ingest/interfaces/upload_stager.py,sha256=
|
|
174
|
-
unstructured_ingest/interfaces/uploader.py,sha256=
|
|
40
|
+
unstructured_ingest/interfaces/upload_stager.py,sha256=eYhbdM0Dt8FValZAe41dWnxehhvfMLDOSTp7UoR5HB0,3147
|
|
41
|
+
unstructured_ingest/interfaces/uploader.py,sha256=6HyWttmosKreuWJCFp3TxKCuzDCj_RJdGEPwxhwapQk,2053
|
|
175
42
|
unstructured_ingest/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
176
|
-
unstructured_ingest/pipeline/interfaces.py,sha256=
|
|
43
|
+
unstructured_ingest/pipeline/interfaces.py,sha256=Jb62t0P52hzzRWe-zHxcySgINMnPDOWc4dfJooYUEC8,8642
|
|
177
44
|
unstructured_ingest/pipeline/otel.py,sha256=wUVmUPWIk_X3yw0MuI-5QJ2wU2rQgaapinnS98iQBxI,1082
|
|
178
|
-
unstructured_ingest/pipeline/pipeline.py,sha256=
|
|
45
|
+
unstructured_ingest/pipeline/pipeline.py,sha256=LKCY7kcTfWOYF8k9k3Rw8sYZdNNAH8Qo_qZFHNIkyEU,16781
|
|
179
46
|
unstructured_ingest/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
180
47
|
unstructured_ingest/pipeline/steps/chunk.py,sha256=LiJMzgB0ZEDnPuYz5IwuK2k2iJoBIcfftp9cVXMOlf0,3179
|
|
181
48
|
unstructured_ingest/pipeline/steps/download.py,sha256=cw8KbZ4CNZ_on4xam-VehNnLvKkUourazvcaUB-ihGY,8205
|
|
@@ -187,53 +54,53 @@ unstructured_ingest/pipeline/steps/stage.py,sha256=oobrvLtZOOPEnXQXMDUnhaaKhheuS
|
|
|
187
54
|
unstructured_ingest/pipeline/steps/uncompress.py,sha256=clyZKwKQLLKbkQDD2q98Aw1UAe3VqUY0n7_KWtGVMSw,1756
|
|
188
55
|
unstructured_ingest/pipeline/steps/upload.py,sha256=4hvh--03jzbGlxO0l1_2D5ec_EaGu04I5bFsxH0MnTg,1986
|
|
189
56
|
unstructured_ingest/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
|
|
190
|
-
unstructured_ingest/processes/chunker.py,sha256=
|
|
57
|
+
unstructured_ingest/processes/chunker.py,sha256=v6ICNdBqjyAOk8f_cQajIJRdrzGUSm1UFGnQqzihpi0,5806
|
|
191
58
|
unstructured_ingest/processes/connector_registry.py,sha256=PoNhB-qOxvalaN-ssPWWhZSQ9QDdKMa6KzskCYYebfA,2195
|
|
192
|
-
unstructured_ingest/processes/embedder.py,sha256=
|
|
193
|
-
unstructured_ingest/processes/filter.py,sha256=
|
|
194
|
-
unstructured_ingest/processes/partitioner.py,sha256=
|
|
59
|
+
unstructured_ingest/processes/embedder.py,sha256=jJcnAyRWNX5XN2WpPcT8bunbFeVlUa1P3yk4G_JIcvo,7894
|
|
60
|
+
unstructured_ingest/processes/filter.py,sha256=oc3SYukRYfzx8sdJqF3KxdwZcrA-1U8PTAipMdZkW0c,2148
|
|
61
|
+
unstructured_ingest/processes/partitioner.py,sha256=Kn_BSFYvOkwo8fqThw_cOpgD0Um-AdoSqclZplcdNBA,10109
|
|
195
62
|
unstructured_ingest/processes/uncompress.py,sha256=o9JL3Bza4KPUTmrB39-v_5SuK_fYwhwFAhjQi2Pm8h8,2426
|
|
196
63
|
unstructured_ingest/processes/connectors/__init__.py,sha256=cR4ZH2dpPod7QR6OsgMx8X9kpFcEc1TVfQndUNoKGzI,6812
|
|
197
|
-
unstructured_ingest/processes/connectors/airtable.py,sha256=
|
|
198
|
-
unstructured_ingest/processes/connectors/astradb.py,sha256=
|
|
199
|
-
unstructured_ingest/processes/connectors/azure_ai_search.py,sha256=
|
|
64
|
+
unstructured_ingest/processes/connectors/airtable.py,sha256=smx5qBSUKwM8V6Xcc7ikrf8hYQUQ94YrB1L0WVeRDv0,9024
|
|
65
|
+
unstructured_ingest/processes/connectors/astradb.py,sha256=ONt8vHv5h8B6goGba9l0YPS0y5EnSAoowtfq92-E-RY,18307
|
|
66
|
+
unstructured_ingest/processes/connectors/azure_ai_search.py,sha256=szhSRXzUHk0DE2hGFfjGc_jNFzlUwiRlCtIkuu7tmnk,11524
|
|
200
67
|
unstructured_ingest/processes/connectors/chroma.py,sha256=q5_Fu4xb6_W_NyrPxVa3-jVwZLqVdlBNlR4dFvbd7l0,7235
|
|
201
|
-
unstructured_ingest/processes/connectors/confluence.py,sha256=
|
|
68
|
+
unstructured_ingest/processes/connectors/confluence.py,sha256=BbZ-Ecdcn92X8dHQ0egEJtBoX16gM0-zMcBLdn-wQsM,12090
|
|
202
69
|
unstructured_ingest/processes/connectors/couchbase.py,sha256=KCHoYDNya9B05NIB5D78zXoizFyfpJRepcYBe1nLSOs,12298
|
|
203
|
-
unstructured_ingest/processes/connectors/delta_table.py,sha256=
|
|
70
|
+
unstructured_ingest/processes/connectors/delta_table.py,sha256=2DFox_Vzoopt_D3Jy3rCjrrTGMutG2INIrwCeoIohRY,7340
|
|
204
71
|
unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMkcd8lcLJC0uqbo4izjdZ3rU,5294
|
|
205
72
|
unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
|
|
206
|
-
unstructured_ingest/processes/connectors/gitlab.py,sha256=
|
|
207
|
-
unstructured_ingest/processes/connectors/google_drive.py,sha256=
|
|
73
|
+
unstructured_ingest/processes/connectors/gitlab.py,sha256=6h1CdqznJmzeWxGfXrFLdNdT23PExGnUMMX7usK_4Kk,10013
|
|
74
|
+
unstructured_ingest/processes/connectors/google_drive.py,sha256=CqUwtK4NhKhNfozsunVzFUsKMYBEgRS1eci2pIZLnJE,20055
|
|
208
75
|
unstructured_ingest/processes/connectors/jira.py,sha256=eG8yTn8ZVEz7rBJ-ha8i_d9hEh6VALN6QJT_vbYvbL0,17142
|
|
209
|
-
unstructured_ingest/processes/connectors/kdbai.py,sha256=
|
|
76
|
+
unstructured_ingest/processes/connectors/kdbai.py,sha256=XhxYpKSAoFPBsDQWwNuLX03DCxOVr7yquj9VYM55Rtc,5174
|
|
210
77
|
unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9VhzKZuuQUpDS-cVNAQ2g,7426
|
|
211
78
|
unstructured_ingest/processes/connectors/milvus.py,sha256=Jr9cul7By03tGAPFnFBoqncnNWwbhKd-qbmkuqnin8U,8908
|
|
212
79
|
unstructured_ingest/processes/connectors/mongodb.py,sha256=1g_5bfbS6lah3nsOXqLAanR3zNYJ47_Njw_uV-uj3_U,14324
|
|
213
80
|
unstructured_ingest/processes/connectors/neo4j.py,sha256=eAM2XWSLA5caKJmbcd7ctn2TapreIJEXRoHoxT1OZwA,18718
|
|
214
81
|
unstructured_ingest/processes/connectors/onedrive.py,sha256=VBkKlbJgR7uKlKTnjNybAw6ZawLKflDPpy2uVvgWYWw,19296
|
|
215
|
-
unstructured_ingest/processes/connectors/outlook.py,sha256=
|
|
216
|
-
unstructured_ingest/processes/connectors/pinecone.py,sha256=
|
|
217
|
-
unstructured_ingest/processes/connectors/redisdb.py,sha256=
|
|
218
|
-
unstructured_ingest/processes/connectors/salesforce.py,sha256=
|
|
82
|
+
unstructured_ingest/processes/connectors/outlook.py,sha256=zHM5frO7CqQG0-KcTyX49aZeSlsvVrl8kh_lR_ESgQw,9275
|
|
83
|
+
unstructured_ingest/processes/connectors/pinecone.py,sha256=BdO1PS_Y6FOeL-7uPl-Eh6ij1wHOwMkopOzKQGQ9Ac0,13979
|
|
84
|
+
unstructured_ingest/processes/connectors/redisdb.py,sha256=YzvSlfHs83XWsWMaIC3bV5enKfxejMQ9BQ8CtXfnJ5o,6923
|
|
85
|
+
unstructured_ingest/processes/connectors/salesforce.py,sha256=OaKEWCqZrirHqFJ650K5jSPwYlWefPOapas8Y-4D9oc,11661
|
|
219
86
|
unstructured_ingest/processes/connectors/sharepoint.py,sha256=PowaqMzWr-VCW1rnwcAeRhHyE55kJ9J9FCVlrmtzN0E,4827
|
|
220
|
-
unstructured_ingest/processes/connectors/slack.py,sha256=
|
|
87
|
+
unstructured_ingest/processes/connectors/slack.py,sha256=EkFj9PcAu5_gF2xLogikKDADLbJYq-_jvchzYrTdLO4,9224
|
|
221
88
|
unstructured_ingest/processes/connectors/utils.py,sha256=TAd0hb1f291N-q7-TUe6JKSCGkhqDyo7Ij8zmliBZUc,2071
|
|
222
|
-
unstructured_ingest/processes/connectors/vectara.py,sha256=
|
|
89
|
+
unstructured_ingest/processes/connectors/vectara.py,sha256=xrC6jkgW8BII4UjdzUelDu122xT484cpfMTK2wl-sko,12292
|
|
223
90
|
unstructured_ingest/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
224
91
|
unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=dUZZDNkyvQXKqoAThRz3ek7zaUE2l_LAQimlG5WZhH4,211
|
|
225
92
|
unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
|
|
226
93
|
unstructured_ingest/processes/connectors/databricks/__init__.py,sha256=RtKAPyNtXh6fzEsOQ08pA0-vC1uMr3KqYG6cqiBoo70,2133
|
|
227
94
|
unstructured_ingest/processes/connectors/databricks/volumes.py,sha256=OWQrne9-5hPzc-kxGa2P53M3DoksDzMDyjLhQyihdCo,8020
|
|
228
|
-
unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=
|
|
229
|
-
unstructured_ingest/processes/connectors/databricks/volumes_azure.py,sha256=
|
|
230
|
-
unstructured_ingest/processes/connectors/databricks/volumes_gcp.py,sha256=
|
|
95
|
+
unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=WhGTp6aRTLSdc4GChCL4mz2b-IanderW8j1IqezX6YA,2958
|
|
96
|
+
unstructured_ingest/processes/connectors/databricks/volumes_azure.py,sha256=pF2d6uAIbwJJUeOIG5xknUMCGc5d9Aztmc2776wp-a0,3740
|
|
97
|
+
unstructured_ingest/processes/connectors/databricks/volumes_gcp.py,sha256=y9AvVl6PtnIxlTlrPj_wyHBDBRJNq3uoTOuZwTryNg8,2994
|
|
231
98
|
unstructured_ingest/processes/connectors/databricks/volumes_native.py,sha256=pivySGMmFSsyuB42ARAWAPXFQ7qTQxO3dfEoE23pBNM,3104
|
|
232
|
-
unstructured_ingest/processes/connectors/databricks/volumes_table.py,sha256=
|
|
99
|
+
unstructured_ingest/processes/connectors/databricks/volumes_table.py,sha256=tqi6PpYpIBMTZcYZXl5Lw0YuawyDvjHI08TKPFFTTr0,8194
|
|
233
100
|
unstructured_ingest/processes/connectors/duckdb/__init__.py,sha256=Dr6BRJJGefJnnp_vn5W5gBd7vrCCXTMLweuDIqTP-fM,558
|
|
234
|
-
unstructured_ingest/processes/connectors/duckdb/base.py,sha256
|
|
235
|
-
unstructured_ingest/processes/connectors/duckdb/duckdb.py,sha256
|
|
236
|
-
unstructured_ingest/processes/connectors/duckdb/motherduck.py,sha256=
|
|
101
|
+
unstructured_ingest/processes/connectors/duckdb/base.py,sha256=bTLhilg6mgERNCpeeNNl7wxy3xkOt23O9XpCyD0WVY4,2945
|
|
102
|
+
unstructured_ingest/processes/connectors/duckdb/duckdb.py,sha256=jsmibTd_yvYzkCT05HhCJvplyobtjfNILC3zyTuCcVY,4464
|
|
103
|
+
unstructured_ingest/processes/connectors/duckdb/motherduck.py,sha256=Atr2MjJQGFGWh5aeiQsLpUbFw-aCZH-ABI1LprDh5VI,4727
|
|
237
104
|
unstructured_ingest/processes/connectors/elasticsearch/__init__.py,sha256=M8mmBWoP6J5R3hxg6BQUMexYlTUxUxdBoIcjUop8yt8,826
|
|
238
105
|
unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py,sha256=iNedi-JVkAvdF15CbKwVRwXJazyST6ha3zcNyyGwVmQ,19003
|
|
239
106
|
unstructured_ingest/processes/connectors/elasticsearch/opensearch.py,sha256=wggHvw8h-X0-3WPNxj9rt2xkrE7Pv7CV0B0KzTMzBB4,6944
|
|
@@ -243,11 +110,11 @@ unstructured_ingest/processes/connectors/fsspec/box.py,sha256=1gLS7xR2vbjgKBrQ4Z
|
|
|
243
110
|
unstructured_ingest/processes/connectors/fsspec/dropbox.py,sha256=HwwKjQmjM7yFk9Esh_F20xDisRPXGUkFduzaasByRDE,8355
|
|
244
111
|
unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=13TJmipcz9mYQT6Qi4WmqUV3veHIhbLZIW_70qY-5tI,14469
|
|
245
112
|
unstructured_ingest/processes/connectors/fsspec/gcs.py,sha256=ouxISCKpZTAj3T6pWGYbASu93wytJjl5WSICvQcrgfE,7172
|
|
246
|
-
unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=
|
|
113
|
+
unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=2ZV6b2E2pIsf_ab1Lty74FwpMnJZhpQUdamPgpwcKsQ,7141
|
|
247
114
|
unstructured_ingest/processes/connectors/fsspec/sftp.py,sha256=pR_a2SgLjt8ffNkariHrPB1E0HVSTj5h3pt7KxTU3TI,6371
|
|
248
115
|
unstructured_ingest/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
|
|
249
116
|
unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py,sha256=kf0UpgdAY2KK1R1FbAB6GEBBAIOeYQ8cZIr3bp660qM,374
|
|
250
|
-
unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py,sha256=
|
|
117
|
+
unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py,sha256=SpZIqjreXpLTpZfezhG6xkZ_h7w-QWmdjXDBG6mlddQ,11729
|
|
251
118
|
unstructured_ingest/processes/connectors/kafka/__init__.py,sha256=pFN2cWwAStiGTAsQ616GIWKi_hDv0s74ZvNqhJEp1Pc,751
|
|
252
119
|
unstructured_ingest/processes/connectors/kafka/cloud.py,sha256=Ki6iOLoZ86tYWdnLnMWYvb2hUCneKqo4mTJcfXh7YoQ,3432
|
|
253
120
|
unstructured_ingest/processes/connectors/kafka/kafka.py,sha256=7NMvWijfoliyAgnmz8TM8oJt5x7RDzC-ABPdYAm7J3w,10306
|
|
@@ -335,13 +202,13 @@ unstructured_ingest/processes/connectors/qdrant/local.py,sha256=3b43kSVoGMcFWTRi
|
|
|
335
202
|
unstructured_ingest/processes/connectors/qdrant/qdrant.py,sha256=1Y1-nfKqt4YooqKMqRKVE_ItV0S1v__PTaEhI3vvtOE,5456
|
|
336
203
|
unstructured_ingest/processes/connectors/qdrant/server.py,sha256=biyF4xr6e7CH0loj_OPt02Xrx4DMkkxqYMAsVXuJ5-Q,1607
|
|
337
204
|
unstructured_ingest/processes/connectors/sql/__init__.py,sha256=WNO7jSL1ABw7K5IxLc-eeKWGGJDk7jCp_OTLdTTkZug,2056
|
|
338
|
-
unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py,sha256=
|
|
339
|
-
unstructured_ingest/processes/connectors/sql/postgres.py,sha256=
|
|
340
|
-
unstructured_ingest/processes/connectors/sql/singlestore.py,sha256=
|
|
341
|
-
unstructured_ingest/processes/connectors/sql/snowflake.py,sha256=
|
|
342
|
-
unstructured_ingest/processes/connectors/sql/sql.py,sha256=
|
|
343
|
-
unstructured_ingest/processes/connectors/sql/sqlite.py,sha256=
|
|
344
|
-
unstructured_ingest/processes/connectors/sql/vastdb.py,sha256=
|
|
205
|
+
unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py,sha256=_IZFFGQUsHV9ScOOISrm6c5FSd4PnX91ePj_COat-gk,9320
|
|
206
|
+
unstructured_ingest/processes/connectors/sql/postgres.py,sha256=kDIL8Cj45EDpKqit1_araRpP4v3cb__QbYqoINg9f2k,5403
|
|
207
|
+
unstructured_ingest/processes/connectors/sql/singlestore.py,sha256=B46lpvyAj1AArpACi9MXbXD1-52zF6Dsj3RJtD1g4r0,5955
|
|
208
|
+
unstructured_ingest/processes/connectors/sql/snowflake.py,sha256=GSEoNrIoJM7p-Q-PrFiONamoxWzjQG8wZJG3mw5Uwdk,9589
|
|
209
|
+
unstructured_ingest/processes/connectors/sql/sql.py,sha256=yUGnv4MF_vT3VHdg7hhGiTD0be94ll-HyhHmRKQp_vQ,15712
|
|
210
|
+
unstructured_ingest/processes/connectors/sql/sqlite.py,sha256=V3OfRrXGGhTa_R2FPA-ysn95HHCv9x_VEBKVDsSGsbs,5549
|
|
211
|
+
unstructured_ingest/processes/connectors/sql/vastdb.py,sha256=trhvUBumDmj2rLjmxFBKw9L9wF6ZpssF0wfmRaG97H0,9803
|
|
345
212
|
unstructured_ingest/processes/connectors/weaviate/__init__.py,sha256=1Vnz8hm_Cf3NkQUTz5ZD4QkbLSVql4UvRoY2j2FnC9k,853
|
|
346
213
|
unstructured_ingest/processes/connectors/weaviate/cloud.py,sha256=tDQ4Vfph1RwADzS0Lk4TSoeT6TZ2gX9DNi78yXkgDw0,6245
|
|
347
214
|
unstructured_ingest/processes/connectors/weaviate/embedded.py,sha256=buizqBd6PSbd9VgRrOj43GZEorBpDFkUIkE6sN9emhw,3008
|
|
@@ -349,23 +216,22 @@ unstructured_ingest/processes/connectors/weaviate/local.py,sha256=4fgZsL9dgnWuaS
|
|
|
349
216
|
unstructured_ingest/processes/connectors/weaviate/weaviate.py,sha256=SqtGcQgejGH0N1R49tGrUtGcTB8mt7sywXmWFTIcpB8,12866
|
|
350
217
|
unstructured_ingest/processes/connectors/zendesk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
351
218
|
unstructured_ingest/processes/connectors/zendesk/client.py,sha256=GvPIpx4aYdD58-edHgvCFjFao94uR0O5Yf4dT9NCmSk,11952
|
|
352
|
-
unstructured_ingest/processes/connectors/zendesk/zendesk.py,sha256=
|
|
219
|
+
unstructured_ingest/processes/connectors/zendesk/zendesk.py,sha256=j5zS_7vJmYDEQtysz_UfwIUH65gc4r-Zjc1LocJr9FM,9033
|
|
353
220
|
unstructured_ingest/processes/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
354
|
-
unstructured_ingest/processes/utils/blob_storage.py,sha256=
|
|
221
|
+
unstructured_ingest/processes/utils/blob_storage.py,sha256=apMUmm9loxdbTRkkLH4VhG9kUVyiw9PFUJheSDxSxPk,1023
|
|
355
222
|
unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
356
223
|
unstructured_ingest/utils/chunking.py,sha256=9b3sXMA6L8RW5xAkKQbwdtVudGLAcj_sgT6Grh5tyYM,1870
|
|
357
224
|
unstructured_ingest/utils/compression.py,sha256=_BkFREoa0fkJ6z-1lY76HCmy8mLymbPCg55iMUQTd5c,2653
|
|
358
225
|
unstructured_ingest/utils/constants.py,sha256=pDspTYz-nEojHBqrZNfssGEiujmVa02pIWL63PQP9sU,103
|
|
359
|
-
unstructured_ingest/utils/data_prep.py,sha256=
|
|
226
|
+
unstructured_ingest/utils/data_prep.py,sha256=yqrv7x_nlj0y3uaN0m0Bnsekb7VIQnwABWPa24KU5QI,7426
|
|
360
227
|
unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWhmyBa7776Y,2400
|
|
361
228
|
unstructured_ingest/utils/html.py,sha256=0WduP8tI5S3nHFQi6XHNPHgsIC9j3iWwyIayX9gDLiE,6386
|
|
362
229
|
unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01qAbElH0,1201
|
|
363
230
|
unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
|
|
364
231
|
unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
|
|
365
232
|
unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
|
|
366
|
-
unstructured_ingest-0.
|
|
367
|
-
unstructured_ingest-0.
|
|
368
|
-
unstructured_ingest-0.
|
|
369
|
-
unstructured_ingest-0.
|
|
370
|
-
unstructured_ingest-0.
|
|
371
|
-
unstructured_ingest-0.7.2.dist-info/RECORD,,
|
|
233
|
+
unstructured_ingest-1.0.1.dist-info/METADATA,sha256=k_kEG2BSsnNaIyDSJWiciUW0Z-HDiPF_flO6kLjn8QI,8713
|
|
234
|
+
unstructured_ingest-1.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
235
|
+
unstructured_ingest-1.0.1.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
236
|
+
unstructured_ingest-1.0.1.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
237
|
+
unstructured_ingest-1.0.1.dist-info/RECORD,,
|
examples/__init__.py
DELETED
|
File without changes
|
examples/airtable.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import ProcessorConfig
|
|
5
|
-
from unstructured_ingest.logger import logger
|
|
6
|
-
from unstructured_ingest.pipeline.pipeline import Pipeline
|
|
7
|
-
from unstructured_ingest.processes.chunker import ChunkerConfig
|
|
8
|
-
from unstructured_ingest.processes.connectors.airtable import (
|
|
9
|
-
CONNECTOR_TYPE,
|
|
10
|
-
AirtableAccessConfig,
|
|
11
|
-
AirtableConnectionConfig,
|
|
12
|
-
AirtableDownloaderConfig,
|
|
13
|
-
AirtableIndexerConfig,
|
|
14
|
-
)
|
|
15
|
-
from unstructured_ingest.processes.connectors.local import (
|
|
16
|
-
LocalUploaderConfig,
|
|
17
|
-
)
|
|
18
|
-
from unstructured_ingest.processes.embedder import EmbedderConfig
|
|
19
|
-
from unstructured_ingest.processes.partitioner import PartitionerConfig
|
|
20
|
-
|
|
21
|
-
base_path = Path(__file__).parent.parent.parent.parent
|
|
22
|
-
docs_path = base_path / "example-docs"
|
|
23
|
-
work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
|
|
24
|
-
output_path = work_dir / "output"
|
|
25
|
-
download_path = work_dir / "download"
|
|
26
|
-
|
|
27
|
-
if __name__ == "__main__":
|
|
28
|
-
logger.info(f"writing all content in: {work_dir.resolve()}")
|
|
29
|
-
Pipeline.from_configs(
|
|
30
|
-
context=ProcessorConfig(work_dir=str(work_dir.resolve()), verbose=True),
|
|
31
|
-
indexer_config=AirtableIndexerConfig(
|
|
32
|
-
list_of_paths=["app5YQxSfp220fWtm", "appJ43QmP8I17zu88"]
|
|
33
|
-
),
|
|
34
|
-
downloader_config=AirtableDownloaderConfig(download_dir=download_path),
|
|
35
|
-
source_connection_config=AirtableConnectionConfig(
|
|
36
|
-
access_config=AirtableAccessConfig(
|
|
37
|
-
personal_access_token=os.getenv("AIRTABLE_PERSONAL_ACCESS_TOKEN")
|
|
38
|
-
)
|
|
39
|
-
),
|
|
40
|
-
partitioner_config=PartitionerConfig(strategy="fast"),
|
|
41
|
-
chunker_config=ChunkerConfig(chunking_strategy="by_title"),
|
|
42
|
-
embedder_config=EmbedderConfig(embedding_provider="huggingface"),
|
|
43
|
-
uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())),
|
|
44
|
-
).run()
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import ProcessorConfig
|
|
5
|
-
from unstructured_ingest.logger import logger
|
|
6
|
-
from unstructured_ingest.pipeline.pipeline import Pipeline
|
|
7
|
-
from unstructured_ingest.processes.chunker import ChunkerConfig
|
|
8
|
-
from unstructured_ingest.processes.connectors.azure_ai_search import (
|
|
9
|
-
CONNECTOR_TYPE,
|
|
10
|
-
AzureAISearchAccessConfig,
|
|
11
|
-
AzureAISearchConnectionConfig,
|
|
12
|
-
AzureAISearchUploaderConfig,
|
|
13
|
-
AzureAISearchUploadStagerConfig,
|
|
14
|
-
)
|
|
15
|
-
from unstructured_ingest.processes.connectors.local import (
|
|
16
|
-
LocalConnectionConfig,
|
|
17
|
-
LocalDownloaderConfig,
|
|
18
|
-
LocalIndexerConfig,
|
|
19
|
-
)
|
|
20
|
-
from unstructured_ingest.processes.embedder import EmbedderConfig
|
|
21
|
-
from unstructured_ingest.processes.partitioner import PartitionerConfig
|
|
22
|
-
|
|
23
|
-
base_path = Path(__file__).parent.parent.parent.parent
|
|
24
|
-
docs_path = base_path / "example-docs"
|
|
25
|
-
work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
|
|
26
|
-
output_path = work_dir / "output"
|
|
27
|
-
download_path = work_dir / "download"
|
|
28
|
-
|
|
29
|
-
if __name__ == "__main__":
|
|
30
|
-
logger.info(f"writing all content in: {work_dir.resolve()}")
|
|
31
|
-
index_name = "ingest-test-destination"
|
|
32
|
-
Pipeline.from_configs(
|
|
33
|
-
context=ProcessorConfig(work_dir=str(work_dir.resolve())),
|
|
34
|
-
indexer_config=LocalIndexerConfig(
|
|
35
|
-
input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt"
|
|
36
|
-
),
|
|
37
|
-
downloader_config=LocalDownloaderConfig(download_dir=download_path),
|
|
38
|
-
source_connection_config=LocalConnectionConfig(),
|
|
39
|
-
partitioner_config=PartitionerConfig(strategy="fast"),
|
|
40
|
-
chunker_config=ChunkerConfig(
|
|
41
|
-
chunking_strategy="by_title", chunk_include_orig_elements=False
|
|
42
|
-
),
|
|
43
|
-
embedder_config=EmbedderConfig(
|
|
44
|
-
embedding_provider="openai", embedding_api_key=os.getenv("OPENAI_API_KEY")
|
|
45
|
-
),
|
|
46
|
-
destination_connection_config=AzureAISearchConnectionConfig(
|
|
47
|
-
access_config=AzureAISearchAccessConfig(
|
|
48
|
-
azure_ai_search_key=os.getenv("AZURE_SEARCH_API_KEY")
|
|
49
|
-
),
|
|
50
|
-
index=os.getenv("AZURE_SEARCH_INDEX"),
|
|
51
|
-
endpoint=os.getenv("AZURE_SEARCH_ENDPOINT"),
|
|
52
|
-
),
|
|
53
|
-
uploader_config=AzureAISearchUploaderConfig(batch_size=10),
|
|
54
|
-
stager_config=AzureAISearchUploadStagerConfig(),
|
|
55
|
-
).run()
|
examples/chroma.py
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import random
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import ProcessorConfig
|
|
5
|
-
from unstructured_ingest.logger import logger
|
|
6
|
-
from unstructured_ingest.pipeline.pipeline import Pipeline
|
|
7
|
-
from unstructured_ingest.processes.chunker import ChunkerConfig
|
|
8
|
-
from unstructured_ingest.processes.connectors.chroma import (
|
|
9
|
-
CONNECTOR_TYPE,
|
|
10
|
-
ChromaAccessConfig,
|
|
11
|
-
ChromaConnectionConfig,
|
|
12
|
-
ChromaUploaderConfig,
|
|
13
|
-
ChromaUploadStagerConfig,
|
|
14
|
-
)
|
|
15
|
-
from unstructured_ingest.processes.connectors.local import (
|
|
16
|
-
LocalConnectionConfig,
|
|
17
|
-
LocalDownloaderConfig,
|
|
18
|
-
LocalIndexerConfig,
|
|
19
|
-
)
|
|
20
|
-
from unstructured_ingest.processes.embedder import EmbedderConfig
|
|
21
|
-
from unstructured_ingest.processes.partitioner import PartitionerConfig
|
|
22
|
-
|
|
23
|
-
base_path = Path(__file__).parent.parent.parent.parent
|
|
24
|
-
docs_path = base_path / "example-docs"
|
|
25
|
-
work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
|
|
26
|
-
output_path = work_dir / "output"
|
|
27
|
-
download_path = work_dir / "download"
|
|
28
|
-
|
|
29
|
-
if __name__ == "__main__":
|
|
30
|
-
logger.info(f"writing all content in: {work_dir.resolve()}")
|
|
31
|
-
Pipeline.from_configs(
|
|
32
|
-
context=ProcessorConfig(work_dir=str(work_dir.resolve())),
|
|
33
|
-
indexer_config=LocalIndexerConfig(input_path=docs_path.resolve() / "multisimple"),
|
|
34
|
-
downloader_config=LocalDownloaderConfig(download_dir=download_path),
|
|
35
|
-
source_connection_config=LocalConnectionConfig(),
|
|
36
|
-
partitioner_config=PartitionerConfig(strategy="fast"),
|
|
37
|
-
chunker_config=ChunkerConfig(
|
|
38
|
-
chunking_strategy="by_title",
|
|
39
|
-
chunk_include_orig_elements=False,
|
|
40
|
-
chunk_max_characters=1500,
|
|
41
|
-
chunk_multipage_sections=True,
|
|
42
|
-
),
|
|
43
|
-
embedder_config=EmbedderConfig(embedding_provider="huggingface"),
|
|
44
|
-
destination_connection_config=ChromaConnectionConfig(
|
|
45
|
-
access_config=ChromaAccessConfig(settings=None, headers=None),
|
|
46
|
-
host="localhost",
|
|
47
|
-
port=8047,
|
|
48
|
-
collection_name=f"test-collection-{random.randint(1000, 9999)}",
|
|
49
|
-
tenant="default_tenant",
|
|
50
|
-
database="default_database",
|
|
51
|
-
),
|
|
52
|
-
stager_config=ChromaUploadStagerConfig(),
|
|
53
|
-
uploader_config=ChromaUploaderConfig(batch_size=10),
|
|
54
|
-
).run()
|
examples/couchbase.py
DELETED
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
from unstructured_ingest.interfaces import ProcessorConfig
|
|
4
|
-
from unstructured_ingest.logger import logger
|
|
5
|
-
from unstructured_ingest.pipeline.pipeline import Pipeline
|
|
6
|
-
from unstructured_ingest.processes.chunker import ChunkerConfig
|
|
7
|
-
from unstructured_ingest.processes.connectors.couchbase import (
|
|
8
|
-
CONNECTOR_TYPE,
|
|
9
|
-
CouchbaseAccessConfig,
|
|
10
|
-
CouchbaseConnectionConfig,
|
|
11
|
-
CouchbaseUploaderConfig,
|
|
12
|
-
CouchbaseUploadStagerConfig,
|
|
13
|
-
)
|
|
14
|
-
from unstructured_ingest.processes.connectors.local import (
|
|
15
|
-
LocalConnectionConfig,
|
|
16
|
-
LocalDownloaderConfig,
|
|
17
|
-
LocalIndexerConfig,
|
|
18
|
-
)
|
|
19
|
-
from unstructured_ingest.processes.embedder import EmbedderConfig
|
|
20
|
-
from unstructured_ingest.processes.partitioner import PartitionerConfig
|
|
21
|
-
|
|
22
|
-
base_path = Path(__file__).parent.parent.parent.parent
|
|
23
|
-
docs_path = base_path / "example-docs"
|
|
24
|
-
work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
|
|
25
|
-
output_path = work_dir / "output"
|
|
26
|
-
download_path = work_dir / "download"
|
|
27
|
-
|
|
28
|
-
if __name__ == "__main__":
|
|
29
|
-
logger.info(f"writing all content in: {work_dir.resolve()}")
|
|
30
|
-
Pipeline.from_configs(
|
|
31
|
-
context=ProcessorConfig(work_dir=str(work_dir.resolve())),
|
|
32
|
-
indexer_config=LocalIndexerConfig(input_path=str(docs_path.resolve()) + "/multisimple/"),
|
|
33
|
-
downloader_config=LocalDownloaderConfig(download_dir=download_path),
|
|
34
|
-
source_connection_config=LocalConnectionConfig(),
|
|
35
|
-
partitioner_config=PartitionerConfig(strategy="fast"),
|
|
36
|
-
chunker_config=ChunkerConfig(
|
|
37
|
-
chunking_strategy="by_title",
|
|
38
|
-
chunk_include_orig_elements=False,
|
|
39
|
-
chunk_max_characters=1500,
|
|
40
|
-
chunk_multipage_sections=True,
|
|
41
|
-
),
|
|
42
|
-
embedder_config=EmbedderConfig(embedding_provider="huggingface"),
|
|
43
|
-
destination_connection_config=CouchbaseConnectionConfig(
|
|
44
|
-
access_config=CouchbaseAccessConfig(
|
|
45
|
-
connection_string="couchbase://localhost",
|
|
46
|
-
username="Administrator",
|
|
47
|
-
password="password",
|
|
48
|
-
),
|
|
49
|
-
bucket="example_bucket",
|
|
50
|
-
scope="example_scope",
|
|
51
|
-
collection="example_collection",
|
|
52
|
-
),
|
|
53
|
-
stager_config=CouchbaseUploadStagerConfig(),
|
|
54
|
-
uploader_config=CouchbaseUploaderConfig(batch_size=10),
|
|
55
|
-
).run()
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import ProcessorConfig
|
|
5
|
-
from unstructured_ingest.logger import logger
|
|
6
|
-
from unstructured_ingest.pipeline.pipeline import Pipeline
|
|
7
|
-
from unstructured_ingest.processes.chunker import ChunkerConfig
|
|
8
|
-
from unstructured_ingest.processes.connectors.databricks.volumes_native import (
|
|
9
|
-
CONNECTOR_TYPE,
|
|
10
|
-
DatabricksNativeVolumesAccessConfig,
|
|
11
|
-
DatabricksNativeVolumesConnectionConfig,
|
|
12
|
-
DatabricksNativeVolumesUploaderConfig,
|
|
13
|
-
)
|
|
14
|
-
from unstructured_ingest.processes.connectors.local import (
|
|
15
|
-
LocalConnectionConfig,
|
|
16
|
-
LocalDownloaderConfig,
|
|
17
|
-
LocalIndexerConfig,
|
|
18
|
-
)
|
|
19
|
-
from unstructured_ingest.processes.partitioner import PartitionerConfig
|
|
20
|
-
|
|
21
|
-
base_path = Path(__file__).parent.parent.parent.parent
|
|
22
|
-
docs_path = base_path / "example-docs"
|
|
23
|
-
work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
|
|
24
|
-
output_path = work_dir / "output"
|
|
25
|
-
download_path = work_dir / "download"
|
|
26
|
-
|
|
27
|
-
if __name__ == "__main__":
|
|
28
|
-
logger.info(f"writing all content in: {work_dir.resolve()}")
|
|
29
|
-
Pipeline.from_configs(
|
|
30
|
-
context=ProcessorConfig(work_dir=str(work_dir.resolve())),
|
|
31
|
-
indexer_config=LocalIndexerConfig(input_path=str(docs_path.resolve()) + "/fake-text.txt"),
|
|
32
|
-
downloader_config=LocalDownloaderConfig(download_dir=download_path),
|
|
33
|
-
source_connection_config=LocalConnectionConfig(),
|
|
34
|
-
partitioner_config=PartitionerConfig(strategy="fast"),
|
|
35
|
-
chunker_config=ChunkerConfig(
|
|
36
|
-
chunking_strategy="basic",
|
|
37
|
-
),
|
|
38
|
-
embedder_config=None,
|
|
39
|
-
destination_connection_config=DatabricksNativeVolumesConnectionConfig(
|
|
40
|
-
access_config=DatabricksNativeVolumesAccessConfig(
|
|
41
|
-
client_id=os.environ["DATABRICKS_CLIENT_ID"],
|
|
42
|
-
client_secret=os.environ["DATABRICKS_CLIENT_SECRET"],
|
|
43
|
-
),
|
|
44
|
-
host=os.environ["DATABRICKS_HOST"],
|
|
45
|
-
catalog=os.environ["DATABRICKS_CATALOG"],
|
|
46
|
-
volume=os.environ["DATABRICKS_VOLUME"],
|
|
47
|
-
volume_path=os.environ["DATABRICKS_VOLUME_PATH"],
|
|
48
|
-
),
|
|
49
|
-
uploader_config=DatabricksNativeVolumesUploaderConfig(
|
|
50
|
-
overwrite=True,
|
|
51
|
-
catalog=os.environ["DATABRICKS_CATALOG"],
|
|
52
|
-
volume=os.environ["DATABRICKS_VOLUME"],
|
|
53
|
-
volume_path=os.environ["DATABRICKS_VOLUME_PATH"],
|
|
54
|
-
),
|
|
55
|
-
).run()
|