unstructured-ingest 0.2.2__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/integration/connectors/elasticsearch/__init__.py +0 -0
- test/integration/connectors/elasticsearch/conftest.py +34 -0
- test/integration/connectors/elasticsearch/test_elasticsearch.py +308 -0
- test/integration/connectors/elasticsearch/test_opensearch.py +302 -0
- test/integration/connectors/sql/test_postgres.py +10 -4
- test/integration/connectors/sql/test_singlestore.py +8 -4
- test/integration/connectors/sql/test_snowflake.py +10 -6
- test/integration/connectors/sql/test_sqlite.py +4 -4
- test/integration/connectors/test_astradb.py +156 -0
- test/integration/connectors/test_azure_cog_search.py +233 -0
- test/integration/connectors/test_delta_table.py +46 -0
- test/integration/connectors/test_kafka.py +150 -16
- test/integration/connectors/test_lancedb.py +209 -0
- test/integration/connectors/test_milvus.py +141 -0
- test/integration/connectors/test_pinecone.py +213 -0
- test/integration/connectors/test_s3.py +23 -0
- test/integration/connectors/utils/docker.py +81 -15
- test/integration/connectors/utils/validation.py +10 -0
- test/integration/connectors/weaviate/__init__.py +0 -0
- test/integration/connectors/weaviate/conftest.py +15 -0
- test/integration/connectors/weaviate/test_local.py +131 -0
- test/unit/v2/__init__.py +0 -0
- test/unit/v2/chunkers/__init__.py +0 -0
- test/unit/v2/chunkers/test_chunkers.py +49 -0
- test/unit/v2/connectors/__init__.py +0 -0
- test/unit/v2/embedders/__init__.py +0 -0
- test/unit/v2/embedders/test_bedrock.py +36 -0
- test/unit/v2/embedders/test_huggingface.py +48 -0
- test/unit/v2/embedders/test_mixedbread.py +37 -0
- test/unit/v2/embedders/test_octoai.py +35 -0
- test/unit/v2/embedders/test_openai.py +35 -0
- test/unit/v2/embedders/test_togetherai.py +37 -0
- test/unit/v2/embedders/test_vertexai.py +37 -0
- test/unit/v2/embedders/test_voyageai.py +38 -0
- test/unit/v2/partitioners/__init__.py +0 -0
- test/unit/v2/partitioners/test_partitioner.py +63 -0
- test/unit/v2/utils/__init__.py +0 -0
- test/unit/v2/utils/data_generator.py +32 -0
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/cli/cmds/__init__.py +2 -2
- unstructured_ingest/cli/cmds/{azure_cognitive_search.py → azure_ai_search.py} +9 -9
- unstructured_ingest/connector/{azure_cognitive_search.py → azure_ai_search.py} +9 -9
- unstructured_ingest/pipeline/reformat/embedding.py +1 -1
- unstructured_ingest/runner/writers/__init__.py +2 -2
- unstructured_ingest/runner/writers/azure_ai_search.py +24 -0
- unstructured_ingest/utils/data_prep.py +9 -1
- unstructured_ingest/v2/constants.py +2 -0
- unstructured_ingest/v2/processes/connectors/__init__.py +7 -20
- unstructured_ingest/v2/processes/connectors/airtable.py +2 -2
- unstructured_ingest/v2/processes/connectors/astradb.py +35 -23
- unstructured_ingest/v2/processes/connectors/{azure_cognitive_search.py → azure_ai_search.py} +116 -35
- unstructured_ingest/v2/processes/connectors/confluence.py +2 -2
- unstructured_ingest/v2/processes/connectors/couchbase.py +1 -0
- unstructured_ingest/v2/processes/connectors/delta_table.py +37 -9
- unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py +19 -0
- unstructured_ingest/v2/processes/connectors/{elasticsearch.py → elasticsearch/elasticsearch.py} +93 -46
- unstructured_ingest/v2/processes/connectors/{opensearch.py → elasticsearch/opensearch.py} +1 -1
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +27 -0
- unstructured_ingest/v2/processes/connectors/google_drive.py +3 -3
- unstructured_ingest/v2/processes/connectors/kafka/__init__.py +6 -2
- unstructured_ingest/v2/processes/connectors/kafka/cloud.py +38 -2
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py +84 -23
- unstructured_ingest/v2/processes/connectors/kafka/local.py +32 -4
- unstructured_ingest/v2/processes/connectors/lancedb/__init__.py +17 -0
- unstructured_ingest/v2/processes/connectors/lancedb/aws.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/azure.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/gcp.py +44 -0
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +161 -0
- unstructured_ingest/v2/processes/connectors/lancedb/local.py +44 -0
- unstructured_ingest/v2/processes/connectors/milvus.py +72 -27
- unstructured_ingest/v2/processes/connectors/onedrive.py +2 -3
- unstructured_ingest/v2/processes/connectors/outlook.py +2 -2
- unstructured_ingest/v2/processes/connectors/pinecone.py +101 -13
- unstructured_ingest/v2/processes/connectors/sharepoint.py +3 -2
- unstructured_ingest/v2/processes/connectors/slack.py +2 -2
- unstructured_ingest/v2/processes/connectors/sql/postgres.py +16 -8
- unstructured_ingest/v2/processes/connectors/sql/sql.py +97 -26
- unstructured_ingest/v2/processes/connectors/weaviate/__init__.py +22 -0
- unstructured_ingest/v2/processes/connectors/weaviate/cloud.py +164 -0
- unstructured_ingest/v2/processes/connectors/weaviate/embedded.py +90 -0
- unstructured_ingest/v2/processes/connectors/weaviate/local.py +73 -0
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +289 -0
- {unstructured_ingest-0.2.2.dist-info → unstructured_ingest-0.3.1.dist-info}/METADATA +20 -19
- {unstructured_ingest-0.2.2.dist-info → unstructured_ingest-0.3.1.dist-info}/RECORD +91 -50
- unstructured_ingest/runner/writers/azure_cognitive_search.py +0 -24
- unstructured_ingest/v2/processes/connectors/weaviate.py +0 -242
- /test/integration/embedders/{togetherai.py → test_togetherai.py} +0 -0
- /test/unit/{test_interfaces_v2.py → v2/test_interfaces.py} +0 -0
- /test/unit/{test_utils_v2.py → v2/test_utils.py} +0 -0
- {unstructured_ingest-0.2.2.dist-info → unstructured_ingest-0.3.1.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.2.2.dist-info → unstructured_ingest-0.3.1.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.2.2.dist-info → unstructured_ingest-0.3.1.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.2.2.dist-info → unstructured_ingest-0.3.1.dist-info}/top_level.txt +0 -0
|
@@ -5,24 +5,36 @@ test/integration/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
|
|
|
5
5
|
test/integration/chunkers/test_chunkers.py,sha256=pqn1Rqh36jZTJL4qpU0iuOMFAEQ-LrKAPOgWtQMAt_I,1482
|
|
6
6
|
test/integration/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
test/integration/connectors/conftest.py,sha256=6dVNMBrL6WIO4KXA-0nf2tNrPYk_tsor8uomi6fbi3Q,727
|
|
8
|
+
test/integration/connectors/test_astradb.py,sha256=QPFrODXmOHagpuKaiooxXb3OEW93w2g4fmq8BkaBCnY,5303
|
|
9
|
+
test/integration/connectors/test_azure_cog_search.py,sha256=dae4GifRiKue5YpsxworDiaMQoMsxcPDBithb6OFkx4,8876
|
|
8
10
|
test/integration/connectors/test_confluence.py,sha256=xcPmZ_vi_pkCt-tUPn10P49FH9i_9YUbrAPO6fYk5rU,3521
|
|
9
|
-
test/integration/connectors/test_delta_table.py,sha256=
|
|
10
|
-
test/integration/connectors/test_kafka.py,sha256=
|
|
11
|
+
test/integration/connectors/test_delta_table.py,sha256=GSzWIkbEUzOrRPt2F1uO0dabcp7kTFDj75BhhI2y-WU,6856
|
|
12
|
+
test/integration/connectors/test_kafka.py,sha256=j7jsNWZumNBv9v-5Bpx8geUUXpxxad5EuA4CMRsl4R8,7104
|
|
13
|
+
test/integration/connectors/test_lancedb.py,sha256=1EqdXOaA3gJqXDe1W-dHUzfOfeL1A4RB0oYwKvlfltg,7590
|
|
14
|
+
test/integration/connectors/test_milvus.py,sha256=CVmYw9iEeKT_0OtShxye2E6i1LbWzzDA8JtwJRkYQlA,4763
|
|
11
15
|
test/integration/connectors/test_onedrive.py,sha256=KIkBwKh1hnv203VCL2UABnDkS_bP4NxOFm1AL8EPGLA,3554
|
|
16
|
+
test/integration/connectors/test_pinecone.py,sha256=X10OWZ6IrO6YyhuR3ydMAZOQq3u2f5u_lCjKNYUUcnI,7558
|
|
12
17
|
test/integration/connectors/test_qdrant.py,sha256=ASvO-BNyhv8m8or28KljrJy27Da0uaTNeoR5w_QsvFg,5121
|
|
13
|
-
test/integration/connectors/test_s3.py,sha256=
|
|
18
|
+
test/integration/connectors/test_s3.py,sha256=YHEYMqWTKTfR7wlL4VoxtgMs1YiYKyhLIBdG-anaQGo,6896
|
|
14
19
|
test/integration/connectors/databricks_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
20
|
test/integration/connectors/databricks_tests/test_volumes_native.py,sha256=k4lALbwNtlyuI3wd3OHoBULI21E3Ck2Fo8EJXaVfwgw,5812
|
|
21
|
+
test/integration/connectors/elasticsearch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
|
+
test/integration/connectors/elasticsearch/conftest.py,sha256=-i4_7MkIxSQENz7nuD2uHuhGU9mZ33vpeTPhHtRpQfs,989
|
|
23
|
+
test/integration/connectors/elasticsearch/test_elasticsearch.py,sha256=nqdHwBpvgk_74orzDaQIKALK5cb0YloxSdt7QDJX0r0,11169
|
|
24
|
+
test/integration/connectors/elasticsearch/test_opensearch.py,sha256=Rk4tQ_Qv5icycDWMUpnzTbg-QzwGyb6nKqB0gDef9D0,10555
|
|
16
25
|
test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
-
test/integration/connectors/sql/test_postgres.py,sha256=
|
|
18
|
-
test/integration/connectors/sql/test_singlestore.py,sha256=
|
|
19
|
-
test/integration/connectors/sql/test_snowflake.py,sha256=
|
|
20
|
-
test/integration/connectors/sql/test_sqlite.py,sha256=
|
|
26
|
+
test/integration/connectors/sql/test_postgres.py,sha256=lrymDI7bVX_4qij5gsUc_bTvHPeelu6hpJemQ6WWmlY,6783
|
|
27
|
+
test/integration/connectors/sql/test_singlestore.py,sha256=iCp9q6tzhNIUCUubCPiRKj6VmJnwot4JGo9fkkTHg_U,5960
|
|
28
|
+
test/integration/connectors/sql/test_snowflake.py,sha256=DqQIV9H5Uv7HaHtDyrAPdqefd316oVt5lKtdJ2Zdk6Q,7082
|
|
29
|
+
test/integration/connectors/sql/test_sqlite.py,sha256=gSfp2hXAb5BGknzZXVa7K5bBwEb5Li4k5493mQCFjBQ,5719
|
|
21
30
|
test/integration/connectors/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
31
|
test/integration/connectors/utils/constants.py,sha256=0zSPnsZVqJuNhXduXvdXFQLZTRIQa5Fo_1qjBYVCfb8,209
|
|
23
|
-
test/integration/connectors/utils/docker.py,sha256=
|
|
32
|
+
test/integration/connectors/utils/docker.py,sha256=lnSjRgYoQa5c5nBdg2eLkB8KJVOjk4eyqq_C6PtTkME,4806
|
|
24
33
|
test/integration/connectors/utils/docker_compose.py,sha256=GVTB6Cel05c0VQ2n4AwkQQx_cBfz13ZTs1HpbaYipNU,2223
|
|
25
|
-
test/integration/connectors/utils/validation.py,sha256=
|
|
34
|
+
test/integration/connectors/utils/validation.py,sha256=SwvPVuHjJxTo8xEUwnuL9FZNpu3sZZ8iouOz5xh_kB8,14272
|
|
35
|
+
test/integration/connectors/weaviate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
|
+
test/integration/connectors/weaviate/conftest.py,sha256=6Q6QdrLJmGHowRFSmoVSzup2EX6qASfS2Z5tqlpTm9M,387
|
|
37
|
+
test/integration/connectors/weaviate/test_local.py,sha256=SK6iEwQUKiCd0X99BEk8GlQoLaCcJcFPt09NN526Ct0,4508
|
|
26
38
|
test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
39
|
test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
|
|
28
40
|
test/integration/embedders/test_bedrock.py,sha256=0oBRNS_DtFDGQ22Z1T3t6VOJ31PrItgvnJpqcLe9Fg4,1903
|
|
@@ -30,9 +42,9 @@ test/integration/embedders/test_huggingface.py,sha256=0mMTOO-Nh7KB70AGs_7LLQIxMY
|
|
|
30
42
|
test/integration/embedders/test_mixedbread.py,sha256=RrLv8SByMNXsgrlh94RbaT-VyxZ4-DILO-OPpmOwvSI,1441
|
|
31
43
|
test/integration/embedders/test_octoai.py,sha256=LnR0BLttamW5PGid6jFxATDAi0x7hq5iWMXurbHP6TI,1328
|
|
32
44
|
test/integration/embedders/test_openai.py,sha256=0jlFqEeeCneIWX9tGyC3TXeUNqsMXR7u5n7uEIaAQKo,1328
|
|
45
|
+
test/integration/embedders/test_togetherai.py,sha256=0W1ScD5yb1D9hPC2ewUsuCHLUOpCuM083YMBhqAI9fw,1395
|
|
33
46
|
test/integration/embedders/test_vertexai.py,sha256=OtoFzmrWWhGIO5Bbl5zt_4sp6qRHZxtaDQKpGcfzNLM,1345
|
|
34
47
|
test/integration/embedders/test_voyageai.py,sha256=Zqf7nn1AxfBDBr5A9Jr-5pxes4QNvfKiyeGexCCm4nY,1346
|
|
35
|
-
test/integration/embedders/togetherai.py,sha256=0W1ScD5yb1D9hPC2ewUsuCHLUOpCuM083YMBhqAI9fw,1395
|
|
36
48
|
test/integration/embedders/utils.py,sha256=3AMKMBpgBep_0jFqrqMHH8BJo6w60kpouSZ5JPJTwIA,1850
|
|
37
49
|
test/integration/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
50
|
test/integration/partitioners/test_partitioner.py,sha256=KEpnhsz2YNAoQ2UZGOTsi1_uk1h4Vg-gGTsy5Fe9OCw,2846
|
|
@@ -40,18 +52,35 @@ test/unit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
40
52
|
test/unit/test_chunking_utils.py,sha256=0iPwfnMPpyTm-yOE0BXMnEQQP4iguS6NhOqgMQU5nhk,1390
|
|
41
53
|
test/unit/test_error.py,sha256=RflmngCdFNKOLXVfLnUdNfY3Mfg3k7DTEzfIl0B-syU,840
|
|
42
54
|
test/unit/test_interfaces.py,sha256=XNj8qasc1ltaeUv-2y31rv7R9xquo0rgRrMvBZoNZLw,9623
|
|
43
|
-
test/unit/test_interfaces_v2.py,sha256=nyxUsRX1M6Mfhux7SqEhal85PIaWO5xhm6ZTcqpPpHI,790
|
|
44
55
|
test/unit/test_logger.py,sha256=0SKndXE_VRd8XmUHkrj7zuBQHZscXx3ZQllMEOvtF9Y,2380
|
|
45
56
|
test/unit/test_utils.py,sha256=xJ9WGpHBihWpQWvIzd6z99UIdZJba8U7c31h3q6C9To,4800
|
|
46
|
-
test/unit/test_utils_v2.py,sha256=TWVAeE0OrcHgPyzGPtEnQakICsVrDeVhIKPMRQPX554,2638
|
|
47
57
|
test/unit/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
58
|
test/unit/embed/test_mixedbreadai.py,sha256=XFNJDP5pIgF3eQYwBiuEWmH3zZWx72Wpwyv-Q4m0DJg,1332
|
|
49
59
|
test/unit/embed/test_octoai.py,sha256=Ha9EgAW64Q45hFj51tToe8RyKXWXwqAkdDqSFDMu37Q,831
|
|
50
60
|
test/unit/embed/test_openai.py,sha256=0O1yshDcE0BMKv1yJqrNuiNLSdPhLpKqJ-D_wmnidsM,831
|
|
51
61
|
test/unit/embed/test_vertexai.py,sha256=Pl7COc9E3tf_yGidkTEmTizNGyZF1F5zuL2TgPTMnfI,1048
|
|
52
62
|
test/unit/embed/test_voyageai.py,sha256=DviCOJFhe5H4e26-kNyX3JNe8h3qB5Yl0KOe8rQEMrc,981
|
|
63
|
+
test/unit/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
64
|
+
test/unit/v2/test_interfaces.py,sha256=nyxUsRX1M6Mfhux7SqEhal85PIaWO5xhm6ZTcqpPpHI,790
|
|
65
|
+
test/unit/v2/test_utils.py,sha256=TWVAeE0OrcHgPyzGPtEnQakICsVrDeVhIKPMRQPX554,2638
|
|
66
|
+
test/unit/v2/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
67
|
+
test/unit/v2/chunkers/test_chunkers.py,sha256=HSr3_lsoMw1nkDhkjO0-NOTEomRdR9oxCrSXvcMFecE,1772
|
|
68
|
+
test/unit/v2/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
69
|
+
test/unit/v2/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
70
|
+
test/unit/v2/embedders/test_bedrock.py,sha256=sW-Vv-u3Yiw8rHPOfE5x_reywXlnozxO49rIMx6_xjo,1071
|
|
71
|
+
test/unit/v2/embedders/test_huggingface.py,sha256=mkVPym7TZkRJchwHedujgFXWdL9sVMi1W90jpmZ_vxg,1543
|
|
72
|
+
test/unit/v2/embedders/test_mixedbread.py,sha256=8yT942TVVXC5EkrT_ReZie1In537BaAD6esRjntgxuU,1021
|
|
73
|
+
test/unit/v2/embedders/test_octoai.py,sha256=JMfrFz25QfEh0ieB4bJneZd4XtNcdPOnNsN1Fj7gU-Q,1012
|
|
74
|
+
test/unit/v2/embedders/test_openai.py,sha256=HoEW95289Ijgo3PJ-pEaDOknfdkSjPXTgkXmE6jJomY,1012
|
|
75
|
+
test/unit/v2/embedders/test_togetherai.py,sha256=s24V_geDNZzblU74sSdC_m4Lqlzjp00RMpy56ptfdx0,1009
|
|
76
|
+
test/unit/v2/embedders/test_vertexai.py,sha256=_4a0tw_GbyvgYJSrP1yw1KjEQJYGzqR5yNXBCSdK8yQ,1145
|
|
77
|
+
test/unit/v2/embedders/test_voyageai.py,sha256=De_25F0EhxTNLmAE_c-EK2pFO5p54ad1TVVF055y6p0,1186
|
|
78
|
+
test/unit/v2/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
79
|
+
test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U-dS0ga6h04h7WSfg,2281
|
|
80
|
+
test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
81
|
+
test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
53
82
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
54
|
-
unstructured_ingest/__version__.py,sha256=
|
|
83
|
+
unstructured_ingest/__version__.py,sha256=0bjUtHIzwwONNua74ouSySVzVv9qumqBMBxOWLE7Tbo,42
|
|
55
84
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
56
85
|
unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
|
|
57
86
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -67,10 +96,10 @@ unstructured_ingest/cli/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
67
96
|
unstructured_ingest/cli/base/cmd.py,sha256=BbfjA2v203Jh-7DL6bzxQ7fOeNixd5BsBMuzXz6h5IQ,583
|
|
68
97
|
unstructured_ingest/cli/base/dest.py,sha256=uN44l7kPErm_BQqKFUgaiz_Xu6UKk-mnB1B8c0cb4lQ,3416
|
|
69
98
|
unstructured_ingest/cli/base/src.py,sha256=gDLZlBuOCEGMAAFCLkoURFQKmrmE34WQ5DbT0w1ssy4,2179
|
|
70
|
-
unstructured_ingest/cli/cmds/__init__.py,sha256=
|
|
99
|
+
unstructured_ingest/cli/cmds/__init__.py,sha256=jFCRPXVYQElp20md_BYMcZ_3aiQCIxu6Qii0K-jwNv8,5911
|
|
71
100
|
unstructured_ingest/cli/cmds/airtable.py,sha256=SgdUztUCFbabWP3K937TwdYlhrdY2PUtE4TXUHfBGtw,2629
|
|
72
101
|
unstructured_ingest/cli/cmds/astradb.py,sha256=2subkQvVHOxSs98Zu3bPKtIUl-uzjNEMarcYQaPYCh0,2929
|
|
73
|
-
unstructured_ingest/cli/cmds/
|
|
102
|
+
unstructured_ingest/cli/cmds/azure_ai_search.py,sha256=z_z5A9YZq2bW3D3mhMkzAlmyXqBQUWzbxXAmGwgUwPA,1850
|
|
74
103
|
unstructured_ingest/cli/cmds/biomed.py,sha256=M2jc7_-EvbAeDtDwtZNrPI48QJ1Tm401LcSUD0Ayd20,1442
|
|
75
104
|
unstructured_ingest/cli/cmds/chroma.py,sha256=zO17L7LgUaDfKutfQjSE-QjZJcREyeSpWZetja243lE,3267
|
|
76
105
|
unstructured_ingest/cli/cmds/clarifai.py,sha256=akkES1Z0xomd1LPGnxWNhNKbCBiRZgl-gEOzhg8t8To,1921
|
|
@@ -112,7 +141,7 @@ unstructured_ingest/cli/cmds/fsspec/sftp.py,sha256=TCB7sf_GYoifryQbbttknYSt9Q1kR
|
|
|
112
141
|
unstructured_ingest/connector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
113
142
|
unstructured_ingest/connector/airtable.py,sha256=KcLt-FEabO9D5ev5E4xUf06VYHpYpypP-adTVyhGcb8,10585
|
|
114
143
|
unstructured_ingest/connector/astradb.py,sha256=jU2rS5Xips6eNNrHvNIxIYD-2u7wwPzW08aMc5NKZOE,9814
|
|
115
|
-
unstructured_ingest/connector/
|
|
144
|
+
unstructured_ingest/connector/azure_ai_search.py,sha256=HNC6GWStb1S_Bj0PSKNfsWK2kyTwgGdGijq-accb1UY,5787
|
|
116
145
|
unstructured_ingest/connector/biomed.py,sha256=uwtBuKzpHfxbJckHAHcsnKo4dTCdag66tCDtCqKNSZM,10847
|
|
117
146
|
unstructured_ingest/connector/chroma.py,sha256=Nma6HebQxNY7CCWwWArkX3kMXf2xVv6L-jrfRjMi9LE,5713
|
|
118
147
|
unstructured_ingest/connector/clarifai.py,sha256=kAtPGrjOps_aYdlhHkTtQc46Rfc0woNor6VY1UGEKZI,4211
|
|
@@ -247,7 +276,7 @@ unstructured_ingest/pipeline/utils.py,sha256=RNx4bv2FhKOhaK_YTiRubta7n9wmJwqzznF
|
|
|
247
276
|
unstructured_ingest/pipeline/write.py,sha256=xmDjmbieGRrcI342he7PkgxWaMoSJ5nWPmP5AM2xloU,669
|
|
248
277
|
unstructured_ingest/pipeline/reformat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
249
278
|
unstructured_ingest/pipeline/reformat/chunking.py,sha256=vbJgi2Yl9Rd9yZxIf64Nxj6cjUJnJWRpDCagswQmrLw,6040
|
|
250
|
-
unstructured_ingest/pipeline/reformat/embedding.py,sha256=
|
|
279
|
+
unstructured_ingest/pipeline/reformat/embedding.py,sha256=vyRgrNvz50eMOCO00YdV9ODK0LRIB3_NF6t1mWD01uc,2525
|
|
251
280
|
unstructured_ingest/runner/__init__.py,sha256=FO0X_jBIMilXdyjBajyFmzHoC3eVypNMGlhdOW4mcCM,2859
|
|
252
281
|
unstructured_ingest/runner/airtable.py,sha256=1ndJ6PKT63E0gZN3KYFBj4Yo94zQYsIvSjC6ro2nIPE,1115
|
|
253
282
|
unstructured_ingest/runner/astradb.py,sha256=FSBtQrsdC9E3eHUcAuQ0apcCnWolz-9tkvy-Uf7QeKg,1102
|
|
@@ -283,9 +312,9 @@ unstructured_ingest/runner/fsspec/fsspec.py,sha256=83LpsJAgPDJ3HzCKeaWXh7alO8duL
|
|
|
283
312
|
unstructured_ingest/runner/fsspec/gcs.py,sha256=HZyQBoHdnLRA9pULopY7k3b9xLEviENwuDmDGpUoZmU,949
|
|
284
313
|
unstructured_ingest/runner/fsspec/s3.py,sha256=LPsm-Kz1XmrVHM5nj6OcADnI-K6rVbtGXGfSicN_g-A,941
|
|
285
314
|
unstructured_ingest/runner/fsspec/sftp.py,sha256=6vD_CVnxcdpHt4wSEQJ-tQvKL6BQJYxlw2g6OHzlTWw,957
|
|
286
|
-
unstructured_ingest/runner/writers/__init__.py,sha256=
|
|
315
|
+
unstructured_ingest/runner/writers/__init__.py,sha256=rfajJ1ccFt9RiGcHuoDLdtSJrldYYUYebihREOMrD_Y,1500
|
|
287
316
|
unstructured_ingest/runner/writers/astradb.py,sha256=-WDJtRgdBho1S7ju52HEXcrAAiuqrfH1t2dpFcoAGzg,759
|
|
288
|
-
unstructured_ingest/runner/writers/
|
|
317
|
+
unstructured_ingest/runner/writers/azure_ai_search.py,sha256=o5dKksXFKCLTgx6LxIPja0YZFTNL5swch6asTXI61tI,750
|
|
289
318
|
unstructured_ingest/runner/writers/base_writer.py,sha256=S16pacw1HbAj9D5L8tWJbVjVJzv1Xp5RYTj3J9rtrHo,669
|
|
290
319
|
unstructured_ingest/runner/writers/chroma.py,sha256=VDeaZPkJjBl55l1ztMK1cW-72N8j5F4Ro5Oh8stYKPo,750
|
|
291
320
|
unstructured_ingest/runner/writers/clarifai.py,sha256=QM-sHIaL-hVXofZbCfYgg_-_ju0kBMlFDixzrZGA0Tg,637
|
|
@@ -309,12 +338,13 @@ unstructured_ingest/runner/writers/fsspec/s3.py,sha256=kHJq2O3864QBd_tL2SKb0mdyw
|
|
|
309
338
|
unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
310
339
|
unstructured_ingest/utils/chunking.py,sha256=efWEfMcCukG5zASZrXhkNgAX8AzHa6t3rClMzm2TwFE,1521
|
|
311
340
|
unstructured_ingest/utils/compression.py,sha256=NNiY-2S2Gf3at7zC1PYxMijaEza9vVSzRn5mdFf6mHo,4434
|
|
312
|
-
unstructured_ingest/utils/data_prep.py,sha256=
|
|
341
|
+
unstructured_ingest/utils/data_prep.py,sha256=IDAedOSBdgZpD9IY4tLJT-rmKGV7GHtU6KRj6VM-_tE,4666
|
|
313
342
|
unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWhmyBa7776Y,2400
|
|
314
343
|
unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmizdy2e6TsjigXTSRU,468
|
|
315
344
|
unstructured_ingest/utils/string_and_date_utils.py,sha256=LwcbLmWpwt1zEabLlyUd5kIf9oOWcZxsRzxDglLCMeU,1375
|
|
316
345
|
unstructured_ingest/utils/table.py,sha256=aWjcowDVSClNpEAdR6PY3H7khKu4T6T3QqQE6GjmQ_M,3469
|
|
317
346
|
unstructured_ingest/v2/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
347
|
+
unstructured_ingest/v2/constants.py,sha256=pDspTYz-nEojHBqrZNfssGEiujmVa02pIWL63PQP9sU,103
|
|
318
348
|
unstructured_ingest/v2/logger.py,sha256=wcln4s5Nyp2fjjJux9iM3d6t9aQFNJ2H1IAZXmIknjI,4323
|
|
319
349
|
unstructured_ingest/v2/main.py,sha256=WFdLEqEXRy6E9_G-dF20MK2AtgX51Aan1sp_N67U2B8,172
|
|
320
350
|
unstructured_ingest/v2/otel.py,sha256=2fGj1c7cVcC3J8NwL6MNYhyPEAXiB33DsilvRDkrdLo,4130
|
|
@@ -361,63 +391,74 @@ unstructured_ingest/v2/processes/embedder.py,sha256=PQn0IO8xbGRQHpcT2VVl-J8gTJ5H
|
|
|
361
391
|
unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
|
|
362
392
|
unstructured_ingest/v2/processes/partitioner.py,sha256=agpHwB9FR8OZVQqE7zFEb0IcDPCOPA_BZjLzLF71nOY,8194
|
|
363
393
|
unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
|
|
364
|
-
unstructured_ingest/v2/processes/connectors/__init__.py,sha256=
|
|
365
|
-
unstructured_ingest/v2/processes/connectors/airtable.py,sha256=
|
|
366
|
-
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=
|
|
367
|
-
unstructured_ingest/v2/processes/connectors/
|
|
394
|
+
unstructured_ingest/v2/processes/connectors/__init__.py,sha256=8M3aYYNbOkS2SYG2B_HLHMgX4V69-Oz1VqpQcRQMiVg,5167
|
|
395
|
+
unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XEFCdcGeJCguJU5WN2Mv9kLp5dVQ,8917
|
|
396
|
+
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=zsIElFNNqVCXcLqBw6C8bRoyPQDrGNPkTWeA0FYYO94,14703
|
|
397
|
+
unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=-6IijSWGqj-85vD0c4l5wdMHp-LF371jO8j53PPRB4I,12002
|
|
368
398
|
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=skrxRPHZ8y3JxNa0dt5SVitHiDQ5WVxLvY_kh2-QUrQ,8029
|
|
369
|
-
unstructured_ingest/v2/processes/connectors/confluence.py,sha256=
|
|
370
|
-
unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=
|
|
371
|
-
unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=
|
|
372
|
-
unstructured_ingest/v2/processes/connectors/elasticsearch.py,sha256=ojxMUHkLa6ZG50aTGn2YWhDHZ1n38uFRn5p8_ghAIvM,16762
|
|
399
|
+
unstructured_ingest/v2/processes/connectors/confluence.py,sha256=qQApDcmPBGg4tHXwSOj4JPkAbrO9GQ4NRlaETjhp25U,7003
|
|
400
|
+
unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=yhMDbpkZXs-Kis7tFlgjvNemU-MdWMdpCZDrpZNFaU4,12180
|
|
401
|
+
unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=1yS7ivEyiucwd_kv6LL5HQdGabT43yeG6XCdwiz89hc,8019
|
|
373
402
|
unstructured_ingest/v2/processes/connectors/gitlab.py,sha256=yBgCeLy9iCVI8bBDcHHuHB0H3BO05e9E1OccbHwvKAo,9724
|
|
374
|
-
unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=
|
|
403
|
+
unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=a1BAvhX3nsgghjuR5CJ1lOwMtJ5ZJwimg6VtDYvluxA,13104
|
|
375
404
|
unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=8bGHbZctJ_Tl1AUSMnI7CCZ7CgEtTRVcRuvlB1HPlqQ,5907
|
|
376
405
|
unstructured_ingest/v2/processes/connectors/local.py,sha256=a3stgnIkhBbXPIQD0O-RaRM-Eb-szHj9Yy4Fz881-9c,6723
|
|
377
|
-
unstructured_ingest/v2/processes/connectors/milvus.py,sha256=
|
|
406
|
+
unstructured_ingest/v2/processes/connectors/milvus.py,sha256=Bzv2fa852BcM4_Pr-I_DPvLmjPoXv0Z7BeEA8qSKCDc,9725
|
|
378
407
|
unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=A0pt6JcNTD5bEu79jZ8KhnHcBQ2VUJ2AjtQAtdFr_Lo,13175
|
|
379
|
-
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=
|
|
380
|
-
unstructured_ingest/v2/processes/connectors/
|
|
381
|
-
unstructured_ingest/v2/processes/connectors/
|
|
382
|
-
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=k_GH55S_OQ6-wCLC6gkhRrNpXIFECYZ_2Gjz_XRtY6Y,7561
|
|
408
|
+
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=heZMtOIrCySi552ldIk8iH0pSRXZ0W2LeD-CcNOwCFQ,15979
|
|
409
|
+
unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
|
|
410
|
+
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=hWkXgVDAzCtrBxf7A4HoexBACGAfVf_Qvn9YHbeiBSY,11505
|
|
383
411
|
unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
|
|
384
|
-
unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=
|
|
385
|
-
unstructured_ingest/v2/processes/connectors/slack.py,sha256=
|
|
412
|
+
unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=Ndn2Wm7RupfjAtlLxxQwJueeE0V8aGMbNVPuFq9nqdQ,19730
|
|
413
|
+
unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
|
|
386
414
|
unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
|
|
387
|
-
unstructured_ingest/v2/processes/connectors/weaviate.py,sha256=Ss0YyD5T6k-00eJ6dr5lSo2H0LcOjVTMmozehyTvnAo,8866
|
|
388
415
|
unstructured_ingest/v2/processes/connectors/databricks/__init__.py,sha256=jO71UTC7bLA_N12CrLWJzh_yZML5gfT7VohxzCpUGWg,1848
|
|
389
416
|
unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=p7sjCYZb7JmY3v3Xy1gm-q0O7oamLTsSFf2EWXYfXYQ,6447
|
|
390
417
|
unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=I1MJwe5LOxoPLjwo00H0XbXO6u_SJHWYgsj4s6ePoyI,2754
|
|
391
418
|
unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=P4rfcE3td7WyuuguRgUnGQytCMDpfeYrrpshBZuVynY,3539
|
|
392
419
|
unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=UUotY_-HpgSEJkvdQfZTlbxY7CRLZ4ctL8TlryeFvxk,2790
|
|
393
420
|
unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=Wk7s2_u5G0BOV5slvGc8IlUf7ivznY9PrgPqe6nlJKM,2897
|
|
421
|
+
unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py,sha256=Zzc0JNPP-eFqpwWw1Gp-XC8H-s__IgkYKzoagECycZY,829
|
|
422
|
+
unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=lzbrQ66zz3Dh_G29XFkyzQ84St8H_xfQVsYV4mTf32c,19141
|
|
423
|
+
unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py,sha256=qRz8Fyr2RSZIPZGkhPeme6AZxM0aX-c_xOa1ZtSr2Kg,6781
|
|
394
424
|
unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
|
|
395
425
|
unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=Y01BuVRql0Kvzc_cdaZE9dDGYjJzrwJu-etfUrEGcUU,7061
|
|
396
426
|
unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=Cjk0LUxqOCDbme0GmnD_5_b1hfStjI23cKw6BquKNrg,5488
|
|
397
427
|
unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py,sha256=NNAxIRdOQxUncfwhu7J7SnQRM6BSStNOyQZi-4E51iY,5816
|
|
398
|
-
unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=
|
|
428
|
+
unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=lOfAOwWQd4V1yAcGxR6obtsAbnhE4P2-7pU-JFfC5lE,12180
|
|
399
429
|
unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=-_pYHbsBG9FyRyNIaf_xyFbPiiR7pnWEEg_8mp0rIZ8,7053
|
|
400
430
|
unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=je1BDqFWlyMfPa4oAMMNFQLLQtCY9quuqx3xjTwF8OQ,6251
|
|
401
431
|
unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=dwpyqDq0qceCBWX3zM1hiUlgXB4hzX6ObOr-sh-5CJs,6926
|
|
402
432
|
unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
|
|
403
|
-
unstructured_ingest/v2/processes/connectors/kafka/__init__.py,sha256=
|
|
404
|
-
unstructured_ingest/v2/processes/connectors/kafka/cloud.py,sha256=
|
|
405
|
-
unstructured_ingest/v2/processes/connectors/kafka/kafka.py,sha256=
|
|
406
|
-
unstructured_ingest/v2/processes/connectors/kafka/local.py,sha256=
|
|
433
|
+
unstructured_ingest/v2/processes/connectors/kafka/__init__.py,sha256=mQJ9Ex-QCfhz-BB5YWTfbPf7xGLd1i7FpjRr0ukbhNw,754
|
|
434
|
+
unstructured_ingest/v2/processes/connectors/kafka/cloud.py,sha256=qprsfI8VH0mVTa1MOCpa2D4coyopinQ5ag2KXcAecXE,3296
|
|
435
|
+
unstructured_ingest/v2/processes/connectors/kafka/kafka.py,sha256=qEv_yaG94KekFtfS06KgpTTbqeJkje0hn5uOjsMMngw,9414
|
|
436
|
+
unstructured_ingest/v2/processes/connectors/kafka/local.py,sha256=vwLZjvc_C17zOqcrzic0aIoPwS98sqYiwiMknw2IcK4,2586
|
|
437
|
+
unstructured_ingest/v2/processes/connectors/lancedb/__init__.py,sha256=lHUPCOiyOGu1IME1QiyFBZaB8z8e3bP8Y8TkqKs32Qk,906
|
|
438
|
+
unstructured_ingest/v2/processes/connectors/lancedb/aws.py,sha256=yR8V4O-oI_nUKJtHTLxhteEJpPDPn-_d2IkkXvgThJ0,1406
|
|
439
|
+
unstructured_ingest/v2/processes/connectors/lancedb/azure.py,sha256=Ms5vQVRIpTF1Q2qBl_bET9wbgaf4diPaH-iR8kJlr4E,1461
|
|
440
|
+
unstructured_ingest/v2/processes/connectors/lancedb/gcp.py,sha256=p5BPaFtS3y3Yh8PIr3tUqsAXrUYu4QYYAWQNh5W2ucE,1361
|
|
441
|
+
unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py,sha256=7WIShs2V3dpN6wUhDTt1j2rvdiPp6yopbh7XYkb9T3s,5129
|
|
442
|
+
unstructured_ingest/v2/processes/connectors/lancedb/local.py,sha256=_7-6iO6B60gAWwJUUrmlsRzYMFIBeZgu_QT3mhw5L0I,1272
|
|
407
443
|
unstructured_ingest/v2/processes/connectors/qdrant/__init__.py,sha256=xM19uYzAuGizVoZIM_hnVZ5AcBN69aOBGpqZcpWPtuE,760
|
|
408
444
|
unstructured_ingest/v2/processes/connectors/qdrant/cloud.py,sha256=accJ4sNWBVWV-KiVBDBDBYYx5A9CUoikP5NCErRmfik,1624
|
|
409
445
|
unstructured_ingest/v2/processes/connectors/qdrant/local.py,sha256=cGEyv3Oy6y4BQ4DU8yhJWMpL82QYwBVdPTxxNuV127U,1588
|
|
410
446
|
unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py,sha256=14qDTLrLBugsfvetFPx4ueS8zrk53wBLISuNRD4P-B8,5350
|
|
411
447
|
unstructured_ingest/v2/processes/connectors/qdrant/server.py,sha256=odvCZWZp8DmRxLXMR7tHhW-c7UQbix1_zpFdfXfCvKI,1613
|
|
412
448
|
unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=D43wrV2ADvQsToIYwbEWnZ7mhzlsYcZMFCqf6jIC7dQ,1333
|
|
413
|
-
unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=
|
|
449
|
+
unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=rHaSb1MtdWMY6eQL2i2cWSL4w0VApFTChzmWtyfvFTI,5140
|
|
414
450
|
unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=YrmhAL1RQ1c5-2fnR3UAyj_4KfvjYTQ2cWzpvsdJOnU,5535
|
|
415
451
|
unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=jl524VudwmFK63emCT7DmZan_EWJAMiGir5_zoO9FuY,5697
|
|
416
|
-
unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=
|
|
452
|
+
unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=LFzGeAUagLknK07DsXg2oSG7ZAgR6VqT9wfI_tYlHUg,14782
|
|
417
453
|
unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=9605K36nQ5-gBxzt1daYKYotON1SE85RETusqCJrbdk,5230
|
|
418
|
-
unstructured_ingest
|
|
419
|
-
unstructured_ingest
|
|
420
|
-
unstructured_ingest
|
|
421
|
-
unstructured_ingest
|
|
422
|
-
unstructured_ingest
|
|
423
|
-
unstructured_ingest-0.
|
|
454
|
+
unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWan69KnzVELvaqX34tMhCytIa-C8EDsXVKsEo,856
|
|
455
|
+
unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=2g1Fm2J0ppfy2jCw4b5YtrsWrSD3VcrAaqiE7FlpIAg,6236
|
|
456
|
+
unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
|
|
457
|
+
unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
|
|
458
|
+
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=dBDC_M8GVKupl7i9UMRCZyRIUv6gTkq8bJE_SILydAc,11291
|
|
459
|
+
unstructured_ingest-0.3.1.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
460
|
+
unstructured_ingest-0.3.1.dist-info/METADATA,sha256=gEXBJbX1y03XJgGGqXpNlkOw1PJ4IhEHmohj2CXHq9g,7326
|
|
461
|
+
unstructured_ingest-0.3.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
462
|
+
unstructured_ingest-0.3.1.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
463
|
+
unstructured_ingest-0.3.1.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
464
|
+
unstructured_ingest-0.3.1.dist-info/RECORD,,
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
import typing as t
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
from unstructured_ingest.interfaces import BaseDestinationConnector
|
|
5
|
-
from unstructured_ingest.runner.writers.base_writer import Writer
|
|
6
|
-
|
|
7
|
-
if t.TYPE_CHECKING:
|
|
8
|
-
from unstructured_ingest.connector.azure_cognitive_search import (
|
|
9
|
-
AzureCognitiveSearchWriteConfig,
|
|
10
|
-
SimpleAzureCognitiveSearchStorageConfig,
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
|
|
15
|
-
class AzureCognitiveSearchWriter(Writer):
|
|
16
|
-
connector_config: "SimpleAzureCognitiveSearchStorageConfig"
|
|
17
|
-
write_config: "AzureCognitiveSearchWriteConfig"
|
|
18
|
-
|
|
19
|
-
def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
|
|
20
|
-
from unstructured_ingest.connector.azure_cognitive_search import (
|
|
21
|
-
AzureCognitiveSearchDestinationConnector,
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
return AzureCognitiveSearchDestinationConnector
|
|
@@ -1,242 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from dataclasses import dataclass, field
|
|
3
|
-
from datetime import date, datetime
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
from typing import TYPE_CHECKING, Any, Optional
|
|
6
|
-
|
|
7
|
-
from dateutil import parser
|
|
8
|
-
from pydantic import Field, Secret
|
|
9
|
-
|
|
10
|
-
from unstructured_ingest.error import DestinationConnectionError
|
|
11
|
-
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
12
|
-
from unstructured_ingest.v2.interfaces import (
|
|
13
|
-
AccessConfig,
|
|
14
|
-
ConnectionConfig,
|
|
15
|
-
FileData,
|
|
16
|
-
Uploader,
|
|
17
|
-
UploaderConfig,
|
|
18
|
-
UploadStager,
|
|
19
|
-
UploadStagerConfig,
|
|
20
|
-
)
|
|
21
|
-
from unstructured_ingest.v2.logger import logger
|
|
22
|
-
from unstructured_ingest.v2.processes.connector_registry import (
|
|
23
|
-
DestinationRegistryEntry,
|
|
24
|
-
)
|
|
25
|
-
|
|
26
|
-
if TYPE_CHECKING:
|
|
27
|
-
from weaviate import Client
|
|
28
|
-
|
|
29
|
-
CONNECTOR_TYPE = "weaviate"
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class WeaviateAccessConfig(AccessConfig):
|
|
33
|
-
access_token: Optional[str] = Field(
|
|
34
|
-
default=None, description="Used to create the bearer token."
|
|
35
|
-
)
|
|
36
|
-
api_key: Optional[str] = None
|
|
37
|
-
client_secret: Optional[str] = None
|
|
38
|
-
password: Optional[str] = None
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
class WeaviateConnectionConfig(ConnectionConfig):
|
|
42
|
-
host_url: str = Field(description="Weaviate instance url")
|
|
43
|
-
class_name: str = Field(
|
|
44
|
-
description="Name of the class to push the records into, e.g: Pdf-elements"
|
|
45
|
-
)
|
|
46
|
-
access_config: Secret[WeaviateAccessConfig] = Field(
|
|
47
|
-
default=WeaviateAccessConfig(), validate_default=True
|
|
48
|
-
)
|
|
49
|
-
username: Optional[str] = None
|
|
50
|
-
anonymous: bool = Field(default=False, description="if set, all auth values will be ignored")
|
|
51
|
-
scope: Optional[list[str]] = None
|
|
52
|
-
refresh_token: Optional[str] = Field(
|
|
53
|
-
default=None,
|
|
54
|
-
description="Will tie this value to the bearer token. If not provided, "
|
|
55
|
-
"the authentication will expire once the lifetime of the access token is up.",
|
|
56
|
-
)
|
|
57
|
-
connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
class WeaviateUploadStagerConfig(UploadStagerConfig):
|
|
61
|
-
pass
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
@dataclass
|
|
65
|
-
class WeaviateUploadStager(UploadStager):
|
|
66
|
-
upload_stager_config: WeaviateUploadStagerConfig = field(
|
|
67
|
-
default_factory=lambda: WeaviateUploadStagerConfig()
|
|
68
|
-
)
|
|
69
|
-
|
|
70
|
-
@staticmethod
|
|
71
|
-
def parse_date_string(date_string: str) -> date:
|
|
72
|
-
try:
|
|
73
|
-
timestamp = float(date_string)
|
|
74
|
-
return datetime.fromtimestamp(timestamp)
|
|
75
|
-
except Exception as e:
|
|
76
|
-
logger.debug(f"date {date_string} string not a timestamp: {e}")
|
|
77
|
-
return parser.parse(date_string)
|
|
78
|
-
|
|
79
|
-
@classmethod
|
|
80
|
-
def conform_dict(cls, data: dict) -> None:
|
|
81
|
-
"""
|
|
82
|
-
Updates the element dictionary to conform to the Weaviate schema
|
|
83
|
-
"""
|
|
84
|
-
|
|
85
|
-
# Dict as string formatting
|
|
86
|
-
if record_locator := data.get("metadata", {}).get("data_source", {}).get("record_locator"):
|
|
87
|
-
# Explicit casting otherwise fails schema type checking
|
|
88
|
-
data["metadata"]["data_source"]["record_locator"] = str(json.dumps(record_locator))
|
|
89
|
-
|
|
90
|
-
# Array of items as string formatting
|
|
91
|
-
if points := data.get("metadata", {}).get("coordinates", {}).get("points"):
|
|
92
|
-
data["metadata"]["coordinates"]["points"] = str(json.dumps(points))
|
|
93
|
-
|
|
94
|
-
if links := data.get("metadata", {}).get("links", {}):
|
|
95
|
-
data["metadata"]["links"] = str(json.dumps(links))
|
|
96
|
-
|
|
97
|
-
if permissions_data := (
|
|
98
|
-
data.get("metadata", {}).get("data_source", {}).get("permissions_data")
|
|
99
|
-
):
|
|
100
|
-
data["metadata"]["data_source"]["permissions_data"] = json.dumps(permissions_data)
|
|
101
|
-
|
|
102
|
-
# Datetime formatting
|
|
103
|
-
if date_created := data.get("metadata", {}).get("data_source", {}).get("date_created"):
|
|
104
|
-
data["metadata"]["data_source"]["date_created"] = cls.parse_date_string(
|
|
105
|
-
date_created
|
|
106
|
-
).strftime(
|
|
107
|
-
"%Y-%m-%dT%H:%M:%S.%fZ",
|
|
108
|
-
)
|
|
109
|
-
|
|
110
|
-
if date_modified := data.get("metadata", {}).get("data_source", {}).get("date_modified"):
|
|
111
|
-
data["metadata"]["data_source"]["date_modified"] = cls.parse_date_string(
|
|
112
|
-
date_modified
|
|
113
|
-
).strftime(
|
|
114
|
-
"%Y-%m-%dT%H:%M:%S.%fZ",
|
|
115
|
-
)
|
|
116
|
-
|
|
117
|
-
if date_processed := data.get("metadata", {}).get("data_source", {}).get("date_processed"):
|
|
118
|
-
data["metadata"]["data_source"]["date_processed"] = cls.parse_date_string(
|
|
119
|
-
date_processed
|
|
120
|
-
).strftime(
|
|
121
|
-
"%Y-%m-%dT%H:%M:%S.%fZ",
|
|
122
|
-
)
|
|
123
|
-
|
|
124
|
-
if last_modified := data.get("metadata", {}).get("last_modified"):
|
|
125
|
-
data["metadata"]["last_modified"] = cls.parse_date_string(last_modified).strftime(
|
|
126
|
-
"%Y-%m-%dT%H:%M:%S.%fZ",
|
|
127
|
-
)
|
|
128
|
-
|
|
129
|
-
# String casting
|
|
130
|
-
if version := data.get("metadata", {}).get("data_source", {}).get("version"):
|
|
131
|
-
data["metadata"]["data_source"]["version"] = str(version)
|
|
132
|
-
|
|
133
|
-
if page_number := data.get("metadata", {}).get("page_number"):
|
|
134
|
-
data["metadata"]["page_number"] = str(page_number)
|
|
135
|
-
|
|
136
|
-
if regex_metadata := data.get("metadata", {}).get("regex_metadata"):
|
|
137
|
-
data["metadata"]["regex_metadata"] = str(json.dumps(regex_metadata))
|
|
138
|
-
|
|
139
|
-
def run(
|
|
140
|
-
self,
|
|
141
|
-
elements_filepath: Path,
|
|
142
|
-
file_data: FileData,
|
|
143
|
-
output_dir: Path,
|
|
144
|
-
output_filename: str,
|
|
145
|
-
**kwargs: Any,
|
|
146
|
-
) -> Path:
|
|
147
|
-
with open(elements_filepath) as elements_file:
|
|
148
|
-
elements_contents = json.load(elements_file)
|
|
149
|
-
for element in elements_contents:
|
|
150
|
-
self.conform_dict(data=element)
|
|
151
|
-
output_path = Path(output_dir) / Path(f"{output_filename}.json")
|
|
152
|
-
with open(output_path, "w") as output_file:
|
|
153
|
-
json.dump(elements_contents, output_file)
|
|
154
|
-
return output_path
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
class WeaviateUploaderConfig(UploaderConfig):
|
|
158
|
-
batch_size: int = Field(default=100, description="Number of records per batch")
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
@dataclass
|
|
162
|
-
class WeaviateUploader(Uploader):
|
|
163
|
-
upload_config: WeaviateUploaderConfig
|
|
164
|
-
connection_config: WeaviateConnectionConfig
|
|
165
|
-
connector_type: str = CONNECTOR_TYPE
|
|
166
|
-
|
|
167
|
-
@requires_dependencies(["weaviate"], extras="weaviate")
|
|
168
|
-
def get_client(self) -> "Client":
|
|
169
|
-
from weaviate import Client
|
|
170
|
-
|
|
171
|
-
auth = self._resolve_auth_method()
|
|
172
|
-
return Client(url=self.connection_config.host_url, auth_client_secret=auth)
|
|
173
|
-
|
|
174
|
-
def precheck(self) -> None:
|
|
175
|
-
try:
|
|
176
|
-
self.get_client()
|
|
177
|
-
except Exception as e:
|
|
178
|
-
logger.error(f"Failed to validate connection {e}", exc_info=True)
|
|
179
|
-
raise DestinationConnectionError(f"failed to validate connection: {e}")
|
|
180
|
-
|
|
181
|
-
@requires_dependencies(["weaviate"], extras="weaviate")
|
|
182
|
-
def _resolve_auth_method(self):
|
|
183
|
-
access_configs = self.connection_config.access_config.get_secret_value()
|
|
184
|
-
connection_config = self.connection_config
|
|
185
|
-
if connection_config.anonymous:
|
|
186
|
-
return None
|
|
187
|
-
|
|
188
|
-
if access_configs.access_token:
|
|
189
|
-
from weaviate.auth import AuthBearerToken
|
|
190
|
-
|
|
191
|
-
return AuthBearerToken(
|
|
192
|
-
access_token=access_configs.access_token,
|
|
193
|
-
refresh_token=connection_config.refresh_token,
|
|
194
|
-
)
|
|
195
|
-
elif access_configs.api_key:
|
|
196
|
-
from weaviate.auth import AuthApiKey
|
|
197
|
-
|
|
198
|
-
return AuthApiKey(api_key=access_configs.api_key)
|
|
199
|
-
elif access_configs.client_secret:
|
|
200
|
-
from weaviate.auth import AuthClientCredentials
|
|
201
|
-
|
|
202
|
-
return AuthClientCredentials(
|
|
203
|
-
client_secret=access_configs.client_secret, scope=connection_config.scope
|
|
204
|
-
)
|
|
205
|
-
elif connection_config.username and access_configs.password:
|
|
206
|
-
from weaviate.auth import AuthClientPassword
|
|
207
|
-
|
|
208
|
-
return AuthClientPassword(
|
|
209
|
-
username=connection_config.username,
|
|
210
|
-
password=access_configs.password,
|
|
211
|
-
scope=connection_config.scope,
|
|
212
|
-
)
|
|
213
|
-
return None
|
|
214
|
-
|
|
215
|
-
def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
|
|
216
|
-
with path.open("r") as file:
|
|
217
|
-
elements_dict = json.load(file)
|
|
218
|
-
logger.info(
|
|
219
|
-
f"writing {len(elements_dict)} objects to destination "
|
|
220
|
-
f"class {self.connection_config.class_name} "
|
|
221
|
-
f"at {self.connection_config.host_url}",
|
|
222
|
-
)
|
|
223
|
-
|
|
224
|
-
client = self.get_client()
|
|
225
|
-
client.batch.configure(batch_size=self.upload_config.batch_size)
|
|
226
|
-
with client.batch as b:
|
|
227
|
-
for e in elements_dict:
|
|
228
|
-
vector = e.pop("embeddings", None)
|
|
229
|
-
b.add_data_object(
|
|
230
|
-
e,
|
|
231
|
-
self.connection_config.class_name,
|
|
232
|
-
vector=vector,
|
|
233
|
-
)
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
weaviate_destination_entry = DestinationRegistryEntry(
|
|
237
|
-
connection_config=WeaviateConnectionConfig,
|
|
238
|
-
uploader=WeaviateUploader,
|
|
239
|
-
uploader_config=WeaviateUploaderConfig,
|
|
240
|
-
upload_stager=WeaviateUploadStager,
|
|
241
|
-
upload_stager_config=WeaviateUploadStagerConfig,
|
|
242
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-0.2.2.dist-info → unstructured_ingest-0.3.1.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|