unstructured-ingest 1.0.56__tar.gz → 1.1.2__tar.gz

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (244)
  1. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/PKG-INFO +2 -1
  2. unstructured_ingest-1.1.2/unstructured_ingest/__version__.py +1 -0
  3. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/embed/azure_openai.py +7 -2
  4. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/embed/openai.py +11 -4
  5. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/interfaces/connector.py +7 -1
  6. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/otel.py +16 -1
  7. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/delta_table.py +2 -1
  8. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/fsspec/fsspec.py +106 -14
  9. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/fsspec/s3.py +14 -4
  10. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/onedrive.py +9 -8
  11. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/sharepoint.py +3 -2
  12. unstructured_ingest-1.1.2/unstructured_ingest/processes/utils/__init__.py +8 -0
  13. unstructured_ingest-1.1.2/unstructured_ingest/processes/utils/logging/connector.py +365 -0
  14. unstructured_ingest-1.1.2/unstructured_ingest/processes/utils/logging/sanitizer.py +117 -0
  15. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/utils/__init__.py +1 -1
  16. unstructured_ingest-1.1.2/unstructured_ingest/utils/tls.py +15 -0
  17. unstructured_ingest-1.0.56/unstructured_ingest/__version__.py +0 -1
  18. unstructured_ingest-1.0.56/unstructured_ingest/processes/utils/__init__.py +0 -0
  19. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/.gitignore +0 -0
  20. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/LICENSE.md +0 -0
  21. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/README.md +0 -0
  22. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/pyproject.toml +0 -0
  23. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/__init__.py +0 -0
  24. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/cli/README.md +0 -0
  25. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/cli/__init__.py +0 -0
  26. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/cli/base/__init__.py +0 -0
  27. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/cli/base/cmd.py +0 -0
  28. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/cli/base/dest.py +0 -0
  29. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/cli/base/importer.py +0 -0
  30. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/cli/base/src.py +0 -0
  31. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/cli/cli.py +0 -0
  32. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/cli/cmds.py +0 -0
  33. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/cli/utils/__init__.py +0 -0
  34. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/cli/utils/click.py +0 -0
  35. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/cli/utils/model_conversion.py +0 -0
  36. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/data_types/__init__.py +0 -0
  37. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/data_types/entities.py +0 -0
  38. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/data_types/file_data.py +0 -0
  39. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/embed/__init__.py +0 -0
  40. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/embed/bedrock.py +0 -0
  41. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/embed/huggingface.py +0 -0
  42. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/embed/interfaces.py +0 -0
  43. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/embed/mixedbreadai.py +0 -0
  44. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/embed/octoai.py +0 -0
  45. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/embed/togetherai.py +0 -0
  46. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/embed/vertexai.py +0 -0
  47. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/embed/voyageai.py +0 -0
  48. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/error.py +0 -0
  49. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/errors_v2.py +0 -0
  50. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/interfaces/__init__.py +0 -0
  51. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/interfaces/downloader.py +0 -0
  52. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/interfaces/indexer.py +0 -0
  53. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/interfaces/process.py +0 -0
  54. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/interfaces/processor.py +0 -0
  55. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/interfaces/upload_stager.py +0 -0
  56. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/interfaces/uploader.py +0 -0
  57. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/logger.py +0 -0
  58. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/main.py +0 -0
  59. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/pipeline/__init__.py +0 -0
  60. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/pipeline/interfaces.py +0 -0
  61. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/pipeline/otel.py +0 -0
  62. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/pipeline/pipeline.py +0 -0
  63. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/pipeline/steps/__init__.py +0 -0
  64. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/pipeline/steps/chunk.py +0 -0
  65. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/pipeline/steps/download.py +0 -0
  66. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/pipeline/steps/embed.py +0 -0
  67. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/pipeline/steps/filter.py +0 -0
  68. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/pipeline/steps/index.py +0 -0
  69. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/pipeline/steps/partition.py +0 -0
  70. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/pipeline/steps/stage.py +0 -0
  71. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/pipeline/steps/uncompress.py +0 -0
  72. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/pipeline/steps/upload.py +0 -0
  73. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/__init__.py +0 -0
  74. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/chunker.py +0 -0
  75. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connector_registry.py +0 -0
  76. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/__init__.py +0 -0
  77. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/airtable.py +0 -0
  78. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
  79. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +0 -0
  80. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +0 -0
  81. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/astradb.py +0 -0
  82. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/azure_ai_search.py +0 -0
  83. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/chroma.py +0 -0
  84. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/confluence.py +0 -0
  85. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/couchbase.py +0 -0
  86. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/databricks/__init__.py +0 -0
  87. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/databricks/volumes.py +0 -0
  88. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/databricks/volumes_aws.py +0 -0
  89. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/databricks/volumes_azure.py +0 -0
  90. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +0 -0
  91. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/databricks/volumes_native.py +0 -0
  92. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/databricks/volumes_table.py +0 -0
  93. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/discord.py +0 -0
  94. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/duckdb/__init__.py +0 -0
  95. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/duckdb/base.py +0 -0
  96. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/duckdb/duckdb.py +0 -0
  97. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/duckdb/motherduck.py +0 -0
  98. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/elasticsearch/__init__.py +0 -0
  99. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +0 -0
  100. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +0 -0
  101. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/fsspec/__init__.py +0 -0
  102. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/fsspec/azure.py +0 -0
  103. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/fsspec/box.py +0 -0
  104. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/fsspec/dropbox.py +0 -0
  105. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/fsspec/gcs.py +0 -0
  106. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/fsspec/sftp.py +0 -0
  107. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/fsspec/utils.py +0 -0
  108. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/github.py +0 -0
  109. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/gitlab.py +0 -0
  110. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/google_drive.py +0 -0
  111. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +0 -0
  112. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +0 -0
  113. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/jira.py +0 -0
  114. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/kafka/__init__.py +0 -0
  115. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/kafka/cloud.py +0 -0
  116. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/kafka/kafka.py +0 -0
  117. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/kafka/local.py +0 -0
  118. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/kdbai.py +0 -0
  119. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/lancedb/__init__.py +0 -0
  120. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/lancedb/aws.py +0 -0
  121. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/lancedb/azure.py +0 -0
  122. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/lancedb/cloud.py +0 -0
  123. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/lancedb/gcp.py +0 -0
  124. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/lancedb/lancedb.py +0 -0
  125. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/lancedb/local.py +0 -0
  126. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/local.py +0 -0
  127. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/milvus.py +0 -0
  128. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/mongodb.py +0 -0
  129. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/neo4j.py +0 -0
  130. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
  131. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/client.py +0 -0
  132. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/connector.py +0 -0
  133. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/helpers.py +0 -0
  134. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +0 -0
  135. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +0 -0
  136. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +0 -0
  137. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +0 -0
  138. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/interfaces.py +0 -0
  139. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
  140. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/block.py +0 -0
  141. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +0 -0
  142. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +0 -0
  143. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +0 -0
  144. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -0
  145. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +0 -0
  146. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +0 -0
  147. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +0 -0
  148. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/code.py +0 -0
  149. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +0 -0
  150. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +0 -0
  151. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +0 -0
  152. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +0 -0
  153. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/file.py +0 -0
  154. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +0 -0
  155. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/image.py +0 -0
  156. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +0 -0
  157. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +0 -0
  158. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +0 -0
  159. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +0 -0
  160. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +0 -0
  161. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +0 -0
  162. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +0 -0
  163. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/table.py +0 -0
  164. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +0 -0
  165. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/template.py +0 -0
  166. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +0 -0
  167. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +0 -0
  168. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +0 -0
  169. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/blocks/video.py +0 -0
  170. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database.py +0 -0
  171. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +0 -0
  172. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +0 -0
  173. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +0 -0
  174. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +0 -0
  175. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +0 -0
  176. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +0 -0
  177. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +0 -0
  178. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +0 -0
  179. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -0
  180. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +0 -0
  181. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +0 -0
  182. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +0 -0
  183. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +0 -0
  184. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +0 -0
  185. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +0 -0
  186. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +0 -0
  187. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +0 -0
  188. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +0 -0
  189. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +0 -0
  190. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +0 -0
  191. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +0 -0
  192. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +0 -0
  193. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +0 -0
  194. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/date.py +0 -0
  195. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/file.py +0 -0
  196. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/page.py +0 -0
  197. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/parent.py +0 -0
  198. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/rich_text.py +0 -0
  199. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/notion/types/user.py +0 -0
  200. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/outlook.py +0 -0
  201. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/pinecone.py +0 -0
  202. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/qdrant/__init__.py +0 -0
  203. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/qdrant/cloud.py +0 -0
  204. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/qdrant/local.py +0 -0
  205. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/qdrant/qdrant.py +0 -0
  206. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/qdrant/server.py +0 -0
  207. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/redisdb.py +0 -0
  208. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/salesforce.py +0 -0
  209. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/slack.py +0 -0
  210. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/sql/__init__.py +0 -0
  211. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +0 -0
  212. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/sql/postgres.py +0 -0
  213. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/sql/singlestore.py +0 -0
  214. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/sql/snowflake.py +0 -0
  215. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/sql/sql.py +0 -0
  216. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/sql/sqlite.py +0 -0
  217. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/sql/vastdb.py +0 -0
  218. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/utils.py +0 -0
  219. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/vectara.py +0 -0
  220. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/weaviate/__init__.py +0 -0
  221. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/weaviate/cloud.py +0 -0
  222. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/weaviate/embedded.py +0 -0
  223. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/weaviate/local.py +0 -0
  224. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/weaviate/weaviate.py +0 -0
  225. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
  226. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/zendesk/client.py +0 -0
  227. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -0
  228. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/embedder.py +0 -0
  229. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/filter.py +0 -0
  230. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/partitioner.py +0 -0
  231. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/uncompress.py +0 -0
  232. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/processes/utils/blob_storage.py +0 -0
  233. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/unstructured_api.py +0 -0
  234. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/utils/chunking.py +0 -0
  235. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/utils/compression.py +0 -0
  236. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/utils/constants.py +0 -0
  237. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/utils/data_prep.py +0 -0
  238. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/utils/dep_check.py +0 -0
  239. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/utils/filesystem.py +0 -0
  240. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/utils/html.py +0 -0
  241. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/utils/ndjson.py +0 -0
  242. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/utils/pydantic_models.py +0 -0
  243. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
  244. {unstructured_ingest-1.0.56 → unstructured_ingest-1.1.2}/unstructured_ingest/utils/table.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.0.56
3
+ Version: 1.1.2
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -18,6 +18,7 @@ Classifier: Programming Language :: Python :: 3.11
18
18
  Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: <3.13,>=3.9
21
+ Requires-Dist: certifi>=2025.7.14
21
22
  Requires-Dist: click
22
23
  Requires-Dist: opentelemetry-sdk
23
24
  Requires-Dist: pydantic>=2.7
@@ -0,0 +1 @@
1
+ __version__ = "1.1.2" # pragma: no cover
@@ -9,6 +9,7 @@ from unstructured_ingest.embed.openai import (
9
9
  OpenAIEmbeddingEncoder,
10
10
  )
11
11
  from unstructured_ingest.utils.dep_check import requires_dependencies
12
+ from unstructured_ingest.utils.tls import ssl_context_with_optional_ca_override
12
13
 
13
14
  if TYPE_CHECKING:
14
15
  from openai import AsyncAzureOpenAI, AzureOpenAI
@@ -23,9 +24,11 @@ class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
23
24
 
24
25
  @requires_dependencies(["openai"], extras="openai")
25
26
  def get_client(self) -> "AzureOpenAI":
26
- from openai import AzureOpenAI
27
+ from openai import AzureOpenAI, DefaultHttpxClient
27
28
 
29
+ client = DefaultHttpxClient(verify=ssl_context_with_optional_ca_override())
28
30
  return AzureOpenAI(
31
+ http_client=client,
29
32
  api_key=self.api_key.get_secret_value(),
30
33
  api_version=self.api_version,
31
34
  azure_endpoint=self.azure_endpoint,
@@ -33,9 +36,11 @@ class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
33
36
 
34
37
  @requires_dependencies(["openai"], extras="openai")
35
38
  def get_async_client(self) -> "AsyncAzureOpenAI":
36
- from openai import AsyncAzureOpenAI
39
+ from openai import AsyncAzureOpenAI, DefaultAsyncHttpxClient
37
40
 
41
+ client = DefaultAsyncHttpxClient(verify=ssl_context_with_optional_ca_override())
38
42
  return AsyncAzureOpenAI(
43
+ http_client=client,
39
44
  api_key=self.api_key.get_secret_value(),
40
45
  api_version=self.api_version,
41
46
  azure_endpoint=self.azure_endpoint,
@@ -18,6 +18,7 @@ from unstructured_ingest.errors_v2 import (
18
18
  )
19
19
  from unstructured_ingest.logger import logger
20
20
  from unstructured_ingest.utils.dep_check import requires_dependencies
21
+ from unstructured_ingest.utils.tls import ssl_context_with_optional_ca_override
21
22
 
22
23
  if TYPE_CHECKING:
23
24
  from openai import AsyncOpenAI, OpenAI
@@ -86,15 +87,21 @@ class OpenAIEmbeddingConfig(EmbeddingConfig):
86
87
 
87
88
  @requires_dependencies(["openai"], extras="openai")
88
89
  def get_client(self) -> "OpenAI":
89
- from openai import OpenAI
90
+ from openai import DefaultHttpxClient, OpenAI
90
91
 
91
- return OpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
92
+ client = DefaultHttpxClient(verify=ssl_context_with_optional_ca_override())
93
+ return OpenAI(
94
+ api_key=self.api_key.get_secret_value(), http_client=client, base_url=self.base_url
95
+ )
92
96
 
93
97
  @requires_dependencies(["openai"], extras="openai")
94
98
  def get_async_client(self) -> "AsyncOpenAI":
95
- from openai import AsyncOpenAI
99
+ from openai import AsyncOpenAI, DefaultAsyncHttpxClient
96
100
 
97
- return AsyncOpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
101
+ client = DefaultAsyncHttpxClient(verify=ssl_context_with_optional_ca_override())
102
+ return AsyncOpenAI(
103
+ api_key=self.api_key.get_secret_value(), http_client=client, base_url=self.base_url
104
+ )
98
105
 
99
106
 
100
107
  @dataclass
@@ -5,6 +5,8 @@ from typing import Any, TypeVar, Union
5
5
  from pydantic import BaseModel, Secret, model_validator
6
6
  from pydantic.types import _SecretBase
7
7
 
8
+ from unstructured_ingest.processes.utils.logging.connector import ConnectorLoggingMixin
9
+
8
10
 
9
11
  class AccessConfig(BaseModel):
10
12
  """Meant to designate holding any sensitive information associated with other configs
@@ -46,5 +48,9 @@ ConnectionConfigT = TypeVar("ConnectionConfigT", bound=ConnectionConfig)
46
48
 
47
49
 
48
50
  @dataclass
49
- class BaseConnector(ABC):
51
+ class BaseConnector(ABC, ConnectorLoggingMixin):
50
52
  connection_config: ConnectionConfigT
53
+
54
+ def __post_init__(self):
55
+ """Initialize the logging mixin after dataclass initialization."""
56
+ ConnectorLoggingMixin.__init__(self)
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  import os
2
3
  from dataclasses import dataclass, field
3
4
  from typing import Callable, ClassVar, Optional, Protocol, Sequence
@@ -31,13 +32,27 @@ class LogSpanExporter(ConsoleSpanExporter):
31
32
  self.log_out(self.formatter(span))
32
33
  return SpanExportResult.SUCCESS
33
34
 
35
+ def get_log_out() -> Callable:
36
+ level_names_mapping = {
37
+ 'CRITICAL': logging.CRITICAL,
38
+ 'FATAL': logging.FATAL,
39
+ 'ERROR': logging.ERROR,
40
+ 'WARN': logging.WARNING,
41
+ 'WARNING': logging.WARNING,
42
+ 'INFO': logging.INFO,
43
+ 'DEBUG': logging.DEBUG,
44
+ 'NOTSET': logging.NOTSET,
45
+ }
46
+ log_level = os.getenv("OTEL_LOG_LEVEL", "DEBUG").upper()
47
+ log_level_int = level_names_mapping.get(log_level, logging.DEBUG)
48
+ return lambda message: logger.log(log_level_int, message)
34
49
 
35
50
  @dataclass
36
51
  class OtelHandler:
37
52
  otel_endpoint: Optional[str] = None
38
53
  service_name: str = "unstructured-ingest"
39
54
  trace_provider: TracerProvider = field(init=False)
40
- log_out: Callable = field(default=logger.info)
55
+ log_out: Callable = field(default=get_log_out())
41
56
  trace_context_key: ClassVar[str] = "_trace_context"
42
57
 
43
58
  def init_trace(self):
@@ -48,7 +48,8 @@ class DeltaTableAccessConfig(AccessConfig):
48
48
 
49
49
  class DeltaTableConnectionConfig(ConnectionConfig):
50
50
  access_config: Secret[DeltaTableAccessConfig] = Field(
51
- default=Secret(DeltaTableAccessConfig()), validate_default=True
51
+ default_factory=lambda: Secret[DeltaTableAccessConfig](DeltaTableAccessConfig()),
52
+ validate_default=True,
52
53
  )
53
54
  aws_region: Optional[str] = Field(default=None, description="AWS Region")
54
55
  table_uri: str = Field(
@@ -28,7 +28,6 @@ from unstructured_ingest.interfaces import (
28
28
  Uploader,
29
29
  UploaderConfig,
30
30
  )
31
- from unstructured_ingest.logger import logger
32
31
  from unstructured_ingest.processes.connectors.fsspec.utils import sterilize_dict
33
32
  from unstructured_ingest.utils.filesystem import mkdir_concurrent_safe
34
33
 
@@ -106,6 +105,12 @@ class FsspecIndexer(Indexer):
106
105
  def precheck(self) -> None:
107
106
  from fsspec import get_filesystem_class
108
107
 
108
+ self.log_operation_start(
109
+ "Connection validation",
110
+ protocol=self.index_config.protocol,
111
+ path=self.index_config.path_without_protocol,
112
+ )
113
+
109
114
  try:
110
115
  fs = get_filesystem_class(self.index_config.protocol)(
111
116
  **self.connection_config.get_access_config(),
@@ -113,13 +118,24 @@ class FsspecIndexer(Indexer):
113
118
  files = fs.ls(path=self.index_config.path_without_protocol, detail=True)
114
119
  valid_files = [x.get("name") for x in files if x.get("type") == "file"]
115
120
  if not valid_files:
121
+ self.log_operation_complete("Connection validation", count=0)
116
122
  return
117
123
  file_to_sample = valid_files[0]
118
- logger.debug(f"attempting to make HEAD request for file: {file_to_sample}")
124
+ self.log_debug(f"attempting to make HEAD request for file: {file_to_sample}")
119
125
  with self.connection_config.get_client(protocol=self.index_config.protocol) as client:
120
126
  client.head(path=file_to_sample)
127
+
128
+ self.log_connection_validated(
129
+ connector_type=self.connector_type,
130
+ endpoint=f"{self.index_config.protocol}://{self.index_config.path_without_protocol}",
131
+ )
132
+
121
133
  except Exception as e:
122
- logger.error(f"failed to validate connection: {e}", exc_info=True)
134
+ self.log_connection_failed(
135
+ connector_type=self.connector_type,
136
+ error=e,
137
+ endpoint=f"{self.index_config.protocol}://{self.index_config.path_without_protocol}",
138
+ )
123
139
  raise self.wrap_error(e=e)
124
140
 
125
141
  def get_file_info(self) -> list[dict[str, Any]]:
@@ -150,7 +166,7 @@ class FsspecIndexer(Indexer):
150
166
 
151
167
  def sample_n_files(self, files: list[dict[str, Any]], n) -> list[dict[str, Any]]:
152
168
  if len(files) <= n:
153
- logger.warning(
169
+ self.log_warning(
154
170
  f"number of files to be sampled={n} is not smaller than the number"
155
171
  f" of files found ({len(files)}). Returning all of the files as the"
156
172
  " sample."
@@ -201,9 +217,22 @@ class FsspecIndexer(Indexer):
201
217
  init_file_data.additional_metadata = self.get_metadata(file_info=file_info)
202
218
 
203
219
  def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
220
+ self.log_indexing_start(f"{self.connector_type} files")
221
+
204
222
  files = self.get_file_info()
205
- for file_info in files:
223
+ total_files = len(files)
224
+
225
+ self.log_operation_start("File indexing", total_files=total_files)
226
+
227
+ for i, file_info in enumerate(files):
206
228
  file_path = self.get_path(file_info=file_info)
229
+
230
+ # Only log progress for larger operations
231
+ if total_files > 5:
232
+ self.log_progress(
233
+ current=i + 1, total=total_files, item_type="files", operation="Indexing"
234
+ )
235
+
207
236
  # Note: we remove any remaining leading slashes (Box introduces these)
208
237
  # to get a valid relative path
209
238
  rel_path = file_path.replace(self.index_config.path_without_protocol, "").lstrip("/")
@@ -223,6 +252,8 @@ class FsspecIndexer(Indexer):
223
252
  display_name=file_path,
224
253
  )
225
254
 
255
+ self.log_indexing_complete(f"{self.connector_type} files", total_files)
256
+
226
257
 
227
258
  class FsspecDownloaderConfig(DownloaderConfig):
228
259
  pass
@@ -272,25 +303,57 @@ class FsspecDownloader(Downloader):
272
303
  def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
273
304
  download_path = self.get_download_path(file_data=file_data)
274
305
  mkdir_concurrent_safe(download_path.parent)
306
+
307
+ rpath = file_data.additional_metadata["original_file_path"]
308
+ file_size = file_data.metadata.filesize_bytes
309
+ self.log_download_start(file_path=rpath, file_id=file_data.identifier, file_size=file_size)
310
+
275
311
  try:
276
- rpath = file_data.additional_metadata["original_file_path"]
277
312
  with self.connection_config.get_client(protocol=self.protocol) as client:
278
313
  client.get_file(rpath=rpath, lpath=download_path.as_posix())
279
314
  self.handle_directory_download(lpath=download_path)
315
+
280
316
  except Exception as e:
317
+ self.log_error(
318
+ "File download failed",
319
+ error=e,
320
+ context={"file_path": rpath, "file_id": file_data.identifier},
321
+ )
281
322
  raise self.wrap_error(e=e)
323
+
324
+ self.log_download_complete(
325
+ file_path=rpath,
326
+ file_id=file_data.identifier,
327
+ download_path=str(download_path),
328
+ )
329
+
282
330
  return self.generate_download_response(file_data=file_data, download_path=download_path)
283
331
 
284
332
  async def async_run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
285
333
  download_path = self.get_download_path(file_data=file_data)
286
334
  mkdir_concurrent_safe(download_path.parent)
335
+ rpath = file_data.additional_metadata["original_file_path"]
336
+ file_size = file_data.metadata.filesize_bytes
337
+ self.log_download_start(file_path=rpath, file_id=file_data.identifier, file_size=file_size)
338
+
287
339
  try:
288
- rpath = file_data.additional_metadata["original_file_path"]
289
340
  with self.connection_config.get_client(protocol=self.protocol) as client:
290
341
  await client.get_file(rpath=rpath, lpath=download_path.as_posix())
291
342
  self.handle_directory_download(lpath=download_path)
292
343
  except Exception as e:
344
+ self.log_error(
345
+ "File download failed",
346
+ error=e,
347
+ context={"file_path": rpath, "file_id": file_data.identifier},
348
+ )
293
349
  raise self.wrap_error(e=e)
350
+
351
+ self.log_download_complete(
352
+ file_path=rpath,
353
+ file_id=file_data.identifier,
354
+ download_path=str(download_path),
355
+ )
356
+
294
357
  return self.generate_download_response(file_data=file_data, download_path=download_path)
295
358
 
296
359
 
@@ -321,6 +384,7 @@ class FsspecUploader(Uploader):
321
384
  )
322
385
 
323
386
  def __post_init__(self):
387
+ super().__post_init__()
324
388
  # TODO once python3.9 no longer supported and kw_only is allowed in dataclasses, remove:
325
389
  if not self.upload_config:
326
390
  raise TypeError(
@@ -334,6 +398,8 @@ class FsspecUploader(Uploader):
334
398
  def precheck(self) -> None:
335
399
  from fsspec import get_filesystem_class
336
400
 
401
+ self.log_operation_start("Connection validation", protocol=self.upload_config.protocol)
402
+
337
403
  try:
338
404
  fs = get_filesystem_class(self.upload_config.protocol)(
339
405
  **self.connection_config.get_access_config(),
@@ -341,7 +407,16 @@ class FsspecUploader(Uploader):
341
407
  upload_path = Path(self.upload_config.path_without_protocol) / "_empty"
342
408
  fs.write_bytes(path=upload_path.as_posix(), value=b"")
343
409
  except Exception as e:
410
+ self.log_connection_failed(
411
+ connector_type=self.connector_type,
412
+ error=e,
413
+ endpoint=f"{self.upload_config.protocol}://{self.upload_config.path_without_protocol}",
414
+ )
344
415
  raise self.wrap_error(e=e)
416
+ self.log_connection_validated(
417
+ connector_type=self.connector_type,
418
+ endpoint=f"{self.upload_config.protocol}://{self.upload_config.path_without_protocol}",
419
+ )
345
420
 
346
421
  def get_upload_path(self, file_data: FileData) -> Path:
347
422
  upload_path = Path(
@@ -353,14 +428,31 @@ class FsspecUploader(Uploader):
353
428
  def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
354
429
  path_str = str(path.resolve())
355
430
  upload_path = self.get_upload_path(file_data=file_data)
356
- logger.debug(f"writing local file {path_str} to {upload_path}")
357
- with self.connection_config.get_client(protocol=self.upload_config.protocol) as client:
358
- client.upload(lpath=path_str, rpath=upload_path.as_posix())
431
+ self.log_upload_start(file_path=path_str, destination=upload_path.as_posix())
432
+ try:
433
+ with self.connection_config.get_client(protocol=self.upload_config.protocol) as client:
434
+ client.upload(lpath=path_str, rpath=upload_path.as_posix())
435
+ except Exception as e:
436
+ self.log_error(
437
+ "File upload failed",
438
+ error=e,
439
+ context={"file_path": path_str, "destination": upload_path.as_posix()},
440
+ )
441
+ raise self.wrap_error(e=e)
442
+ self.log_upload_complete(file_path=path_str, destination=upload_path.as_posix())
359
443
 
360
444
  async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
361
445
  path_str = str(path.resolve())
362
446
  upload_path = self.get_upload_path(file_data=file_data)
363
- # Odd that fsspec doesn't run exists() as async even when client support async
364
- logger.debug(f"writing local file {path_str} to {upload_path}")
365
- with self.connection_config.get_client(protocol=self.upload_config.protocol) as client:
366
- client.upload(lpath=path_str, rpath=upload_path.as_posix())
447
+ self.log_upload_start(file_path=path_str, destination=upload_path.as_posix())
448
+ try:
449
+ with self.connection_config.get_client(protocol=self.upload_config.protocol) as client:
450
+ client.upload(lpath=path_str, rpath=upload_path.as_posix())
451
+ except Exception as e:
452
+ self.log_error(
453
+ "File upload failed",
454
+ error=e,
455
+ context={"file_path": path_str, "destination": upload_path.as_posix()},
456
+ )
457
+ raise self.wrap_error(e=e)
458
+ self.log_upload_complete(file_path=path_str, destination=upload_path.as_posix())
@@ -104,7 +104,13 @@ class S3ConnectionConfig(FsspecConnectionConfig):
104
104
  return UserError(message)
105
105
  if http_code >= 500:
106
106
  return ProviderError(message)
107
- logger.error(f"unhandled exception from s3 ({type(e)}): {e}", exc_info=True)
107
+ logger.error(
108
+ "Unhandled exception from S3 (type: %s, endpoint: %s): %s",
109
+ type(e).__name__,
110
+ self.endpoint_url or "default",
111
+ e,
112
+ exc_info=True,
113
+ )
108
114
  return e
109
115
 
110
116
 
@@ -122,6 +128,10 @@ class S3Indexer(FsspecIndexer):
122
128
 
123
129
  def get_metadata(self, file_info: dict) -> FileDataSourceMetadata:
124
130
  path = file_info["Key"]
131
+
132
+ self.log_debug("Getting metadata for S3 object", context={"file_path": path})
133
+ self.log_file_operation("Getting metadata", file_path=path)
134
+
125
135
  date_created = None
126
136
  date_modified = None
127
137
  modified = file_info.get("LastModified")
@@ -147,9 +157,9 @@ class S3Indexer(FsspecIndexer):
147
157
  record_locator["metadata"] = metadata
148
158
  issue_characters = [char for char in CHARACTERS_TO_AVOID if char in path]
149
159
  if issue_characters:
150
- logger.warning(
151
- f"File path {path} contains characters "
152
- f"that can cause issues with S3: {issue_characters}"
160
+ self.log_warning(
161
+ f"File path contains characters that can cause issues with S3: {issue_characters}",
162
+ context={"path": path, "problematic_characters": issue_characters},
153
163
  )
154
164
  return FileDataSourceMetadata(
155
165
  date_created=date_created,
@@ -115,23 +115,24 @@ class OnedriveConnectionConfig(ConnectionConfig):
115
115
  except ValueError as exc:
116
116
  logger.error("Couldn't set up credentials.")
117
117
  raise exc
118
-
118
+
119
119
  if "error" in token:
120
120
  error_codes = token.get("error_codes", [])
121
121
  error_type = token.get("error", "")
122
122
  error_description = token.get("error_description", "")
123
-
123
+
124
124
  # 7000215: Invalid client secret provided
125
125
  # 7000218: Invalid client id provided
126
126
  # 700016: Application not found in directory
127
127
  # 90002: Tenant not found
128
128
  auth_error_codes = [7000215, 7000218, 700016, 90002]
129
-
130
- if (any(code in error_codes for code in auth_error_codes) or
131
- error_type in ["invalid_client", "unauthorized_client", "invalid_grant"]):
132
- raise UserAuthError(
133
- f"Authentication failed: {error_type}: {error_description}"
134
- )
129
+
130
+ if any(code in error_codes for code in auth_error_codes) or error_type in [
131
+ "invalid_client",
132
+ "unauthorized_client",
133
+ "invalid_grant",
134
+ ]:
135
+ raise UserAuthError(f"Authentication failed: {error_type}: {error_description}")
135
136
  else:
136
137
  raise SourceConnectionNetworkError(
137
138
  f"Failed to fetch token: {error_type}: {error_description}"
@@ -87,6 +87,7 @@ class SharepointIndexerConfig(OnedriveIndexerConfig):
87
87
  # TODO: We can probably make path non-optional on OnedriveIndexerConfig once tested
88
88
  path: str = Field(default="")
89
89
 
90
+
90
91
  @dataclass
91
92
  class SharepointIndexer(OnedriveIndexer):
92
93
  connection_config: SharepointConnectionConfig
@@ -114,14 +115,14 @@ class SharepointIndexer(OnedriveIndexer):
114
115
  def _is_root_path(self, path: str) -> bool:
115
116
  """Check if the path represents root access (empty string or legacy default)."""
116
117
  return not path or not path.strip() or path == LEGACY_DEFAULT_PATH
117
-
118
+
118
119
  def _get_target_drive_item(self, site_drive_item: DriveItem, path: str) -> DriveItem:
119
120
  """Get the drive item to search in based on the path."""
120
121
  if self._is_root_path(path):
121
122
  return site_drive_item
122
123
  else:
123
124
  return site_drive_item.get_by_path(path).get().execute_query()
124
-
125
+
125
126
  def _validate_folder_path(self, site_drive_item: DriveItem, path: str) -> None:
126
127
  """Validate that a specific folder path exists and is accessible."""
127
128
  from office365.runtime.client_request_exception import ClientRequestException
@@ -0,0 +1,8 @@
1
+ from .logging.connector import ConnectorLoggingMixin, LoggingConfig
2
+ from .logging.sanitizer import DataSanitizer
3
+
4
+ __all__ = [
5
+ "ConnectorLoggingMixin",
6
+ "DataSanitizer",
7
+ "LoggingConfig",
8
+ ]