wizit-context-ingestor 0.2.5b1__tar.gz → 0.2.5b2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wizit-context-ingestor might be problematic. Click here for more details.

Files changed (31)
  1. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/PKG-INFO +1 -1
  2. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/pyproject.toml +1 -1
  3. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/domain/services.py +12 -12
  4. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/services/parse_doc.py +11 -11
  5. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/README.md +0 -0
  6. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/.DS_Store +0 -0
  7. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/__init__.py +0 -0
  8. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/application/__init__.py +0 -0
  9. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/application/context_chunk_service.py +0 -0
  10. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/application/interfaces.py +0 -0
  11. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/application/transcription_service.py +0 -0
  12. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/data/__init__.py +0 -0
  13. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/data/prompts.py +0 -0
  14. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/domain/__init__.py +0 -0
  15. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/domain/models.py +0 -0
  16. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/infra/__init__.py +0 -0
  17. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/infra/aws_model.py +0 -0
  18. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/infra/persistence/__init__.py +0 -0
  19. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/infra/persistence/local_storage.py +0 -0
  20. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/infra/persistence/s3_storage.py +0 -0
  21. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/infra/rag/pg_embeddings.py +0 -0
  22. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/infra/rag/redis_embeddings.py +0 -0
  23. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/infra/rag/semantic_chunks.py +0 -0
  24. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/infra/secrets/__init__.py +0 -0
  25. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/infra/secrets/aws_secrets_manager.py +0 -0
  26. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/infra/vertex_model.py +0 -0
  27. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/main.py +0 -0
  28. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/services/.DS_Store +0 -0
  29. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/services/__init__.py +0 -0
  30. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/services/chunks.py +0 -0
  31. {wizit_context_ingestor-0.2.5b1 → wizit_context_ingestor-0.2.5b2}/src/wizit_context_ingestor/services/pg_embeddings_manager.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: wizit-context-ingestor
3
- Version: 0.2.5b1
3
+ Version: 0.2.5b2
4
4
  Summary: Contextual Rag with Cloud Solutions
5
5
  Requires-Dist: anthropic[vertex]>=0.66.0
6
6
  Requires-Dist: boto3>=1.40.23
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "wizit_context_ingestor"
3
- version = "0.2.5-beta-1"
3
+ version = "0.2.5-beta-2"
4
4
  description = "Contextual Rag with Cloud Solutions"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -1,7 +1,7 @@
1
1
  import base64
2
2
  import logging
3
3
  import io
4
- import fitz
4
+ import pymupdf
5
5
  from PIL import Image
6
6
  from typing import List
7
7
  from ..domain.models import ParsedDocPage, ParsedDoc
@@ -17,25 +17,25 @@ class ParseDocModelService():
17
17
  def __init__(self, file_path: str):
18
18
  """
19
19
  Initialize a PDF document parser.
20
-
20
+
21
21
  Args:
22
22
  file_path: Path to the PDF file to parse
23
23
  """
24
24
  self.file_path = file_path
25
- self.pdf_document = fitz.open(file_path)
25
+ self.pdf_document = pymupdf.open(file_path)
26
26
  self.page_count = self.pdf_document.page_count
27
27
 
28
-
28
+
29
29
  def pdf_page_to_base64(self, page_number: int) -> ParsedDocPage:
30
30
  """
31
31
  Convert a PDF page to a base64-encoded PNG image.
32
-
32
+
33
33
  Args:
34
34
  page_number: One-indexed page number to convert
35
-
35
+
36
36
  Returns:
37
37
  Base64 encoded string of the page image
38
-
38
+
39
39
  Raises:
40
40
  Exception: If there's an error during conversion
41
41
  """
@@ -49,7 +49,7 @@ class ParseDocModelService():
49
49
  b64_encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
50
50
  logger.info(f"Page {page_number} encoded successfully")
51
51
  return ParsedDocPage(
52
- page_number=page_number,
52
+ page_number=page_number,
53
53
  page_base64=b64_encoded_image
54
54
  )
55
55
  except Exception as e:
@@ -59,15 +59,15 @@ class ParseDocModelService():
59
59
  def parse_document_to_base64(self) -> List[ParsedDocPage]:
60
60
  """
61
61
  Convert all pages in the PDF document to base64-encoded images.
62
-
62
+
63
63
  Returns:
64
64
  List of base64 encoded strings for each page
65
-
65
+
66
66
  Raises:
67
67
  Exception: If there's an error during conversion
68
68
  """
69
69
  # BASE DE DATOS SINTETICOS DE PREGUNTAS Y RESPUESTAS SOBRE EL DOCUMENTO, FINE TUNING PARA EL LLM
70
- # GEMMA 2 --> DATASET DE PREGUNTAS Y RESPUESTAS SOBRE EL DOCUMENTO
70
+ # GEMMA 2 --> DATASET DE PREGUNTAS Y RESPUESTAS SOBRE EL DOCUMENTO
71
71
  # RAG --> FINETUNING AUTOMATICO / CONSULTAR EL MODELO
72
72
  # OPENAI --> PREGUNTAS Y RESPUESTAS SOBRE EL DOCUMENTO
73
73
  # COLAB --> PREGUNTAS Y RESPUESTAS SOBRE EL DOCUMENTO
@@ -95,4 +95,4 @@ class ParseDocModelService():
95
95
  document_text=md_content
96
96
  )
97
97
 
98
- # def
98
+ # def
@@ -4,7 +4,7 @@ from langchain_core.output_parsers import StrOutputParser
4
4
  import base64
5
5
  import logging
6
6
  import io
7
- import fitz
7
+ import pymupdf
8
8
  from PIL import Image
9
9
  from typing import List, Any
10
10
  from dotenv import load_dotenv
@@ -23,13 +23,13 @@ class ParseDoc:
23
23
  def __init__(self, file_path: str, system_prompt, chat_model: Any):
24
24
  """
25
25
  Initialize a PDF document parser.
26
-
26
+
27
27
  Args:
28
28
  file_path: Path to the PDF file to parse
29
29
  chat_model: Language model for processing document content
30
30
  """
31
31
  self.file_path = file_path
32
- self.pdf_document = fitz.open(file_path)
32
+ self.pdf_document = pymupdf.open(file_path)
33
33
  self.page_count = self.pdf_document.page_count
34
34
  self.system_prompt = system_prompt
35
35
  self.chat_model = chat_model
@@ -37,13 +37,13 @@ class ParseDoc:
37
37
  def pdf_page_to_base64(self, page_number: int) -> str:
38
38
  """
39
39
  Convert a PDF page to a base64-encoded PNG image.
40
-
40
+
41
41
  Args:
42
42
  page_number: One-indexed page number to convert
43
-
43
+
44
44
  Returns:
45
45
  Base64 encoded string of the page image
46
-
46
+
47
47
  Raises:
48
48
  Exception: If there's an error during conversion
49
49
  """
@@ -69,10 +69,10 @@ class ParseDoc:
69
69
  def parse_document_to_base64(self) -> List[str]:
70
70
  """
71
71
  Convert all pages in the PDF document to base64-encoded images.
72
-
72
+
73
73
  Returns:
74
74
  List of base64 encoded strings for each page
75
-
75
+
76
76
  Raises:
77
77
  Exception: If there's an error during conversion
78
78
  """
@@ -90,14 +90,14 @@ class ParseDoc:
90
90
  def parse_with_llm(self, base_64_image: str, prompt: str) -> AIMessage:
91
91
  """
92
92
  Process a base64-encoded image with a language model using the provided prompt.
93
-
93
+
94
94
  Args:
95
95
  base_64_image: Base64 encoded image string
96
96
  prompt: Text prompt to send with the image
97
-
97
+
98
98
  Returns:
99
99
  Language model response
100
-
100
+
101
101
  Raises:
102
102
  Exception: If there's an error during processing
103
103
  """