validmind 2.8.20__py3-none-any.whl → 2.8.22__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
validmind/__version__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2.8.20"
1
+ __version__ = "2.8.22"
validmind/client.py CHANGED
@@ -61,6 +61,7 @@ def init_dataset(
61
61
  class_labels: Optional[Dict[str, Any]] = None,
62
62
  type: Optional[str] = None,
63
63
  input_id: Optional[str] = None,
64
+ copy_data: bool = True,
64
65
  __log: bool = True,
65
66
  ) -> VMDataset:
66
67
  """
@@ -92,6 +93,7 @@ def init_dataset(
92
93
  this will be set to `dataset` but if you are passing this dataset as a
93
94
  test input using some other key than `dataset`, then you should set
94
95
  this to the same key.
96
+ copy_data (bool, optional): Whether to copy the data. Defaults to True.
95
97
  __log (bool): Whether to log the input. Defaults to True.
96
98
 
97
99
  Raises:
@@ -121,6 +123,7 @@ def init_dataset(
121
123
  extra_columns=extra_columns,
122
124
  target_class_labels=class_labels,
123
125
  date_time_index=date_time_index,
126
+ copy_data=copy_data,
124
127
  )
125
128
  elif isinstance(dataset, pl.DataFrame):
126
129
  vm_dataset = PolarsDataset(
validmind/template.py CHANGED
@@ -53,8 +53,9 @@ def _convert_sections_to_section_tree(
53
53
 
54
54
  if start_section_id and not section_tree:
55
55
  raise ValueError(f"Section {start_section_id} not found in template")
56
-
57
- return sorted(section_tree, key=lambda x: x.get("order", 0))
56
+ # sort the section tree by the order of the sections in the template (if provided)
57
+ # set the order to 9999 for the sections that do not have an order
58
+ return sorted(section_tree, key=lambda x: x.get("order", 9999))
58
59
 
59
60
 
60
61
  def _create_content_widget(content: Dict[str, Any]) -> Widget:
@@ -47,6 +47,7 @@ class VMDataset(VMInput):
47
47
  target_class_labels (Dict): The class labels for the target columns.
48
48
  df (pd.DataFrame): The dataset as a pandas DataFrame.
49
49
  extra_columns (Dict): Extra columns to include in the dataset.
50
+ copy_data (bool): Whether to copy the data. Defaults to True.
50
51
  """
51
52
 
52
53
  def __repr__(self):
@@ -66,6 +67,7 @@ class VMDataset(VMInput):
66
67
  text_column: str = None,
67
68
  extra_columns: dict = None,
68
69
  target_class_labels: dict = None,
70
+ copy_data: bool = True,
69
71
  ):
70
72
  """
71
73
  Initializes a VMDataset instance.
@@ -82,6 +84,7 @@ class VMDataset(VMInput):
82
84
  feature_columns (str, optional): The feature column names of the dataset. Defaults to None.
83
85
  text_column (str, optional): The text column name of the dataset for nlp tasks. Defaults to None.
84
86
  target_class_labels (Dict, optional): The class labels for the target columns. Defaults to None.
87
+ copy_data (bool, optional): Whether to copy the data. Defaults to True.
85
88
  """
86
89
  # initialize input_id
87
90
  self.input_id = input_id
@@ -112,6 +115,7 @@ class VMDataset(VMInput):
112
115
  self.target_class_labels = target_class_labels
113
116
  self.extra_columns = ExtraColumns.from_dict(extra_columns)
114
117
  self._set_feature_columns(feature_columns)
118
+ self._copy_data = copy_data
115
119
 
116
120
  if model:
117
121
  self.assign_predictions(model)
@@ -129,16 +133,19 @@ class VMDataset(VMInput):
129
133
  excluded = [self.target_column, *self.extra_columns.flatten()]
130
134
  self.feature_columns = [col for col in self.columns if col not in excluded]
131
135
 
132
- self.feature_columns_numeric = (
133
- self._df[self.feature_columns]
134
- .select_dtypes(include=[np.number])
135
- .columns.tolist()
136
- )
137
- self.feature_columns_categorical = (
138
- self._df[self.feature_columns]
139
- .select_dtypes(include=[object, pd.Categorical])
140
- .columns.tolist()
141
- )
136
+ # Get dtypes without loading data into memory
137
+ feature_dtypes = self._df[self.feature_columns].dtypes
138
+
139
+ self.feature_columns_numeric = feature_dtypes[
140
+ feature_dtypes.apply(lambda x: pd.api.types.is_numeric_dtype(x))
141
+ ].index.tolist()
142
+
143
+ self.feature_columns_categorical = feature_dtypes[
144
+ feature_dtypes.apply(
145
+ lambda x: pd.api.types.is_categorical_dtype(x)
146
+ or pd.api.types.is_object_dtype(x)
147
+ )
148
+ ].index.tolist()
142
149
 
143
150
  def _add_column(self, column_name, column_values):
144
151
  column_values = np.array(column_values)
@@ -397,8 +404,18 @@ class VMDataset(VMInput):
397
404
  assert self.target_column not in columns
398
405
  columns.append(self.target_column)
399
406
 
400
- # return a copy to prevent accidental modification
401
- return as_df(self._df[columns]).copy()
407
+ # Check if all columns in self._df are requested
408
+ all_columns = set(columns) == set(self._df.columns)
409
+
410
+ # For copy_data=False and all columns: return exact same DataFrame object
411
+ if not self._copy_data and all_columns:
412
+ return self._df
413
+ # For copy_data=False and subset of columns: return view with shared data
414
+ elif not self._copy_data:
415
+ return as_df(self._df[columns])
416
+ # For copy_data=True: return independent copy with duplicated data
417
+ else:
418
+ return as_df(self._df[columns]).copy()
402
419
 
403
420
  @property
404
421
  def x(self) -> np.ndarray:
@@ -522,9 +539,10 @@ class DataFrameDataset(VMDataset):
522
539
  text_column: str = None,
523
540
  target_class_labels: dict = None,
524
541
  date_time_index: bool = False,
542
+ copy_data: bool = True,
525
543
  ):
526
544
  """
527
- Initializes a DataFrameDataset instance.
545
+ Initializes a DataFrameDataset instance, preserving original pandas dtypes.
528
546
 
529
547
  Args:
530
548
  raw_dataset (pd.DataFrame): The raw dataset as a pandas DataFrame.
@@ -536,25 +554,44 @@ class DataFrameDataset(VMDataset):
536
554
  text_column (str, optional): The text column name of the dataset for NLP tasks. Defaults to None.
537
555
  target_class_labels (dict, optional): The class labels for the target columns. Defaults to None.
538
556
  date_time_index (bool, optional): Whether to use date-time index. Defaults to False.
557
+ copy_data (bool, optional): Whether to create a copy of the input data. Defaults to True.
539
558
  """
559
+
560
+ VMInput.__init__(self)
561
+
562
+ self.input_id = input_id
563
+
540
564
  index = None
541
565
  if isinstance(raw_dataset.index, pd.Index):
542
566
  index = raw_dataset.index.values
567
+ self.index = index
543
568
 
544
- super().__init__(
545
- raw_dataset=raw_dataset.values,
546
- input_id=input_id,
547
- model=model,
548
- index_name=raw_dataset.index.name,
549
- index=index,
550
- columns=raw_dataset.columns.to_list(),
551
- target_column=target_column,
552
- extra_columns=extra_columns,
553
- feature_columns=feature_columns,
554
- text_column=text_column,
555
- target_class_labels=target_class_labels,
556
- date_time_index=date_time_index,
557
- )
569
+ # Store the DataFrame directly
570
+ self._df = raw_dataset
571
+
572
+ if date_time_index:
573
+ self._df = convert_index_to_datetime(self._df)
574
+
575
+ self.columns = raw_dataset.columns.tolist()
576
+ self.column_aliases = {}
577
+ self.target_column = target_column
578
+ self.text_column = text_column
579
+ self.target_class_labels = target_class_labels
580
+ self.extra_columns = ExtraColumns.from_dict(extra_columns)
581
+ self._copy_data = copy_data
582
+
583
+ # Add warning when copy_data is False
584
+ if not copy_data:
585
+ logger.warning(
586
+ "Dataset initialized with copy_data=False. Changes to the original DataFrame "
587
+ "may affect this dataset. Use this option only when memory efficiency is critical "
588
+ "and you won't modify the source data."
589
+ )
590
+
591
+ self._set_feature_columns(feature_columns)
592
+
593
+ if model:
594
+ self.assign_predictions(model)
558
595
 
559
596
 
560
597
  class PolarsDataset(VMDataset):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: validmind
3
- Version: 2.8.20
3
+ Version: 2.8.22
4
4
  Summary: ValidMind Library
5
5
  License: Commercial License
6
6
  Author: Andres Rodriguez
@@ -1,9 +1,9 @@
1
1
  validmind/__init__.py,sha256=qmC6WY6ifIQpCU38V91EN91UlIEcOG7a9jWK3smZJoY,4220
2
- validmind/__version__.py,sha256=iRm89S5RopcoKz6axcNfUiasj1Y2r4BwSP5aughGqlE,23
2
+ validmind/__version__.py,sha256=KYh5XihUKlbKxUpWkhRlJg_POPRyBFW9RUgXYfdQc0s,23
3
3
  validmind/ai/test_descriptions.py,sha256=eBF09MAyqAAD-Ah7vxXVRbHxOmGx5_10ZkoJmMvEaEA,7123
4
4
  validmind/ai/utils.py,sha256=O5gTkvGsPCCdKCdBGvpDaJM1oL_msdm2xKkf9fFpIy8,4172
5
5
  validmind/api_client.py,sha256=slvf0FJ8olYsK1-EPetMVYV7UvhjMAIFBxwBWSVT9BI,16807
6
- validmind/client.py,sha256=mVctTYyoDFDKnCA_eT99ZmOmOznOStx_C1vBJhET9sM,18908
6
+ validmind/client.py,sha256=XKb4uc7yXVV_3NH9-zTrS9jCbLPX2zZZU12vKKlSpIc,19049
7
7
  validmind/client_config.py,sha256=O1gopTaNADM4ZVPj383AJTjcpjdxyEvUQY5cFt7nbIs,1366
8
8
  validmind/datasets/__init__.py,sha256=c0hQZN_6GrUEJxdFHdQaEsQrSYNABG84ZCY0H-PzOZk,260
9
9
  validmind/datasets/classification/__init__.py,sha256=p2p9r3SE4FIm4WNNJb4-axnkD5EexVW9VxIRYTbA6dI,1941
@@ -73,7 +73,7 @@ validmind/models/pipeline.py,sha256=nSskKWxaS4SGmx_B0IAvS5ogDZyh6tdx_aUkyxSXt88,
73
73
  validmind/models/pytorch.py,sha256=aAEUWtISwLh-PMvHkcLwBEbBStAByt4J-NpK-Ndv38E,1826
74
74
  validmind/models/r_model.py,sha256=TPUwPmxz3cNzJ1bAA5vz6P9xS6deVcLTuIO1e7rD1vY,7306
75
75
  validmind/models/sklearn.py,sha256=lOCJlP2wvd5IJHtBS1XG9FXrtIvO_f8xm2Qp1UdsiBw,2406
76
- validmind/template.py,sha256=e_5PvX-CotA7gz45gQ5zoHgIUECMyVYxlJK56Oc1cnA,7742
76
+ validmind/template.py,sha256=ezmHLee5QkccBf4n8iBFcXw7EMljSceGgGJb3O0c4cE,7902
77
77
  validmind/test_suites/__init__.py,sha256=ofNaXD2SdkHko_Fy_RTr29YBHPGWiXVzHd1nENwi2ms,6973
78
78
  validmind/test_suites/classifier.py,sha256=0ZE3z5X_ZewTvmwQ3cVGJQh7dPgg0IlqcQshJJxCFWQ,4003
79
79
  validmind/test_suites/cluster.py,sha256=Wc2NViwivjiuiJMwrnGbOJYeZ3ApN8usWlOPYZYWAgE,2276
@@ -309,7 +309,7 @@ validmind/unit_metrics/regression/RootMeanSquaredError.py,sha256=uIDsSpy75Z7W3zu
309
309
  validmind/utils.py,sha256=59WWVV_JhvxzPr8Y625qw_wsyu_ZVRoLJyi0Pw3MTMU,28613
310
310
  validmind/vm_models/__init__.py,sha256=lcqf9q2aRzrVrNN6R--81IkrnSa6BXPbhJ8SnkT_hcI,702
311
311
  validmind/vm_models/dataset/__init__.py,sha256=U4CxZjdoc0dd9u2AqBl5PJh1UVbzXWNrmundmjLF-qE,346
312
- validmind/vm_models/dataset/dataset.py,sha256=4RCKyWIHd-RCIpleMIlfURGEQlmCRQkFZ5CRsJRC-g4,26479
312
+ validmind/vm_models/dataset/dataset.py,sha256=lP0XLncHV6V5P-DG-Zs__wgirlUrEHokST-dQpb81Ro,28038
313
313
  validmind/vm_models/dataset/utils.py,sha256=g6mBPrBmVYf8wJAlTxeg9DTiNvts4ZaaT5mbnQAPWaU,5638
314
314
  validmind/vm_models/figure.py,sha256=ZMO_nIIleNhkBV1vJeF_UUsVDCzrXNOYwV1Lbg9E0XY,6303
315
315
  validmind/vm_models/input.py,sha256=nTBQB6aqirhF-0Gmg5mYc4_vNyypvbYUfahMovcK02M,1095
@@ -323,8 +323,8 @@ validmind/vm_models/test_suite/runner.py,sha256=JqW8LW4X1Ri2C6wSsAGSki-JxGUGV8zm
323
323
  validmind/vm_models/test_suite/summary.py,sha256=7P4zhfeU7a3I1MMBn8f7s-2lzdAz7U4y6LblpR89_vE,5401
324
324
  validmind/vm_models/test_suite/test.py,sha256=C8xPGKSyYF9oMJ3VegwFJDF7cwYlIgtQoQ7nzXIS1uc,3914
325
325
  validmind/vm_models/test_suite/test_suite.py,sha256=CciC6IhrLEeWwcpY3Np8EmQCB8XEF2ljwEXcvmNYgZc,5090
326
- validmind-2.8.20.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
327
- validmind-2.8.20.dist-info/METADATA,sha256=iy0RjnDMshRPOKnzwT4N4fEh8cR1YKakh5IrYn2-jyM,6032
328
- validmind-2.8.20.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
329
- validmind-2.8.20.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
330
- validmind-2.8.20.dist-info/RECORD,,
326
+ validmind-2.8.22.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
327
+ validmind-2.8.22.dist-info/METADATA,sha256=cLg53_Ec1ZxQQ1eiIsBugTOh8e2_wjgCnPfWBWAEuZc,6032
328
+ validmind-2.8.22.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
329
+ validmind-2.8.22.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
330
+ validmind-2.8.22.dist-info/RECORD,,