varsha-preprocess 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- varsha_preprocess-0.1.1/My_Preprocess1/Column_Transformer.py +58 -0
- varsha_preprocess-0.1.1/My_Preprocess1/__init__.py +9 -0
- varsha_preprocess-0.1.1/My_Preprocess1/encoders/MyLabelEncoder.py +22 -0
- varsha_preprocess-0.1.1/My_Preprocess1/encoders/MyOneHotEncoder.py +38 -0
- varsha_preprocess-0.1.1/My_Preprocess1/encoders/MyOrdinalEncoder.py +53 -0
- varsha_preprocess-0.1.1/My_Preprocess1/encoders/__init__.py +3 -0
- varsha_preprocess-0.1.1/My_Preprocess1/my_pipeline.py +23 -0
- varsha_preprocess-0.1.1/My_Preprocess1/preprocessing/__init__.py +1 -0
- varsha_preprocess-0.1.1/My_Preprocess1/preprocessing/my_scaler.py +38 -0
- varsha_preprocess-0.1.1/PKG-INFO +49 -0
- varsha_preprocess-0.1.1/README.md +37 -0
- varsha_preprocess-0.1.1/setup.cfg +4 -0
- varsha_preprocess-0.1.1/setup.py +19 -0
- varsha_preprocess-0.1.1/varsha_preprocess.egg-info/PKG-INFO +49 -0
- varsha_preprocess-0.1.1/varsha_preprocess.egg-info/SOURCES.txt +16 -0
- varsha_preprocess-0.1.1/varsha_preprocess.egg-info/dependency_links.txt +1 -0
- varsha_preprocess-0.1.1/varsha_preprocess.egg-info/requires.txt +2 -0
- varsha_preprocess-0.1.1/varsha_preprocess.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from .encoders.MyLabelEncoder import MyLabelEncoder
|
|
2
|
+
from .encoders.MyOneHotEncoder import MyOneHotEncoder
|
|
3
|
+
from .encoders.MyOrdinalEncoder import MyOrdinalEncoder
|
|
4
|
+
from .my_pipeline import MyPipeline
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
class MyColumnTransformer:
    """Apply per-column transformers to a DataFrame and stack the results.

    A minimal, from-scratch analogue of sklearn's ColumnTransformer.

    Parameters
    ----------
    transformers : list of (name, transformer, column) tuples
        Each ``transformer`` must expose ``fit_transform``; ``column`` is
        the DataFrame column it is applied to.
    """

    def __init__(self, transformers):
        self.transformers = transformers

    def fit_transform(self, df):
        """Fit/transform each configured column and return the
        horizontally concatenated result as a 2-D numpy array.

        Raises
        ------
        ValueError
            If no transformers are configured (nothing to concatenate).
        """
        outputs = []

        for name, encoder, col in self.transformers:
            # extract the column and run it through its transformer
            transformed = encoder.fit_transform(df[col])

            # Normalise to 2-D: a 1-D output (e.g. a label-encoded
            # Series) would otherwise break axis=1 concatenation.
            arr = np.asarray(transformed)
            if arr.ndim == 1:
                arr = arr.reshape(-1, 1)

            outputs.append(arr)

        if not outputs:
            # np.concatenate([]) would raise anyway; give a clearer message.
            raise ValueError("MyColumnTransformer has no transformers configured")

        # combine all per-column outputs side by side
        return np.concatenate(outputs, axis=1)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# df = pd.read_csv("Practice/SalaryData.csv")
|
|
31
|
+
|
|
32
|
+
# ct = MyColumnTransformer([
|
|
33
|
+
# # ("label", MyLabelEncoder(), "cough"),
|
|
34
|
+
# ("ordinal", MyOneHotEncoder(), "Education Level")
|
|
35
|
+
# # ("onehot", MyOneHotEncoder(), "city")
|
|
36
|
+
# ])
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# result = ct.fit_transform(df)
|
|
40
|
+
# print(result)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# ct = MyColumnTransformer([
|
|
44
|
+
|
|
45
|
+
# ("education_pipe",
|
|
46
|
+
# MyPipeline([
|
|
47
|
+
# ("ordinal", MyOrdinalEncoder())
|
|
48
|
+
# ]),
|
|
49
|
+
# "Education Level"
|
|
50
|
+
# ),
|
|
51
|
+
|
|
52
|
+
# ("gender_pipe",
|
|
53
|
+
# MyPipeline([
|
|
54
|
+
# ("onehot", MyOneHotEncoder())
|
|
55
|
+
# ]),
|
|
56
|
+
# "Gender"
|
|
57
|
+
# )])
|
|
58
|
+
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from .encoders.MyLabelEncoder import MyLabelEncoder
|
|
2
|
+
from .encoders.MyOneHotEncoder import MyOneHotEncoder
|
|
3
|
+
from .encoders.MyOrdinalEncoder import MyOrdinalEncoder
|
|
4
|
+
|
|
5
|
+
from .preprocessing.my_scaler import standardscaling
|
|
6
|
+
|
|
7
|
+
from .Column_Transformer import MyColumnTransformer
|
|
8
|
+
from .my_pipeline import MyPipeline
|
|
9
|
+
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
class MyLabelEncoder:
    """Encode categorical values as integer codes.

    Codes are assigned in sorted order of the unique categories seen
    during ``fit`` (via ``np.unique``). Unseen categories transform to
    NaN, following ``pandas.Series.map`` semantics.
    """

    def fit(self, y):
        # Sorted unique categories establish the code assignment.
        self.categories = np.unique(y)

    def transform(self, y):
        # Build the category -> code lookup in fit() order, then map.
        codes = {category: code for code, category in enumerate(self.categories)}
        return y.map(codes)

    def fit_transform(self, y):
        self.fit(y)
        return self.transform(y)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# df = pd.read_csv("Practice/covid_data.csv")
|
|
19
|
+
# y = df['has_covid']
|
|
20
|
+
|
|
21
|
+
# le = MyLabelEncoder()
|
|
22
|
+
# print(le.fit_transform(y))
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
class MyOneHotEncoder:
    """One-hot encode a pandas Series into a 0/1 indicator DataFrame.

    One column is produced per category observed during ``fit``; NaN
    values are dropped and never become a category of their own.
    """

    def fit(self, X):
        # Sorted unique non-null values define the indicator columns.
        self.categories = np.unique(X.dropna())

    def transform(self, X):
        # One indicator column per known category: 1 where X equals the
        # category, 0 elsewhere (boolean comparison cast to int).
        indicators = {
            category: (X == category).astype(int)
            for category in self.categories
        }
        return pd.DataFrame(indicators)

    def fit_transform(self, X):
        self.fit(X)
        return self.transform(X)
|
|
25
|
+
|
|
26
|
+
# df = pd.read_csv("Practice/covid_data.csv")
|
|
27
|
+
# X = np.array(df['gender']).reshape(-1,1)
|
|
28
|
+
|
|
29
|
+
# X = [['male']], [['female']]
|
|
30
|
+
# df = pd.read_csv("Practice/SalaryData.csv")
|
|
31
|
+
# X = df['Education Level']
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# print(type(X))
|
|
35
|
+
# ohe = MyOneHotEncoder()
|
|
36
|
+
# print(ohe.fit_transform(X))
|
|
37
|
+
|
|
38
|
+
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
class MyOrdinalEncoder:
    """Encode categories as integers according to a caller-supplied order.

    The i-th category in ``order`` is encoded as ``i``; values not present
    in the order are encoded as ``-1``.
    """

    def __init__(self):
        # category -> integer rank; None until fit() is called.
        self.mapping = None

    def fit(self, X, order):
        """Build the category->rank mapping from ``order``.

        ``X`` is accepted for interface symmetry with the other encoders
        but is not inspected here. Returns ``self`` for chaining.
        """
        self.mapping = {category: rank for rank, category in enumerate(order)}
        return self

    def transform(self, X):
        """Map each value of X to its rank (-1 for unknown values)."""
        if self.mapping is None:
            raise ValueError("first insert the order")

        result = []
        for value in X:
            # BUG FIX: the original condition was
            # `if value in self.mapping is None:` — a chained comparison
            # that is always False, so *every* value was encoded as -1.
            if value in self.mapping:
                result.append(self.mapping[value])
            else:
                result.append(-1)

        return np.array(result).reshape(-1, 1)

    def fit_transform(self, X, order):
        self.fit(X, order)
        return self.transform(X)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# df = pd.read_csv("Practice/covid_data.csv")
|
|
48
|
+
# X = df['cough']
|
|
49
|
+
|
|
50
|
+
# oe = MyOrdinalEncoder()
|
|
51
|
+
|
|
52
|
+
# print(oe.ordinal_encoding(X))
|
|
53
|
+
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from .encoders.MyLabelEncoder import MyLabelEncoder
|
|
2
|
+
from .encoders.MyOneHotEncoder import MyOneHotEncoder
|
|
3
|
+
from .encoders.MyOrdinalEncoder import MyOrdinalEncoder
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
class MyPipeline:
    """Run data through a sequence of (name, transformer) steps.

    Each transformer's ``fit_transform`` output is fed to the next step;
    with no steps the input is returned unchanged.
    """

    def __init__(self, steps):
        self.steps = steps

    def fit_transform(self, data):
        # NOTE: the original re-checked `len(self.steps) == 0` inside the
        # loop body; that branch was dead code (the loop never runs for an
        # empty step list), so it has been removed.
        for name, transformer in self.steps:
            data = transformer.fit_transform(data)

        return data
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .my_scaler import standardscaling
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
class standardscaling:
    """Standardize values to zero mean and unit variance (z-scores).

    Features with zero standard deviation are only centered, never
    divided, so constant columns do not cause division by zero.
    """

    def __init__(self):
        self.mean = 0
        self.standard_deviation = 0

    def fit(self, X):
        """Learn the mean and std of X; returns (mean, std)."""
        # Kept for interface compatibility with the original (unused).
        self.X = np.array(X).reshape(-1, 1)
        self.mean = np.mean(X, axis=0)
        self.standard_deviation = np.std(X, axis=0)
        return self.mean, self.standard_deviation

    def transform(self, X):
        """Return (X - mean) / std, skipping division where std == 0."""
        self.X = np.array(X).reshape(-1, 1)
        # BUG FIX: the original guarded only when *all* stds were zero,
        # so a mix of constant and varying columns divided by zero.
        # Substitute 1 for zero stds so those entries are centred only.
        safe_std = np.where(self.standard_deviation == 0, 1, self.standard_deviation)
        return (X - self.mean) / safe_std

    def fit_transform(self, X):
        self.fit(X)
        return self.transform(X)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# df = pd.read_csv("/home/varsha/PracticePython/100DaysOfMl/Practice/SalaryData.csv")
|
|
29
|
+
|
|
30
|
+
# X = df['Age']
|
|
31
|
+
# # print(X[3])
|
|
32
|
+
# # X = [100,120,130,150]
|
|
33
|
+
|
|
34
|
+
# standardscaler = standardscaling()
|
|
35
|
+
# X_scaled = standardscaler.fit_transform(X)
|
|
36
|
+
|
|
37
|
+
# print(X_scaled)
|
|
38
|
+
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: varsha-preprocess
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Custom preprocessing pipeline library
|
|
5
|
+
Description-Content-Type: text/markdown
|
|
6
|
+
Requires-Dist: numpy
|
|
7
|
+
Requires-Dist: pandas
|
|
8
|
+
Dynamic: description
|
|
9
|
+
Dynamic: description-content-type
|
|
10
|
+
Dynamic: requires-dist
|
|
11
|
+
Dynamic: summary
|
|
12
|
+
|
|
13
|
+
# My_Preprocess
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
A lightweight machine learning preprocessing library built from scratch using NumPy and Pandas.
|
|
17
|
+
|
|
18
|
+
This project is designed to deeply understand how core preprocessing tools like Pipeline, ColumnTransformer, and Encoders work internally — without relying on sklearn.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## ✨ Features
|
|
23
|
+
|
|
24
|
+
- 🔁 Custom Pipeline (sequential transformations)
|
|
25
|
+
- 🧩 ColumnTransformer (parallel column-wise transformations)
|
|
26
|
+
- 🔤 Encoders:
|
|
27
|
+
- Label Encoder
|
|
28
|
+
- One Hot Encoder
|
|
29
|
+
- Ordinal Encoder
|
|
30
|
+
- 📏 Standard Scaler
|
|
31
|
+
- ⚙️ Fully built using NumPy and Pandas
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## 🧠 Why this project?
|
|
36
|
+
|
|
37
|
+
Most ML libraries (like sklearn) hide internal implementation details.
|
|
38
|
+
|
|
39
|
+
This project focuses on:
|
|
40
|
+
- Understanding how transformations are chained
|
|
41
|
+
- Learning how fit / transform logic works
|
|
42
|
+
- Building preprocessing systems from scratch
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## 📦 Installation
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install varsha-preprocess
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# My_Preprocess
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
A lightweight machine learning preprocessing library built from scratch using NumPy and Pandas.
|
|
5
|
+
|
|
6
|
+
This project is designed to deeply understand how core preprocessing tools like Pipeline, ColumnTransformer, and Encoders work internally — without relying on sklearn.
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## ✨ Features
|
|
11
|
+
|
|
12
|
+
- 🔁 Custom Pipeline (sequential transformations)
|
|
13
|
+
- 🧩 ColumnTransformer (parallel column-wise transformations)
|
|
14
|
+
- 🔤 Encoders:
|
|
15
|
+
- Label Encoder
|
|
16
|
+
- One Hot Encoder
|
|
17
|
+
- Ordinal Encoder
|
|
18
|
+
- 📏 Standard Scaler
|
|
19
|
+
- ⚙️ Fully built using NumPy and Pandas
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## 🧠 Why this project?
|
|
24
|
+
|
|
25
|
+
Most ML libraries (like sklearn) hide internal implementation details.
|
|
26
|
+
|
|
27
|
+
This project focuses on:
|
|
28
|
+
- Understanding how transformations are chained
|
|
29
|
+
- Learning how fit / transform logic works
|
|
30
|
+
- Building preprocessing systems from scratch
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## 📦 Installation
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install varsha-preprocess
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from setuptools import setup, find_packages
# from .My_Preprocess1.my_pipeline import MyPipeline


# Read the README so PyPI can render it as the long description.
with open("README.md", "r", encoding="utf-8") as f:
    long_description = f.read()

setup(
    name="varsha-preprocess",
    version="0.1.1",
    packages=find_packages(),
    install_requires=["numpy", "pandas"],

    description="Custom preprocessing pipeline library",

    # key part for readme
    long_description=long_description,
    long_description_content_type="text/markdown",
)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: varsha-preprocess
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Custom preprocessing pipeline library
|
|
5
|
+
Description-Content-Type: text/markdown
|
|
6
|
+
Requires-Dist: numpy
|
|
7
|
+
Requires-Dist: pandas
|
|
8
|
+
Dynamic: description
|
|
9
|
+
Dynamic: description-content-type
|
|
10
|
+
Dynamic: requires-dist
|
|
11
|
+
Dynamic: summary
|
|
12
|
+
|
|
13
|
+
# My_Preprocess
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
A lightweight machine learning preprocessing library built from scratch using NumPy and Pandas.
|
|
17
|
+
|
|
18
|
+
This project is designed to deeply understand how core preprocessing tools like Pipeline, ColumnTransformer, and Encoders work internally — without relying on sklearn.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## ✨ Features
|
|
23
|
+
|
|
24
|
+
- 🔁 Custom Pipeline (sequential transformations)
|
|
25
|
+
- 🧩 ColumnTransformer (parallel column-wise transformations)
|
|
26
|
+
- 🔤 Encoders:
|
|
27
|
+
- Label Encoder
|
|
28
|
+
- One Hot Encoder
|
|
29
|
+
- Ordinal Encoder
|
|
30
|
+
- 📏 Standard Scaler
|
|
31
|
+
- ⚙️ Fully built using NumPy and Pandas
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## 🧠 Why this project?
|
|
36
|
+
|
|
37
|
+
Most ML libraries (like sklearn) hide internal implementation details.
|
|
38
|
+
|
|
39
|
+
This project focuses on:
|
|
40
|
+
- Understanding how transformations are chained
|
|
41
|
+
- Learning how fit / transform logic works
|
|
42
|
+
- Building preprocessing systems from scratch
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## 📦 Installation
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install varsha-preprocess
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
setup.py
|
|
3
|
+
My_Preprocess1/Column_Transformer.py
|
|
4
|
+
My_Preprocess1/__init__.py
|
|
5
|
+
My_Preprocess1/my_pipeline.py
|
|
6
|
+
My_Preprocess1/encoders/MyLabelEncoder.py
|
|
7
|
+
My_Preprocess1/encoders/MyOneHotEncoder.py
|
|
8
|
+
My_Preprocess1/encoders/MyOrdinalEncoder.py
|
|
9
|
+
My_Preprocess1/encoders/__init__.py
|
|
10
|
+
My_Preprocess1/preprocessing/__init__.py
|
|
11
|
+
My_Preprocess1/preprocessing/my_scaler.py
|
|
12
|
+
varsha_preprocess.egg-info/PKG-INFO
|
|
13
|
+
varsha_preprocess.egg-info/SOURCES.txt
|
|
14
|
+
varsha_preprocess.egg-info/dependency_links.txt
|
|
15
|
+
varsha_preprocess.egg-info/requires.txt
|
|
16
|
+
varsha_preprocess.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
My_Preprocess1
|