varsha-preprocess 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,58 @@
1
+ from .encoders.MyLabelEncoder import MyLabelEncoder
2
+ from .encoders.MyOneHotEncoder import MyOneHotEncoder
3
+ from .encoders.MyOrdinalEncoder import MyOrdinalEncoder
4
+ from .my_pipeline import MyPipeline
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
class MyColumnTransformer:
    """Apply one encoder per DataFrame column and stack the results.

    A minimal ColumnTransformer: each (name, encoder, column) triple is
    fitted on its own column, and all transformed outputs are concatenated
    side by side into a single 2-D numpy array.
    """

    def __init__(self, transformers):
        # transformers: list of (name, encoder, column_label) triples;
        # each encoder must expose fit_transform(column_data).
        self.transformers = transformers

    def fit_transform(self, df):
        """Fit every encoder on its column of *df* and return the
        horizontally concatenated result as a 2-D array.

        Raises ValueError when no transformers were configured (there is
        nothing to concatenate).
        """
        if not self.transformers:
            raise ValueError("MyColumnTransformer needs at least one transformer")

        outputs = []
        for name, encoder, col in self.transformers:
            transformed = encoder.fit_transform(df[col])
            # BUG FIX: a 1-D result (e.g. a label-encoded Series) used to
            # break np.concatenate(axis=1); normalise to a column vector.
            arr = np.asarray(transformed)
            if arr.ndim == 1:
                arr = arr.reshape(-1, 1)
            outputs.append(arr)

        # Combine all per-column outputs side by side.
        return np.concatenate(outputs, axis=1)
28
+
29
+
30
+ # df = pd.read_csv("Practice/SalaryData.csv")
31
+
32
+ # ct = MyColumnTransformer([
33
+ # # ("label", MyLabelEncoder(), "cough"),
34
+ # ("ordinal", MyOneHotEncoder(), "Education Level")
35
+ # # ("onehot", MyOneHotEncoder(), "city")
36
+ # ])
37
+
38
+
39
+ # result = ct.fit_transform(df)
40
+ # print(result)
41
+
42
+
43
+ # ct = MyColumnTransformer([
44
+
45
+ # ("education_pipe",
46
+ # MyPipeline([
47
+ # ("ordinal", MyOrdinalEncoder())
48
+ # ]),
49
+ # "Education Level"
50
+ # ),
51
+
52
+ # ("gender_pipe",
53
+ # MyPipeline([
54
+ # ("onehot", MyOneHotEncoder())
55
+ # ]),
56
+ # "Gender"
57
+ # )])
58
+
@@ -0,0 +1,9 @@
1
+ from .encoders.MyLabelEncoder import MyLabelEncoder
2
+ from .encoders.MyOneHotEncoder import MyOneHotEncoder
3
+ from .encoders.MyOrdinalEncoder import MyOrdinalEncoder
4
+
5
+ from .preprocessing.my_scaler import standardscaling
6
+
7
+ from .Column_Transformer import MyColumnTransformer
8
+ from .my_pipeline import MyPipeline
9
+
@@ -0,0 +1,22 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
class MyLabelEncoder:
    """Encode categorical labels as integers 0..n_classes-1.

    Classes are assigned codes in sorted order (np.unique order).
    """

    def fit(self, y):
        """Learn the sorted unique categories of *y*.

        Returns self so calls can be chained (consistent with
        MyOrdinalEncoder.fit).
        """
        self.categories = np.unique(y)
        return self

    def transform(self, y):
        """Map each label of *y* to its integer code.

        Accepts any sequence, not just a pandas Series; unseen labels
        become NaN (pandas .map semantics). Returns a pandas Series.
        """
        mapping_value = {val: index for index, val in enumerate(self.categories)}
        # Normalise to a Series so plain lists/arrays work too.
        return pd.Series(y).map(mapping_value)

    def fit_transform(self, y):
        """Fit on *y*, then return its encoded form."""
        self.fit(y)
        return self.transform(y)
16
+
17
+
18
+ # df = pd.read_csv("Practice/covid_data.csv")
19
+ # y = df['has_covid']
20
+
21
+ # le = MyLabelEncoder()
22
+ # print(le.fit_transform(y))
@@ -0,0 +1,38 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
class MyOneHotEncoder:
    """One-hot encode a single categorical column into 0/1 indicator columns."""

    def fit(self, X):
        """Learn the sorted unique non-null categories of *X*.

        Returns self so calls can be chained (consistent with
        MyOrdinalEncoder.fit).
        """
        # Normalise to a Series so plain lists/arrays work too.
        X = pd.Series(X)
        # NaN is missing data, not a category — drop before collecting classes.
        self.categories = np.unique(X.dropna())
        return self

    def transform(self, X):
        """Return a DataFrame with one 0/1 column per learned category.

        A row whose value was not seen during fit (or is NaN) produces
        all zeros.
        """
        X = pd.Series(X)
        result = {}
        for category in self.categories:
            # Elementwise comparison gives booleans; cast True/False to 1/0.
            result[category] = (X == category).astype(int)
        return pd.DataFrame(result)

    def fit_transform(self, X):
        """Fit on *X*, then return its one-hot encoding."""
        self.fit(X)
        return self.transform(X)
25
+
26
+ # df = pd.read_csv("Practice/covid_data.csv")
27
+ # X = np.array(df['gender']).reshape(-1,1)
28
+
29
+ # X = [['male']], [['female']]
30
+ # df = pd.read_csv("Practice/SalaryData.csv")
31
+ # X = df['Education Level']
32
+
33
+
34
+ # print(type(X))
35
+ # ohe = MyOneHotEncoder()
36
+ # print(ohe.fit_transform(X))
37
+
38
+
@@ -0,0 +1,53 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
class MyOrdinalEncoder:
    """Encode a categorical column as integer ranks in a caller-supplied order."""

    def __init__(self):
        # category -> rank mapping; None until fit() is called.
        self.mapping = None

    def fit(self, X, order):
        """Learn the category->rank mapping from *order*.

        X is unused but kept for an sklearn-like signature. Returns self
        so calls can be chained.
        """
        self.mapping = {category: rank for rank, category in enumerate(order)}
        return self

    def transform(self, X):
        """Return the ranks of X as an (n, 1) int array.

        Categories not present in the fitted order encode as -1.
        Raises ValueError if called before fit().
        """
        if self.mapping is None:
            raise ValueError("first insert the order")

        result = []
        for value in X:
            # BUG FIX: the original wrote `if value in self.mapping is None:`,
            # which Python chains to `(value in mapping) and (mapping is None)`
            # — always False after fit — so every value was encoded as -1.
            if value in self.mapping:
                result.append(self.mapping[value])
            else:
                result.append(-1)

        return np.array(result).reshape(-1, 1)

    def fit_transform(self, X, order):
        """Fit the ordering, then transform X in one call."""
        self.fit(X, order)
        return self.transform(X)
45
+
46
+
47
+ # df = pd.read_csv("Practice/covid_data.csv")
48
+ # X = df['cough']
49
+
50
+ # oe = MyOrdinalEncoder()
51
+
52
+ # print(oe.ordinal_encoding(X))
53
+
@@ -0,0 +1,3 @@
1
+ from .MyLabelEncoder import MyLabelEncoder
2
+ from .MyOneHotEncoder import MyOneHotEncoder
3
+ from .MyOrdinalEncoder import MyOrdinalEncoder
@@ -0,0 +1,23 @@
1
+ from .encoders.MyLabelEncoder import MyLabelEncoder
2
+ from .encoders.MyOneHotEncoder import MyOneHotEncoder
3
+ from .encoders.MyOrdinalEncoder import MyOrdinalEncoder
4
+
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
class MyPipeline:
    """Run a sequence of (name, transformer) steps in order.

    Each step's fit_transform output is fed as input to the next step —
    a minimal sklearn-style Pipeline.
    """

    def __init__(self, steps):
        # steps: list of (name, transformer); each transformer must
        # expose fit_transform(data).
        self.steps = steps

    def fit_transform(self, data):
        """Apply every step in order and return the final output.

        An empty step list returns *data* unchanged. (The original placed
        the empty-steps check inside the loop body, where it could never
        execute; that dead guard is removed — behavior is identical.)
        """
        for name, transformer in self.steps:
            data = transformer.fit_transform(data)
        return data
21
+
22
+
23
+
@@ -0,0 +1 @@
1
+ from .my_scaler import standardscaling
@@ -0,0 +1,38 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
class standardscaling:
    """Standardise values column-wise: z = (x - mean) / std (population std)."""

    def __init__(self):
        # Learned statistics; populated by fit().
        self.mean = 0
        self.standard_deviation = 0

    def fit(self, X):
        """Compute the per-column mean and population std of X.

        Returns (mean, standard_deviation), preserving the original API.
        """
        # NOTE(review): self.X is stored but never read by this class;
        # kept only so existing callers relying on the attribute still work.
        self.X = np.array(X).reshape(-1, 1)
        self.mean = np.mean(X, axis=0)
        self.standard_deviation = np.std(X, axis=0)
        return self.mean, self.standard_deviation

    def transform(self, X):
        """Return the standardised values of X.

        Zero-std positions are only centred (not divided). BUG FIX: the
        original guarded with np.all(std == 0), so an array with a mix of
        constant and non-constant columns divided by zero; the guard is
        now applied element-wise.
        """
        self.X = np.array(X).reshape(-1, 1)
        centered = X - self.mean
        # Substitute 1 wherever std == 0 so the division is a no-op there.
        safe_std = np.where(self.standard_deviation == 0, 1, self.standard_deviation)
        return centered / safe_std

    def fit_transform(self, X):
        """Fit on X, then return its standardised form."""
        self.fit(X)
        return self.transform(X)
26
+
27
+
28
+ # df = pd.read_csv("/home/varsha/PracticePython/100DaysOfMl/Practice/SalaryData.csv")
29
+
30
+ # X = df['Age']
31
+ # # print(X[3])
32
+ # # X = [100,120,130,150]
33
+
34
+ # standardscaler = standardscaling()
35
+ # X_scaled = standardscaler.fit_transform(X)
36
+
37
+ # print(X_scaled)
38
+
@@ -0,0 +1,49 @@
1
+ Metadata-Version: 2.4
2
+ Name: varsha-preprocess
3
+ Version: 0.1.1
4
+ Summary: Custom preprocessing pipeline library
5
+ Description-Content-Type: text/markdown
6
+ Requires-Dist: numpy
7
+ Requires-Dist: pandas
8
+ Dynamic: description
9
+ Dynamic: description-content-type
10
+ Dynamic: requires-dist
11
+ Dynamic: summary
12
+
13
+ # My_Preprocess
14
+
15
+
16
+ A lightweight machine learning preprocessing library built from scratch using NumPy and Pandas.
17
+
18
+ This project is designed to deeply understand how core preprocessing tools like Pipeline, ColumnTransformer, and Encoders work internally — without relying on sklearn.
19
+
20
+ ---
21
+
22
+ ## ✨ Features
23
+
24
+ - 🔁 Custom Pipeline (sequential transformations)
25
+ - 🧩 ColumnTransformer (parallel column-wise transformations)
26
+ - 🔤 Encoders:
27
+ - Label Encoder
28
+ - One Hot Encoder
29
+ - Ordinal Encoder
30
+ - 📏 Standard Scaler
31
+ - ⚙️ Fully built using NumPy and Pandas
32
+
33
+ ---
34
+
35
+ ## 🧠 Why this project?
36
+
37
+ Most ML libraries (like sklearn) hide internal implementation details.
38
+
39
+ This project focuses on:
40
+ - Understanding how transformations are chained
41
+ - Learning how fit / transform logic works
42
+ - Building preprocessing systems from scratch
43
+
44
+ ---
45
+
46
+ ## 📦 Installation
47
+
48
+ ```bash
49
+ pip install varsha-preprocess
@@ -0,0 +1,37 @@
1
+ # My_Preprocess
2
+
3
+
4
+ A lightweight machine learning preprocessing library built from scratch using NumPy and Pandas.
5
+
6
+ This project is designed to deeply understand how core preprocessing tools like Pipeline, ColumnTransformer, and Encoders work internally — without relying on sklearn.
7
+
8
+ ---
9
+
10
+ ## ✨ Features
11
+
12
+ - 🔁 Custom Pipeline (sequential transformations)
13
+ - 🧩 ColumnTransformer (parallel column-wise transformations)
14
+ - 🔤 Encoders:
15
+ - Label Encoder
16
+ - One Hot Encoder
17
+ - Ordinal Encoder
18
+ - 📏 Standard Scaler
19
+ - ⚙️ Fully built using NumPy and Pandas
20
+
21
+ ---
22
+
23
+ ## 🧠 Why this project?
24
+
25
+ Most ML libraries (like sklearn) hide internal implementation details.
26
+
27
+ This project focuses on:
28
+ - Understanding how transformations are chained
29
+ - Learning how fit / transform logic works
30
+ - Building preprocessing systems from scratch
31
+
32
+ ---
33
+
34
+ ## 📦 Installation
35
+
36
+ ```bash
37
+ pip install varsha-preprocess
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,19 @@
1
from setuptools import setup, find_packages


# The README doubles as the long description shown on the PyPI project page.
with open("README.md", "r", encoding="utf-8") as readme_file:
    readme_text = readme_file.read()

setup(
    name="varsha-preprocess",
    version="0.1.1",
    description="Custom preprocessing pipeline library",
    packages=find_packages(),
    install_requires=["numpy", "pandas"],
    # Render the README as Markdown rather than reST.
    long_description=readme_text,
    long_description_content_type="text/markdown",
)
@@ -0,0 +1,49 @@
1
+ Metadata-Version: 2.4
2
+ Name: varsha-preprocess
3
+ Version: 0.1.1
4
+ Summary: Custom preprocessing pipeline library
5
+ Description-Content-Type: text/markdown
6
+ Requires-Dist: numpy
7
+ Requires-Dist: pandas
8
+ Dynamic: description
9
+ Dynamic: description-content-type
10
+ Dynamic: requires-dist
11
+ Dynamic: summary
12
+
13
+ # My_Preprocess
14
+
15
+
16
+ A lightweight machine learning preprocessing library built from scratch using NumPy and Pandas.
17
+
18
+ This project is designed to deeply understand how core preprocessing tools like Pipeline, ColumnTransformer, and Encoders work internally — without relying on sklearn.
19
+
20
+ ---
21
+
22
+ ## ✨ Features
23
+
24
+ - 🔁 Custom Pipeline (sequential transformations)
25
+ - 🧩 ColumnTransformer (parallel column-wise transformations)
26
+ - 🔤 Encoders:
27
+ - Label Encoder
28
+ - One Hot Encoder
29
+ - Ordinal Encoder
30
+ - 📏 Standard Scaler
31
+ - ⚙️ Fully built using NumPy and Pandas
32
+
33
+ ---
34
+
35
+ ## 🧠 Why this project?
36
+
37
+ Most ML libraries (like sklearn) hide internal implementation details.
38
+
39
+ This project focuses on:
40
+ - Understanding how transformations are chained
41
+ - Learning how fit / transform logic works
42
+ - Building preprocessing systems from scratch
43
+
44
+ ---
45
+
46
+ ## 📦 Installation
47
+
48
+ ```bash
49
+ pip install varsha-preprocess
@@ -0,0 +1,16 @@
1
+ README.md
2
+ setup.py
3
+ My_Preprocess1/Column_Transformer.py
4
+ My_Preprocess1/__init__.py
5
+ My_Preprocess1/my_pipeline.py
6
+ My_Preprocess1/encoders/MyLabelEncoder.py
7
+ My_Preprocess1/encoders/MyOneHotEncoder.py
8
+ My_Preprocess1/encoders/MyOrdinalEncoder.py
9
+ My_Preprocess1/encoders/__init__.py
10
+ My_Preprocess1/preprocessing/__init__.py
11
+ My_Preprocess1/preprocessing/my_scaler.py
12
+ varsha_preprocess.egg-info/PKG-INFO
13
+ varsha_preprocess.egg-info/SOURCES.txt
14
+ varsha_preprocess.egg-info/dependency_links.txt
15
+ varsha_preprocess.egg-info/requires.txt
16
+ varsha_preprocess.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ numpy
2
+ pandas
@@ -0,0 +1 @@
1
+ My_Preprocess1