Column Transformer

				
					import pandas as pd
				
			
				
					# Toy data set: one numeric column ('sales') and two categorical
					# columns ('city', 'size'), twelve rows each.
					d = {
					    'sales': [
					        100_000, 222_000, 1_000_000, 522_000, 111_111, 222_222,
					        1_111_111, 20_000, 75_000, 90_000, 1_000_000, 10_000,
					    ],
					    'city': [
					        'Tampa', 'Tampa', 'Orlando', 'Jacksonville', 'Miami', 'Jacksonville',
					        'Miami', 'Miami', 'Orlando', 'Orlando', 'Orlando', 'Orlando',
					    ],
					    'size': [
					        'Small', 'Medium', 'Large', 'Large', 'Small', 'Medium',
					        'Large', 'Small', 'Medium', 'Medium', 'Medium', 'Small',
					    ],
					}

					# Each dict key becomes a DataFrame column, in insertion order.
					df = pd.DataFrame(d)

					# Bare expression: shows the frame when run as a notebook cell.
					df
				
			
				
					from sklearn.preprocessing import OneHotEncoder
				
			
				
					from sklearn.preprocessing import OrdinalEncoder
				
			
				
					# Encoder instances shared by the column transformers below.
					# Ordinal encoder with default settings.
					ode = OrdinalEncoder()
					# One-hot encoder configured for dense (non-sparse) output.
					ohe = OneHotEncoder(sparse_output=False)
				
			
				
					from sklearn.compose import make_column_transformer
				
			
				
					# One-hot encode 'city', ordinal-encode 'size', and drop every other
					# column. The transformers must be estimator *instances* (here the
					# `ohe` / `ode` objects created earlier), not the encoder classes.
					ct = make_column_transformer(
					    (ohe, ['city']),
					    (ode, ['size']),
					    remainder='drop',
					)

					# Ask the transformer to return a pandas DataFrame from transform().
					ct.set_output(transform="pandas")

					# Fit the encoders on df and transform it in one step.
					df_pandas = ct.fit_transform(df)

					# Bare expression: shows the result when run as a notebook cell.
					df_pandas
				
			
# Example: drop all columns that are not explicitly transformed
				
					# Same idea as before, but the columns are selected by position:
					# index 1 is 'city' and index 2 is 'size' in df's column order.
					ct2 = make_column_transformer(
					    (ohe, [1]),
					    (ode, [2]),
					    remainder='drop',
					    sparse_threshold=0,
					)

					# Return a pandas DataFrame from transform().
					ct2.set_output(transform="pandas")

					# Fit on df and transform it in one step.
					df_pandas2 = ct2.fit_transform(df)

					# Bare expression: shows the result when run as a notebook cell.
					df_pandas2
				
			
# Example: pass through some columns, drop the others
				
					# One-hot encode 'city', pass 'size' through unchanged, and drop
					# whatever columns remain (here: 'sales').
					ct3 = make_column_transformer(
					    (ohe, ['city']),
					    ('passthrough', ['size']),
					    remainder='drop',
					    sparse_threshold=0,
					)

					# Return a pandas DataFrame from transform().
					ct3.set_output(transform="pandas")

					# Fit on df and transform it in one step.
					df_pandas3 = ct3.fit_transform(df)

					# Bare expression: shows the result when run as a notebook cell.
					df_pandas3
				
			

Ryan is a Data Scientist at a fintech company, where he focuses on fraud prevention in underwriting and risk. Before that, he worked as a Data Analyst at a tax software company. He holds a degree in Electrical Engineering from UCF.

Leave a Reply

Your email address will not be published. Required fields are marked *