Data preprocessing

Import libraries


import numpy as np
from sklearn import preprocessing as pp

# Sample data used by every example below: a 4 x 3 float array
# (the column-wise statistics further down treat rows as samples).
input_data = np.array([
    [2.1, -1.9, 5.5],
    [-1.5, 2.4, 3.5],
    [0.5, -7.9, 5.6],
    [5.9, 2.3, -5.8],
])



Binarization
# Binarize: values strictly greater than the threshold become 1, all others
# (including values equal to the threshold, e.g. 0.5 here) become 0.
data_bin = pp.Binarizer(threshold=0.5).transform(input_data)
# array([[1., 0., 1.],
#        [0., 1., 1.],
#        [0., 0., 1.],
#        [1., 1., 0.]])

Mean removal (standardization)
# Column-wise mean and standard deviation of the raw data (axis 0).
input_data.mean(0)  # array([ 1.75 , -1.275,  2.2  ])
input_data.std(0)   # array([2.71431391, 4.20022321, 4.69414529])

# Standardize every column to zero mean and unit standard deviation.
data_scale = pp.scale(input_data)

# After scaling the means are 0 up to floating-point round-off and the
# standard deviations are 1.
data_scale.mean(0)  # array([1.11022302e-16, 0.00000000e+00, 0.00000000e+00])
data_scale.std(0)   # array([1., 1., 1.])

Scaling
# Rescale each column linearly so its minimum maps to 0 and its maximum to 1.
data_minmax = pp.MinMaxScaler(feature_range=(0, 1)).fit_transform(input_data)
# array([[0.48648649, 0.58252427, 0.99122807],
#        [0.        , 1.        , 0.81578947],
#        [0.27027027, 0.        , 1.        ],
#        [1.        , 0.99029126, 0.        ]])

Normalize
# L1 normalization: each row is scaled so that the sum of the absolute
# values of its entries equals 1.
data_L1 = pp.normalize(input_data, norm='l1')
# array([[ 0.22105263, -0.2       ,  0.57894737],
#        [-0.2027027 ,  0.32432432,  0.47297297],
#        [ 0.03571429, -0.56428571,  0.4       ],
#        [ 0.42142857,  0.16428571, -0.41428571]])

# L2 normalization: each row is scaled so that the sum of the squares of
# its entries equals 1 (unit Euclidean length).
data_L2 = pp.normalize(input_data, norm='l2')
# array([[ 0.33946114, -0.30713151,  0.88906489],
#        [-0.33325106,  0.53320169,  0.7775858 ],
#        [ 0.05156558, -0.81473612,  0.57753446],
#        [ 0.68706914,  0.26784051, -0.6754239 ]])


Label encoding
input_labels = ['red', 'black', 'red', 'green', 'black', 'yellow', 'white']
# Unique labels. Set iteration order is arbitrary, so the order of
# test_labels (and of the encoded values below) can differ between runs.
test_labels = list(set(input_labels))

# Create the label encoder and fit it on the known labels. transform() and
# inverse_transform() require a fitted encoder. Classes are stored sorted:
# black=0, green=1, red=2, white=3, yellow=4.
encoder = pp.LabelEncoder()
encoder.fit(input_labels)

# label ==> integer code
values = encoder.transform(test_labels)
# e.g. array([1, 4, 3, 2, 0], dtype=int64)
# for test_labels == ['green', 'yellow', 'white', 'red', 'black']

# integer code ==> label
test = np.array([1, 3, 4])
labels = encoder.inverse_transform(test)
# array(['green', 'white', 'yellow'], dtype='<U6')

Import libraries


import numpy as np
from sklearn import preprocessing as pp

# Sample data used by every example below: a 4 x 3 float array
# (the column-wise statistics further down treat rows as samples).
input_data = np.array([
    [2.1, -1.9, 5.5],
    [-1.5, 2.4, 3.5],
    [0.5, -7.9, 5.6],
    [5.9, 2.3, -5.8],
])



Binarization
# Binarize: values strictly greater than the threshold become 1, all others
# (including values equal to the threshold, e.g. 0.5 here) become 0.
data_bin = pp.Binarizer(threshold=0.5).transform(input_data)
# array([[1., 0., 1.],
#        [0., 1., 1.],
#        [0., 0., 1.],
#        [1., 1., 0.]])

Mean removal (standardization)
# Column-wise mean and standard deviation of the raw data (axis 0).
input_data.mean(0)  # array([ 1.75 , -1.275,  2.2  ])
input_data.std(0)   # array([2.71431391, 4.20022321, 4.69414529])

# Standardize every column to zero mean and unit standard deviation.
data_scale = pp.scale(input_data)

# After scaling the means are 0 up to floating-point round-off and the
# standard deviations are 1.
data_scale.mean(0)  # array([1.11022302e-16, 0.00000000e+00, 0.00000000e+00])
data_scale.std(0)   # array([1., 1., 1.])

Scaling
# Rescale each column linearly so its minimum maps to 0 and its maximum to 1.
data_minmax = pp.MinMaxScaler(feature_range=(0, 1)).fit_transform(input_data)
# array([[0.48648649, 0.58252427, 0.99122807],
#        [0.        , 1.        , 0.81578947],
#        [0.27027027, 0.        , 1.        ],
#        [1.        , 0.99029126, 0.        ]])

Normalize
# L1 normalization: each row is scaled so that the sum of the absolute
# values of its entries equals 1.
data_L1 = pp.normalize(input_data, norm='l1')
# array([[ 0.22105263, -0.2       ,  0.57894737],
#        [-0.2027027 ,  0.32432432,  0.47297297],
#        [ 0.03571429, -0.56428571,  0.4       ],
#        [ 0.42142857,  0.16428571, -0.41428571]])

# L2 normalization: each row is scaled so that the sum of the squares of
# its entries equals 1 (unit Euclidean length).
data_L2 = pp.normalize(input_data, norm='l2')
# array([[ 0.33946114, -0.30713151,  0.88906489],
#        [-0.33325106,  0.53320169,  0.7775858 ],
#        [ 0.05156558, -0.81473612,  0.57753446],
#        [ 0.68706914,  0.26784051, -0.6754239 ]])


Label encoding
input_labels = ['red', 'black', 'red', 'green', 'black', 'yellow', 'white']
# Unique labels. Set iteration order is arbitrary, so the order of
# test_labels (and of the encoded values below) can differ between runs.
test_labels = list(set(input_labels))

# Create the label encoder and fit it on the known labels. transform() and
# inverse_transform() require a fitted encoder. Classes are stored sorted:
# black=0, green=1, red=2, white=3, yellow=4.
encoder = pp.LabelEncoder()
encoder.fit(input_labels)

# label ==> integer code
values = encoder.transform(test_labels)
# e.g. array([1, 4, 3, 2, 0], dtype=int64)
# for test_labels == ['green', 'yellow', 'white', 'red', 'black']

# integer code ==> label
test = np.array([1, 3, 4])
labels = encoder.inverse_transform(test)
# array(['green', 'white', 'yellow'], dtype='<U6')

Leave a Comment

Your email address will not be published.