from numpy import *
import os
import pandas as pd
def loadDataSet(fileName, delim='\t'):
    """Load a delimiter-separated numeric text file into a NumPy matrix.

    Args:
        fileName: Path of the text file to read.
        delim: Separator between the fields of each line (tab by default).

    Returns:
        A ``numpy.matrix`` of floats with one row per line of the file.

    Raises:
        ValueError: If a field cannot be converted with ``float()``.
    """
    # The context manager closes the file even when parsing fails.
    with open(fileName) as fr:
        stringArr = [line.strip().split(delim) for line in fr]
    # On Python 3 map() returns a lazy iterator, not a list. Each row must be
    # materialised with list(); otherwise NumPy stores the map objects
    # themselves and later numeric conversion fails with
    # "float() argument must be a string or a number, not 'map'".
    datArr = [list(map(float, line)) for line in stringArr]
    # asmatrix is what the old `mat` alias pointed to and exists on both
    # NumPy 1.x and 2.x, whereas `mat` is no longer exported by NumPy 2.
    return asmatrix(datArr)
def replaceNanWithMean(
        fileName='C:/Users/Omega/OneDrive/桌面/实验三+四/4/全部数据集/secom.data',
        delim=' '):
    """Load a numeric data file and fill its NaN entries via ``imputer``.

    Args:
        fileName: Path of the data file to load. Defaults to the
            ``secom.data`` location this script was originally written
            against, so existing zero-argument calls behave as before.
        delim: Field separator handed to ``loadDataSet`` (a single space
            by default).

    Returns:
        Whatever ``imputer`` returns for the loaded matrix.
    """
    datMat = loadDataSet(fileName, delim)
    # Diagnostic output kept from the original: shows which container type
    # is handed to the imputer.
    print(type(datMat))
    # imputer() supersedes the earlier hand-written loop that overwrote the
    # NaN entries of each column with the mean of that column's non-NaN values.
    return imputer(datMat)
from sklearn.impute import SimpleImputer # 上面遗漏了一块
def imputer(a):
    """Return ``a`` with its NaN entries filled by a mean-strategy imputer.

    Args:
        a: Array-like numeric data accepted by
            ``SimpleImputer.fit_transform``.

    Returns:
        The result of fitting a ``SimpleImputer`` on ``a`` and transforming
        ``a`` with it in one step.
    """
    # NaN marks a missing value; "mean" is the replacement strategy.
    mean_filler = SimpleImputer(strategy="mean", missing_values=nan)
    return mean_filler.fit_transform(a)
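在 Python 3 中,如果 loadDataSet 里直接使用 map(float, line) 而没有套上 list,调用 replaceNanWithMean() 时就会出现如下错误(第一行是 print(type(datMat)) 的输出):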
<class 'numpy.matrix'>
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-77-b762e1b84152> in <module>
----> 1 dataMat = replaceNanWithMean()############################
<ipython-input-75-d4c5db6a0068> in replaceNanWithMean()
3 numFeat = shape(datMat)[1]
4 print(type(datMat))
----> 5 datMat=imputer(datMat)
6 # for i in range(numFeat):
7 # meanVal = mean(datMat[nonzero(~isnan(datMat[:, i].A))[0], i]) # values that are not NaN (a number)
<ipython-input-72-37b11b13bcab> in imputer(a)
3 def imputer(a):
4 im=SimpleImputer(missing_values=nan,strategy="mean")
----> 5 data=im.fit_transform(a)
6 return data
7 # data=im.fit_transform([[90,2,10,np.nan],
D:\anacoda\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
697 if y is None:
698 # fit method of arity 1 (unsupervised transformation)
--> 699 return self.fit(X, **fit_params).transform(X)
700 else:
701 # fit method of arity 2 (supervised transformation)
D:\anacoda\lib\site-packages\sklearn\impute\_base.py in fit(self, X, y)
286 self : SimpleImputer
287 """
--> 288 X = self._validate_input(X, in_fit=True)
289
290 # default fill_value is 0 for numerical input and "missing_value"
D:\anacoda\lib\site-packages\sklearn\impute\_base.py in _validate_input(self, X, in_fit)
250
251 try:
--> 252 X = self._validate_data(X, reset=in_fit,
253 accept_sparse='csc', dtype=dtype,
254 force_all_finite=force_all_finite,
D:\anacoda\lib\site-packages\sklearn\base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
419 out = X
420 elif isinstance(y, str) and y == 'no_validation':
--> 421 X = check_array(X, **check_params)
422 out = X
423 else:
D:\anacoda\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
D:\anacoda\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
614 array = array.astype(dtype, casting="unsafe", copy=False)
615 else:
--> 616 array = np.asarray(array, order=order, dtype=dtype)
617 except ComplexWarning as complex_warning:
618 raise ValueError("Complex data not supported\n"
D:\anacoda\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order, like)
100 return _asarray_with_like(a, dtype=dtype, order=order, like=like)
101
--> 102 return array(a, dtype, copy=False, order=order)
103
104
TypeError: float() argument must be a string or a number, not 'map'
经过排查后了解到:Python 3 中 map 返回的是惰性迭代器而不是列表,因此需要在外面套上 list();把 loadDataSet 中生成 datArr 的那一行改成下面这样之后,程序即可正常运行:
# Python 3's map() yields a lazy iterator, so each row is wrapped in list()
# to hand NumPy real lists of floats instead of map objects.
datArr = [list(map(float, line)) for line in stringArr]
|