
5.3 k平均法
125
# k-means - Accuracy as the number of components varies
#
# Hyperparameters for the experiment. Each one is passed to KMeans(...) as
# the keyword argument of the same name inside the loop that follows.
n_clusters = 20
n_init = 10
max_iter = 300
tol = 0.0001
random_state = 2018
# NOTE(review): scikit-learn deprecated KMeans's n_jobs argument in 0.23 and
# removed it in 1.0 -- confirm the installed version still accepts it.
n_jobs = 2

# Cutoff values that label the rows of both result tables. Kept in a single
# list so the two indexes cannot drift apart. In the loop below each value is
# used as the upper bound of the label slice X_train_PCA.loc[:, 0:cutoff].
cutoffNumbers = [9, 49, 99, 199, 299, 399, 499, 599, 699, 783]

# Empty result tables with one row per cutoff (cells start out as NaN).
# Presumably filled in by the loop that follows -- the part of the loop that
# would do so is not visible here.
kMeans_inertia = pd.DataFrame(
    data=[], index=cutoffNumbers, columns=['inertia'])
overallAccuracy_kMeansDF = pd.DataFrame(
    data=[], index=cutoffNumbers, columns=['overallAccuracy'])
for cutoffNumber in [9, 49, 99, 199, 299, 399, 499, 599, 699, 783]:
kmeans = KMeans(n_clusters=n_clusters, n_init=n_init, \
max_iter=max_iter, tol=tol, random_state=random_state, \
n_jobs=n_jobs)
cutoff = cutoffNumber
kmeans.fit(X_train_PCA.loc[:,0:cutoff]) ...