from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

##### Clustering ######
# Exploratory script: fit KMeans on four evaluation-metric columns of
# `eval_art`, scatter two of those columns coloured by cluster label, then draw
# a scree (elbow) plot of the KMeans inertia for k = 1..9.
# NOTE(review): `eval_art` is not defined in this section; it is presumably a
# pandas DataFrame created earlier in the file -- confirm.

## KMeans
# Features: rows of one network type, columns at positions 1-4.
# Alternative subset:
#eval_artworks = eval_art[eval_art.nettype == "alldata"].iloc[:, range(1, 5)]
eval_artworks = eval_art[eval_art.nettype == "subdata"].iloc[:, range(1, 5)]
kmeans = KMeans(n_clusters=4, max_iter=1000).fit(eval_artworks)

# Alternative: project the features to 2-D with MDS before plotting.
#from sklearn.manifold import MDS
#coord = pd.DataFrame(MDS(normalized_stress='auto').fit_transform(eval_artworks))

# Work on a copy: a plain `coord = eval_artworks` would alias the feature frame,
# so adding the label column would also add it to `eval_artworks` (and can
# trigger pandas' chained-assignment warning, since it is a slice of eval_art).
coord = eval_artworks.copy()
coord["clusters"] = kmeans.labels_

# One scatter series per cluster so each label gets its own colour and legend
# entry. Plotted columns are chosen by position; other pairs tried:
#   x = iloc[:, 0], y = iloc[:, 1]
#   x = iloc[:, 2], y = iloc[:, 4]
for i in coord.clusters.unique():
    members = coord[coord.clusters == i]  # filter once per cluster
    plt.scatter(members.iloc[:, 1], members.iloc[:, 2], label=i)
plt.legend()
plt.show()

### Scree plot
# Refit KMeans for k = 1..9 on two named metrics and record the inertia of each
# fit, keyed by k. `kmeans` is rebound on every iteration, so after the loop it
# refers to the k = 9 model.
sse = {}
for k in range(1, 10):
    kmeans = KMeans(n_clusters=k, max_iter=1000).fit(
        eval_artworks[["precision", "generalizability"]]
    )
    #data["clusters"] = kmeans.labels_
    #print(data["clusters"])
    # Inertia: sum of squared distances of samples to their closest cluster center
    sse[k] = kmeans.inertia_

plt.figure()
plt.plot(list(sse.keys()), list(sse.values()))
plt.xlabel("Number of clusters")
plt.ylabel("SSE")
plt.show()