42 lines
1.3 KiB
Python
42 lines
1.3 KiB
Python
|
from sklearn.cluster import KMeans
|
||
|
|
||
|
import matplotlib.pyplot as plt
|
||
|
|
||
|
|
||
|
##### Clustering ######

## KMeans

# Feature matrix: the columns at positions 1-4 of the "subdata" rows.
# (Filter on nettype == "alldata" instead to cluster the full-data runs.)
# .copy() detaches the selection from eval_art, so nothing done below can
# write through to it or trigger pandas' SettingWithCopyWarning.
eval_artworks = eval_art[eval_art.nettype == "subdata"].iloc[:, range(1, 5)].copy()

# NOTE: no random_state is passed, so cluster numbering can change between runs.
kmeans = KMeans(n_clusters=4, max_iter=1000).fit(eval_artworks)

# Alternative: embed the features in 2-D with MDS and plot the embedding.
#from sklearn.manifold import MDS
#coord = pd.DataFrame(MDS(normalized_stress='auto').fit_transform(eval_artworks))

# Separate frame for plotting, so the label column is not added to the
# feature matrix that KMeans was fitted on.
coord = eval_artworks.copy()
coord["clusters"] = kmeans.labels_

# One scatter series per cluster. groupby yields the groups in ascending label
# order, so the legend is ordered by label, and each cluster's rows are
# selected once. Positions 1 and 2 are the plotted columns; other pairs that
# were tried are (0, 1) and (2, 4).
X_POS, Y_POS = 1, 2
for cluster_id, members in coord.groupby("clusters"):
    plt.scatter(members.iloc[:, X_POS], members.iloc[:, Y_POS], label=cluster_id)
plt.xlabel(str(coord.columns[X_POS]))
plt.ylabel(str(coord.columns[Y_POS]))
plt.legend()
plt.show()
|
||
|
|
||
|
### Scree plot

# Fit k-means for k = 1..9 on the two score columns and record each model's
# inertia: the sum of squared distances of the samples to their closest
# cluster centre. Plotting it against k gives the elbow (scree) curve.
scree_features = eval_artworks[["precision", "generalizability"]]
sse = {}
for k in range(1, 10):
    kmeans = KMeans(n_clusters=k, max_iter=1000).fit(scree_features)
    sse[k] = kmeans.inertia_

plt.figure()
plt.plot(list(sse), list(sse.values()))
plt.xlabel("Number of clusters")
plt.ylabel("SSE")
plt.show()
|