mtt_haum/code/trace-clustering.py
2024-01-16 09:59:23 +01:00

42 lines
1.3 KiB
Python

from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
##### Clustering ######
## KMeans
# Cluster on the columns at positions 1-4 of the evaluation table, using only
# the rows whose nettype is "subdata" (change the filter to "alldata" to
# cluster the other set of rows instead).
# .copy() detaches the selection from eval_art, so later column assignments
# cannot write through to (or warn about) the parent frame.
eval_artworks = eval_art[eval_art.nettype == "subdata"].iloc[:, range(1, 5)].copy()
kmeans = KMeans(n_clusters=4, max_iter=1000).fit(eval_artworks)

# Plot from a separate copy: the cluster labels must not be added to the
# feature frame itself, otherwise any later fit on eval_artworks would treat
# the label column as an additional feature.
# Alternative (needs pandas as pd): plot a 2-D MDS embedding instead of raw columns:
#   from sklearn.manifold import MDS
#   coord = pd.DataFrame(MDS(normalized_stress='auto').fit_transform(eval_artworks))
coord = eval_artworks.copy()
coord["clusters"] = kmeans.labels_

# One scatter series per cluster so each cluster gets its own colour and
# legend entry. Columns at positions 1 and 2 are plotted; other pairs that
# were tried are (0, 1) and (2, 4).
for i in coord.clusters.unique():
    members = coord[coord.clusters == i]
    plt.scatter(members.iloc[:, 1], members.iloc[:, 2], label=i)
plt.legend()
plt.show()
### Scree plot
# Fit KMeans on the precision/generalizability columns for k = 1..9 and record
# each model's inertia, then plot inertia against k to look for an "elbow".
scree_features = eval_artworks[["precision", "generalizability"]]
sse = {}
for k in range(1, 10):
    kmeans = KMeans(n_clusters=k, max_iter=1000)
    kmeans.fit(scree_features)
    # Inertia: sum of squared distances of samples to their closest cluster center
    sse[k] = kmeans.inertia_

plt.figure()
plt.plot(list(sse), list(sse.values()))
plt.xlabel("Number of clusters")
plt.ylabel("SSE")
plt.show()