K-Means

from sklearn.cluster import KMeans
from sklearn import datasets
from sklearn.utils import shuffle
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Import the Iris dataset
iris = datasets.load_iris()
X = iris.data  # Features
y = iris.target  # True labels (species)
names = iris.feature_names  # Feature names

# Shuffle the dataset to randomize the order
X, y = shuffle(X, y, random_state=42)

# K-Means clustering with 3 clusters (as there are 3 species)
model = KMeans(n_clusters=3, random_state=42)
iris_kmeans = model.fit(X)

# Get the cluster labels from the KMeans model
print(“KMeans labels:\n”, iris_kmeans.labels_)

# Reorder the true labels to match the KMeans clusters (as the order might differ)
y = np.choose(y, [1, 2, 0]).astype(int)
print(“Reordered true labels:\n”, y)

# Confusion matrix to compare the true labels with the predicted ones
from sklearn.metrics import confusion_matrix
conf_matrix = confusion_matrix(y, iris_kmeans.labels_)

# Plotting the confusion matrix
fig, ax = plt.subplots(figsize=(7.5, 7.5))
ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3)
for i in range(conf_matrix.shape[0]):
    for j in range(conf_matrix.shape[1]):
        ax.text(x=j, y=i, s=conf_matrix[i, j], va=’center’,
                ha=’center’, size=’xx-large’)

plt.xlabel(‘Predictions’, fontsize=18)
plt.ylabel(‘Actuals’, fontsize=18)
plt.title(‘Confusion Matrix’, fontsize=18)
plt.show()

# Get the KMeans cluster centers
print(“KMeans cluster centers:\n”, iris_kmeans.cluster_centers_)

# Define a colormap (use a predefined one or customize)
customcmap = mpl.colormaps[‘viridis’]  # Updated colormap fetching

# 3D Scatter Plot for K-Means clustering results
fig = plt.figure(figsize=(20, 10))
ax1 = fig.add_subplot(1, 2, 1, projection=’3d’)

# Plot the 3D scatter plot of the clustered data
ax1.scatter(X[:, 3], X[:, 0], X[:, 2], c=iris_kmeans.labels_.astype(float), edgecolor=”k”, s=150, cmap=customcmap)
ax1.view_init(20, -50)
ax1.set_xlabel(names[3], fontsize=12)
ax1.set_ylabel(names[0], fontsize=12)
ax1.set_zlabel(names[2], fontsize=12)
ax1.set_title(“K-Means Clusters for the Iris Dataset”, fontsize=12)

# 3D Scatter Plot for actual labels
ax2 = fig.add_subplot(1, 2, 2, projection=’3d’)

# Annotate the clusters with species names
for label, name in enumerate([‘virginica’, ‘setosa’, ‘versicolor’]):
    ax2.text3D(
        X[y == label, 3].mean(),
        X[y == label, 0].mean(),
        X[y == label, 2].mean() + 2,
        name,
        horizontalalignment=”center”,
        bbox=dict(alpha=0.2, edgecolor=”w”, facecolor=”w”),
    )

# Plot the 3D scatter plot for the actual labels
ax2.scatter(X[:, 3], X[:, 0], X[:, 2],
            c=y, edgecolor=”k”, s=150,
            cmap=customcmap)
ax2.view_init(20, -50)
ax2.set_xlabel(names[3], fontsize=12)
ax2.set_ylabel(names[0], fontsize=12)
ax2.set_zlabel(names[2], fontsize=12)
ax2.set_title(“Actual Labels for the Iris Dataset”, fontsize=12)

# Show the plot (use plt.show() for script-based environments)
plt.show()

Paste text,images,html and share with anyone
Scroll to Top