import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
# Load the Iris dataset.
iris = datasets.load_iris()
X = iris.data  # Feature matrix
y = iris.target  # True labels (species indices into iris.target_names)
names = iris.feature_names  # Feature names, used as axis labels below

# Shuffle samples and labels together; the fixed seed keeps runs reproducible.
X, y = shuffle(X, y, random_state=42)

# K-Means clustering with 3 clusters (as there are 3 species).
model = KMeans(n_clusters=3, random_state=42)
iris_kmeans = model.fit(X)

# Cluster id assigned to each sample by the fitted model.
print("KMeans labels:\n", iris_kmeans.labels_)

# K-Means numbers its clusters arbitrarily, so cluster ids do not line up with
# species ids. Rather than hard-coding a permutation that only holds for one
# particular fit, pick the one-to-one species -> cluster assignment that
# maximises the number of agreeing samples.
contingency = confusion_matrix(y, iris_kmeans.labels_)
# For a square matrix the returned row indices are 0..n-1 in order, so the
# column indices can be used directly as a lookup table indexed by species id.
_, species_to_cluster = linear_sum_assignment(contingency, maximize=True)
# argsort of a permutation is its inverse: cluster id -> species name.
cluster_names = iris.target_names[np.argsort(species_to_cluster)]

# Re-express the true labels in the cluster numbering.
y = species_to_cluster[y].astype(int)
print("Reordered true labels:\n", y)

# Confusion matrix comparing the (re-numbered) true labels with the clusters.
conf_matrix = confusion_matrix(y, iris_kmeans.labels_)

# Plot the confusion matrix with the count written in every cell.
fig, ax = plt.subplots(figsize=(7.5, 7.5))
ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3)
for (i, j), count in np.ndenumerate(conf_matrix):
    # matshow puts columns on the x axis and rows on the y axis.
    ax.text(x=j, y=i, s=str(count), va="center", ha="center", size="xx-large")
plt.xlabel("Predictions", fontsize=18)
plt.ylabel("Actuals", fontsize=18)
plt.title("Confusion Matrix", fontsize=18)
plt.show()

# Coordinates of the fitted cluster centers in feature space.
print("KMeans cluster centers:\n", iris_kmeans.cluster_centers_)

# Colormap shared by both scatter plots so equal ids get equal colours.
customcmap = mpl.colormaps["viridis"]

# Side-by-side 3D scatter plots: K-Means clusters (left), true labels (right).
# Both use feature columns 3, 0 and 2 for the x, y and z axes respectively.
fig = plt.figure(figsize=(20, 10))

# Left: samples coloured by K-Means cluster id.
ax1 = fig.add_subplot(1, 2, 1, projection="3d")
ax1.scatter(
    X[:, 3],
    X[:, 0],
    X[:, 2],
    c=iris_kmeans.labels_.astype(float),
    edgecolor="k",
    s=150,
    cmap=customcmap,
)
ax1.view_init(20, -50)
ax1.set_xlabel(names[3], fontsize=12)
ax1.set_ylabel(names[0], fontsize=12)
ax1.set_zlabel(names[2], fontsize=12)
ax1.set_title("K-Means Clusters for the Iris Dataset", fontsize=12)

# Right: samples coloured by their true (re-numbered) species label.
ax2 = fig.add_subplot(1, 2, 2, projection="3d")
# Annotate each species at the mean position of its samples, lifted along z
# so the text sits above the point cloud.
for label, name in enumerate(cluster_names):
    members = X[y == label]
    ax2.text3D(
        members[:, 3].mean(),
        members[:, 0].mean(),
        members[:, 2].mean() + 2,
        name,
        horizontalalignment="center",
        bbox=dict(alpha=0.2, edgecolor="w", facecolor="w"),
    )
ax2.scatter(
    X[:, 3],
    X[:, 0],
    X[:, 2],
    c=y,
    edgecolor="k",
    s=150,
    cmap=customcmap,
)
ax2.view_init(20, -50)
ax2.set_xlabel(names[3], fontsize=12)
ax2.set_ylabel(names[0], fontsize=12)
ax2.set_zlabel(names[2], fontsize=12)
ax2.set_title("Actual Labels for the Iris Dataset", fontsize=12)

# Show the plot (use plt.show() for script-based environments).
plt.show()