#06701192023 Sugandh ML LAB 9
import numpy as np
import matplotlib.pyplot as plt
# Two-cluster k-means demo: sample two Gaussian blobs, pick two random data
# points as starting centers, iterate assign/update until the centers stop
# moving, then plot the labelled points and the final centers.


def generate_data(n_per_cluster=500):
    """Return two stacked 2-D normal samples centred at (2, 2) and (8, 8).

    Args:
        n_per_cluster: Number of points drawn for each of the two blobs.

    Returns:
        Array of shape ``(2 * n_per_cluster, 2)``; the first half is drawn
        around (2, 2), the second half around (8, 8), both with scale 1.0.
    """
    d1 = np.random.normal(loc=[2, 2], scale=1.0, size=(n_per_cluster, 2))
    d2 = np.random.normal(loc=[8, 8], scale=1.0, size=(n_per_cluster, 2))
    return np.vstack((d1, d2))  # Combine both distributions


def two_means(data, x1, x2, max_iter=100):
    """Cluster ``data`` into two groups by iterating assign/update steps.

    Each point gets label 1 when it is strictly closer to ``x1`` than to
    ``x2`` and label 2 otherwise (ties go to label 2). Each center is then
    replaced by the mean of its points. Iteration stops once both new means
    are ``np.allclose`` to the current centers, or after ``max_iter`` rounds.

    Args:
        data: Array-like of shape ``(n_points, n_features)``.
        x1: Initial center for label 1, array-like of length ``n_features``.
        x2: Initial center for label 2, array-like of length ``n_features``.
        max_iter: Upper bound on assign/update rounds; must be at least 1.

    Returns:
        Tuple ``(labels, x1, x2)``: the integer label array (values 1 or 2)
        from the last assignment step and the centers it was computed with.
        If ``max_iter`` is exhausted without convergence, the centers are
        the most recently updated ones.

    Raises:
        ValueError: If ``max_iter`` is smaller than 1.
    """
    if max_iter < 1:
        raise ValueError("max_iter must be at least 1")

    data = np.asarray(data, dtype=float)
    x1 = np.asarray(x1, dtype=float)
    x2 = np.asarray(x2, dtype=float)

    for _ in range(max_iter):
        # Assign labels based on the closest center
        distances_to_x1 = np.linalg.norm(data - x1, axis=1)
        distances_to_x2 = np.linalg.norm(data - x2, axis=1)
        labels = np.where(distances_to_x1 < distances_to_x2, 1, 2)

        # Compute new means. The mean of an empty selection is NaN, and
        # np.allclose never reports NaN as close, so an empty cluster would
        # keep the loop from ever converging; keep the old center instead.
        in_first = labels == 1
        in_second = ~in_first
        new_x1 = data[in_first].mean(axis=0) if in_first.any() else x1
        new_x2 = data[in_second].mean(axis=0) if in_second.any() else x2

        # Check for convergence
        if np.allclose(new_x1, x1) and np.allclose(new_x2, x2):
            break
        x1, x2 = new_x1, new_x2

    return labels, x1, x2


def plot_clusters(data, labels, x1, x2):
    """Scatter-plot the labelled points and mark the two centers."""
    plt.scatter(data[:, 0], data[:, 1], c=labels, cmap='bwr', alpha=0.6)
    plt.scatter([x1[0], x2[0]], [x1[1], x2[1]], c='black', marker='x',
                s=200, label='Centers')
    plt.legend()
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title('Clustering using Distance-based Labeling')
    plt.show()


def main():
    """Generate the sample data, cluster it, and show the result."""
    data = generate_data()
    # Randomly select two initial points
    indices = np.random.choice(data.shape[0], 2, replace=False)
    x1, x2 = data[indices]
    labels, x1, x2 = two_means(data, x1, x2)
    plot_clusters(data, labels, x1, x2)


if __name__ == "__main__":
    main()