Clustering#
Clustering seeks to group data into clusters based on their properties and then allows us to predict which cluster a new member belongs to.
import numpy as np
import matplotlib.pyplot as plt
We’ll use a dataset generator that is part of scikit-learn called make_moons. This generates data that falls into 2 different sets with a shape that looks like half-moons.
from sklearn import datasets
def generate_data(n_samples=200, noise=0.2, random_state=None):
    """Generate a noisy two-class "half-moons" dataset.

    This is a thin wrapper around ``sklearn.datasets.make_moons``.

    Parameters
    ----------
    n_samples : int, optional
        Total number of points to generate (default 200).
    noise : float, optional
        Standard deviation of the Gaussian noise added to the points
        (default 0.2).
    random_state : int, RandomState instance or None, optional
        Forwarded to ``make_moons`` to make the dataset reproducible.
        The default, None, gives different data on every call.

    Returns
    -------
    x : ndarray
        The generated points, one (x, y) pair per row.
    v : ndarray
        The class label (0 or 1) of each point.
    """
    xvec, val = datasets.make_moons(n_samples, noise=noise,
                                    random_state=random_state)
    # make_moons already hands back NumPy arrays, so there is no need to
    # copy them point by point into new lists
    return np.asarray(xvec), np.asarray(val)
# build the dataset: x holds the 2-d points, v the class label of each point
x, v = generate_data()
Let’s look at a point and its value
# show the first point together with its class label
print(f"x = {x[0]}, value = {v[0]}")
x = [ 1.65034975 -0.18557353], value = 1
Now let’s plot the data
def plot_data(x, v):
    """Scatter-plot the 2-d points in `x`, colored by their values `v`.

    The axes use an equal aspect ratio. Returns the matplotlib figure
    holding the plot.
    """
    fig, ax = plt.subplots()
    # first component of each point goes on the horizontal axis,
    # second component on the vertical axis
    ax.scatter([pt[0] for pt in x], [pt[1] for pt in x],
               c=v, s=40, cmap="viridis")
    ax.set_aspect("equal")
    return fig
# draw the generated points, colored by class label
fig = plot_data(x, v)
We want to partition this domain into 2 regions, such that when we come in with a new point, we know which group it belongs to.
First we set up and train our network
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Input
from keras.optimizers import RMSprop
2025-11-03 13:35:51.029938: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-11-03 13:35:51.074881: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-03 13:35:52.785378: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
# small fully-connected network: 2 inputs -> 50 -> 20 -> 1 sigmoid output
model = Sequential([
    Input(shape=(2,)),
    Dense(50, activation="relu"),
    Dense(20, activation="relu"),
    Dense(1, activation="sigmoid"),
])
2025-11-03 13:35:53.082586: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
# RMSprop is used with its default settings
rms = RMSprop()
# binary cross-entropy loss; accuracy is tracked as an extra metric
model.compile(optimizer=rms, loss="binary_crossentropy", metrics=["accuracy"])
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ dense (Dense) │ (None, 50) │ 150 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_1 (Dense) │ (None, 20) │ 1,020 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_2 (Dense) │ (None, 1) │ 21 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 1,191 (4.65 KB)
Trainable params: 1,191 (4.65 KB)
Non-trainable params: 0 (0.00 B)
We seem to need a lot of epochs here to get a good result
epochs = 100
# train on the whole dataset in mini-batches of 50 points;
# verbose=2 prints one summary per epoch
results = model.fit(x=x, y=v, epochs=epochs, batch_size=50, verbose=2)
Epoch 1/100
4/4 - 0s - 114ms/step - accuracy: 0.5000 - loss: 0.7031
Epoch 2/100
4/4 - 0s - 6ms/step - accuracy: 0.4800 - loss: 0.6756
Epoch 3/100
4/4 - 0s - 6ms/step - accuracy: 0.5600 - loss: 0.6548
Epoch 4/100
4/4 - 0s - 6ms/step - accuracy: 0.6700 - loss: 0.6353
Epoch 5/100
4/4 - 0s - 6ms/step - accuracy: 0.7550 - loss: 0.6172
Epoch 6/100
4/4 - 0s - 6ms/step - accuracy: 0.7750 - loss: 0.5984
Epoch 7/100
4/4 - 0s - 6ms/step - accuracy: 0.8000 - loss: 0.5823
Epoch 8/100
4/4 - 0s - 6ms/step - accuracy: 0.8000 - loss: 0.5674
Epoch 9/100
4/4 - 0s - 6ms/step - accuracy: 0.8000 - loss: 0.5531
Epoch 10/100
4/4 - 0s - 6ms/step - accuracy: 0.8100 - loss: 0.5394
Epoch 11/100
4/4 - 0s - 6ms/step - accuracy: 0.8250 - loss: 0.5251
Epoch 12/100
4/4 - 0s - 6ms/step - accuracy: 0.8250 - loss: 0.5114
Epoch 13/100
4/4 - 0s - 6ms/step - accuracy: 0.8250 - loss: 0.4978
Epoch 14/100
4/4 - 0s - 6ms/step - accuracy: 0.8300 - loss: 0.4839
Epoch 15/100
4/4 - 0s - 6ms/step - accuracy: 0.8300 - loss: 0.4698
Epoch 16/100
4/4 - 0s - 6ms/step - accuracy: 0.8300 - loss: 0.4559
Epoch 17/100
4/4 - 0s - 6ms/step - accuracy: 0.8300 - loss: 0.4423
Epoch 18/100
4/4 - 0s - 6ms/step - accuracy: 0.8300 - loss: 0.4309
Epoch 19/100
4/4 - 0s - 6ms/step - accuracy: 0.8300 - loss: 0.4184
Epoch 20/100
4/4 - 0s - 6ms/step - accuracy: 0.8350 - loss: 0.4097
Epoch 21/100
4/4 - 0s - 6ms/step - accuracy: 0.8300 - loss: 0.3986
Epoch 22/100
4/4 - 0s - 6ms/step - accuracy: 0.8500 - loss: 0.3892
Epoch 23/100
4/4 - 0s - 6ms/step - accuracy: 0.8500 - loss: 0.3807
Epoch 24/100
4/4 - 0s - 6ms/step - accuracy: 0.8450 - loss: 0.3736
Epoch 25/100
4/4 - 0s - 6ms/step - accuracy: 0.8550 - loss: 0.3653
Epoch 26/100
4/4 - 0s - 6ms/step - accuracy: 0.8500 - loss: 0.3582
Epoch 27/100
4/4 - 0s - 6ms/step - accuracy: 0.8600 - loss: 0.3520
Epoch 28/100
4/4 - 0s - 6ms/step - accuracy: 0.8550 - loss: 0.3467
Epoch 29/100
4/4 - 0s - 6ms/step - accuracy: 0.8550 - loss: 0.3405
Epoch 30/100
4/4 - 0s - 6ms/step - accuracy: 0.8550 - loss: 0.3356
Epoch 31/100
4/4 - 0s - 6ms/step - accuracy: 0.8550 - loss: 0.3310
Epoch 32/100
4/4 - 0s - 6ms/step - accuracy: 0.8550 - loss: 0.3272
Epoch 33/100
4/4 - 0s - 6ms/step - accuracy: 0.8550 - loss: 0.3230
Epoch 34/100
4/4 - 0s - 6ms/step - accuracy: 0.8600 - loss: 0.3201
Epoch 35/100
4/4 - 0s - 6ms/step - accuracy: 0.8600 - loss: 0.3166
Epoch 36/100
4/4 - 0s - 6ms/step - accuracy: 0.8550 - loss: 0.3135
Epoch 37/100
4/4 - 0s - 6ms/step - accuracy: 0.8600 - loss: 0.3128
Epoch 38/100
4/4 - 0s - 6ms/step - accuracy: 0.8600 - loss: 0.3099
Epoch 39/100
4/4 - 0s - 6ms/step - accuracy: 0.8650 - loss: 0.3084
Epoch 40/100
4/4 - 0s - 6ms/step - accuracy: 0.8700 - loss: 0.3050
Epoch 41/100
4/4 - 0s - 6ms/step - accuracy: 0.8600 - loss: 0.3031
Epoch 42/100
4/4 - 0s - 6ms/step - accuracy: 0.8700 - loss: 0.3038
Epoch 43/100
4/4 - 0s - 6ms/step - accuracy: 0.8700 - loss: 0.3015
Epoch 44/100
4/4 - 0s - 6ms/step - accuracy: 0.8650 - loss: 0.2979
Epoch 45/100
4/4 - 0s - 6ms/step - accuracy: 0.8650 - loss: 0.2983
Epoch 46/100
4/4 - 0s - 6ms/step - accuracy: 0.8700 - loss: 0.2952
Epoch 47/100
4/4 - 0s - 6ms/step - accuracy: 0.8700 - loss: 0.2945
Epoch 48/100
4/4 - 0s - 6ms/step - accuracy: 0.8850 - loss: 0.2918
Epoch 49/100
4/4 - 0s - 6ms/step - accuracy: 0.8850 - loss: 0.2903
Epoch 50/100
4/4 - 0s - 6ms/step - accuracy: 0.8800 - loss: 0.2895
Epoch 51/100
4/4 - 0s - 6ms/step - accuracy: 0.8850 - loss: 0.2869
Epoch 52/100
4/4 - 0s - 6ms/step - accuracy: 0.8800 - loss: 0.2871
Epoch 53/100
4/4 - 0s - 6ms/step - accuracy: 0.8800 - loss: 0.2868
Epoch 54/100
4/4 - 0s - 6ms/step - accuracy: 0.8850 - loss: 0.2832
Epoch 55/100
4/4 - 0s - 6ms/step - accuracy: 0.8850 - loss: 0.2830
Epoch 56/100
4/4 - 0s - 6ms/step - accuracy: 0.8800 - loss: 0.2810
Epoch 57/100
4/4 - 0s - 6ms/step - accuracy: 0.8850 - loss: 0.2808
Epoch 58/100
4/4 - 0s - 6ms/step - accuracy: 0.8850 - loss: 0.2781
Epoch 59/100
4/4 - 0s - 6ms/step - accuracy: 0.8850 - loss: 0.2766
Epoch 60/100
4/4 - 0s - 6ms/step - accuracy: 0.8900 - loss: 0.2759
Epoch 61/100
4/4 - 0s - 6ms/step - accuracy: 0.8800 - loss: 0.2738
Epoch 62/100
4/4 - 0s - 7ms/step - accuracy: 0.8850 - loss: 0.2731
Epoch 63/100
4/4 - 0s - 6ms/step - accuracy: 0.8800 - loss: 0.2722
Epoch 64/100
4/4 - 0s - 6ms/step - accuracy: 0.8900 - loss: 0.2713
Epoch 65/100
4/4 - 0s - 6ms/step - accuracy: 0.8850 - loss: 0.2699
Epoch 66/100
4/4 - 0s - 6ms/step - accuracy: 0.8900 - loss: 0.2693
Epoch 67/100
4/4 - 0s - 6ms/step - accuracy: 0.9000 - loss: 0.2650
Epoch 68/100
4/4 - 0s - 6ms/step - accuracy: 0.8950 - loss: 0.2649
Epoch 69/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2625
Epoch 70/100
4/4 - 0s - 6ms/step - accuracy: 0.9000 - loss: 0.2631
Epoch 71/100
4/4 - 0s - 6ms/step - accuracy: 0.9000 - loss: 0.2598
Epoch 72/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2607
Epoch 73/100
4/4 - 0s - 6ms/step - accuracy: 0.9000 - loss: 0.2588
Epoch 74/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2565
Epoch 75/100
4/4 - 0s - 6ms/step - accuracy: 0.8950 - loss: 0.2587
Epoch 76/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2538
Epoch 77/100
4/4 - 0s - 6ms/step - accuracy: 0.9000 - loss: 0.2527
Epoch 78/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2519
Epoch 79/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2523
Epoch 80/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2486
Epoch 81/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2476
Epoch 82/100
4/4 - 0s - 6ms/step - accuracy: 0.9000 - loss: 0.2488
Epoch 83/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2454
Epoch 84/100
4/4 - 0s - 6ms/step - accuracy: 0.9000 - loss: 0.2467
Epoch 85/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2435
Epoch 86/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2437
Epoch 87/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2411
Epoch 88/100
4/4 - 0s - 6ms/step - accuracy: 0.9000 - loss: 0.2397
Epoch 89/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2380
Epoch 90/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2375
Epoch 91/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2370
Epoch 92/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2365
Epoch 93/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2350
Epoch 94/100
4/4 - 0s - 6ms/step - accuracy: 0.9000 - loss: 0.2324
Epoch 95/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2316
Epoch 96/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2333
Epoch 97/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2293
Epoch 98/100
4/4 - 0s - 6ms/step - accuracy: 0.9100 - loss: 0.2290
Epoch 99/100
4/4 - 0s - 6ms/step - accuracy: 0.9100 - loss: 0.2262
Epoch 100/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2257
# evaluate() returns the loss followed by the compiled metrics, so
# score[0] is the loss and score[1] is the accuracy.
# NOTE: this scores the model on the same points it was trained on.
score = model.evaluate(x, v, verbose=0)
print(f"score = {score[0]}")
print(f"accuracy = {score[1]}")
score = 0.2232695370912552
accuracy = 0.9100000262260437
Let’s look at a prediction. We need to feed in a single point as an array of shape (N, 2), where N is the number of points
# predict() works on batches, so the single point is wrapped as shape (1, 2)
single_point = np.array([[-2, 2]])
res = model.predict(single_point)
res
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 30ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 40ms/step
array([[8.7691205e-06]], dtype=float32)
We see that we get a floating point number. We will need to convert this to 0 or 1 by rounding.
Let’s plot the partitioning
# grid resolution: M samples along x, N samples along y
M, N = 128, 128
# extent of the rectangular region to classify
xmin, xmax = -1.75, 2.5
ymin, ymax = -1.25, 1.75
# evenly spaced sample coordinates along each axis (endpoints included)
xpt = np.linspace(xmin, xmax, num=M)
ypt = np.linspace(ymin, ymax, num=N)
To make the prediction go faster, we want to feed in a single vector holding every (x, y) combination on the grid, with y varying fastest, of the form:
[[xpt[0], ypt[0]],
[xpt[0], ypt[1]],
...
]
We can see that this packs them into the vector
# with "ij" indexing both coordinate arrays have shape (M, N); flattening
# them side by side gives (M*N, 2) rows of (x, y) with y varying fastest
gx, gy = np.meshgrid(xpt, ypt, indexing="ij")
pairs = np.column_stack((gx.ravel(), gy.ravel()))
pairs[0]
array([-1.75, -1.25])
Now we do the prediction. We will get a vector out, which we reshape to match the original domain.
# predict() returns one value per input row; fold that back onto the
# (M, N) grid. reshape() is used instead of assigning to .shape, which
# NumPy discourages.
res = model.predict(pairs, verbose=0).reshape(M, N)
Finally, round to 0 or 1
# threshold the network output at 0.5 to get a hard 0/1 label per grid cell
domain = (res > 0.5).astype(int)
and we can plot the data
fig, ax = plt.subplots()
# domain has x along its first axis; transpose so rows run along y,
# which is the layout imshow draws
ax.imshow(domain.T, origin="lower",
          extent=[xmin, xmax, ymin, ymax], alpha=0.25)
# overlay the data points; slice the columns of x directly rather than
# rebinding xpt/ypt, which hold the grid coordinates used to build `pairs`
ax.scatter(x[:, 0], x[:, 1], s=40, c=v, cmap="viridis")
<matplotlib.collections.PathCollection at 0x7fc9d8be0ad0>