Clustering
Clustering seeks to group data into clusters based on their properties and then allows us to predict which cluster a new member belongs to.
import numpy as np
import matplotlib.pyplot as plt
We’ll use a dataset generator that is part of scikit-learn called `make_moons`. This generates data that falls into 2 different sets with a shape that looks like half-moons.
from sklearn import datasets
def generate_data(n_samples=200, noise=0.2):
    """Generate a noisy two-moons dataset.

    Parameters
    ----------
    n_samples : int, optional
        Total number of points to generate (default 200).
    noise : float, optional
        Standard deviation of the Gaussian noise added to the points
        (default 0.2).

    Returns
    -------
    x : numpy.ndarray
        The point coordinates, one (x, y) pair per row.
    v : numpy.ndarray
        The integer class label (0 or 1) of each point.
    """
    # make_moons already returns NumPy arrays, so they are returned directly
    # instead of being copied point by point through Python lists.
    xvec, val = datasets.make_moons(n_samples, noise=noise)
    return np.asarray(xvec), np.asarray(val)
# build the training set: x holds the point coordinates, v the class labels
x, v = generate_data()
Let’s look at a point and its value
# show the first sample: its 2-component position and its class label
print(f"x = {x[0]}, value = {v[0]}")
x = [ 0.97526408 -0.68288325], value = 1
Now let’s plot the data
def plot_data(x, v):
    """Scatter-plot the points in `x`, colored by their labels `v`.

    Each entry of `x` is indexed as ``entry[0]`` (horizontal coordinate)
    and ``entry[1]`` (vertical coordinate).  The axes use an equal aspect
    ratio, and the created figure is returned.
    """
    horizontal = []
    vertical = []
    for point in x:
        horizontal.append(point[0])
        vertical.append(point[1])

    figure, axes = plt.subplots()
    axes.scatter(horizontal, vertical, c=v, s=40, cmap="viridis")
    axes.set_aspect("equal")
    return figure
# draw the generated points, colored by class label
fig = plot_data(x, v)

We want to partition this domain into 2 regions, such that when we come in with a new point, we know which group it belongs to.
First we set up and train our network
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Input
from keras.optimizers import RMSprop
2025-08-24 00:58:37.969766: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-24 00:58:38.016402: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-08-24 00:58:39.788219: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
# Fully connected network: 2 inputs -> 50 (relu) -> 20 (relu) -> 1 (sigmoid).
# The single sigmoid output lies in (0, 1), matching the 0/1 class labels.
model = Sequential([
    Input(shape=(2,)),
    Dense(50, activation="relu"),
    Dense(20, activation="relu"),
    Dense(1, activation="sigmoid"),
])
2025-08-24 00:58:40.113096: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
# RMSprop with its default settings; binary cross-entropy pairs with the
# single sigmoid output unit, and accuracy is tracked during training.
rms = RMSprop()
model.compile(
    optimizer=rms,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ dense (Dense) │ (None, 50) │ 150 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_1 (Dense) │ (None, 20) │ 1,020 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_2 (Dense) │ (None, 1) │ 21 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 1,191 (4.65 KB)
Trainable params: 1,191 (4.65 KB)
Non-trainable params: 0 (0.00 B)
We seem to need a lot of epochs here to get a good result
# number of full passes over the training data
epochs = 100

# verbose=2 prints one summary line per epoch
results = model.fit(
    x,
    v,
    epochs=epochs,
    batch_size=50,
    verbose=2,
)
Epoch 1/100
4/4 - 0s - 116ms/step - accuracy: 0.5050 - loss: 0.6925
Epoch 2/100
4/4 - 0s - 6ms/step - accuracy: 0.5700 - loss: 0.6640
Epoch 3/100
4/4 - 0s - 6ms/step - accuracy: 0.7300 - loss: 0.6404
Epoch 4/100
4/4 - 0s - 6ms/step - accuracy: 0.7750 - loss: 0.6188
Epoch 5/100
4/4 - 0s - 7ms/step - accuracy: 0.8050 - loss: 0.5997
Epoch 6/100
4/4 - 0s - 7ms/step - accuracy: 0.7950 - loss: 0.5817
Epoch 7/100
4/4 - 0s - 6ms/step - accuracy: 0.8100 - loss: 0.5642
Epoch 8/100
4/4 - 0s - 7ms/step - accuracy: 0.8200 - loss: 0.5480
Epoch 9/100
4/4 - 0s - 6ms/step - accuracy: 0.8150 - loss: 0.5330
Epoch 10/100
4/4 - 0s - 6ms/step - accuracy: 0.8150 - loss: 0.5181
Epoch 11/100
4/4 - 0s - 6ms/step - accuracy: 0.8150 - loss: 0.5040
Epoch 12/100
4/4 - 0s - 6ms/step - accuracy: 0.8200 - loss: 0.4912
Epoch 13/100
4/4 - 0s - 7ms/step - accuracy: 0.8200 - loss: 0.4788
Epoch 14/100
4/4 - 0s - 7ms/step - accuracy: 0.8200 - loss: 0.4674
Epoch 15/100
4/4 - 0s - 6ms/step - accuracy: 0.8200 - loss: 0.4558
Epoch 16/100
4/4 - 0s - 6ms/step - accuracy: 0.8200 - loss: 0.4457
Epoch 17/100
4/4 - 0s - 6ms/step - accuracy: 0.8250 - loss: 0.4351
Epoch 18/100
4/4 - 0s - 6ms/step - accuracy: 0.8250 - loss: 0.4256
Epoch 19/100
4/4 - 0s - 6ms/step - accuracy: 0.8250 - loss: 0.4168
Epoch 20/100
4/4 - 0s - 6ms/step - accuracy: 0.8250 - loss: 0.4079
Epoch 21/100
4/4 - 0s - 6ms/step - accuracy: 0.8250 - loss: 0.4003
Epoch 22/100
4/4 - 0s - 6ms/step - accuracy: 0.8250 - loss: 0.3924
Epoch 23/100
4/4 - 0s - 6ms/step - accuracy: 0.8250 - loss: 0.3857
Epoch 24/100
4/4 - 0s - 6ms/step - accuracy: 0.8300 - loss: 0.3790
Epoch 25/100
4/4 - 0s - 7ms/step - accuracy: 0.8350 - loss: 0.3730
Epoch 26/100
4/4 - 0s - 7ms/step - accuracy: 0.8300 - loss: 0.3681
Epoch 27/100
4/4 - 0s - 6ms/step - accuracy: 0.8350 - loss: 0.3635
Epoch 28/100
4/4 - 0s - 6ms/step - accuracy: 0.8350 - loss: 0.3594
Epoch 29/100
4/4 - 0s - 6ms/step - accuracy: 0.8350 - loss: 0.3545
Epoch 30/100
4/4 - 0s - 6ms/step - accuracy: 0.8350 - loss: 0.3506
Epoch 31/100
4/4 - 0s - 6ms/step - accuracy: 0.8300 - loss: 0.3477
Epoch 32/100
4/4 - 0s - 6ms/step - accuracy: 0.8400 - loss: 0.3436
Epoch 33/100
4/4 - 0s - 6ms/step - accuracy: 0.8350 - loss: 0.3404
Epoch 34/100
4/4 - 0s - 6ms/step - accuracy: 0.8450 - loss: 0.3379
Epoch 35/100
4/4 - 0s - 6ms/step - accuracy: 0.8450 - loss: 0.3362
Epoch 36/100
4/4 - 0s - 6ms/step - accuracy: 0.8450 - loss: 0.3319
Epoch 37/100
4/4 - 0s - 6ms/step - accuracy: 0.8450 - loss: 0.3293
Epoch 38/100
4/4 - 0s - 6ms/step - accuracy: 0.8500 - loss: 0.3271
Epoch 39/100
4/4 - 0s - 6ms/step - accuracy: 0.8500 - loss: 0.3246
Epoch 40/100
4/4 - 0s - 6ms/step - accuracy: 0.8500 - loss: 0.3229
Epoch 41/100
4/4 - 0s - 6ms/step - accuracy: 0.8450 - loss: 0.3200
Epoch 42/100
4/4 - 0s - 6ms/step - accuracy: 0.8450 - loss: 0.3182
Epoch 43/100
4/4 - 0s - 6ms/step - accuracy: 0.8450 - loss: 0.3154
Epoch 44/100
4/4 - 0s - 6ms/step - accuracy: 0.8500 - loss: 0.3144
Epoch 45/100
4/4 - 0s - 6ms/step - accuracy: 0.8500 - loss: 0.3113
Epoch 46/100
4/4 - 0s - 6ms/step - accuracy: 0.8500 - loss: 0.3108
Epoch 47/100
4/4 - 0s - 6ms/step - accuracy: 0.8500 - loss: 0.3073
Epoch 48/100
4/4 - 0s - 7ms/step - accuracy: 0.8500 - loss: 0.3067
Epoch 49/100
4/4 - 0s - 7ms/step - accuracy: 0.8500 - loss: 0.3045
Epoch 50/100
4/4 - 0s - 7ms/step - accuracy: 0.8550 - loss: 0.3027
Epoch 51/100
4/4 - 0s - 7ms/step - accuracy: 0.8500 - loss: 0.3003
Epoch 52/100
4/4 - 0s - 7ms/step - accuracy: 0.8500 - loss: 0.2985
Epoch 53/100
4/4 - 0s - 6ms/step - accuracy: 0.8550 - loss: 0.2961
Epoch 54/100
4/4 - 0s - 6ms/step - accuracy: 0.8550 - loss: 0.2950
Epoch 55/100
4/4 - 0s - 6ms/step - accuracy: 0.8550 - loss: 0.2969
Epoch 56/100
4/4 - 0s - 6ms/step - accuracy: 0.8550 - loss: 0.2914
Epoch 57/100
4/4 - 0s - 6ms/step - accuracy: 0.8550 - loss: 0.2898
Epoch 58/100
4/4 - 0s - 7ms/step - accuracy: 0.8550 - loss: 0.2883
Epoch 59/100
4/4 - 0s - 7ms/step - accuracy: 0.8550 - loss: 0.2867
Epoch 60/100
4/4 - 0s - 7ms/step - accuracy: 0.8550 - loss: 0.2849
Epoch 61/100
4/4 - 0s - 6ms/step - accuracy: 0.8650 - loss: 0.2841
Epoch 62/100
4/4 - 0s - 7ms/step - accuracy: 0.8650 - loss: 0.2816
Epoch 63/100
4/4 - 0s - 7ms/step - accuracy: 0.8600 - loss: 0.2799
Epoch 64/100
4/4 - 0s - 7ms/step - accuracy: 0.8650 - loss: 0.2783
Epoch 65/100
4/4 - 0s - 7ms/step - accuracy: 0.8650 - loss: 0.2762
Epoch 66/100
4/4 - 0s - 7ms/step - accuracy: 0.8600 - loss: 0.2766
Epoch 67/100
4/4 - 0s - 6ms/step - accuracy: 0.8650 - loss: 0.2749
Epoch 68/100
4/4 - 0s - 6ms/step - accuracy: 0.8650 - loss: 0.2714
Epoch 69/100
4/4 - 0s - 6ms/step - accuracy: 0.8650 - loss: 0.2710
Epoch 70/100
4/4 - 0s - 7ms/step - accuracy: 0.8700 - loss: 0.2679
Epoch 71/100
4/4 - 0s - 6ms/step - accuracy: 0.8750 - loss: 0.2670
Epoch 72/100
4/4 - 0s - 6ms/step - accuracy: 0.8700 - loss: 0.2664
Epoch 73/100
4/4 - 0s - 6ms/step - accuracy: 0.8850 - loss: 0.2646
Epoch 74/100
4/4 - 0s - 6ms/step - accuracy: 0.8750 - loss: 0.2614
Epoch 75/100
4/4 - 0s - 6ms/step - accuracy: 0.8750 - loss: 0.2605
Epoch 76/100
4/4 - 0s - 6ms/step - accuracy: 0.8700 - loss: 0.2592
Epoch 77/100
4/4 - 0s - 7ms/step - accuracy: 0.8800 - loss: 0.2579
Epoch 78/100
4/4 - 0s - 7ms/step - accuracy: 0.8850 - loss: 0.2549
Epoch 79/100
4/4 - 0s - 7ms/step - accuracy: 0.8750 - loss: 0.2541
Epoch 80/100
4/4 - 0s - 6ms/step - accuracy: 0.8850 - loss: 0.2517
Epoch 81/100
4/4 - 0s - 6ms/step - accuracy: 0.8850 - loss: 0.2512
Epoch 82/100
4/4 - 0s - 6ms/step - accuracy: 0.8850 - loss: 0.2498
Epoch 83/100
4/4 - 0s - 6ms/step - accuracy: 0.8850 - loss: 0.2475
Epoch 84/100
4/4 - 0s - 7ms/step - accuracy: 0.8850 - loss: 0.2458
Epoch 85/100
4/4 - 0s - 7ms/step - accuracy: 0.8850 - loss: 0.2445
Epoch 86/100
4/4 - 0s - 6ms/step - accuracy: 0.8900 - loss: 0.2438
Epoch 87/100
4/4 - 0s - 6ms/step - accuracy: 0.8950 - loss: 0.2419
Epoch 88/100
4/4 - 0s - 6ms/step - accuracy: 0.8900 - loss: 0.2407
Epoch 89/100
4/4 - 0s - 6ms/step - accuracy: 0.8900 - loss: 0.2385
Epoch 90/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2394
Epoch 91/100
4/4 - 0s - 7ms/step - accuracy: 0.9000 - loss: 0.2350
Epoch 92/100
4/4 - 0s - 6ms/step - accuracy: 0.8950 - loss: 0.2337
Epoch 93/100
4/4 - 0s - 7ms/step - accuracy: 0.9000 - loss: 0.2325
Epoch 94/100
4/4 - 0s - 7ms/step - accuracy: 0.8900 - loss: 0.2314
Epoch 95/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2294
Epoch 96/100
4/4 - 0s - 7ms/step - accuracy: 0.9000 - loss: 0.2281
Epoch 97/100
4/4 - 0s - 7ms/step - accuracy: 0.9000 - loss: 0.2269
Epoch 98/100
4/4 - 0s - 6ms/step - accuracy: 0.9050 - loss: 0.2263
Epoch 99/100
4/4 - 0s - 7ms/step - accuracy: 0.9000 - loss: 0.2235
Epoch 100/100
4/4 - 0s - 7ms/step - accuracy: 0.9050 - loss: 0.2221
# evaluate() returns the loss followed by the compiled metrics (accuracy here)
score = model.evaluate(x, v, verbose=0)
loss_value, accuracy_value = score[0], score[1]
print(f"score = {loss_value}")
print(f"accuracy = {accuracy_value}")
score = 0.21983426809310913
accuracy = 0.8999999761581421
Let’s look at a prediction. We need to feed in a single point as an array of shape `(N, 2)`, where `N` is the number of points.
# a batch containing the single point (-2, 2), i.e. an array of shape (1, 2)
sample = np.array([[-2, 2]])
res = model.predict(sample)
res
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 32ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 42ms/step
array([[1.4882655e-08]], dtype=float32)
We see that we get a floating point number. We will need to convert this to 0 or 1 by rounding.
Let’s plot the partitioning
# resolution of the sampling grid: M points along x, N points along y
M, N = 128, 128

# rectangular region of the plane that the grid covers
xmin, xmax = -1.75, 2.5
ymin, ymax = -1.25, 1.75

# evenly spaced sample coordinates along each axis (endpoints included)
xpt = np.linspace(xmin, xmax, num=M)
ypt = np.linspace(ymin, ymax, num=N)
To make the prediction go faster, we want to feed in all of the grid points at once, as a single array with one (x, y) pair per row, of the form:
[[xpt[0], ypt[0]],
[xpt[0], ypt[1]],
...
]
We can see that this packs them into the vector
# Build every (x, y) combination of the grid coordinates.  After the
# transpose the axes are (x index, y index, coordinate), so flattening to
# two columns gives one point per row, with the y index varying fastest.
grid_x, grid_y = np.meshgrid(xpt, ypt)
stacked = np.array([grid_x, grid_y])
pairs = stacked.T.reshape(-1, 2)
pairs[0]
array([-1.75, -1.25])
Now we do the prediction. We will get a vector out, which we reshape to match the original domain.
# Evaluate the network on every grid point in a single batched call, then
# fold the flat output back into an (M, N) grid indexed as [x index, y index].
# reshape() is used rather than assigning to res.shape, which NumPy
# discourages in favor of reshape.
res = model.predict(pairs, verbose=0)
res = res.reshape(M, N)
Finally, round to 0 or 1
# grid cells whose network output exceeds one half are assigned to class 1,
# all others to class 0
above_half = res > 0.5
domain = np.where(above_half, 1, 0)
and we can plot the data
fig, ax = plt.subplots()

# domain is indexed [x index, y index]; imshow expects rows to run along y,
# hence the transpose.  origin="lower" puts ymin at the bottom of the image.
ax.imshow(
    domain.T,
    extent=[xmin, xmax, ymin, ymax],
    origin="lower",
    alpha=0.25,
)

# overlay the training points, colored by their class label
# (note: this rebinds xpt / ypt from the grid coordinates to the data points)
xpt = [point[0] for point in x]
ypt = [point[1] for point in x]
ax.scatter(xpt, ypt, c=v, s=40, cmap="viridis")
<matplotlib.collections.PathCollection at 0x7fee3644d4d0>
