Just for fun: UMAPs#
In this notebook we demonstrate how repeatedly executed UMAPs, sorted to arrange changes between them, lead to a funny video of jumping UMAPs.
from skimage.data import human_mitosis
from napari_segment_blobs_and_things_with_membranes import voronoi_otsu_labeling
from napari_simpleitk_image_processing import label_statistics
from sklearn.preprocessing import StandardScaler
import numpy as np
import umap
import seaborn
import pyclesperanto_prototype as cle
import matplotlib.pyplot as plt
Example data#
First, we load the example image (taken from the scikit-image examples) showing human cell nuclei, segment them and measure properties of objects.
image = cle.asarray(human_mitosis())
labels = cle.voronoi_otsu_labeling(image, spot_sigma=2.5, outline_sigma=0)
nuclei_statistics = label_statistics(image, labels,
intensity=True,
size=True,
shape=True,
perimeter=True,
moments=True)
nuclei_statistics.head(15)
label | maximum | mean | median | minimum | sigma | sum | variance | elongation | feret_diameter | ... | number_of_pixels_on_border | perimeter | perimeter_on_border | perimeter_on_border_ratio | principal_axes0 | principal_axes1 | principal_axes2 | principal_axes3 | principal_moments0 | principal_moments1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 83.0 | 69.886364 | 73.359375 | 48.0 | 9.890601 | 3075.0 | 97.823996 | 1.858735 | 9.486833 | ... | 10 | 25.664982 | 10.0 | 0.389636 | 0.998995 | -0.044825 | 0.044825 | 0.998995 | 1.926448 | 6.655680 |
1 | 2 | 46.0 | 42.125000 | 43.328125 | 38.0 | 2.748376 | 337.0 | 7.553571 | 0.000000 | 7.000000 | ... | 8 | 15.954349 | 8.0 | 0.501431 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 5.250000 |
2 | 3 | 53.0 | 44.916667 | 45.265625 | 38.0 | 4.461111 | 539.0 | 19.901515 | 3.609511 | 6.000000 | ... | 7 | 14.843629 | 7.0 | 0.471583 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | 0.243056 | 3.166667 |
3 | 4 | 92.0 | 65.603175 | 65.609375 | 38.0 | 14.475273 | 4133.0 | 209.533538 | 1.212933 | 10.000000 | ... | 7 | 29.687257 | 7.0 | 0.235791 | 0.914511 | 0.404561 | -0.404561 | 0.914511 | 4.227271 | 6.219189 |
4 | 5 | 59.0 | 46.315068 | 46.234375 | 38.0 | 5.065890 | 3381.0 | 25.663242 | 1.249507 | 11.401754 | ... | 9 | 34.264893 | 9.0 | 0.262660 | 0.363116 | -0.931744 | 0.931744 | 0.363116 | 5.001247 | 7.808286 |
5 | 6 | 75.0 | 54.087838 | 53.984375 | 38.0 | 8.394952 | 8005.0 | 70.475225 | 1.280946 | 14.866069 | ... | 6 | 42.864805 | 6.0 | 0.139975 | 0.183365 | -0.983045 | 0.983045 | 0.183365 | 9.187499 | 15.075056 |
6 | 7 | 96.0 | 61.033333 | 62.703125 | 38.0 | 15.166831 | 3662.0 | 230.032768 | 3.402583 | 15.132746 | ... | 0 | 36.716372 | 0.0 | 0.000000 | -0.014158 | -0.999900 | 0.999900 | -0.014158 | 1.446932 | 16.751957 |
7 | 8 | 100.0 | 70.000000 | 74.328125 | 39.0 | 16.138291 | 4480.0 | 260.444444 | 1.022473 | 9.219544 | ... | 0 | 29.917295 | 0.0 | 0.000000 | 0.917896 | -0.396820 | 0.396820 | 0.917896 | 5.073067 | 5.303642 |
8 | 9 | 65.0 | 53.117647 | 53.015625 | 38.0 | 6.358751 | 3612.0 | 40.433714 | 1.299197 | 9.848858 | ... | 0 | 29.687257 | 0.0 | 0.000000 | 0.108143 | -0.994135 | 0.994135 | 0.108143 | 4.175749 | 7.048300 |
9 | 10 | 79.0 | 59.594937 | 60.765625 | 38.0 | 9.191020 | 4708.0 | 84.474846 | 1.412593 | 11.661904 | ... | 0 | 36.946410 | 0.0 | 0.000000 | 0.957268 | 0.289203 | -0.289203 | 0.957268 | 4.709643 | 9.397712 |
10 | 11 | 80.0 | 59.333333 | 58.828125 | 38.0 | 10.281620 | 4450.0 | 105.711712 | 1.136874 | 10.000000 | ... | 0 | 30.472655 | 0.0 | 0.000000 | 0.494998 | -0.868894 | 0.868894 | 0.494998 | 5.244425 | 6.778330 |
11 | 12 | 96.0 | 69.666667 | 71.421875 | 39.0 | 14.848699 | 4389.0 | 220.483871 | 1.177721 | 9.486833 | ... | 0 | 28.021176 | 0.0 | 0.000000 | -0.266249 | -0.963904 | 0.963904 | -0.266249 | 4.246311 | 5.889743 |
12 | 13 | 102.0 | 78.533333 | 82.078125 | 42.0 | 16.168994 | 3534.0 | 261.436364 | 2.122467 | 11.000000 | ... | 12 | 27.791138 | 12.0 | 0.431792 | -0.020958 | -0.999780 | 0.999780 | -0.020958 | 1.781948 | 8.027435 |
13 | 14 | 72.0 | 53.914286 | 53.984375 | 38.0 | 6.819867 | 7548.0 | 46.510586 | 1.226967 | 14.317821 | ... | 0 | 42.864805 | 0.0 | 0.000000 | 0.972168 | -0.234285 | 0.234285 | 0.972168 | 9.112690 | 13.718688 |
14 | 15 | 99.0 | 72.049180 | 73.359375 | 38.0 | 17.157531 | 4395.0 | 294.380874 | 1.298605 | 9.433981 | ... | 0 | 28.021176 | 0.0 | 0.000000 | 0.562829 | -0.826574 | 0.826574 | 0.562829 | 3.746099 | 6.317325 |
15 rows × 27 columns
Feature selection and scaling#
Before applying dimensionality reduction, we select columns and scale the data.
selected_table = nuclei_statistics[
[
"mean",
"variance",
"elongation",
]
]
selected_statistics = selected_table.values
scaled_statistics = StandardScaler().fit_transform(selected_statistics)
type(scaled_statistics), scaled_statistics.shape
(numpy.ndarray, (320, 3))
Computing many UMAPs#
The algorithm behind the UMAP is partially a non-deterministic. Thus, if you run the same code multiple times and sort the results, you can get happily jumping UMAPs. Technically, the only difference between the UMAPS shown below are local and global rotations.
from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import linkage, leaves_list
import os
import tempfile
from PIL import Image
# Generate 100 different UMAP embeddings
print("Generating 100 UMAP embeddings...")
umap_embeddings = []
for i in range(100):
umap_reducer = umap.UMAP()
umap_embedding = umap_reducer.fit_transform(scaled_statistics)
umap_embeddings.append(umap_embedding)
# Calculate similarity between embeddings
print("Calculating similarities between embeddings...")
# Flatten each embedding and calculate pairwise distances
flattened_embeddings = [emb.flatten() for emb in umap_embeddings]
distance_matrix = pdist(flattened_embeddings, metric='euclidean')
distance_matrix_square = squareform(distance_matrix)
# Sort embeddings by similarity using hierarchical clustering
print("Sorting embeddings by similarity...")
linkage_matrix = linkage(distance_matrix, method='ward')
sorted_indices = leaves_list(linkage_matrix)
sorted_embeddings = [umap_embeddings[i] for i in sorted_indices]
# Create temporary directory for saving plots
temp_dir = tempfile.mkdtemp()
print(f"Creating plots and saving to {temp_dir}...")
# Create and save plots
plot_files = []
for i, umap_embedding in enumerate(sorted_embeddings):
fig, ax = plt.subplots(figsize=(6.4, 4.8)) # 640x480 pixels at 100 DPI
scatter = ax.scatter(umap_embedding[:, 0], umap_embedding[:, 1], s=10, c=range(len(umap_embedding[:, 0])), cmap='viridis')
ax.set_title(f'UMAP {i+1}')
ax.set_xlabel('UMAP 1')
ax.set_ylabel('UMAP 2')
# Save plot
filename = os.path.join(temp_dir, f'umap_{i:03d}.png')
plt.savefig(filename, dpi=100, bbox_inches=None)
plot_files.append(filename)
plt.close(fig)
# Load images back into numpy array
print("Loading images into numpy array...")
plot_images = []
for filename in plot_files:
img = Image.open(filename)
img_array = np.array(img)
plot_images.append(img_array)
# Convert to 2D numpy array (flatten spatial dimensions)
images_array = np.array(plot_images)
print(f"Final array shape: {images_array.shape}")
# Clean up temporary files
for filename in plot_files:
os.remove(filename)
os.rmdir(temp_dir)
print("Done! The sorted UMAP plots are stored in 'images_array'")
print(f"Array contains {len(images_array)} images of shape {images_array[0].shape}")
Generating 100 UMAP embeddings...
Calculating similarities between embeddings...
Sorting embeddings by similarity...
Creating plots and saving to C:\Users\rober\AppData\Local\Temp\tmp332cayzj...
Loading images into numpy array...
Final array shape: (100, 480, 640, 4)
Done! The sorted UMAP plots are stored in 'images_array'
Array contains 100 images of shape (480, 640, 4)
import stackview
stackview.animate(images_array, filename="happy_umap.gif")
C:\Users\rober\miniforge3\envs\bio11\Lib\site-packages\stackview\_animate.py:63: UserWarning: The image is quite large (> 10 MByte) and might not be properly shown in the notebook when rendered over the internet. Consider subsampling or cropping the image for visualization purposes.
warnings.warn("The image is quite large (> 10 MByte) and might not be properly shown in the notebook when rendered over the internet. Consider subsampling or cropping the image for visualization purposes.")