import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import imageio
import math
from sklearn.cluster import KMeans
from PIL import Image
def process_image(image_path, percent=0.15, number_cluster=15):
    """
    Run the full pipeline on one image: select the brightest pixels, then
    group them into clusters by their distance from the image centre.

    Authors: Ethan Fang

    Version: v0_1 (Jul 10 2023)

    :param image_path: The path to the input image file.
    :type image_path: str
    :param percent: Fraction of the image's pixels to keep (``0.15`` keeps
        the brightest 15 %), defaults to 0.15.
    :type percent: float, optional
    :param number_cluster: The number of clusters to find, defaults to 15.
    :type number_cluster: int, optional
    :return: The DataFrame returned by ``find_clusters``: one row per
        selected pixel, including its ``Distance`` and ``Cluster`` columns.
    :rtype: pandas.DataFrame
    """
    # find_ring_coordinates already attaches the 'Distance' column that
    # find_clusters reads, so no separate distance pass is required.  An
    # earlier version called ``calculate_distances`` (plural) here, a name
    # this module never defines, which raised NameError on every call.
    df = find_ring_coordinates(image_path, percent)
    return find_clusters(df, number_cluster)
def calculate_distance(x1, y1, x2, y2):
    """
    Calculate the Euclidean distance between two points.

    Authors: Ethan Fang

    Version: v0_1 (Jul 10 2023)

    :param x1: X-coordinate of the first point.
    :type x1: float
    :param y1: Y-coordinate of the first point.
    :type y1: float
    :param x2: X-coordinate of the second point.
    :type x2: float
    :param y2: Y-coordinate of the second point.
    :type y2: float
    :return: The Euclidean distance between the two points.
    :rtype: float
    """
    delta_x = x2 - x1
    delta_y = y2 - y1
    return math.sqrt(delta_x ** 2 + delta_y ** 2)
def find_ring_coordinates(image_path, percent=0.15, center=(1023, 1023),
                          min_distance=400):
    """
    Find the brightest pixels of an image and order them by their distance
    from a centre point.

    Authors: Ethan Fang

    Version: v0_1 (Jul 10 2023)

    :param image_path: The path to the input image file.  A 2-D
        ``numpy.ndarray`` of grey levels is also accepted and used as-is,
        which avoids a round trip through the file system.
    :type image_path: str or numpy.ndarray
    :param percent: Fraction of the image's pixels to keep (``0.15`` keeps
        the brightest 15 %), defaults to 0.15.  Must lie in ``[0, 1]``.
    :type percent: float, optional
    :param center: ``(x, y)`` pixel from which distances are measured,
        defaults to ``(1023, 1023)``.
    :type center: tuple, optional
    :param min_distance: Pixels whose distance from ``center`` is not
        strictly greater than this value are dropped, defaults to 400.
    :type min_distance: float, optional
    :return: A DataFrame with columns ``x``, ``y``, ``intensity``,
        ``Pixel Number`` (position of the pixel in the unsorted selection)
        and ``Distance``, sorted by ascending ``Distance``.
    :rtype: pandas.DataFrame
    :raises ValueError: If ``percent`` is outside ``[0, 1]`` or the image
        data is not two-dimensional.
    """
    if not 0 <= percent <= 1:
        raise ValueError(
            "percent must be between 0 and 1, got {!r}".format(percent)
        )

    if isinstance(image_path, np.ndarray):
        gray = image_path
    else:
        # The context manager closes the underlying file once the pixel data
        # has been copied into the array.
        with Image.open(image_path) as img:
            gray = np.asarray(img.convert("L"))

    if gray.ndim != 2:
        raise ValueError(
            "expected a 2-D grayscale image, got {} dimension(s)".format(gray.ndim)
        )
    return _rank_bright_pixels(gray, percent, center, min_distance)


def _rank_bright_pixels(gray, percent, center, min_distance):
    """Select the brightest ``percent`` of ``gray`` and sort them by distance from ``center``."""
    width = gray.shape[1]

    # The 16383.0 divisor (14-bit full scale) is kept so that the reported
    # intensities are unchanged.
    # NOTE(review): Pillow's "L" mode is 8-bit, so intensities loaded from a
    # file top out at 255 / 16383 rather than 1.0 -- confirm the intended scale.
    intensities = gray.ravel() / 16383.0

    count = int(intensities.size * percent)
    if count > 0:
        top = np.argpartition(intensities, -count)[-count:]
    else:
        # ``[-0:]`` would select every pixel, so handle "keep nothing" explicitly.
        top = np.empty(0, dtype=np.intp)

    # A flat index is row * width + column, i.e. y * width + x.
    rows, cols = np.divmod(top, width)
    center_x, center_y = center
    distance = np.sqrt((cols - center_x) ** 2 + (rows - center_y) ** 2)

    # A stable sort keeps the selection order among equidistant pixels.
    # Reordering every column with the same permutation keeps each intensity
    # attached to its own (x, y) pixel.
    order = np.argsort(distance, kind="stable")
    df = pd.DataFrame({
        "x": cols[order],
        "y": rows[order],
        "intensity": intensities[top][order],
        "Pixel Number": order,
        "Distance": distance[order],
    })
    df = df[df["Distance"] > min_distance]
    return df.reset_index(drop=True)
def find_clusters(df, number_cluster=15, random_state=None):
    """
    Find clusters in the given DataFrame using K-means clustering on the
    ``Distance`` column.

    Authors: Ethan Fang

    Version: v0_1 (Jul 10 2023)

    :param df: The DataFrame of selected pixels; it must contain a
        ``Distance`` column.  The frame is modified in place.
    :type df: pandas.DataFrame
    :param number_cluster: The number of clusters to find, defaults to 15.
    :type number_cluster: int, optional
    :param random_state: Seed forwarded to ``KMeans`` for reproducible
        labels; ``None`` (the default) keeps scikit-learn's unseeded
        behaviour.
    :type random_state: int or None, optional
    :return: The same DataFrame with an additional 'Cluster' column indicating the cluster label for each pixel.
    :rtype: pandas.DataFrame
    """
    # KMeans expects a 2-D (n_samples, n_features) array; distance is the
    # only feature, so each cluster corresponds to a band of radii.
    distances = df['Distance'].values.reshape(-1, 1)
    kmeans = KMeans(n_clusters=number_cluster, random_state=random_state)
    kmeans.fit(distances)
    df['Cluster'] = kmeans.labels_
    return df