Source code for image_segmentation

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import imageio
import math
from sklearn.cluster import KMeans
from PIL import Image

def process_image(image_path, percent=0.15, number_cluster=15):
    """
    Run the full pipeline on one image: pick out the brightest pixels, then
    group them into clusters by their distance from the image centre.

    Authors: Ethan Fang
    Version: v0_1 (Jul 10 2023)

    :param image_path: The path to the input image file.
    :type image_path: str
    :param percent: Fraction of the image's pixels to keep (brightest first),
        defaults to 0.15.
    :type percent: float, optional
    :param number_cluster: The number of clusters to find, defaults to 15.
    :type number_cluster: int, optional
    :return: The DataFrame produced by ``find_ring_coordinates`` with the
        'Cluster' column added by ``find_clusters``.
    :rtype: pandas.DataFrame
    """
    # find_ring_coordinates already attaches the 'Distance' column that
    # find_clusters consumes, so no separate distance pass is needed here.
    # (A former call to calculate_distances(df) referred to a name that is
    # neither defined nor imported in this module and raised NameError.)
    df = find_ring_coordinates(image_path, percent)
    return find_clusters(df, number_cluster)
def calculate_distance(x1, y1, x2, y2):
    """
    Return the straight-line (Euclidean) distance between two 2-D points.

    Authors: Ethan Fang
    Version: v0_1 (Jul 10 2023)

    :param x1: X-coordinate of the first point.
    :type x1: float
    :param y1: Y-coordinate of the first point.
    :type y1: float
    :param x2: X-coordinate of the second point.
    :type x2: float
    :param y2: Y-coordinate of the second point.
    :type y2: float
    :return: The Euclidean distance between (x1, y1) and (x2, y2).
    :rtype: float
    """
    delta_x = x2 - x1
    delta_y = y2 - y1
    squared_length = delta_x ** 2 + delta_y ** 2
    return math.sqrt(squared_length)
def find_ring_coordinates(image_path, percent=0.15, *, center=(1023, 1023),
                          min_distance=400):
    """
    Select the brightest pixels of an image and tabulate them by their
    distance from a centre point.

    The image is converted to grayscale, the ``percent`` brightest pixels are
    kept, each is tagged with its Euclidean distance from ``center``, and only
    pixels farther than ``min_distance`` are returned, ordered by increasing
    distance.

    Authors: Ethan Fang
    Version: v0_1 (Jul 10 2023)

    :param image_path: The path to the input image file.
    :type image_path: str
    :param percent: Fraction (between 0 and 1) of the image's pixels to keep,
        brightest first, defaults to 0.15.
    :type percent: float, optional
    :param center: (x, y) pixel coordinates distances are measured from,
        defaults to (1023, 1023).
    :type center: tuple, optional
    :param min_distance: Pixels at this distance from ``center`` or closer are
        dropped, defaults to 400.
    :type min_distance: float, optional
    :return: A DataFrame with columns 'x', 'y', 'intensity', 'Pixel Number'
        (the pixel's position in the selection order before sorting) and
        'Distance', sorted by 'Distance' with a fresh 0..n-1 index.
    :rtype: pandas.DataFrame
    :raises ValueError: If ``percent`` is not between 0 and 1.
    """
    if not 0 <= percent <= 1:
        raise ValueError(f"percent must be between 0 and 1, got {percent!r}")

    # Close the file handle as soon as the pixel data has been converted.
    with Image.open(image_path) as source:
        grayscale = source.convert("L")
    width, _ = grayscale.size

    # NOTE(review): 16383 is 2**14 - 1 (14-bit full scale), but mode "L" holds
    # 8-bit values, so normalized intensities stay far below 1 — confirm the
    # intended scaling. Kept as-is so the 'intensity' values do not change.
    intensities = np.array(grayscale).flatten() / 16383.0

    num_pixels = int(intensities.size * percent)
    if num_pixels > 0:
        # argpartition puts the num_pixels largest values (unordered) last.
        top_indices = np.argpartition(intensities, -num_pixels)[-num_pixels:]
    else:
        # Slicing with [-0:] would select every pixel, so handle zero
        # explicitly and return an empty selection instead.
        top_indices = np.empty(0, dtype=np.intp)

    # The flattened array is row-major: flat index = y * width + x.
    y_coords, x_coords = np.divmod(top_indices, width)
    df = pd.DataFrame({
        "x": x_coords,
        "y": y_coords,
        "intensity": intensities[top_indices],
    })

    center_x, center_y = center
    df["Pixel Number"] = np.arange(len(df))
    df["Distance"] = np.sqrt(
        (center_x - df["x"]) ** 2 + (center_y - df["y"]) ** 2
    )

    # Sort whole rows so that 'intensity' stays attached to its pixel; ties in
    # distance are broken by the original selection order.
    df = df.sort_values(["Distance", "Pixel Number"])
    df = df[df["Distance"] > min_distance]
    return df.reset_index(drop=True)
def find_clusters(df, number_cluster=15, *, random_state=None):
    """
    Group pixels into clusters of similar distance using K-means.

    K-means is fitted on the single 'Distance' column, and the resulting
    label of each row is stored in a new 'Cluster' column. The input
    DataFrame is modified in place and also returned.

    Authors: Ethan Fang
    Version: v0_1 (Jul 10 2023)

    :param df: The DataFrame of pixels; must contain a 'Distance' column.
    :type df: pandas.DataFrame
    :param number_cluster: The number of clusters to find, defaults to 15.
    :type number_cluster: int, optional
    :param random_state: Seed forwarded to ``KMeans`` for reproducible labels;
        the default ``None`` keeps KMeans' own unseeded behaviour.
    :type random_state: int or None, optional
    :return: The same DataFrame with an additional 'Cluster' column holding
        the cluster label of each pixel.
    :rtype: pandas.DataFrame
    """
    # KMeans expects a 2-D (n_samples, n_features) array; there is one feature.
    distances = df["Distance"].to_numpy().reshape(-1, 1)

    model = KMeans(n_clusters=number_cluster, random_state=random_state)
    model.fit(distances)

    df["Cluster"] = model.labels_
    return df