Source code for skshape.image.statistics

"""Functionality for statistical computations needed by other functions.

This module contains functions for statistical computations. These are
mostly auxiliary functions used by other image and shape analysis functions.

"""

import numpy as np
from ..numerics.function import ImageFunction, MeshFunction
from ..numerics.fem.domain2d_fem import ReferenceElement as DomainRefElement
from ..numerics.fem.curve_fem import ReferenceElement as SurfaceRefElement


[docs]def two_clusters(I): """Computes two clusters from given vector with k-means algorithm. Given a one-dimensional array of real numbers, this function computes and returns two clusters using the k-means algorithm. For the method to work well, the data should have a clear bimodal distribution. Parameters ---------- I : NumPy array A one-dimensional Numpy array of real numbers, which is assumed to have two clusters. Returns ------- clusters : tuple A pair of tuples characterizing the two clusters computed. The tuples have the following form: (center, deviation, share, mask), where the center is the center (or average) of the cluster computed, deviation is the median of the distances of the points in that cluster to the center, share is the ratio of the points in that cluster to the total number of points, mask is a boolean array of the same size as I and is True for points belonging to the clusters, is False otherwise. d : NumPy array Array of distances for each point in I to its cluster center. """ tol = 1.0 / 255.0 c0, c1 = np.min(I), np.max(I) old_c0 = c0 + 2.0*tol old_c1 = c1 + 2.0*tol k = 1 while abs(old_c0 - c0) + abs(old_c1 - c1) > tol: d0 = np.abs( I - c0 ) d1 = np.abs( I - c1 ) mask0 = d0 < d1 mask1 = ~mask0 n0 = np.count_nonzero( mask0 ) n1 = len(I) - n0 old_c0, old_c1 = c0, c1 c0 = np.sum( I[mask0] ) / n0 c1 = np.sum( I[mask1] ) / n1 k = k+1 d = np.empty( len(I) ) d[mask0] = np.abs( I[mask0] - c0 ) d[mask1] = np.abs( I[mask1] - c1 ) deviation0 = np.median( d[mask0] ) deviation1 = np.median( d[mask1] ) share0 = 1.0 * n0 / len(I) share1 = 1.0 * n1 / len(I) clusters = [ ( c0, deviation0, share0, mask0 ), ( c1, deviation1, share1, mask1 ) ] return (clusters, d)
[docs]def estimate_integration_errors(I, resolution=None): """Estimates the integration errors for elements of about pixel size. The given image is sampled by domain elements (e.g. triangles), and surface elements (e.g. curve edges), with each element being roughly the pixel size. Quadrature error is evaluated on each element and used to estimate the average error for a unit square or a curve of length one on the image. The purpose is to see what the average error would be if the unit square or the curve is meshed fine enough to resolve the image pixels with small elements. Parameters ---------- I : ImageFunction An image function that returns the image values for a given 2xN or 3xN array of point coordinates. It can also have the method, resolution(), which returns a tuple consisting of the number of pixels along each axes. This is used if resolution is not given as a separate argument. resolution : tuple, optional A pair or triple of integers defining the grid size to infer the minimum size of mesh elements to be used in integration. Returns ------- domain_int_error : float The average integration error on a unit area of the image when the integration is performed on triangles, each roughly the size of a pixel. surface_int_error : float The average integration error on a unit length curve on the image when the curve consists of elements of roughly the size of a pixel. sample_values : NumPy array A one-dimensional NumPy array containing all of the random image values used for the computations. """ try: if resolution is not None: grid_size = resolution else: grid_size = I.resolution() except AttributeError: print("Need the resolution parameter to continue!") return None ref_element = DomainRefElement() quad0 = ref_element.quadrature( order=2 ) quad1 = ref_element.quadrature( order = quad0.order()+1 ) domain = ref_element.random_elements( grid_size, I.diameter() ) func = MeshFunction( I, caching=False ) pts0, pts1 = quad0.points(), quad1.points() f0_list = [ func( domain, pts0[:,k] ) for k in range(pts0.shape[1]) ] f1_list = [ func( domain, pts1[:,k] ) for k in range(pts1.shape[1]) ] error = sum(( weight*f0 for f0,weight in zip(f0_list, quad0.weights()) )) error -= sum(( weight*f1 for f1,weight in zip(f1_list, quad1.weights()) )) error[:] = np.abs( error[:] ) el_sizes = domain.element_sizes() error *= el_sizes avg_error = np.mean( error ) avg_el_size = np.mean( el_sizes ) normalization = 1.0 / avg_el_size domain_int_error = normalization * avg_error # Compute the integration error expected for curve elements # at pixel level. ref_element = SurfaceRefElement() quad0 = ref_element.quadrature( order=2 ) quad1 = ref_element.quadrature( order = quad0.order()+1 ) surface = ref_element.random_elements( grid_size, I.diameter() ) func = MeshFunction( I, caching=False ) f3_list = [ func( surface, pt ) for pt in quad0.points() ] f4_list = [ func( surface, pt ) for pt in quad1.points() ] error = sum(( weight*f3 for f3,weight in zip(f3_list, quad0.weights()) )) error -= sum(( weight*f4 for f4,weight in zip(f4_list, quad1.weights()) )) error[:] = np.abs( error[:] ) el_sizes = surface.element_sizes() error *= el_sizes avg_error = np.mean( error ) avg_el_size = np.mean( el_sizes ) normalization = 1.0 / avg_el_size surface_int_error = normalization * avg_error # Put sample image values in a single vector and return. sample_values = np.hstack( f0_list + f1_list + f3_list + f4_list ) return (domain_int_error, surface_int_error, sample_values)