"""Functionality for statistical computations needed by other functions.
This module contains functions for statistical computations. These are
mostly auxiliary functions used by other image and shape analysis functions.
"""
import numpy as np
from ..numerics.function import ImageFunction, MeshFunction
from ..numerics.fem.domain2d_fem import ReferenceElement as DomainRefElement
from ..numerics.fem.curve_fem import ReferenceElement as SurfaceRefElement
[docs]def two_clusters(I):
"""Computes two clusters from given vector with k-means algorithm.
Given a one-dimensional array of real numbers, this function computes
and returns two clusters using the k-means algorithm. For the method
to work well, the data should have a clear bimodal distribution.
Parameters
----------
I : NumPy array
A one-dimensional Numpy array of real numbers, which is assumed
to have two clusters.
Returns
-------
clusters : tuple
A pair of tuples characterizing the two clusters computed.
The tuples have the following form:
(center, deviation, share, mask),
where the center is the center (or average) of the cluster
computed, deviation is the median of the distances of the
points in that cluster to the center, share is the ratio
of the points in that cluster to the total number of points,
mask is a boolean array of the same size as I and is True
for points belonging to the clusters, is False otherwise.
d : NumPy array
Array of distances for each point in I to its cluster center.
"""
tol = 1.0 / 255.0
c0, c1 = np.min(I), np.max(I)
old_c0 = c0 + 2.0*tol
old_c1 = c1 + 2.0*tol
k = 1
while abs(old_c0 - c0) + abs(old_c1 - c1) > tol:
d0 = np.abs( I - c0 )
d1 = np.abs( I - c1 )
mask0 = d0 < d1
mask1 = ~mask0
n0 = np.count_nonzero( mask0 )
n1 = len(I) - n0
old_c0, old_c1 = c0, c1
c0 = np.sum( I[mask0] ) / n0
c1 = np.sum( I[mask1] ) / n1
k = k+1
d = np.empty( len(I) )
d[mask0] = np.abs( I[mask0] - c0 )
d[mask1] = np.abs( I[mask1] - c1 )
deviation0 = np.median( d[mask0] )
deviation1 = np.median( d[mask1] )
share0 = 1.0 * n0 / len(I)
share1 = 1.0 * n1 / len(I)
clusters = [ ( c0, deviation0, share0, mask0 ),
( c1, deviation1, share1, mask1 ) ]
return (clusters, d)
[docs]def estimate_integration_errors(I, resolution=None):
"""Estimates the integration errors for elements of about pixel size.
The given image is sampled by domain elements (e.g. triangles),
and surface elements (e.g. curve edges), with each element being
roughly the pixel size. Quadrature error is evaluated on each
element and used to estimate the average error for a unit square
or a curve of length one on the image. The purpose is to see what
the average error would be if the unit square or the curve is meshed
fine enough to resolve the image pixels with small elements.
Parameters
----------
I : ImageFunction
An image function that returns the image values for a given
2xN or 3xN array of point coordinates. It can also have the
method, resolution(), which returns a tuple consisting of the
number of pixels along each axes. This is used if resolution
is not given as a separate argument.
resolution : tuple, optional
A pair or triple of integers defining the grid size to infer
the minimum size of mesh elements to be used in integration.
Returns
-------
domain_int_error : float
The average integration error on a unit area of the image
when the integration is performed on triangles, each roughly
the size of a pixel.
surface_int_error : float
The average integration error on a unit length curve on the
image when the curve consists of elements of roughly the size
of a pixel.
sample_values : NumPy array
A one-dimensional NumPy array containing all of the random image
values used for the computations.
"""
try:
if resolution is not None:
grid_size = resolution
else:
grid_size = I.resolution()
except AttributeError:
print("Need the resolution parameter to continue!")
return None
ref_element = DomainRefElement()
quad0 = ref_element.quadrature( order=2 )
quad1 = ref_element.quadrature( order = quad0.order()+1 )
domain = ref_element.random_elements( grid_size, I.diameter() )
func = MeshFunction( I, caching=False )
pts0, pts1 = quad0.points(), quad1.points()
f0_list = [ func( domain, pts0[:,k] ) for k in range(pts0.shape[1]) ]
f1_list = [ func( domain, pts1[:,k] ) for k in range(pts1.shape[1]) ]
error = sum(( weight*f0 for f0,weight in zip(f0_list, quad0.weights()) ))
error -= sum(( weight*f1 for f1,weight in zip(f1_list, quad1.weights()) ))
error[:] = np.abs( error[:] )
el_sizes = domain.element_sizes()
error *= el_sizes
avg_error = np.mean( error )
avg_el_size = np.mean( el_sizes )
normalization = 1.0 / avg_el_size
domain_int_error = normalization * avg_error
# Compute the integration error expected for curve elements
# at pixel level.
ref_element = SurfaceRefElement()
quad0 = ref_element.quadrature( order=2 )
quad1 = ref_element.quadrature( order = quad0.order()+1 )
surface = ref_element.random_elements( grid_size, I.diameter() )
func = MeshFunction( I, caching=False )
f3_list = [ func( surface, pt ) for pt in quad0.points() ]
f4_list = [ func( surface, pt ) for pt in quad1.points() ]
error = sum(( weight*f3 for f3,weight in zip(f3_list, quad0.weights()) ))
error -= sum(( weight*f4 for f4,weight in zip(f4_list, quad1.weights()) ))
error[:] = np.abs( error[:] )
el_sizes = surface.element_sizes()
error *= el_sizes
avg_error = np.mean( error )
avg_el_size = np.mean( el_sizes )
normalization = 1.0 / avg_el_size
surface_int_error = normalization * avg_error
# Put sample image values in a single vector and return.
sample_values = np.hstack( f0_list + f1_list + f3_list + f4_list )
return (domain_int_error, surface_int_error, sample_values)