Source code for pysteg.features.bsm

##  $Id$
## -*- coding: utf-8 -*-

"""
Calculate the BSM features of Avcibas et al (2005).

The paper present the features in 3 groups (i)-(iii), where the
features within each group are calculated in very similar ways,
based on the same basic statistics.  These groups are recognised
as subvectors in the :class:`bsmVector` object.

:Module:    pysteg.features.bsm
:Date:      $Date$
:Revision:  $Revision$
:Author:    © 2012: Hans Georg Schaathun <georg@schaathun.net>
"""

import numpy as np
from .cm import splitMatrix

__all__ = [ "bsmraw", "bsm12", "bsm3", ]

# Group (i)-(ii) Preliminaries
# ----------------------------

# The first two groups, totalling 16 features, are based on four
# statistics a, b, c, d from each of the two bit planes, defined in
# Eq (2) in the paper.  These four statistics are calculated by
# :func:`chiavg` below.  First we define the auxiliary function
# :func:`chi` defining the \chi_{r,s} values from Eq (1).
#
# .. autofunc:: pysteg.analysis.sfeatures.chi
#
# ::

def chi(X,dir):
  """
  This function calculates the texture type \chi_{r,s} for each pixel r,
  according to (1) in Avcibas' paper.  The input dir is the direction s.
  The result is a numpy array, giving the type for each pixel of X with
  respect to the given direction.

  Although Avcibas uses the range 1..4 for the type, we use 0..3 because
  this makes implementation easier.
  """
  (A,B) = splitMatrix(X,dir)
  return 2*A + B

# .. autofunc:: chiavg
#
# ::

def chiavg(X):
  """
  This function returns a numpy array [a,b,c,d], where a,b,c,d
  are given by (2) in Avcibas' paper.
  """
  h = np.zeros(4)
  dlist = [ (0,1), (1,0), (-1,0), (0,-1) ]
  for d in dlist:
    h += np.histogram( chi(X,d), range(5) )[0]
  return h.astype(float) / X.size

# Group (i)
# ---------
#
# Group (i) is based on a 10-D features vector from each of the bit
# planes.  The ten features used for the image is the vector difference
# of the features from the two bitplanes.
#
# .. autofunction:: g1
#
#    calculates the 10-D feature vector from a binary image.
#    The input is the output of the :func:`chiavg` function.
#
# ::
    
def g1(h):
  a,b,c,d = h
  ad = a + d
  bc = b + c
  S = ad + bc
  m1 = 2*ad / ( 2*ad + b + c )
  m2 = a / ( a + 2*bc )
  m3 = a / bc
  m4 = ad / bc
  m5 = ( a/(a+b) + a/(a+c) + d/(b+d) + d/(c+d) ) / 4
  m6 = a*d / np.sqrt( (a+d)*(a+c)*(b+d)*(c+d) )
  m7 = np.sqrt( (a/(a+b)) * (a/(a+c)) )
  m8 = bc / (2*a + b + c)
  m9 = b*c / S ** 2
  m10 = bc / 4*S
  return np.array( [ m1,m2,m3,m4,m5,m6,m7,m8,m9,m10 ] )

# Group (ii)
# ----------
#
# The calculation of group (ii) can be made significantly simpler
# than the presentation in the paper.  Firstly, we observe that
# the probability estimates p_i^j are equal to the a/4, b/4, c/4, and
# d/4, and can thus be calculated easily directly from the output of 
# :func:`chiavg` from Group (i).
#
# Group (ii) consists of four features calculated from a pair of
# lists [a,b,c,d] calculated by :func:`chiavg` for each of the two
# binary image.  Each of the feature uses one one of the following
# basic similarity function, which we define as global variables 
# because they are used also for Group (iii).
#
# ::

f11 = lambda (p7,p8) : min(p7,p8)
f12 = lambda (p7,p8) : abs(p7-p8)
f13 = lambda (p7,p8) : - p7 * np.log(p8)
f14 = lambda (p7,p8) : - p7 * np.log(p7/p8)
basefunc = [ f11, f12, f13, f14 ]

def g2(h1,h2):

# The input include two lists h1 and h2 representing the two bit planes
# to be compared.  We divide each of them by four, and zip them together
# to a single list of pairs (p7^j,p8^j).
#
#   ::

  L = zip(list( h1 / 4 ), list( h2 / 4 ) )

# Each of the features applies a simple function to a pair of the list
# L, and add the result over all the pairs in the list.  These four basic
# are defined in the global basefunc list.  The actual features can be
# calculated by applying each function f on each pair from L (using
# :func:`map`) and then add over the elements of L.
#
#   ::

  return [ sum( map(f,L) ) for f in basefunc ]

# Group (iii)
# -----------
#
# The structure of Group (iii) is very similar to Group (ii),
# but it is not based on the same base statistics a,b,c,d.
# Instead each possible value for a 3x3 binary submatrix is
# enumerated (range 0..511), and for each pixel, the corresponding
# type of its 3x3 neighbourhood is recorded.  The histogram of
# these neighbourhood types is used as the underlying statistics.
# This histogram, for a binary image X, is calculated using the
# following :func:`shist` function.
#
# ::

def shist(X):
  (M,N) = X.shape

# To take care of the boundary, we need to pad the array with a
# zero frame.
#
#   ::

  Y = np.zeros((M+2,N+2))
  Y[1:-1,1:-1] = X

# We generate the new array by multiplying submatrices of Y with 
# the mask filter, and then take the histogram of this array.
#
#   ::

  mask = np.array( [[ 1,2,4] , [2**7, 2**8, 2**3], [2**6, 2**5, 2**4]] )
  Z = np.array( [ [ np.sum( mask * Y[x:(x+3),y:(y+3)] ) 
                  for y in xrange(N) ] for x in xrange(M) ] )
  return np.histogram( Z, range(513) )[0].astype(float)

# The main function :func:`g3` is implemented very similarly to 
# :func:`g2`, except that the input is two binary images, and
# the histogram to be used is calculated internally.
#
# ::

def g3(X7,X8):
  L = zip( shist(X7), shist(X8) )
  return [ sum( map(f,L) ) for f in basefunc ]


# The :class:`bsmVector` class
# ----------------------------

def _bitplanes(I):
    return ( (I>>1) % 2 , I % 2 )

[docs]def bsmraw(I): "Return the raw BSM features underlying groups (i) and (ii)." if I == None: return namesraw X7,X8 = _bitplanes(I) return list(chiavg(X7)) + list(chiavg(X8))
[docs]def bsm3(I): "Return group (iii) of the BSM features." if I == None: return names3 X7,X8 = _bitplanes(I) return g3(X7,X8)
featureDic = { "BSM-12" : [ ("Raw 7",), ("Raw 8",), ("(iii)",) ], "BSM-18" : [ ("(i)",), ("(ii)",), ("(iii)",) ], } dlist = [ (0,1), (1,0), (-1,0), (0,-1) ] names1 = [ "BSM[i]m%s" % (x,) for x in range(1,11) ] names2 = [ "BSM[ii]" + s for s in [ "f1", "f2", "f3", "f4" ] ] names3 = [ "BSM[iii]" + s for s in [ "f1", "f2", "f3", "f4" ] ] namesraw = [ "BSMR7(%s,%s)" % (x,y) for (x,y) in dlist ] + \ [ "BSMR8(%s,%s)" % (x,y) for (x,y) in dlist ]
[docs]def bsm12(I): "Return groups (i) and (ii) of the BSM features." if I == None: return names1 + names2 X7,X8 = _bitplanes(I) h7 = chiavg(X7) h8 = chiavg(X8) return list( g1(h7) - g1(h8) ) + g2(h7,h8)