Source code for pysteg.features.homgen

##  $Id$
## -*- coding: utf-8 -*-

"""
Functions to calculate generic higher-order features,
following the lines of HOLMES.

:Module:    pysteg.features.homgen
:Date:      $Date$
:Revision:  $Revision$
:Copyright: © 2012 Hans Georg Schaathun <georg@schaathun.net>

This module is currently not used.  It is intended to replace
the markov module to be simpler and more generic.

It appears to be safe for various input constituent types,
because the scipy.signal module converts to 64-bit integers
in calculation.  Thus 8-, 16-, and 32-bit images should be
safe, whether signed or unsigned.
"""

print "[pysteg.features.homgen] $Id$"
__all__ = [ "HOM", "SPAM" ]

# ######################
# pysteg.features.homgen
# ######################
# 
# .. automodule:: pysteg.features.homgen
 
import numpy as np
import scipy.signal as sig

diff_filter = {
      1 : np.array( [ -1, 1 ] ),
      2 : np.array( [ -1, 2, -1 ] ),
      }
dir_dict = {
       "h":  (0,1),
       "v":  (1,0),
       "d":  (1,1),
       "m":  (1,-1),
       }

def convolve(M,F):
   (x,y) = F.shape
   S = sig.convolve(M,F)
   if x > 1:
      x -= 1
      S = S[x:-x,:]
   if y > 1:
      y -= 1
      S = S[:,y:-y]
   return S

def diffMatrix(M,dir="h",order=1):
   """Calculate the difference matrix of M in direction dir and
   of the given order."""
   B = diff_filter[order]
   if dir == "h": F = B[None,:]
   elif dir == "v": F = B[:,None]
   elif dir == "d": F = np.diag(B)
   elif dir == "m": F = np.diag(B)[::-1,:]
   else: raise Exception, "Unknown direction."
   #return convolve(M,F)
   return M[:,:-1].astype(np.int64) - M[:,1:].astype(np.int64)

def cap(M,T):
   """Cap a matrix M by +/- T, by replacing entries out of range
   by the capping value."""
   M[( M >  T)] = T
   M[( M < -T)] = -T
   return M

def stepfunc(N,d,order=1):
   "Auxliary function for splitMatrix()."
   if d == 0:
      fx = lambda x : [ x for i in range(order) ]
      XL = range(N)
   elif d < 0:
      fx = lambda x : [ x+i-1 for i in range(order,0,-1) ]
      XL = range(N+1-order)
   else:
      fx = lambda x : [ x+i for i in range(order) ]
      XL = range(N+1-order)
   return (fx,XL)

def makeTuples(C,dir,order=2):
   """
   Return two (overlapping) submatrices of C dislocated by one
   element in the direction dir.  This is used for calculation of
   both difference matrices and transition probability matrices.

   dir is a pair (x,y) indicating the vector difference between
   the two co-ordinates of each difference
    
   Alternatively, dir may be one of "h" (horizontal),
   "v" (vertical), "d" (diagonal), "m" (minor diagonal),
   or "Xr" (reverse order where X is one of the above).
   """
   if type(dir) == str:
      if dir_dict.has_key(dir): (d1,d2) = dir_dict[dir]
      else: raise Exception, "Unknown direction for differences"
   elif type(dir) == tuple:
      (d1,d2) = dir
   else:
      raise Exception, "Unexpected type (%s)" % (dir,)
   (M,N) = C.shape
   (fx,XL) = stepfunc(M,d1,order)
   (fy,YL) = stepfunc(N,d2,order)
   return np.vstack( [C[fx(x),fy(y)] for x in XL for y in YL] )

[docs]def CM(C,dir,T,order=2): """Return the co-occurrence matrix of C of the given order along direction dir. Relative frequencies are used, so the result is a floating point array.""" S = makeTuples(C,dir,order) R = range(-T,T+2) (N,d) = S.shape b = tuple([R for i in xrange(d)]) (h,b) = np.histogramdd( S, bins=b ) print "Non-zero entries:", h[(h>0)].size h = h.astype(np.float64) / N print "Non-zero entries:", h[(h>0)].size return h
[docs]def HOM(C,dir,difforder=1,order=2,capvalue=None,limit=None): """Calculate features based on a generalised higher-order model. Either capvalue or limit has to be specified. To cap the underlying difference matrix, use capvalue. To use the unmodified difference matrix, use limit to select the central portion of the co-occurrence matrix.""" print "HOM", dir, difforder, order, capvalue, limit D = diffMatrix(C,dir=dir,order=difforder) print "[HOM]", D.size, D[(D<0)].size, D[(D>0)].size print "[HOM]", D.size, D[(D<-3)].size, D[(D>3)].size if capvalue != None: D = cap(D,capvalue) if limit == None: limit = capvalue print "[CM]", limit, capvalue return CM(D,dir=dir,T=limit,order=order)
[docs]def SPAM(C,dir,order): if dir == "d": dir = [ "d", "m" ] elif dir == "hv": dir = [ "h", "v" ] else: raise Exception, "Invalid direction" if order == 1: T = 4 elif order == 2: T = 3 else: raise Exception, "Invalid order" (tmp,F1,R1) = diffTPM(C,dir[0],difforder=1,order=order,capvalue=T) (tmp,F2,R2) = diffTPM(C,dir[1],difforder=1,order=order,capvalue=T) return (F1+R1+F2+R2) / 4
def div0nan(A,B): X = A / B X[(np.isnan(X))] = 0 return X def diffTPM(C,dir,capvalue,difforder=1,order=2): """Calculate the difference matrix D of C and return both the co-occurrence matrix of D and the backwards and forwards transition probability matrices for a Markov model of the given order. This should be used for the SPAM features.""" P = HOM(C,dir,difforder=difforder,order=order+1,capvalue=capvalue) d = len(P.shape) S1 = np.sum(P,axis=-1) S2 = np.sum(P,axis=0) S1 = np.reshape( S1, S1.shape + (1,) ) S2 = np.reshape( S2, (1,) + S2.shape ) return (P, div0nan( P,S1 ), div0nan( P,S2 ) )