Source code for pysteg.sql.extract

#! /usr/bin/env python
## -*- coding: utf-8 -*-
# (C) 2012: Hans Georg Schaathun <hg@schaathun.net>

"""
Functions to load images, extract features, and enter them into the
database.  This module is the glue between the SQL object library
and the features module.

Only two functions are exported, one to enter tasks in the queue and one
to process the queue.
"""

from . import *
from .queue import *
from .svmodel import *
from ..imtools import imread
from ..features import *
import pysteg.ssteg.lsb as lsb
import signal
from datetime import datetime
import tables

__all__ = [ "queueSet", "worker" ]

# Star-import every module named in the comma-separated "featuremodules"
# option of the "sql" configuration section.  Empty entries are skipped.
# NOTE(review): presumably this is what makes the extraction functions
# visible to the eval() call in _extract1 -- confirm.  The module names
# are executed as code, so the configuration must come from a trusted source.
imp = [ s.strip() for s in config.get( "sql", "featuremodules" ).split( "," ) ]
for s in imp:
   if s != "":
      exec("from %s import *" % (s,))

class sigState(object):
   """This object listens to a given signal and records whether it has
   occurred.  It is a state machine which is initially False, and changes
   to True when the signal is received.  The state can be read by the
   isset() method, and reset to False with the unset() method.

   It is used to allow the queue processor to complete the current task
   before terminating when an agreed signal is received.
   """
   def __init__(self,sig):
      """Install this object as the handler for the signal number sig.

      Any handler previously installed for sig is replaced.
      """
      self.state = False
      # Register for the signal the caller asked for, not a fixed one.
      signal.signal(sig,self)
   def __call__(self,*a,**kw):
      """Signal handler: print the arguments received and set the state."""
      print("Signal caught %s" % (a,))
      print(kw)
      self.state = True
   def unset(self):
      """Reset the state to False."""
      self.state = False
   def isset(self):
      """Return True if the signal has been received since the last reset."""
      return self.state

def _extract1( im, img, fv, verbosity=1 ):
   """Auxiliary method for extract(), handling a single feature set.

   im        -- the image matrix, handed to the extraction function
   img       -- the Image SQL object which receives the features
   fv        -- the feature set, either as a key (str), which is looked
                up with FeatureSet.byKey(), or as an SQLObject
   verbosity -- diagnostics are printed when this is positive, and
                further details when it is greater than one

   Nothing is returned.  No extraction takes place if a FeatureLog entry
   already exists for this image and feature set, or if the image already
   holds as many features for the set as the set defines; in the latter
   case the missing FeatureLog entry is created.

   Raises TypeError if fv is neither a str nor an SQLObject.
   """
   if isinstance(fv,str):
      key = fv
      fv = FeatureSet.byKey( fv )  # Look up SQL object for feature vector
   elif isinstance(fv,SQLObject):
      key = fv.key
   else:
      raise TypeError("Third argument should be a feature vector")
   if verbosity > 0:
      print("%s %s" % (fv, key))
   log = tables.FeatureLog.selectBy( image=img, fset=fv ).getOne(None)
   # Check if feature set has been calculated:
   if log is not None:
      if verbosity > 0:
         print("[_extract1] Features already calculated.  Returning")
         print(log)
      return
   # No log entry; compare the number of features stored for the image
   # with the number of features defined by the feature set.
   N = len(img.getFeatures(fv))
   N0 = fv.features.count()
   if N == N0:
      print("[_extract1] Features already calculated.  Logging and returning.")
      log = tables.FeatureLog( image=img, fset=fv )
      if verbosity > 0:
         print("[_extract1] %s %s" % (N, N0))
         print(log)
      return
   if verbosity > 0:
      print("Extraction function: %s" % (fv.func,))
   # NOTE: eval() executes the string stored in the SQL entry as code, so
   # the database contents must be trusted.
   fx = eval(fv.func)        # Get extraction function from SQL entry
   val = fx(im)              # Extract features
   if fv.matrix:
      if hasattr(val,"shape") and verbosity > 1:
         print("[_extract1] val.shape = %s" % (val.shape,))
      img.addFeatureMatrix(key,val)
   else:
      # The extraction function is called with None to obtain the names
      # which are stored together with the values.
      names = fx(None)
      img.addFeaturesNamed(val,names)
   log = tables.FeatureLog( image=img, fset=fv, entered=datetime.now() )
   if verbosity > 1:
      print(log)
   return

def extract( img, fv, verbosity=1 ):
   """Extract the given feature set(s) fv from the image img.

   img       -- an SQLObject whose getPath() gives the image file, or a
                path (str) which is looked up with Image.byPath()
   fv        -- a single feature set, or a list of feature sets, each in
                a form accepted by _extract1()
   verbosity -- passed on to _extract1()

   The image file is read once, and the resulting matrix is used for
   every feature set.  Nothing is returned.

   Raises TypeError if img is neither an SQLObject nor a str.
   """
   if isinstance(img,SQLObject):
      fn = img.getPath()        # Look up file path
   elif isinstance(img,str):
      fn = img
      img = Image.byPath( img )
   else:
      raise TypeError("Was expecting an Image as first argument")
   im = imread( fn )         # Load image
   # Treat a single feature set as a list of one.
   fsets = fv if isinstance(fv,list) else [fv]
   for fv0 in fsets:
      _extract1(im,img,fv0,verbosity=verbosity)
   return

def worker( *a, **kw ):
   """Process jobs from the queue until the queue is empty, or the
   SIGUSR2 signal is received.

   All arguments are passed on unchanged to qProcess() for every job.
   The return value is the number of calls to qProcess() which returned
   a true value; the final call, which returned a false value and ended
   the loop, is not counted.

   It may be suboptimal to configure the signal handler in the API.
   It might be better to move the loop to the script defining the UI.
   """
   count = 0
   while True:
      if not qProcess( *a, **kw ):
         return count
      count += 1
def qProcess( *a, **kw ):
   """Process one job from the queue.

   All arguments are passed on to Queue.getJob(), except that the keyword
   argument verbosity is always set to the verbosity configured for "sql".

   If the job has an image, each of the job's feature sets is extracted
   from that image.  Otherwise the job must have an SVM model: without a
   test set the model is trained and saved, and with a test set the model
   is loaded and an SVMPerformance object is created and run.  When data
   are missing for the model or the test, the job is released with
   success=False and the model or performance object is destroyed.

   Returns False if no job was obtained, or if SIGUSR2 was caught while
   the job was processed, and True otherwise.

   Raises DataIntegrityException if the job has neither an image nor an
   SVM model.
   """
   verbosity = config.getVerbosity( "sql" )
   kw["verbosity"] = verbosity
   # Listen for SIGUSR2 for the duration of this job.
   stop = sigState( signal.SIGUSR2 )
   q = Queue.getJob( *a, **kw )
   if verbosity > 0:
      print("[qProcess] %s" % (q,))
   if not q:
      return False
   img = q.image
   if img is not None:
      for fs in q.features:
         extract( img, fs, verbosity=verbosity )
   else:
      S = q.testset
      M = q.svmodel
      if M is None:
         raise DataIntegrityException(
               "Neither SVM model nor image is given for queue item." )
      if S is None:
         try:
            M.train()
            M.saveModel()
         except MissingDataException:
            print("Missing data for %s" % (M,))
            q.releaseJob(success=False)
            q = None    # The job has been released already
            M.destroy()
      else:
         M.loadModel()
         # P stays None if the constructor itself raises, so that the
         # handler below never refers to an unbound name.
         P = None
         try:
            P = SVMPerformance( svmodel=M, testset=S )
            P.run()
         except MissingDataException:
            print("Missing data for %s" % (P,))
            q.releaseJob(success=False)
            q = None    # The job has been released already
            if P is not None:
               P.destroy()
   if q is not None:
      q.releaseJob()
   return not stop.isset()

def extractSet( imgset, fv ):
   """Extract the given feature sets or list of feature sets fv from
   every image in imgset which may be an ImageSet or TestSet.
   This is obviously only useful when parallellisation is not possible.

   An imgset without an __iter__ attribute is looked up with
   ImageSet.byPath() first.  Nothing is returned.

   NOT USED!  Use the queue system instead.
   """
   # A Python 2 str has no __iter__ attribute, so a path string is
   # looked up here.
   if not hasattr( imgset, "__iter__" ):
      imgset = ImageSet.byPath( imgset )
   for img in imgset:
      extract( img, fv )
   return
def queueSet( imgset, fv, stegonly=False ):
   """Queue new tasks for feature extraction.  The given feature sets
   or list of feature sets fv are queued for every image in imgset,
   which can be either an ImageSet or a TestSet.

   An imgset without an __iter__ attribute is looked up with
   ImageSet.byPath() first.  If stegonly is true, only the images given
   by imgset.getClass(1) are queued (presumably the steganograms --
   confirm against the image set classes).  Every image is printed as
   it is queued.  Nothing is returned.
   """
   # A Python 2 str has no __iter__ attribute, so a path string is
   # looked up here.
   if not hasattr( imgset, "__iter__" ):
      imgset = ImageSet.byPath( imgset )
   L = imgset.getClass(1) if stegonly else imgset
   for img in L:
      print(img)
      Queue.addToImage( img, fv )
   return