# Source code for pysteg.sql.tables

#! /usr/bin/env python
## -*- coding: utf-8 -*-
## (C) 2012: Hans Georg Schaathun <georg@schaathun.net> 

"""
This module defines SQLObject classes for the image and feature datasets.
The SQL database tables are defined through the SQLObject definitions.
"""

# TODO: Consider threading to parallelise I/O

from sqlobject import *
from .config import config
from .aux import *
import numpy as np

__all__ = [ "Image", "Feature", "FeatureValue",
            "FeatureSet", "FeatureVector",
            "ImageSet", "TestSet", "TestImage",
      ]

class ImageSet(SQLObject):
    """Image Set is a collection of images from the same source and which
    have been subject to similar processing.  It may be an original image
    base, or a collection of Images processed from an image base."""

    source = ForeignKey("ImageSet", default=None)
    images = SQLMultipleJoin("Image", joinColumn="imageset")  # Contents
    path = StringCol(alternateID=True)       # Directory path name
    name = StringCol(alternateID=True)       # Shorthand name
    fileformat = StringCol()
    extension = StringCol(default=None)
    imgformat = StringCol()                  # Image format, e.g. JPEG, pixmap
    colour = BoolCol(default=False)          # True if the image is colour
    description = StringCol()                # Description
    stego = StringCol(default=None)          # Stego system used or null
    conv = StringCol(default=None)           # Conversion command

    @classmethod
    def byPath(cls, path):
        """Return an ImageSet object by giving the directory where it
        resides.  The pathname of the directory may be absolute or
        relative to the imageroot.

        The imageroot prefix (plus one separator character) is stripped
        before the lookup, so the stored `path` is matched relative to
        the imageroot.  An exception is raised by the query if there is
        no unique match.
        """
        imageroot = config.get("sql", "imageroot")
        if path.startswith(imageroot):
            # Assumes imageroot is configured without a trailing slash,
            # so that exactly one separator follows the prefix.
            path = path[len(imageroot) + 1:]
        # The previous implementation called SQLObject.byPath(), which
        # the SQLObject base class does not provide; query the column
        # directly instead.
        # NOTE(review): `path` is declared with alternateID=True, for
        # which SQLObject generates a byPath finder of its own -- confirm
        # which of the two definitions ends up on the class.
        return cls.selectBy(path=path).getOne()

    def __iter__(self):
        """Image sets iterate over their constituent images."""
        return iter(self.images)

    def getPath(self):
        "Get the full path to the image set directory."
        imageroot = config.get("sql", "imageroot")
        if self.path.startswith(imageroot):
            # The stored path already includes the imageroot.
            return self.path
        return imageroot + "/" + self.path

    def getBasename(self, base):
        """Look up an image by its base filename (excluding extension).
        None is returned if no image matches."""
        matches = self.images.filter(
            Image.q.filename.startswith(base + "."))
        return matches.getOne(None)

    @classmethod
    def destroyKey(cls, key):
        "Delete the object with the given key (the shorthand name)."
        return cls.byName(key).destroy()

    def destroy(self):
        "Delete the object, including constituent images."
        # Single-argument print() produces the same output whether or
        # not print is a statement.
        print("[destroy] %s" % (self,))
        for im in self.images:
            print(im)
            im.destroySelf()
        return self.destroySelf()
class Image(SQLObject):
    """An Image is an Image Object to be analysed.  It may be an identical
    copy of a Source Image, or it may be a modified version obtained by
    stego embedding, compression, down sampling, etc."""

    filename = StringCol()                        # Filename
    imageset = ForeignKey("ImageSet")             # Collection
    idx = DatabaseIndex('imageset', 'filename', unique=True)
    source = ForeignKey("Image", default=None)    # Source File
    msglen = IntCol(default=None)    # Absolute message length (or null)
    msgfrac = IntCol(default=None)   # Relative message length (or null)
    # Note: either msglen or msgfrac should normally be null
    features = SQLMultipleJoin("FeatureValue")
    testsets = RelatedJoin("TestSet",
                           intermediateTable="test_image",
                           joinColumn="image_id",
                           otherColumn="imageset_id",
                           createRelatedTable=False)

    # Universal object methods

    def __call__(self):
        """The Image object is made callable for compatibility with
        TestImage.  It returns the object itself."""
        return self

    def __str__(self):
        "The string representation is the id number and the full path name."
        return "<Image %s: %s>" % (self.id, self.getPath(),)

    # Methods handling image filenames and paths.

    @classmethod
    def byPath(cls, path):
        """Look up an image by its path name.

        The part before the last "/" identifies the ImageSet and the
        remainder is the filename.  An Exception is raised if no image,
        or more than one image, matches.
        """
        directory, _, fn = path.rpartition("/")
        imgset = ImageSet.byPath(directory)
        matches = imgset.images.filter(Image.q.filename == fn)
        found = matches.count()
        if found == 0:
            raise Exception("Image not found")
        if found > 1:
            raise Exception("Path is not unique!")
        return matches[0]

    def getPath(self):
        "Return the full path name for the image."
        return self.imageset.getPath() + "/" + self.filename

    def getBasename(self):
        "Return the basename of the file, stripping any extension off."
        return self.filename.split(".", 1)[0]

    # Retrieve single feature

    def getOneFeature(self, key, verbosity=2):
        """Return the given feature value.

        key may be a Feature object or a feature key.  None is returned
        if the value has not been calculated for this image; a message
        is printed in that case when verbosity > 1.
        """
        if isinstance(key, SQLObject):
            feature = key
        else:
            feature = Feature.byKey(key)
        fval = FeatureValue.selectBy(feature=feature, image=self).getOne(None)
        if fval is None:
            if verbosity > 1:
                print("Missing feature value %s" % (key,))
                print(self)
            return None
        return fval.getValue()

    def getCoverFeature(self, key):
        """Obtain the given feature value recursively from the source
        image."""
        if self.source is None:
            return self.getOneFeature(key)
        return self.source.getCoverFeature(key)

    # Retrieve feature vectors

    def featureValueObjects(self, key=None):
        """Return an iterator of FeatureValue objects defined by the given
        key.  If key is None, all features are included.

        key may also be an SQLObject providing theFeatures(), or the key
        of a FeatureSet or (failing that) of a FeatureVector.  If the
        object has a `dim` attribute and the number of values found
        differs from it, MissingDataException is raised.
        """
        if key is None:
            return self.features
        if isinstance(key, SQLObject):
            fv = key
        else:
            # Only a failed lookup should trigger the fallback; other
            # errors (and KeyboardInterrupt) must propagate.
            try:
                fv = FeatureSet.byKey(key)
            except SQLObjectNotFound:
                fv = FeatureVector.byKey(key)
        verbosity = config.getVerbosity("sql")
        fl = fv.theFeatures(self, verbosity=verbosity)
        try:
            fln = len(fl)
        except TypeError:
            # Result objects without len() are counted by query instead.
            fln = fl.count()
        try:
            expected = fv.dim
        except AttributeError:
            # No dimensionality recorded, so the completeness check
            # cannot be made.  Never mind.
            return fl
        if expected != fln:
            raise MissingDataException(
                "FeatureVector (%s) has not been calculated." % (fv,))
        return fl

    def getFeatures(self, key=None, featureSet=False):
        """Return a feature vector as a list of floating point values.

        The values are ordered by feature ID.  The featureSet argument
        is not used; it is retained for interface compatibility.
        """
        # Sorting on getFID() gives the same order as the objects' own
        # comparison, without depending on __cmp__.
        ordered = sorted(self.featureValueObjects(key),
                         key=lambda fval: fval.getFID())
        return [fval.getValue() for fval in ordered]

    # Adding features

    def addFeatures(self, **kw):
        """Add feature values for the image.  The features are given as
        a dictionary with keys as used in the database and a floating
        point value.  (Not tested!)"""
        for key, value in kw.items():
            FeatureValue(feature=Feature.byKey(key), value=value, image=self)
        return

    def addFeatureMatrix(self, key, M):
        """Add feature values from a numpy array M.

        The given key identifies the FeatureSet and is also the prefix
        to which the element indices are appended to form the individual
        feature keys.  The set's symidx flag is passed on to
        matrix2dict() to select the indexing scheme.
        """
        fset = FeatureSet.byKey(key)
        for (idx, val) in matrix2dict(M, fset.symidx):
            if len(idx) == 1:
                # A one-element index is formatted as "(i)".
                suffix = "(%s)" % idx
            else:
                # Longer indices use their string form without spaces.
                suffix = str(idx).replace(" ", "")
            Feature.byKey(key + suffix).addValue(self, val)
        return

    def addFeaturesNamed(self, vals, names):
        """Add feature values from a list vals.  The keys of the features
        should be given in a list names."""
        for (value, key) in zip(vals, names):
            Feature.byKey(key).addValue(self, value)
        return

    # Analysis

    def delta(self, feature):
        """Compare this image with its cover or source image with respect
        to the given feature.  The return value is the difference between
        the feature values.  None is returned if the image does not have
        a known source image.

        Note that getOneFeature() returns None for a missing value, in
        which case the subtraction raises TypeError.
        """
        if self.source is None:
            return None
        a = self.getOneFeature(feature)
        b = self.source.getOneFeature(feature)
        return a - b
class Feature(SQLObject):
    """A feature is a function of an image.  The database table stores a
    unique key (ID) and a description."""

    cat = ForeignKey("FeatureSet")
    key = StringCol(alternateID=True)      # Human-readable key
    description = StringCol(default=None)
    fv = RelatedJoin("FeatureVector",
                     intermediateTable="vector_feature",
                     joinColumn="feature_id",
                     otherColumn="vector_id",
                     createRelatedTable=False)
    val = SQLMultipleJoin("FeatureValue")

    def destroy(self):
        "Delete the feature including all calculated feature values."
        for v in self.val:
            v.destroySelf()
        return self.destroySelf()

    def __str__(self):
        return "<Feature %s: %s>" % (self.id, self.key)

    def addValue(self, image, value):
        """Add a calculated feature value giving the image and its value.

        NaN is replaced by zero with a warning.  If a value already
        exists for this feature and image, it is updated when it
        differs.  If the database reports a DataError for a value of
        very small magnitude, zero is stored instead.  Any other error
        is re-raised.
        """
        if np.isnan(value):
            value = 0
            print("Warning: NaN detected and converted to zero (%s)."
                  % (self.key,))
            print(image)
        value = float(value)
        try:
            FeatureValue(feature=self, value=value, image=image)
        except Exception as e:
            # isDuplicateError() and tailType() are not defined in this
            # module; presumably they come from the .aux star import.
            if isDuplicateError(e):
                fval = FeatureValue.selectBy(
                    feature=self, image=image).getOne()
                print("Warning: FeatureValue already calculated.")
                if fval.value != value:
                    print("Updating: %s -> %s" % (fval.value, value))
                    fval.value = value
            elif tailType(e) == "DataError":
                # Compare the magnitude: a plain `value < 1e-12` test
                # would also round every negative value to zero.
                if abs(value) < 1e-12:
                    print("Warning! Floating point underflow in database. "
                          "Rounding to zero")
                    FeatureValue(feature=self, value=0, image=image)
                else:
                    raise
            else:
                print("[addValue] Unknown error.")
                raise
        return
class FeatureValue(SQLObject):
    """A Feature Value is a Feature calculated for a particular Image.
    The database table stores references to the Feature and Image as
    foreign keys, and a floating point value.  Each (feature, image)
    pair is unique."""

    feature = ForeignKey("Feature")
    image = ForeignKey("Image")
    value = FloatCol()
    # TODO: Consider adding timestamp and/or versioning
    idx = DatabaseIndex('feature', 'image', unique=True)
    idximg = DatabaseIndex('image', unique=False)

    def getValue(self):
        "Accessor for the value field."
        return self.value

    def getFID(self):
        """Return the ID of the feature.  The ID is currently an integer,
        and one can assume that it is comparable.  It can be used to give
        a canonical ordering of features.  It is provided as a method for
        compatibility with decorator patterns and other objects mimicking
        the interface."""
        return self.feature.id

    def __cmp__(self, r):
        """Comparison is by feature ID.  FeatureValue objects can be
        compared with any other object implementing a compatible
        getFID() method."""
        mine, theirs = self.getFID(), r.getFID()
        # Same -1/0/1 result as cmp(), without needing the builtin.
        return (mine > theirs) - (mine < theirs)

    def __lt__(self, r):
        """Rich-comparison counterpart of __cmp__, so that sorting also
        works where __cmp__ is not consulted."""
        return self.getFID() < r.getFID()
class FeatureSet(SQLObject):
    """A Feature Set is a collection of Features with a common description.

    Fields to be set in the constructor:

    :key: human-readable, unique key
    :description: longer description of the features
    :func: python function to extract the feature.
      The function is stored as a string and interpreted using eval().
    :jpeg(bool): flag to indicate that the extraction function takes
      a jpeg object instead of a pixmap matrix.
    :matrix(bool): flag to indicate a feature set represented by a matrix.
      If set, the addFeatureMatrix() method applies.
    :symidx(bool): (assumes matrix) flag to indicate that individual
      elements should be indexed symmetrically around 0.

    Relational fields:

    :features (SelectResult): the included features

    A `queues` relation (queue jobs asking to extract the feature set)
    is currently disabled in the code.
    """

    key = StringCol(alternateID=True)   # Human-readable key
    description = StringCol()           # Possibly longer description
    func = StringCol(default=None)      # Python function for extraction
    jpeg = BoolCol(default=False)       # Is it calculated from JPEG?
    matrix = BoolCol(default=True)      # Is it calculated as a matrix?
    symidx = BoolCol(default=True)      # Is the matrix symmetrically indexed?
    features = SQLMultipleJoin("Feature", joinColumn="cat_id")
    #queues = SQLRelatedJoin( "Queue" )

    def __iter__(self):
        """Iterating a FeatureSet yields its constituent features."""
        return iter(self.features)

    def theFeatures(self, image=None, verbosity=0):
        """Return an SQLResult of FeatureValue objects belonging to the
        features of this set.  When an image is given, only the values
        for that image are included.  (verbosity is accepted but not
        used.)"""
        values = self.features.orderBy("id").throughTo.val
        if image is None:
            return values
        return values.filter(FeatureValue.q.image == image)

    def count(self):
        "Return the number of features in the set."
        return self.features.count()

    def __str__(self):
        return "<FeatureSet %s '%s'>" % (self.id, self.key,)

    @classmethod
    def destroyKey(cls, key):
        """Look up the object with the given key and delete it."""
        return cls.byKey(key).destroy()

    def destroy(self):
        """Delete the object including constituent features and feature
        values."""
        print("[destroy] %s" % (self,))
        # Deleting dependent queue entries is disabled together with
        # the queues relation:
        ### for q in self.queues:
        ###     print q
        ###     q.destroySelf()
        for feature in self.features:
            print(feature)
            feature.destroy()
        return self.destroySelf()
class VectorFeature(SQLObject):
    """Relational table linking Features to the FeatureVectors they are
    part of.  It is stored in the "vector_feature" table.

    It should not be necessary to use this class directly.
    """

    class sqlmeta:
        # Explicit table name, matching the intermediateTable named in
        # the joins that go through this relation.
        table = "vector_feature"

    feature = ForeignKey("Feature")
    vector = ForeignKey("FeatureVector")
class FeatureVector(SQLObject):
    """A Feature Vector is a vector where each element is a Feature.
    The database table stores Feature Vectors which form the basis for
    classifiers.  Where Feature Sets contain Features with common
    descriptions, Feature Vectors contain Features which are used
    together."""

    key = StringCol(alternateID=True)    # Human-readable key
    dim = IntCol()                       # Dimensionality
    credit = StringCol(default=None)     # Reference to source paper or similar
    description = StringCol(default=None)
    features = SQLRelatedJoin("Feature",
                              intermediateTable="vector_feature",
                              joinColumn="vector_id",
                              otherColumn="feature_id",
                              createRelatedTable=False)

    def __iter__(self):
        """Iterating a feature vector yields its constituent features
        ordered by id, which makes it compatible with FeatureSet
        objects."""
        return iter(self.features.orderBy("id"))

    def theFeatures(self, image=None, verbosity=0):
        """Return an SQLResult of FeatureValue objects belonging to the
        features of this vector.  When an image is given, only the
        values for that image are included.  (verbosity is accepted but
        not used.)"""
        values = self.features.orderBy("id").throughTo.val
        if image is None:
            return values
        return values.filter(FeatureValue.q.image == image)

    def count(self):
        "Return the dimensionality of the feature vector (the dim field)."
        return self.dim

    @classmethod
    def destroyKey(cls, key):
        "Look up the object with the given key and delete it."
        return cls.byKey(key).destroy()

    def destroy(self):
        """Delete the object including corresponding objects in the
        relation table VectorFeature."""
        print("[destroy] %s" % (self,))
        for link in VectorFeature.selectBy(vector=self):
            print(link)
            link.destroySelf()
        return self.destroySelf()
class TestSet(SQLObject):
    """A TestSet is a collection of images used for training or testing
    of a classifier."""

    name = StringCol(alternateID=True)
    images = SQLRelatedJoin("Image",
                            intermediateTable="test_image",
                            joinColumn="imageset_id",
                            otherColumn="image_id",
                            createRelatedTable=False)
    testimg = SQLMultipleJoin("TestImage", joinColumn="imageset_id")
    perf = SQLMultipleJoin("SVMPerformance", joinColumn="testset_id")

    def destroy(self):
        """Delete the object, including dependent TestImage objects and
        SVMPerformance objects."""
        for timg in self.testimg:
            timg.destroySelf()
        for result in self.perf:
            result.destroySelf()
        return self.destroySelf()

    def count(self):
        "Return the number of images in the set."
        return self.images.count()

    def getClass(self, label=1):
        """Generate the Test Image objects of the set whose class label
        equals the given label."""
        for timg in self:
            if timg.label == label:
                yield timg

    def __iter__(self):
        "The iterator is over constituent Test Image objects."
        return iter(self.testimg)

    # Return feature values and feature vectors

    def getOneFeature(self, f):
        """Return an unsorted list of feature values for the given
        feature f which can be a Feature object or a key.

        This appears to be exceedingly slow.
        TODO: It should be optimised to use a single query to the server.
        """
        values = []
        for img in self.images:
            values.append(img.getOneFeature(f))
        return values

    def getFeatures(self, fv):
        """Return a triple (l,v,i) where l is a list of labels, v is a
        list of feature vectors and i is a list of the Image objects,
        with one entry per test image in each list.
        This is designed to be compatible with libSVM.
        """
        labels, vectors, images = [], [], []
        for timg in self.testimg:
            labels.append(timg.label)
            vectors.append(timg.image.getFeatures(fv))
            images.append(timg())
        return (labels, vectors, images)
class TestImage(SQLObject):
    """TestImage is a relational table marking a given Image as included
    in a Test or Training Set.  It includes additional fields, where
    label is used for classification and response for regression.
    Clearly, these numbers could be derived from Image data on the fly,
    but because it depends on both the Image and ImageSet tables that
    seems cumbersome and it is preferable at this stage to hardcode it
    in the relational table.

    The TestImage class is a decorator for the Image class: the feature
    and path methods of Image are forwarded to the underlying Image
    object.  See the :class:`Image` class for details.

    For any :class:`Image` or :class:`TestImage` object img, the call
    img() returns the appropriate Image object.  This should be used
    polymorphically whenever the type is unknown and the Image (or
    Image ID) is required.
    """

    class sqlmeta:
        table = "test_image"

    image = ForeignKey("Image")
    imageset = ForeignKey("TestSet")
    label = IntCol(default=None)        # Class label (or null)
    response = FloatCol(default=None)   # Response variable (or null)

    def __call__(self):
        """Calling a TestImage returns the corresponding Image object."""
        return self.image

    def copy(self, imageset):
        "Copy this image into the TestSet imageset, with the same settings."
        return TestImage(image=self.image, imageset=imageset,
                         label=self.label, response=self.response)

    def __str__(self):
        """The string representation is the id number, the full path name,
        and the TestSet it belongs to."""
        return "<TestImage %s: %s (%s)>" % (
            self.id, self.getPath(), self.imageset.name)

    # The following methods belong to the decorator pattern for Image.
    # Each simply forwards to the underlying Image object.

    def delta(self, *a, **kw):
        return self.image.delta(*a, **kw)

    def getCoverFeature(self, *a, **kw):
        return self.image.getCoverFeature(*a, **kw)

    def getOneFeature(self, *a, **kw):
        return self.image.getOneFeature(*a, **kw)

    def featureValueObjects(self, *a, **kw):
        return self.image.featureValueObjects(*a, **kw)

    def getFeatures(self, *a, **kw):
        return self.image.getFeatures(*a, **kw)

    def getPath(self):
        return self.image.getPath()

    def getBasename(self):
        return self.image.getBasename()

    def addFeatures(self, *a, **kw):
        return self.image.addFeatures(*a, **kw)

    def addFeatureMatrix(self, *a, **kw):
        return self.image.addFeatureMatrix(*a, **kw)

    def addFeaturesNamed(self, *a, **kw):
        # Previously missing from the forwarded methods, although Image
        # provides it alongside addFeatures and addFeatureMatrix.
        return self.image.addFeaturesNamed(*a, **kw)
class FeatureLog(SQLObject):
    """SQL table to record when each feature set is calculated for each
    image.  This is used primarily to avoid recomputation, as the
    :mod:`extract` module checks for an existing record and aborts if
    one is found.  Each (image, fset) pair is unique."""

    image = ForeignKey("Image")
    fset = ForeignKey("FeatureSet")
    entered = DateTimeCol(default=None)
    idx = DatabaseIndex('image', 'fset', unique=True)


if __name__ == "__main__":
    # Minimal self-test against an in-memory SQLite database.
    sqlhub.processConnection = connectionForURI('sqlite:/:memory:')
    # createTables() is not defined in this module; presumably it is
    # provided by the .aux star import -- TODO confirm.
    createTables()
    # The ImageSet column is called `description`; the former `desc=`
    # keyword does not correspond to any column.
    S = ImageSet(path="/foobar/", fileformat="JPEG", imgformat="JPEG",
                 description="Dummy test set", name="Test01")
    Image(filename="foobar", imageset=S)
    print(Image.get(1))