#! /usr/bin/env python
## -*- coding: utf-8 -*-
## (C) 2012: Hans Georg Schaathun <georg@schaathun.net>
"""
This module defines SQLObject classes for the image and feature datasets.
The SQL database tables are defined through the SQLObject definitions.
"""
# TODO: Consider threading to parallelise I/O
from sqlobject import *
from .config import config
from .aux import *
import numpy as np
# Names exported by a star-import of this module.
__all__ = [
    "Image",
    "Feature",
    "FeatureValue",
    "FeatureSet",
    "FeatureVector",
    "ImageSet",
    "TestSet",
    "TestImage",
]
class ImageSet(SQLObject):
    """Image Set is a collection of images from the same source and
    which have been subject to similar processing.  It may be an original
    image base, or a collection of Images processed from an image base.

    Iterating over an ImageSet yields its constituent Image objects.
    """

    source = ForeignKey("ImageSet", default=None)
    images = SQLMultipleJoin("Image", joinColumn="imageset")  # Contents
    path = StringCol(alternateID=True)       # Directory path name
    name = StringCol(alternateID=True)       # Shorthand name
    fileformat = StringCol()
    extension = StringCol(default=None)
    imgformat = StringCol()                  # Image format, e.g. JPEG, pixmap
    colour = BoolCol(default=False)          # True if the image is colour
    description = StringCol()                # Description
    stego = StringCol(default=None)          # Stego system used or null
    conv = StringCol(default=None)           # Conversion command

    @classmethod
    def byPath(cls, path):
        """Return an ImageSet object by giving the directory where it resides.

        The pathname of the directory may be absolute or relative to
        the imageroot (configuration option ``imageroot`` in section
        ``sql``).  A leading imageroot is stripped before the lookup.

        The lookup is done with ``selectBy(path=...).getOne()``, which
        raises when no row matches.

        NOTE(review): ``path`` is declared with ``alternateID=True``, so
        SQLObject is expected to generate a ``byPath`` finder of its own;
        confirm which definition is effective at runtime.
        """
        imageroot = config.get("sql", "imageroot")
        root = imageroot.rstrip("/")
        # Strip the root only on a directory boundary, so that e.g.
        # "/data/images2/x" is not mistaken for a path below "/data/images".
        if path == root or path.startswith(root + "/"):
            path = path[len(root) + 1:]
        # The SQLObject base class has no byPath() to delegate to, so the
        # row is fetched through the ordinary select API instead.
        return cls.selectBy(path=path).getOne()

    def __iter__(self):
        """Iterate over the images of the set."""
        return iter(self.images)

    def getPath(self):
        "Get the full path to the image set directory."
        imageroot = config.get("sql", "imageroot")
        # A stored path that already starts with the imageroot is returned
        # unchanged; anything else is taken to be relative to the imageroot.
        if self.path.startswith(imageroot):
            return self.path
        return imageroot + "/" + self.path

    def getBasename(self, base):
        """Look up an image by its base filename (excluding extension).

        Returns None if no image matches.
        """
        matches = self.images.filter(Image.q.filename.startswith(base + "."))
        return matches.getOne(None)

    @classmethod
    def destroyKey(cls, key):
        "Delete the object with the given key (the shorthand name)."
        obj = cls.byName(key)
        return obj.destroy()

    def destroy(self):
        "Delete the object, including constituent images."
        print("[destroy] %s" % (self,))
        for im in self.images:
            print(im)
            im.destroySelf()
        return self.destroySelf()
class Image(SQLObject):
    """An Image is an Image Object to be analysed.  It may be an
    identical copy of a Source Image, or it may be a modified version
    obtained by stego embedding, compression, down sampling, etc."""

    filename = StringCol()                       # Filename
    imageset = ForeignKey("ImageSet")            # Collection
    idx = DatabaseIndex('imageset', 'filename', unique=True)
    source = ForeignKey("Image", default=None)   # Source File
    msglen = IntCol(default=None)    # Absolute message length (or null)
    # NOTE(review): declared as IntCol although described as a relative
    # length -- confirm the intended unit.
    msgfrac = IntCol(default=None)   # Relative message length (or null)
    # Note: either msglen or msgfrac should normally be null
    features = SQLMultipleJoin("FeatureValue")
    testsets = RelatedJoin("TestSet", intermediateTable="test_image",
                           joinColumn="image_id", otherColumn="imageset_id",
                           createRelatedTable=False)

    # Universal object methods

    def __call__(self):
        """The Image object is made callable for compatibility with TestImage.
        It returns the object itself."""
        return self

    def __str__(self):
        "The string representation is the id number and the full path name."
        return "<Image %s: %s>" % (self.id, self.getPath(),)

    # Methods handling image filenames and paths.

    @classmethod
    def byPath(cls, path):
        """Look up an image by its path name.

        The part before the last "/" is resolved with ImageSet.byPath(),
        the remainder is matched against the filename column.
        Raises Exception if there is no match or more than one match.
        """
        dirname, _, fn = path.rpartition("/")
        imgset = ImageSet.byPath(dirname)
        matches = imgset.images.filter(Image.q.filename == fn)
        n = matches.count()
        if n == 0:
            raise Exception("Image not found")
        if n == 1:
            return matches[0]
        raise Exception("Path is not unique!")

    def getPath(self):
        "Return the full path name for the image."
        return self.imageset.getPath() + "/" + self.filename

    def getBasename(self):
        "Return the basename of the file, stripping any extension off."
        return self.filename.split(".", 1)[0]

    # Retrieve single feature

    def getOneFeature(self, key, verbosity=2):
        """Return the given feature value.

        key is either an SQLObject (used as the feature directly) or a
        key looked up with Feature.byKey().  If no value is stored for
        this image, None is returned; a message is printed first when
        verbosity > 1.
        """
        if isinstance(key, SQLObject):
            fv = key
        else:
            fv = Feature.byKey(key)
        F = FeatureValue.selectBy(feature=fv, image=self).getOne(None)
        if F is None:
            if verbosity > 1:
                print("Missing feature value %s" % (key,))
                print(self)
            return None
        return F.getValue()

    def getCoverFeature(self, key):
        """Obtain the given feature value recursively from the
        source image."""
        if self.source is None:
            return self.getOneFeature(key)
        return self.source.getCoverFeature(key)

    # Retrieve feature vectors

    def featureValueObjects(self, key=None):
        """Return an iterator of FeatureValue objects defined by the given
        key.  If key is None, all features are included.

        key may be an SQLObject providing theFeatures(), or a string key
        which is tried as a FeatureSet first and as a FeatureVector second.
        If the resolved object has a ``dim`` attribute and the number of
        stored values differs from it, MissingDataException is raised.
        """
        if key is None:
            return self.features
        if isinstance(key, SQLObject):
            fv = key
        else:
            try:
                fv = FeatureSet.byKey(key)
            except SQLObjectNotFound:
                # Only a failed lookup falls back to FeatureVector; any
                # other error is allowed to propagate.
                fv = FeatureVector.byKey(key)
        verbosity = config.getVerbosity("sql")
        fl = fv.theFeatures(self, verbosity=verbosity)
        try:
            expected = fv.dim
        except AttributeError:
            # No dimensionality to check against (FeatureSet defines no
            # ``dim``).  Never mind.
            return fl
        try:
            found = len(fl)
        except TypeError:
            found = fl.count()
        if expected != found:
            raise MissingDataException(
                "FeatureVector (%s) has not been calculated." % (fv,))
        return fl

    def getFeatures(self, key=None, featureSet=False):
        """Return a feature vector as a list of floating point values.

        The values are ordered by the feature ID reported by getFID().
        The featureSet argument is accepted but not used.
        """
        # An explicit key gives the same order as FeatureValue.__cmp__
        # without relying on Python 2 comparison semantics.
        fl = sorted(self.featureValueObjects(key),
                    key=lambda f: f.getFID())
        return [f.getValue() for f in fl]

    # Adding features

    def addFeatures(self, **kw):
        """Add feature values for the image.  The features are given
        as a dictionary with keys as used in the database and a floating
        point value.  (Not tested!)"""
        for key, value in kw.items():
            FeatureValue(feature=Feature.byKey(key),
                         value=value, image=self)
        return

    def addFeatureMatrix(self, key, M):
        """Add feature values from a numpy array M.  The given key
        is the prefix, to which indices are appended.  Whether the
        indices are symmetric around 0 or range from 0 upwards is taken
        from the ``symidx`` flag of the FeatureSet with the given key."""
        fset = FeatureSet.byKey(key)
        for (idx, val) in matrix2dict(M, fset.symidx):
            if len(idx) == 1:
                k = key + "(%s)" % idx
            else:
                # Multi-dimensional index: "(i,j)" without spaces.
                k = key + str(idx).replace(" ", "")
            Feature.byKey(k).addValue(self, val)
        return

    def addFeaturesNamed(self, vals, names):
        """Add feature values from a list vals.  The keys of the features
        should be given in a list names."""
        for (v, k) in zip(vals, names):
            Feature.byKey(k).addValue(self, v)
        return

    # Analysis

    def delta(self, feature):
        """Compare this image with its cover or source image with respect
        to the given feature.  The return value is the difference between
        the feature values.  None is returned if the image does not have
        a known source image, or if either feature value is missing.
        """
        if self.source is None:
            return None
        a = self.getOneFeature(feature)
        b = self.source.getOneFeature(feature)
        # getOneFeature() reports a missing value as None; subtracting
        # would raise TypeError.
        if a is None or b is None:
            return None
        return a - b
class Feature(SQLObject):
    """A feature is a function of an image.  The database table stores
    a unique key (ID) and a description."""

    cat = ForeignKey("FeatureSet")
    key = StringCol(alternateID=True)   # Human-readable key
    description = StringCol(default=None)
    fv = RelatedJoin("FeatureVector", intermediateTable="vector_feature",
                     joinColumn="feature_id", otherColumn="vector_id",
                     createRelatedTable=False)
    val = SQLMultipleJoin("FeatureValue")

    def destroy(self):
        "Delete the feature including all calculated feature values."
        for v in self.val:
            v.destroySelf()
        return self.destroySelf()

    def __str__(self):
        return "<Feature %s: %s>" % (self.id, self.key)

    def addValue(self, image, value):
        """Add a calculated feature value giving the image and its value.

        NaN is replaced by zero (with a warning).  If a value already
        exists for this feature and image, it is updated when different.
        If the database rejects the value with a DataError and its
        magnitude is below 1e-12, zero is stored instead.  Any other
        error is re-raised.
        """
        if np.isnan(value):
            value = 0
            print("Warning: NaN detected and converted to zero (%s)."
                  % (self.key,))
            print(image)
        value = float(value)
        try:
            FeatureValue(feature=self, value=value, image=image)
        except Exception as e:
            if isDuplicateError(e):
                fval = FeatureValue.selectBy(feature=self,
                                             image=image).getOne()
                print("Warning: FeatureValue already calculated.")
                if fval.value != value:
                    print("Updating: %s -> %s" % (fval.value, value))
                    fval.value = value
            elif tailType(e) == "DataError":
                # Compare the magnitude: a plain ``value < 1e-12`` is true
                # for every negative number and would zero out real data.
                if abs(value) < 1e-12:
                    print("Warning! Floating point underflow in database. "
                          "Rounding to zero")
                    FeatureValue(feature=self, value=0, image=image)
                else:
                    raise
            else:
                print("[addValue] Unknown error.")
                raise
        return
class FeatureValue(SQLObject):
    """The value of one Feature computed for one Image.

    Each row references its Feature and its Image through foreign keys
    and holds a floating point value.  The pair (feature, image) is
    covered by a unique index.
    """

    feature = ForeignKey("Feature")
    image = ForeignKey("Image")
    value = FloatCol()
    # TODO: Consider adding timestamp and/or versioning
    idx = DatabaseIndex('feature', 'image', unique=True)
    idximg = DatabaseIndex('image', unique=False)

    def getValue(self):
        "Return the stored value."
        return self.value

    def getFID(self):
        """Return the ID of the referenced feature.

        The ID gives a canonical ordering of features.  It is exposed as
        a method so that decorators and other objects mimicking this
        interface can provide the same accessor.
        """
        feature = self.feature
        return feature.id

    def __cmp__(self, r):
        """Order by feature ID.

        The other operand may be any object with a compatible getFID()
        method.
        """
        mine = self.getFID()
        theirs = r.getFID()
        return cmp(mine, theirs)
class FeatureSet(SQLObject):
    """A Feature Set is a collection of Features with a common description.

    Fields to be set in the constructor:

    :key: human-readable, unique key
    :description: longer description of the features
    :func: python function to extract the feature
        The function is stored as a string; it is not evaluated in
        this class.
    :jpeg(bool): flag to indicate that the extraction function takes
        a jpeg object instead of a pixmap matrix.
    :matrix(bool): flag to indicate a feature set represented by a matrix
        If set, the addFeatureMatrix() method applies.
    :symidx(bool): (assumes matrix)
        Flag to indicate that individual elements should be indexed
        symmetrically around 0.

    Relational fields:

    :features (SelectResult): the included features

    The ``queues`` relation is currently commented out.
    """

    key = StringCol(alternateID=True)    # Human-readable key
    description = StringCol()            # Possibly longer description
    func = StringCol(default=None)       # Python function for extraction
    jpeg = BoolCol(default=False)        # Is it calculated from JPEG?
    matrix = BoolCol(default=True)       # Is it calculated as a matrix?
    symidx = BoolCol(default=True)       # Is the matrix symmetrically indexed?
    features = SQLMultipleJoin("Feature", joinColumn="cat_id")
    #queues = SQLRelatedJoin( "Queue" )

    def __iter__(self):
        """FeatureSet objects iterate over constituent features."""
        return iter(self.features)

    def theFeatures(self, image=None, verbosity=0):
        """Return an SQLResult of FeatureValue objects.

        If image is given, the result is filtered to include just
        the given image.  The verbosity argument is accepted but not
        used.
        """
        R = self.features.orderBy("id").throughTo.val
        # Identity test: ``image`` is an ORM object or None.
        if image is None:
            return R
        return R.filter(FeatureValue.q.image == image)

    def count(self):
        "Return the number of features in the set."
        return self.features.count()

    def __str__(self):
        return "<FeatureSet %s '%s'>" % (self.id, self.key,)

    @classmethod
    def destroyKey(cls, key):
        """Delete the object with the given key."""
        obj = cls.byKey(key)
        return obj.destroy()

    def destroy(self):
        """Delete the object including constituent features and
        feature values."""
        print("[destroy] %s" % (self,))
        ### for q in self.queues:
        ###     print q
        ###     q.destroySelf()
        for f in self.features:
            print(f)
            f.destroy()
        return self.destroySelf()
class VectorFeature(SQLObject):
    """Link table between Feature and FeatureVector rows.

    The table is named ``vector_feature``.  Client code should not
    normally need to use this class directly.
    """

    class sqlmeta:
        table = 'vector_feature'

    feature = ForeignKey('Feature')
    vector = ForeignKey('FeatureVector')
class FeatureVector(SQLObject):
    """A Feature Vector is a vector where each element is a Feature.
    The database tables stores Feature Vectors which form the basis
    for classifiers.  Where Feature Sets contain Features with common
    descriptions, Feature Vectors contain Features which are used
    together."""

    key = StringCol(alternateID=True)    # Human-readable key
    dim = IntCol()                       # Dimensionality
    credit = StringCol(default=None)     # Reference to source paper or similar
    description = StringCol(default=None)
    features = SQLRelatedJoin("Feature", intermediateTable="vector_feature",
                              joinColumn="vector_id",
                              otherColumn="feature_id",
                              createRelatedTable=False)

    def __iter__(self):
        """Feature vectors iterate over constituent features and are
        compatible with FeatureSet objects."""
        return iter(self.features.orderBy("id"))

    def theFeatures(self, image=None, verbosity=0):
        """Return an SQLResult of FeatureValue objects.

        If image is given, the result is filtered to include just
        the given image.  The verbosity argument is accepted but not
        used.
        """
        R = self.features.orderBy("id").throughTo.val
        # Identity test: ``image`` is an ORM object or None.
        if image is None:
            return R
        return R.filter(FeatureValue.q.image == image)

    def count(self):
        "Return the dimensionality of the feature vector."
        return self.dim

    @classmethod
    def destroyKey(cls, key):
        "Delete the object with the given key."
        obj = cls.byKey(key)
        return obj.destroy()

    def destroy(self):
        """Delete the object including corresponding objects in the
        relation table VectorFeature."""
        print("[destroy] %s" % (self,))
        for v in VectorFeature.selectBy(vector=self):
            print(v)
            v.destroySelf()
        return self.destroySelf()
class TestSet(SQLObject):
    """A named collection of images used to train or test a classifier.

    Iterating over a TestSet yields its TestImage rows.
    """

    name = StringCol(alternateID=True)
    images = SQLRelatedJoin("Image", intermediateTable="test_image",
                            joinColumn="imageset_id",
                            otherColumn="image_id",
                            createRelatedTable=False)
    testimg = SQLMultipleJoin("TestImage", joinColumn="imageset_id")
    perf = SQLMultipleJoin("SVMPerformance", joinColumn="testset_id")

    def destroy(self):
        """Remove the set together with its TestImage rows and its
        SVMPerformance rows."""
        for member in self.testimg:
            member.destroySelf()
        for record in self.perf:
            record.destroySelf()
        return self.destroySelf()

    def count(self):
        "Number of images in the set."
        members = self.images
        return members.count()

    def getClass(self, label=1):
        """Generate the TestImage objects of the set whose label equals
        the given label."""
        for im in self:
            if im.label != label:
                continue
            yield im

    def __iter__(self):
        "Iterate over the TestImage rows of the set."
        return iter(self.testimg)

    # Return feature values and feature vectors

    def getOneFeature(self, f):
        """Return an unsorted list with the value of feature f for every
        image in the set.  f may be a Feature object or a key.

        This appears to be exceedingly slow.
        TODO: It should be optimised to use a single query to the server.
        """
        values = []
        for img in self.images:
            values.append(img.getOneFeature(f))
        return values

    def getFeatures(self, fv):
        """Return a triple (l, v, i) of parallel lists: the labels, the
        feature vectors, and the Image objects of the test images.

        This is designed to be compatible with libSVM.
        """
        labels, vectors, images = [], [], []
        for entry in self.testimg:
            labels.append(entry.label)
            vectors.append(entry.image.getFeatures(fv))
            images.append(entry())
        return (labels, vectors, images)
class TestImage(SQLObject):
    """TestImage is a relational table marking a given Image as
    included in a Test or Training Set.  It includes additional
    fields, where label is used for classification and response
    for regression.  Clearly, these numbers could be derived from
    Image data on the fly, but because it depends on both the Image
    and ImageSet tables that seems cumbersome and it is preferable
    at this stage to hardcode it in the relational table.

    The TestImage class is a decorator for the Image class: the
    instance methods of Image listed at the end of this class are
    forwarded to the wrapped Image.  See the :class:`Image` class
    for details.

    For any :class:`Image` or :class:`TestImage` object img,
    the call img() returns the appropriate Image object.
    This should be used polymorphically whenever the type is unknown
    and the Image (or Image ID) is required.
    """

    class sqlmeta:
        table = "test_image"

    image = ForeignKey("Image")
    imageset = ForeignKey("TestSet")
    label = IntCol(default=None)        # Class label (or null)
    response = FloatCol(default=None)   # Response variable (or null)

    def __call__(self):
        """Calling a TestImage returns the corresponding Image object."""
        return self.image

    def copy(self, imageset):
        "Copy this image into the TestSet imageset, with the same settings."
        return TestImage(image=self.image, imageset=imageset,
                         label=self.label, response=self.response)

    def __str__(self):
        """The string representation is the id number, the full path name,
        and the TestSet it belongs to."""
        return "<TestImage %s: %s (%s)>" % (
            self.id, self.getPath(), self.imageset.name)

    # The following methods belong to the decorator pattern for Image.
    # Each one forwards its arguments unchanged to the wrapped Image.

    def delta(self, *a, **kw):
        return self.image.delta(*a, **kw)

    def getCoverFeature(self, *a, **kw):
        return self.image.getCoverFeature(*a, **kw)

    def getOneFeature(self, *a, **kw):
        return self.image.getOneFeature(*a, **kw)

    def featureValueObjects(self, *a, **kw):
        return self.image.featureValueObjects(*a, **kw)

    def getFeatures(self, *a, **kw):
        return self.image.getFeatures(*a, **kw)

    def getPath(self):
        return self.image.getPath()

    def getBasename(self):
        return self.image.getBasename()

    def addFeatures(self, *a, **kw):
        return self.image.addFeatures(*a, **kw)

    def addFeatureMatrix(self, *a, **kw):
        return self.image.addFeatureMatrix(*a, **kw)

    def addFeaturesNamed(self, *a, **kw):
        # Forwarded like the other add* methods, so a TestImage can be
        # used wherever an Image is expected.
        return self.image.addFeaturesNamed(*a, **kw)
class FeatureLog(SQLObject):
    """Record of which feature set has been calculated for which image.

    According to the original notes, the :mod:`extract` module looks for
    an existing record and aborts if one is found, so that features are
    not recomputed.  The pair (image, fset) is covered by a unique index.
    """

    image = ForeignKey('Image')
    fset = ForeignKey('FeatureSet')
    entered = DateTimeCol(default=None)
    idx = DatabaseIndex('image', 'fset', unique=True)
if __name__ == "__main__":
    # Smoke test against an in-memory SQLite database.
    sqlhub.processConnection = connectionForURI('sqlite:/:memory:')
    # NOTE(review): createTables() is not defined in this file; presumably
    # it comes from one of the star-imports -- confirm.
    createTables()
    # The column is called ``description`` (required); ``desc`` is not a
    # column of ImageSet.
    S = ImageSet(path="/foobar/", fileformat="JPEG", imgformat="JPEG",
                 description="Dummy test set", name="Test01")
    Image(filename="foobar", imageset=S)
    print(Image.get(1))