Source code for pysteg.sql.stats

#! /usr/bin/env python
## -*- coding: utf-8 -*-
## (C) 2012: Hans Georg Schaathun <georg@schaathun.net> 

"""
Module for statistical analysis and comparison of features.
"""

from . import *
import numpy as np
import scipy.stats as ss
import aux
import matplotlib.pyplot as plt

def corrcoef(imgset, features):
    """Return the correlation coefficient matrix of the given features.

    One row of feature values is collected per image in imgset by calling
    getOneFeature() on the image for every entry of features; the columns
    of the resulting array are then correlated against each other.

    The features argument can be a list of Feature objects or feature
    keys.  The imgset object can be a list of Image objects, an ImageSet
    object, or a TestSet object.
    """
    rows = []
    for image in imgset:
        rows.append([image.getOneFeature(feat) for feat in features])
    samples = np.array(rows)
    # rowvar=0: every column (feature) is a variable, every row an observation.
    return np.corrcoef(samples, rowvar=0)
def scatterPlot(imgset, f1, f2, outfile=None):
    """Plot two features against each other in the form of a scatter plot.

    The first argument is a TestSet object (or other iterable of images
    with a label attribute) using the class labels 0 and 1, where 0 is
    plotted red and 1 is plotted blue.  The second and third arguments
    are features, given as Feature objects or as keys; they are passed to
    getOneFeature() and also used to label the axes.

    If the optional outfile is given, the plot is written to the given
    file.  The function returns None.
    """
    points = [(im.label, im.getOneFeature(f1), im.getOneFeature(f2))
              for im in imgset]
    CX = [v1 for (l, v1, v2) in points if l == 0]
    CY = [v2 for (l, v1, v2) in points if l == 0]
    SX = [v1 for (l, v1, v2) in points if l == 1]
    SY = [v2 for (l, v1, v2) in points if l == 1]
    scatteropt = {"s": 1, "linewidth": (0,)}
    plt.scatter(CX, CY, c="r", **scatteropt)
    plt.scatter(SX, SY, c="b", **scatteropt)
    # Features may be given as plain keys, which have no .key attribute;
    # fall back to the value itself in that case.
    plt.xlabel(f1.key if hasattr(f1, "key") else f1)
    plt.ylabel(f2.key if hasattr(f2, "key") else f2)
    if outfile:
        plt.savefig(outfile)
    return
def _moments(L):
    """Return the four first statistical moments of the given numbers.

    The result is the tuple (mean, variance, skewness, kurtosis), computed
    with np.mean, np.var, scipy.stats.skew and scipy.stats.kurtosis using
    their default arguments.
    """
    return (np.mean(L), np.var(L), ss.skew(L), ss.kurtosis(L))
def deltaMoments(imgset, feature, label=None):
    """Return moments of the feature difference between stego and cover.

    Consider the difference in the given feature between a steganogram
    and its corresponding cover image, as returned by im.delta(feature).
    Return a tuple (count, mean, variance, skewness, kurtosis) of this
    difference in the given image set (imgset), where count is the number
    of images that contributed a value.

    If label is given, imgset should be a TestSet or other iterable over
    TestImage objects, and only images with the given class label will be
    considered.

    Images for which delta() returns None (documented upstream as images
    without a source (cover) image recorded in the database) are tacitly
    ignored.
    """
    # Identity test, so that a label of 0 is still honoured.
    if label is not None:
        imgset = (im for im in imgset if im.label == label)
    deltas = [im.delta(feature) for im in imgset]
    # `is not None` rather than `!= None`: the latter is ambiguous should
    # delta() ever return an array-like value.
    deltas = [x for x in deltas if x is not None]
    return (len(deltas),) + _moments(deltas)
def featureMoments(imgset, feature, label=None):
    """Return the four first statistical moments of the given feature.

    The result is a tuple (count, mean, variance, skewness, kurtosis) of
    the feature values obtained with getOneFeature() from the images in
    imgset, where count is the number of images considered.

    If label is given, imgset should be a TestSet or other iterable over
    TestImage objects, and only images with the given class label will be
    considered.
    """
    # Identity test, so that a label of 0 is still honoured.
    if label is not None:
        imgset = (im for im in imgset if im.label == label)
    values = [im.getOneFeature(feature) for im in imgset]
    return (len(values),) + _moments(values)
def featureMedian(imgset, feature, label=None):
    """Return the median of the given feature within imgset.

    The feature value of every image is obtained with getOneFeature() and
    the median is computed with np.median.

    If label is given, imgset should be a TestSet or other iterable over
    TestImage objects, and only images with the given class label will be
    considered.
    """
    # Identity test, so that a label of 0 is still honoured.
    if label is not None:
        imgset = (im for im in imgset if im.label == label)
    values = [im.getOneFeature(feature) for im in imgset]
    return np.median(values)
def featurePerc(imgset, feature, bins=10, label=None):
    """Return the percentile points of the given feature within imgset.

    The feature values are sorted and split into the given number of
    bins.  The returned list has bins+1 entries: the value at the start
    of each bin (the first being the minimum) followed by the maximum.

    If label is given, imgset should be a TestSet or other iterable over
    TestImage objects, and only images with the given class label will be
    considered.

    An IndexError is raised if no image contributes a value.
    """
    # Identity test, so that a label of 0 is still honoured.
    if label is not None:
        imgset = (im for im in imgset if im.label == label)
    values = sorted(im.getOneFeature(feature) for im in imgset)
    count = len(values)
    # Floor division keeps the index an integer on Python 3 as well as on
    # Python 2, where `/` on two ints already truncated.
    points = [values[n * count // bins] for n in range(bins)]
    points.append(values[-1])
    return points
def reclass(v):
    """Translate a signed (+/-1 style) label to a 0/1 class label.

    Positive values map to 0 and negative values map to 1.  Any other
    value (such as zero) yields None.
    """
    if v > 0:
        return 0
    if v < 0:
        return 1
    return None
def ccount(L, fl=("1", "2")):
    """Count how often each of two classifiers agrees with the true label.

    L is an iterable of tuples t where t[1] is the reference label and
    t[2] and t[3] are the labels predicted by the first and the second
    classifier respectively.  fl holds the two classifiers (or their
    names); entries that have a key attribute are represented by that
    key, other entries are used as they are.

    Returns a dictionary with four counters: "both" (both predictions
    equal the reference label), "neither" (none does), and one entry per
    classifier name counting the tuples where only that classifier is
    right.
    """
    # The default is an immutable tuple; fl is only ever indexed.
    f0 = fl[0]
    if hasattr(f0, "key"):
        f0 = f0.key
    f1 = fl[1]
    if hasattr(f1, "key"):
        f1 = f1.key
    R = {"both": 0, "neither": 0, f0: 0, f1: 0}
    for t in L:
        c1 = t[2] == t[1]
        c2 = t[3] == t[1]
        if c1 and c2:
            s = "both"
        elif c1:
            s = f0
        elif c2:
            s = f1
        else:
            s = "neither"
        R[s] += 1
    return R
def compareClassifiers(imgset, fl):
    """Print a comparison of classifier decisions on a test set.

    Every entry of imgset.testimg is called to obtain the image object
    and read for its class label.  The features in fl are looked up with
    getOneFeature() on each image and translated to 0/1 predictions with
    reclass().  The images are then split by their label (1 is reported
    as "Steganograms", 0 as "Clean images") and the agreement counts
    returned by ccount() are printed for each class, preceded by a line
    with the total, clean and steganogram counts.

    fl is an iterable of features (Feature objects or keys).  A single
    feature is accepted by the feature lookup, but ccount() indexes two
    classifiers, so in practice two features are required.

    Nothing is returned; the results are written to standard output.
    """
    pairs = [(im(), im.label) for im in imgset.testimg]
    imlist = [im for (im, l) in pairs]
    lablist = [l for (im, l) in pairs]
    # Strings expose __iter__ on Python 3 but not on Python 2; always treat
    # a string as a single feature key rather than iterating its characters.
    if hasattr(fl, "__iter__") and not isinstance(fl, str):
        scores = [[im.getOneFeature(f) for im in imlist] for f in fl]
    else:
        scores = [[im.getOneFeature(fl) for im in imlist]]
    predictions = [[reclass(v) for v in column] for column in scores]
    # Materialise the zip: it is measured with len() and traversed twice,
    # which a one-shot iterator (Python 3) does not support.
    Z = list(zip(imlist, lablist, *predictions))
    Z0 = [t for t in Z if t[1] == 0]
    Z1 = [t for t in Z if t[1] == 1]
    # Single-argument print calls produce the same output on Python 2 and 3.
    print("%d %d %d" % (len(Z), len(Z0), len(Z1)))
    print("Steganograms:")
    print(ccount(Z1, fl))
    print("Clean images:")
    print(ccount(Z0, fl))