Source code for pysteg.ml.classifiers

#! /usr/bin/env python
## -*- coding: utf-8 -*-
## (C) 2012: Hans Georg Schaathun <georg@schaathun.net> 

"""
Ensemble classifier.

:Module:    pysteg.ml.classifiers
:Date:      $Date$
:Revision:  $Revision$
:Author:    © 2012: Hans Georg Schaathun <georg@schaathun.net>

This module implements primarily the ensemble classifier, and necessary
auxiliaries.  This includes the abstract :class:`Classifier` class
which is intended as base class for other classifiers as well.
"""

import mlpy
import numpy as np

class Classifier(object):
    """Abstract classifier class.

    Any derived class must implement :meth:`_learn` and :meth:`_pred`,
    which are called from the training method :meth:`learn` and the
    prediction method :meth:`pred` (respectively).  The public methods
    normalise the arguments, so :meth:`_learn` and :meth:`_pred` can
    assume well-formed arguments of :class:`np.array`.
    """

    # Class labels seen during training; subclasses assign self._labels
    # in _learn().  Kept as a class-level default so labels() is safe to
    # call before training (it then returns None).
    _labels = None

    def labels(self):
        "Return the set of labels used."
        return self._labels

    def learn(self, x, y, labels=None):
        """Train the classifier.

        :Parameters:
          x : 2d array_like object
            training data (N, dim); each row is one object.
          y : 1d array_like object
            integer true labels of the training set
          labels : sequence, optional
            explicit label set, forwarded unchanged to :meth:`_learn`

        :Raises:
          ValueError
            if `x` is not 2-D or `y` is not 1-D.
        """
        # Format input as arrays and check dimensions.
        # Note: builtin float/int are used as dtypes; the old np.float
        # and np.int aliases were removed in modern NumPy.
        xarr = np.asarray(x, dtype=float)
        yarr = np.asarray(y, dtype=int)
        if xarr.ndim != 2:
            raise ValueError("x must be a 2d array_like object")
        if yarr.ndim != 1:
            raise ValueError("y must be an 1d array_like object")
        return self._learn(xarr, yarr, labels)

    def pred(self, t):
        """Run classification of test vector(s) `t`, returning discrete
        prediction labels.

        :Parameters:
          t : 1d (one sample) or 2d array_like object
            Feature vectors to be classified

        :Returns:
          p : integer or 1d numpy array
            predicted class(es)

        :Raises:
          ValueError
            if `t` is neither 1-D nor 2-D.
        """
        tarr = np.asarray(t, dtype=float)
        if tarr.ndim == 1:
            # Promote a single sample to a (1, dim) matrix so _pred()
            # only ever sees 2-D input.
            tarr = tarr[None, :]
        elif tarr.ndim != 2:
            raise ValueError("Input must be 1D or 2D.")
        return self._pred(tarr)

    def _pred(self, *a, **kw):
        # Abstract: concrete subclasses must override.
        raise NotImplementedError

    def _learn(self, *a, **kw):
        # Abstract: concrete subclasses must override.
        raise NotImplementedError
class EC(Classifier):
    """This class represents an ensemble classifier."""

    def __init__(self, L, dred, base=mlpy.LDAC, rule="majority"):
        """The :class:`EC` object is initialised with the parameters L
        and dred.  The ensemble classifier uses L instances of the base
        classifier, each using dred random features from the complete
        feature vector.

        The base learner is chosen with the base parameter, default is
        :class:`mlpy.LDAC`.  The fusion rule is chosen with the rule
        parameter; default is majority vote ("majority"), which is the
        only supported rule at present.
        """
        self._L = L
        self._dred = dred
        self._base = base
        self._dim = None
        self._labels = None
        self._bc = None
        self._fselection = None
        if rule == "majority":
            # Fix: record the hard fusion rule.  Previously _hard was
            # unconditionally None, so _pred() could never reach the
            # majority-vote branch and always raised ValueError.
            self._soft = None
            self._hard = "majority"
        else:
            self._soft = rule
            self._hard = None

    def _learn(self, xarr, yarr, labels):
        """Train the classifier.

        This is an auxiliary method called from :meth:`learn` only;
        xarr and yarr are already validated numpy arrays.
        """
        # Set constant parameters and sanity check.
        (N, dim) = xarr.shape
        if labels is None:
            # `is None` rather than `== None`: labels may be an array,
            # for which == would broadcast element-wise.
            labels = np.unique(yarr)
        self._labels = labels
        self._dim = dim
        L = self._L
        dred = self._dred
        if dred >= dim:
            raise ValueError("Training set has too low dimension")
        # Generate the random feature selection: L independent random
        # subsets of dred feature indices.
        self._fselection = [np.random.permutation(dim)[:dred]
                            for i in range(L)]
        # Instantiate and train base learners, one per feature subset.
        self._bc = []
        for S in self._fselection:
            C = self._base()
            C.learn(xarr[:, S], yarr)
            self._bc.append(C)
        return

    def _classify(self, tarr):
        "Auxiliary inner method for :meth:`classify`."
        if self._bc is None:
            raise ValueError("no model computed.")
        # Fix: the original referenced an undefined name `t` here
        # (NameError); the argument is tarr.
        sl = [C.classify(tarr[:, S])
              for (C, S) in zip(self._bc, self._fselection)]
        return sl

    def classify(self, t):
        """Run classification of test vector(s) `t` and return soft
        information classification scores.

        :Parameters:
          t : 1d (one sample) or 2d array_like object
            Feature vectors to be classified

        :Returns:
          p : float or 1d numpy array
            classification scores

        NOT COMPLETED.  Need to design fusion rules for multi-class
        prediction.
        """
        if self._soft is None:
            raise ValueError(
                "This instance has no support for soft information classification.")
        tarr = np.asarray(t, dtype=float)
        if tarr.ndim == 1:
            tarr = tarr[None, :]
        elif tarr.ndim != 2:
            raise ValueError("Input must be 1D or 2D.")
        sl = self._classify(tarr)
        raise NotImplementedError

    def _pred(self, tarr):
        # Hard (discrete) prediction; called from Classifier.pred().
        if self._bc is None:
            raise ValueError("no model computed.")
        if self._hard == "majority":
            # Each base learner predicts on its own feature subset; the
            # constituent predictions are fused by majority vote.
            sl = [C.pred(tarr[:, S])
                  for (C, S) in zip(self._bc, self._fselection)]
            return majorityVote(sl, self.labels())
        elif self._soft is not None:
            S = self._classify(tarr)
            raise NotImplementedError
        else:
            raise ValueError("No appropriate fusion rule is known")
def majorityVote(L, labels=(0, 1)):
    """Majority vote prediction based on a list of constituent predictions.

    :Parameters:
      L : 2-D array like
        each row is one constituent prediction
      labels : sequence of integers
        class labels (default (0, 1))

    :Returns:
      p : 1d numpy array of integers
        predicted class labels for each item

    If there is a tie, the earliest label in `labels` (0 by default) is
    favoured, because :func:`np.argmax` returns the first maximal index.
    """
    # The default is a tuple rather than a list to avoid the shared
    # mutable-default pitfall.
    A = np.asarray(L)
    # Row r of countarray holds, per item (column of A), the number of
    # constituents that predicted labels[r].
    countarray = np.asarray([np.sum(A == l, axis=0) for l in labels])
    # argmax over the label axis; ties resolve to the earliest label.
    predarray = np.argmax(countarray, axis=0)
    return np.array(labels)[predarray]