#! /usr/bin/env python
## -*- coding: utf-8 -*-
## (C) 2012: Hans Georg Schaathun <georg@schaathun.net>
"""
Ensemble classifier.
:Module: pysteg.ml.classifiers
:Date: $Date$
:Revision: $Revision$
:Author: © 2012: Hans Georg Schaathun <georg@schaathun.net>
This module implements primarily the ensemble classifier, and necessary
auxiliaries. This includes the abstract :class:`Classifier` class
which is intended as base class for other classifiers as well.
"""
import mlpy
import numpy as np
class Classifier(object):
    """Abstract classifier class.

    Any derived class must implement :meth:`_learn` and :meth:`_pred`,
    which are called from the training method :meth:`learn` and the
    prediction method :meth:`pred` (respectively).  The public methods
    normalise the arguments, so :meth:`_learn` and :meth:`_pred` can
    assume well-formed :class:`numpy.ndarray` arguments.
    """

    # Default label set for an untrained classifier; concrete subclasses
    # populate self._labels during training.  (The original code bound a
    # class attribute named ``labels`` which was immediately shadowed by
    # the ``labels()`` method below and therefore dead.)
    _labels = None

    def labels(self):
        "Return the set of labels used."
        return self._labels

    def learn(self, x, y, labels=None):
        """Train the classifier.

        :Parameters:
          x : 2d array_like object
            training data (N, dim); each row is one object.
          y : 1d array_like object integer
            true labels of the training set
          labels : sequence, optional
            permissible class labels; if None the subclass is expected
            to derive the label set from `y`.
        :Raises ValueError: if `x` is not 2-D or `y` is not 1-D.
        """
        # Format input as arrays and check dimensions before delegating
        # to the subclass hook.
        xarr = np.asarray(x, dtype=float)
        yarr = np.asarray(y, dtype=int)
        if xarr.ndim != 2:
            raise ValueError("x must be a 2d array_like object")
        if yarr.ndim != 1:
            raise ValueError("y must be an 1d array_like object")
        return self._learn(xarr, yarr, labels)

    def pred(self, t):
        """Run classification of test vector(s) `t`, returning discrete
        prediction labels.

        :Parameters:
          t : 1d (one sample) or 2d array_like object
            Feature vectors to be classified
        :Returns:
          p : integer or 1d numpy array
            predicted class(es)
        :Raises ValueError: if `t` is neither 1-D nor 2-D.
        """
        tarr = np.asarray(t, dtype=float)
        if tarr.ndim == 1:
            # A single sample: promote to a (1, dim) matrix.
            tarr = tarr[None, :]
        elif tarr.ndim != 2:
            raise ValueError("Input must be 1D or 2D.")
        return self._pred(tarr)

    def _pred(self, *a, **kw):
        # Subclass hook: classify a well-formed 2-D array.
        raise NotImplementedError

    def _learn(self, *a, **kw):
        # Subclass hook: train on well-formed arrays.
        raise NotImplementedError
class EC(Classifier):
    """This class represents an ensemble classifier."""

    def __init__(self, L, dred, base=None, rule="majority"):
        """The :class:`EC` object is initialised with the parameters `L`
        and `dred`.  The ensemble classifier uses `L` instances of the
        base classifier, each using `dred` random features from the
        complete feature vector.

        The base learner is chosen with the `base` parameter; the
        default (None) selects :class:`mlpy.LDAC`.  The fusion rule is
        chosen with the `rule` parameter; default is majority vote
        ("majority"), which is the only supported rule at present.
        """
        self._L = L
        self._dred = dred
        # Resolve the default base learner at call time rather than in
        # the signature, so mlpy is only required when actually used.
        self._base = mlpy.LDAC if base is None else base
        self._dim = None
        self._labels = None
        self._bc = None           # trained base classifiers
        self._fselection = None   # per-learner random feature subsets
        if rule == "majority":
            # BUG FIX: the original left self._hard = None in every
            # case, so the (only supported) majority rule could never
            # be selected in _pred.  Record the hard rule here.
            self._hard = rule
            self._soft = None
        else:
            # Any other rule is treated as a soft-information rule.
            self._hard = None
            self._soft = rule

    def _learn(self, xarr, yarr, labels):
        """Train the classifier.

        This is an auxiliary method called from :meth:`learn` only.
        """
        # Set constant parameters and sanity check
        (N, dim) = xarr.shape
        if labels is None:
            labels = np.unique(yarr)
        self._labels = labels
        self._dim = dim
        L = self._L
        dred = self._dred
        if dred >= dim:
            raise ValueError("Training set has too low dimension")
        # Each base learner gets its own random subset of dred features.
        self._fselection = [np.random.permutation(dim)[:dred]
                            for i in range(L)]
        # Instantiate and train the base learners on their subsets.
        self._bc = []
        for S in self._fselection:
            C = self._base()
            C.learn(xarr[:, S], yarr)
            self._bc.append(C)
        return

    def _classify(self, tarr):
        "Auxiliary inner method for :meth:`classify`."
        if self._bc is None:
            raise ValueError("no model computed.")
        # BUG FIX: the original indexed an undefined name ``t``; the
        # normalised argument is ``tarr``.
        sl = [C.classify(tarr[:, S])
              for (C, S) in zip(self._bc, self._fselection)]
        return sl

    def classify(self, t):
        """Run classification of test vector(s) `t` and return soft
        information classification scores.

        :Parameters:
          t : 1d (one sample) or 2d array_like object
            Feature vectors to be classified
        :Returns:
          p : float or 1d numpy array
            classification scores

        NOT COMPLETED.
        Need to design fusion rules for multi-class prediction.
        """
        if self._soft is None:
            raise ValueError(
                "This instance has no support for soft information classification.")
        tarr = np.asarray(t, dtype=float)
        if tarr.ndim == 1:
            tarr = tarr[None, :]
        elif tarr.ndim != 2:
            raise ValueError("Input must be 1D or 2D.")
        sl = self._classify(tarr)
        # Soft-information fusion is not yet designed.
        raise NotImplementedError

    def _pred(self, tarr):
        # Hard prediction with fusion of the base learners' votes.
        if self._bc is None:
            raise ValueError("no model computed.")
        if self._hard == "majority":
            # Each base learner votes on its own feature subset.
            sl = [C.pred(tarr[:, S])
                  for (C, S) in zip(self._bc, self._fselection)]
            return majorityVote(sl, self.labels())
        elif self._soft is not None:
            S = self._classify(tarr)
            # Thresholding of soft scores is not yet designed.
            raise NotImplementedError
        else:
            raise ValueError("No appropriate fusion rule is known")
def majorityVote(L, labels=(0, 1)):
    """Majority vote prediction based on a list of constituent predictions.

    :Parameters:
      L : 2-D array like
        each row is one constituent prediction
      labels : sequence of integers
        class labels (default ``(0, 1)``; a tuple replaces the original
        mutable-list default)
    :Returns:
      p : 1d numpy array of integers
        predicted class labels for each item

    If there is a tie, the earliest label in `labels` (0 by default) is
    favoured, since :func:`numpy.argmax` returns the first maximum.
    """
    A = np.asarray(L)
    # For each candidate label, count how many constituent classifiers
    # predicted it for each item (i.e. sum matches down each column).
    counts = np.asarray([np.sum(A == lab, axis=0) for lab in labels])
    # argmax over the label axis; ties resolve to the lowest label index.
    return np.array(labels)[np.argmax(counts, axis=0)]