import numpy as np
from brew.base import Ensemble
from .base import DCS
class LCA(DCS):
    """Local Class Accuracy.

    The Local Class Accuracy selects the best classifier for
    a sample using its K nearest neighbors.

    Attributes
    ----------
    `Xval` : array-like, shape = [n_samples, n_features]
        Validation set.
    `yval` : array-like, shape = [n_samples]
        Labels of the validation set.
    `knn`  : sklearn KNeighborsClassifier,
        Classifier used to find neighborhood.

    Examples
    --------
    >>> from brew.selection.dynamic.lca import LCA
    >>> from brew.generation.bagging import Bagging
    >>> from brew.base import EnsembleClassifier
    >>>
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> import numpy as np
    >>>
    >>> X = np.array([[-1, 0], [-0.8, 1], [-0.8, -1], [-0.5, 0] , [0.5, 0], [1, 0], [0.8, 1], [0.8, -1]])
    >>> y = np.array([1, 1, 1, 2, 1, 2, 2, 2])
    >>>
    >>> bag = Bagging(base_classifier=DecisionTreeClassifier(max_depth=1, min_samples_leaf=1), n_classifiers=10)
    >>> bag.fit(X, y)
    >>>
    >>> lca = LCA(X, y, K=3)
    >>>
    >>> clf = EnsembleClassifier(bag.ensemble, selector=lca)
    >>> clf.predict([-1.1,-0.5])
    [1]

    See also
    --------
    brew.selection.dynamic.ola.OLA: Overall Local Accuracy.

    References
    ----------
    Woods, Kevin, Kevin Bowyer, and W. Philip Kegelmeyer Jr. "Combination
    of multiple classifiers using local accuracy estimates." Computer Vision
    and Pattern Recognition, 1996. Proceedings CVPR'96, 1996 IEEE Computer
    Society Conference on. IEEE, 1996.

    Ko, Albert HR, Robert Sabourin, and Alceu Souza Britto Jr.
    "From dynamic classifier selection to dynamic ensemble selection."
    Pattern Recognition 41.5 (2008): 1718-1731.
    """

    def __init__(self, Xval, yval, K=5, weighted=False, knn=None):
        """Forward the validation data and neighborhood settings to DCS.

        Parameters
        ----------
        Xval : Numpy 2d-array with rows representing each sample.
        yval : Numpy 1d-array representing the target classes of
            the samples in Xval.
        K : int (default=5), the size of the neighborhood used to select the
            classifier.
        weighted : bool (default=False), if the selected classifiers are
            weighted.
        knn : sklearn KNeighborsClassifier (default=None), a classifier to
            find the neighborhood of each sample.
        """
        super(LCA, self).__init__(Xval, yval, K, weighted, knn)

    def select(self, ensemble, x):
        """Pick the single classifier with the best local class accuracy.

        A classifier's score is the number of neighbors of ``x`` (taken from
        the validation set) that it classifies correctly AND assigns to the
        same class it predicts for ``x``. When the top-scoring classifiers
        disagree on the class of ``x``, lower score groups are consulted to
        break the tie among the tied classes.

        Parameters
        ----------
        ensemble : Ensemble
            Pool of classifiers to select from.
        x : array-like
            Query sample; handed unchanged to ``self.knn.kneighbors`` and to
            every classifier's ``predict``.

        Returns
        -------
        tuple
            ``(Ensemble, None)``: an ensemble wrapping the one selected
            classifier; the second element is always ``None`` here.
        """
        # Nothing to choose when every classifier already agrees on x.
        if ensemble.in_agreement(x):
            return Ensemble([ensemble.classifiers[0]]), None

        # obtain the K nearest neighbors in the validation set
        [idx] = self.knn.kneighbors(x, return_distance=False)
        neighbors_X = self.Xval[idx]  # k neighbors
        neighbors_y = self.yval[idx]  # k neighbors target

        # pool_output[s, c] = prediction of classifier c for neighbor s
        pool_output = np.zeros((neighbors_X.shape[0], len(ensemble)))
        for i, clf in enumerate(ensemble.classifiers):
            pool_output[:, i] = clf.predict(neighbors_X)

        # Prediction of each classifier for the query sample itself.
        x_outputs = np.asarray(
            [clf.predict(x) for clf in ensemble.classifiers]).flatten()

        scores = np.zeros(len(ensemble))
        by_score = {}  # score -> indices of the classifiers reaching it
        for j in range(pool_output.shape[1]):
            # neighbors this classifier got right ...
            correct = pool_output[:, j] == neighbors_y
            # ... restricted to the class it assigns to x
            same_class = pool_output[:, j] == x_outputs[j]
            scores[j] = np.count_nonzero(np.logical_and(correct, same_class))
            by_score.setdefault(scores[j], []).append(j)

        # Walk the score groups from best to worst. `options` holds the
        # classes tied in the best group once a tie has been detected.
        options = None
        for score in sorted(by_score, reverse=True):
            members = by_score[score]
            pred = x_outputs[members]
            # np.bincount needs non-negative integer class labels.
            bincount = np.bincount(pred)

            if options is not None:
                # Only votes for the classes still tied are relevant.
                for label in range(len(bincount)):
                    if label not in options:
                        bincount[label] = 0

            imx = np.argmax(bincount)
            if bincount[imx] == 0:
                # No classifier in this group votes for a tied class, so
                # the group cannot break the tie.
                continue

            votes = np.argwhere(bincount == bincount[imx]).flatten()
            if len(votes) == 1:
                # `pred` is indexed by position within `members`; map the
                # position back to the classifier's index in the ensemble.
                winner = members[np.argmax(pred == imx)]
                return Ensemble([ensemble.classifiers[winner]]), None
            elif options is None:
                options = votes

        # Tie could not be resolved: fall back to the first best scorer.
        return Ensemble([ensemble.classifiers[np.argmax(scores)]]), None
def select(self, ensemble, x):
    """Return the classifier with the highest local class accuracy for ``x``.

    Each classifier is scored by counting the neighbors of ``x`` (looked up
    in the validation set through ``self.knn``) that it labels correctly and
    that it places in the same class it predicts for ``x``. The classifier
    with the highest count wins; ``np.argmax`` keeps the first one on ties.

    Parameters
    ----------
    ensemble : Ensemble
        Pool of classifiers to select from.
    x : array-like
        Query sample, passed unchanged to ``self.knn.kneighbors`` and to
        every classifier's ``predict``.

    Returns
    -------
    tuple
        ``(Ensemble, None)`` where the ensemble wraps the chosen classifier.
    """
    members = ensemble.classifiers

    # Unanimous pool: any member will do, so take the first one.
    if ensemble.in_agreement(x):
        return Ensemble([members[0]]), None

    # Region of competence: the nearest validation samples and their labels.
    [nearest] = self.knn.kneighbors(x, return_distance=False)
    region_X = self.Xval[nearest]
    region_y = self.yval[nearest]

    # One column of neighbor predictions per classifier.
    n_members = len(ensemble)
    region_pred = np.zeros((region_X.shape[0], n_members))
    for col, member in enumerate(members):
        region_pred[:, col] = member.predict(region_X)

    # What every classifier says about the query sample itself.
    query_pred = np.asarray(
        [members[col].predict(x) for col in range(n_members)]
    ).flatten()

    scores = np.zeros(n_members)
    for col, column in enumerate(region_pred.T):
        # correctly classified neighbors that share the class given to 'x'
        hits = np.logical_and(column == region_y, column == query_pred[col])
        scores[col] = hits.sum()

    best = np.argmax(scores)
    return Ensemble([members[best]]), None