# Source code for PyMca5.PyMcaMath.mva.PCAModule

#/*##########################################################################
#
# The PyMca X-Ray Fluorescence Toolkit
#
# Copyright (c) 2004-2014 European Synchrotron Radiation Facility
#
# This file is part of the PyMca X-ray Fluorescence Toolkit developed at
# the ESRF by the Software group.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
#############################################################################*/
__author__ = "V.A. Sole & A. Mirone - ESRF Data Analysis"
__contact__ = "sole@esrf.fr"
__license__ = "MIT"
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
import os
import time
import numpy
import numpy.linalg
try:
    import numpy.core._dotblas as dotblas
except ImportError:
    print("WARNING: Not using BLAS, PCA calculation will be slower")
    dotblas = numpy

try:
    import mdp
    MDP = True
except:
    # MDP can raise other errors than just an import error
    MDP = False

from . import Lanczos
from . import PCATools
DEBUG = 0

# Make these functions accept arguments not relevant to
# them in order to simplify having a common graphical interface
def lanczosPCA(stack, ncomponents=10, binning=None, **kw):
    """Perform PCA on a stack using the Lanczos eigensolver.

    :param stack: object with ``data`` attribute or a numpy array of shape
                  (rows, cols, nchannels) or (rows, nchannels)
    :param ncomponents: number of principal components to extract
    :param binning: optional integer channel binning factor (None -> 1)
    :param kw: ignored; accepted to keep a common interface with the other
               PCA functions in this module
    :returns: (images, eigenvalues, eigenvectors) where images has shape
              (ncomponents, rows, cols)
    :raises TypeError: if the input is not a numpy array
    :raises ValueError: if ncomponents exceeds the number of channels
    """
    if DEBUG:
        print("lanczosPCA")
    if binning is None:
        binning = 1

    if hasattr(stack, "info") and hasattr(stack, "data"):
        data = stack.data
    else:
        data = stack
    if not isinstance(data, numpy.ndarray):
        raise TypeError(\
            "lanczosPCA is only supported when using numpy arrays")

    # "single" wraps the (already computed) covariance matrix,
    # "double" keeps the two data factors and multiplies twice.
    #wrapmatrix = "double"
    wrapmatrix = "single"

    dtype = numpy.float64
    if wrapmatrix == "double":
        data = data.astype(dtype)

    # Flatten the spatial dimensions: work on an (npixels, nchannels) view.
    if len(data.shape) == 3:
        r, c, N = data.shape
        data.shape = r * c, N
    else:
        r, N = data.shape
        c = 1

    npixels = r * c

    if binning > 1:
        # data.shape may fail with non-contiguous arrays, use reshape.
        # Integer division (//) is required here: float shapes raise on
        # Python 3 where / always returns a float.
        data = numpy.reshape(data,
                             [data.shape[0], data.shape[1] // binning,
                              binning])
        data = numpy.sum(data, axis=-1)
        N //= binning

    if ncomponents > N:
        raise ValueError("Number of components too high.")

    # Center the data in place (undone before returning).
    avg = numpy.sum(data, 0) / (1.0 * npixels)
    numpy.subtract(data, avg, data)

    Lanczos.LanczosNumericMatrix.tipo = dtype
    Lanczos.LanczosNumericVector.tipo = dtype

    if wrapmatrix == "single":
        # Covariance matrix computed once, single multiplication per step.
        SM = [dotblas.dot(data.T, data).astype(dtype)]
        SM = Lanczos.LanczosNumericMatrix(SM)
    else:
        # Two factors kept, two multiplications per step.
        SM = Lanczos.LanczosNumericMatrix([data.T.astype(dtype),
                                           data.astype(dtype)])

    eigenvalues, eigenvectors = Lanczos.solveEigenSystem(SM,
                                                         ncomponents,
                                                         shift=0.0,
                                                         tol=1.0e-15)
    SM = None

    # Restore the original (uncentered) data in place.
    numpy.add(data, avg, data)

    images = numpy.zeros((ncomponents, npixels), data.dtype)
    vectors = numpy.zeros((ncomponents, N), dtype)
    for i in range(ncomponents):
        vectors[i, :] = eigenvectors[i].vr
        # Project the data on each eigenvector to build the score image.
        images[i, :] = dotblas.dot(data,
                                   (eigenvectors[i].vr).astype(data.dtype))
    data = None
    images.shape = ncomponents, r, c
    return images, eigenvalues, vectors
def lanczosPCA2(stack, ncomponents=10, binning=None, **kw):
    """
    This is a fast method, but it may loose information

    PCA via a binned (spatially reduced) covariance matrix solved with the
    Lanczos eigensolver, then refined on the unbinned data with a small
    dense eigenproblem.

    :param stack: object with ``data`` attribute or a numpy array of shape
                  (rows, cols, nchannels)
    :param ncomponents: number of principal components to return
    :param binning: unused; the spatial binning factor is chosen internally
    :returns: (images, eigenvalues, eigenvectors)
    """
    if hasattr(stack, "info") and hasattr(stack, "data"):
        data = stack.data
    else:
        data = stack
    # check we have received a numpy.ndarray and not an HDF5 group
    # or other type of dynamically loaded data
    if not isinstance(data, numpy.ndarray):
        raise TypeError(\
            "lanczosPCA2 is only supported when using numpy arrays")
    r, c, N = data.shape
    npixels = r * c  # number of pixels
    data.shape = r * c, N

    # Choose a spatial binning factor from the number of pixels.
    # NOTE(review): the first test is a plain "if", so npixels < 2000
    # falls through and ends with BINNING = 4, not 2 — looks unintended.
    if npixels < 2000:
        BINNING = 2
    if npixels < 5000:
        BINNING = 4
    elif npixels < 10000:
        BINNING = 8
    elif npixels < 20000:
        BINNING = 10
    elif npixels < 30000:
        BINNING = 15
    elif npixels < 60000:
        BINNING = 20
    else:
        BINNING = 30
    if BINNING is not None:
        # Keep a reference to the full-resolution data for the final
        # projections; "data" becomes the spatially binned version.
        dataorig = data
        # "reminder" = remainder pixels that do not fill a whole bin.
        reminder = npixels % BINNING
        if reminder:
            data = data[0:BINNING * int(npixels / BINNING), :]
        # NOTE(review): float shape on Python 3 — data.shape[0] / BINNING
        # should be an integer division; confirm against target runtime.
        data.shape = data.shape[0] / BINNING, BINNING, data.shape[1]
        data = numpy.swapaxes(data, 1, 2)
        data = numpy.sum(data, axis=-1)
        rc = int(r * c / BINNING)
    tipo = numpy.float64
    # Solve for a few extra eigenvectors to stabilize the refinement step.
    neig = ncomponents + 5

    # it does not create the covariance matrix but performs two multiplications
    rappmatrix = "doppia"
    # it creates the covariance matrix but performs only one multiplication
    rappmatrix = "singola"

    # compute the mean spectrum and center the binned data in place
    mediadata = numpy.sum(data, axis=0) / numpy.array([len(data)], data.dtype)
    numpy.subtract(data, mediadata, data)

    Lanczos.LanczosNumericMatrix.tipo = tipo
    Lanczos.LanczosNumericVector.tipo = tipo

    if rappmatrix == "singola":
        SM = [dotblas.dot(data.T, data).astype(tipo)]
        SM = Lanczos.LanczosNumericMatrix(SM)
    else:
        SM = Lanczos.LanczosNumericMatrix([data.T.astype(tipo),
                                           data.astype(tipo)])

    # calculate eigenvalues and eigenvectors of the binned covariance
    ev, eve = Lanczos.solveEigenSystem(SM, neig, shift=0.0, tol=1.0e-7)
    SM = None
    rc = rc * BINNING

    # Project the full-resolution data on the approximate eigenvectors.
    newmat = numpy.zeros((r * c, neig), numpy.float64)
    data = data.astype(tipo)
    # numpy in-place addition to make sure not intermediate copies are made
    numpy.add(data, mediadata, data)
    for i in range(neig):
        newmat[:, i] = dotblas.dot(dataorig, (eve[i].vr).astype(dataorig.dtype))

    # Refine with a small dense eigenproblem on the projected data.
    newcov = dotblas.dot(newmat.T, newmat)
    evals, evects = numpy.linalg.eigh(newcov)

    # NOTE(review): eve.vr[:neig] — eve is indexed elsewhere as a sequence
    # (eve[i].vr); this attribute access on the whole object needs
    # confirmation against the Lanczos module's return type.
    nuovispettri = dotblas.dot(evects, eve.vr[:neig])
    images = numpy.zeros((ncomponents, npixels), data.dtype)
    vectors = numpy.zeros((ncomponents, N), tipo)
    # numpy.linalg.eigh returns eigenvalues in ascending order, so take
    # the components from the end (largest first).
    for i in range(ncomponents):
        vectors[i, :] = nuovispettri[-1 - i, :]
        images[i, :] = dotblas.dot(newmat,
                                   evects[-1 - i].astype(dataorig.dtype))
    images.shape = ncomponents, r, c
    return images, evals, vectors
def multipleArrayPCA(stackList, ncomponents=10, binning=None, **kw):
    """
    Given a list of arrays, calculate the requested principal components from
    the matrix resulting from their column concatenation. Therefore, all the
    input arrays must have the same number of rows.

    :param stackList: list of numpy arrays (or objects with ``data``) sharing
                      the same number of rows/pixels
    :param ncomponents: number of principal components to extract
    :param binning: unused; kept for interface compatibility
    :returns: (images, eigenvalues, eigenvectors) — images has shape
              (ncomponents, r, c) and eigenvectors span the concatenated
              channel axis of all input arrays
    """
    stack = stackList[0]
    if hasattr(stack, "info") and hasattr(stack, "data"):
        data = stack.data
    else:
        data = stack

    if not isinstance(data, numpy.ndarray):
        raise TypeError(\
            "multipleArrayPCA is only supported when using numpy arrays")

    # Derive the number of pixels from the first array; all arrays are
    # assumed to share it.
    if len(data.shape) == 3:
        r, c = data.shape[:2]
        npixels = r * c
    else:
        c = None
        r = data.shape[0]
        npixels = r

    #reshape and subtract mean to all the input data
    # (each array is centered in place; means are kept to restore later)
    shapeList = []
    avgList = []
    eigenvectorLength = 0
    for i in range(len(stackList)):
        shape = stackList[i].shape
        eigenvectorLength += shape[-1]
        shapeList.append(shape)
        stackList[i].shape = npixels, -1
        avg = numpy.sum(stackList[i], 0) / (1.0 * npixels)
        numpy.subtract(stackList[i], avg, stackList[i])
        avgList.append(avg)

    #create the needed storage space for the covariance matrix
    covMatrix = numpy.zeros((eigenvectorLength, eigenvectorLength),
                            numpy.float32)

    # Fill the covariance matrix block by block. Only the upper triangle
    # of blocks is computed; the lower triangle is obtained by transposing
    # the symmetric block already stored (located via indexDict).
    rowOffset = 0
    indexDict = {}
    for i in range(len(stackList)):
        iVectorLength = shapeList[i][-1]
        colOffset = 0
        for j in range(len(stackList)):
            jVectorLength = shapeList[j][-1]
            if i <= j:
                covMatrix[rowOffset:(rowOffset + iVectorLength),
                          colOffset:(colOffset + jVectorLength)] =\
                          dotblas.dot(stackList[i].T, stackList[j])
                if i < j:
                    # Remember where this off-diagonal block was stored.
                    key = "%02d%02d" % (i, j)
                    indexDict[key] = (rowOffset, rowOffset + iVectorLength,
                                      colOffset, colOffset + jVectorLength)
            else:
                # Mirror the block computed for (j, i).
                key = "%02d%02d" % (j, i)
                rowMin, rowMax, colMin, colMax = indexDict[key]
                covMatrix[rowOffset:(rowOffset + iVectorLength),
                          colOffset:(colOffset + jVectorLength)] =\
                          covMatrix[rowMin:rowMax, colMin:colMax].T
            colOffset += jVectorLength
        rowOffset += iVectorLength
    indexDict = None

    #I have the covariance matrix, calculate the eigenvectors and eigenvalues
    covMatrix = [covMatrix]
    covMatrix = Lanczos.LanczosNumericMatrix(covMatrix)
    eigenvalues, evectors = Lanczos.solveEigenSystem(covMatrix,
                                                     ncomponents,
                                                     shift=0.0,
                                                     tol=1.0e-15)
    covMatrix = None

    images = numpy.zeros((ncomponents, npixels), numpy.float32)
    eigenvectors = numpy.zeros((ncomponents, eigenvectorLength),
                               numpy.float32)
    # Accumulate each array's contribution to the score images using its
    # slice of the concatenated eigenvector.
    for i in range(ncomponents):
        eigenvectors[i, :] = evectors[i].vr
        colOffset = 0
        for j in range(len(stackList)):
            jVectorLength = shapeList[j][-1]
            images[i, :] +=\
                dotblas.dot(stackList[j],
                            eigenvectors[i,
                                         colOffset:(colOffset + jVectorLength)])
            colOffset += jVectorLength

    #restore shapes and values
    for i in range(len(stackList)):
        numpy.add(stackList[i], avgList[i], stackList[i])
        stackList[i].shape = shapeList[i]

    if c is None:
        images.shape = ncomponents, r, 1
    else:
        images.shape = ncomponents, r, c
    return images, eigenvalues, eigenvectors
def expectationMaximizationPCA(stack, ncomponents=10, binning=None, **kw):
    """
    This is a fast method when the number of components is small

    Iterative (power-method style) extraction of principal components:
    each component is obtained by repeatedly multiplying a random vector
    by the data, then deflating the dataset.

    :param stack: object with ``data`` attribute or a numpy array of shape
                  (rows, cols, nchannels) or (rows, nchannels)
    :param ncomponents: number of principal components to extract
    :param binning: optional integer channel binning factor (None -> 1)
    :returns: (images, eigenvalues, eigenvectors)
    :raises ValueError: if ncomponents exceeds the number of channels
    """
    if DEBUG:
        print("expectationMaximizationPCA")
    #This part is common to all ...
    if binning is None:
        binning = 1
    if hasattr(stack, "info") and hasattr(stack, "data"):
        data = stack.data
    else:
        data = stack
    if len(data.shape) == 3:
        r, c, N = data.shape
        data.shape = r * c, N
    else:
        r, N = data.shape
        c = 1
    if binning > 1:
        # Integer division (//) required: a float shape raises on Python 3.
        data = numpy.reshape(data,
                             [data.shape[0], data.shape[1] // binning,
                              binning])
        data = numpy.sum(data, axis=-1)
        N //= binning
    if ncomponents > N:
        raise ValueError("Number of components too high.")
    #end of common part

    # numpy.float was removed in NumPy 1.20; use the explicit float64.
    avg = numpy.sum(data, axis=0, dtype=numpy.float64) / (1.0 * r * c)

    # Center in place (restored before returning); dataw is a working copy
    # that gets deflated as components are extracted.
    numpy.subtract(data, avg, data)
    dataw = data * 1

    images = numpy.zeros((ncomponents, r * c), data.dtype)
    eigenvalues = numpy.zeros((ncomponents,), data.dtype)
    eigenvectors = numpy.zeros((ncomponents, N), data.dtype)

    for i in range(ncomponents):
        #generate a random vector
        p = numpy.random.random(N)
        #10 iterations seems to be fairly accurate, but it is
        #slow when reaching "noise" components.
        #A variation threshold of 1 % seems to be acceptable.
        tmod_old = 0
        tmod = 0.02
        j = 0
        max_iter = 7
        while ((abs(tmod - tmod_old) / tmod) > 0.01) and (j < max_iter):
            tmod_old = tmod
            t = 0.0
            # Accumulate the data-weighted direction (power iteration step).
            for k in range(r * c):
                t += dotblas.dot(dataw[k, :], p.T) * dataw[k, :]
            tmod = numpy.sqrt(numpy.sum(t * t))
            p = t / tmod
            j += 1
        eigenvectors[i, :] = p

        #subtract the found component from the dataset (deflation)
        for k in range(r * c):
            dataw[k, :] -= dotblas.dot(dataw[k, :], p.T) * p

    # calculate eigenvalues via the Rayleigh Quotients:
    # eigenvalue = \
    # (Eigenvector.T * Covariance * EigenVector)/ (Eigenvector.T * Eigenvector)
    for i in range(ncomponents):
        tmp = dotblas.dot(data, eigenvectors[i, :].T)
        eigenvalues[i] = \
            dotblas.dot(tmp.T, tmp) / dotblas.dot(eigenvectors[i, :].T,
                                                  eigenvectors[i, :])

    #Generate the eigenimages
    for i0 in range(ncomponents):
        images[i0, :] = dotblas.dot(data, eigenvectors[i0, :])

    #restore the original data
    numpy.add(data, avg, data)

    #reshape the images
    images.shape = ncomponents, r, c
    return images, eigenvalues, eigenvectors
def numpyPCA(stack, ncomponents=10, binning=None, **kw):
    """
    This is a covariance method using numpy

    Thin dispatcher: resolves the spectral (MCA) axis index and delegates
    the actual computation to PCATools.numpyPCA.
    """
    if DEBUG:
        print("PCAModule.numpyPCA called")
    # Resolve the spectral axis: prefer the stack metadata, then an
    # explicit keyword, and finally fall back to the last axis.
    if hasattr(stack, "info"):
        mca_index = stack.info.get('McaIndex', -1)
    elif "index" in kw:
        mca_index = kw["index"]
    else:
        print("WARNING: Assuming index is -1 in numpyPCA")
        mca_index = -1
    return PCATools.numpyPCA(stack,
                             index=mca_index,
                             ncomponents=ncomponents,
                             binning=binning,
                             **kw)
def mdpPCASVDFloat32(stack, ncomponents=10, binning=None, mask=None,
                     spectral_mask=None, **kw):
    """Convenience wrapper: mdpPCA with float32 precision and SVD enabled."""
    return mdpPCA(stack,
                  ncomponents,
                  binning=binning,
                  dtype='float32',
                  svd='True',
                  mask=mask,
                  spectral_mask=spectral_mask,
                  **kw)
def mdpPCASVDFloat64(stack, ncomponents=10, binning=None, mask=None,
                     spectral_mask=None, **kw):
    """Convenience wrapper: mdpPCA with float64 precision and SVD enabled."""
    return mdpPCA(stack,
                  ncomponents,
                  binning=binning,
                  dtype='float64',
                  svd='True',
                  mask=mask,
                  spectral_mask=spectral_mask,
                  **kw)
def mdpICAFloat32(stack, ncomponents=10, binning=None, mask=None,
                  spectral_mask=None, **kw):
    """Convenience wrapper: mdpICA with float32 precision and SVD enabled."""
    return mdpICA(stack,
                  ncomponents,
                  binning=binning,
                  dtype='float32',
                  svd='True',
                  mask=mask,
                  spectral_mask=spectral_mask,
                  **kw)
def mdpICAFloat64(stack, ncomponents=10, binning=None, mask=None,
                  spectral_mask=None, **kw):
    """Convenience wrapper: mdpICA with float64 precision and SVD enabled."""
    return mdpICA(stack,
                  ncomponents,
                  binning=binning,
                  dtype='float64',
                  svd='True',
                  mask=mask,
                  spectral_mask=spectral_mask,
                  **kw)
def mdpPCA(stack, ncomponents=10, binning=None, dtype='float64', svd='True',
           mask=None, spectral_mask=None, **kw):
    """PCA using the MDP (Modular toolkit for Data Processing) PCANode.

    Data are fed to the node in chunks so that dynamically loaded (non
    numpy) stacks can be processed without holding everything in memory.

    :param stack: object with ``data`` attribute or an array-like of shape
                  (rows, cols, nchannels) or (npixels, nchannels)
    :param ncomponents: number of principal components
    :param binning: optional integer channel binning factor (None -> 1)
    :param dtype: dtype string passed to the MDP node
    :param svd: passed to PCANode's svd argument.
                NOTE(review): this is the *string* 'True', not the boolean —
                truthy either way, but confirm against the MDP API.
    :param mask: unused here; kept for interface compatibility
    :param spectral_mask: optional array; only channels where the mask is > 0
                          are used for training and projection
    :returns: (images, eigenvalues, eigenvectors)
    """
    if DEBUG:
        print("MDP Method")
        print("binning =", binning)
        print("dtype = ", dtype)
        print("svd = ", svd)
    for key in kw:
        print("mdpPCA Key ignored: %s" % key)
    #This part is common to all ...
    if binning is None:
        binning = 1

    if hasattr(stack, "info") and hasattr(stack, "data"):
        data = stack.data[:]
    else:
        data = stack[:]

    oldShape = data.shape
    if len(data.shape) == 3:
        r, c, N = data.shape
        # data can be dynamically loaded
        if isinstance(data, numpy.ndarray):
            data.shape = r * c, N
    else:
        r, N = data.shape
        c = 1

    if binning > 1:
        if isinstance(data, numpy.ndarray):
            # NOTE(review): on Python 3, data.shape[1] / binning is a float
            # and reshape will fail; integer division is expected here.
            data = numpy.reshape(data,
                                 [data.shape[0], data.shape[1] / binning,
                                  binning])
            data = numpy.sum(data, axis=-1)
        N /= binning
    if ncomponents > N:
        # Restore the original shape before raising.
        if binning == 1:
            if data.shape != oldShape:
                data.shape = oldShape
        raise ValueError("Number of components too high.")
    #end of common part
    #begin the specific coding
    pca = mdp.nodes.PCANode(output_dim=ncomponents, dtype=dtype, svd=svd)

    shape = data.shape
    if len(data.shape) == 3:
        # 3D (dynamically loaded) case: train row-block by row-block.
        step = 10
        if r > step:
            last = step * (int(r / step) - 1)
            for i in range(0, last, step):
                for j in range(step):
                    print("Training data %d out of %d" % (i + j + 1, r))
                tmpData = data[i:(i + step), :, :]
                if binning > 1:
                    tmpData.shape = (step * shape[1],
                                     shape[2] / binning, binning)
                    tmpData = numpy.sum(tmpData, axis=-1)
                else:
                    tmpData.shape = step * shape[1], shape[2]
                if spectral_mask is None:
                    pca.train(tmpData)
                else:
                    pca.train(tmpData[:, spectral_mask > 0])
                tmpData = None
            # "i" is the start of the last full block processed above.
            last = i + step
        else:
            last = 0
        # Train on the remaining rows one at a time.
        if binning > 1:
            for i in range(last, r):
                print("Training data %d out of %d" % (i + 1, r))
                tmpData = data[i, :, :]
                tmpData.shape = shape[1], shape[2] / binning, binning
                tmpData = numpy.sum(tmpData, axis=-1)
                if spectral_mask is None:
                    pca.train(tmpData)
                else:
                    pca.train(tmpData[:, spectral_mask > 0])
                tmpData = None
        else:
            for i in range(last, r):
                print("Training data %d out of %d" % (i + 1, r))
                if spectral_mask is None:
                    pca.train(data[i, :, :])
                else:
                    pca.train(data[i, :, spectral_mask > 0])
    else:
        # 2D (in-memory) case: choose a chunking strategy by size.
        if data.shape[0] > 10000:
            step = 1000
            last = step * (int(data.shape[0] / step) - 1)
            if spectral_mask is None:
                for i in range(0, last, step):
                    print("Training data from %d to %d of %d" %\
                          (i + 1, i + step, data.shape[0]))
                    pca.train(data[i:(i + step), :])
                print("Training data from %d to end of %d" %\
                      (i + step + 1, data.shape[0]))
                pca.train(data[(i + step):, :])
            else:
                for i in range(0, last, step):
                    print("Training data from %d to %d of %d" %\
                          (i + 1, i + step, data.shape[0]))
                    pca.train(data[i:(i + step), spectral_mask > 0])
                # TODO i is undefined here in the print statement
                # (only when the loop above executed zero times)
                print("Training data from %d to end of %d" %\
                      (i + step + 1, data.shape[0]))
                pca.train(data[(i + step):, spectral_mask > 0])
        elif data.shape[0] > 1000:
            # Two halves.
            i = int(data.shape[0] / 2)
            if spectral_mask is None:
                pca.train(data[:i, :])
            else:
                pca.train(data[:i, spectral_mask > 0])
            if DEBUG:
                print("Half training")
            if spectral_mask is None:
                pca.train(data[i:, :])
            else:
                pca.train(data[i:, spectral_mask > 0])
            if DEBUG:
                print("Full training")
        else:
            # Small enough for a single pass.
            if spectral_mask is None:
                pca.train(data)
            else:
                pca.train(data[:, spectral_mask > 0])
    pca.stop_training()
    # avg = pca.avg
    eigenvalues = pca.d
    eigenvectors = pca.v.T
    proj = pca.get_projmatrix(transposed=0)

    # Build the score images by projecting the (chunked) data.
    if len(data.shape) == 3:
        images = numpy.zeros((ncomponents, r, c), data.dtype)
        for i in range(r):
            print("Building images. Projecting data %d out of %d" % (i + 1, r))
            if binning > 1:
                if spectral_mask is None:
                    tmpData = data[i, :, :]
                else:
                    tmpData = data[i, :, spectral_mask > 0]
                tmpData.shape = data.shape[1], data.shape[2] / binning, binning
                tmpData = numpy.sum(tmpData, axis=-1)
                images[:, i, :] = numpy.dot(proj.astype(data.dtype),
                                            tmpData.T)
            else:
                if spectral_mask is None:
                    images[:, i, :] = numpy.dot(proj.astype(data.dtype),
                                                data[i, :, :].T)
                else:
                    images[:, i, :] = numpy.dot(proj.astype(data.dtype),
                                                data[i, :, spectral_mask > 0].T)
    else:
        if spectral_mask is None:
            images = numpy.dot(proj.astype(data.dtype), data.T)
        else:
            images = numpy.dot(proj.astype(data.dtype),
                               data[:, spectral_mask > 0].T)

    #make sure the shape of the original data is not modified
    if hasattr(stack, "info") and hasattr(stack, "data"):
        if stack.data.shape != oldShape:
            stack.data.shape = oldShape
    else:
        if stack.shape != oldShape:
            stack.shape = oldShape

    if spectral_mask is not None:
        # Expand the masked eigenvectors back to full channel length,
        # zero-filling the excluded channels.
        eigenvectors = numpy.zeros((ncomponents, N), pca.v.dtype)
        for i in range(ncomponents):
            eigenvectors[i, spectral_mask > 0] = pca.v.T[i]

    #reshape the images
    images.shape = ncomponents, r, c
    return images, eigenvalues, eigenvectors
def mdpICA(stack, ncomponents=10, binning=None, dtype='float64', svd='True',
           mask=None, spectral_mask=None, **kw):
    """ICA using MDP (TDSEPNode when mdp >= 2.5, FastICANode otherwise).

    Returns twice the requested number of "vectors"/"images": the first
    ncomponents rows are the ICA results, the following ncomponents rows
    are the underlying PCA (whitening) results.

    :param stack: object with ``data`` attribute or an array-like of shape
                  (rows, cols, nchannels) or (npixels, nchannels)
    :param ncomponents: number of independent components
    :param binning: optional integer channel binning factor (None -> 1)
    :param dtype: dtype string for the MDP node (FastICA path only; the
                  TDSEP path hard-codes "float64")
    :param svd: forwarded through white_parm to the whitening node
    :param mask: unused here; kept for interface compatibility
    :param spectral_mask: optional array; only channels where the mask is > 0
                          are used
    :returns: (images, eigenvalues, vectors) with leading dimension
              2 * ncomponents for images and vectors
    """
    for key in kw:
        print("mdpICA Key ignored: %s" % key)
    #This part is common to all ...
    if binning is None:
        binning = 1

    if hasattr(stack, "info") and hasattr(stack, "data"):
        data = stack.data[:]
    else:
        data = stack[:]

    oldShape = data.shape
    if len(data.shape) == 3:
        r, c, N = data.shape
        # data can be dynamically loaded; only reshape real numpy arrays
        if isinstance(data, numpy.ndarray):
            data.shape = r * c, N
    else:
        r, N = data.shape
        c = 1

    if binning > 1:
        if isinstance(data, numpy.ndarray):
            # NOTE(review): on Python 3, data.shape[1] / binning is a float
            # and reshape will fail; integer division is expected here.
            data = numpy.reshape(data,
                                 [data.shape[0], data.shape[1] / binning,
                                  binning])
            data = numpy.sum(data, axis=-1)
        N /= binning
    if ncomponents > N:
        if binning == 1:
            if data.shape != oldShape:
                data.shape = oldShape
        raise ValueError("Number of components too high.")

    # The "if 1:" keeps an alternative (dead) FastICA-only code path below.
    if 1:
        # NOTE(review): mdp.__version__ is a string compared to a float;
        # this relies on implementation-specific comparison behavior.
        if (mdp.__version__ >= 2.5):
            if DEBUG:
                print("TDSEPNone")
            ica = mdp.nodes.TDSEPNode(white_comp=ncomponents,
                                      verbose=False,
                                      dtype="float64",
                                      white_parm={'svd': svd})
            if DEBUG:
                t0 = time.time()
            shape = data.shape
            if len(data.shape) == 3:
                # 3D (dynamically loaded) case: train row-block by row-block.
                if r > 10:
                    step = 10
                    last = step * (int(r / step) - 1)
                    for i in range(0, last, step):
                        print("Training data from %d to %d out of %d" %\
                              (i + 1, i + step, r))
                        tmpData = data[i:(i + step), :, :]
                        if binning > 1:
                            tmpData.shape = (step * shape[1],
                                             shape[2] / binning, binning)
                            tmpData = numpy.sum(tmpData, axis=-1)
                        else:
                            tmpData.shape = step * shape[1], shape[2]
                        if spectral_mask is None:
                            ica.train(tmpData)
                        else:
                            ica.train(tmpData[:, spectral_mask > 0])
                        tmpData = None
                    # "i" is the start of the last full block processed.
                    last = i + step
                else:
                    last = 0
                # Remaining rows one at a time.
                if binning > 1:
                    for i in range(last, r):
                        print("Training data %d out of %d" % (i + 1, r))
                        tmpData = data[i, :, :]
                        tmpData.shape = shape[1], shape[2] / binning, binning
                        tmpData = numpy.sum(tmpData, axis=-1)
                        if spectral_mask is None:
                            ica.train(tmpData)
                        else:
                            ica.train(tmpData[:, spectral_mask > 0])
                        tmpData = None
                else:
                    for i in range(last, r):
                        print("Training data %d out of %d" % (i + 1, r))
                        if spectral_mask is None:
                            ica.train(data[i, :, :])
                        else:
                            ica.train(data[i, :, spectral_mask > 0])
            else:
                # 2D (in-memory) case: chunking strategy by size.
                if data.shape[0] > 10000:
                    step = 1000
                    last = step * (int(data.shape[0] / step) - 1)
                    for i in range(0, last, step):
                        print("Training data from %d to %d of %d" %\
                              (i + 1, i + step, data.shape[0]))
                        if spectral_mask is None:
                            ica.train(data[i:(i + step), :])
                        else:
                            ica.train(data[i:(i + step), spectral_mask > 0])
                    # NOTE(review): "i" is undefined here if the loop above
                    # executed zero times (same issue as in mdpPCA).
                    print("Training data from %d to end of %d" %\
                          (i + step + 1, data.shape[0]))
                    if spectral_mask is None:
                        ica.train(data[(i + step):, :])
                    else:
                        ica.train(data[(i + step):, spectral_mask > 0])
                elif data.shape[0] > 1000:
                    i = int(data.shape[0] / 2)
                    if spectral_mask is None:
                        ica.train(data[:i, :])
                    else:
                        ica.train(data[:i, spectral_mask > 0])
                    if DEBUG:
                        print("Half training")
                    if spectral_mask is None:
                        ica.train(data[i:, :])
                    else:
                        ica.train(data[i:, spectral_mask > 0])
                    if DEBUG:
                        print("Full training")
                else:
                    if spectral_mask is None:
                        ica.train(data)
                    else:
                        ica.train(data[:, spectral_mask > 0])
            ica.stop_training()
            if DEBUG:
                print("training elapsed = %f" % (time.time() - t0))
        else:
            # Older mdp: pick one of the alternative nodes (FastICA active).
            if 0:
                print("ISFANode (alike)")
                ica = mdp.nodes.TDSEPNode(white_comp=ncomponents,
                                          verbose=False,
                                          dtype='float64',
                                          white_parm={'svd':svd})
            elif 1:
                if DEBUG:
                    print("FastICANode")
                ica = mdp.nodes.FastICANode(white_comp=ncomponents,
                                            verbose=False,
                                            dtype=dtype)
            else:
                if DEBUG:
                    print("CuBICANode")
                ica = mdp.nodes.CuBICANode(white_comp=ncomponents,
                                           verbose=False,
                                           dtype=dtype)
            ica.train(data)
            ica.stop_training()

        #output = ica.execute(data)
        proj = ica.get_projmatrix(transposed=0)

        # These are the PCA data (from the whitening node)
        eigenvalues = ica.white.d * 1
        eigenvectors = ica.white.v.T * 1

        # First half: ICA projections; second half: PCA eigenvectors.
        vectors = numpy.zeros((ncomponents * 2, N), data.dtype)
        if spectral_mask is None:
            vectors[0:ncomponents, :] = proj * 1  # ica components?
            vectors[ncomponents:, :] = eigenvectors
        else:
            # Expand masked results back to full channel length.
            vectors = numpy.zeros((2 * ncomponents, N), eigenvectors.dtype)
            vectors[0:ncomponents, spectral_mask > 0] = proj * 1
            vectors[ncomponents:, spectral_mask > 0] = eigenvectors

        if (len(data.shape) == 3):
            # Row-by-row projection for (possibly dynamically loaded) data.
            images = numpy.zeros((2 * ncomponents, r, c), data.dtype)
            for i in range(r):
                print("Building images. Projecting data %d out of %d" %\
                      (i + 1, r))
                if binning > 1:
                    if spectral_mask is None:
                        tmpData = data[i, :, :]
                    else:
                        tmpData = data[i, :, spectral_mask > 0]
                    tmpData.shape = (data.shape[1],
                                     data.shape[2] / binning, binning)
                    tmpData = numpy.sum(tmpData, axis=-1)
                    tmpData = ica.white.execute(tmpData)
                else:
                    if spectral_mask is None:
                        tmpData = ica.white.execute(data[i, :, :])
                    else:
                        tmpData = ica.white.execute(data[i, :,
                                                         spectral_mask > 0])
                # Second half: whitened (PCA) images; first half: ICA images.
                images[ncomponents:(2 * ncomponents), i, :] = tmpData.T[:, :]
                images[0:ncomponents, i, :] =\
                        numpy.dot(tmpData, ica.filters).T[:, :]
        else:
            images = numpy.zeros((2 * ncomponents, r * c), data.dtype)
            if spectral_mask is None:
                images[0:ncomponents, :] =\
                        numpy.dot(proj.astype(data.dtype), data.T)
            else:
                tmpData = data[:, spectral_mask > 0]
                images[0:ncomponents, :] =\
                        numpy.dot(proj.astype(data.dtype), tmpData.T)
            # Reuse "proj" for the whitening (PCA) projection matrix.
            proj = ica.white.get_projmatrix(transposed=0)
            if spectral_mask is None:
                images[ncomponents:(2 * ncomponents), :] =\
                        numpy.dot(proj.astype(data.dtype), data.T)
            else:
                images[ncomponents:(2 * ncomponents), :] =\
                        numpy.dot(proj.astype(data.dtype),
                                  data[:, spectral_mask > 0].T)
        images.shape = 2 * ncomponents, r, c
    else:
        # Dead alternative path: plain FastICA without chunked training.
        ica = mdp.nodes.FastICANode(white_comp=ncomponents,
                                    verbose=False, dtype=dtype)
        ica.train(data)
        output = ica.execute(data)

        proj = ica.get_projmatrix(transposed=0)

        # These are the PCA data
        # make sure no reference to the ica module is kept to make sure
        # memory is relased.
        eigenvalues = ica.white.d * 1
        eigenvectors = ica.white.v.T * 1

        images = numpy.zeros((2 * ncomponents, r * c), data.dtype)
        vectors = numpy.zeros((ncomponents * 2, N), data.dtype)
        vectors[0:ncomponents, :] = proj * 1  # ica components?
        vectors[ncomponents:, :] = eigenvectors
        images[0:ncomponents, :] = numpy.dot(proj.astype(data.dtype), data.T)
        proj = ica.white.get_projmatrix(transposed=0)
        images[ncomponents:(2 * ncomponents), :] =\
                numpy.dot(proj.astype(data.dtype), data.T)
        images.shape = 2 * ncomponents, r, c

    # Restore the caller's original data shape.
    if binning == 1:
        if data.shape != oldShape:
            data.shape = oldShape
    return images, eigenvalues, vectors
def main():
    """Command-line test driver: run mdpICA and mdpPCA on an EDF stack.

    Reads the stack named on the command line (or a hard-coded default test
    file), writes the component images to an EDF file next to the current
    directory, and prints timings and a slice of the eigenvectors.
    """
    from PyMca.PyMcaIO import EDFStack
    from PyMca.PyMcaIO import EdfFile
    import sys
    # Default test file (Windows path); overridden by argv[1] when given.
    inputfile = "D:\DATA\COTTE\ch09\ch09__mca_0005_0000_0000.edf"
    if len(sys.argv) > 1:
        inputfile = sys.argv[1]
        print(inputfile)
    elif os.path.exists(inputfile):
        print("Using a default test case")
    else:
        print("Usage:")
        print("python PCAModule.py indexed_edf_stack")
        sys.exit(0)
    stack = EDFStack.EDFStack(inputfile)
    r0, c0, n0 = stack.data.shape
    ncomponents = 5
    outfile = os.path.basename(inputfile) + "ICA.edf"
    e0 = time.time()
    images, eigenvalues, eigenvectors = mdpICA(stack.data, ncomponents,
                                               binning=1,
                                               svd=True,
                                               dtype='float64')
    #images, eigenvalues, eigenvectors = lanczosPCA2(stack.data,
    #                                                ncomponents,
    #                                                binning=1)
    # Overwrite any previous output file.
    if os.path.exists(outfile):
        os.remove(outfile)
    f = EdfFile.EdfFile(outfile)
    for i in range(ncomponents):
        f.WriteImage({}, images[i, :])
    # mdpICA may have flattened the stack; restore its original shape.
    stack.data.shape = r0, c0, n0
    print("PCA Elapsed = %f" % (time.time() - e0))
    print("eigenvectors PCA2 = ", eigenvectors[0, 200:230])
    # Reload the stack for a fresh PCA run and compare timings.
    stack = None
    stack = EDFStack.EDFStack(inputfile)
    e0 = time.time()
    images2, eigenvalues, eigenvectors = mdpPCA(stack.data, ncomponents,
                                                binning=1)
    stack.data.shape = r0, c0, n0
    print("MDP Elapsed = %f" % (time.time() - e0))
    print("eigenvectors MDP = ", eigenvectors[0, 200:230])
    if os.path.exists(outfile):
        os.remove(outfile)
    f = EdfFile.EdfFile(outfile)
    # Write the ICA images followed by the PCA images to the same file.
    for i in range(ncomponents):
        f.WriteImage({}, images[i, :])
    for i in range(ncomponents):
        f.WriteImage({}, images2[i, :])
    f = None
if __name__ == "__main__":
    # Allow running this module directly as a quick PCA/ICA test script.
    main()