libflame  revision_anchor
Functions
FLA_Tevd_v.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Tevd_compute_scaling_ops (int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *sigma)
FLA_Error FLA_Tevd_compute_scaling_opd (int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *sigma)
FLA_Error FLA_Tevd_find_submatrix_ops (int m_A, int ij_begin, float *buff_d, int inc_d, float *buff_e, int inc_e, int *ijTL, int *ijBR)
FLA_Error FLA_Tevd_find_submatrix_opd (int m_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
FLA_Error FLA_Tevd_find_perfshift_ops (int m_d, int m_l, float *buff_d, int inc_d, float *buff_e, int inc_e, float *buff_l, int inc_l, int *buff_lstat, int inc_lstat, float *buff_pu, int inc_pu, int *ij_shift)
FLA_Error FLA_Tevd_find_perfshift_opd (int m_d, int m_l, double *buff_d, int inc_d, double *buff_e, int inc_e, double *buff_l, int inc_l, int *buff_lstat, int inc_lstat, double *buff_pu, int inc_pu, int *ij_shift)
FLA_Error FLA_Norm1_tridiag (FLA_Obj d, FLA_Obj e, FLA_Obj norm)
FLA_Error FLA_Norm1_tridiag_ops (int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *norm)
FLA_Error FLA_Norm1_tridiag_opd (int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *norm)
FLA_Error FLA_Tevd_v_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj U, dim_t b_alg)
FLA_Error FLA_Tevd_v_ops_var1 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_U, int rs_U, int cs_U, int b_alg)
FLA_Error FLA_Tevd_v_opd_var1 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_U, int rs_U, int cs_U, int b_alg)
FLA_Error FLA_Tevd_v_opc_var1 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
FLA_Error FLA_Tevd_v_opz_var1 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)
FLA_Error FLA_Tevd_v_opt_var2 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj R, FLA_Obj W, FLA_Obj U, dim_t b_alg)
FLA_Error FLA_Tevd_v_ops_var2 (int m_A, int m_U, int n_G, int n_G_extra, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, int b_alg)
FLA_Error FLA_Tevd_v_opd_var2 (int m_A, int m_U, int n_G, int n_G_extra, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, int b_alg)
FLA_Error FLA_Tevd_v_opc_var2 (int m_A, int m_U, int n_G, int n_G_extra, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
FLA_Error FLA_Tevd_v_opz_var2 (int m_A, int m_U, int n_G, int n_G_extra, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)

Function Documentation

References FLA_Norm1_tridiag_opd(), FLA_Norm1_tridiag_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), and FLA_Obj_vector_inc().

{
    FLA_Datatype datatype;
    int          m_A;
    int          inc_d;
    int          inc_e;

    datatype = FLA_Obj_datatype( d );

    m_A      = FLA_Obj_vector_dim( d );

    inc_d    = FLA_Obj_vector_inc( d );
    inc_e    = FLA_Obj_vector_inc( e );
    

    switch ( datatype )
    {
        case FLA_FLOAT:
        {
            float*    buff_d    = FLA_FLOAT_PTR( d );
            float*    buff_e    = FLA_FLOAT_PTR( e );
            float*    buff_norm = FLA_FLOAT_PTR( norm );

            FLA_Norm1_tridiag_ops( m_A,
                                   buff_d, inc_d,
                                   buff_e, inc_e,
                                   buff_norm );

            break;
        }

        case FLA_DOUBLE:
        {
            double*   buff_d    = FLA_DOUBLE_PTR( d );
            double*   buff_e    = FLA_DOUBLE_PTR( e );
            double*   buff_norm = FLA_DOUBLE_PTR( norm );

            FLA_Norm1_tridiag_opd( m_A,
                                   buff_d, inc_d,
                                   buff_e, inc_e,
                                   buff_norm );

            break;
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Norm1_tridiag_opd ( int  m_A,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
double *  norm 
)

Referenced by FLA_Norm1_tridiag(), and FLA_Tevd_compute_scaling_opd().

{
    double* d  = buff_d;
    double* e  = buff_e;
    double  nm;
    int     i;

    if ( m_A == 1 )
    {
        nm = fabs( *d );
    }
    else
    {
        double d_first = d[ (0    )*inc_d ];
        double e_first = e[ (0    )*inc_e ];
        double e_last  = e[ (m_A-2)*inc_e ];
        double d_last  = d[ (m_A-1)*inc_d ];

        // Record the maximum of the absolute row/column sums for the
        // first and last row/columns.
        nm = max( fabs( d_first ) + fabs( e_first ),
                  fabs( e_last  ) + fabs( d_last  ) );

        for ( i = 1; i < m_A - 2; ++i )
        {
            double e0 = e[ (i-1)*inc_e ];
            double e1 = e[ (i  )*inc_e ];
            double d1 = d[ (i  )*inc_d ];

            // Update nm with the absolute row/column sum for the ith
            // row/column.
            nm = max( nm, fabs( e0 ) +
                          fabs( d1 ) +
                          fabs( e1 ) );
        }
    }

    *norm = nm;

    return FLA_SUCCESS;
}
FLA_Error FLA_Norm1_tridiag_ops ( int  m_A,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
float *  norm 
)

Referenced by FLA_Norm1_tridiag(), and FLA_Tevd_compute_scaling_ops().

{
    float*  d  = buff_d;
    float*  e  = buff_e;
    float   nm;
    int     i;

    if ( m_A == 1 )
    {
        nm = fabs( *d );
    }
    else
    {
        float  d_first = d[ (0    )*inc_d ];
        float  e_first = e[ (0    )*inc_e ];
        float  e_last  = e[ (m_A-2)*inc_e ];
        float  d_last  = d[ (m_A-1)*inc_d ];

        // Record the maximum of the absolute row/column sums for the
        // first and last row/columns.
        nm = max( fabs( d_first ) + fabs( e_first ),
                  fabs( e_last  ) + fabs( d_last  ) );

        for ( i = 1; i < m_A - 2; ++i )
        {
            float  e0 = e[ (i-1)*inc_e ];
            float  e1 = e[ (i  )*inc_e ];
            float  d1 = d[ (i  )*inc_d ];

            // Update nm with the absolute row/column sum for the ith
            // row/column.
            nm = max( nm, fabs( e0 ) +
                          fabs( d1 ) +
                          fabs( e1 ) );
        }
    }

    *norm = nm;

    return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_compute_scaling_opd ( int  m_A,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
double *  sigma 
)

References bl1_d1(), FLA_Mach_params_opd(), and FLA_Norm1_tridiag_opd().

{
    double one   = bl1_d1();
    double three = 3.0;
    double norm;
    double eps2;
    double safmin;
    double safmax;
    double ssfmin;
    double ssfmax;

    // Query some constants.
    eps2   = FLA_Mach_params_opd( FLA_MACH_EPS2 );
    safmin = FLA_Mach_params_opd( FLA_MACH_SFMIN );
    safmax = one / safmin;

    // Compute the acceptable range for the 1-norm;
    ssfmax = sqrt( safmax ) / three;
    ssfmin = sqrt( safmin ) / eps2;

    // Compute the 1-norm of the tridiagonal matrix.
    FLA_Norm1_tridiag_opd( m_A,
                           buff_d, inc_d,
                           buff_e, inc_e,
                           &norm );

    // Compute sigma accordingly if norm is outside of the range.
    if ( norm > ssfmax )
    {
        *sigma = ssfmax / norm;
    }
    else if ( norm < ssfmin )
    {
        *sigma = ssfmin / norm;
    }
    else
    {
        *sigma = one;
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_compute_scaling_ops ( int  m_A,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
float *  sigma 
)

References bl1_s1(), FLA_Mach_params_ops(), and FLA_Norm1_tridiag_ops().

{
    float  one   = bl1_s1();
    float  three = 3.0F;
    float  norm;
    float  eps2;
    float  safmin;
    float  safmax;
    float  ssfmin;
    float  ssfmax;

    // Query some constants.
    eps2   = FLA_Mach_params_ops( FLA_MACH_EPS2 );
    safmin = FLA_Mach_params_ops( FLA_MACH_SFMIN );
    safmax = one / safmin;

    // Compute the acceptable range for the 1-norm;
    ssfmax = sqrt( safmax ) / three;
    ssfmin = sqrt( safmin ) / eps2;

    // Compute the 1-norm of the tridiagonal matrix.
    FLA_Norm1_tridiag_ops( m_A,
                           buff_d, inc_d,
                           buff_e, inc_e,
                           &norm );

    // Compute sigma accordingly if norm is outside of the range.
    if ( norm > ssfmax )
    {
        *sigma = ssfmax / norm;
    }
    else if ( norm < ssfmin )
    {
        *sigma = ssfmin / norm;
    }
    else
    {
        *sigma = one;
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_find_perfshift_opd ( int  m_d,
int  m_l,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
double *  buff_l,
int  inc_l,
int *  buff_lstat,
int  inc_lstat,
double *  buff_pu,
int  inc_pu,
int *  ij_shift 
)

References FLA_Wilkshift_tridiag_opd().

Referenced by FLA_Tevd_eigval_v_opd_var3().

{
    double* d1p;
    double* e1p;
    double* d2p;
    double  wilkshift;
    int     i;
    int     ij_cand;
    double  dist_cand;
    double  pshift_cand;
    
    d1p = buff_d + (m_d-2)*inc_d;
    e1p = buff_e + (m_d-2)*inc_e;
    d2p = buff_d + (m_d-1)*inc_d;

    if ( *buff_ls == -1 )
    {
        *ij_shift = -1;
        return FLA_FAILURE;
    }

    FLA_Wilkshift_tridiag_opd( *d1p,
                               *e1p,
                               *d2p,
                               &wilkshift );

/*
    // If we have shifted here previously, use a Wilkinson shfit.
    prev_shift = buff_pu[ (m_d-1)*inc_pu ];

    if ( prev_shift != 0.0 )
    {
        // *shift = prev_shift;
        *shift = wilkshift;
        return FLA_SUCCESS;
    }
*/
    ij_cand = -1;

    // Find an available (unused) shift.
    for ( i = 0; i < m_l; ++i )
    {
        int* status = buff_ls + (i  )*inc_ls;

        if ( *status == 0 )
        {
            double* lambda1 = buff_l + (i  )*inc_l;
            ij_cand     = i;
            pshift_cand = *lambda1;
            dist_cand   = fabs( wilkshift - pshift_cand );
        }
    }

    if ( ij_cand == -1 )
    {
        *ij_shift = -1;
        *buff_ls  = -1;
        return FLA_FAILURE;
    }

    // Now try to find a shift closer to wilkshift than the
    // first one we found.
    for ( i = 0; i < m_l; ++i )
    {
        double* lambda1 = buff_l  + (i  )*inc_l;
        int*    status  = buff_ls + (i  )*inc_ls;
        double  dist    = fabs( wilkshift - *lambda1 );

        if ( *status == 1 ) continue;

        if ( dist < dist_cand )
        {
            ij_cand = i;
            pshift_cand = *lambda1;
            dist_cand = dist;
        }
    }

    *ij_shift = ij_cand;

    return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_find_perfshift_ops ( int  m_d,
int  m_l,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
float *  buff_l,
int  inc_l,
int *  buff_lstat,
int  inc_lstat,
float *  buff_pu,
int  inc_pu,
int *  ij_shift 
)
{
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_find_submatrix_opd ( int  m_A,
int  ij_begin,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
int *  ijTL,
int *  ijBR 
)

References bl1_d0(), and FLA_Mach_params_opd().

{
    double rzero = bl1_d0();
    double eps;
    int    ij_tl;
    int    ij_br;

    // Initialize some numerical constants.
    eps = FLA_Mach_params_opd( FLA_MACH_EPS );

    // Search for the first non-zero subdiagonal element starting at
    // the index specified by ij_begin.
    for ( ij_tl = ij_begin; ij_tl < m_A - 1; ++ij_tl )
    {
        double* d1     = buff_d + (ij_tl  )*inc_d;
        double* d2     = buff_d + (ij_tl+1)*inc_d;
        double* e1     = buff_e + (ij_tl  )*inc_e;
        double  abs_e1 = fabs( *e1 );

        // If we encounter a non-zero subdiagonal element that is close
        // enough to zero, set it to zero.
        if ( abs_e1 != rzero )
        {
            if ( abs_e1 <= eps * sqrt( fabs( *d1 ) ) *
                                 sqrt( fabs( *d2 ) ) )
            {
#ifdef PRINTF
printf( "FLA_Tevd_find_submatrix_opd: nudging non-zero subdiagonal element (e1) to zero.\n" );
printf( "                             d[%3d]        = %22.19e\n", ij_tl, *d1 );
printf( "                             e[%3d] d[%3d] = %22.19e %22.19e\n", ij_tl, ij_tl+1, *e1, *d2 );
#endif
                *e1 = rzero;
            }
        }

        // If we find a non-zero element, record it and break out of this
        // loop.
        if ( *e1 != rzero )
        {
#ifdef PRINTF
printf( "FLA_Tevd_find_submatrix_opd: found non-zero subdiagonal element\n" );
printf( "                             e[%3d] = %22.19e\n", ij_tl, *e1 );
#endif
            *ijTL = ij_tl;
            break;
        }
    }

    // If ij_tl was incremented all the way up to m_A - 1, then we didn't
    // find any non-zeros.
    if ( ij_tl == m_A - 1 )
    {
#ifdef PRINTF
printf( "FLA_Tevd_find_submatrix_opd: no submatrices found.\n" );
#endif
        return FLA_FAILURE;
    }

    // If we've gotten this far, then a non-zero subdiagonal element was
    // found. Now we must walk the remaining portion of the subdiagonal
    // to find the first zero element, or if one is not found, we simply
    // use the last element of the subdiagonal.
    for ( ij_br = ij_tl; ij_br < m_A - 1; ++ij_br )
    {
        double* d1     = buff_d + (ij_br  )*inc_d;
        double* d2     = buff_d + (ij_br+1)*inc_d;
        double* e1     = buff_e + (ij_br  )*inc_e;
        double  abs_e1 = fabs( *e1 );

        // If we encounter a non-zero subdiagonal element that is close
        // enough to zero, set it to zero.
        if ( abs_e1 != rzero )
        {
            if ( abs_e1 <= eps * sqrt( fabs( *d1 ) ) *
                                 sqrt( fabs( *d2 ) ) )
            {
#ifdef PRINTF
printf( "FLA_Tevd_find_submatrix_opd: nudging non-zero subdiagonal element (e1) to zero.\n" );
printf( "                             d[%3d]        = %22.19e\n", ij_br, *d1 );
printf( "                             e[%3d] d[%3d] = %22.19e %22.19e\n", ij_br, ij_br+1, *e1, *d2 );
#endif
                *e1 = rzero;
            }
        }

        // If we find a zero element, record it and break out of this
        // loop.
        if ( *e1 == rzero )
        {
#ifdef PRINTF
printf( "FLA_Tevd_find_submatrix_opd: found zero subdiagonal element\n" );
printf( "                             e[%3d] = %22.19e\n", ij_br, *e1 );
#endif
            break;
        }
    }

    // If a zero element was found, then ij_br should hold the index of
    // that element. If a zero element was not found, then ij_br should
    // hold m_A - 1. Either way, we save the value and return success.
    *ijBR = ij_br;

    return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_find_submatrix_ops ( int  m_A,
int  ij_begin,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
int *  ijTL,
int *  ijBR 
)
{
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_v_opc_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

Referenced by FLA_Tevd_v_opt_var1().

{
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_v_opc_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_G_extra,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_R,
int  rs_R,
int  cs_R,
scomplex buff_W,
int  rs_W,
int  cs_W,
scomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

Referenced by FLA_Tevd_v_opt_var2().

{
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_v_opd_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

References bl1_z1(), bl1_zsetm(), FLA_Abort(), FLA_Apply_G_rf_bld_var3(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().

Referenced by FLA_Tevd_v_opt_var1().

{
    dcomplex  one  = bl1_z1();

    dcomplex* G;
    double*   d1;
    double*   e1;
    int       r_val;
    int       done;
    int       m_G_sweep_max;
    int       ij_begin;
    int       ijTL, ijBR;
    int       m_A11;
    int       n_iter_perf;
    int       n_U_apply;
    int       total_deflations;
    int       n_deflations;
    int       n_iter_prev;
    int       n_iter_perf_sweep_max;

    // Initialize our completion flag.
    done = FALSE;

    // Initialize a counter that holds the maximum number of rows of G
    // that we would need to initialize for the next sweep.
    m_G_sweep_max = m_A - 1;

    // Initialize a counter for the total number of iterations performed.
    n_iter_prev = 0;

    // Iterate until the matrix has completely deflated.
    for ( total_deflations = 0; done != TRUE; )
    {
        // Initialize G to contain only identity rotations.
        bl1_zsetm( m_G_sweep_max,
                   n_G,
                   &one,
                   buff_G, rs_G, cs_G );

        // Keep track of the maximum number of iterations performed in the
        // current sweep. This is used when applying the sweep's Givens
        // rotations.
        n_iter_perf_sweep_max = 0;

        // Perform a sweep: Move through the matrix and perform a tridiagonal
        // EVD on each non-zero submatrix that is encountered. During the
        // first time through, ijTL will be 0 and ijBR will be m_A - 1.
        for ( ij_begin = 0; ij_begin < m_A; )
        {

#ifdef PRINTF
if ( ij_begin == 0 )
printf( "FLA_Tevd_v_opd_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
#endif

            // Search for the first submatrix along the diagonal that is
            // bounded by zeroes (or endpoints of the matrix). If no
            // submatrix is found (ie: if the entire subdiagonal is zero
            // then FLA_FAILURE is returned. This function also inspects
            // subdiagonal elements for proximity to zero. If a given
            // element is close enough to zero, then it is deemed
            // converged and manually set to zero.
            r_val = FLA_Tevd_find_submatrix_opd( m_A,
                                                 ij_begin,
                                                 buff_d, inc_d,
                                                 buff_e, inc_e,
                                                 &ijTL,
                                                 &ijBR );

            // Verify that a submatrix was found. If one was not found,
            // then we are done with the current sweep. Furthermore, if
            // a submatrix was not found AND we began our search at the
            // beginning of the matrix (ie: ij_begin == 0), then the
            // matrix has completely deflated and so we are done with
            // Francis step iteration.
            if ( r_val == FLA_FAILURE )
            {
                if ( ij_begin == 0 )
                {
#ifdef PRINTF
printf( "FLA_Tevd_v_opd_var1: subdiagonal is completely zero.\n" );
printf( "FLA_Tevd_v_opd_var1: Francis iteration is done!\n" );
#endif
                    done = TRUE;
                }

                // Break out of the current sweep so we can apply the last
                // remaining Givens rotations.
                break;
            }

            // If we got this far, then:
            //   (a) ijTL refers to the index of the first non-zero
            //       subdiagonal along the diagonal, and
            //   (b) ijBR refers to either:
            //       - the first zero element that occurs after ijTL, or
            //       - the the last diagonal element.
            // Note that ijTL and ijBR also correspond to the first and
            // last diagonal elements of the submatrix of interest. Thus,
            // we may compute the dimension of this submatrix as:
            m_A11 = ijBR - ijTL + 1;

#ifdef PRINTF
printf( "FLA_Tevd_v_opd_var1: ij_begin = %d\n", ij_begin );
printf( "FLA_Tevd_v_opd_var1: ijTL     = %d\n", ijTL );
printf( "FLA_Tevd_v_opd_var1: ijBR     = %d\n", ijBR );
printf( "FLA_Tevd_v_opd_var1: m_A11    = %d\n", m_A11 );
#endif

            // Adjust ij_begin, which gets us ready for the next submatrix
            // search in the current sweep.
            ij_begin = ijBR + 1;

            // Index to the submatrices upon which we will operate.
            d1 = buff_d + ijTL * inc_d;
            e1 = buff_e + ijTL * inc_e;
            G  = buff_G + ijTL * rs_G;

            // Search for a batch of eigenvalues, recursing on deflated
            // subproblems whenever a split occurs. Iteration continues
            // as long as:
            //   (a) there is still matrix left to operate on, and
            //   (b) the number of iterations performed in this batch is
            //       less than n_G.
            // If/when either of the two above conditions fails to hold,
            // the function returns.
            n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
                                                        n_G,
                                                        ijTL,
                                                        d1, inc_d,
                                                        e1, inc_e,
                                                        G,  rs_G, cs_G,
                                                        &n_iter_perf );

            // Record the number of deflations that were observed.
            total_deflations += n_deflations;

            // Update the maximum number of iterations performed in the
            // current sweep.
            n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );

#ifdef PRINTF
printf( "FLA_Tevd_v_opd_var1: deflations observed       = %d\n", n_deflations );
printf( "FLA_Tevd_v_opd_var1: total deflations observed = %d\n", total_deflations );
printf( "FLA_Tevd_v_opd_var1: num iterations performed  = %d\n", n_iter_perf );
#endif

            // Store the most recent value of ijBR in m_G_sweep_max.
            // When the sweep is done, this value will contain the minimum
            // number of rows of G we can apply and safely include all
            // non-identity rotations that were computed during the
            // eigenvalue searches.
            m_G_sweep_max = ijBR;

            // Make sure we haven't exceeded our maximum iteration count.
            if ( n_iter_prev >= m_A * n_iter_max )
            {
#ifdef PRINTF
printf( "FLA_Tevd_v_opd_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
#endif
                FLA_Abort();
                //return FLA_FAILURE;
            }
        }

        // The sweep is complete. Now we must apply the Givens rotations
        // that were accumulated during the sweep.

        // Recall that the number of columns of U to which we apply
        // rotations is one more than the number of rotations.
        n_U_apply = m_G_sweep_max + 1;

#ifdef PRINTF
printf( "FLA_Tevd_v_opd_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
#endif

        // Apply the Givens rotations. Note that we optimize the scope
        // of the operation in two ways:
        //   1. We only apply k sets of Givens rotations, where
        //      k = n_iter_perf_sweep_max. We could simply always apply
        //      n_G sets of rotations since G is initialized to contain
        //      identity rotations in every element, but we do this to
        //      save a little bit of time.
        //   2. We only apply to the first n_U_apply columns of A since
        //      this is the most we need to touch given the ijBR index
        //      bound of the last submatrix found in the previous sweep.
        //      Similar to above, we could simply always perform the
        //      application on all m_A columns of A, but instead we apply
        //      only to the first n_U_apply columns to save time.
        //FLA_Apply_G_rf_bld_var1( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var2( n_iter_perf_sweep_max,
        FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
                                 m_U,
                                 n_U_apply,
                                 buff_G, rs_G, cs_G,
                                 buff_U, rs_U, cs_U,
                                 b_alg );



        // Increment the total number of iterations previously performed.
        n_iter_prev += n_iter_perf_sweep_max;

#ifdef PRINTF
printf( "FLA_Tevd_v_opd_var1: total number of iterations performed: %d\n", n_iter_prev );
#endif
    }

    return n_iter_prev;
}
FLA_Error FLA_Tevd_v_opd_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_G_extra,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_R,
int  rs_R,
int  cs_R,
double *  buff_W,
int  rs_W,
int  cs_W,
double *  buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

References bl1_d0(), bl1_d1(), bl1_dcopymt(), bl1_dgemm(), bl1_dident(), bl1_z1(), bl1_zsetm(), BLIS1_NO_TRANSPOSE, FLA_Abort(), FLA_Apply_G_rf_bld_var3b(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().

Referenced by FLA_Tevd_v_opt_var2().

{
    dcomplex  one   = bl1_z1();
    double    rone  = bl1_d1();
    double    rzero = bl1_d0();

    dcomplex* G;
    double*   d1;
    double*   e1;
    int       r_val;
    int       done;
    int       m_G_sweep_max;
    int       ij_begin;
    int       ijTL, ijBR;
    int       m_A11;
    int       n_iter_perf;
    int       n_U_apply;
    int       total_deflations;
    int       n_deflations;
    int       n_iter_prev;
    int       n_iter_perf_sweep_max;

    // Initialize our completion flag.
    done = FALSE;

    // Initialize a counter that holds the maximum number of rows of G
    // that we would need to initialize for the next sweep.
    m_G_sweep_max = m_A - 1;

    // Initialize a counter for the total number of iterations performed.
    n_iter_prev = 0;

    // Initialize R to identity.
    bl1_dident( m_A,
                buff_R, rs_R, cs_R );

    // Iterate until the matrix has completely deflated.
    for ( total_deflations = 0; done != TRUE; )
    {

        // Initialize G to contain only identity rotations.
        bl1_zsetm( m_G_sweep_max,
                   n_G,
                   &one,
                   buff_G, rs_G, cs_G );

        // Keep track of the maximum number of iterations performed in the
        // current sweep. This is used when applying the sweep's Givens
        // rotations.
        n_iter_perf_sweep_max = 0;

        // Perform a sweep: Move through the matrix and perform a tridiagonal
        // EVD on each non-zero submatrix that is encountered. During the
        // first time through, ijTL will be 0 and ijBR will be m_A - 1.
        for ( ij_begin = 0; ij_begin < m_A;  )
        {

#ifdef PRINTF
if ( ij_begin == 0 )
printf( "FLA_Tevd_v_opd_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
#endif

            // Search for the first submatrix along the diagonal that is
            // bounded by zeroes (or endpoints of the matrix). If no
            // submatrix is found (ie: if the entire subdiagonal is zero
            // then FLA_FAILURE is returned. This function also inspects
            // subdiagonal elements for proximity to zero. If a given
            // element is close enough to zero, then it is deemed
            // converged and manually set to zero.
            r_val = FLA_Tevd_find_submatrix_opd( m_A,
                                                 ij_begin,
                                                 buff_d, inc_d,
                                                 buff_e, inc_e,
                                                 &ijTL,
                                                 &ijBR );

            // Verify that a submatrix was found. If one was not found,
            // then we are done with the current sweep. Furthermore, if
            // a submatrix was not found AND we began our search at the
            // beginning of the matrix (ie: ij_begin == 0), then the
            // matrix has completely deflated and so we are done with
            // Francis step iteration.
            if ( r_val == FLA_FAILURE )
            {
                if ( ij_begin == 0 )
                {
#ifdef PRINTF
printf( "FLA_Tevd_v_opd_var2: subdiagonal is completely zero.\n" );
printf( "FLA_Tevd_v_opd_var2: Francis iteration is done!\n" );
#endif
                    done = TRUE;
                }

                // Break out of the current sweep so we can apply the last
                // remaining Givens rotations.
                break;
            }

            // If we got this far, then:
            //   (a) ijTL refers to the index of the first non-zero
            //       subdiagonal along the diagonal, and
            //   (b) ijBR refers to either:
            //       - the first zero element that occurs after ijTL, or
            //       - the the last diagonal element.
            // Note that ijTL and ijBR also correspond to the first and
            // last diagonal elements of the submatrix of interest. Thus,
            // we may compute the dimension of this submatrix as:
            m_A11 = ijBR - ijTL + 1;

#ifdef PRINTF
printf( "FLA_Tevd_v_opd_var2: ij_begin = %d\n", ij_begin );
printf( "FLA_Tevd_v_opd_var2: ijTL     = %d\n", ijTL );
printf( "FLA_Tevd_v_opd_var2: ijBR     = %d\n", ijBR );
printf( "FLA_Tevd_v_opd_var2: m_A11    = %d\n", m_A11 );
#endif

            // Adjust ij_begin, which gets us ready for the next subproblem, if
            // there is one.
            ij_begin = ijBR + 1;

            // Index to the submatrices upon which we will operate.
            d1 = buff_d + ijTL * inc_d;
            e1 = buff_e + ijTL * inc_e;
            G  = buff_G + ijTL * rs_G;

            // Search for a batch of eigenvalues, recursing on deflated
            // subproblems whenever a split occurs. Iteration continues
            // as long as
            //   (a) there is still matrix left to operate on, and
            //   (b) the number of iterations performed in this batch is
            //       less than n_G.
            // If/when either of the two above conditions fails to hold,
            // the function returns.
            n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
                                                        n_G,
                                                        ijTL,
                                                        d1, inc_d,
                                                        e1, inc_e,
                                                        G,  rs_G, cs_G,
                                                        &n_iter_perf );

            // Record the number of deflations that we observed.
            total_deflations += n_deflations;

            // Update the maximum number of iterations performed in the
            // current sweep.
            n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );

#ifdef PRINTF
printf( "FLA_Tevd_v_opd_var2: deflations observed       = %d\n", n_deflations );
printf( "FLA_Tevd_v_opd_var2: total deflations observed = %d\n", total_deflations );
printf( "FLA_Tevd_v_opd_var2: num iterations            = %d\n", n_iter_perf );
#endif

            // Store the most recent value of ijBR in m_G_sweep_max.
            // When the sweep is done, this value will contain the minimum
            // number of rows of G we can apply and safely include all
            // non-identity rotations that were computed during the
            // eigenvalue searches.
            m_G_sweep_max = ijBR;

            // Make sure we haven't exceeded our maximum iteration count.
            if ( n_iter_prev >= m_A * n_iter_max )
            {
#ifdef PRINTF
printf( "FLA_Tevd_v_opd_var2: reached maximum total number of iterations: %d\n", n_iter_prev );
#endif
                FLA_Abort();
                //return FLA_FAILURE;
            }
        }

        // The sweep is complete. Now we must apply the Givens rotations
        // that were accumulated during the sweep.


        // Recall that the number of columns of U to which we apply
        // rotations is one more than the number of rotations.
        n_U_apply = m_G_sweep_max + 1;

        // Apply the Givens rotations that were computed as part of
        // the previous batch of iterations.
        //FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
        FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
                                  m_U,
                                  n_U_apply,
                                  n_iter_prev,
                                  buff_G, rs_G, cs_G,
                                  buff_R, rs_R, cs_R,
                                  b_alg );

#ifdef PRINTF
printf( "FLA_Tevd_v_opd_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
#endif

        // Increment the total number of iterations previously performed.
        n_iter_prev += n_iter_perf_sweep_max;
    }

    // Copy the contents of Q to temporary storage.
    bl1_dcopymt( BLIS1_NO_TRANSPOSE,
                 m_A,
                 m_A,
                 buff_U, rs_U, cs_U,
                 buff_W, rs_W, cs_W );


    // Multiply Q by R, overwriting U.
    bl1_dgemm( BLIS1_NO_TRANSPOSE,
               BLIS1_NO_TRANSPOSE,
               m_A,
               m_A,
               m_A,
               &rone,
               ( double* )buff_W, rs_W, cs_W,
                          buff_R, rs_R, cs_R,
               &rzero,
               ( double* )buff_U, rs_U, cs_U );

    return n_iter_prev;
}
FLA_Error FLA_Tevd_v_ops_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

Referenced by FLA_Tevd_v_opt_var1().

{
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_v_ops_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_G_extra,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_R,
int  rs_R,
int  cs_R,
float *  buff_W,
int  rs_W,
int  cs_W,
float *  buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

Referenced by FLA_Tevd_v_opt_var2().

{
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_v_opt_var1 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  U,
dim_t  b_alg 
)

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_Tevd_v_opc_var1(), FLA_Tevd_v_opd_var1(), FLA_Tevd_v_ops_var1(), and FLA_Tevd_v_opz_var1().

Referenced by FLA_Hevd_lv_unb_var1().

{
    FLA_Error    r_val = FLA_SUCCESS;
    FLA_Datatype datatype;
    int          m_A, m_U, n_G;
    int          inc_d;
    int          inc_e;
    int          rs_G, cs_G;
    int          rs_U, cs_U;

    datatype = FLA_Obj_datatype( U );

    m_A       = FLA_Obj_vector_dim( d );
    m_U       = FLA_Obj_length( U );
    n_G       = FLA_Obj_width( G );

    inc_d     = FLA_Obj_vector_inc( d );
    inc_e     = FLA_Obj_vector_inc( e );
    
    rs_G      = FLA_Obj_row_stride( G );
    cs_G      = FLA_Obj_col_stride( G );

    rs_U      = FLA_Obj_row_stride( U );
    cs_U      = FLA_Obj_col_stride( U );


    switch ( datatype )
    {
        case FLA_FLOAT:
        {
            float*    buff_d = FLA_FLOAT_PTR( d );
            float*    buff_e = FLA_FLOAT_PTR( e );
            scomplex* buff_G = FLA_COMPLEX_PTR( G );
            float*    buff_U = FLA_FLOAT_PTR( U );

            r_val = FLA_Tevd_v_ops_var1( m_A,
                                         m_U,
                                         n_G,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_U, rs_U, cs_U,
                                         b_alg );

            break;
        }

        case FLA_DOUBLE:
        {
            double*   buff_d = FLA_DOUBLE_PTR( d );
            double*   buff_e = FLA_DOUBLE_PTR( e );
            dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
            double*   buff_U = FLA_DOUBLE_PTR( U );

            r_val = FLA_Tevd_v_opd_var1( m_A,
                                         m_U,
                                         n_G,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_U, rs_U, cs_U,
                                         b_alg );

            break;
        }

        case FLA_COMPLEX:
        {
            float*    buff_d = FLA_FLOAT_PTR( d );
            float*    buff_e = FLA_FLOAT_PTR( e );
            scomplex* buff_G = FLA_COMPLEX_PTR( G );
            scomplex* buff_U = FLA_COMPLEX_PTR( U );

            r_val = FLA_Tevd_v_opc_var1( m_A,
                                         m_U,
                                         n_G,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_U, rs_U, cs_U,
                                         b_alg );

            break;
        }

        case FLA_DOUBLE_COMPLEX:
        {
            double*   buff_d = FLA_DOUBLE_PTR( d );
            double*   buff_e = FLA_DOUBLE_PTR( e );
            dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
            dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );

            r_val = FLA_Tevd_v_opz_var1( m_A,
                                         m_U,
                                         n_G,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_U, rs_U, cs_U,
                                         b_alg );

            break;
        }
    }

    return r_val;
}
FLA_Error FLA_Tevd_v_opt_var2 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  R,
FLA_Obj  W,
FLA_Obj  U,
dim_t  b_alg 
)

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_Tevd_v_opc_var2(), FLA_Tevd_v_opd_var2(), FLA_Tevd_v_ops_var2(), and FLA_Tevd_v_opz_var2().

Referenced by FLA_Hevd_lv_unb_var2().

{
    FLA_Error    r_val = FLA_SUCCESS;
    FLA_Datatype datatype;
    int          m_A, m_U, n_G;
    int          inc_d;
    int          inc_e;
    int          rs_G, cs_G;
    int          rs_R, cs_R;
    int          rs_U, cs_U;
    int          rs_W, cs_W;

    datatype = FLA_Obj_datatype( U );

    m_A       = FLA_Obj_vector_dim( d );
    m_U       = FLA_Obj_length( U );
    n_G       = FLA_Obj_width( G );

    inc_d     = FLA_Obj_vector_inc( d );
    inc_e     = FLA_Obj_vector_inc( e );
    
    rs_G      = FLA_Obj_row_stride( G );
    cs_G      = FLA_Obj_col_stride( G );

    rs_R      = FLA_Obj_row_stride( R );
    cs_R      = FLA_Obj_col_stride( R );

    rs_W      = FLA_Obj_row_stride( W );
    cs_W      = FLA_Obj_col_stride( W );

    rs_U      = FLA_Obj_row_stride( U );
    cs_U      = FLA_Obj_col_stride( U );


    switch ( datatype )
    {
        case FLA_FLOAT:
        {
            float*    buff_d = FLA_FLOAT_PTR( d );
            float*    buff_e = FLA_FLOAT_PTR( e );
            scomplex* buff_G = FLA_COMPLEX_PTR( G );
            float*    buff_R = FLA_FLOAT_PTR( R );
            float*    buff_W = FLA_FLOAT_PTR( W );
            float*    buff_U = FLA_FLOAT_PTR( U );

            r_val = FLA_Tevd_v_ops_var2( m_A,
                                         m_U,
                                         n_G,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_R, rs_R, cs_R,
                                         buff_W, rs_W, cs_W,
                                         buff_U, rs_U, cs_U,
                                         b_alg );

            break;
        }

        case FLA_DOUBLE:
        {
            double*   buff_d = FLA_DOUBLE_PTR( d );
            double*   buff_e = FLA_DOUBLE_PTR( e );
            dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
            double*   buff_R = FLA_DOUBLE_PTR( R );
            double*   buff_W = FLA_DOUBLE_PTR( W );
            double*   buff_U = FLA_DOUBLE_PTR( U );

            r_val = FLA_Tevd_v_opd_var2( m_A,
                                         m_U,
                                         n_G,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_R, rs_R, cs_R,
                                         buff_W, rs_W, cs_W,
                                         buff_U, rs_U, cs_U,
                                         b_alg );

            break;
        }

        case FLA_COMPLEX:
        {
            float*    buff_d = FLA_FLOAT_PTR( d );
            float*    buff_e = FLA_FLOAT_PTR( e );
            scomplex* buff_G = FLA_COMPLEX_PTR( G );
            float*    buff_R = FLA_FLOAT_PTR( R );
            scomplex* buff_W = FLA_COMPLEX_PTR( W );
            scomplex* buff_U = FLA_COMPLEX_PTR( U );

            r_val = FLA_Tevd_v_opc_var2( m_A,
                                         m_U,
                                         n_G,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_R, rs_R, cs_R,
                                         buff_W, rs_W, cs_W,
                                         buff_U, rs_U, cs_U,
                                         b_alg );

            break;
        }

        case FLA_DOUBLE_COMPLEX:
        {
            double*   buff_d = FLA_DOUBLE_PTR( d );
            double*   buff_e = FLA_DOUBLE_PTR( e );
            dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
            double*   buff_R = FLA_DOUBLE_PTR( R );
            dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W );
            dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );

            r_val = FLA_Tevd_v_opz_var2( m_A,
                                         m_U,
                                         n_G,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_R, rs_R, cs_R,
                                         buff_W, rs_W, cs_W,
                                         buff_U, rs_U, cs_U,
                                         b_alg );

            break;
        }
    }

    return r_val;
}
FLA_Error FLA_Tevd_v_opz_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

References bl1_z1(), bl1_zsetm(), FLA_Abort(), FLA_Apply_G_rf_blz_var3(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().

Referenced by FLA_Tevd_v_opt_var1().

{
    dcomplex  one  = bl1_z1();

    dcomplex* G;
    double*   d1;
    double*   e1;
    int       r_val;
    int       done;
    int       m_G_sweep_max;
    int       ij_begin;
    int       ijTL, ijBR;
    int       m_A11;
    int       n_iter_perf;
    int       n_U_apply;
    int       total_deflations;
    int       n_deflations;
    int       n_iter_prev;
    int       n_iter_perf_sweep_max;

    // Initialize our completion flag.
    done = FALSE;

    // Initialize a counter that holds the maximum number of rows of G
    // that we would need to initialize for the next sweep.
    m_G_sweep_max = m_A - 1;

    // Initialize a counter for the total number of iterations performed.
    n_iter_prev = 0;

    // Iterate until the matrix has completely deflated.
    for ( total_deflations = 0; done != TRUE; )
    {

        // Initialize G to contain only identity rotations.
        bl1_zsetm( m_G_sweep_max,
                   n_G,
                   &one,
                   buff_G, rs_G, cs_G );

        // Keep track of the maximum number of iterations performed in the
        // current sweep. This is used when applying the sweep's Givens
        // rotations.
        n_iter_perf_sweep_max = 0;

        // Perform a sweep: Move through the matrix and perform a tridiagonal
        // EVD on each non-zero submatrix that is encountered. During the
        // first time through, ijTL will be 0 and ijBR will be m_A - 1.
        for ( ij_begin = 0; ij_begin < m_A; )
        {

#ifdef PRINTF
if ( ij_begin == 0 )
printf( "FLA_Tevd_v_opz_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
#endif

            // Search for the first submatrix along the diagonal that is
            // bounded by zeroes (or endpoints of the matrix). If no
            // submatrix is found (ie: if the entire subdiagonal is zero
            // then FLA_FAILURE is returned. This function also inspects
            // subdiagonal elements for proximity to zero. If a given
            // element is close enough to zero, then it is deemed
            // converged and manually set to zero.
            r_val = FLA_Tevd_find_submatrix_opd( m_A,
                                                 ij_begin,
                                                 buff_d, inc_d,
                                                 buff_e, inc_e,
                                                 &ijTL,
                                                 &ijBR );

            // Verify that a submatrix was found. If one was not found,
            // then we are done with the current sweep. Furthermore, if
            // a submatrix was not found AND we began our search at the
            // beginning of the matrix (ie: ij_begin == 0), then the
            // matrix has completely deflated and so we are done with
            // Francis step iteration.
            if ( r_val == FLA_FAILURE )
            {
                if ( ij_begin == 0 )
                {
#ifdef PRINTF
printf( "FLA_Tevd_v_opz_var1: subdiagonal is completely zero.\n" );
printf( "FLA_Tevd_v_opz_var1: Francis iteration is done!\n" );
#endif
                    done = TRUE;
                }

                // Break out of the current sweep so we can apply the last
                // remaining Givens rotations.
                break;
            }

            // If we got this far, then:
            //   (a) ijTL refers to the index of the first non-zero
            //       subdiagonal along the diagonal, and
            //   (b) ijBR refers to either:
            //       - the first zero element that occurs after ijTL, or
            //       - the the last diagonal element.
            // Note that ijTL and ijBR also correspond to the first and
            // last diagonal elements of the submatrix of interest. Thus,
            // we may compute the dimension of this submatrix as:
            m_A11 = ijBR - ijTL + 1;

#ifdef PRINTF
printf( "FLA_Tevd_v_opz_var1: ij_begin = %d\n", ij_begin );
printf( "FLA_Tevd_v_opz_var1: ijTL     = %d\n", ijTL );
printf( "FLA_Tevd_v_opz_var1: ijBR     = %d\n", ijBR );
printf( "FLA_Tevd_v_opz_var1: m_A11    = %d\n", m_A11 );
#endif

            // Adjust ij_begin, which gets us ready for the next submatrix
            // search in the current sweep.
            ij_begin = ijBR + 1;

            // Index to the submatrices upon which we will operate.
            d1 = buff_d + ijTL * inc_d;
            e1 = buff_e + ijTL * inc_e;
            G  = buff_G + ijTL * rs_G;

            // Search for a batch of eigenvalues, recursing on deflated
            // subproblems whenever a split occurs. Iteration continues
            // as long as:
            //   (a) there is still matrix left to operate on, and
            //   (b) the number of iterations performed in this batch is
            //       less than n_G.
            // If/when either of the two above conditions fails to hold,
            // the function returns.
            n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
                                                        n_G,
                                                        ijTL,
                                                        d1, inc_d,
                                                        e1, inc_e,
                                                        G,  rs_G, cs_G,
                                                        &n_iter_perf );

            // Record the number of deflations that were observed.
            total_deflations += n_deflations;

            // Update the maximum number of iterations performed in the
            // current sweep.
            n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );

#ifdef PRINTF
printf( "FLA_Tevd_v_opz_var1: deflations observed       = %d\n", n_deflations );
printf( "FLA_Tevd_v_opz_var1: total deflations observed = %d\n", total_deflations );
printf( "FLA_Tevd_v_opz_var1: num iterations performed  = %d\n", n_iter_perf );
#endif

            // Store the most recent value of ijBR in m_G_sweep_max.
            // When the sweep is done, this value will contain the minimum
            // number of rows of G we can apply and safely include all
            // non-identity rotations that were computed during the
            // eigenvalue searches.
            m_G_sweep_max = ijBR;

            // Make sure we haven't exceeded our maximum iteration count.
            if ( n_iter_prev >= m_A * n_iter_max )
            {
#ifdef PRINTF
printf( "FLA_Tevd_v_opz_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
#endif
                FLA_Abort();
                //return FLA_FAILURE;
            }
        }

        // The sweep is complete. Now we must apply the Givens rotations
        // that were accumulated during the sweep.

        // Recall that the number of columns of U to which we apply
        // rotations is one more than the number of rotations.
        n_U_apply = m_G_sweep_max + 1;

#ifdef PRINTF
printf( "FLA_Tevd_v_opz_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
#endif

        // Apply the Givens rotations. Note that we optimize the scope
        // of the operation in two ways:
        //   1. We only apply k sets of Givens rotations, where
        //      k = n_iter_perf_sweep_max. We could simply always apply
        //      n_G sets of rotations since G is initialized to contain
        //      identity rotations in every element, but we do this to
        //      save a little bit of time.
        //   2. We only apply to the first n_U_apply columns of A since
        //      this is the most we need to touch given the ijBR index
        //      bound of the last submatrix found in the previous sweep.
        //      Similar to above, we could simply always perform the
        //      application on all m_A columns of A, but instead we apply
        //      only to the first n_U_apply columns to save time.
        //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
        FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
                                 m_U,
                                 n_U_apply,
                                 buff_G, rs_G, cs_G,
                                 buff_U, rs_U, cs_U,
                                 b_alg );

        // Increment the total number of iterations previously performed.
        n_iter_prev += n_iter_perf_sweep_max;

#ifdef PRINTF
printf( "FLA_Tevd_v_opz_var1: total number of iterations performed: %d\n", n_iter_prev );
#endif
    }

    return n_iter_prev;
}
FLA_Error FLA_Tevd_v_opz_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_G_extra,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_R,
int  rs_R,
int  cs_R,
dcomplex buff_W,
int  rs_W,
int  cs_W,
dcomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

References bl1_d0(), bl1_d1(), bl1_dgemm(), bl1_dident(), bl1_z1(), bl1_zcopymt(), bl1_zsetm(), BLIS1_NO_TRANSPOSE, FLA_Abort(), FLA_Apply_G_rf_bld_var3b(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().

Referenced by FLA_Tevd_v_opt_var2().

{
    dcomplex  one   = bl1_z1();
    double    rone  = bl1_d1();
    double    rzero = bl1_d0();

    dcomplex* G;
    double*   d1;
    double*   e1;
    int       r_val;
    int       done;
    int       m_G_sweep_max;
    int       ij_begin;
    int       ijTL, ijBR;
    int       m_A11;
    int       n_iter_perf;
    int       n_U_apply;
    int       total_deflations;
    int       n_deflations;
    int       n_iter_prev;
    int       n_iter_perf_sweep_max;

    // Initialize our completion flag.
    done = FALSE;

    // Initialize a counter that holds the maximum number of rows of G
    // that we would need to initialize for the next sweep.
    m_G_sweep_max = m_A - 1;

    // Initialize a counter for the total number of iterations performed.
    n_iter_prev = 0;

    // Initialize R to identity.
    bl1_dident( m_A,
                buff_R, rs_R, cs_R );

    // Iterate until the matrix has completely deflated.
    for ( total_deflations = 0; done != TRUE; )
    {

        // Initialize G to contain only identity rotations.
        bl1_zsetm( m_G_sweep_max,
                   n_G,
                   &one,
                   buff_G, rs_G, cs_G );

        // Keep track of the maximum number of iterations performed in the
        // current sweep. This is used when applying the sweep's Givens
        // rotations.
        n_iter_perf_sweep_max = 0;

        // Perform a sweep: Move through the matrix and perform a tridiagonal
        // EVD on each non-zero submatrix that is encountered. During the
        // first time through, ijTL will be 0 and ijBR will be m_A - 1.
        for ( ij_begin = 0; ij_begin < m_A;  )
        {

#ifdef PRINTF
if ( ij_begin == 0 )
printf( "FLA_Tevd_v_opz_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
#endif

            // Search for the first submatrix along the diagonal that is
            // bounded by zeroes (or endpoints of the matrix). If no
            // submatrix is found (ie: if the entire subdiagonal is zero
            // then FLA_FAILURE is returned. This function also inspects
            // subdiagonal elements for proximity to zero. If a given
            // element is close enough to zero, then it is deemed
            // converged and manually set to zero.
            r_val = FLA_Tevd_find_submatrix_opd( m_A,
                                                 ij_begin,
                                                 buff_d, inc_d,
                                                 buff_e, inc_e,
                                                 &ijTL,
                                                 &ijBR );

            // Verify that a submatrix was found. If one was not found,
            // then we are done with the current sweep. Furthermore, if
            // a submatrix was not found AND we began our search at the
            // beginning of the matrix (ie: ij_begin == 0), then the
            // matrix has completely deflated and so we are done with
            // Francis step iteration.
            if ( r_val == FLA_FAILURE )
            {
                if ( ij_begin == 0 )
                {
#ifdef PRINTF
printf( "FLA_Tevd_v_opz_var2: subdiagonal is completely zero.\n" );
printf( "FLA_Tevd_v_opz_var2: Francis iteration is done!\n" );
#endif
                    done = TRUE;
                }

                // Break out of the current sweep so we can apply the last
                // remaining Givens rotations.
                break;
            }

            // If we got this far, then:
            //   (a) ijTL refers to the index of the first non-zero
            //       subdiagonal along the diagonal, and
            //   (b) ijBR refers to either:
            //       - the first zero element that occurs after ijTL, or
            //       - the the last diagonal element.
            // Note that ijTL and ijBR also correspond to the first and
            // last diagonal elements of the submatrix of interest. Thus,
            // we may compute the dimension of this submatrix as:
            m_A11 = ijBR - ijTL + 1;

#ifdef PRINTF
printf( "FLA_Tevd_v_opz_var2: ij_begin = %d\n", ij_begin );
printf( "FLA_Tevd_v_opz_var2: ijTL     = %d\n", ijTL );
printf( "FLA_Tevd_v_opz_var2: ijBR     = %d\n", ijBR );
printf( "FLA_Tevd_v_opz_var2: m_A11    = %d\n", m_A11 );
#endif

            // Adjust ij_begin, which gets us ready for the next subproblem, if
            // there is one.
            ij_begin = ijBR + 1;

            // Index to the submatrices upon which we will operate.
            d1 = buff_d + ijTL * inc_d;
            e1 = buff_e + ijTL * inc_e;
            G  = buff_G + ijTL * rs_G;

            // Search for a batch of eigenvalues, recursing on deflated
            // subproblems whenever a split occurs. Iteration continues
            // as long as
            //   (a) there is still matrix left to operate on, and
            //   (b) the number of iterations performed in this batch is
            //       less than n_G.
            // If/when either of the two above conditions fails to hold,
            // the function returns.
            n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
                                                        n_G,
                                                        ijTL,
                                                        d1, inc_d,
                                                        e1, inc_e,
                                                        G,  rs_G, cs_G,
                                                        &n_iter_perf );

            // Record the number of deflations that we observed.
            total_deflations += n_deflations;

            // Update the maximum number of iterations performed in the
            // current sweep.
            n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );

#ifdef PRINTF
printf( "FLA_Tevd_v_opz_var2: deflations observed       = %d\n", n_deflations );
printf( "FLA_Tevd_v_opz_var2: total deflations observed = %d\n", total_deflations );
printf( "FLA_Tevd_v_opz_var2: num iterations            = %d\n", n_iter_perf );
#endif

            // Store the most recent value of ijBR in m_G_sweep_max.
            // When the sweep is done, this value will contain the minimum
            // number of rows of G we can apply and safely include all
            // non-identity rotations that were computed during the
            // eigenvalue searches.
            m_G_sweep_max = ijBR;

            // Make sure we haven't exceeded our maximum iteration count.
            if ( n_iter_prev >= m_A * n_iter_max )
            {
#ifdef PRINTF
printf( "FLA_Tevd_v_opz_var2: reached maximum total number of iterations: %d\n", n_iter_prev );
#endif
                FLA_Abort();
                //return FLA_FAILURE;
            }
        }

        // The sweep is complete. Now we must apply the Givens rotations
        // that were accumulated during the sweep.


        // Recall that the number of columns of U to which we apply
        // rotations is one more than the number of rotations.
        n_U_apply = m_G_sweep_max + 1;

        // Apply the Givens rotations that were computed as part of
        // the previous batch of iterations.
        //FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
        FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
                                  m_U,
                                  n_U_apply,
                                  n_iter_prev,
                                  buff_G, rs_G, cs_G,
                                  buff_R, rs_R, cs_R,
                                  b_alg );

#ifdef PRINTF
printf( "FLA_Tevd_v_opz_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
#endif

        // Increment the total number of iterations previously performed.
        n_iter_prev += n_iter_perf_sweep_max;
    }

    // Copy the contents of Q to temporary storage.
    bl1_zcopymt( BLIS1_NO_TRANSPOSE,
                 m_A,
                 m_A,
                 buff_U, rs_U, cs_U,
                 buff_W, rs_W, cs_W );


    // Multiply Q by R, overwriting U.
    bl1_dgemm( BLIS1_NO_TRANSPOSE,
               BLIS1_NO_TRANSPOSE,
               2*m_A,
               m_A,
               m_A,
               &rone,
               ( double* )buff_W, rs_W, 2*cs_W,
                          buff_R, rs_R,   cs_R,
               &rzero,
               ( double* )buff_U, rs_U, 2*cs_U );

    return n_iter_prev;
}