libflame
revision_anchor
|
Go to the source code of this file.
Functions | |
FLA_Error | FLA_Tevd_compute_scaling_ops (int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *sigma) |
FLA_Error | FLA_Tevd_compute_scaling_opd (int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *sigma) |
FLA_Error | FLA_Tevd_find_submatrix_ops (int m_A, int ij_begin, float *buff_d, int inc_d, float *buff_e, int inc_e, int *ijTL, int *ijBR) |
FLA_Error | FLA_Tevd_find_submatrix_opd (int m_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR) |
FLA_Error | FLA_Tevd_find_perfshift_ops (int m_d, int m_l, float *buff_d, int inc_d, float *buff_e, int inc_e, float *buff_l, int inc_l, int *buff_lstat, int inc_lstat, float *buff_pu, int inc_pu, int *ij_shift) |
FLA_Error | FLA_Tevd_find_perfshift_opd (int m_d, int m_l, double *buff_d, int inc_d, double *buff_e, int inc_e, double *buff_l, int inc_l, int *buff_lstat, int inc_lstat, double *buff_pu, int inc_pu, int *ij_shift) |
FLA_Error | FLA_Norm1_tridiag (FLA_Obj d, FLA_Obj e, FLA_Obj norm) |
FLA_Error | FLA_Norm1_tridiag_ops (int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *norm) |
FLA_Error | FLA_Norm1_tridiag_opd (int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *norm) |
FLA_Error | FLA_Tevd_v_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj U, dim_t b_alg) |
FLA_Error | FLA_Tevd_v_ops_var1 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_U, int rs_U, int cs_U, int b_alg) |
FLA_Error | FLA_Tevd_v_opd_var1 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_U, int rs_U, int cs_U, int b_alg) |
FLA_Error | FLA_Tevd_v_opc_var1 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_U, int rs_U, int cs_U, int b_alg) |
FLA_Error | FLA_Tevd_v_opz_var1 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_U, int rs_U, int cs_U, int b_alg) |
FLA_Error | FLA_Tevd_v_opt_var2 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj R, FLA_Obj W, FLA_Obj U, dim_t b_alg) |
FLA_Error | FLA_Tevd_v_ops_var2 (int m_A, int m_U, int n_G, int n_G_extra, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, int b_alg) |
FLA_Error | FLA_Tevd_v_opd_var2 (int m_A, int m_U, int n_G, int n_G_extra, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, int b_alg) |
FLA_Error | FLA_Tevd_v_opc_var2 (int m_A, int m_U, int n_G, int n_G_extra, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, int b_alg) |
FLA_Error | FLA_Tevd_v_opz_var2 (int m_A, int m_U, int n_G, int n_G_extra, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, int b_alg) |
FLA_Error FLA_Norm1_tridiag | ( | FLA_Obj | d, |
FLA_Obj | e, | ||
FLA_Obj | norm | ||
) |
References FLA_Norm1_tridiag_opd(), FLA_Norm1_tridiag_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), and FLA_Obj_vector_inc().
{ FLA_Datatype datatype; int m_A; int inc_d; int inc_e; datatype = FLA_Obj_datatype( d ); m_A = FLA_Obj_vector_dim( d ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); switch ( datatype ) { case FLA_FLOAT: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); float* buff_norm = FLA_FLOAT_PTR( norm ); FLA_Norm1_tridiag_ops( m_A, buff_d, inc_d, buff_e, inc_e, buff_norm ); break; } case FLA_DOUBLE: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); double* buff_norm = FLA_DOUBLE_PTR( norm ); FLA_Norm1_tridiag_opd( m_A, buff_d, inc_d, buff_e, inc_e, buff_norm ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Norm1_tridiag_opd | ( | int | m_A, |
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
double * | norm | ||
) |
Referenced by FLA_Norm1_tridiag(), and FLA_Tevd_compute_scaling_opd().
{ double* d = buff_d; double* e = buff_e; double nm; int i; if ( m_A == 1 ) { nm = fabs( *d ); } else { double d_first = d[ (0 )*inc_d ]; double e_first = e[ (0 )*inc_e ]; double e_last = e[ (m_A-2)*inc_e ]; double d_last = d[ (m_A-1)*inc_d ]; // Record the maximum of the absolute row/column sums for the // first and last row/columns. nm = max( fabs( d_first ) + fabs( e_first ), fabs( e_last ) + fabs( d_last ) ); for ( i = 1; i < m_A - 2; ++i ) { double e0 = e[ (i-1)*inc_e ]; double e1 = e[ (i )*inc_e ]; double d1 = d[ (i )*inc_d ]; // Update nm with the absolute row/column sum for the ith // row/column. nm = max( nm, fabs( e0 ) + fabs( d1 ) + fabs( e1 ) ); } } *norm = nm; return FLA_SUCCESS; }
FLA_Error FLA_Norm1_tridiag_ops | ( | int | m_A, |
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
float * | norm | ||
) |
Referenced by FLA_Norm1_tridiag(), and FLA_Tevd_compute_scaling_ops().
{ float* d = buff_d; float* e = buff_e; float nm; int i; if ( m_A == 1 ) { nm = fabs( *d ); } else { float d_first = d[ (0 )*inc_d ]; float e_first = e[ (0 )*inc_e ]; float e_last = e[ (m_A-2)*inc_e ]; float d_last = d[ (m_A-1)*inc_d ]; // Record the maximum of the absolute row/column sums for the // first and last row/columns. nm = max( fabs( d_first ) + fabs( e_first ), fabs( e_last ) + fabs( d_last ) ); for ( i = 1; i < m_A - 2; ++i ) { float e0 = e[ (i-1)*inc_e ]; float e1 = e[ (i )*inc_e ]; float d1 = d[ (i )*inc_d ]; // Update nm with the absolute row/column sum for the ith // row/column. nm = max( nm, fabs( e0 ) + fabs( d1 ) + fabs( e1 ) ); } } *norm = nm; return FLA_SUCCESS; }
FLA_Error FLA_Tevd_compute_scaling_opd | ( | int | m_A, |
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
double * | sigma | ||
) |
References bl1_d1(), FLA_Mach_params_opd(), and FLA_Norm1_tridiag_opd().
{ double one = bl1_d1(); double three = 3.0; double norm; double eps2; double safmin; double safmax; double ssfmin; double ssfmax; // Query some constants. eps2 = FLA_Mach_params_opd( FLA_MACH_EPS2 ); safmin = FLA_Mach_params_opd( FLA_MACH_SFMIN ); safmax = one / safmin; // Compute the acceptable range for the 1-norm; ssfmax = sqrt( safmax ) / three; ssfmin = sqrt( safmin ) / eps2; // Compute the 1-norm of the tridiagonal matrix. FLA_Norm1_tridiag_opd( m_A, buff_d, inc_d, buff_e, inc_e, &norm ); // Compute sigma accordingly if norm is outside of the range. if ( norm > ssfmax ) { *sigma = ssfmax / norm; } else if ( norm < ssfmin ) { *sigma = ssfmin / norm; } else { *sigma = one; } return FLA_SUCCESS; }
FLA_Error FLA_Tevd_compute_scaling_ops | ( | int | m_A, |
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
float * | sigma | ||
) |
References bl1_s1(), FLA_Mach_params_ops(), and FLA_Norm1_tridiag_ops().
{ float one = bl1_s1(); float three = 3.0F; float norm; float eps2; float safmin; float safmax; float ssfmin; float ssfmax; // Query some constants. eps2 = FLA_Mach_params_ops( FLA_MACH_EPS2 ); safmin = FLA_Mach_params_ops( FLA_MACH_SFMIN ); safmax = one / safmin; // Compute the acceptable range for the 1-norm; ssfmax = sqrt( safmax ) / three; ssfmin = sqrt( safmin ) / eps2; // Compute the 1-norm of the tridiagonal matrix. FLA_Norm1_tridiag_ops( m_A, buff_d, inc_d, buff_e, inc_e, &norm ); // Compute sigma accordingly if norm is outside of the range. if ( norm > ssfmax ) { *sigma = ssfmax / norm; } else if ( norm < ssfmin ) { *sigma = ssfmin / norm; } else { *sigma = one; } return FLA_SUCCESS; }
FLA_Error FLA_Tevd_find_perfshift_opd | ( | int | m_d, |
int | m_l, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
double * | buff_l, | ||
int | inc_l, | ||
int * | buff_lstat, | ||
int | inc_lstat, | ||
double * | buff_pu, | ||
int | inc_pu, | ||
int * | ij_shift | ||
) |
References FLA_Wilkshift_tridiag_opd().
Referenced by FLA_Tevd_eigval_v_opd_var3().
{ double* d1p; double* e1p; double* d2p; double wilkshift; int i; int ij_cand; double dist_cand; double pshift_cand; d1p = buff_d + (m_d-2)*inc_d; e1p = buff_e + (m_d-2)*inc_e; d2p = buff_d + (m_d-1)*inc_d; if ( *buff_ls == -1 ) { *ij_shift = -1; return FLA_FAILURE; } FLA_Wilkshift_tridiag_opd( *d1p, *e1p, *d2p, &wilkshift ); /* // If we have shifted here previously, use a Wilkinson shfit. prev_shift = buff_pu[ (m_d-1)*inc_pu ]; if ( prev_shift != 0.0 ) { // *shift = prev_shift; *shift = wilkshift; return FLA_SUCCESS; } */ ij_cand = -1; // Find an available (unused) shift. for ( i = 0; i < m_l; ++i ) { int* status = buff_ls + (i )*inc_ls; if ( *status == 0 ) { double* lambda1 = buff_l + (i )*inc_l; ij_cand = i; pshift_cand = *lambda1; dist_cand = fabs( wilkshift - pshift_cand ); } } if ( ij_cand == -1 ) { *ij_shift = -1; *buff_ls = -1; return FLA_FAILURE; } // Now try to find a shift closer to wilkshift than the // first one we found. for ( i = 0; i < m_l; ++i ) { double* lambda1 = buff_l + (i )*inc_l; int* status = buff_ls + (i )*inc_ls; double dist = fabs( wilkshift - *lambda1 ); if ( *status == 1 ) continue; if ( dist < dist_cand ) { ij_cand = i; pshift_cand = *lambda1; dist_cand = dist; } } *ij_shift = ij_cand; return FLA_SUCCESS; }
FLA_Error FLA_Tevd_find_perfshift_ops | ( | int | m_d, |
int | m_l, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
float * | buff_l, | ||
int | inc_l, | ||
int * | buff_lstat, | ||
int | inc_lstat, | ||
float * | buff_pu, | ||
int | inc_pu, | ||
int * | ij_shift | ||
) |
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_find_submatrix_opd | ( | int | m_A, |
int | ij_begin, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
int * | ijTL, | ||
int * | ijBR | ||
) |
References bl1_d0(), and FLA_Mach_params_opd().
{ double rzero = bl1_d0(); double eps; int ij_tl; int ij_br; // Initialize some numerical constants. eps = FLA_Mach_params_opd( FLA_MACH_EPS ); // Search for the first non-zero subdiagonal element starting at // the index specified by ij_begin. for ( ij_tl = ij_begin; ij_tl < m_A - 1; ++ij_tl ) { double* d1 = buff_d + (ij_tl )*inc_d; double* d2 = buff_d + (ij_tl+1)*inc_d; double* e1 = buff_e + (ij_tl )*inc_e; double abs_e1 = fabs( *e1 ); // If we encounter a non-zero subdiagonal element that is close // enough to zero, set it to zero. if ( abs_e1 != rzero ) { if ( abs_e1 <= eps * sqrt( fabs( *d1 ) ) * sqrt( fabs( *d2 ) ) ) { #ifdef PRINTF printf( "FLA_Tevd_find_submatrix_opd: nudging non-zero subdiagonal element (e1) to zero.\n" ); printf( " d[%3d] = %22.19e\n", ij_tl, *d1 ); printf( " e[%3d] d[%3d] = %22.19e %22.19e\n", ij_tl, ij_tl+1, *e1, *d2 ); #endif *e1 = rzero; } } // If we find a non-zero element, record it and break out of this // loop. if ( *e1 != rzero ) { #ifdef PRINTF printf( "FLA_Tevd_find_submatrix_opd: found non-zero subdiagonal element\n" ); printf( " e[%3d] = %22.19e\n", ij_tl, *e1 ); #endif *ijTL = ij_tl; break; } } // If ij_tl was incremented all the way up to m_A - 1, then we didn't // find any non-zeros. if ( ij_tl == m_A - 1 ) { #ifdef PRINTF printf( "FLA_Tevd_find_submatrix_opd: no submatrices found.\n" ); #endif return FLA_FAILURE; } // If we've gotten this far, then a non-zero subdiagonal element was // found. Now we must walk the remaining portion of the subdiagonal // to find the first zero element, or if one is not found, we simply // use the last element of the subdiagonal. for ( ij_br = ij_tl; ij_br < m_A - 1; ++ij_br ) { double* d1 = buff_d + (ij_br )*inc_d; double* d2 = buff_d + (ij_br+1)*inc_d; double* e1 = buff_e + (ij_br )*inc_e; double abs_e1 = fabs( *e1 ); // If we encounter a non-zero subdiagonal element that is close // enough to zero, set it to zero. if ( abs_e1 != rzero ) { if ( abs_e1 <= eps * sqrt( fabs( *d1 ) ) * sqrt( fabs( *d2 ) ) ) { #ifdef PRINTF printf( "FLA_Tevd_find_submatrix_opd: nudging non-zero subdiagonal element (e1) to zero.\n" ); printf( " d[%3d] = %22.19e\n", ij_br, *d1 ); printf( " e[%3d] d[%3d] = %22.19e %22.19e\n", ij_br, ij_br+1, *e1, *d2 ); #endif *e1 = rzero; } } // If we find a zero element, record it and break out of this // loop. if ( *e1 == rzero ) { #ifdef PRINTF printf( "FLA_Tevd_find_submatrix_opd: found zero subdiagonal element\n" ); printf( " e[%3d] = %22.19e\n", ij_br, *e1 ); #endif break; } } // If a zero element was found, then ij_br should hold the index of // that element. If a zero element was not found, then ij_br should // hold m_A - 1. Either way, we save the value and return success. *ijBR = ij_br; return FLA_SUCCESS; }
FLA_Error FLA_Tevd_find_submatrix_ops | ( | int | m_A, |
int | ij_begin, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
int * | ijTL, | ||
int * | ijBR | ||
) |
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_v_opc_var1 | ( | int | m_A, |
int | m_U, | ||
int | n_G, | ||
int | n_iter_max, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
int | b_alg | ||
) |
Referenced by FLA_Tevd_v_opt_var1().
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_v_opc_var2 | ( | int | m_A, |
int | m_U, | ||
int | n_G, | ||
int | n_G_extra, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_R, | ||
int | rs_R, | ||
int | cs_R, | ||
scomplex * | buff_W, | ||
int | rs_W, | ||
int | cs_W, | ||
scomplex * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
int | b_alg | ||
) |
Referenced by FLA_Tevd_v_opt_var2().
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_v_opd_var1 | ( | int | m_A, |
int | m_U, | ||
int | n_G, | ||
int | n_iter_max, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
int | b_alg | ||
) |
References bl1_z1(), bl1_zsetm(), FLA_Abort(), FLA_Apply_G_rf_bld_var3(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().
Referenced by FLA_Tevd_v_opt_var1().
{ dcomplex one = bl1_z1(); dcomplex* G; double* d1; double* e1; int r_val; int done; int m_G_sweep_max; int ij_begin; int ijTL, ijBR; int m_A11; int n_iter_perf; int n_U_apply; int total_deflations; int n_deflations; int n_iter_prev; int n_iter_perf_sweep_max; // Initialize our completion flag. done = FALSE; // Initialize a counter that holds the maximum number of rows of G // that we would need to initialize for the next sweep. m_G_sweep_max = m_A - 1; // Initialize a counter for the total number of iterations performed. n_iter_prev = 0; // Iterate until the matrix has completely deflated. for ( total_deflations = 0; done != TRUE; ) { // Initialize G to contain only identity rotations. bl1_zsetm( m_G_sweep_max, n_G, &one, buff_G, rs_G, cs_G ); // Keep track of the maximum number of iterations performed in the // current sweep. This is used when applying the sweep's Givens // rotations. n_iter_perf_sweep_max = 0; // Perform a sweep: Move through the matrix and perform a tridiagonal // EVD on each non-zero submatrix that is encountered. During the // first time through, ijTL will be 0 and ijBR will be m_A - 1. for ( ij_begin = 0; ij_begin < m_A; ) { #ifdef PRINTF if ( ij_begin == 0 ) printf( "FLA_Tevd_v_opd_var1: beginning new sweep (ij_begin = %d)\n", ij_begin ); #endif // Search for the first submatrix along the diagonal that is // bounded by zeroes (or endpoints of the matrix). If no // submatrix is found (ie: if the entire subdiagonal is zero // then FLA_FAILURE is returned. This function also inspects // subdiagonal elements for proximity to zero. If a given // element is close enough to zero, then it is deemed // converged and manually set to zero. r_val = FLA_Tevd_find_submatrix_opd( m_A, ij_begin, buff_d, inc_d, buff_e, inc_e, &ijTL, &ijBR ); // Verify that a submatrix was found. If one was not found, // then we are done with the current sweep. Furthermore, if // a submatrix was not found AND we began our search at the // beginning of the matrix (ie: ij_begin == 0), then the // matrix has completely deflated and so we are done with // Francis step iteration. if ( r_val == FLA_FAILURE ) { if ( ij_begin == 0 ) { #ifdef PRINTF printf( "FLA_Tevd_v_opd_var1: subdiagonal is completely zero.\n" ); printf( "FLA_Tevd_v_opd_var1: Francis iteration is done!\n" ); #endif done = TRUE; } // Break out of the current sweep so we can apply the last // remaining Givens rotations. break; } // If we got this far, then: // (a) ijTL refers to the index of the first non-zero // subdiagonal along the diagonal, and // (b) ijBR refers to either: // - the first zero element that occurs after ijTL, or // - the the last diagonal element. // Note that ijTL and ijBR also correspond to the first and // last diagonal elements of the submatrix of interest. Thus, // we may compute the dimension of this submatrix as: m_A11 = ijBR - ijTL + 1; #ifdef PRINTF printf( "FLA_Tevd_v_opd_var1: ij_begin = %d\n", ij_begin ); printf( "FLA_Tevd_v_opd_var1: ijTL = %d\n", ijTL ); printf( "FLA_Tevd_v_opd_var1: ijBR = %d\n", ijBR ); printf( "FLA_Tevd_v_opd_var1: m_A11 = %d\n", m_A11 ); #endif // Adjust ij_begin, which gets us ready for the next submatrix // search in the current sweep. ij_begin = ijBR + 1; // Index to the submatrices upon which we will operate. d1 = buff_d + ijTL * inc_d; e1 = buff_e + ijTL * inc_e; G = buff_G + ijTL * rs_G; // Search for a batch of eigenvalues, recursing on deflated // subproblems whenever a split occurs. Iteration continues // as long as: // (a) there is still matrix left to operate on, and // (b) the number of iterations performed in this batch is // less than n_G. // If/when either of the two above conditions fails to hold, // the function returns. n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11, n_G, ijTL, d1, inc_d, e1, inc_e, G, rs_G, cs_G, &n_iter_perf ); // Record the number of deflations that were observed. total_deflations += n_deflations; // Update the maximum number of iterations performed in the // current sweep. n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf ); #ifdef PRINTF printf( "FLA_Tevd_v_opd_var1: deflations observed = %d\n", n_deflations ); printf( "FLA_Tevd_v_opd_var1: total deflations observed = %d\n", total_deflations ); printf( "FLA_Tevd_v_opd_var1: num iterations performed = %d\n", n_iter_perf ); #endif // Store the most recent value of ijBR in m_G_sweep_max. // When the sweep is done, this value will contain the minimum // number of rows of G we can apply and safely include all // non-identity rotations that were computed during the // eigenvalue searches. m_G_sweep_max = ijBR; // Make sure we haven't exceeded our maximum iteration count. if ( n_iter_prev >= m_A * n_iter_max ) { #ifdef PRINTF printf( "FLA_Tevd_v_opd_var1: reached maximum total number of iterations: %d\n", n_iter_prev ); #endif FLA_Abort(); //return FLA_FAILURE; } } // The sweep is complete. Now we must apply the Givens rotations // that were accumulated during the sweep. // Recall that the number of columns of U to which we apply // rotations is one more than the number of rotations. n_U_apply = m_G_sweep_max + 1; #ifdef PRINTF printf( "FLA_Tevd_v_opd_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max ); #endif // Apply the Givens rotations. Note that we optimize the scope // of the operation in two ways: // 1. We only apply k sets of Givens rotations, where // k = n_iter_perf_sweep_max. We could simply always apply // n_G sets of rotations since G is initialized to contain // identity rotations in every element, but we do this to // save a little bit of time. // 2. We only apply to the first n_U_apply columns of A since // this is the most we need to touch given the ijBR index // bound of the last submatrix found in the previous sweep. // Similar to above, we could simply always perform the // application on all m_A columns of A, but instead we apply // only to the first n_U_apply columns to save time. //FLA_Apply_G_rf_bld_var1( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var2( n_iter_perf_sweep_max, FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max, m_U, n_U_apply, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U, b_alg ); // Increment the total number of iterations previously performed. n_iter_prev += n_iter_perf_sweep_max; #ifdef PRINTF printf( "FLA_Tevd_v_opd_var1: total number of iterations performed: %d\n", n_iter_prev ); #endif } return n_iter_prev; }
FLA_Error FLA_Tevd_v_opd_var2 | ( | int | m_A, |
int | m_U, | ||
int | n_G, | ||
int | n_G_extra, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_R, | ||
int | rs_R, | ||
int | cs_R, | ||
double * | buff_W, | ||
int | rs_W, | ||
int | cs_W, | ||
double * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
int | b_alg | ||
) |
References bl1_d0(), bl1_d1(), bl1_dcopymt(), bl1_dgemm(), bl1_dident(), bl1_z1(), bl1_zsetm(), BLIS1_NO_TRANSPOSE, FLA_Abort(), FLA_Apply_G_rf_bld_var3b(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().
Referenced by FLA_Tevd_v_opt_var2().
{ dcomplex one = bl1_z1(); double rone = bl1_d1(); double rzero = bl1_d0(); dcomplex* G; double* d1; double* e1; int r_val; int done; int m_G_sweep_max; int ij_begin; int ijTL, ijBR; int m_A11; int n_iter_perf; int n_U_apply; int total_deflations; int n_deflations; int n_iter_prev; int n_iter_perf_sweep_max; // Initialize our completion flag. done = FALSE; // Initialize a counter that holds the maximum number of rows of G // that we would need to initialize for the next sweep. m_G_sweep_max = m_A - 1; // Initialize a counter for the total number of iterations performed. n_iter_prev = 0; // Initialize R to identity. bl1_dident( m_A, buff_R, rs_R, cs_R ); // Iterate until the matrix has completely deflated. for ( total_deflations = 0; done != TRUE; ) { // Initialize G to contain only identity rotations. bl1_zsetm( m_G_sweep_max, n_G, &one, buff_G, rs_G, cs_G ); // Keep track of the maximum number of iterations performed in the // current sweep. This is used when applying the sweep's Givens // rotations. n_iter_perf_sweep_max = 0; // Perform a sweep: Move through the matrix and perform a tridiagonal // EVD on each non-zero submatrix that is encountered. During the // first time through, ijTL will be 0 and ijBR will be m_A - 1. for ( ij_begin = 0; ij_begin < m_A; ) { #ifdef PRINTF if ( ij_begin == 0 ) printf( "FLA_Tevd_v_opd_var2: beginning new sweep (ij_begin = %d)\n", ij_begin ); #endif // Search for the first submatrix along the diagonal that is // bounded by zeroes (or endpoints of the matrix). If no // submatrix is found (ie: if the entire subdiagonal is zero // then FLA_FAILURE is returned. This function also inspects // subdiagonal elements for proximity to zero. If a given // element is close enough to zero, then it is deemed // converged and manually set to zero. r_val = FLA_Tevd_find_submatrix_opd( m_A, ij_begin, buff_d, inc_d, buff_e, inc_e, &ijTL, &ijBR ); // Verify that a submatrix was found. If one was not found, // then we are done with the current sweep. Furthermore, if // a submatrix was not found AND we began our search at the // beginning of the matrix (ie: ij_begin == 0), then the // matrix has completely deflated and so we are done with // Francis step iteration. if ( r_val == FLA_FAILURE ) { if ( ij_begin == 0 ) { #ifdef PRINTF printf( "FLA_Tevd_v_opd_var2: subdiagonal is completely zero.\n" ); printf( "FLA_Tevd_v_opd_var2: Francis iteration is done!\n" ); #endif done = TRUE; } // Break out of the current sweep so we can apply the last // remaining Givens rotations. break; } // If we got this far, then: // (a) ijTL refers to the index of the first non-zero // subdiagonal along the diagonal, and // (b) ijBR refers to either: // - the first zero element that occurs after ijTL, or // - the the last diagonal element. // Note that ijTL and ijBR also correspond to the first and // last diagonal elements of the submatrix of interest. Thus, // we may compute the dimension of this submatrix as: m_A11 = ijBR - ijTL + 1; #ifdef PRINTF printf( "FLA_Tevd_v_opd_var2: ij_begin = %d\n", ij_begin ); printf( "FLA_Tevd_v_opd_var2: ijTL = %d\n", ijTL ); printf( "FLA_Tevd_v_opd_var2: ijBR = %d\n", ijBR ); printf( "FLA_Tevd_v_opd_var2: m_A11 = %d\n", m_A11 ); #endif // Adjust ij_begin, which gets us ready for the next subproblem, if // there is one. ij_begin = ijBR + 1; // Index to the submatrices upon which we will operate. d1 = buff_d + ijTL * inc_d; e1 = buff_e + ijTL * inc_e; G = buff_G + ijTL * rs_G; // Search for a batch of eigenvalues, recursing on deflated // subproblems whenever a split occurs. Iteration continues // as long as // (a) there is still matrix left to operate on, and // (b) the number of iterations performed in this batch is // less than n_G. // If/when either of the two above conditions fails to hold, // the function returns. n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11, n_G, ijTL, d1, inc_d, e1, inc_e, G, rs_G, cs_G, &n_iter_perf ); // Record the number of deflations that we observed. total_deflations += n_deflations; // Update the maximum number of iterations performed in the // current sweep. n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf ); #ifdef PRINTF printf( "FLA_Tevd_v_opd_var2: deflations observed = %d\n", n_deflations ); printf( "FLA_Tevd_v_opd_var2: total deflations observed = %d\n", total_deflations ); printf( "FLA_Tevd_v_opd_var2: num iterations = %d\n", n_iter_perf ); #endif // Store the most recent value of ijBR in m_G_sweep_max. // When the sweep is done, this value will contain the minimum // number of rows of G we can apply and safely include all // non-identity rotations that were computed during the // eigenvalue searches. m_G_sweep_max = ijBR; // Make sure we haven't exceeded our maximum iteration count. if ( n_iter_prev >= m_A * n_iter_max ) { #ifdef PRINTF printf( "FLA_Tevd_v_opd_var2: reached maximum total number of iterations: %d\n", n_iter_prev ); #endif FLA_Abort(); //return FLA_FAILURE; } } // The sweep is complete. Now we must apply the Givens rotations // that were accumulated during the sweep. // Recall that the number of columns of U to which we apply // rotations is one more than the number of rotations. n_U_apply = m_G_sweep_max + 1; // Apply the Givens rotations that were computed as part of // the previous batch of iterations. //FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max, FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max, m_U, n_U_apply, n_iter_prev, buff_G, rs_G, cs_G, buff_R, rs_R, cs_R, b_alg ); #ifdef PRINTF printf( "FLA_Tevd_v_opd_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max ); #endif // Increment the total number of iterations previously performed. n_iter_prev += n_iter_perf_sweep_max; } // Copy the contents of Q to temporary storage. bl1_dcopymt( BLIS1_NO_TRANSPOSE, m_A, m_A, buff_U, rs_U, cs_U, buff_W, rs_W, cs_W ); // Multiply Q by R, overwriting U. bl1_dgemm( BLIS1_NO_TRANSPOSE, BLIS1_NO_TRANSPOSE, m_A, m_A, m_A, &rone, ( double* )buff_W, rs_W, cs_W, buff_R, rs_R, cs_R, &rzero, ( double* )buff_U, rs_U, cs_U ); return n_iter_prev; }
FLA_Error FLA_Tevd_v_ops_var1 | ( | int | m_A, |
int | m_U, | ||
int | n_G, | ||
int | n_iter_max, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
int | b_alg | ||
) |
Referenced by FLA_Tevd_v_opt_var1().
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_v_ops_var2 | ( | int | m_A, |
int | m_U, | ||
int | n_G, | ||
int | n_G_extra, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_R, | ||
int | rs_R, | ||
int | cs_R, | ||
float * | buff_W, | ||
int | rs_W, | ||
int | cs_W, | ||
float * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
int | b_alg | ||
) |
Referenced by FLA_Tevd_v_opt_var2().
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
FLA_Error FLA_Tevd_v_opt_var1 | ( | dim_t | n_iter_max, |
FLA_Obj | d, | ||
FLA_Obj | e, | ||
FLA_Obj | G, | ||
FLA_Obj | U, | ||
dim_t | b_alg | ||
) |
References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_Tevd_v_opc_var1(), FLA_Tevd_v_opd_var1(), FLA_Tevd_v_ops_var1(), and FLA_Tevd_v_opz_var1().
Referenced by FLA_Hevd_lv_unb_var1().
{ FLA_Error r_val = FLA_SUCCESS; FLA_Datatype datatype; int m_A, m_U, n_G; int inc_d; int inc_e; int rs_G, cs_G; int rs_U, cs_U; datatype = FLA_Obj_datatype( U ); m_A = FLA_Obj_vector_dim( d ); m_U = FLA_Obj_length( U ); n_G = FLA_Obj_width( G ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); rs_G = FLA_Obj_row_stride( G ); cs_G = FLA_Obj_col_stride( G ); rs_U = FLA_Obj_row_stride( U ); cs_U = FLA_Obj_col_stride( U ); switch ( datatype ) { case FLA_FLOAT: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); float* buff_U = FLA_FLOAT_PTR( U ); r_val = FLA_Tevd_v_ops_var1( m_A, m_U, n_G, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U, b_alg ); break; } case FLA_DOUBLE: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); double* buff_U = FLA_DOUBLE_PTR( U ); r_val = FLA_Tevd_v_opd_var1( m_A, m_U, n_G, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U, b_alg ); break; } case FLA_COMPLEX: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); scomplex* buff_U = FLA_COMPLEX_PTR( U ); r_val = FLA_Tevd_v_opc_var1( m_A, m_U, n_G, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U, b_alg ); break; } case FLA_DOUBLE_COMPLEX: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U ); r_val = FLA_Tevd_v_opz_var1( m_A, m_U, n_G, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U, b_alg ); break; } } return r_val; }
FLA_Error FLA_Tevd_v_opt_var2 | ( | dim_t | n_iter_max, |
FLA_Obj | d, | ||
FLA_Obj | e, | ||
FLA_Obj | G, | ||
FLA_Obj | R, | ||
FLA_Obj | W, | ||
FLA_Obj | U, | ||
dim_t | b_alg | ||
) |
References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_Tevd_v_opc_var2(), FLA_Tevd_v_opd_var2(), FLA_Tevd_v_ops_var2(), and FLA_Tevd_v_opz_var2().
Referenced by FLA_Hevd_lv_unb_var2().
{ FLA_Error r_val = FLA_SUCCESS; FLA_Datatype datatype; int m_A, m_U, n_G; int inc_d; int inc_e; int rs_G, cs_G; int rs_R, cs_R; int rs_U, cs_U; int rs_W, cs_W; datatype = FLA_Obj_datatype( U ); m_A = FLA_Obj_vector_dim( d ); m_U = FLA_Obj_length( U ); n_G = FLA_Obj_width( G ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); rs_G = FLA_Obj_row_stride( G ); cs_G = FLA_Obj_col_stride( G ); rs_R = FLA_Obj_row_stride( R ); cs_R = FLA_Obj_col_stride( R ); rs_W = FLA_Obj_row_stride( W ); cs_W = FLA_Obj_col_stride( W ); rs_U = FLA_Obj_row_stride( U ); cs_U = FLA_Obj_col_stride( U ); switch ( datatype ) { case FLA_FLOAT: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); float* buff_R = FLA_FLOAT_PTR( R ); float* buff_W = FLA_FLOAT_PTR( W ); float* buff_U = FLA_FLOAT_PTR( U ); r_val = FLA_Tevd_v_ops_var2( m_A, m_U, n_G, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_R, rs_R, cs_R, buff_W, rs_W, cs_W, buff_U, rs_U, cs_U, b_alg ); break; } case FLA_DOUBLE: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); double* buff_R = FLA_DOUBLE_PTR( R ); double* buff_W = FLA_DOUBLE_PTR( W ); double* buff_U = FLA_DOUBLE_PTR( U ); r_val = FLA_Tevd_v_opd_var2( m_A, m_U, n_G, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_R, rs_R, cs_R, buff_W, rs_W, cs_W, buff_U, rs_U, cs_U, b_alg ); break; } case FLA_COMPLEX: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); float* buff_R = FLA_FLOAT_PTR( R ); scomplex* buff_W = FLA_COMPLEX_PTR( W ); scomplex* buff_U = FLA_COMPLEX_PTR( U ); r_val = FLA_Tevd_v_opc_var2( m_A, m_U, n_G, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_R, rs_R, cs_R, buff_W, rs_W, cs_W, buff_U, rs_U, cs_U, b_alg ); break; } case FLA_DOUBLE_COMPLEX: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); double* buff_R = FLA_DOUBLE_PTR( R ); dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W ); dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U ); r_val = FLA_Tevd_v_opz_var2( m_A, m_U, n_G, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_R, rs_R, cs_R, buff_W, rs_W, cs_W, buff_U, rs_U, cs_U, b_alg ); break; } } return r_val; }
FLA_Error FLA_Tevd_v_opz_var1 | ( | int | m_A, |
int | m_U, | ||
int | n_G, | ||
int | n_iter_max, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
int | b_alg | ||
) |
References bl1_z1(), bl1_zsetm(), FLA_Abort(), FLA_Apply_G_rf_blz_var3(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().
Referenced by FLA_Tevd_v_opt_var1().
{ dcomplex one = bl1_z1(); dcomplex* G; double* d1; double* e1; int r_val; int done; int m_G_sweep_max; int ij_begin; int ijTL, ijBR; int m_A11; int n_iter_perf; int n_U_apply; int total_deflations; int n_deflations; int n_iter_prev; int n_iter_perf_sweep_max; // Initialize our completion flag. done = FALSE; // Initialize a counter that holds the maximum number of rows of G // that we would need to initialize for the next sweep. m_G_sweep_max = m_A - 1; // Initialize a counter for the total number of iterations performed. n_iter_prev = 0; // Iterate until the matrix has completely deflated. for ( total_deflations = 0; done != TRUE; ) { // Initialize G to contain only identity rotations. bl1_zsetm( m_G_sweep_max, n_G, &one, buff_G, rs_G, cs_G ); // Keep track of the maximum number of iterations performed in the // current sweep. This is used when applying the sweep's Givens // rotations. n_iter_perf_sweep_max = 0; // Perform a sweep: Move through the matrix and perform a tridiagonal // EVD on each non-zero submatrix that is encountered. During the // first time through, ijTL will be 0 and ijBR will be m_A - 1. for ( ij_begin = 0; ij_begin < m_A; ) { #ifdef PRINTF if ( ij_begin == 0 ) printf( "FLA_Tevd_v_opz_var1: beginning new sweep (ij_begin = %d)\n", ij_begin ); #endif // Search for the first submatrix along the diagonal that is // bounded by zeroes (or endpoints of the matrix). If no // submatrix is found (ie: if the entire subdiagonal is zero // then FLA_FAILURE is returned. This function also inspects // subdiagonal elements for proximity to zero. If a given // element is close enough to zero, then it is deemed // converged and manually set to zero. r_val = FLA_Tevd_find_submatrix_opd( m_A, ij_begin, buff_d, inc_d, buff_e, inc_e, &ijTL, &ijBR ); // Verify that a submatrix was found. If one was not found, // then we are done with the current sweep. Furthermore, if // a submatrix was not found AND we began our search at the // beginning of the matrix (ie: ij_begin == 0), then the // matrix has completely deflated and so we are done with // Francis step iteration. if ( r_val == FLA_FAILURE ) { if ( ij_begin == 0 ) { #ifdef PRINTF printf( "FLA_Tevd_v_opz_var1: subdiagonal is completely zero.\n" ); printf( "FLA_Tevd_v_opz_var1: Francis iteration is done!\n" ); #endif done = TRUE; } // Break out of the current sweep so we can apply the last // remaining Givens rotations. break; } // If we got this far, then: // (a) ijTL refers to the index of the first non-zero // subdiagonal along the diagonal, and // (b) ijBR refers to either: // - the first zero element that occurs after ijTL, or // - the the last diagonal element. // Note that ijTL and ijBR also correspond to the first and // last diagonal elements of the submatrix of interest. Thus, // we may compute the dimension of this submatrix as: m_A11 = ijBR - ijTL + 1; #ifdef PRINTF printf( "FLA_Tevd_v_opz_var1: ij_begin = %d\n", ij_begin ); printf( "FLA_Tevd_v_opz_var1: ijTL = %d\n", ijTL ); printf( "FLA_Tevd_v_opz_var1: ijBR = %d\n", ijBR ); printf( "FLA_Tevd_v_opz_var1: m_A11 = %d\n", m_A11 ); #endif // Adjust ij_begin, which gets us ready for the next submatrix // search in the current sweep. ij_begin = ijBR + 1; // Index to the submatrices upon which we will operate. d1 = buff_d + ijTL * inc_d; e1 = buff_e + ijTL * inc_e; G = buff_G + ijTL * rs_G; // Search for a batch of eigenvalues, recursing on deflated // subproblems whenever a split occurs. Iteration continues // as long as: // (a) there is still matrix left to operate on, and // (b) the number of iterations performed in this batch is // less than n_G. // If/when either of the two above conditions fails to hold, // the function returns. n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11, n_G, ijTL, d1, inc_d, e1, inc_e, G, rs_G, cs_G, &n_iter_perf ); // Record the number of deflations that were observed. total_deflations += n_deflations; // Update the maximum number of iterations performed in the // current sweep. n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf ); #ifdef PRINTF printf( "FLA_Tevd_v_opz_var1: deflations observed = %d\n", n_deflations ); printf( "FLA_Tevd_v_opz_var1: total deflations observed = %d\n", total_deflations ); printf( "FLA_Tevd_v_opz_var1: num iterations performed = %d\n", n_iter_perf ); #endif // Store the most recent value of ijBR in m_G_sweep_max. // When the sweep is done, this value will contain the minimum // number of rows of G we can apply and safely include all // non-identity rotations that were computed during the // eigenvalue searches. m_G_sweep_max = ijBR; // Make sure we haven't exceeded our maximum iteration count. if ( n_iter_prev >= m_A * n_iter_max ) { #ifdef PRINTF printf( "FLA_Tevd_v_opz_var1: reached maximum total number of iterations: %d\n", n_iter_prev ); #endif FLA_Abort(); //return FLA_FAILURE; } } // The sweep is complete. Now we must apply the Givens rotations // that were accumulated during the sweep. // Recall that the number of columns of U to which we apply // rotations is one more than the number of rotations. n_U_apply = m_G_sweep_max + 1; #ifdef PRINTF printf( "FLA_Tevd_v_opz_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max ); #endif // Apply the Givens rotations. Note that we optimize the scope // of the operation in two ways: // 1. We only apply k sets of Givens rotations, where // k = n_iter_perf_sweep_max. We could simply always apply // n_G sets of rotations since G is initialized to contain // identity rotations in every element, but we do this to // save a little bit of time. // 2. We only apply to the first n_U_apply columns of A since // this is the most we need to touch given the ijBR index // bound of the last submatrix found in the previous sweep. // Similar to above, we could simply always perform the // application on all m_A columns of A, but instead we apply // only to the first n_U_apply columns to save time. //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max, FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max, //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max, //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max, m_U, n_U_apply, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U, b_alg ); // Increment the total number of iterations previously performed. n_iter_prev += n_iter_perf_sweep_max; #ifdef PRINTF printf( "FLA_Tevd_v_opz_var1: total number of iterations performed: %d\n", n_iter_prev ); #endif } return n_iter_prev; }
FLA_Error FLA_Tevd_v_opz_var2 | ( | int | m_A, |
int | m_U, | ||
int | n_G, | ||
int | n_G_extra, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_R, | ||
int | rs_R, | ||
int | cs_R, | ||
dcomplex * | buff_W, | ||
int | rs_W, | ||
int | cs_W, | ||
dcomplex * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
int | b_alg | ||
) |
References bl1_d0(), bl1_d1(), bl1_dgemm(), bl1_dident(), bl1_z1(), bl1_zcopymt(), bl1_zsetm(), BLIS1_NO_TRANSPOSE, FLA_Abort(), FLA_Apply_G_rf_bld_var3b(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().
Referenced by FLA_Tevd_v_opt_var2().
{ dcomplex one = bl1_z1(); double rone = bl1_d1(); double rzero = bl1_d0(); dcomplex* G; double* d1; double* e1; int r_val; int done; int m_G_sweep_max; int ij_begin; int ijTL, ijBR; int m_A11; int n_iter_perf; int n_U_apply; int total_deflations; int n_deflations; int n_iter_prev; int n_iter_perf_sweep_max; // Initialize our completion flag. done = FALSE; // Initialize a counter that holds the maximum number of rows of G // that we would need to initialize for the next sweep. m_G_sweep_max = m_A - 1; // Initialize a counter for the total number of iterations performed. n_iter_prev = 0; // Initialize R to identity. bl1_dident( m_A, buff_R, rs_R, cs_R ); // Iterate until the matrix has completely deflated. for ( total_deflations = 0; done != TRUE; ) { // Initialize G to contain only identity rotations. bl1_zsetm( m_G_sweep_max, n_G, &one, buff_G, rs_G, cs_G ); // Keep track of the maximum number of iterations performed in the // current sweep. This is used when applying the sweep's Givens // rotations. n_iter_perf_sweep_max = 0; // Perform a sweep: Move through the matrix and perform a tridiagonal // EVD on each non-zero submatrix that is encountered. During the // first time through, ijTL will be 0 and ijBR will be m_A - 1. for ( ij_begin = 0; ij_begin < m_A; ) { #ifdef PRINTF if ( ij_begin == 0 ) printf( "FLA_Tevd_v_opz_var2: beginning new sweep (ij_begin = %d)\n", ij_begin ); #endif // Search for the first submatrix along the diagonal that is // bounded by zeroes (or endpoints of the matrix). If no // submatrix is found (ie: if the entire subdiagonal is zero // then FLA_FAILURE is returned. This function also inspects // subdiagonal elements for proximity to zero. If a given // element is close enough to zero, then it is deemed // converged and manually set to zero. r_val = FLA_Tevd_find_submatrix_opd( m_A, ij_begin, buff_d, inc_d, buff_e, inc_e, &ijTL, &ijBR ); // Verify that a submatrix was found. If one was not found, // then we are done with the current sweep. Furthermore, if // a submatrix was not found AND we began our search at the // beginning of the matrix (ie: ij_begin == 0), then the // matrix has completely deflated and so we are done with // Francis step iteration. if ( r_val == FLA_FAILURE ) { if ( ij_begin == 0 ) { #ifdef PRINTF printf( "FLA_Tevd_v_opz_var2: subdiagonal is completely zero.\n" ); printf( "FLA_Tevd_v_opz_var2: Francis iteration is done!\n" ); #endif done = TRUE; } // Break out of the current sweep so we can apply the last // remaining Givens rotations. break; } // If we got this far, then: // (a) ijTL refers to the index of the first non-zero // subdiagonal along the diagonal, and // (b) ijBR refers to either: // - the first zero element that occurs after ijTL, or // - the the last diagonal element. // Note that ijTL and ijBR also correspond to the first and // last diagonal elements of the submatrix of interest. Thus, // we may compute the dimension of this submatrix as: m_A11 = ijBR - ijTL + 1; #ifdef PRINTF printf( "FLA_Tevd_v_opz_var2: ij_begin = %d\n", ij_begin ); printf( "FLA_Tevd_v_opz_var2: ijTL = %d\n", ijTL ); printf( "FLA_Tevd_v_opz_var2: ijBR = %d\n", ijBR ); printf( "FLA_Tevd_v_opz_var2: m_A11 = %d\n", m_A11 ); #endif // Adjust ij_begin, which gets us ready for the next subproblem, if // there is one. ij_begin = ijBR + 1; // Index to the submatrices upon which we will operate. d1 = buff_d + ijTL * inc_d; e1 = buff_e + ijTL * inc_e; G = buff_G + ijTL * rs_G; // Search for a batch of eigenvalues, recursing on deflated // subproblems whenever a split occurs. Iteration continues // as long as // (a) there is still matrix left to operate on, and // (b) the number of iterations performed in this batch is // less than n_G. // If/when either of the two above conditions fails to hold, // the function returns. n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11, n_G, ijTL, d1, inc_d, e1, inc_e, G, rs_G, cs_G, &n_iter_perf ); // Record the number of deflations that we observed. total_deflations += n_deflations; // Update the maximum number of iterations performed in the // current sweep. n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf ); #ifdef PRINTF printf( "FLA_Tevd_v_opz_var2: deflations observed = %d\n", n_deflations ); printf( "FLA_Tevd_v_opz_var2: total deflations observed = %d\n", total_deflations ); printf( "FLA_Tevd_v_opz_var2: num iterations = %d\n", n_iter_perf ); #endif // Store the most recent value of ijBR in m_G_sweep_max. // When the sweep is done, this value will contain the minimum // number of rows of G we can apply and safely include all // non-identity rotations that were computed during the // eigenvalue searches. m_G_sweep_max = ijBR; // Make sure we haven't exceeded our maximum iteration count. if ( n_iter_prev >= m_A * n_iter_max ) { #ifdef PRINTF printf( "FLA_Tevd_v_opz_var2: reached maximum total number of iterations: %d\n", n_iter_prev ); #endif FLA_Abort(); //return FLA_FAILURE; } } // The sweep is complete. Now we must apply the Givens rotations // that were accumulated during the sweep. // Recall that the number of columns of U to which we apply // rotations is one more than the number of rotations. n_U_apply = m_G_sweep_max + 1; // Apply the Givens rotations that were computed as part of // the previous batch of iterations. //FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max, FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max, m_U, n_U_apply, n_iter_prev, buff_G, rs_G, cs_G, buff_R, rs_R, cs_R, b_alg ); #ifdef PRINTF printf( "FLA_Tevd_v_opz_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max ); #endif // Increment the total number of iterations previously performed. n_iter_prev += n_iter_perf_sweep_max; } // Copy the contents of Q to temporary storage. bl1_zcopymt( BLIS1_NO_TRANSPOSE, m_A, m_A, buff_U, rs_U, cs_U, buff_W, rs_W, cs_W ); // Multiply Q by R, overwriting U. bl1_dgemm( BLIS1_NO_TRANSPOSE, BLIS1_NO_TRANSPOSE, 2*m_A, m_A, m_A, &rone, ( double* )buff_W, rs_W, 2*cs_W, buff_R, rs_R, cs_R, &rzero, ( double* )buff_U, rs_U, 2*cs_U ); return n_iter_prev; }