libflame
revision_anchor
|
Functions | |
FLA_Error | FLA_Bsvd_ext_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Svd_type jobu, FLA_Obj U, FLA_Svd_type jobv, FLA_Obj V, FLA_Bool apply_Uh2C, FLA_Obj C, dim_t b_alg) |
FLA_Error | FLA_Bsvd_ext_ops_var1 (int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, float *buff_C, int rs_C, int cs_C, int b_alg) |
FLA_Error | FLA_Bsvd_ext_opd_var1 (int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, double *buff_C, int rs_C, int cs_C, int b_alg) |
FLA_Error | FLA_Bsvd_ext_opc_var1 (int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_C, int rs_C, int cs_C, int b_alg) |
FLA_Error | FLA_Bsvd_ext_opz_var1 (int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_C, int rs_C, int cs_C, int b_alg) |
FLA_Error FLA_Bsvd_ext_opc_var1 | ( | int | m_d, |
int | m_U, | ||
int | m_V, | ||
int | m_C, | ||
int | n_C, | ||
int | n_GH, | ||
int | n_iter_max, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_H, | ||
int | rs_H, | ||
int | cs_H, | ||
scomplex * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
scomplex * | buff_V, | ||
int | rs_V, | ||
int | cs_V, | ||
scomplex * | buff_C, | ||
int | rs_C, | ||
int | cs_C, | ||
int | b_alg | ||
) |
References bl1_c1(), bl1_csetm(), bl1_csscalv(), bl1_s0(), bl1_sm1(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blc_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), and FLA_Mach_params_ops().
Referenced by FLA_Bsvd_ext_opt_var1().
{ scomplex one = bl1_c1(); float rzero = bl1_s0(); int maxitr = 6; float eps; float tolmul; float tol; float thresh; scomplex* G; scomplex* H; float* d1; float* e1; int r_val; int done; int m_GH_sweep_max; int ij_begin; int ijTL, ijBR; int m_A11; int n_iter_perf; int n_UV_apply; int total_deflations; int n_deflations; int n_iter_prev; int n_iter_perf_sweep_max; // Compute some convergence constants. eps = FLA_Mach_params_ops( FLA_MACH_EPS ); tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) ); FLA_Bsvd_compute_tol_thresh_ops( m_d, tolmul, maxitr, buff_d, inc_d, buff_e, inc_e, &tol, &thresh ); // Initialize our completion flag. done = FALSE; // Initialize a counter that holds the maximum number of rows of G // and H that we would need to initialize for the next sweep. m_GH_sweep_max = m_d - 1; // Initialize a counter for the total number of iterations performed. n_iter_prev = 0; // Iterate until the matrix has completely deflated. for ( total_deflations = 0; done != TRUE; ) { // Initialize G and H to contain only identity rotations. bl1_csetm( m_GH_sweep_max, n_GH, &one, buff_G, rs_G, cs_G ); bl1_csetm( m_GH_sweep_max, n_GH, &one, buff_H, rs_H, cs_H ); // Keep track of the maximum number of iterations performed in the // current sweep. This is used when applying the sweep's Givens // rotations. n_iter_perf_sweep_max = 0; // Perform a sweep: Move through the matrix and perform a bidiagonal // SVD on each non-zero submatrix that is encountered. During the // first time through, ijTL will be 0 and ijBR will be m_d - 1. for ( ij_begin = 0; ij_begin < m_d; ) { // Search for the first submatrix along the diagonal that is // bounded by zeroes (or endpoints of the matrix). If no // submatrix is found (ie: if the entire superdiagonal is zero // then FLA_FAILURE is returned. This function also inspects // superdiagonal elements for proximity to zero. If a given // element is close enough to zero, then it is deemed // converged and manually set to zero. r_val = FLA_Bsvd_find_submatrix_ops( m_d, ij_begin, buff_d, inc_d, buff_e, inc_e, &ijTL, &ijBR ); // Verify that a submatrix was found. If one was not found, // then we are done with the current sweep. Furthermore, if // a submatrix was not found AND we began our search at the // beginning of the matrix (ie: ij_begin == 0), then the // matrix has completely deflated and so we are done with // Francis step iteration. if ( r_val == FLA_FAILURE ) { if ( ij_begin == 0 ) { done = TRUE; } // Break out of the current sweep so we can apply the last // remaining Givens rotations. break; } // If we got this far, then: // (a) ijTL refers to the index of the first non-zero // superdiagonal along the diagonal, and // (b) ijBR refers to either: // - the first zero element that occurs after ijTL, or // - the the last diagonal element. // Note that ijTL and ijBR also correspond to the first and // last diagonal elements of the submatrix of interest. Thus, // we may compute the dimension of this submatrix as: m_A11 = ijBR - ijTL + 1; // Adjust ij_begin, which gets us ready for the next submatrix // search in the current sweep. ij_begin = ijBR + 1; // Index to the submatrices upon which we will operate. d1 = buff_d + ijTL * inc_d; e1 = buff_e + ijTL * inc_e; G = buff_G + ijTL * rs_G; H = buff_H + ijTL * rs_H; // Search for a batch of singular values, recursing on deflated // subproblems whenever a split occurs. Iteration continues as // long as // (a) there is still matrix left to operate on, and // (b) the number of iterations performed in this batch is // less than n_G. // If/when either of the two above conditions fails to hold, // the function returns. n_deflations = FLA_Bsvd_iteracc_v_ops_var1( m_A11, n_GH, ijTL, tol, thresh, d1, inc_d, e1, inc_e, G, rs_G, cs_G, H, rs_H, cs_H, &n_iter_perf ); // Record the number of deflations that were observed. total_deflations += n_deflations; // Update the maximum number of iterations performed in the // current sweep. n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf ); // Store the most recent value of ijBR in m_G_sweep_max. // When the sweep is done, this value will contain the minimum // number of rows of G and H we can apply and safely include all // non-identity rotations that were computed during the // singular value searches. m_GH_sweep_max = ijBR; // Make sure we haven't exceeded our maximum iteration count. if ( n_iter_prev >= n_iter_max * m_d ) { FLA_Abort(); //return FLA_FAILURE; } } // The sweep is complete. Now we must apply the Givens rotations // that were accumulated during the sweep. // Recall that the number of columns of U and V to which we apply // rotations is one more than the number of rotations. n_UV_apply = m_GH_sweep_max + 1; // Apply the Givens rotations. Note that we only apply k sets of // rotations, where k = n_iter_perf_sweep_max. Also note that we only // apply to n_UV_apply columns of U and V since this is the most we // need to touch given the most recent value stored to m_GH_sweep_max. if ( buff_U != NULL ) // ( rs_U == 0 && cs_U == 0 ) // if U is requested FLA_Apply_G_rf_blc_var3( n_iter_perf_sweep_max, m_U, n_UV_apply, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U, b_alg ); if ( buff_V != NULL ) // ( rs_V == 0 && cs_V == 0 ) // if V is requested FLA_Apply_G_rf_blc_var3( n_iter_perf_sweep_max, m_V, n_UV_apply, buff_H, rs_H, cs_H, buff_V, rs_V, cs_V, b_alg ); // lf with non-flipped cs/rs m/n should be used; // but the same operation can be achieved by using rf // ( U^H C )^H = C^H U where U is G_0 G_1 G_2 .... G_{k-1} if ( buff_C != NULL ) // ( rs_C == 0 && cs_C == 0 ) // if C is requested, flip { FLA_Apply_G_rf_blc_var3( n_iter_perf_sweep_max, n_C, m_C, buff_G, rs_G, cs_G, buff_C, cs_C, rs_C, b_alg ); } // Increment the total number of iterations previously performed. n_iter_prev += n_iter_perf_sweep_max; } // Make all the singular values positive. { int i; float minus_one = bl1_sm1(); for ( i = 0; i < m_d; ++i ) { if ( buff_d[ (i )*inc_d ] < rzero ) { buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ]; // Scale the right singular vectors. if ( buff_V != NULL ) bl1_csscalv( BLIS1_NO_CONJUGATE, m_V, &minus_one, buff_V + (i )*cs_V, rs_V ); } } } return FLA_SUCCESS; }
FLA_Error FLA_Bsvd_ext_opd_var1 | ( | int | m_d, |
int | m_U, | ||
int | m_V, | ||
int | m_C, | ||
int | n_C, | ||
int | n_GH, | ||
int | n_iter_max, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_H, | ||
int | rs_H, | ||
int | cs_H, | ||
double * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
double * | buff_V, | ||
int | rs_V, | ||
int | cs_V, | ||
double * | buff_C, | ||
int | rs_C, | ||
int | cs_C, | ||
int | b_alg | ||
) |
References bl1_d0(), bl1_dm1(), bl1_dscalv(), bl1_z1(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_bld_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().
Referenced by FLA_Bsvd_ext_opt_var1().
{ dcomplex one = bl1_z1(); double rzero = bl1_d0(); int maxitr = 6; double eps; double tolmul; double tol; double thresh; dcomplex* G; dcomplex* H; double* d1; double* e1; int r_val; int done; int m_GH_sweep_max; int ij_begin; int ijTL, ijBR; int m_A11; int n_iter_perf; int n_UV_apply; int total_deflations; int n_deflations; int n_iter_prev; int n_iter_perf_sweep_max; // Compute some convergence constants. eps = FLA_Mach_params_opd( FLA_MACH_EPS ); tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) ); FLA_Bsvd_compute_tol_thresh_opd( m_d, tolmul, maxitr, buff_d, inc_d, buff_e, inc_e, &tol, &thresh ); // Initialize our completion flag. done = FALSE; // Initialize a counter that holds the maximum number of rows of G // and H that we would need to initialize for the next sweep. m_GH_sweep_max = m_d - 1; // Initialize a counter for the total number of iterations performed. n_iter_prev = 0; // Iterate until the matrix has completely deflated. for ( total_deflations = 0; done != TRUE; ) { // Initialize G and H to contain only identity rotations. bl1_zsetm( m_GH_sweep_max, n_GH, &one, buff_G, rs_G, cs_G ); bl1_zsetm( m_GH_sweep_max, n_GH, &one, buff_H, rs_H, cs_H ); // Keep track of the maximum number of iterations performed in the // current sweep. This is used when applying the sweep's Givens // rotations. n_iter_perf_sweep_max = 0; // Perform a sweep: Move through the matrix and perform a bidiagonal // SVD on each non-zero submatrix that is encountered. During the // first time through, ijTL will be 0 and ijBR will be m_d - 1. for ( ij_begin = 0; ij_begin < m_d; ) { // Search for the first submatrix along the diagonal that is // bounded by zeroes (or endpoints of the matrix). If no // submatrix is found (ie: if the entire superdiagonal is zero // then FLA_FAILURE is returned. This function also inspects // superdiagonal elements for proximity to zero. If a given // element is close enough to zero, then it is deemed // converged and manually set to zero. r_val = FLA_Bsvd_find_submatrix_opd( m_d, ij_begin, buff_d, inc_d, buff_e, inc_e, &ijTL, &ijBR ); // Verify that a submatrix was found. If one was not found, // then we are done with the current sweep. Furthermore, if // a submatrix was not found AND we began our search at the // beginning of the matrix (ie: ij_begin == 0), then the // matrix has completely deflated and so we are done with // Francis step iteration. if ( r_val == FLA_FAILURE ) { if ( ij_begin == 0 ) { done = TRUE; } // Break out of the current sweep so we can apply the last // remaining Givens rotations. break; } // If we got this far, then: // (a) ijTL refers to the index of the first non-zero // superdiagonal along the diagonal, and // (b) ijBR refers to either: // - the first zero element that occurs after ijTL, or // - the the last diagonal element. // Note that ijTL and ijBR also correspond to the first and // last diagonal elements of the submatrix of interest. Thus, // we may compute the dimension of this submatrix as: m_A11 = ijBR - ijTL + 1; // Adjust ij_begin, which gets us ready for the next submatrix // search in the current sweep. ij_begin = ijBR + 1; // Index to the submatrices upon which we will operate. d1 = buff_d + ijTL * inc_d; e1 = buff_e + ijTL * inc_e; G = buff_G + ijTL * rs_G; H = buff_H + ijTL * rs_H; // Search for a batch of singular values, recursing on deflated // subproblems whenever a split occurs. Iteration continues as // long as // (a) there is still matrix left to operate on, and // (b) the number of iterations performed in this batch is // less than n_G. // If/when either of the two above conditions fails to hold, // the function returns. n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11, n_GH, ijTL, tol, thresh, d1, inc_d, e1, inc_e, G, rs_G, cs_G, H, rs_H, cs_H, &n_iter_perf ); // Record the number of deflations that were observed. total_deflations += n_deflations; // Update the maximum number of iterations performed in the // current sweep. n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf ); // Store the most recent value of ijBR in m_G_sweep_max. // When the sweep is done, this value will contain the minimum // number of rows of G and H we can apply and safely include all // non-identity rotations that were computed during the // singular value searches. m_GH_sweep_max = ijBR; // Make sure we haven't exceeded our maximum iteration count. if ( n_iter_prev >= n_iter_max * m_d ) { FLA_Abort(); //return FLA_FAILURE; } } // The sweep is complete. Now we must apply the Givens rotations // that were accumulated during the sweep. // Recall that the number of columns of U and V to which we apply // rotations is one more than the number of rotations. n_UV_apply = m_GH_sweep_max + 1; // Apply the Givens rotations. Note that we only apply k sets of // rotations, where k = n_iter_perf_sweep_max. Also note that we only // apply to n_UV_apply columns of U and V since this is the most we // need to touch given the most recent value stored to m_GH_sweep_max. if ( buff_U != NULL ) // ( rs_U == 0 && cs_U == 0 ) // if U is requested FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max, m_U, n_UV_apply, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U, b_alg ); if ( buff_V != NULL ) // ( rs_V == 0 && cs_V == 0 ) // if V is requested FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max, m_V, n_UV_apply, buff_H, rs_H, cs_H, buff_V, rs_V, cs_V, b_alg ); // lf with non-flipped cs/rs m/n should be used; // but the same operation can be achieved by using rf // ( U^H C )^H = C^H U where U is G_0 G_1 G_2 .... G_{k-1} if ( buff_C != NULL ) // ( rs_C == 0 && cs_C == 0 ) // if C is requested, flip { FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max, n_C, m_C, buff_G, rs_G, cs_G, buff_C, cs_C, rs_C, b_alg ); } // Increment the total number of iterations previously performed. n_iter_prev += n_iter_perf_sweep_max; } // Make all the singular values positive. { int i; double minus_one = bl1_dm1(); for ( i = 0; i < m_d; ++i ) { if ( buff_d[ (i )*inc_d ] < rzero ) { buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ]; // Scale the right singular vectors. if ( buff_V != NULL ) bl1_dscalv( BLIS1_NO_CONJUGATE, m_V, &minus_one, buff_V + (i )*cs_V, rs_V ); } } } return n_iter_prev; }
FLA_Error FLA_Bsvd_ext_ops_var1 | ( | int | m_d, |
int | m_U, | ||
int | m_V, | ||
int | m_C, | ||
int | n_C, | ||
int | n_GH, | ||
int | n_iter_max, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_H, | ||
int | rs_H, | ||
int | cs_H, | ||
float * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
float * | buff_V, | ||
int | rs_V, | ||
int | cs_V, | ||
float * | buff_C, | ||
int | rs_C, | ||
int | cs_C, | ||
int | b_alg | ||
) |
References bl1_c1(), bl1_csetm(), bl1_s0(), bl1_sm1(), bl1_sscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_G_rf_bls_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), and FLA_Mach_params_ops().
Referenced by FLA_Bsvd_ext_opt_var1().
{ scomplex one = bl1_c1(); float rzero = bl1_s0(); int maxitr = 6; float eps; float tolmul; float tol; float thresh; scomplex* G; scomplex* H; float* d1; float* e1; int r_val; int done; int m_GH_sweep_max; int ij_begin; int ijTL, ijBR; int m_A11; int n_iter_perf; int n_UV_apply; int total_deflations; int n_deflations; int n_iter_prev; int n_iter_perf_sweep_max; // Compute some convergence constants. eps = FLA_Mach_params_ops( FLA_MACH_EPS ); tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) ); FLA_Bsvd_compute_tol_thresh_ops( m_d, tolmul, maxitr, buff_d, inc_d, buff_e, inc_e, &tol, &thresh ); // Initialize our completion flag. done = FALSE; // Initialize a counter that holds the maximum number of rows of G // and H that we would need to initialize for the next sweep. m_GH_sweep_max = m_d - 1; // Initialize a counter for the total number of iterations performed. n_iter_prev = 0; // Iterate until the matrix has completely deflated. for ( total_deflations = 0; done != TRUE; ) { // Initialize G and H to contain only identity rotations. bl1_csetm( m_GH_sweep_max, n_GH, &one, buff_G, rs_G, cs_G ); bl1_csetm( m_GH_sweep_max, n_GH, &one, buff_H, rs_H, cs_H ); // Keep track of the maximum number of iterations performed in the // current sweep. This is used when applying the sweep's Givens // rotations. n_iter_perf_sweep_max = 0; // Perform a sweep: Move through the matrix and perform a bidiagonal // SVD on each non-zero submatrix that is encountered. During the // first time through, ijTL will be 0 and ijBR will be m_d - 1. for ( ij_begin = 0; ij_begin < m_d; ) { // Search for the first submatrix along the diagonal that is // bounded by zeroes (or endpoints of the matrix). If no // submatrix is found (ie: if the entire superdiagonal is zero // then FLA_FAILURE is returned. This function also inspects // superdiagonal elements for proximity to zero. If a given // element is close enough to zero, then it is deemed // converged and manually set to zero. r_val = FLA_Bsvd_find_submatrix_ops( m_d, ij_begin, buff_d, inc_d, buff_e, inc_e, &ijTL, &ijBR ); // Verify that a submatrix was found. If one was not found, // then we are done with the current sweep. Furthermore, if // a submatrix was not found AND we began our search at the // beginning of the matrix (ie: ij_begin == 0), then the // matrix has completely deflated and so we are done with // Francis step iteration. if ( r_val == FLA_FAILURE ) { if ( ij_begin == 0 ) { done = TRUE; } // Break out of the current sweep so we can apply the last // remaining Givens rotations. break; } // If we got this far, then: // (a) ijTL refers to the index of the first non-zero // superdiagonal along the diagonal, and // (b) ijBR refers to either: // - the first zero element that occurs after ijTL, or // - the the last diagonal element. // Note that ijTL and ijBR also correspond to the first and // last diagonal elements of the submatrix of interest. Thus, // we may compute the dimension of this submatrix as: m_A11 = ijBR - ijTL + 1; // Adjust ij_begin, which gets us ready for the next submatrix // search in the current sweep. ij_begin = ijBR + 1; // Index to the submatrices upon which we will operate. d1 = buff_d + ijTL * inc_d; e1 = buff_e + ijTL * inc_e; G = buff_G + ijTL * rs_G; H = buff_H + ijTL * rs_H; // Search for a batch of singular values, recursing on deflated // subproblems whenever a split occurs. Iteration continues as // long as // (a) there is still matrix left to operate on, and // (b) the number of iterations performed in this batch is // less than n_G. // If/when either of the two above conditions fails to hold, // the function returns. n_deflations = FLA_Bsvd_iteracc_v_ops_var1( m_A11, n_GH, ijTL, tol, thresh, d1, inc_d, e1, inc_e, G, rs_G, cs_G, H, rs_H, cs_H, &n_iter_perf ); // Record the number of deflations that were observed. total_deflations += n_deflations; // Update the maximum number of iterations performed in the // current sweep. n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf ); // Store the most recent value of ijBR in m_G_sweep_max. // When the sweep is done, this value will contain the minimum // number of rows of G and H we can apply and safely include all // non-identity rotations that were computed during the // singular value searches. m_GH_sweep_max = ijBR; // Make sure we haven't exceeded our maximum iteration count. if ( n_iter_prev >= n_iter_max * m_d ) { //FLA_Abort(); return n_iter_prev; } } // The sweep is complete. Now we must apply the Givens rotations // that were accumulated during the sweep. // Recall that the number of columns of U and V to which we apply // rotations is one more than the number of rotations. n_UV_apply = m_GH_sweep_max + 1; // Apply the Givens rotations. Note that we only apply k sets of // rotations, where k = n_iter_perf_sweep_max. Also note that we only // apply to n_UV_apply columns of U and V since this is the most we // need to touch given the most recent value stored to m_GH_sweep_max. if ( buff_U != NULL ) // ( rs_U == 0 && cs_U == 0 ) // if U is requested FLA_Apply_G_rf_bls_var3( n_iter_perf_sweep_max, m_U, n_UV_apply, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U, b_alg ); if ( buff_V != NULL ) // ( rs_V == 0 && cs_V == 0 ) // if V is requested FLA_Apply_G_rf_bls_var3( n_iter_perf_sweep_max, m_V, n_UV_apply, buff_H, rs_H, cs_H, buff_V, rs_V, cs_V, b_alg ); // lf with non-flipped cs/rs m/n should be used; // but the same operation can be achieved by using rf // ( U^H C )^H = C^H U where U is G_0 G_1 G_2 .... G_{k-1} if ( buff_C != NULL ) // ( rs_C == 0 && cs_C == 0 ) // if C is requested, flip { FLA_Apply_G_rf_bls_var3( n_iter_perf_sweep_max, n_C, m_C, buff_G, rs_G, cs_G, buff_C, cs_C, rs_C, b_alg ); } // Increment the total number of iterations previously performed. n_iter_prev += n_iter_perf_sweep_max; } // Make all the singular values positive. { int i; float minus_one = bl1_sm1(); for ( i = 0; i < m_d; ++i ) { if ( buff_d[ (i )*inc_d ] < rzero ) { buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ]; // Scale the right singular vectors. if ( buff_V != NULL ) bl1_sscalv( BLIS1_NO_CONJUGATE, m_V, &minus_one, buff_V + (i )*cs_V, rs_V ); } } } return n_iter_prev; }
FLA_Error FLA_Bsvd_ext_opt_var1 | ( | dim_t | n_iter_max, |
FLA_Obj | d, | ||
FLA_Obj | e, | ||
FLA_Obj | G, | ||
FLA_Obj | H, | ||
FLA_Svd_type | jobu, | ||
FLA_Obj | U, | ||
FLA_Svd_type | jobv, | ||
FLA_Obj | V, | ||
FLA_Bool | apply_Uh2C, | ||
FLA_Obj | C, | ||
dim_t | b_alg | ||
) |
References FLA_Bsvd_ext_opc_var1(), FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_ext_ops_var1(), FLA_Bsvd_ext_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_Sort_bsvd_ext_b_opc(), FLA_Sort_bsvd_ext_b_opd(), FLA_Sort_bsvd_ext_b_ops(), and FLA_Sort_bsvd_ext_b_opz().
Referenced by FLA_Bsvd(), FLA_Bsvd_ext(), and FLA_Svd_ext_u_unb_var1().
{ FLA_Error r_val = FLA_SUCCESS; FLA_Datatype datatype; int m_U, m_V, m_C, n_C, n_GH; int m_d, inc_d; int inc_e; int rs_G, cs_G; int rs_H, cs_H; int rs_U, cs_U; int rs_V, cs_V; int rs_C, cs_C; if ( jobu != FLA_SVD_VECTORS_NONE ) datatype = FLA_Obj_datatype( U ); else if ( jobv != FLA_SVD_VECTORS_NONE ) datatype = FLA_Obj_datatype( V ); else if ( apply_Uh2C == TRUE ) datatype = FLA_Obj_datatype( C ); else datatype = FLA_Obj_datatype( d ); m_d = FLA_Obj_vector_dim( d ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); n_GH = FLA_Obj_width( G ); rs_G = FLA_Obj_row_stride( G ); cs_G = FLA_Obj_col_stride( G ); rs_H = FLA_Obj_row_stride( H ); cs_H = FLA_Obj_col_stride( H ); if ( jobu == FLA_SVD_VECTORS_NONE ) { m_U = 0; rs_U = 0; cs_U = 0; } else { m_U = FLA_Obj_length( U ); rs_U = FLA_Obj_row_stride( U ); cs_U = FLA_Obj_col_stride( U ); } if ( jobv == FLA_SVD_VECTORS_NONE ) { m_V = 0; rs_V = 0; cs_V = 0; } else { m_V = FLA_Obj_length( V ); rs_V = FLA_Obj_row_stride( V ); cs_V = FLA_Obj_col_stride( V ); } if ( apply_Uh2C == FALSE ) { m_C = 0; n_C = 0; rs_C = 0; cs_C = 0; } else { m_C = FLA_Obj_length( C ); n_C = FLA_Obj_width( C ); rs_C = FLA_Obj_row_stride( C ); cs_C = FLA_Obj_col_stride( C ); } switch ( datatype ) { case FLA_FLOAT: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); scomplex* buff_H = FLA_COMPLEX_PTR( H ); float* buff_U = ( jobu == FLA_SVD_VECTORS_NONE ? NULL : FLA_FLOAT_PTR( U ) ); float* buff_V = ( jobv == FLA_SVD_VECTORS_NONE ? NULL : FLA_FLOAT_PTR( V ) ); float* buff_C = ( apply_Uh2C == FALSE ? NULL : FLA_FLOAT_PTR( C ) ); r_val = FLA_Bsvd_ext_ops_var1( m_d, m_U, m_V, m_C, n_C, n_GH, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_U, rs_U, cs_U, buff_V, rs_V, cs_V, buff_C, rs_C, cs_C, b_alg ); FLA_Sort_bsvd_ext_b_ops( m_d, buff_d, inc_d, m_U, buff_U, rs_U, cs_U, m_V, buff_V, rs_V, cs_V, n_C, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H ); double* buff_U = ( jobu == FLA_SVD_VECTORS_NONE ? NULL : FLA_DOUBLE_PTR( U ) ); double* buff_V = ( jobv == FLA_SVD_VECTORS_NONE ? NULL : FLA_DOUBLE_PTR( V ) ); double* buff_C = ( apply_Uh2C == FALSE ? NULL : FLA_DOUBLE_PTR( C ) ); r_val = FLA_Bsvd_ext_opd_var1( m_d, m_U, m_V, m_C, n_C, n_GH, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_U, rs_U, cs_U, buff_V, rs_V, cs_V, buff_C, rs_C, cs_C, b_alg ); FLA_Sort_bsvd_ext_b_opd( m_d, buff_d, inc_d, m_U, buff_U, rs_U, cs_U, m_V, buff_V, rs_V, cs_V, n_C, buff_C, rs_C, cs_C ); break; } case FLA_COMPLEX: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); scomplex* buff_H = FLA_COMPLEX_PTR( H ); scomplex* buff_U = ( jobu == FLA_SVD_VECTORS_NONE ? NULL : FLA_COMPLEX_PTR( U ) ); scomplex* buff_V = ( jobv == FLA_SVD_VECTORS_NONE ? NULL : FLA_COMPLEX_PTR( V ) ); scomplex* buff_C = ( apply_Uh2C == FALSE ? NULL : FLA_COMPLEX_PTR( C ) ); r_val = FLA_Bsvd_ext_opc_var1( m_d, m_U, m_V, m_C, n_C, n_GH, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_U, rs_U, cs_U, buff_V, rs_V, cs_V, buff_C, rs_C, cs_C, b_alg ); FLA_Sort_bsvd_ext_b_opc( m_d, buff_d, inc_d, m_U, buff_U, rs_U, cs_U, m_V, buff_V, rs_V, cs_V, n_C, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE_COMPLEX: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H ); dcomplex* buff_U = ( jobu == FLA_SVD_VECTORS_NONE ? NULL : FLA_DOUBLE_COMPLEX_PTR( U ) ); dcomplex* buff_V = ( jobv == FLA_SVD_VECTORS_NONE ? NULL : FLA_DOUBLE_COMPLEX_PTR( V ) ); dcomplex* buff_C = ( apply_Uh2C == FALSE ? NULL : FLA_DOUBLE_COMPLEX_PTR( C ) ); r_val = FLA_Bsvd_ext_opz_var1( m_d, m_U, m_V, m_C, n_C, n_GH, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_U, rs_U, cs_U, buff_V, rs_V, cs_V, buff_C, rs_C, cs_C, b_alg ); FLA_Sort_bsvd_ext_b_opz( m_d, buff_d, inc_d, m_U, buff_U, rs_U, cs_U, m_V, buff_V, rs_V, cs_V, n_C, buff_C, rs_C, cs_C ); break; } } return r_val; }
FLA_Error FLA_Bsvd_ext_opz_var1 | ( | int | m_d, |
int | m_U, | ||
int | m_V, | ||
int | m_C, | ||
int | n_C, | ||
int | n_GH, | ||
int | n_iter_max, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_H, | ||
int | rs_H, | ||
int | cs_H, | ||
dcomplex * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
dcomplex * | buff_V, | ||
int | rs_V, | ||
int | cs_V, | ||
dcomplex * | buff_C, | ||
int | rs_C, | ||
int | cs_C, | ||
int | b_alg | ||
) |
References bl1_d0(), bl1_dm1(), bl1_z1(), bl1_zdscalv(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blz_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().
Referenced by FLA_Bsvd_ext_opt_var1().
{ dcomplex one = bl1_z1(); double rzero = bl1_d0(); int maxitr = 6; double eps; double tolmul; double tol; double thresh; dcomplex* G; dcomplex* H; double* d1; double* e1; int r_val; int done; int m_GH_sweep_max; int ij_begin; int ijTL, ijBR; int m_A11; int n_iter_perf; int n_UV_apply; int total_deflations; int n_deflations; int n_iter_prev; int n_iter_perf_sweep_max; // Compute some convergence constants. eps = FLA_Mach_params_opd( FLA_MACH_EPS ); tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) ); FLA_Bsvd_compute_tol_thresh_opd( m_d, tolmul, maxitr, buff_d, inc_d, buff_e, inc_e, &tol, &thresh ); // Initialize our completion flag. done = FALSE; // Initialize a counter that holds the maximum number of rows of G // and H that we would need to initialize for the next sweep. m_GH_sweep_max = m_d - 1; // Initialize a counter for the total number of iterations performed. n_iter_prev = 0; // Iterate until the matrix has completely deflated. for ( total_deflations = 0; done != TRUE; ) { // Initialize G and H to contain only identity rotations. bl1_zsetm( m_GH_sweep_max, n_GH, &one, buff_G, rs_G, cs_G ); bl1_zsetm( m_GH_sweep_max, n_GH, &one, buff_H, rs_H, cs_H ); // Keep track of the maximum number of iterations performed in the // current sweep. This is used when applying the sweep's Givens // rotations. n_iter_perf_sweep_max = 0; // Perform a sweep: Move through the matrix and perform a bidiagonal // SVD on each non-zero submatrix that is encountered. During the // first time through, ijTL will be 0 and ijBR will be m_d - 1. for ( ij_begin = 0; ij_begin < m_d; ) { // Search for the first submatrix along the diagonal that is // bounded by zeroes (or endpoints of the matrix). If no // submatrix is found (ie: if the entire superdiagonal is zero // then FLA_FAILURE is returned. This function also inspects // superdiagonal elements for proximity to zero. If a given // element is close enough to zero, then it is deemed // converged and manually set to zero. r_val = FLA_Bsvd_find_submatrix_opd( m_d, ij_begin, buff_d, inc_d, buff_e, inc_e, &ijTL, &ijBR ); // Verify that a submatrix was found. If one was not found, // then we are done with the current sweep. Furthermore, if // a submatrix was not found AND we began our search at the // beginning of the matrix (ie: ij_begin == 0), then the // matrix has completely deflated and so we are done with // Francis step iteration. if ( r_val == FLA_FAILURE ) { if ( ij_begin == 0 ) { done = TRUE; } // Break out of the current sweep so we can apply the last // remaining Givens rotations. break; } // If we got this far, then: // (a) ijTL refers to the index of the first non-zero // superdiagonal along the diagonal, and // (b) ijBR refers to either: // - the first zero element that occurs after ijTL, or // - the the last diagonal element. // Note that ijTL and ijBR also correspond to the first and // last diagonal elements of the submatrix of interest. Thus, // we may compute the dimension of this submatrix as: m_A11 = ijBR - ijTL + 1; // Adjust ij_begin, which gets us ready for the next submatrix // search in the current sweep. ij_begin = ijBR + 1; // Index to the submatrices upon which we will operate. d1 = buff_d + ijTL * inc_d; e1 = buff_e + ijTL * inc_e; G = buff_G + ijTL * rs_G; H = buff_H + ijTL * rs_H; // Search for a batch of singular values, recursing on deflated // subproblems whenever a split occurs. Iteration continues as // long as // (a) there is still matrix left to operate on, and // (b) the number of iterations performed in this batch is // less than n_G. // If/when either of the two above conditions fails to hold, // the function returns. n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11, n_GH, ijTL, tol, thresh, d1, inc_d, e1, inc_e, G, rs_G, cs_G, H, rs_H, cs_H, &n_iter_perf ); // Record the number of deflations that were observed. total_deflations += n_deflations; // Update the maximum number of iterations performed in the // current sweep. n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf ); // Store the most recent value of ijBR in m_G_sweep_max. // When the sweep is done, this value will contain the minimum // number of rows of G and H we can apply and safely include all // non-identity rotations that were computed during the // singular value searches. m_GH_sweep_max = ijBR; // Make sure we haven't exceeded our maximum iteration count. if ( n_iter_prev >= n_iter_max * m_d ) { FLA_Abort(); //return FLA_FAILURE; } } // The sweep is complete. Now we must apply the Givens rotations // that were accumulated during the sweep. // Recall that the number of columns of U and V to which we apply // rotations is one more than the number of rotations. n_UV_apply = m_GH_sweep_max + 1; // Apply the Givens rotations. Note that we only apply k sets of // rotations, where k = n_iter_perf_sweep_max. Also note that we only // apply to n_UV_apply columns of U and V since this is the most we // need to touch given the most recent value stored to m_GH_sweep_max. if ( buff_U != NULL ) // ( rs_U == 0 && cs_U == 0 ) // if U is requested FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max, m_U, n_UV_apply, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U, b_alg ); if ( buff_V != NULL ) // ( rs_V == 0 && cs_V == 0 ) // if V is requested FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max, m_V, n_UV_apply, buff_H, rs_H, cs_H, buff_V, rs_V, cs_V, b_alg ); // lf with non-flipped cs/rs m/n should be used; // but the same operation can be achieved by using rf // ( U^H C )^H = C^H U where U is G_0 G_1 G_2 .... G_{k-1} if ( buff_C != NULL ) // ( rs_C == 0 && cs_C == 0 ) // if C is requested, flip { FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max, n_C, m_C, buff_G, rs_G, cs_G, buff_C, cs_C, rs_C, b_alg ); } // Increment the total number of iterations previously performed. n_iter_prev += n_iter_perf_sweep_max; } // Make all the singular values positive. { int i; double minus_one = bl1_dm1(); for ( i = 0; i < m_d; ++i ) { if ( buff_d[ (i )*inc_d ] < rzero ) { buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ]; // Scale the right singular vectors. if ( buff_V != NULL ) bl1_zdscalv( BLIS1_NO_CONJUGATE, m_V, &minus_one, buff_V + (i )*cs_V, rs_V ); } } } return n_iter_prev; }