libflame
revision_anchor
|
Go to the source code of this file.
Functions | |
FLA_Error | FLA_Bsvd_compute_shift (FLA_Obj tol, FLA_Obj sminl, FLA_Obj smax, FLA_Obj d, FLA_Obj e, FLA_Obj shift) |
FLA_Error | FLA_Bsvd_compute_shift_ops (int m_A, float tol, float sminl, float smax, float *buff_d, int inc_d, float *buff_e, int inc_e, float *shift) |
FLA_Error | FLA_Bsvd_compute_shift_opd (int m_A, double tol, double sminl, double smax, double *buff_d, int inc_d, double *buff_e, int inc_e, double *shift) |
FLA_Error | FLA_Bsvd_compute_tol_thresh (FLA_Obj tolmul, FLA_Obj maxit, FLA_Obj d, FLA_Obj e, FLA_Obj tol, FLA_Obj thresh) |
FLA_Error | FLA_Bsvd_compute_tol_thresh_ops (int m_A, float tolmul, float maxit, float *buff_d, int inc_d, float *buff_e, int inc_e, float *tol, float *thresh) |
FLA_Error | FLA_Bsvd_compute_tol_thresh_opd (int m_A, double tolmul, double maxit, double *buff_d, int inc_d, double *buff_e, int inc_e, double *tol, double *thresh) |
FLA_Error | FLA_Bsvd_find_converged (FLA_Obj tol, FLA_Obj d, FLA_Obj e, FLA_Obj sminl) |
FLA_Error | FLA_Bsvd_find_converged_ops (int m_A, float tol, float *buff_d, int inc_d, float *buff_e, int inc_e, float *sminl) |
FLA_Error | FLA_Bsvd_find_converged_opd (int m_A, double tol, double *buff_d, int inc_d, double *buff_e, int inc_e, double *sminl) |
FLA_Error | FLA_Bsvd_find_max_min (FLA_Obj d, FLA_Obj e, FLA_Obj smax, FLA_Obj smin) |
FLA_Error | FLA_Bsvd_find_max_min_ops (int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *smax, float *smin) |
FLA_Error | FLA_Bsvd_find_max_min_opd (int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *smax, double *smin) |
FLA_Error | FLA_Bsvd_find_submatrix_ops (int mn_A, int ij_begin, float *buff_d, int inc_d, float *buff_e, int inc_e, int *ijTL, int *ijBR) |
FLA_Error | FLA_Bsvd_find_submatrix_opd (int mn_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR) |
FLA_Error | FLA_Bsvd_v_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Obj U, FLA_Obj V, dim_t b_alg) |
FLA_Error | FLA_Bsvd_v_ops_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg) |
FLA_Error | FLA_Bsvd_v_opd_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg) |
FLA_Error | FLA_Bsvd_v_opc_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg) |
FLA_Error | FLA_Bsvd_v_opz_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg) |
FLA_Error | FLA_Bsvd_v_opt_var2 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Obj RG, FLA_Obj RH, FLA_Obj W, FLA_Obj U, FLA_Obj V, dim_t b_alg) |
FLA_Error | FLA_Bsvd_v_ops_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg) |
FLA_Error | FLA_Bsvd_v_opd_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg) |
FLA_Error | FLA_Bsvd_v_opc_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg) |
FLA_Error | FLA_Bsvd_v_opz_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg) |
FLA_Error FLA_Bsvd_compute_shift | ( | FLA_Obj | tol, |
FLA_Obj | sminl, | ||
FLA_Obj | smax, | ||
FLA_Obj | d, | ||
FLA_Obj | e, | ||
FLA_Obj | shift | ||
) |
References FLA_Bsvd_compute_shift_opd(), FLA_Bsvd_compute_shift_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), and FLA_Obj_vector_inc().
{ FLA_Datatype datatype; int m_A; int inc_d; int inc_e; datatype = FLA_Obj_datatype( d ); m_A = FLA_Obj_vector_dim( d ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); switch ( datatype ) { case FLA_FLOAT: { float* buff_tol = FLA_FLOAT_PTR( tol ); float* buff_sminl = FLA_FLOAT_PTR( sminl ); float* buff_smax = FLA_FLOAT_PTR( smax ); float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); float* buff_shift = FLA_FLOAT_PTR( shift ); FLA_Bsvd_compute_shift_ops( m_A, *buff_tol, *buff_sminl, *buff_smax, buff_d, inc_d, buff_e, inc_e, buff_shift ); break; } case FLA_DOUBLE: { double* buff_tol = FLA_DOUBLE_PTR( tol ); double* buff_sminl = FLA_DOUBLE_PTR( sminl ); double* buff_smax = FLA_DOUBLE_PTR( smax ); double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); double* buff_shift = FLA_DOUBLE_PTR( shift ); FLA_Bsvd_compute_shift_opd( m_A, *buff_tol, *buff_sminl, *buff_smax, buff_d, inc_d, buff_e, inc_e, buff_shift ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Bsvd_compute_shift_opd | ( | int | m_A, |
double | tol, | ||
double | sminl, | ||
double | smax, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
double * | shift | ||
) |
References FLA_Mach_params_opd(), and FLA_Sv_2x2_opd().
Referenced by FLA_Bsvd_compute_shift(), and FLA_Bsvd_sinval_v_opd_var1().
{ double hndrth = 0.01; double eps; double* d_first; double* e_last; double* d_last_m1; double* d_last; double sll, temp; eps = FLA_Mach_params_opd( FLA_MACH_EPS ); d_first = buff_d + (0 )*inc_d; e_last = buff_e + (m_A-2)*inc_e; d_last_m1 = buff_d + (m_A-2)*inc_d; d_last = buff_d + (m_A-1)*inc_d; // If the shift would ruin relative accuracy, set it to zero. if ( m_A * tol * ( sminl / smax ) <= max( eps, hndrth * tol ) ) { #ifdef PRINTF printf( "FLA_Bsvd_compute_shift_opd: shift would ruin accuracy; setting shift to 0.\n" ); printf( " m_A = %d \n", m_A ); printf( " tol = %20.15e\n", tol ); printf( " sminl = %20.15e\n", sminl ); printf( " smax = %20.15e\n", smax ); printf( " LHS = %20.15e\n", m_A * tol * ( sminl / smax ) ); printf( " max(eps,0.01*tol)= %20.15e\n", max( eps, hndrth * tol ) ); #endif *shift = 0.0; } else { // Compute the shift from the last 2x2 matrix. FLA_Sv_2x2_opd( d_last_m1, e_last, d_last, shift, &temp ); sll = fabs( *d_first ); // Check if the shift is negligible; if so, set it to zero. if ( sll > 0.0 ) { temp = ( *shift / sll ); if ( temp * temp < eps ) { #ifdef PRINTF printf( "FLA_Bsvd_compute_shift_opd: shift is negligible; setting shift to 0.\n" ); #endif *shift = 0.0; } } } return FLA_SUCCESS; }
FLA_Error FLA_Bsvd_compute_shift_ops | ( | int | m_A, |
float | tol, | ||
float | sminl, | ||
float | smax, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
float * | shift | ||
) |
References FLA_Mach_params_ops(), and FLA_Sv_2x2_ops().
Referenced by FLA_Bsvd_compute_shift(), and FLA_Bsvd_sinval_v_ops_var1().
{ float hndrth = 0.01; float eps; float* d_first; float* e_last; float* d_last_m1; float* d_last; float sll, temp; eps = FLA_Mach_params_ops( FLA_MACH_EPS ); d_first = buff_d + (0 )*inc_d; e_last = buff_e + (m_A-2)*inc_e; d_last_m1 = buff_d + (m_A-2)*inc_d; d_last = buff_d + (m_A-1)*inc_d; // If the shift would ruin relative accuracy, set it to zero. if ( m_A * tol * ( sminl / smax ) <= max( eps, hndrth * tol ) ) { *shift = 0.0; } else { // Compute the shift from the last 2x2 matrix. FLA_Sv_2x2_ops( d_last_m1, e_last, d_last, shift, &temp ); sll = fabsf( *d_first ); // Check if the shift is negligible; if so, set it to zero. if ( sll > 0.0F ) { temp = ( *shift / sll ); if ( temp * temp < eps ) { *shift = 0.0F; } } } return FLA_SUCCESS; }
FLA_Error FLA_Bsvd_compute_tol_thresh | ( | FLA_Obj | tolmul, |
FLA_Obj | maxit, | ||
FLA_Obj | d, | ||
FLA_Obj | e, | ||
FLA_Obj | tol, | ||
FLA_Obj | thresh | ||
) |
References FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), and FLA_Obj_vector_inc().
{ FLA_Datatype datatype; int n_A; int inc_d; int inc_e; datatype = FLA_Obj_datatype( d ); n_A = FLA_Obj_vector_dim( d ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); switch ( datatype ) { case FLA_FLOAT: { float* buff_tolmul = FLA_FLOAT_PTR( tolmul ); float* buff_maxitr = FLA_FLOAT_PTR( maxitr ); float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); float* buff_tol = FLA_FLOAT_PTR( tol ); float* buff_thresh = FLA_FLOAT_PTR( thresh ); FLA_Bsvd_compute_tol_thresh_ops( n_A, *buff_tolmul, *buff_maxitr, buff_d, inc_d, buff_e, inc_e, buff_tol, buff_thresh ); break; } case FLA_DOUBLE: { double* buff_tolmul = FLA_DOUBLE_PTR( tolmul ); double* buff_maxitr = FLA_DOUBLE_PTR( maxitr ); double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); double* buff_tol = FLA_DOUBLE_PTR( tol ); double* buff_thresh = FLA_DOUBLE_PTR( thresh ); FLA_Bsvd_compute_tol_thresh_opd( n_A, *buff_tolmul, *buff_maxitr, buff_d, inc_d, buff_e, inc_e, buff_tol, buff_thresh ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Bsvd_compute_tol_thresh_opd | ( | int | m_A, |
double | tolmul, | ||
double | maxit, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
double * | tol, | ||
double * | thresh | ||
) |
References bl1_d0(), and FLA_Mach_params_opd().
Referenced by FLA_Bsvd_compute_tol_thresh(), FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().
{ double zero = bl1_d0(); double smin; double eps, unfl; double mu; int i; // Query machine epsilon and the safe minimum. eps = FLA_Mach_params_opd( FLA_MACH_EPS ); unfl = FLA_Mach_params_opd( FLA_MACH_SFMIN ); // Compute tol as the product of machine epsilon and tolmul. *tol = tolmul * eps; // Compute the approximate maximum singular value. // NOT NEEDED unless we're supporting absolute accuracy. //FLA_Bsvd_sinval_find_max( n_A, // buff_d, inc_d, // buff_e, inc_e, // &smax ); // Compute the approximate minimum singular value. smin = fabs( *buff_d ); // Skip the accumulation of smin if the first element is zero. if ( smin != zero ) { mu = smin; for ( i = 1; i < n_A; ++i ) { double* epsilon1 = buff_e + (i-1)*inc_e; double* delta2 = buff_d + (i )*inc_d; mu = fabs( *delta2 ) * ( mu / ( mu + fabs( *epsilon1 ) ) ); smin = min( smin, mu ); // Stop early if we encountered a zero. if ( smin == zero ) break; } } // Compute thresh either in terms of tol or as a function of the // maximum total number of iterations, the problem size, and the // safe minimum. smin = smin / sqrt( ( double ) n_A ); *thresh = max( *tol * smin, maxitr * n_A * n_A * unfl ); return FLA_SUCCESS; }
FLA_Error FLA_Bsvd_compute_tol_thresh_ops | ( | int | m_A, |
float | tolmul, | ||
float | maxit, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
float * | tol, | ||
float * | thresh | ||
) |
References bl1_s0(), and FLA_Mach_params_ops().
Referenced by FLA_Bsvd_compute_tol_thresh(), FLA_Bsvd_ext_opc_var1(), FLA_Bsvd_ext_ops_var1(), FLA_Bsvd_v_opc_var1(), and FLA_Bsvd_v_ops_var1().
{ float zero = bl1_s0(); float smin; float eps, unfl; float mu; int i; // Query machine epsilon and the safe minimum. eps = FLA_Mach_params_ops( FLA_MACH_EPS ); unfl = FLA_Mach_params_ops( FLA_MACH_SFMIN ); // Compute tol as the product of machine epsilon and tolmul. *tol = tolmul * eps; // Compute the approximate maximum singular value. // NOT NEEDED unless we're supporting absolute accuracy. //FLA_Bsvd_sinval_find_max( n_A, // buff_d, inc_d, // buff_e, inc_e, // &smax ); // Compute the approximate minimum singular value. smin = fabsf( *buff_d ); // Skip the accumulation of smin if the first element is zero. if ( smin != zero ) { mu = smin; for ( i = 1; i < n_A; ++i ) { float* epsilon1 = buff_e + (i-1)*inc_e; float* delta2 = buff_d + (i )*inc_d; mu = fabsf( *delta2 ) * ( mu / ( mu + fabsf( *epsilon1 ) ) ); smin = min( smin, mu ); // Stop early if we encountered a zero. if ( smin == zero ) break; } } // Compute thresh either in terms of tol or as a function of the // maximum total number of iterations, the problem size, and the // safe minimum. smin = smin / sqrtf( ( float ) n_A ); *thresh = max( *tol * smin, maxitr * n_A * n_A * unfl ); return FLA_SUCCESS; }
References FLA_Bsvd_find_converged_opd(), FLA_Bsvd_find_converged_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), and FLA_Obj_vector_inc().
{ FLA_Datatype datatype; int m_A; int inc_d; int inc_e; datatype = FLA_Obj_datatype( d ); m_A = FLA_Obj_vector_dim( d ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); switch ( datatype ) { case FLA_FLOAT: { float* buff_tol = FLA_FLOAT_PTR( tol ); float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); float* buff_sminl = FLA_FLOAT_PTR( sminl ); FLA_Bsvd_find_converged_ops( m_A, *buff_tol, buff_d, inc_d, buff_e, inc_e, buff_sminl ); break; } case FLA_DOUBLE: { double* buff_tol = FLA_DOUBLE_PTR( tol ); double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); double* buff_sminl = FLA_DOUBLE_PTR( sminl ); FLA_Bsvd_find_converged_opd( m_A, *buff_tol, buff_d, inc_d, buff_e, inc_e, buff_sminl ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Bsvd_find_converged_opd | ( | int | m_A, |
double | tol, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
double * | sminl | ||
) |
Referenced by FLA_Bsvd_find_converged(), and FLA_Bsvd_sinval_v_opd_var1().
{ double* epsilon_last; double* delta_last; double mu; int i; epsilon_last = buff_e + (m_A-2)*inc_e; delta_last = buff_d + (m_A-1)*inc_d; // Check convergence at the bottom of the matrix first. if ( MAC_Bsvd_sinval_is_converged_opd( tol, *delta_last, *epsilon_last ) ) { //*epsilon_last = 0.0; *sminl = 0.0; return m_A - 2; } // If the last element is not yet converged, check interior elements. // Also, accumulate sminl for later use when it comes time to check // the shift. mu = fabs( *buff_d ); *sminl = mu; for ( i = 0; i < m_A - 1; ++i ) { double* epsilon1 = buff_e + (i )*inc_e; double* delta2 = buff_d + (i+1)*inc_d; // Check convergence of epsilon1 against the value of mu accumulated // so far. if ( MAC_Bsvd_sinval_is_converged_opd( tol, mu, *epsilon1 ) ) { //printf( "FLA_Bsvd_sinval_find_converged: Split occurred in col %d\n", i ); //printf( "FLA_Bsvd_sinval_find_converged: mu alpha12 = %23.19e %23.19e\n", mu, *epsilon1 ); //printf( "FLA_Bsvd_sinval_find_converged: alpha22 = %43.19e\n", *delta2 ); //*epsilon1 = 0.0; //return FLA_FAILURE; return i; } // Update mu and sminl. mu = fabs( *delta2 ) * ( mu / ( mu + fabs( *epsilon1 ) ) ); *sminl = min( *sminl, mu ); } // Return with no convergence found. return FLA_SUCCESS; }
FLA_Error FLA_Bsvd_find_converged_ops | ( | int | m_A, |
float | tol, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
float * | sminl | ||
) |
Referenced by FLA_Bsvd_find_converged(), and FLA_Bsvd_sinval_v_ops_var1().
{ float* epsilon_last; float* delta_last; float mu; int i; epsilon_last = buff_e + (m_A-2)*inc_e; delta_last = buff_d + (m_A-1)*inc_d; // Check convergence at the bottom of the matrix first. if ( MAC_Bsvd_sinval_is_converged_ops( tol, *delta_last, *epsilon_last ) ) { *sminl = 0.0F; return m_A - 2; } // If the last element is not yet converged, check interior elements. // Also, accumulate sminl for later use when it comes time to check // the shift. mu = fabsf( *buff_d ); *sminl = mu; for ( i = 0; i < m_A - 1; ++i ) { float* epsilon1 = buff_e + (i )*inc_e; float* delta2 = buff_d + (i+1)*inc_d; // Check convergence of epsilon1 against the value of mu accumulated // so far. if ( MAC_Bsvd_sinval_is_converged_ops( tol, mu, *epsilon1 ) ) { return i; } // Update mu and sminl. mu = fabsf( *delta2 ) * ( mu / ( mu + fabsf( *epsilon1 ) ) ); *sminl = min( *sminl, mu ); } // Return with no convergence found. return FLA_SUCCESS; }
FLA_Error FLA_Bsvd_find_max_min_opd | ( | int | m_A, |
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
double * | smax, | ||
double * | smin | ||
) |
Referenced by FLA_Bsvd_find_max(), and FLA_Bsvd_sinval_v_opd_var1().
{ double smax_cand; double smin_cand; int i; smax_cand = fabs( buff_d[ (m_A-1)*inc_d ] ); smin_cand = smax_cand; for ( i = 0; i < m_A - 1; ++i ) { double abs_di = fabs( buff_d[ i*inc_d ] ); double abs_ei = fabs( buff_e[ i*inc_e ] ); // Track the minimum element. smin_cand = min( smin_cand, abs_di ); // Track the maximum element. smax_cand = max( smax_cand, abs_di ); smax_cand = max( smax_cand, abs_ei ); } // Save the results of the search. *smax = smax_cand; *smin = smin_cand; return FLA_SUCCESS; }
FLA_Error FLA_Bsvd_find_max_min_ops | ( | int | m_A, |
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
float * | smax, | ||
float * | smin | ||
) |
Referenced by FLA_Bsvd_find_max(), and FLA_Bsvd_sinval_v_ops_var1().
{ float smax_cand; float smin_cand; int i; smax_cand = fabsf( buff_d[ (m_A-1)*inc_d ] ); smin_cand = smax_cand; for ( i = 0; i < m_A - 1; ++i ) { float abs_di = fabsf( buff_d[ i*inc_d ] ); float abs_ei = fabsf( buff_e[ i*inc_e ] ); // Track the minimum element. smin_cand = min( smin_cand, abs_di ); // Track the maximum element. smax_cand = max( smax_cand, abs_di ); smax_cand = max( smax_cand, abs_ei ); } // Save the results of the search. *smax = smax_cand; *smin = smin_cand; return FLA_SUCCESS; }
FLA_Error FLA_Bsvd_find_submatrix_opd | ( | int | mn_A, |
int | ij_begin, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
int * | ijTL, | ||
int * | ijBR | ||
) |
References bl1_d0().
Referenced by FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().
{ double rzero = bl1_d0(); int ij_tl; int ij_br; // Search for the first non-zero superdiagonal element starting at // the index specified by ij_begin. for ( ij_tl = ij_begin; ij_tl < mn_A - 1; ++ij_tl ) { double* e1 = buff_e + (ij_tl )*inc_e; // If we find a non-zero element, record it and break out of this // loop. if ( *e1 != rzero ) { #ifdef PRINTF printf( "FLA_Bsvd_find_submatrix_opd: found non-zero superdiagonal element\n" ); printf( " e[%3d] = %22.19e\n", ij_tl, *e1 ); #endif *ijTL = ij_tl; break; } } // If ij_tl was incremented all the way up to mn_A - 1, then we didn't // find any non-zeros. if ( ij_tl == mn_A - 1 ) { #ifdef PRINTF printf( "FLA_Bsvd_find_submatrix_opd: no submatrices found.\n" ); #endif return FLA_FAILURE; } // If we've gotten this far, then a non-zero superdiagonal element was // found. Now we must walk the remaining portion of the superdiagonal // to find the first zero element, or if one is not found, we simply // use the last element of the superdiagonal. for ( ij_br = ij_tl; ij_br < mn_A - 1; ++ij_br ) { double* e1 = buff_e + (ij_br )*inc_e; // If we find a zero element, record it and break out of this // loop. if ( *e1 == rzero ) { #ifdef PRINTF printf( "FLA_Bsvd_find_submatrix_opd: found zero superdiagonal element\n" ); printf( " e[%3d] = %22.19e\n", ij_br, *e1 ); #endif break; } } // If a zero element was found, then ij_br should hold the index of // that element. If a zero element was not found, then ij_br should // hold mn_A - 1. Either way, we save the value and return success. *ijBR = ij_br; return FLA_SUCCESS; }
FLA_Error FLA_Bsvd_find_submatrix_ops | ( | int | mn_A, |
int | ij_begin, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
int * | ijTL, | ||
int * | ijBR | ||
) |
References bl1_s0().
Referenced by FLA_Bsvd_ext_opc_var1(), FLA_Bsvd_ext_ops_var1(), FLA_Bsvd_v_opc_var1(), and FLA_Bsvd_v_ops_var1().
{ float rzero = bl1_s0(); int ij_tl; int ij_br; // Search for the first non-zero superdiagonal element starting at // the index specified by ij_begin. for ( ij_tl = ij_begin; ij_tl < mn_A - 1; ++ij_tl ) { float* e1 = buff_e + (ij_tl )*inc_e; // If we find a non-zero element, record it and break out of this // loop. if ( *e1 != rzero ) { *ijTL = ij_tl; break; } } // If ij_tl was incremented all the way up to mn_A - 1, then we didn't // find any non-zeros. if ( ij_tl == mn_A - 1 ) { return FLA_FAILURE; } // If we've gotten this far, then a non-zero superdiagonal element was // found. Now we must walk the remaining portion of the superdiagonal // to find the first zero element, or if one is not found, we simply // use the last element of the superdiagonal. for ( ij_br = ij_tl; ij_br < mn_A - 1; ++ij_br ) { float* e1 = buff_e + (ij_br )*inc_e; // If we find a zero element, record it and break out of this // loop. if ( *e1 == rzero ) { break; } } // If a zero element was found, then ij_br should hold the index of // that element. If a zero element was not found, then ij_br should // hold mn_A - 1. Either way, we save the value and return success. *ijBR = ij_br; return FLA_SUCCESS; }
FLA_Error FLA_Bsvd_v_opc_var1 | ( | int | min_m_n, |
int | m_U, | ||
int | m_V, | ||
int | n_GH, | ||
int | n_iter_max, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_H, | ||
int | rs_H, | ||
int | cs_H, | ||
scomplex * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
scomplex * | buff_V, | ||
int | rs_V, | ||
int | cs_V, | ||
int | b_alg | ||
) |
References bl1_c1(), bl1_csetm(), bl1_csscalv(), bl1_s0(), bl1_sm1(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blc_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), and FLA_Mach_params_ops().
Referenced by FLA_Bsvd_v_opt_var1().
{ scomplex one = bl1_c1(); float rzero = bl1_s0(); int maxitr = 6; float eps; float tolmul; float tol; float thresh; scomplex* G; scomplex* H; float* d1; float* e1; int r_val; int done; int m_GH_sweep_max; int ij_begin; int ijTL, ijBR; int m_A11; int n_iter_perf; int n_UV_apply; int total_deflations; int n_deflations; int n_iter_prev; int n_iter_perf_sweep_max; // Compute some convergence constants. eps = FLA_Mach_params_ops( FLA_MACH_EPS ); tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) ); FLA_Bsvd_compute_tol_thresh_ops( min_m_n, tolmul, maxitr, buff_d, inc_d, buff_e, inc_e, &tol, &thresh ); // Initialize our completion flag. done = FALSE; // Initialize a counter that holds the maximum number of rows of G // and H that we would need to initialize for the next sweep. m_GH_sweep_max = min_m_n - 1; // Initialize a counter for the total number of iterations performed. n_iter_prev = 0; // Iterate until the matrix has completely deflated. for ( total_deflations = 0; done != TRUE; ) { // Initialize G and H to contain only identity rotations. bl1_csetm( m_GH_sweep_max, n_GH, &one, buff_G, rs_G, cs_G ); bl1_csetm( m_GH_sweep_max, n_GH, &one, buff_H, rs_H, cs_H ); // Keep track of the maximum number of iterations performed in the // current sweep. This is used when applying the sweep's Givens // rotations. n_iter_perf_sweep_max = 0; // Perform a sweep: Move through the matrix and perform a bidiagonal // SVD on each non-zero submatrix that is encountered. During the // first time through, ijTL will be 0 and ijBR will be min_m_n - 1. for ( ij_begin = 0; ij_begin < min_m_n; ) { // Search for the first submatrix along the diagonal that is // bounded by zeroes (or endpoints of the matrix). If no // submatrix is found (ie: if the entire superdiagonal is zero // then FLA_FAILURE is returned. This function also inspects // superdiagonal elements for proximity to zero. If a given // element is close enough to zero, then it is deemed // converged and manually set to zero. r_val = FLA_Bsvd_find_submatrix_ops( min_m_n, ij_begin, buff_d, inc_d, buff_e, inc_e, &ijTL, &ijBR ); // Verify that a submatrix was found. If one was not found, // then we are done with the current sweep. Furthermore, if // a submatrix was not found AND we began our search at the // beginning of the matrix (ie: ij_begin == 0), then the // matrix has completely deflated and so we are done with // Francis step iteration. if ( r_val == FLA_FAILURE ) { if ( ij_begin == 0 ) { done = TRUE; } // Break out of the current sweep so we can apply the last // remaining Givens rotations. break; } // If we got this far, then: // (a) ijTL refers to the index of the first non-zero // superdiagonal along the diagonal, and // (b) ijBR refers to either: // - the first zero element that occurs after ijTL, or // - the the last diagonal element. // Note that ijTL and ijBR also correspond to the first and // last diagonal elements of the submatrix of interest. Thus, // we may compute the dimension of this submatrix as: m_A11 = ijBR - ijTL + 1; // Adjust ij_begin, which gets us ready for the next submatrix // search in the current sweep. ij_begin = ijBR + 1; // Index to the submatrices upon which we will operate. d1 = buff_d + ijTL * inc_d; e1 = buff_e + ijTL * inc_e; G = buff_G + ijTL * rs_G; H = buff_H + ijTL * rs_H; // Search for a batch of singular values, recursing on deflated // subproblems whenever a split occurs. Iteration continues as // long as // (a) there is still matrix left to operate on, and // (b) the number of iterations performed in this batch is // less than n_G. // If/when either of the two above conditions fails to hold, // the function returns. n_deflations = FLA_Bsvd_iteracc_v_ops_var1( m_A11, n_GH, ijTL, tol, thresh, d1, inc_d, e1, inc_e, G, rs_G, cs_G, H, rs_H, cs_H, &n_iter_perf ); // Record the number of deflations that were observed. total_deflations += n_deflations; // Update the maximum number of iterations performed in the // current sweep. n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf ); // Store the most recent value of ijBR in m_G_sweep_max. // When the sweep is done, this value will contain the minimum // number of rows of G and H we can apply and safely include all // non-identity rotations that were computed during the // singular value searches. m_GH_sweep_max = ijBR; // Make sure we haven't exceeded our maximum iteration count. if ( n_iter_prev >= n_iter_max * min_m_n ) { FLA_Abort(); //return FLA_FAILURE; } } // The sweep is complete. Now we must apply the Givens rotations // that were accumulated during the sweep. // Recall that the number of columns of U and V to which we apply // rotations is one more than the number of rotations. n_UV_apply = m_GH_sweep_max + 1; // Apply the Givens rotations. Note that we only apply k sets of // rotations, where k = n_iter_perf_sweep_max. Also note that we only // apply to n_UV_apply columns of U and V since this is the most we // need to touch given the most recent value stored to m_GH_sweep_max. //FLA_Apply_G_rf_blc_var5( n_iter_perf_sweep_max, FLA_Apply_G_rf_blc_var3( n_iter_perf_sweep_max, //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max, //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max, m_U, n_UV_apply, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U, b_alg ); //FLA_Apply_G_rf_blc_var5( n_iter_perf_sweep_max, FLA_Apply_G_rf_blc_var3( n_iter_perf_sweep_max, //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max, //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max, m_V, n_UV_apply, buff_H, rs_H, cs_H, buff_V, rs_V, cs_V, b_alg ); // Increment the total number of iterations previously performed. n_iter_prev += n_iter_perf_sweep_max; } // Make all the singular values positive. { int i; float minus_one = bl1_sm1(); for ( i = 0; i < min_m_n; ++i ) { if ( buff_d[ (i )*inc_d ] < rzero ) { buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ]; // Scale the right singular vectors. bl1_csscalv( BLIS1_NO_CONJUGATE, m_V, &minus_one, buff_V + (i )*cs_V, rs_V ); } } } return FLA_SUCCESS; }
FLA_Error FLA_Bsvd_v_opc_var2 | ( | int | min_m_n, |
int | m_U, | ||
int | m_V, | ||
int | n_GH, | ||
int | n_iter_max, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_H, | ||
int | rs_H, | ||
int | cs_H, | ||
float * | buff_RG, | ||
int | rs_RG, | ||
int | cs_RG, | ||
float * | buff_RH, | ||
int | rs_RH, | ||
int | cs_RH, | ||
scomplex * | buff_W, | ||
int | rs_W, | ||
int | cs_W, | ||
scomplex * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
scomplex * | buff_V, | ||
int | rs_V, | ||
int | cs_V, | ||
int | b_alg | ||
) |
Referenced by FLA_Bsvd_v_opt_var2().
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_v_opd_var1 | ( | int | min_m_n, |
int | m_U, | ||
int | m_V, | ||
int | n_GH, | ||
int | n_iter_max, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_H, | ||
int | rs_H, | ||
int | cs_H, | ||
double * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
double * | buff_V, | ||
int | rs_V, | ||
int | cs_V, | ||
int | b_alg | ||
) |
References bl1_d0(), bl1_dm1(), bl1_dscalv(), bl1_z1(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_bld_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().
Referenced by FLA_Bsvd_v_opt_var1().
{ dcomplex one = bl1_z1(); double rzero = bl1_d0(); int maxitr = 6; double eps; double tolmul; double tol; double thresh; dcomplex* G; dcomplex* H; double* d1; double* e1; int r_val; int done; int m_GH_sweep_max; int ij_begin; int ijTL, ijBR; int m_A11; int n_iter_perf; int n_UV_apply; int total_deflations; int n_deflations; int n_iter_prev; int n_iter_perf_sweep_max; // Compute some convergence constants. eps = FLA_Mach_params_opd( FLA_MACH_EPS ); tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) ); FLA_Bsvd_compute_tol_thresh_opd( min_m_n, tolmul, maxitr, buff_d, inc_d, buff_e, inc_e, &tol, &thresh ); #ifdef PRINTF printf( "FLA_Bsvd_v_opd_var1: tolmul = %12.6e\n", tolmul ); printf( "FLA_Bsvd_v_opd_var1: tol = %12.6e\n", tol ); printf( "FLA_Bsvd_v_opd_var1: thresh = %12.6e\n", thresh ); #endif // Initialize our completion flag. done = FALSE; // Initialize a counter that holds the maximum number of rows of G // and H that we would need to initialize for the next sweep. m_GH_sweep_max = min_m_n - 1; // Initialize a counter for the total number of iterations performed. n_iter_prev = 0; // Iterate until the matrix has completely deflated. for ( total_deflations = 0; done != TRUE; ) { // Initialize G and H to contain only identity rotations. bl1_zsetm( m_GH_sweep_max, n_GH, &one, buff_G, rs_G, cs_G ); bl1_zsetm( m_GH_sweep_max, n_GH, &one, buff_H, rs_H, cs_H ); // Keep track of the maximum number of iterations performed in the // current sweep. This is used when applying the sweep's Givens // rotations. n_iter_perf_sweep_max = 0; // Perform a sweep: Move through the matrix and perform a bidiagonal // SVD on each non-zero submatrix that is encountered. During the // first time through, ijTL will be 0 and ijBR will be min_m_n - 1. for ( ij_begin = 0; ij_begin < min_m_n; ) { #ifdef PRINTF if ( ij_begin == 0 ) printf( "FLA_Bsvd_v_opd_var1: beginning new sweep (ij_begin = %d)\n", ij_begin ); #endif // Search for the first submatrix along the diagonal that is // bounded by zeroes (or endpoints of the matrix). If no // submatrix is found (ie: if the entire superdiagonal is zero // then FLA_FAILURE is returned. This function also inspects // superdiagonal elements for proximity to zero. If a given // element is close enough to zero, then it is deemed // converged and manually set to zero. r_val = FLA_Bsvd_find_submatrix_opd( min_m_n, ij_begin, buff_d, inc_d, buff_e, inc_e, &ijTL, &ijBR ); // Verify that a submatrix was found. If one was not found, // then we are done with the current sweep. Furthermore, if // a submatrix was not found AND we began our search at the // beginning of the matrix (ie: ij_begin == 0), then the // matrix has completely deflated and so we are done with // Francis step iteration. if ( r_val == FLA_FAILURE ) { if ( ij_begin == 0 ) { #ifdef PRINTF printf( "FLA_Bsvd_v_opd_var1: superdiagonal is completely zero.\n" ); printf( "FLA_Bsvd_v_opd_var1: Francis iteration is done!\n" ); #endif done = TRUE; } // Break out of the current sweep so we can apply the last // remaining Givens rotations. break; } // If we got this far, then: // (a) ijTL refers to the index of the first non-zero // superdiagonal along the diagonal, and // (b) ijBR refers to either: // - the first zero element that occurs after ijTL, or // - the the last diagonal element. // Note that ijTL and ijBR also correspond to the first and // last diagonal elements of the submatrix of interest. Thus, // we may compute the dimension of this submatrix as: m_A11 = ijBR - ijTL + 1; #ifdef PRINTF printf( "FLA_Bsvd_v_opd_var1: ij_begin = %d\n", ij_begin ); printf( "FLA_Bsvd_v_opd_var1: ijTL = %d\n", ijTL ); printf( "FLA_Bsvd_v_opd_var1: ijBR = %d\n", ijBR ); printf( "FLA_Bsvd_v_opd_var1: m_A11 = %d\n", m_A11 ); #endif // Adjust ij_begin, which gets us ready for the next submatrix // search in the current sweep. ij_begin = ijBR + 1; // Index to the submatrices upon which we will operate. d1 = buff_d + ijTL * inc_d; e1 = buff_e + ijTL * inc_e; G = buff_G + ijTL * rs_G; H = buff_H + ijTL * rs_H; // Search for a batch of singular values, recursing on deflated // subproblems whenever a split occurs. Iteration continues as // long as // (a) there is still matrix left to operate on, and // (b) the number of iterations performed in this batch is // less than n_G. // If/when either of the two above conditions fails to hold, // the function returns. n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11, n_GH, ijTL, tol, thresh, d1, inc_d, e1, inc_e, G, rs_G, cs_G, H, rs_H, cs_H, &n_iter_perf ); // Record the number of deflations that were observed. total_deflations += n_deflations; // Update the maximum number of iterations performed in the // current sweep. n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf ); #ifdef PRINTF printf( "FLA_Bsvd_v_opd_var1: deflations observed = %d\n", n_deflations ); printf( "FLA_Bsvd_v_opd_var1: total deflations observed = %d\n", total_deflations ); printf( "FLA_Bsvd_v_opd_var1: num iterations performed = %d\n", n_iter_perf ); #endif // Store the most recent value of ijBR in m_G_sweep_max. // When the sweep is done, this value will contain the minimum // number of rows of G and H we can apply and safely include all // non-identity rotations that were computed during the // singular value searches. m_GH_sweep_max = ijBR; // Make sure we haven't exceeded our maximum iteration count. if ( n_iter_prev >= n_iter_max * min_m_n ) { #ifdef PRINTF printf( "FLA_Bsvd_v_opd_var1: reached maximum total number of iterations: %d\n", n_iter_prev ); #endif FLA_Abort(); //return FLA_FAILURE; } } // The sweep is complete. Now we must apply the Givens rotations // that were accumulated during the sweep. // Recall that the number of columns of U and V to which we apply // rotations is one more than the number of rotations. n_UV_apply = m_GH_sweep_max + 1; #ifdef PRINTF printf( "FLA_Bsvd_v_opd_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max ); printf( "FLA_Bsvd_v_opd_var1: m_U = %d\n", m_U ); printf( "FLA_Bsvd_v_opd_var1: m_V = %d\n", m_V ); printf( "FLA_Bsvd_v_opd_var1: napp= %d\n", n_UV_apply ); #endif // Apply the Givens rotations. Note that we only apply k sets of // rotations, where k = n_iter_perf_sweep_max. Also note that we only // apply to n_UV_apply columns of U and V since this is the most we // need to touch given the most recent value stored to m_GH_sweep_max. //FLA_Apply_G_rf_bld_var5( n_iter_perf_sweep_max, FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max, m_U, n_UV_apply, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U, b_alg ); //FLA_Apply_G_rf_bld_var5( n_iter_perf_sweep_max, FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max, m_V, n_UV_apply, buff_H, rs_H, cs_H, buff_V, rs_V, cs_V, b_alg ); // Increment the total number of iterations previously performed. n_iter_prev += n_iter_perf_sweep_max; #ifdef PRINTF printf( "FLA_Bsvd_v_opd_var1: total number of iterations performed: %d\n", n_iter_prev ); #endif } // Make all the singular values positive. { int i; double minus_one = bl1_dm1(); for ( i = 0; i < min_m_n; ++i ) { if ( buff_d[ (i )*inc_d ] < rzero ) { buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ]; // Scale the right singular vectors. bl1_dscalv( BLIS1_NO_CONJUGATE, m_V, &minus_one, buff_V + (i )*cs_V, rs_V ); } } } return n_iter_prev; }
FLA_Error FLA_Bsvd_v_opd_var2 | ( | int | min_m_n, |
int | m_U, | ||
int | m_V, | ||
int | n_GH, | ||
int | n_iter_max, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_H, | ||
int | rs_H, | ||
int | cs_H, | ||
double * | buff_RG, | ||
int | rs_RG, | ||
int | cs_RG, | ||
double * | buff_RH, | ||
int | rs_RH, | ||
int | cs_RH, | ||
double * | buff_W, | ||
int | rs_W, | ||
int | cs_W, | ||
double * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
double * | buff_V, | ||
int | rs_V, | ||
int | cs_V, | ||
int | b_alg | ||
) |
References bl1_d0(), bl1_d1(), bl1_dcopymt(), bl1_dgemm(), bl1_dident(), bl1_dm1(), bl1_dscalv(), bl1_z1(), bl1_zsetm(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Apply_G_rf_bld_var3b(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().
Referenced by FLA_Bsvd_v_opt_var2().
{ dcomplex one = bl1_z1(); double rone = bl1_d1(); double rzero = bl1_d0(); int maxitr = 6; double eps; double tolmul; double tol; double thresh; dcomplex* G; dcomplex* H; double* d1; double* e1; int r_val; int done; int m_GH_sweep_max; int ij_begin; int ijTL, ijBR; int m_A11; int n_iter_perf; int n_UV_apply; int total_deflations; int n_deflations; int n_iter_prev; int n_iter_perf_sweep_max; // Compute some convergence constants. eps = FLA_Mach_params_opd( FLA_MACH_EPS ); tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) ); FLA_Bsvd_compute_tol_thresh_opd( min_m_n, tolmul, maxitr, buff_d, inc_d, buff_e, inc_e, &tol, &thresh ); // Initialize our completion flag. done = FALSE; // Initialize a counter that holds the maximum number of rows of G // and H that we would need to initialize for the next sweep. m_GH_sweep_max = min_m_n - 1; // Initialize a counter for the total number of iterations performed. n_iter_prev = 0; // Initialize RG and RH to identity. bl1_dident( min_m_n, buff_RG, rs_RG, cs_RG ); bl1_dident( min_m_n, buff_RH, rs_RH, cs_RH ); // Iterate until the matrix has completely deflated. for ( total_deflations = 0; done != TRUE; ) { // Initialize G and H to contain only identity rotations. bl1_zsetm( m_GH_sweep_max, n_GH, &one, buff_G, rs_G, cs_G ); bl1_zsetm( m_GH_sweep_max, n_GH, &one, buff_H, rs_H, cs_H ); // Keep track of the maximum number of iterations performed in the // current sweep. This is used when applying the sweep's Givens // rotations. n_iter_perf_sweep_max = 0; // Perform a sweep: Move through the matrix and perform a bidiagonal // SVD on each non-zero submatrix that is encountered. During the // first time through, ijTL will be 0 and ijBR will be min_m_n - 1. for ( ij_begin = 0; ij_begin < min_m_n; ) { #ifdef PRINTF if ( ij_begin == 0 ) printf( "FLA_Bsvd_v_opd_var2: beginning new sweep (ij_begin = %d)\n", ij_begin ); #endif // Search for the first submatrix along the diagonal that is // bounded by zeroes (or endpoints of the matrix). If no // submatrix is found (ie: if the entire superdiagonal is zero // then FLA_FAILURE is returned. This function also inspects // superdiagonal elements for proximity to zero. If a given // element is close enough to zero, then it is deemed // converged and manually set to zero. r_val = FLA_Bsvd_find_submatrix_opd( min_m_n, ij_begin, buff_d, inc_d, buff_e, inc_e, &ijTL, &ijBR ); // Verify that a submatrix was found. If one was not found, // then we are done with the current sweep. Furthermore, if // a submatrix was not found AND we began our search at the // beginning of the matrix (ie: ij_begin == 0), then the // matrix has completely deflated and so we are done with // Francis step iteration. if ( r_val == FLA_FAILURE ) { if ( ij_begin == 0 ) { #ifdef PRINTF printf( "FLA_Bsvd_v_opd_var2: superdiagonal is completely zero.\n" ); printf( "FLA_Bsvd_v_opd_var2: Francis iteration is done!\n" ); #endif done = TRUE; } // Break out of the current sweep so we can apply the last // remaining Givens rotations. break; } // If we got this far, then: // (a) ijTL refers to the index of the first non-zero // superdiagonal along the diagonal, and // (b) ijBR refers to either: // - the first zero element that occurs after ijTL, or // - the the last diagonal element. // Note that ijTL and ijBR also correspond to the first and // last diagonal elements of the submatrix of interest. Thus, // we may compute the dimension of this submatrix as: m_A11 = ijBR - ijTL + 1; #ifdef PRINTF printf( "FLA_Bsvd_v_opd_var2: ij_begin = %d\n", ij_begin ); printf( "FLA_Bsvd_v_opd_var2: ijTL = %d\n", ijTL ); printf( "FLA_Bsvd_v_opd_var2: ijBR = %d\n", ijBR ); printf( "FLA_Bsvd_v_opd_var2: m_A11 = %d\n", m_A11 ); #endif // Adjust ij_begin, which gets us ready for the next submatrix // search in the current sweep. ij_begin = ijBR + 1; // Index to the submatrices upon which we will operate. d1 = buff_d + ijTL * inc_d; e1 = buff_e + ijTL * inc_e; G = buff_G + ijTL * rs_G; H = buff_H + ijTL * rs_H; // Search for a batch of singular values, recursing on deflated // subproblems whenever possible. A new singular value search is // performed as long as // (a) there is still matrix left to operate on, and // (b) the number of iterations performed in this batch is // less than n_G. // If/when either of the two above conditions fails to hold, // the function returns. n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11, n_GH, ijTL, tol, thresh, d1, inc_d, e1, inc_e, G, rs_G, cs_G, H, rs_H, cs_H, &n_iter_perf ); // Record the number of deflations that occurred. total_deflations += n_deflations; // Update the maximum number of iterations performed in the // current sweep. n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf ); #ifdef PRINTF printf( "FLA_Bsvd_v_opd_var2: deflations observed = %d\n", n_deflations ); printf( "FLA_Bsvd_v_opd_var2: total deflations observed = %d\n", total_deflations ); printf( "FLA_Bsvd_v_opd_var2: num iterations performed = %d\n", n_iter_perf ); #endif // Store the most recent value of ijBR in m_G_sweep_max. // When the sweep is done, this value will contain the minimum // number of rows of G and H we can apply and safely include all // non-identity rotations that were computed during the // singular value searches. m_GH_sweep_max = ijBR; } // The sweep is complete. Now we must apply the Givens rotations // that were accumulated during the sweep. // Recall that the number of columns of U and V to which we apply // rotations is one more than the number of rotations. n_UV_apply = m_GH_sweep_max + 1; #ifdef PRINTF printf( "FLA_Bsvd_v_opd_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max ); printf( "FLA_Bsvd_v_opd_var2: m_U = %d\n", m_U ); printf( "FLA_Bsvd_v_opd_var2: m_V = %d\n", m_V ); printf( "FLA_Bsvd_v_opd_var2: napp= %d\n", n_UV_apply ); #endif // Apply the Givens rotations. Note that we only apply k sets of // rotations, where k = n_iter_perf_sweep_max. Also note that we only // apply to n_UV_apply columns of U and V since this is the most we // need to touch given the most recent value stored to m_GH_sweep_max. //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max, FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max, min_m_n, n_UV_apply, n_iter_prev, buff_G, rs_G, cs_G, buff_RG, rs_RG, cs_RG, b_alg ); //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max, FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max, min_m_n, n_UV_apply, n_iter_prev, buff_H, rs_H, cs_H, buff_RH, rs_RH, cs_RH, b_alg ); // Increment the total number of iterations previously performed. n_iter_prev += n_iter_perf_sweep_max; #ifdef PRINTF printf( "FLA_Bsvd_v_opd_var2: total number of iterations performed: %d\n", n_iter_prev ); #endif } // Copy the contents of Q to temporary storage. bl1_dcopymt( BLIS1_NO_TRANSPOSE, m_U, m_V, buff_U, rs_U, cs_U, buff_W, rs_W, cs_W ); // W needs to be max_m_n-by-min_m_n!!!!!!!!!!!!!!! // Multiply U by R, overwriting U. bl1_dgemm( BLIS1_NO_TRANSPOSE, BLIS1_NO_TRANSPOSE, m_U, m_V, m_V, &rone, ( double* )buff_W, rs_W, cs_W, buff_RG, rs_RG, cs_RG, &rzero, ( double* )buff_U, rs_U, cs_U ); bl1_dcopymt( BLIS1_NO_TRANSPOSE, m_V, m_V, buff_V, rs_V, cs_V, buff_W, rs_W, cs_W ); // Multiply V by R, overwriting V. bl1_dgemm( BLIS1_NO_TRANSPOSE, BLIS1_NO_TRANSPOSE, m_V, m_V, m_V, &rone, ( double* )buff_W, rs_W, cs_W, buff_RH, rs_RH, cs_RH, &rzero, ( double* )buff_V, rs_V, cs_V ); // Make all the singular values positive. { int i; double minus_one = bl1_dm1(); for ( i = 0; i < min_m_n; ++i ) { if ( buff_d[ (i )*inc_d ] < rzero ) { buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ]; // Scale the right singular vectors. bl1_dscalv( BLIS1_NO_CONJUGATE, m_V, &minus_one, buff_V + (i )*cs_V, rs_V ); } } } return n_iter_prev; }
FLA_Error FLA_Bsvd_v_ops_var1 | ( | int | min_m_n, |
int | m_U, | ||
int | m_V, | ||
int | n_GH, | ||
int | n_iter_max, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_H, | ||
int | rs_H, | ||
int | cs_H, | ||
float * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
float * | buff_V, | ||
int | rs_V, | ||
int | cs_V, | ||
int | b_alg | ||
) |
References bl1_c1(), bl1_csetm(), bl1_s0(), bl1_sm1(), bl1_sscalv(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_bls_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), and FLA_Mach_params_ops().
Referenced by FLA_Bsvd_v_opt_var1().
{ scomplex one = bl1_c1(); float rzero = bl1_s0(); int maxitr = 6; float eps; float tolmul; float tol; float thresh; scomplex* G; scomplex* H; float* d1; float* e1; int r_val; int done; int m_GH_sweep_max; int ij_begin; int ijTL, ijBR; int m_A11; int n_iter_perf; int n_UV_apply; int total_deflations; int n_deflations; int n_iter_prev; int n_iter_perf_sweep_max; // Compute some convergence constants. eps = FLA_Mach_params_ops( FLA_MACH_EPS ); tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) ); FLA_Bsvd_compute_tol_thresh_ops( min_m_n, tolmul, maxitr, buff_d, inc_d, buff_e, inc_e, &tol, &thresh ); // Initialize our completion flag. done = FALSE; // Initialize a counter that holds the maximum number of rows of G // and H that we would need to initialize for the next sweep. m_GH_sweep_max = min_m_n - 1; // Initialize a counter for the total number of iterations performed. n_iter_prev = 0; // Iterate until the matrix has completely deflated. for ( total_deflations = 0; done != TRUE; ) { // Initialize G and H to contain only identity rotations. bl1_csetm( m_GH_sweep_max, n_GH, &one, buff_G, rs_G, cs_G ); bl1_csetm( m_GH_sweep_max, n_GH, &one, buff_H, rs_H, cs_H ); // Keep track of the maximum number of iterations performed in the // current sweep. This is used when applying the sweep's Givens // rotations. n_iter_perf_sweep_max = 0; // Perform a sweep: Move through the matrix and perform a bidiagonal // SVD on each non-zero submatrix that is encountered. During the // first time through, ijTL will be 0 and ijBR will be min_m_n - 1. for ( ij_begin = 0; ij_begin < min_m_n; ) { // Search for the first submatrix along the diagonal that is // bounded by zeroes (or endpoints of the matrix). If no // submatrix is found (ie: if the entire superdiagonal is zero // then FLA_FAILURE is returned. This function also inspects // superdiagonal elements for proximity to zero. If a given // element is close enough to zero, then it is deemed // converged and manually set to zero. r_val = FLA_Bsvd_find_submatrix_ops( min_m_n, ij_begin, buff_d, inc_d, buff_e, inc_e, &ijTL, &ijBR ); // Verify that a submatrix was found. If one was not found, // then we are done with the current sweep. Furthermore, if // a submatrix was not found AND we began our search at the // beginning of the matrix (ie: ij_begin == 0), then the // matrix has completely deflated and so we are done with // Francis step iteration. if ( r_val == FLA_FAILURE ) { if ( ij_begin == 0 ) { done = TRUE; } // Break out of the current sweep so we can apply the last // remaining Givens rotations. break; } // If we got this far, then: // (a) ijTL refers to the index of the first non-zero // superdiagonal along the diagonal, and // (b) ijBR refers to either: // - the first zero element that occurs after ijTL, or // - the the last diagonal element. // Note that ijTL and ijBR also correspond to the first and // last diagonal elements of the submatrix of interest. Thus, // we may compute the dimension of this submatrix as: m_A11 = ijBR - ijTL + 1; // Adjust ij_begin, which gets us ready for the next submatrix // search in the current sweep. ij_begin = ijBR + 1; // Index to the submatrices upon which we will operate. d1 = buff_d + ijTL * inc_d; e1 = buff_e + ijTL * inc_e; G = buff_G + ijTL * rs_G; H = buff_H + ijTL * rs_H; // Search for a batch of singular values, recursing on deflated // subproblems whenever a split occurs. Iteration continues as // long as // (a) there is still matrix left to operate on, and // (b) the number of iterations performed in this batch is // less than n_G. // If/when either of the two above conditions fails to hold, // the function returns. n_deflations = FLA_Bsvd_iteracc_v_ops_var1( m_A11, n_GH, ijTL, tol, thresh, d1, inc_d, e1, inc_e, G, rs_G, cs_G, H, rs_H, cs_H, &n_iter_perf ); // Record the number of deflations that were observed. total_deflations += n_deflations; // Update the maximum number of iterations performed in the // current sweep. n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf ); // Store the most recent value of ijBR in m_G_sweep_max. // When the sweep is done, this value will contain the minimum // number of rows of G and H we can apply and safely include all // non-identity rotations that were computed during the // singular value searches. m_GH_sweep_max = ijBR; // Make sure we haven't exceeded our maximum iteration count. if ( n_iter_prev >= n_iter_max * min_m_n ) { FLA_Abort(); //return FLA_FAILURE; } } // The sweep is complete. Now we must apply the Givens rotations // that were accumulated during the sweep. // Recall that the number of columns of U and V to which we apply // rotations is one more than the number of rotations. n_UV_apply = m_GH_sweep_max + 1; // Apply the Givens rotations. Note that we only apply k sets of // rotations, where k = n_iter_perf_sweep_max. Also note that we only // apply to n_UV_apply columns of U and V since this is the most we // need to touch given the most recent value stored to m_GH_sweep_max. //FLA_Apply_G_rf_bls_var5( n_iter_perf_sweep_max, FLA_Apply_G_rf_bls_var3( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max, m_U, n_UV_apply, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U, b_alg ); //FLA_Apply_G_rf_blc_var5( n_iter_perf_sweep_max, FLA_Apply_G_rf_bls_var3( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max, m_V, n_UV_apply, buff_H, rs_H, cs_H, buff_V, rs_V, cs_V, b_alg ); // Increment the total number of iterations previously performed. n_iter_prev += n_iter_perf_sweep_max; } // Make all the singular values positive. { int i; float minus_one = bl1_sm1(); for ( i = 0; i < min_m_n; ++i ) { if ( buff_d[ (i )*inc_d ] < rzero ) { buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ]; // Scale the right singular vectors. bl1_sscalv( BLIS1_NO_CONJUGATE, m_V, &minus_one, buff_V + (i )*cs_V, rs_V ); } } } return n_iter_prev; }
FLA_Error FLA_Bsvd_v_ops_var2 | ( | int | min_m_n, |
int | m_U, | ||
int | m_V, | ||
int | n_GH, | ||
int | n_iter_max, | ||
float * | buff_d, | ||
int | inc_d, | ||
float * | buff_e, | ||
int | inc_e, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_H, | ||
int | rs_H, | ||
int | cs_H, | ||
float * | buff_RG, | ||
int | rs_RG, | ||
int | cs_RG, | ||
float * | buff_RH, | ||
int | rs_RH, | ||
int | cs_RH, | ||
float * | buff_W, | ||
int | rs_W, | ||
int | cs_W, | ||
float * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
float * | buff_V, | ||
int | rs_V, | ||
int | cs_V, | ||
int | b_alg | ||
) |
Referenced by FLA_Bsvd_v_opt_var2().
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_v_opt_var1 | ( | dim_t | n_iter_max, |
FLA_Obj | d, | ||
FLA_Obj | e, | ||
FLA_Obj | G, | ||
FLA_Obj | H, | ||
FLA_Obj | U, | ||
FLA_Obj | V, | ||
dim_t | b_alg | ||
) |
References FLA_Bsvd_v_opc_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_ops_var1(), FLA_Bsvd_v_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().
Referenced by FLA_Svd_uv_unb_var1().
{ FLA_Error r_val = FLA_SUCCESS; FLA_Datatype datatype; int m_U, m_V, n_GH; int inc_d; int inc_e; int rs_G, cs_G; int rs_H, cs_H; int rs_U, cs_U; int rs_V, cs_V; datatype = FLA_Obj_datatype( U ); m_U = FLA_Obj_length( U ); m_V = FLA_Obj_length( V ); n_GH = FLA_Obj_width( G ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); rs_G = FLA_Obj_row_stride( G ); cs_G = FLA_Obj_col_stride( G ); rs_H = FLA_Obj_row_stride( H ); cs_H = FLA_Obj_col_stride( H ); rs_U = FLA_Obj_row_stride( U ); cs_U = FLA_Obj_col_stride( U ); rs_V = FLA_Obj_row_stride( V ); cs_V = FLA_Obj_col_stride( V ); switch ( datatype ) { case FLA_FLOAT: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); scomplex* buff_H = FLA_COMPLEX_PTR( H ); float* buff_U = FLA_FLOAT_PTR( U ); float* buff_V = FLA_FLOAT_PTR( V ); r_val = FLA_Bsvd_v_ops_var1( min( m_U, m_V ), m_U, m_V, n_GH, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_U, rs_U, cs_U, buff_V, rs_V, cs_V, b_alg ); break; } case FLA_DOUBLE: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H ); double* buff_U = FLA_DOUBLE_PTR( U ); double* buff_V = FLA_DOUBLE_PTR( V ); r_val = FLA_Bsvd_v_opd_var1( min( m_U, m_V ), m_U, m_V, n_GH, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_U, rs_U, cs_U, buff_V, rs_V, cs_V, b_alg ); break; } case FLA_COMPLEX: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); scomplex* buff_H = FLA_COMPLEX_PTR( H ); scomplex* buff_U = FLA_COMPLEX_PTR( U ); scomplex* buff_V = FLA_COMPLEX_PTR( V ); r_val = FLA_Bsvd_v_opc_var1( min( m_U, m_V ), m_U, m_V, n_GH, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_U, rs_U, cs_U, buff_V, rs_V, cs_V, b_alg ); break; } case FLA_DOUBLE_COMPLEX: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H ); dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U ); dcomplex* buff_V = FLA_DOUBLE_COMPLEX_PTR( V ); r_val = FLA_Bsvd_v_opz_var1( min( m_U, m_V ), m_U, m_V, n_GH, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_U, rs_U, cs_U, buff_V, rs_V, cs_V, b_alg ); break; } } return r_val; }
FLA_Error FLA_Bsvd_v_opt_var2 | ( | dim_t | n_iter_max, |
FLA_Obj | d, | ||
FLA_Obj | e, | ||
FLA_Obj | G, | ||
FLA_Obj | H, | ||
FLA_Obj | RG, | ||
FLA_Obj | RH, | ||
FLA_Obj | W, | ||
FLA_Obj | U, | ||
FLA_Obj | V, | ||
dim_t | b_alg | ||
) |
References FLA_Bsvd_v_opc_var2(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_ops_var2(), FLA_Bsvd_v_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().
Referenced by FLA_Svd_uv_unb_var2().
{ FLA_Error r_val = FLA_SUCCESS; FLA_Datatype datatype; int m_U, m_V, n_GH; int inc_d; int inc_e; int rs_G, cs_G; int rs_H, cs_H; int rs_RG, cs_RG; int rs_RH, cs_RH; int rs_W, cs_W; int rs_U, cs_U; int rs_V, cs_V; datatype = FLA_Obj_datatype( U ); m_U = FLA_Obj_length( U ); m_V = FLA_Obj_length( V ); n_GH = FLA_Obj_width( G ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); rs_G = FLA_Obj_row_stride( G ); cs_G = FLA_Obj_col_stride( G ); rs_H = FLA_Obj_row_stride( H ); cs_H = FLA_Obj_col_stride( H ); rs_RG = FLA_Obj_row_stride( RG ); cs_RG = FLA_Obj_col_stride( RG ); rs_RH = FLA_Obj_row_stride( RH ); cs_RH = FLA_Obj_col_stride( RH ); rs_W = FLA_Obj_row_stride( W ); cs_W = FLA_Obj_col_stride( W ); rs_U = FLA_Obj_row_stride( U ); cs_U = FLA_Obj_col_stride( U ); rs_V = FLA_Obj_row_stride( V ); cs_V = FLA_Obj_col_stride( V ); switch ( datatype ) { case FLA_FLOAT: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); scomplex* buff_H = FLA_COMPLEX_PTR( H ); float* buff_RG = FLA_FLOAT_PTR( RG ); float* buff_RH = FLA_FLOAT_PTR( RH ); float* buff_W = FLA_FLOAT_PTR( W ); float* buff_U = FLA_FLOAT_PTR( U ); float* buff_V = FLA_FLOAT_PTR( V ); r_val = FLA_Bsvd_v_ops_var2( min( m_U, m_V ), m_U, m_V, n_GH, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_RG, rs_RG, cs_RG, buff_RH, rs_RH, cs_RH, buff_W, rs_W, cs_W, buff_U, rs_U, cs_U, buff_V, rs_V, cs_V, b_alg ); break; } case FLA_DOUBLE: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H ); double* buff_RG = FLA_DOUBLE_PTR( RG ); double* buff_RH = FLA_DOUBLE_PTR( RH ); double* buff_W = FLA_DOUBLE_PTR( W ); double* buff_U = FLA_DOUBLE_PTR( U ); double* buff_V = FLA_DOUBLE_PTR( V ); r_val = FLA_Bsvd_v_opd_var2( min( m_U, m_V ), m_U, m_V, n_GH, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_RG, rs_RG, cs_RG, buff_RH, rs_RH, cs_RH, buff_W, rs_W, cs_W, buff_U, rs_U, cs_U, buff_V, rs_V, cs_V, b_alg ); break; } case FLA_COMPLEX: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); scomplex* buff_H = FLA_COMPLEX_PTR( H ); float* buff_RG = FLA_FLOAT_PTR( RG ); float* buff_RH = FLA_FLOAT_PTR( RH ); scomplex* buff_W = FLA_COMPLEX_PTR( W ); scomplex* buff_U = FLA_COMPLEX_PTR( U ); scomplex* buff_V = FLA_COMPLEX_PTR( V ); r_val = FLA_Bsvd_v_opc_var2( min( m_U, m_V ), m_U, m_V, n_GH, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_RG, rs_RG, cs_RG, buff_RH, rs_RH, cs_RH, buff_W, rs_W, cs_W, buff_U, rs_U, cs_U, buff_V, rs_V, cs_V, b_alg ); break; } case FLA_DOUBLE_COMPLEX: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H ); double* buff_RG = FLA_DOUBLE_PTR( RG ); double* buff_RH = FLA_DOUBLE_PTR( RH ); dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W ); dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U ); dcomplex* buff_V = FLA_DOUBLE_COMPLEX_PTR( V ); r_val = FLA_Bsvd_v_opz_var2( min( m_U, m_V ), m_U, m_V, n_GH, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_RG, rs_RG, cs_RG, buff_RH, rs_RH, cs_RH, buff_W, rs_W, cs_W, buff_U, rs_U, cs_U, buff_V, rs_V, cs_V, b_alg ); break; } } return r_val; }
FLA_Error FLA_Bsvd_v_opz_var1 | ( | int | min_m_n, |
int | m_U, | ||
int | m_V, | ||
int | n_GH, | ||
int | n_iter_max, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_H, | ||
int | rs_H, | ||
int | cs_H, | ||
dcomplex * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
dcomplex * | buff_V, | ||
int | rs_V, | ||
int | cs_V, | ||
int | b_alg | ||
) |
References bl1_d0(), bl1_dm1(), bl1_z1(), bl1_zdscalv(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blz_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().
Referenced by FLA_Bsvd_v_opt_var1().
{ dcomplex one = bl1_z1(); double rzero = bl1_d0(); int maxitr = 6; double eps; double tolmul; double tol; double thresh; dcomplex* G; dcomplex* H; double* d1; double* e1; int r_val; int done; int m_GH_sweep_max; int ij_begin; int ijTL, ijBR; int m_A11; int n_iter_perf; int n_UV_apply; int total_deflations; int n_deflations; int n_iter_prev; int n_iter_perf_sweep_max; // Compute some convergence constants. eps = FLA_Mach_params_opd( FLA_MACH_EPS ); tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) ); FLA_Bsvd_compute_tol_thresh_opd( min_m_n, tolmul, maxitr, buff_d, inc_d, buff_e, inc_e, &tol, &thresh ); #ifdef PRINTF printf( "FLA_Bsvd_v_opz_var1: tolmul = %12.6e\n", tolmul ); printf( "FLA_Bsvd_v_opz_var1: tol = %12.6e\n", tol ); printf( "FLA_Bsvd_v_opz_var1: thresh = %12.6e\n", thresh ); #endif // Initialize our completion flag. done = FALSE; // Initialize a counter that holds the maximum number of rows of G // and H that we would need to initialize for the next sweep. m_GH_sweep_max = min_m_n - 1; // Initialize a counter for the total number of iterations performed. n_iter_prev = 0; // Iterate until the matrix has completely deflated. for ( total_deflations = 0; done != TRUE; ) { // Initialize G and H to contain only identity rotations. bl1_zsetm( m_GH_sweep_max, n_GH, &one, buff_G, rs_G, cs_G ); bl1_zsetm( m_GH_sweep_max, n_GH, &one, buff_H, rs_H, cs_H ); // Keep track of the maximum number of iterations performed in the // current sweep. This is used when applying the sweep's Givens // rotations. n_iter_perf_sweep_max = 0; // Perform a sweep: Move through the matrix and perform a bidiagonal // SVD on each non-zero submatrix that is encountered. During the // first time through, ijTL will be 0 and ijBR will be min_m_n - 1. for ( ij_begin = 0; ij_begin < min_m_n; ) { #ifdef PRINTF if ( ij_begin == 0 ) printf( "FLA_Bsvd_v_opz_var1: beginning new sweep (ij_begin = %d)\n", ij_begin ); #endif // Search for the first submatrix along the diagonal that is // bounded by zeroes (or endpoints of the matrix). If no // submatrix is found (ie: if the entire superdiagonal is zero // then FLA_FAILURE is returned. This function also inspects // superdiagonal elements for proximity to zero. If a given // element is close enough to zero, then it is deemed // converged and manually set to zero. r_val = FLA_Bsvd_find_submatrix_opd( min_m_n, ij_begin, buff_d, inc_d, buff_e, inc_e, &ijTL, &ijBR ); // Verify that a submatrix was found. If one was not found, // then we are done with the current sweep. Furthermore, if // a submatrix was not found AND we began our search at the // beginning of the matrix (ie: ij_begin == 0), then the // matrix has completely deflated and so we are done with // Francis step iteration. if ( r_val == FLA_FAILURE ) { if ( ij_begin == 0 ) { #ifdef PRINTF printf( "FLA_Bsvd_v_opz_var1: superdiagonal is completely zero.\n" ); printf( "FLA_Bsvd_v_opz_var1: Francis iteration is done!\n" ); #endif done = TRUE; } // Break out of the current sweep so we can apply the last // remaining Givens rotations. break; } // If we got this far, then: // (a) ijTL refers to the index of the first non-zero // superdiagonal along the diagonal, and // (b) ijBR refers to either: // - the first zero element that occurs after ijTL, or // - the the last diagonal element. // Note that ijTL and ijBR also correspond to the first and // last diagonal elements of the submatrix of interest. Thus, // we may compute the dimension of this submatrix as: m_A11 = ijBR - ijTL + 1; #ifdef PRINTF printf( "FLA_Bsvd_v_opz_var1: ij_begin = %d\n", ij_begin ); printf( "FLA_Bsvd_v_opz_var1: ijTL = %d\n", ijTL ); printf( "FLA_Bsvd_v_opz_var1: ijBR = %d\n", ijBR ); printf( "FLA_Bsvd_v_opz_var1: m_A11 = %d\n", m_A11 ); #endif // Adjust ij_begin, which gets us ready for the next submatrix // search in the current sweep. ij_begin = ijBR + 1; // Index to the submatrices upon which we will operate. d1 = buff_d + ijTL * inc_d; e1 = buff_e + ijTL * inc_e; G = buff_G + ijTL * rs_G; H = buff_H + ijTL * rs_H; // Search for a batch of singular values, recursing on deflated // subproblems whenever a split occurs. Iteration continues as // long as // (a) there is still matrix left to operate on, and // (b) the number of iterations performed in this batch is // less than n_G. // If/when either of the two above conditions fails to hold, // the function returns. n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11, n_GH, ijTL, tol, thresh, d1, inc_d, e1, inc_e, G, rs_G, cs_G, H, rs_H, cs_H, &n_iter_perf ); // Record the number of deflations that were observed. total_deflations += n_deflations; // Update the maximum number of iterations performed in the // current sweep. n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf ); #ifdef PRINTF printf( "FLA_Bsvd_v_opz_var1: deflations observed = %d\n", n_deflations ); printf( "FLA_Bsvd_v_opz_var1: total deflations observed = %d\n", total_deflations ); printf( "FLA_Bsvd_v_opz_var1: num iterations performed = %d\n", n_iter_perf ); #endif // Store the most recent value of ijBR in m_G_sweep_max. // When the sweep is done, this value will contain the minimum // number of rows of G and H we can apply and safely include all // non-identity rotations that were computed during the // singular value searches. m_GH_sweep_max = ijBR; // Make sure we haven't exceeded our maximum iteration count. if ( n_iter_prev >= n_iter_max * min_m_n ) { #ifdef PRINTF printf( "FLA_Bsvd_v_opz_var1: reached maximum total number of iterations: %d\n", n_iter_prev ); #endif FLA_Abort(); //return FLA_FAILURE; } } // The sweep is complete. Now we must apply the Givens rotations // that were accumulated during the sweep. // Recall that the number of columns of U and V to which we apply // rotations is one more than the number of rotations. n_UV_apply = m_GH_sweep_max + 1; #ifdef PRINTF printf( "FLA_Bsvd_v_opz_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max ); printf( "FLA_Bsvd_v_opz_var1: m_U = %d\n", m_U ); printf( "FLA_Bsvd_v_opz_var1: m_V = %d\n", m_V ); printf( "FLA_Bsvd_v_opz_var1: napp= %d\n", n_UV_apply ); #endif // Apply the Givens rotations. Note that we only apply k sets of // rotations, where k = n_iter_perf_sweep_max. Also note that we only // apply to n_UV_apply columns of U and V since this is the most we // need to touch given the most recent value stored to m_GH_sweep_max. //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max, FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max, //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max, //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max, m_U, n_UV_apply, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U, b_alg ); //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max, FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max, //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max, //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max, m_V, n_UV_apply, buff_H, rs_H, cs_H, buff_V, rs_V, cs_V, b_alg ); // Increment the total number of iterations previously performed. n_iter_prev += n_iter_perf_sweep_max; #ifdef PRINTF printf( "FLA_Bsvd_v_opz_var1: total number of iterations performed: %d\n", n_iter_prev ); #endif } // Make all the singular values positive. { int i; double minus_one = bl1_dm1(); for ( i = 0; i < min_m_n; ++i ) { if ( buff_d[ (i )*inc_d ] < rzero ) { buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ]; // Scale the right singular vectors. bl1_zdscalv( BLIS1_NO_CONJUGATE, m_V, &minus_one, buff_V + (i )*cs_V, rs_V ); } } } return n_iter_prev; }
FLA_Error FLA_Bsvd_v_opz_var2 | ( | int | min_m_n, |
int | m_U, | ||
int | m_V, | ||
int | n_GH, | ||
int | n_iter_max, | ||
double * | buff_d, | ||
int | inc_d, | ||
double * | buff_e, | ||
int | inc_e, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_H, | ||
int | rs_H, | ||
int | cs_H, | ||
double * | buff_RG, | ||
int | rs_RG, | ||
int | cs_RG, | ||
double * | buff_RH, | ||
int | rs_RH, | ||
int | cs_RH, | ||
dcomplex * | buff_W, | ||
int | rs_W, | ||
int | cs_W, | ||
dcomplex * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
dcomplex * | buff_V, | ||
int | rs_V, | ||
int | cs_V, | ||
int | b_alg | ||
) |
References bl1_d0(), bl1_d1(), bl1_dgemm(), bl1_dident(), bl1_dm1(), bl1_z1(), bl1_zcopymt(), bl1_zdscalv(), bl1_zsetm(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Apply_G_rf_bld_var3b(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().
Referenced by FLA_Bsvd_v_opt_var2().
{ dcomplex one = bl1_z1(); double rone = bl1_d1(); double rzero = bl1_d0(); int maxitr = 6; double eps; double tolmul; double tol; double thresh; dcomplex* G; dcomplex* H; double* d1; double* e1; int r_val; int done; int m_GH_sweep_max; int ij_begin; int ijTL, ijBR; int m_A11; int n_iter_perf; int n_UV_apply; int total_deflations; int n_deflations; int n_iter_prev; int n_iter_perf_sweep_max; // Compute some convergence constants. eps = FLA_Mach_params_opd( FLA_MACH_EPS ); tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) ); FLA_Bsvd_compute_tol_thresh_opd( min_m_n, tolmul, maxitr, buff_d, inc_d, buff_e, inc_e, &tol, &thresh ); // Initialize our completion flag. done = FALSE; // Initialize a counter that holds the maximum number of rows of G // and H that we would need to initialize for the next sweep. m_GH_sweep_max = min_m_n - 1; // Initialize a counter for the total number of iterations performed. n_iter_prev = 0; // Initialize RG and RH to identity. bl1_dident( min_m_n, buff_RG, rs_RG, cs_RG ); bl1_dident( min_m_n, buff_RH, rs_RH, cs_RH ); // Iterate until the matrix has completely deflated. for ( total_deflations = 0; done != TRUE; ) { // Initialize G and H to contain only identity rotations. bl1_zsetm( m_GH_sweep_max, n_GH, &one, buff_G, rs_G, cs_G ); bl1_zsetm( m_GH_sweep_max, n_GH, &one, buff_H, rs_H, cs_H ); // Keep track of the maximum number of iterations performed in the // current sweep. This is used when applying the sweep's Givens // rotations. n_iter_perf_sweep_max = 0; // Perform a sweep: Move through the matrix and perform a bidiagonal // SVD on each non-zero submatrix that is encountered. During the // first time through, ijTL will be 0 and ijBR will be min_m_n - 1. for ( ij_begin = 0; ij_begin < min_m_n; ) { #ifdef PRINTF if ( ij_begin == 0 ) printf( "FLA_Bsvd_v_opz_var2: beginning new sweep (ij_begin = %d)\n", ij_begin ); #endif // Search for the first submatrix along the diagonal that is // bounded by zeroes (or endpoints of the matrix). If no // submatrix is found (ie: if the entire superdiagonal is zero // then FLA_FAILURE is returned. This function also inspects // superdiagonal elements for proximity to zero. If a given // element is close enough to zero, then it is deemed // converged and manually set to zero. r_val = FLA_Bsvd_find_submatrix_opd( min_m_n, ij_begin, buff_d, inc_d, buff_e, inc_e, &ijTL, &ijBR ); // Verify that a submatrix was found. If one was not found, // then we are done with the current sweep. Furthermore, if // a submatrix was not found AND we began our search at the // beginning of the matrix (ie: ij_begin == 0), then the // matrix has completely deflated and so we are done with // Francis step iteration. if ( r_val == FLA_FAILURE ) { if ( ij_begin == 0 ) { #ifdef PRINTF printf( "FLA_Bsvd_v_opz_var2: superdiagonal is completely zero.\n" ); printf( "FLA_Bsvd_v_opz_var2: Francis iteration is done!\n" ); #endif done = TRUE; } // Break out of the current sweep so we can apply the last // remaining Givens rotations. break; } // If we got this far, then: // (a) ijTL refers to the index of the first non-zero // superdiagonal along the diagonal, and // (b) ijBR refers to either: // - the first zero element that occurs after ijTL, or // - the the last diagonal element. // Note that ijTL and ijBR also correspond to the first and // last diagonal elements of the submatrix of interest. Thus, // we may compute the dimension of this submatrix as: m_A11 = ijBR - ijTL + 1; #ifdef PRINTF printf( "FLA_Bsvd_v_opz_var2: ij_begin = %d\n", ij_begin ); printf( "FLA_Bsvd_v_opz_var2: ijTL = %d\n", ijTL ); printf( "FLA_Bsvd_v_opz_var2: ijBR = %d\n", ijBR ); printf( "FLA_Bsvd_v_opz_var2: m_A11 = %d\n", m_A11 ); #endif // Adjust ij_begin, which gets us ready for the next submatrix // search in the current sweep. ij_begin = ijBR + 1; // Index to the submatrices upon which we will operate. d1 = buff_d + ijTL * inc_d; e1 = buff_e + ijTL * inc_e; G = buff_G + ijTL * rs_G; H = buff_H + ijTL * rs_H; // Search for a batch of singular values, recursing on deflated // subproblems whenever possible. A new singular value search is // performed as long as // (a) there is still matrix left to operate on, and // (b) the number of iterations performed in this batch is // less than n_G. // If/when either of the two above conditions fails to hold, // the function returns. n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11, n_GH, ijTL, tol, thresh, d1, inc_d, e1, inc_e, G, rs_G, cs_G, H, rs_H, cs_H, &n_iter_perf ); // Record the number of deflations that occurred. total_deflations += n_deflations; // Update the maximum number of iterations performed in the // current sweep. n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf ); #ifdef PRINTF printf( "FLA_Bsvd_v_opz_var2: deflations observed = %d\n", n_deflations ); printf( "FLA_Bsvd_v_opz_var2: total deflations observed = %d\n", total_deflations ); printf( "FLA_Bsvd_v_opz_var2: num iterations performed = %d\n", n_iter_perf ); #endif // Store the most recent value of ijBR in m_G_sweep_max. // When the sweep is done, this value will contain the minimum // number of rows of G and H we can apply and safely include all // non-identity rotations that were computed during the // singular value searches. m_GH_sweep_max = ijBR; } // The sweep is complete. Now we must apply the Givens rotations // that were accumulated during the sweep. // Recall that the number of columns of U and V to which we apply // rotations is one more than the number of rotations. n_UV_apply = m_GH_sweep_max + 1; #ifdef PRINTF printf( "FLA_Bsvd_v_opz_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max ); printf( "FLA_Bsvd_v_opz_var2: m_U = %d\n", m_U ); printf( "FLA_Bsvd_v_opz_var2: m_V = %d\n", m_V ); printf( "FLA_Bsvd_v_opz_var2: napp= %d\n", n_UV_apply ); #endif // Apply the Givens rotations. Note that we only apply k sets of // rotations, where k = n_iter_perf_sweep_max. Also note that we only // apply to n_UV_apply columns of U and V since this is the most we // need to touch given the most recent value stored to m_GH_sweep_max. //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max, FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max, min_m_n, n_UV_apply, n_iter_prev, buff_G, rs_G, cs_G, buff_RG, rs_RG, cs_RG, b_alg ); //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max, FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max, //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max, min_m_n, n_UV_apply, n_iter_prev, buff_H, rs_H, cs_H, buff_RH, rs_RH, cs_RH, b_alg ); // Increment the total number of iterations previously performed. n_iter_prev += n_iter_perf_sweep_max; #ifdef PRINTF printf( "FLA_Bsvd_v_opz_var2: total number of iterations performed: %d\n", n_iter_prev ); #endif } // Copy the contents of Q to temporary storage. bl1_zcopymt( BLIS1_NO_TRANSPOSE, m_U, m_V, buff_U, rs_U, cs_U, buff_W, rs_W, cs_W ); // W needs to be max_m_n-by-min_m_n!!!!!!!!!!!!!!! // Multiply U by R, overwriting U. bl1_dgemm( BLIS1_NO_TRANSPOSE, BLIS1_NO_TRANSPOSE, 2*m_U, m_V, m_V, &rone, ( double* )buff_W, rs_W, 2*cs_W, buff_RG, rs_RG, cs_RG, &rzero, ( double* )buff_U, rs_U, 2*cs_U ); bl1_zcopymt( BLIS1_NO_TRANSPOSE, m_V, m_V, buff_V, rs_V, cs_V, buff_W, rs_W, cs_W ); // Multiply V by R, overwriting V. bl1_dgemm( BLIS1_NO_TRANSPOSE, BLIS1_NO_TRANSPOSE, 2*m_V, m_V, m_V, &rone, ( double* )buff_W, rs_W, 2*cs_W, buff_RH, rs_RH, cs_RH, &rzero, ( double* )buff_V, rs_V, 2*cs_V ); // Make all the singular values positive. { int i; double minus_one = bl1_dm1(); for ( i = 0; i < min_m_n; ++i ) { if ( buff_d[ (i )*inc_d ] < rzero ) { buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ]; // Scale the right singular vectors. bl1_zdscalv( BLIS1_NO_CONJUGATE, m_V, &minus_one, buff_V + (i )*cs_V, rs_V ); } } } return n_iter_prev; }