libflame
revision_anchor
|
Go to the source code of this file.
FLA_Error FLA_Tridiag_UT( FLA_Uplo uplo, FLA_Obj A, FLA_Obj T )
References FLA_Check_error_level(), FLA_Obj_is_double_precision(), FLA_Obj_row_stride(), FLA_Tridiag_UT_check(), and FLA_Tridiag_UT_internal().
Referenced by FLA_Hevd_lv_unb_var1(), and FLA_Hevd_lv_unb_var2().
{
    // Front end of the tridiagonal reduction: validates the arguments,
    // picks a control tree, and hands off to FLA_Tridiag_UT_internal().
    // Returns whatever FLA_Tridiag_UT_internal() returns.
    fla_tridiagut_t *cntl;

    // Check parameters.
    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    {
        FLA_Tridiag_UT_check( uplo, A, T );
    }

    // Temporary fix not to use the fused version (numerically unstable):
    // unit-row-stride, double-precision inputs get the "plain" control
    // tree, everything else gets the "nofus" one.
    cntl = ( FLA_Obj_row_stride( A ) == 1 && FLA_Obj_is_double_precision( A ) )
               ? fla_tridiagut_cntl_plain
               : fla_tridiagut_cntl_nofus;

    return FLA_Tridiag_UT_internal( uplo, A, T, cntl );
}
FLA_Error FLA_Tridiag_UT_create_T( FLA_Obj A, FLA_Obj* T )
References FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_min_dim(), FLA_Obj_row_stride(), and FLA_Query_blocksize().
Referenced by FLA_Hevd_lv_unb_var1(), and FLA_Hevd_lv_unb_var2().
{
    // Creates the object T that will hold the block Householder transforms
    // accumulated by the tridiagonal reduction of A. T gets A's datatype
    // and is sized b_alg x min-dim(A). Always returns FLA_SUCCESS.
    FLA_Datatype dt_A;
    dim_t        n_rows_T, n_cols_T;
    dim_t        row_stride_T, col_stride_T;

    // T shares the datatype of A.
    dt_A = FLA_Obj_datatype( A );

    // Start from the library blocksize, scaled by the pre-set global ratio
    // (the product is truncated back to dim_t).
    n_rows_T = FLA_Query_blocksize( dt_A, FLA_DIMENSION_MIN );
    n_rows_T = ( dim_t )( ( ( double ) n_rows_T ) * FLA_TRIDIAG_INNER_TO_OUTER_B_RATIO );

    // Never use a blocksize larger than the minimum dimension of A.
    n_cols_T = FLA_Obj_min_dim( A );
    n_rows_T = min( n_rows_T, n_cols_T );

    // Give T the same storage orientation as A.
    if ( FLA_Obj_row_stride( A ) == 1 )
    {
        // Unit row stride: consecutive rows are adjacent in memory.
        row_stride_T = 1;
        col_stride_T = n_rows_T;
    }
    else
    {
        // Otherwise treat A as having unit column stride.
        row_stride_T = n_cols_T;
        col_stride_T = 1;
    }

    FLA_Obj_create( dt_A, n_rows_T, n_cols_T, row_stride_T, col_stride_T, T );

    return FLA_SUCCESS;
}
References FLA_Bidiag_UT_l_extract_diagonals(), FLA_Bidiag_UT_u_extract_diagonals(), FLA_Check_error_level(), and FLA_Tridiag_UT_extract_diagonals_check().
{
    // Extracts the diagonals of A into d and e by delegating to the
    // bidiagonal extraction routine that matches uplo; returns the
    // delegate's result.
    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    {
        FLA_Tridiag_UT_extract_diagonals_check( uplo, A, d, e );
    }

    if ( uplo == FLA_LOWER_TRIANGULAR )
    {
        return FLA_Bidiag_UT_l_extract_diagonals( A, d, e );
    }

    // Any other uplo value is handled by the upper-storage routine.
    return FLA_Bidiag_UT_u_extract_diagonals( A, d, e );
}
References FLA_Bidiag_UT_l_extract_real_diagonals(), FLA_Bidiag_UT_u_extract_real_diagonals(), FLA_Check_error_level(), and FLA_Tridiag_UT_extract_real_diagonals_check().
Referenced by FLA_Hevd_lv_unb_var1(), and FLA_Hevd_lv_unb_var2().
{
    // Real-valued counterpart of the diagonal extraction: delegates to the
    // bidiagonal "extract_real_diagonals" routine that matches uplo and
    // returns its result.
    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    {
        FLA_Tridiag_UT_extract_real_diagonals_check( uplo, A, d, e );
    }

    if ( uplo == FLA_LOWER_TRIANGULAR )
    {
        return FLA_Bidiag_UT_l_extract_real_diagonals( A, d, e );
    }

    // Any other uplo value is handled by the upper-storage routine.
    return FLA_Bidiag_UT_u_extract_real_diagonals( A, d, e );
}
References FLA_Check_error_level(), FLA_Obj_is(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x2(), FLA_QR_UT_form_Q(), FLA_Set(), FLA_Tridiag_UT_form_Q_check(), FLA_Tridiag_UT_shift_U(), and FLA_ZERO.
Referenced by FLA_Hevd_lv_unb_var1(), and FLA_Hevd_lv_unb_var2().
{
  // Forms Q from the Householder data held in A and T.
  //
  //  - When Q is a different object from A, the first row and column of Q
  //    are set to those of the identity and the remaining block QBR is
  //    formed by FLA_QR_UT_form_Q().
  //  - When Q is A itself, the Householder vectors are first shifted
  //    towards the diagonal and Q is formed in place in ABR.
  //
  // Only uplo == FLA_LOWER_TRIANGULAR is implemented; any other value
  // raises FLA_NOT_YET_IMPLEMENTED through FLA_Check_error_code().
  //
  // Returns the status reported by FLA_QR_UT_form_Q() (previously that
  // status was discarded and FLA_SUCCESS was always returned).
  FLA_Error r_val = FLA_SUCCESS;
  FLA_Obj   ATL, ATR,
            ABL, ABR;
  FLA_Obj   QTL, QTR,
            QBL, QBR;
  FLA_Obj   TL,  TR;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Tridiag_UT_form_Q_check( uplo, A, T, Q );

  // Adjust T: partition off TR (size 1 on the FLA_RIGHT side); only TL is
  // used below.
  FLA_Part_1x2( T, &TL, &TR, 1, FLA_RIGHT );

  if ( FLA_Obj_is( A, Q ) == FALSE )
  {
    // Q is separate from A: initialise its border to that of the identity.
    FLA_Part_2x2( Q, &QTL, &QTR,
                     &QBL, &QBR, 1, 1, FLA_TL );

    FLA_Set( FLA_ONE,  QTL );
    FLA_Set( FLA_ZERO, QTR );
    FLA_Set( FLA_ZERO, QBL );

    if ( uplo == FLA_LOWER_TRIANGULAR )
    {
      // With the 1x1 block in the top-right corner, ABL is the part of A
      // handed to the QR-based routine together with TL.
      FLA_Part_2x2( A, &ATL, &ATR,
                       &ABL, &ABR, 1, 1, FLA_TR );

      r_val = FLA_QR_UT_form_Q( ABL, TL, QBR );
    }
    else // ( uplo == FLA_UPPER_TRIANGULAR )
    {
      FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
    }
  }
  else
  {
    // Shift the Householder vectors one row/column towards the diagonal.
    FLA_Tridiag_UT_shift_U( uplo, A );

    FLA_Part_2x2( A, &ATL, &ATR,
                     &ABL, &ABR, 1, 1, FLA_TL );

    if ( uplo == FLA_LOWER_TRIANGULAR )
    {
      // In-place formation: ABR is both the input and the output.
      r_val = FLA_QR_UT_form_Q( ABR, TL, ABR );
    }
    else // ( uplo == FLA_UPPER_TRIANGULAR )
    {
      FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
    }
  }

  return r_val;
}
FLA_Error FLA_Tridiag_UT_form_Q_l_blk_var1( FLA_Obj A, FLA_Obj T, FLA_Obj W )
FLA_Error FLA_Tridiag_UT_form_Q_l_opc_var1( int m_A, int n_AT, scomplex* buff_A, int rs_A, int cs_A, scomplex* buff_T, int rs_T, int cs_T )
FLA_Error FLA_Tridiag_UT_form_Q_l_opd_var1( int m_A, int n_AT, double* buff_A, int rs_A, int cs_A, double* buff_T, int rs_T, int cs_T )
FLA_Error FLA_Tridiag_UT_form_Q_l_ops_var1( int m_A, int n_AT, float* buff_A, int rs_A, int cs_A, float* buff_T, int rs_T, int cs_T )
FLA_Error FLA_Tridiag_UT_form_Q_l_opz_var1( int m_A, int n_AT, dcomplex* buff_A, int rs_A, int cs_A, dcomplex* buff_T, int rs_T, int cs_T )
FLA_Error FLA_Tridiag_UT_form_Q_u_blk_var1( FLA_Obj A, FLA_Obj T, FLA_Obj W )
FLA_Error FLA_Tridiag_UT_internal( FLA_Uplo uplo, FLA_Obj A, FLA_Obj T, fla_tridiagut_t* cntl )
References FLA_Check_error_level(), FLA_Tridiag_UT_internal_check(), and FLA_Tridiag_UT_l().
Referenced by FLA_Tridiag_UT().
{
    // Internal back end of FLA_Tridiag_UT(): dispatches on uplo. Only the
    // lower-triangular case is implemented; it returns the result of
    // FLA_Tridiag_UT_l(). Any other uplo raises FLA_NOT_YET_IMPLEMENTED.

    // The internal argument check only runs at the full error-checking level.
    if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    {
        FLA_Tridiag_UT_internal_check( uplo, A, T, cntl );
    }

    if ( uplo != FLA_LOWER_TRIANGULAR )
    {
        // if ( uplo == FLA_UPPER_TRIANGULAR )
        FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
        return FLA_SUCCESS;
    }

    return FLA_Tridiag_UT_l( A, T, cntl );
}
FLA_Error FLA_Tridiag_UT_l( FLA_Obj A, FLA_Obj T, fla_tridiagut_t* cntl )
References FLA_Tridiag_UT_l_blf_var2(), FLA_Tridiag_UT_l_blf_var3(), FLA_Tridiag_UT_l_blk_var1(), FLA_Tridiag_UT_l_blk_var2(), FLA_Tridiag_UT_l_blk_var3(), FLA_Tridiag_UT_l_opt_var1(), FLA_Tridiag_UT_l_opt_var2(), FLA_Tridiag_UT_l_opt_var3(), FLA_Tridiag_UT_l_unb_var1(), FLA_Tridiag_UT_l_unb_var2(), and FLA_Tridiag_UT_l_unb_var3().
Referenced by FLA_Tridiag_UT_internal().
{
    // Dispatches the lower-triangular reduction to the algorithmic variant
    // named by the control tree and returns that variant's result. A
    // variant not listed here raises FLA_NOT_YET_IMPLEMENTED.
    FLA_Error status = FLA_SUCCESS;

    switch ( FLA_Cntl_variant( cntl ) )
    {
        // Unblocked variants.
        case FLA_UNBLOCKED_VARIANT1:
            status = FLA_Tridiag_UT_l_unb_var1( A, T );
            break;
        case FLA_UNBLOCKED_VARIANT2:
            status = FLA_Tridiag_UT_l_unb_var2( A, T );
            break;
        case FLA_UNBLOCKED_VARIANT3:
            status = FLA_Tridiag_UT_l_unb_var3( A, T );
            break;

        // Optimized unblocked variants.
        case FLA_UNB_OPT_VARIANT1:
            status = FLA_Tridiag_UT_l_opt_var1( A, T );
            break;
        case FLA_UNB_OPT_VARIANT2:
            status = FLA_Tridiag_UT_l_opt_var2( A, T );
            break;
        case FLA_UNB_OPT_VARIANT3:
            status = FLA_Tridiag_UT_l_opt_var3( A, T );
            break;

        // Blocked variants.
        case FLA_BLOCKED_VARIANT1:
            status = FLA_Tridiag_UT_l_blk_var1( A, T );
            break;
        case FLA_BLOCKED_VARIANT2:
            status = FLA_Tridiag_UT_l_blk_var2( A, T );
            break;
        case FLA_BLOCKED_VARIANT3:
            status = FLA_Tridiag_UT_l_blk_var3( A, T );
            break;

        // Blocked, fused variants.
        case FLA_BLK_FUS_VARIANT2:
            status = FLA_Tridiag_UT_l_blf_var2( A, T );
            break;
        case FLA_BLK_FUS_VARIANT3:
            status = FLA_Tridiag_UT_l_blf_var3( A, T );
            break;

        default:
            FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
            break;
    }

    return status;
}
References bl1_csetv(), bl1_dsetv(), bl1_ssetv(), bl1_zsetv(), BLIS1_CONJUGATE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_ONE.
Referenced by FLA_Tridiag_UT_realify().
{
  // Makes the subdiagonal of the lower-stored tridiagonal matrix A real and
  // records the scaling factors that were applied in the vector d.
  //
  // Real datatypes: d is simply filled with ones.
  // Complex datatypes: d[0] = 1 and, for each subdiagonal entry a10t_r
  // with a nonzero imaginary part,
  //     delta1 = conj(a10t_r) / abs(a10t_r)
  //     a10t_r = abs(a10t_r)
  //     a21_t  = a21_t * conj(delta1)      (when a next entry exists)
  //
  // Entries whose imaginary part is already zero are left untouched and get
  // delta1 = 1, matching FLA_Tridiag_UT_realify_subdiagonal_opt(). This
  // keeps an exactly-zero subdiagonal entry from producing 0/0 = NaN in d
  // and A. Note that a negative real entry is therefore no longer replaced
  // by its absolute value.
  //
  // Always returns FLA_SUCCESS.
  FLA_Datatype datatype;
  int          m_A;
  int          rs_A, cs_A;
  int          inc_d;
  int          i;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_d    = FLA_Obj_vector_inc( d );

  // Nothing to do for an empty matrix. This also protects the complex
  // branches, which write d[0] unconditionally.
  if ( m_A < 1 )
    return FLA_SUCCESS;

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_d = FLA_FLOAT_PTR( d );
      float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );

      bl1_ssetv( m_A,
                 buff_1,
                 buff_d, inc_d );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_d = FLA_DOUBLE_PTR( d );
      double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );

      bl1_dsetv( m_A,
                 buff_1,
                 buff_d, inc_d );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_d = FLA_COMPLEX_PTR( d );
      scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );

      // Set first element of vector d to one.
      bl1_csetv( 1,
                 buff_1,
                 buff_d, inc_d );

      for ( i = 1; i < m_A; ++i )
      {
        scomplex* a10t_r  = buff_A + (i-1)*cs_A + (i  )*rs_A;
        scomplex* delta1  = buff_d + (i  )*inc_d;
        scomplex  absv;
        scomplex  conj_delta1;

        int       m_ahead = m_A - i - 1;

        // Already real (including exactly zero): no scaling is needed.
        if ( a10t_r->imag == 0.0F )
        {
          *delta1 = *buff_1;
          continue;
        }

        // delta1 = conj(a10t_r) / abs(a10t_r);
        bl1_ccopys( BLIS1_CONJUGATE, a10t_r, delta1 );
        bl1_cabsval2( a10t_r, &absv );
        bl1_cinvscals( &absv, delta1 );

        // a10t_r = abs(a10t_r);
        *a10t_r = absv;

        // a21_t = a21_t * conj(delta1); only if a next entry exists.
        if ( m_ahead > 0 )
        {
          scomplex* a21_t = buff_A + (i  )*cs_A + (i+1)*rs_A;

          bl1_ccopyconj( delta1, &conj_delta1 );
          bl1_cscals( &conj_delta1, a21_t );
        }
      }

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
      dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );

      // Set first element of vector d to one.
      bl1_zsetv( 1,
                 buff_1,
                 buff_d, inc_d );

      for ( i = 1; i < m_A; ++i )
      {
        dcomplex* a10t_r  = buff_A + (i-1)*cs_A + (i  )*rs_A;
        dcomplex* delta1  = buff_d + (i  )*inc_d;
        dcomplex  absv;
        dcomplex  conj_delta1;

        int       m_ahead = m_A - i - 1;

        // Already real (including exactly zero): no scaling is needed.
        if ( a10t_r->imag == 0.0 )
        {
          *delta1 = *buff_1;
          continue;
        }

        // delta1 = conj(a10t_r) / abs(a10t_r);
        bl1_zcopys( BLIS1_CONJUGATE, a10t_r, delta1 );
        bl1_zabsval2( a10t_r, &absv );
        bl1_zinvscals( &absv, delta1 );

        // a10t_r = abs(a10t_r);
        *a10t_r = absv;

        // a21_t = a21_t * conj(delta1); only if a next entry exists.
        if ( m_ahead > 0 )
        {
          dcomplex* a21_t = buff_A + (i  )*cs_A + (i+1)*rs_A;

          bl1_zcopyconj( delta1, &conj_delta1 );
          bl1_zscals( &conj_delta1, a21_t );
        }
      }

      break;
    }
  }

  return FLA_SUCCESS;
}
References FLA_Absolute_value(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt(), FLA_Inv_scal(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_min_dim(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scalc(), and FLA_Set().
{
    // Unblocked, object-based realification of the subdiagonal of a
    // lower-stored tridiagonal matrix A. The scaling factor used at each
    // step is stored in the corresponding element of d; the first element
    // of d is set to one. Always returns FLA_SUCCESS.
    FLA_Obj ATL, ATR, A00, a01, A02;
    FLA_Obj ABL, ABR, a10t, alpha11, a12t;
    FLA_Obj A20, a21, A22;

    FLA_Obj dT, d0;
    FLA_Obj dB, delta1;
    FLA_Obj d2;

    FLA_Obj a10t_l, a10t_r;
    FLA_Obj a21_t, a21_b;

    // 1x1 scratch object, same datatype as A, holding abs(a10t_r).
    FLA_Obj magnitude;

    FLA_Obj_create( FLA_Obj_datatype( A ), 1, 1, 0, 0, &magnitude );

    FLA_Part_2x2( A, &ATL, &ATR,
                     &ABL, &ABR, 1, 1, FLA_TL );

    FLA_Part_2x1( d, &dT,
                     &dB, 1, FLA_TOP );

    // Set first element of vector d to one.
    FLA_Set( FLA_ONE, dT );

    while ( FLA_Obj_min_dim( ABR ) > 0 )
    {
        FLA_Repart_2x2_to_3x3( ATL, ATR, &A00,  &a01,     &A02,
                                         &a10t, &alpha11, &a12t,
                               ABL, ABR, &A20,  &a21,     &A22,
                               1, 1, FLA_BR );

        FLA_Repart_2x1_to_3x1( dT, &d0,
                                   &delta1,
                               dB, &d2,
                               1, FLA_BOTTOM );

        // ---- loop body ------------------------------------------------

        FLA_Part_1x2( a10t, &a10t_l, &a10t_r, 1, FLA_RIGHT );

        FLA_Part_2x1( a21, &a21_t,
                           &a21_b, 1, FLA_TOP );

        // delta1 = conj(a10t_r) / abs(a10t_r);
        FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
        FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, magnitude );
        FLA_Absolute_value( magnitude );
        FLA_Inv_scal( magnitude, delta1 );

        // a10t_r  = delta1 * a10t_r
        //         = abs(a10t_r);
        // alpha11 = delta1 * alpha11 * conj(delta1)
        //         = alpha11;
        // a21_t   = a21_t * conj(delta1);
        FLA_Copyt( FLA_NO_TRANSPOSE, magnitude, a10t_r );
        FLA_Scalc( FLA_CONJUGATE, delta1, a21_t );

        // ---------------------------------------------------------------

        FLA_Cont_with_3x3_to_2x2( &ATL, &ATR, A00,  a01,     A02,
                                              a10t, alpha11, a12t,
                                  &ABL, &ABR, A20,  a21,     A22,
                                  FLA_TL );

        FLA_Cont_with_3x1_to_2x1( &dT, d0,
                                       delta1,
                                  &dB, d2,
                                  FLA_TOP );
    }

    FLA_Obj_free( &magnitude );

    return FLA_SUCCESS;
}
FLA_Error FLA_Tridiag_UT_realify( FLA_Uplo uplo, FLA_Obj A, FLA_Obj d )
References FLA_Check_error_level(), FLA_Obj_is_real(), FLA_ONE, FLA_Set(), FLA_Tridiag_UT_l_realify_opt(), FLA_Tridiag_UT_realify_check(), and FLA_Tridiag_UT_u_realify_opt().
Referenced by FLA_Hevd_lv_unb_var1(), and FLA_Hevd_lv_unb_var2().
{
    // Front end for realifying a tridiagonal matrix. For a real A the
    // vector d is set to ones and nothing else happens; otherwise the work
    // is delegated to the optimized lower or upper routine according to
    // uplo, and that routine's result is returned.
    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    {
        FLA_Tridiag_UT_realify_check( uplo, A, d );
    }

    if ( FLA_Obj_is_real( A ) )
    {
        FLA_Set( FLA_ONE, d );
        return FLA_SUCCESS;
    }

    // The "_opt" routines are used here; the original source keeps calls
    // to the "_unb" counterparts commented out as alternatives.
    return ( uplo == FLA_LOWER_TRIANGULAR )
               ? FLA_Tridiag_UT_l_realify_opt( A, d )
               : FLA_Tridiag_UT_u_realify_opt( A, d );
}
References FLA_Check_error_level(), FLA_Tridiag_UT_realify_subdiagonal_check(), and FLA_Tridiag_UT_realify_subdiagonal_opt().
{
    // Front end for realifying a subdiagonal vector b, with the scaling
    // factors written to d. Checks the arguments when error checking is
    // enabled, then returns the result of the optimized implementation.
    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    {
        FLA_Tridiag_UT_realify_subdiagonal_check( b, d );
    }

    return FLA_Tridiag_UT_realify_subdiagonal_opt( b, d );
}
References bl1_csetv(), bl1_dsetv(), bl1_ssetv(), bl1_zsetv(), BLIS1_CONJUGATE, FLA_Obj_datatype(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_ONE, scomplex::imag, and dcomplex::imag.
Referenced by FLA_Tridiag_UT_realify_subdiagonal().
{
    // Makes the entries of the subdiagonal vector b real and stores the
    // scaling factors in d (whose length m drives the loop).
    //
    // Real datatypes: d is filled with ones.
    // Complex datatypes: d[0] = 1; for i = 1..m-1 the entry b[i-1] is
    // replaced by its absolute value when its imaginary part is nonzero
    // (d[i] = conj(b[i-1]) / abs(b[i-1])), otherwise d[i] = 1. The next
    // entry b[i], if any, is then scaled by conj(d[i]).
    //
    // Always returns FLA_SUCCESS.
    FLA_Datatype dt;
    int          len, stride_b, stride_d;
    int          k;

    dt       = FLA_Obj_datatype( d );
    len      = FLA_Obj_vector_dim( d );
    stride_d = FLA_Obj_vector_inc( d );

    // b is only queried when d has more than one element.
    stride_b = ( len > 1 ? FLA_Obj_vector_inc( b ) : 0 );

    switch ( dt )
    {
        case FLA_FLOAT:
        {
            float* ones = FLA_FLOAT_PTR( FLA_ONE );
            float* out  = FLA_FLOAT_PTR( d );

            bl1_ssetv( len, ones, out, stride_d );
            break;
        }

        case FLA_DOUBLE:
        {
            double* ones = FLA_DOUBLE_PTR( FLA_ONE );
            double* out  = FLA_DOUBLE_PTR( d );

            bl1_dsetv( len, ones, out, stride_d );
            break;
        }

        case FLA_COMPLEX:
        {
            scomplex* sub  = ( len > 1 ? FLA_COMPLEX_PTR( b ) : NULL );
            scomplex* out  = FLA_COMPLEX_PTR( d );
            scomplex* ones = FLA_COMPLEX_PTR( FLA_ONE );

            // First scaling factor is always one.
            bl1_csetv( 1, ones, out, stride_d );

            for ( k = 1; k < len; ++k )
            {
                scomplex* sub_cur = sub + (k-1)*stride_b;
                scomplex* factor  = out + (k  )*stride_d;

                if ( sub_cur->imag == 0.0F )
                {
                    // Entry is already real: nothing to rotate.
                    *factor = *ones;
                }
                else
                {
                    scomplex modulus;

                    // factor = conj(sub_cur) / abs(sub_cur); sub_cur = abs(sub_cur).
                    bl1_ccopys( BLIS1_CONJUGATE, sub_cur, factor );
                    bl1_cabsval2( sub_cur, &modulus );
                    bl1_cinvscals( &modulus, factor );

                    *sub_cur = modulus;
                }

                // Carry the conjugated factor into the next entry, if any.
                if ( k < ( len - 1 ) )
                {
                    scomplex* sub_next = sub + (k  )*stride_b;
                    scomplex  factor_conj;

                    bl1_ccopyconj( factor, &factor_conj );
                    bl1_cscals( &factor_conj, sub_next );
                }
            }
            break;
        }

        case FLA_DOUBLE_COMPLEX:
        {
            dcomplex* sub  = ( len > 1 ? FLA_DOUBLE_COMPLEX_PTR( b ) : NULL );
            dcomplex* out  = FLA_DOUBLE_COMPLEX_PTR( d );
            dcomplex* ones = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );

            // First scaling factor is always one.
            bl1_zsetv( 1, ones, out, stride_d );

            for ( k = 1; k < len; ++k )
            {
                dcomplex* sub_cur = sub + (k-1)*stride_b;
                dcomplex* factor  = out + (k  )*stride_d;

                if ( sub_cur->imag == 0.0 )
                {
                    // Entry is already real: nothing to rotate.
                    *factor = *ones;
                }
                else
                {
                    dcomplex modulus;

                    // factor = conj(sub_cur) / abs(sub_cur); sub_cur = abs(sub_cur).
                    bl1_zcopys( BLIS1_CONJUGATE, sub_cur, factor );
                    bl1_zabsval2( sub_cur, &modulus );
                    bl1_zinvscals( &modulus, factor );

                    *sub_cur = modulus;
                }

                // Carry the conjugated factor into the next entry, if any.
                if ( k < ( len - 1 ) )
                {
                    dcomplex* sub_next = sub + (k  )*stride_b;
                    dcomplex  factor_conj;

                    bl1_zcopyconj( factor, &factor_conj );
                    bl1_zscals( &factor_conj, sub_next );
                }
            }
            break;
        }
    }

    return FLA_SUCCESS;
}
References FLA_Check_error_level(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_1x2(), FLA_Part_2x1(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Tridiag_UT_recover_tau_check(), and FLA_Tridiag_UT_recover_tau_submatrix().
{
    // Walks T left to right and t top to bottom in matching panels and
    // calls FLA_Tridiag_UT_recover_tau_submatrix() on each ( T1, t1 ) pair.
    // The panel size is at most the length of T. Always returns FLA_SUCCESS.
    FLA_Obj TL, TR, T0, T1, T2;
    FLA_Obj tT, t0;
    FLA_Obj tB, t1;
    FLA_Obj t2;

    dim_t panel_max, panel;

    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    {
        FLA_Tridiag_UT_recover_tau_check( T, t );
    }

    // Upper bound on the panel size.
    panel_max = FLA_Obj_length( T );

    // Start with empty "already processed" partitions.
    FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );

    FLA_Part_2x1( t, &tT,
                     &tB, 0, FLA_TOP );

    // Continue until tT has grown to cover all of t.
    while ( FLA_Obj_length( tT ) < FLA_Obj_length( t ) )
    {
        // The final panel may be smaller than panel_max.
        panel = min( FLA_Obj_length( tB ), panel_max );

        FLA_Repart_1x2_to_1x3( TL, TR, &T0, &T1, &T2,
                               panel, FLA_RIGHT );

        FLA_Repart_2x1_to_3x1( tT, &t0,
                                   &t1,
                               tB, &t2,
                               panel, FLA_BOTTOM );

        // ---- loop body ------------------------------------------------

        FLA_Tridiag_UT_recover_tau_submatrix( T1, t1 );

        // ---------------------------------------------------------------

        FLA_Cont_with_1x3_to_1x2( &TL, &TR, T0, T1, T2,
                                  FLA_LEFT );

        FLA_Cont_with_3x1_to_2x1( &tT, t0,
                                       t1,
                                  &tB, t2,
                                  FLA_TOP );
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Tridiag_UT_scale_diagonals( FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A )
References FLA_Bidiag_UT_l_scale_diagonals(), FLA_Bidiag_UT_u_scale_diagonals(), FLA_Check_error_level(), and FLA_Tridiag_UT_scale_diagonals_check().
{
    // Scales the diagonals of A by alpha by delegating to the bidiagonal
    // routine that matches uplo; returns the delegate's result.
    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    {
        FLA_Tridiag_UT_scale_diagonals_check( uplo, alpha, A );
    }

    if ( uplo == FLA_LOWER_TRIANGULAR )
    {
        return FLA_Bidiag_UT_l_scale_diagonals( alpha, A );
    }

    // Any other uplo value is handled by the upper-storage routine.
    return FLA_Bidiag_UT_u_scale_diagonals( alpha, A );
}
FLA_Error FLA_Tridiag_UT_shift_U( FLA_Uplo uplo, FLA_Obj A )
References FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Tridiag_UT_shift_U_check(), FLA_Tridiag_UT_shift_U_l_opc(), FLA_Tridiag_UT_shift_U_l_opd(), FLA_Tridiag_UT_shift_U_l_ops(), and FLA_Tridiag_UT_shift_U_l_opz().
Referenced by FLA_Tridiag_UT_form_Q().
{
    // Shifts the Householder vectors stored in A by dispatching, on the
    // datatype of A, to the matching lower-triangular kernel. Always
    // returns FLA_SUCCESS.
    FLA_Datatype dt;
    int          dim;
    int          stride_row, stride_col;

    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    {
        FLA_Tridiag_UT_shift_U_check( uplo, A );
    }

    dt = FLA_Obj_datatype( A );

    // Play with swapping of cs rs; we do not need a "u" version: for the
    // non-lower case the row and column strides are exchanged and the
    // width is used as the dimension, so the "l" kernels can be reused.
    if ( uplo == FLA_LOWER_TRIANGULAR )
    {
        dim        = FLA_Obj_length( A );
        stride_row = FLA_Obj_row_stride( A );
        stride_col = FLA_Obj_col_stride( A );
    }
    else
    {
        dim        = FLA_Obj_width( A );
        stride_col = FLA_Obj_row_stride( A );
        stride_row = FLA_Obj_col_stride( A );
    }

    switch ( dt )
    {
        case FLA_FLOAT:
            FLA_Tridiag_UT_shift_U_l_ops( dim,
                                          ( float* ) FLA_FLOAT_PTR( A ),
                                          stride_row, stride_col );
            break;

        case FLA_DOUBLE:
            FLA_Tridiag_UT_shift_U_l_opd( dim,
                                          ( double* ) FLA_DOUBLE_PTR( A ),
                                          stride_row, stride_col );
            break;

        case FLA_COMPLEX:
            FLA_Tridiag_UT_shift_U_l_opc( dim,
                                          ( scomplex* ) FLA_COMPLEX_PTR( A ),
                                          stride_row, stride_col );
            break;

        case FLA_DOUBLE_COMPLEX:
            FLA_Tridiag_UT_shift_U_l_opz( dim,
                                          ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A ),
                                          stride_row, stride_col );
            break;
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Tridiag_UT_shift_U_l_opc( int m_A, scomplex* buff_A, int rs_A, int cs_A )
References bl1_c0(), bl1_c1(), bl1_ccopyv(), bl1_csetv(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Tridiag_UT_shift_U().
{
  // Single-precision complex kernel that shifts the sub-diagonal column
  // data of an m_A x m_A buffer one column to the right, in place.
  //
  // Working from the last column back to column 1, for each column j:
  //   - the element in row 0 is set to zero,
  //   - the diagonal element is set to one,
  //   - the m_A-j-1 elements below row j of column j-1 are copied into
  //     the same rows of column j.
  // Finally element (0,0) is set to one and the m_A-1 elements below it in
  // column 0 are set to zero.
  //
  // An empty problem ( m_A < 1 ) is a no-op; without this guard the code
  // would write through buff_A and pass a negative length to bl1_csetv().
  //
  // Always returns FLA_SUCCESS.
  scomplex  zero = bl1_c0();
  scomplex  one  = bl1_c1();
  int       j;

  if ( m_A < 1 )
    return FLA_SUCCESS;

  // Columns are processed right to left so that each source column is read
  // before it is overwritten.
  for ( j = m_A - 1; j > 0; --j )
  {
    scomplex* alpha01 = buff_A + (j  )*cs_A + (0  )*rs_A;
    scomplex* alpha11 = buff_A + (j  )*cs_A + (j  )*rs_A;
    scomplex* a20     = buff_A + (j-1)*cs_A + (j+1)*rs_A;
    scomplex* a21     = buff_A + (j  )*cs_A + (j+1)*rs_A;

    int       m_ahead = m_A - j - 1;

    *alpha01 = zero;
    *alpha11 = one;
    bl1_ccopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a20, rs_A,
                a21, rs_A );
  }

  // First column: one on the diagonal, zeros below.
  *buff_A = one;
  bl1_csetv( m_A - 1,
             &zero,
             buff_A + rs_A, rs_A );

  return FLA_SUCCESS;
}
FLA_Error FLA_Tridiag_UT_shift_U_l_opd( int m_A, double* buff_A, int rs_A, int cs_A )
References bl1_d0(), bl1_d1(), bl1_dcopyv(), bl1_dsetv(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Tridiag_UT_shift_U().
{
  // Double-precision real kernel that shifts the sub-diagonal column data
  // of an m_A x m_A buffer one column to the right, in place.
  //
  // Working from the last column back to column 1, for each column j:
  //   - the element in row 0 is set to zero,
  //   - the diagonal element is set to one,
  //   - the m_A-j-1 elements below row j of column j-1 are copied into
  //     the same rows of column j.
  // Finally element (0,0) is set to one and the m_A-1 elements below it in
  // column 0 are set to zero.
  //
  // An empty problem ( m_A < 1 ) is a no-op; without this guard the code
  // would write through buff_A and pass a negative length to bl1_dsetv().
  //
  // Always returns FLA_SUCCESS.
  double  zero = bl1_d0();
  double  one  = bl1_d1();
  int     j;

  if ( m_A < 1 )
    return FLA_SUCCESS;

  // Columns are processed right to left so that each source column is read
  // before it is overwritten.
  for ( j = m_A - 1; j > 0; --j )
  {
    double* alpha01 = buff_A + (j  )*cs_A + (0  )*rs_A;
    double* alpha11 = buff_A + (j  )*cs_A + (j  )*rs_A;
    double* a20     = buff_A + (j-1)*cs_A + (j+1)*rs_A;
    double* a21     = buff_A + (j  )*cs_A + (j+1)*rs_A;

    int     m_ahead = m_A - j - 1;

    *alpha01 = zero;
    *alpha11 = one;
    bl1_dcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a20, rs_A,
                a21, rs_A );
  }

  // First column: one on the diagonal, zeros below.
  *buff_A = one;
  bl1_dsetv( m_A - 1,
             &zero,
             buff_A + rs_A, rs_A );

  return FLA_SUCCESS;
}
FLA_Error FLA_Tridiag_UT_shift_U_l_ops( int m_A, float* buff_A, int rs_A, int cs_A )
References bl1_s0(), bl1_s1(), bl1_scopyv(), bl1_ssetv(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Tridiag_UT_shift_U().
{
  // Single-precision real kernel that shifts the sub-diagonal column data
  // of an m_A x m_A buffer one column to the right, in place.
  //
  // Working from the last column back to column 1, for each column j:
  //   - the element in row 0 is set to zero,
  //   - the diagonal element is set to one,
  //   - the m_A-j-1 elements below row j of column j-1 are copied into
  //     the same rows of column j.
  // Finally element (0,0) is set to one and the m_A-1 elements below it in
  // column 0 are set to zero.
  //
  // An empty problem ( m_A < 1 ) is a no-op; without this guard the code
  // would write through buff_A and pass a negative length to bl1_ssetv().
  //
  // Always returns FLA_SUCCESS.
  float  zero = bl1_s0();
  float  one  = bl1_s1();
  int    j;

  if ( m_A < 1 )
    return FLA_SUCCESS;

  // Columns are processed right to left so that each source column is read
  // before it is overwritten.
  for ( j = m_A - 1; j > 0; --j )
  {
    float* alpha01 = buff_A + (j  )*cs_A + (0  )*rs_A;
    float* alpha11 = buff_A + (j  )*cs_A + (j  )*rs_A;
    float* a20     = buff_A + (j-1)*cs_A + (j+1)*rs_A;
    float* a21     = buff_A + (j  )*cs_A + (j+1)*rs_A;

    int    m_ahead = m_A - j - 1;

    *alpha01 = zero;
    *alpha11 = one;
    bl1_scopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a20, rs_A,
                a21, rs_A );
  }

  // First column: one on the diagonal, zeros below.
  *buff_A = one;
  bl1_ssetv( m_A - 1,
             &zero,
             buff_A + rs_A, rs_A );

  return FLA_SUCCESS;
}
FLA_Error FLA_Tridiag_UT_shift_U_l_opz( int m_A, dcomplex* buff_A, int rs_A, int cs_A )
References bl1_z0(), bl1_z1(), bl1_zcopyv(), bl1_zsetv(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Tridiag_UT_shift_U().
{
  // Double-precision complex kernel that shifts the sub-diagonal column
  // data of an m_A x m_A buffer one column to the right, in place.
  //
  // Working from the last column back to column 1, for each column j:
  //   - the element in row 0 is set to zero,
  //   - the diagonal element is set to one,
  //   - the m_A-j-1 elements below row j of column j-1 are copied into
  //     the same rows of column j.
  // Finally element (0,0) is set to one and the m_A-1 elements below it in
  // column 0 are set to zero.
  //
  // An empty problem ( m_A < 1 ) is a no-op; without this guard the code
  // would write through buff_A and pass a negative length to bl1_zsetv().
  //
  // Always returns FLA_SUCCESS.
  dcomplex  zero = bl1_z0();
  dcomplex  one  = bl1_z1();
  int       j;

  if ( m_A < 1 )
    return FLA_SUCCESS;

  // Columns are processed right to left so that each source column is read
  // before it is overwritten.
  for ( j = m_A - 1; j > 0; --j )
  {
    dcomplex* alpha01 = buff_A + (j  )*cs_A + (0  )*rs_A;
    dcomplex* alpha11 = buff_A + (j  )*cs_A + (j  )*rs_A;
    dcomplex* a20     = buff_A + (j-1)*cs_A + (j+1)*rs_A;
    dcomplex* a21     = buff_A + (j  )*cs_A + (j+1)*rs_A;

    int       m_ahead = m_A - j - 1;

    *alpha01 = zero;
    *alpha11 = one;
    bl1_zcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a20, rs_A,
                a21, rs_A );
  }

  // First column: one on the diagonal, zeros below.
  *buff_A = one;
  bl1_zsetv( m_A - 1,
             &zero,
             buff_A + rs_A, rs_A );

  return FLA_SUCCESS;
}
FLA_Error FLA_Tridiag_UT_shift_U_u_opc( int m_A, scomplex* buff_A, int rs_A, int cs_A )
FLA_Error FLA_Tridiag_UT_shift_U_u_opd( int m_A, double* buff_A, int rs_A, int cs_A )
FLA_Error FLA_Tridiag_UT_shift_U_u_ops( int m_A, float* buff_A, int rs_A, int cs_A )
FLA_Error FLA_Tridiag_UT_shift_U_u_opz( int m_A, dcomplex* buff_A, int rs_A, int cs_A )
FLA_Error FLA_Tridiag_UT_u( FLA_Obj A, FLA_Obj T, fla_tridiagut_t* cntl )
{
    // Upper-triangular counterpart of FLA_Tridiag_UT_l(). No variant is
    // enabled: the dispatch on FLA_Cntl_variant( cntl ) to the
    // FLA_Tridiag_UT_u_{unb,opt,blk}_var{1,2,3} routines is disabled in the
    // original source, so every call raises FLA_NOT_YET_IMPLEMENTED.
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}
References bl1_csetv(), bl1_dsetv(), bl1_ssetv(), bl1_zsetv(), BLIS1_CONJUGATE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_ONE.
Referenced by FLA_Tridiag_UT_realify().
{
  // Makes the superdiagonal of the upper-stored tridiagonal matrix A real
  // and records the scaling factors that were applied in the vector d.
  //
  // Real datatypes: d is simply filled with ones.
  // Complex datatypes: d[0] = 1 and, for each superdiagonal entry a01_b
  // with a nonzero imaginary part,
  //     delta1 = conj(a01_b) / abs(a01_b)
  //     a01_b  = abs(a01_b)
  //     a12t_l = a12t_l * conj(delta1)     (when a next entry exists)
  //
  // Entries whose imaginary part is already zero are left untouched and get
  // delta1 = 1, matching FLA_Tridiag_UT_realify_subdiagonal_opt(). This
  // keeps an exactly-zero superdiagonal entry from producing 0/0 = NaN in d
  // and A. Note that a negative real entry is therefore no longer replaced
  // by its absolute value.
  //
  // Always returns FLA_SUCCESS.
  FLA_Datatype datatype;
  int          m_A;
  int          rs_A, cs_A;
  int          inc_d;
  int          i;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_d    = FLA_Obj_vector_inc( d );

  // Nothing to do for an empty matrix. This also protects the complex
  // branches, which write d[0] unconditionally.
  if ( m_A < 1 )
    return FLA_SUCCESS;

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_d = FLA_FLOAT_PTR( d );
      float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );

      bl1_ssetv( m_A,
                 buff_1,
                 buff_d, inc_d );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_d = FLA_DOUBLE_PTR( d );
      double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );

      bl1_dsetv( m_A,
                 buff_1,
                 buff_d, inc_d );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_d = FLA_COMPLEX_PTR( d );
      scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );

      // Set first element of vector d to one.
      bl1_csetv( 1,
                 buff_1,
                 buff_d, inc_d );

      for ( i = 1; i < m_A; ++i )
      {
        scomplex* a01_b   = buff_A + (i  )*cs_A + (i-1)*rs_A;
        scomplex* delta1  = buff_d + (i  )*inc_d;
        scomplex  absv;
        scomplex  conj_delta1;

        int       m_ahead = m_A - i - 1;

        // Already real (including exactly zero): no scaling is needed.
        if ( a01_b->imag == 0.0F )
        {
          *delta1 = *buff_1;
          continue;
        }

        // delta1 = conj(a01_b) / abs(a01_b);
        bl1_ccopys( BLIS1_CONJUGATE, a01_b, delta1 );
        bl1_cabsval2( a01_b, &absv );
        bl1_cinvscals( &absv, delta1 );

        // a01_b = abs(a01_b);
        *a01_b = absv;

        // a12t_l = a12t_l * conj(delta1); only if a next entry exists.
        if ( m_ahead > 0 )
        {
          scomplex* a12t_l = buff_A + (i+1)*cs_A + (i  )*rs_A;

          bl1_ccopyconj( delta1, &conj_delta1 );
          bl1_cscals( &conj_delta1, a12t_l );
        }
      }

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
      dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );

      // Set first element of vector d to one.
      bl1_zsetv( 1,
                 buff_1,
                 buff_d, inc_d );

      for ( i = 1; i < m_A; ++i )
      {
        dcomplex* a01_b   = buff_A + (i  )*cs_A + (i-1)*rs_A;
        dcomplex* delta1  = buff_d + (i  )*inc_d;
        dcomplex  absv;
        dcomplex  conj_delta1;

        int       m_ahead = m_A - i - 1;

        // Already real (including exactly zero): no scaling is needed.
        if ( a01_b->imag == 0.0 )
        {
          *delta1 = *buff_1;
          continue;
        }

        // delta1 = conj(a01_b) / abs(a01_b);
        bl1_zcopys( BLIS1_CONJUGATE, a01_b, delta1 );
        bl1_zabsval2( a01_b, &absv );
        bl1_zinvscals( &absv, delta1 );

        // a01_b = abs(a01_b);
        *a01_b = absv;

        // a12t_l = a12t_l * conj(delta1); only if a next entry exists.
        if ( m_ahead > 0 )
        {
          dcomplex* a12t_l = buff_A + (i+1)*cs_A + (i  )*rs_A;

          bl1_zcopyconj( delta1, &conj_delta1 );
          bl1_zscals( &conj_delta1, a12t_l );
        }
      }

      break;
    }
  }

  return FLA_SUCCESS;
}
References FLA_Absolute_value(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt(), FLA_Inv_scal(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_min_dim(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scalc(), and FLA_Set().
{
    // Unblocked, object-based realification of the superdiagonal of an
    // upper-stored tridiagonal matrix A. The scaling factor used at each
    // step is stored in the corresponding element of d; the first element
    // of d is set to one. Always returns FLA_SUCCESS.
    FLA_Obj ATL, ATR, A00, a01, A02;
    FLA_Obj ABL, ABR, a10t, alpha11, a12t;
    FLA_Obj A20, a21, A22;

    FLA_Obj dT, d0;
    FLA_Obj dB, delta1;
    FLA_Obj d2;

    FLA_Obj a01_t, a01_b;
    FLA_Obj a12t_l, a12t_r;

    // 1x1 scratch object, same datatype as A, holding abs(a01_b).
    FLA_Obj magnitude;

    FLA_Obj_create( FLA_Obj_datatype( A ), 1, 1, 0, 0, &magnitude );

    FLA_Part_2x2( A, &ATL, &ATR,
                     &ABL, &ABR, 1, 1, FLA_TL );

    FLA_Part_2x1( d, &dT,
                     &dB, 1, FLA_TOP );

    // Set first element of vector d to one.
    FLA_Set( FLA_ONE, dT );

    while ( FLA_Obj_min_dim( ABR ) > 0 )
    {
        FLA_Repart_2x2_to_3x3( ATL, ATR, &A00,  &a01,     &A02,
                                         &a10t, &alpha11, &a12t,
                               ABL, ABR, &A20,  &a21,     &A22,
                               1, 1, FLA_BR );

        FLA_Repart_2x1_to_3x1( dT, &d0,
                                   &delta1,
                               dB, &d2,
                               1, FLA_BOTTOM );

        // ---- loop body ------------------------------------------------

        FLA_Part_2x1( a01, &a01_t,
                           &a01_b, 1, FLA_BOTTOM );

        FLA_Part_1x2( a12t, &a12t_l, &a12t_r, 1, FLA_LEFT );

        // delta1 = conj(a01_b) / abs(a01_b);
        FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, delta1 );
        FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, magnitude );
        FLA_Absolute_value( magnitude );
        FLA_Inv_scal( magnitude, delta1 );

        // a01_b   = delta1 * a01_b
        //         = abs(a01_b);
        // alpha11 = delta1 * alpha11 * conj(delta1)
        //         = alpha11;
        // a12t_l  = a12t_l * conj(delta1);
        FLA_Copyt( FLA_NO_TRANSPOSE, magnitude, a01_b );
        FLA_Scalc( FLA_CONJUGATE, delta1, a12t_l );

        // ---------------------------------------------------------------

        FLA_Cont_with_3x3_to_2x2( &ATL, &ATR, A00,  a01,     A02,
                                              a10t, alpha11, a12t,
                                  &ABL, &ABR, A20,  a21,     A22,
                                  FLA_TL );

        FLA_Cont_with_3x1_to_2x1( &dT, d0,
                                       delta1,
                                  &dB, d2,
                                  FLA_TOP );
    }

    FLA_Obj_free( &magnitude );

    return FLA_SUCCESS;
}