libflame
revision_anchor
|
Functions | |
FLA_Error | FLA_SA_LU_unb (FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L) |
References bl1_camax(), bl1_ccopy(), bl1_cger(), bl1_cscal(), bl1_cswap(), bl1_damax(), bl1_dcopy(), bl1_dger(), bl1_dscal(), bl1_dswap(), bl1_samax(), bl1_scopy(), bl1_sger(), bl1_sscal(), bl1_sswap(), bl1_zamax(), bl1_zcopy(), bl1_zger(), bl1_zscal(), bl1_zswap(), BLIS1_NO_CONJUGATE, FLA_Copy_external(), FLA_MINUS_ONE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Triangularize(), scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.
Referenced by FLA_SA_LU_blk().
{ FLA_Datatype datatype; int m_U, cs_U; int m_D, cs_D; int cs_L; // int rs_U; int rs_D; // int rs_L; int m_U_min_j, m_U_min_j_min_1; int j, ipiv; int* buff_p; if ( FLA_Obj_has_zero_dim( U ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( U ); m_U = FLA_Obj_length( U ); // rs_U = FLA_Obj_row_stride( U ); cs_U = FLA_Obj_col_stride( U ); m_D = FLA_Obj_length( D ); rs_D = FLA_Obj_row_stride( D ); cs_D = FLA_Obj_col_stride( D ); // rs_L = FLA_Obj_row_stride( L ); cs_L = FLA_Obj_col_stride( L ); FLA_Copy_external( U, L ); FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, L ); buff_p = ( int * ) FLA_INT_PTR( p ); switch ( datatype ){ case FLA_FLOAT: { float* buff_U = ( float * ) FLA_FLOAT_PTR( U ); float* buff_D = ( float * ) FLA_FLOAT_PTR( D ); float* buff_L = ( float * ) FLA_FLOAT_PTR( L ); float* buff_minus1 = ( float * ) FLA_FLOAT_PTR( FLA_MINUS_ONE ); float L_tmp; float D_tmp; float d_inv_Ljj; for ( j = 0; j < m_U; ++j ) { bl1_samax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( fabsf( L_tmp ) < fabsf( D_tmp ) ) { bl1_sswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } d_inv_Ljj = 1.0F / buff_L[ j*cs_L + j ]; bl1_sscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bl1_sger( BLIS1_NO_CONJUGATE, BLIS1_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bl1_scopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } case FLA_DOUBLE: { double* buff_U = ( double * ) FLA_DOUBLE_PTR( U ); double* buff_D = ( double * ) FLA_DOUBLE_PTR( D ); double* buff_L = ( double * ) FLA_DOUBLE_PTR( L ); double* buff_minus1 = ( double * ) FLA_DOUBLE_PTR( FLA_MINUS_ONE ); double L_tmp; double D_tmp; double d_inv_Ljj; for ( j = 0; j < m_U; ++j ) { bl1_damax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( fabs( L_tmp ) < fabs( D_tmp ) ) { bl1_dswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } d_inv_Ljj = 1.0 / buff_L[ j*cs_L + j ]; bl1_dscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bl1_dger( BLIS1_NO_CONJUGATE, BLIS1_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bl1_dcopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } case FLA_COMPLEX: { scomplex* buff_U = ( scomplex * ) FLA_COMPLEX_PTR( U ); scomplex* buff_D = ( scomplex * ) FLA_COMPLEX_PTR( D ); scomplex* buff_L = ( scomplex * ) FLA_COMPLEX_PTR( L ); scomplex* buff_minus1 = ( scomplex * ) FLA_COMPLEX_PTR( FLA_MINUS_ONE ); scomplex L_tmp; scomplex D_tmp; scomplex d_inv_Ljj; scomplex Ljj; float temp; for ( j = 0; j < m_U; ++j ) { bl1_camax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( fabsf( L_tmp.real + L_tmp.imag ) < fabsf( D_tmp.real + D_tmp.imag ) ) { bl1_cswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } Ljj = buff_L[ j*cs_L + j ]; // d_inv_Ljj = 1.0 / Ljj temp = 1.0F / ( Ljj.real * Ljj.real + Ljj.imag * Ljj.imag ); d_inv_Ljj.real = Ljj.real * temp; d_inv_Ljj.imag = Ljj.imag * -temp; bl1_cscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bl1_cger( BLIS1_NO_CONJUGATE, BLIS1_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bl1_ccopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_U = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( U ); dcomplex* buff_D = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( D ); dcomplex* buff_L = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( L ); dcomplex* buff_minus1 = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE ); dcomplex L_tmp; dcomplex D_tmp; dcomplex d_inv_Ljj; dcomplex Ljj; double temp; for ( j = 0; j < m_U; ++j ) { bl1_zamax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( fabs( L_tmp.real + L_tmp.imag ) < fabs( D_tmp.real + D_tmp.imag ) ) { bl1_zswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } Ljj = buff_L[ j*cs_L + j ]; // d_inv_Ljj = 1.0 / Ljj temp = 1.0 / ( Ljj.real * Ljj.real + Ljj.imag * Ljj.imag ); d_inv_Ljj.real = Ljj.real * temp; d_inv_Ljj.imag = Ljj.imag * -temp; bl1_zscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bl1_zger( BLIS1_NO_CONJUGATE, BLIS1_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bl1_zcopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } } return FLA_SUCCESS; }