libflame  revision_anchor
Functions
FLA_Bidiag_UT_u.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Bidiag_UT_u_unb_var1 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_blk_var1 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_step_unb_var1 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_unb_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_blk_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_blf_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_step_unb_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_unb_var3 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_blk_var3 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_blf_var3 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_step_unb_var3 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_unb_var4 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_blk_var4 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_blf_var4 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_step_unb_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_unb_var5 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_blk_var5 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_step_unb_var5 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_u_opt_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_opt_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_ops_var1 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_opd_var1 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_opc_var1 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_opz_var1 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_opt_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_opt_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_ops_var2 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_opd_var2 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_opc_var2 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_opz_var2 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_opt_var3 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_opt_var3 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_ops_var3 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_opd_var3 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_opc_var3 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_opz_var3 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_opt_var4 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_opt_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_ops_var4 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_opd_var4 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_opc_var4 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_opz_var4 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_opt_var5 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_opt_var5 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_ops_var5 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_opd_var5 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_opc_var5 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_opz_var5 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_ofu_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_ofu_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_ofs_var2 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_ofd_var2 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_ofc_var2 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_ofz_var2 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_ofu_var3 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_ofu_var3 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_ofs_var3 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_ofd_var3 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_ofc_var3 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_ofz_var3 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_ofu_var4 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_ofu_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
FLA_Error FLA_Bidiag_UT_u_step_ofs_var4 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_ofd_var4 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_ofc_var4 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Bidiag_UT_u_step_ofz_var4 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
FLA_Error FLA_Fused_Gerc2_opt_var1 (FLA_Obj alpha, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj v, FLA_Obj A)
FLA_Error FLA_Fused_Gerc2_ops_var1 (int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Fused_Gerc2_opd_var1 (int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Fused_Gerc2_opc_var1 (int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Fused_Gerc2_opz_var1 (int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opt_var1 (FLA_Obj A, FLA_Obj u, FLA_Obj tau, FLA_Obj a, FLA_Obj beta, FLA_Obj y, FLA_Obj w)
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1 (int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1 (int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1 (FLA_Obj alpha, FLA_Obj tau, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj v, FLA_Obj A, FLA_Obj up, FLA_Obj a, FLA_Obj w)
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1 (int m_A, int n_A, float *buff_tau, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A, float *buff_up, int inc_up, float *buff_a, int inc_a, float *buff_w, int inc_w)
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1 (int m_A, int n_A, double *buff_tau, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A, double *buff_up, int inc_up, double *buff_a, int inc_a, double *buff_w, int inc_w)
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_tau, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_up, int inc_up, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w)
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_up, int inc_up, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w)
FLA_Error FLA_Fused_UYx_ZVx_opt_var1 (FLA_Obj delta, FLA_Obj a, FLA_Obj U, FLA_Obj Y, FLA_Obj Z, FLA_Obj V, FLA_Obj A, FLA_Obj temp, FLA_Obj t, FLA_Obj w, FLA_Obj al)
FLA_Error FLA_Fused_UYx_ZVx_ops_var1 (int m_U, int n_U, int m_V, int n_V, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_V, int rs_V, int cs_V, float *buff_A, int rs_A, int cs_A, float *buff_temp, int inc_temp, float *buff_t, int inc_t, float *buff_a, int inc_a, float *buff_w, int inc_w, float *buff_al, int inc_al)
FLA_Error FLA_Fused_UYx_ZVx_opd_var1 (int m_U, int n_U, int m_V, int n_V, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_V, int rs_V, int cs_V, double *buff_A, int rs_A, int cs_A, double *buff_temp, int inc_temp, double *buff_t, int inc_t, double *buff_a, int inc_a, double *buff_w, int inc_w, double *buff_al, int inc_al)
FLA_Error FLA_Fused_UYx_ZVx_opc_var1 (int m_U, int n_U, int m_V, int n_V, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_temp, int inc_temp, scomplex *buff_t, int inc_t, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w, scomplex *buff_al, int inc_al)
FLA_Error FLA_Fused_UYx_ZVx_opz_var1 (int m_U, int n_U, int m_V, int n_V, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_temp, int inc_temp, dcomplex *buff_t, int inc_t, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w, dcomplex *buff_al, int inc_al)

Function Documentation

References FLA_Bidiag_UT_u_step_ofu_var2(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_Bidiag_UT_u().

{
  /*
   * Blocked driver loop; the function header lies outside this excerpt and
   * supplies A, TU and TV.  Each pass exposes the next panel of A, TU and TV,
   * runs FLA_Bidiag_UT_u_step_ofu_var2() on the trailing submatrix ABR, and
   * then folds the panel into the "done" partitions.
   */

  /* 2x2 view of A and its 3x3 refinement. */
  FLA_Obj  ATL, ATR,
           ABL, ABR;
  FLA_Obj  A00, A01, A02,
           A10, A11, A12,
           A20, A21, A22;

  /* 1x2 views of TU / TV and their 1x3 refinements. */
  FLA_Obj  TUL, TUR, TU0, TU1, TU2;
  FLA_Obj  TVL, TVR, TV0, TV1, TV2;

  /* panel-by-panel top-left corners of TU1 / TV1 handed to the step routine. */
  FLA_Obj  TU1_corner;
  FLA_Obj  TV1_corner;

  /* Receivers for the quadrants that are never read afterwards. */
  FLA_Obj  skip_tr, skip_bl, skip_br;

  dim_t    panel_max;   /* upper bound on the panel size */
  dim_t    panel;       /* panel size of the current iteration */

  /* The length of TU bounds the panel size. */
  panel_max = FLA_Obj_length( TU );

  /* Initial split: zero-sized ATL / TUL / TVL. */
  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR, 0, 0, FLA_TL );
  FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
  FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );

  while ( FLA_Obj_min_dim( ABR ) > 0 )
  {
    /* Never take more than what is left of ABR. */
    panel = min( FLA_Obj_min_dim( ABR ), panel_max );

    /* Expose the next panel out of ABR, TUR and TVR. */
    FLA_Repart_2x2_to_3x3( ATL, ATR,   &A00, &A01, &A02,
                                       &A10, &A11, &A12,
                           ABL, ABR,   &A20, &A21, &A22,
                           panel, panel, FLA_BR );
    FLA_Repart_1x2_to_1x3( TUL, TUR,   &TU0, &TU1, &TU2,
                           panel, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( TVL, TVR,   &TV0, &TV1, &TV2,
                           panel, FLA_RIGHT );

    /* Restrict TU1 and TV1 to their panel-by-panel top-left corners. */
    FLA_Part_2x2( TU1, &TU1_corner, &skip_tr,
                       &skip_bl,    &skip_br, panel, panel, FLA_TL );
    FLA_Part_2x2( TV1, &TV1_corner, &skip_tr,
                       &skip_bl,    &skip_br, panel, panel, FLA_TL );

    /*
     * Run the step on the whole trailing submatrix.  The step_unb_var2 and
     * step_opt_var2 routines were previously called here with the same
     * three arguments.
     */
    FLA_Bidiag_UT_u_step_ofu_var2( ABR, TU1_corner, TV1_corner );

    /* Fold the processed panel into ATL / TUL / TVL. */
    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,   A00, A01, A02,
                                            A10, A11, A12,
                              &ABL, &ABR,   A20, A21, A22,
                              FLA_TL );
    FLA_Cont_with_1x3_to_1x2( &TUL, &TUR,   TU0, TU1, TU2,
                              FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &TVL, &TVR,   TV0, TV1, TV2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_ofu_var3(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_Bidiag_UT_u().

{
  /*
   * Blocked driver loop; the function header lies outside this excerpt and
   * supplies A, TU and TV.  Each pass exposes the next panel of A, TU and TV,
   * runs FLA_Bidiag_UT_u_step_ofu_var3() on the trailing submatrix ABR, and
   * then folds the panel into the "done" partitions.
   */

  /* 2x2 view of A and its 3x3 refinement. */
  FLA_Obj  ATL, ATR,
           ABL, ABR;
  FLA_Obj  A00, A01, A02,
           A10, A11, A12,
           A20, A21, A22;

  /* 1x2 views of TU / TV and their 1x3 refinements. */
  FLA_Obj  TUL, TUR, TU0, TU1, TU2;
  FLA_Obj  TVL, TVR, TV0, TV1, TV2;

  /* panel-by-panel top-left corners of TU1 / TV1 handed to the step routine. */
  FLA_Obj  TU1_corner;
  FLA_Obj  TV1_corner;

  /* Receivers for the quadrants that are never read afterwards. */
  FLA_Obj  skip_tr, skip_bl, skip_br;

  dim_t    panel_max;   /* upper bound on the panel size */
  dim_t    panel;       /* panel size of the current iteration */

  /* The length of TU bounds the panel size. */
  panel_max = FLA_Obj_length( TU );

  /* Initial split: zero-sized ATL / TUL / TVL. */
  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR, 0, 0, FLA_TL );
  FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
  FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );

  while ( FLA_Obj_min_dim( ABR ) > 0 )
  {
    /* Never take more than what is left of ABR. */
    panel = min( FLA_Obj_min_dim( ABR ), panel_max );

    /* Expose the next panel out of ABR, TUR and TVR. */
    FLA_Repart_2x2_to_3x3( ATL, ATR,   &A00, &A01, &A02,
                                       &A10, &A11, &A12,
                           ABL, ABR,   &A20, &A21, &A22,
                           panel, panel, FLA_BR );
    FLA_Repart_1x2_to_1x3( TUL, TUR,   &TU0, &TU1, &TU2,
                           panel, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( TVL, TVR,   &TV0, &TV1, &TV2,
                           panel, FLA_RIGHT );

    /* Restrict TU1 and TV1 to their panel-by-panel top-left corners. */
    FLA_Part_2x2( TU1, &TU1_corner, &skip_tr,
                       &skip_bl,    &skip_br, panel, panel, FLA_TL );
    FLA_Part_2x2( TV1, &TV1_corner, &skip_tr,
                       &skip_bl,    &skip_br, panel, panel, FLA_TL );

    /*
     * Run the step on the whole trailing submatrix.  The step_unb_var3 and
     * step_opt_var3 routines were previously called here with the same
     * three arguments.
     */
    FLA_Bidiag_UT_u_step_ofu_var3( ABR, TU1_corner, TV1_corner );

    /* Fold the processed panel into ATL / TUL / TVL. */
    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,   A00, A01, A02,
                                            A10, A11, A12,
                              &ABL, &ABR,   A20, A21, A22,
                              FLA_TL );
    FLA_Cont_with_1x3_to_1x2( &TUL, &TUR,   TU0, TU1, TU2,
                              FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &TVL, &TVR,   TV0, TV1, TV2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_ofu_var4(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u().

{
  // Blocked driver body; the function header is not part of this excerpt and
  // supplies A, TU and TV.  Each iteration runs
  // FLA_Bidiag_UT_u_step_ofu_var4() on the trailing submatrix ABR and, while
  // A22 is non-empty, applies A22 = A22 - U2 * Y2' - Z2 * V2' using the
  // workspaces U, V, Y and Z that are created here and freed before returning.

  // 2x2 view of A and the 3x3 refinement used inside the loop.
  FLA_Obj  ATL,   ATR,      A00, A01, A02, 
           ABL,   ABR,      A10, A11, A12,
                            A20, A21, A22;
  // Top/bottom views of the workspaces U, V, Y, Z and their 3x1 refinements.
  FLA_Obj  UT,              U0,
           UB,              U1,
                            U2;
  FLA_Obj  VT,              V0,
           VB,              V1,
                            V2;
  FLA_Obj  YT,              Y0,
           YB,              Y1,
                            Y2; 
  FLA_Obj  ZT,              Z0,
           ZB,              Z1,
                            Z2;
  // Left/right views of TU and TV and their 1x3 refinements.
  FLA_Obj  TUL,   TUR,      TU0, TU1, TU2;
  FLA_Obj  TVL,   TVR,      TV0, TV1, TV2;

  // Workspace objects owned by this routine.
  FLA_Obj  U, V, Y, Z;
  // Left b columns / top b rows of ABR.
  FLA_Obj  ABR_l, ABR_t;
  // Left b columns of each bottom workspace view, and the part of those
  // columns below their first b rows.
  FLA_Obj  UB_l, U2_l;
  FLA_Obj  VB_l, V2_l;
  FLA_Obj  YB_l, Y2_l;
  FLA_Obj  ZB_l, Z2_l;
  // b-by-b top-left corners of TU1 / TV1 passed to the step routine.
  FLA_Obj  TU1_tl;
  FLA_Obj  TV1_tl;
  // Scratch receivers for partitions that are never read; `none` is
  // overwritten by several of the partitioning calls below.
  FLA_Obj  none, none2, none3;
  // First row of VB_l and everything beneath it.
  FLA_Obj  VB_tl,
           VB_bl;
  FLA_Datatype datatype_A;
  dim_t        m_A, n_A;
  dim_t        b_alg, b;

  // The length of TU caps the block size used in every iteration.
  b_alg      = FLA_Obj_length( TU );

  datatype_A = FLA_Obj_datatype( A );
  m_A        = FLA_Obj_length( A );
  n_A        = FLA_Obj_width( A );

  // U and Z are m_A x b_alg; V and Y are n_A x b_alg.
  // NOTE(review): the results of these FLA_Obj_create() calls are not checked.
  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
  FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &V );
  FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &Y );
  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );

  // Initial partitioning: zero-sized top-left / top / left parts.
  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,   0, 0, FLA_TL );
  FLA_Part_2x1( U,    &UT,
                      &UB,            0, FLA_TOP );
  FLA_Part_2x1( V,    &VT,
                      &VB,            0, FLA_TOP );
  FLA_Part_2x1( Y,    &YT,
                      &YB,            0, FLA_TOP );
  FLA_Part_2x1( Z,    &ZT,
                      &ZB,            0, FLA_TOP );
  FLA_Part_1x2( TU,   &TUL, &TUR,      0, FLA_LEFT ); 
  FLA_Part_1x2( TV,   &TVL, &TVR,      0, FLA_LEFT ); 

  // Continue while the trailing submatrix still has rows and columns.
  while ( FLA_Obj_min_dim( ABR ) > 0 )
  {
    // Block size for this pass: never larger than what is left of ABR.
    b = min( FLA_Obj_min_dim( ABR ), b_alg );

    // Expose the next b-sized block of A, of each workspace, and of TU / TV.
    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
                        /* ************* */   /* ******************** */
                                                &A10, /**/ &A11, &A12,
                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
                           b, b, FLA_BR );
    FLA_Repart_2x1_to_3x1( UT,                &U0,
                        /* ** */            /* ** */
                                              &U1,
                           UB,                &U2,        b, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( VT,                &V0,
                        /* ** */            /* ** */
                                              &V1,
                           VB,                &V2,        b, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( YT,                &Y0,
                        /* ** */            /* ** */
                                              &Y1,
                           YB,                &Y2,        b, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( ZT,                &Z0,
                        /* ** */            /* ** */
                                              &Z1,
                           ZB,                &Z2,        b, FLA_BOTTOM );
    FLA_Repart_1x2_to_1x3( TUL, /**/ TUR,       &TU0, /**/ &TU1, &TU2,
                           b, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( TVL, /**/ TVR,       &TV0, /**/ &TV1, &TV2,
                           b, FLA_RIGHT );

    /*------------------------------------------------------------*/

    // Restrict TU1 and TV1 to their b-by-b top-left corners.
    FLA_Part_2x2( TU1,     &TU1_tl, &none,   
                           &none2,  &none3,   b, b, FLA_TL ); 

    FLA_Part_2x2( TV1,     &TV1_tl, &none,   
                           &none2,  &none3,   b, b, FLA_TL ); 

    // Left b columns and top b rows of the trailing submatrix.
    FLA_Part_1x2( ABR,    &ABR_l, &none,    b, FLA_LEFT );
    FLA_Part_2x1( ABR,    &ABR_t,
                          &none,            b, FLA_TOP );

    // Left b columns of each bottom workspace view.
    FLA_Part_1x2( UB,     &UB_l,  &none,    b, FLA_LEFT );
    FLA_Part_1x2( VB,     &VB_l,  &none,    b, FLA_LEFT );
    FLA_Part_1x2( YB,     &YB_l,  &none,    b, FLA_LEFT );
    FLA_Part_1x2( ZB,     &ZB_l,  &none,    b, FLA_LEFT );

    // Part of those columns below the first b rows.  These are set up before
    // the step call and the copies below, i.e. before the data they refer to
    // has been written.
    FLA_Part_2x1( UB_l,   &none,
                          &U2_l,            b, FLA_TOP );
    FLA_Part_2x1( VB_l,   &none,
                          &V2_l,            b, FLA_TOP );
    FLA_Part_2x1( YB_l,   &none, 
                          &Y2_l,            b, FLA_TOP );
    FLA_Part_2x1( ZB_l,   &none,
                          &Z2_l,            b, FLA_TOP );

    // [ ABR, YB, ZB, TU1, TV1 ] = FLA_Bidiag_UT_u_step_unb_var4( ABR, TU1, TV1, b );
    //FLA_Bidiag_UT_u_step_unb_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
    FLA_Bidiag_UT_u_step_ofu_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
    //FLA_Bidiag_UT_u_step_opt_var4( ABR, YB, ZB, TU1_tl, TV1_tl );

    // The trailing update is skipped once A22 has no rows left.
    if ( FLA_Obj_length( A22 ) > 0 )
    {
      // Build UB from ABR, with explicit unit subdiagonal and zeros.
      // (Copies the left b columns of ABR into UB_l, then triangularizes
      // UB_l as lower triangular with FLA_UNIT_DIAG.)
      FLA_Copy( ABR_l, UB_l );
      FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_l );

      // Build VB from ABR, with explicit unit subdiagonal and zeros.
      // (Copies the transpose of the top b rows of ABR into VB_l, sets the
      // first row of VB_l to zero, and triangularizes the remaining rows as
      // lower triangular with FLA_UNIT_DIAG.)
      FLA_Copyt( FLA_TRANSPOSE, ABR_t, VB_l );
      FLA_Part_2x1( VB_l,   &VB_tl,
                            &VB_bl,            1, FLA_TOP );
      FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, VB_bl );
      FLA_Set( FLA_ZERO, VB_tl );

      // A22 = A22 - U2 * Y2' - Z2 * V2';
      FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                         FLA_MINUS_ONE, U2_l, Y2_l, FLA_ONE, A22 );
      FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                         FLA_MINUS_ONE, Z2_l, V2_l, FLA_ONE, A22 );
    }

    /*------------------------------------------------------------*/

    // Fold the processed block back into the top-left / top / left partitions.
    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
                                                     A10, A11, /**/ A12,
                            /* ************** */  /* ****************** */
                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,
                              FLA_TL );
    FLA_Cont_with_3x1_to_2x1( &UT,                U0,
                                                  U1,
                            /* ** */           /* ** */
                              &UB,                U2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &VT,                V0,
                                                  V1,
                            /* ** */           /* ** */
                              &VB,                V2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &YT,                Y0,
                                                  Y1,
                            /* ** */           /* ** */
                              &YB,                Y2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &ZT,                Z0,
                                                  Z1,
                            /* ** */           /* ** */
                              &ZB,                Z2,     FLA_TOP );
    FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR,       TU0, TU1, /**/ TU2,
                              FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR,       TV0, TV1, /**/ TV2,
                              FLA_LEFT );
  }

  // Release the four workspaces created above.
  FLA_Obj_free( &U );
  FLA_Obj_free( &V );
  FLA_Obj_free( &Y );
  FLA_Obj_free( &Z );

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_opt_var1(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_Bidiag_UT_u().

{
  /*
   * Blocked driver loop; the function header lies outside this excerpt and
   * supplies A, TU and TV.  Each pass exposes the next panel of A, TU and TV,
   * runs FLA_Bidiag_UT_u_step_opt_var1() on the trailing submatrix ABR, and
   * then folds the panel into the "done" partitions.
   */

  /* 2x2 view of A and its 3x3 refinement. */
  FLA_Obj  ATL, ATR,
           ABL, ABR;
  FLA_Obj  A00, A01, A02,
           A10, A11, A12,
           A20, A21, A22;

  /* 1x2 views of TU / TV and their 1x3 refinements. */
  FLA_Obj  TUL, TUR, TU0, TU1, TU2;
  FLA_Obj  TVL, TVR, TV0, TV1, TV2;

  /* panel-by-panel top-left corners of TU1 / TV1 handed to the step routine. */
  FLA_Obj  TU1_corner;
  FLA_Obj  TV1_corner;

  /* Receivers for the quadrants that are never read afterwards. */
  FLA_Obj  skip_tr, skip_bl, skip_br;

  dim_t    panel_max;   /* upper bound on the panel size */
  dim_t    panel;       /* panel size of the current iteration */

  /* The length of TU bounds the panel size. */
  panel_max = FLA_Obj_length( TU );

  /* Initial split: zero-sized ATL / TUL / TVL. */
  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR, 0, 0, FLA_TL );
  FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
  FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );

  while ( FLA_Obj_min_dim( ABR ) > 0 )
  {
    /* Never take more than what is left of ABR. */
    panel = min( FLA_Obj_min_dim( ABR ), panel_max );

    /* Expose the next panel out of ABR, TUR and TVR. */
    FLA_Repart_2x2_to_3x3( ATL, ATR,   &A00, &A01, &A02,
                                       &A10, &A11, &A12,
                           ABL, ABR,   &A20, &A21, &A22,
                           panel, panel, FLA_BR );
    FLA_Repart_1x2_to_1x3( TUL, TUR,   &TU0, &TU1, &TU2,
                           panel, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( TVL, TVR,   &TV0, &TV1, &TV2,
                           panel, FLA_RIGHT );

    /* Restrict TU1 and TV1 to their panel-by-panel top-left corners. */
    FLA_Part_2x2( TU1, &TU1_corner, &skip_tr,
                       &skip_bl,    &skip_br, panel, panel, FLA_TL );
    FLA_Part_2x2( TV1, &TV1_corner, &skip_tr,
                       &skip_bl,    &skip_br, panel, panel, FLA_TL );

    /*
     * Run the step on the whole trailing submatrix.  The step_unb_var1
     * routine was previously called here with the same three arguments.
     */
    FLA_Bidiag_UT_u_step_opt_var1( ABR, TU1_corner, TV1_corner );

    /* Fold the processed panel into ATL / TUL / TVL. */
    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,   A00, A01, A02,
                                            A10, A11, A12,
                              &ABL, &ABR,   A20, A21, A22,
                              FLA_TL );
    FLA_Cont_with_1x3_to_1x2( &TUL, &TUR,   TU0, TU1, TU2,
                              FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &TVL, &TVR,   TV0, TV1, TV2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_opt_var2(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_Bidiag_UT_u().

{
  /*
   * Blocked driver loop; the function header lies outside this excerpt and
   * supplies A, TU and TV.  Each pass exposes the next panel of A, TU and TV,
   * runs FLA_Bidiag_UT_u_step_opt_var2() on the trailing submatrix ABR, and
   * then folds the panel into the "done" partitions.
   */

  /* 2x2 view of A and its 3x3 refinement. */
  FLA_Obj  ATL, ATR,
           ABL, ABR;
  FLA_Obj  A00, A01, A02,
           A10, A11, A12,
           A20, A21, A22;

  /* 1x2 views of TU / TV and their 1x3 refinements. */
  FLA_Obj  TUL, TUR, TU0, TU1, TU2;
  FLA_Obj  TVL, TVR, TV0, TV1, TV2;

  /* panel-by-panel top-left corners of TU1 / TV1 handed to the step routine. */
  FLA_Obj  TU1_corner;
  FLA_Obj  TV1_corner;

  /* Receivers for the quadrants that are never read afterwards. */
  FLA_Obj  skip_tr, skip_bl, skip_br;

  dim_t    panel_max;   /* upper bound on the panel size */
  dim_t    panel;       /* panel size of the current iteration */

  /* The length of TU bounds the panel size. */
  panel_max = FLA_Obj_length( TU );

  /* Initial split: zero-sized ATL / TUL / TVL. */
  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR, 0, 0, FLA_TL );
  FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
  FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );

  while ( FLA_Obj_min_dim( ABR ) > 0 )
  {
    /* Never take more than what is left of ABR. */
    panel = min( FLA_Obj_min_dim( ABR ), panel_max );

    /* Expose the next panel out of ABR, TUR and TVR. */
    FLA_Repart_2x2_to_3x3( ATL, ATR,   &A00, &A01, &A02,
                                       &A10, &A11, &A12,
                           ABL, ABR,   &A20, &A21, &A22,
                           panel, panel, FLA_BR );
    FLA_Repart_1x2_to_1x3( TUL, TUR,   &TU0, &TU1, &TU2,
                           panel, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( TVL, TVR,   &TV0, &TV1, &TV2,
                           panel, FLA_RIGHT );

    /* Restrict TU1 and TV1 to their panel-by-panel top-left corners. */
    FLA_Part_2x2( TU1, &TU1_corner, &skip_tr,
                       &skip_bl,    &skip_br, panel, panel, FLA_TL );
    FLA_Part_2x2( TV1, &TV1_corner, &skip_tr,
                       &skip_bl,    &skip_br, panel, panel, FLA_TL );

    /*
     * Run the step on the whole trailing submatrix.  The step_unb_var2 and
     * step_ofu_var2 routines were previously called here with the same
     * three arguments.
     */
    FLA_Bidiag_UT_u_step_opt_var2( ABR, TU1_corner, TV1_corner );

    /* Fold the processed panel into ATL / TUL / TVL. */
    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,   A00, A01, A02,
                                            A10, A11, A12,
                              &ABL, &ABR,   A20, A21, A22,
                              FLA_TL );
    FLA_Cont_with_1x3_to_1x2( &TUL, &TUR,   TU0, TU1, TU2,
                              FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &TVL, &TVR,   TV0, TV1, TV2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_opt_var3(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_Bidiag_UT_u().

{
  // Blocked loop over the trailing submatrix ABR of A.  Every iteration
  // repartitions A, TU and TV by b, calls
  // FLA_Bidiag_UT_u_step_opt_var3() on ABR, and then moves the partition
  // boundaries forward.  Always returns FLA_SUCCESS.
  FLA_Obj  ATL,   ATR,      A00, A01, A02, 
           ABL,   ABR,      A10, A11, A12,
                            A20, A21, A22;
  FLA_Obj  TUL,   TUR,      TU0, TU1, TU2; 
  FLA_Obj  TVL,   TVR,      TV0, TV1, TV2; 

  // Views (partitioned with b, b, FLA_TL) of TU1 / TV1 that are handed to
  // the step routine.
  FLA_Obj  TU1_tl;
  FLA_Obj  TV1_tl;
  // Scratch outputs for partitions whose contents are never read here.
  FLA_Obj  none, none2, none3;
  dim_t    b_alg, b;

  // The algorithmic block size is taken from FLA_Obj_length( TU ).
  b_alg = FLA_Obj_length( TU );

  // Initial partitioning: size-0 leading parts for A, TU and TV.
  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,   0, 0, FLA_TL );
  FLA_Part_1x2( TU,   &TUL, &TUR,      0, FLA_LEFT ); 
  FLA_Part_1x2( TV,   &TVL, &TVR,      0, FLA_LEFT ); 

  // Keep going while ABR still has a nonzero minimum dimension.
  while ( FLA_Obj_min_dim( ABR ) > 0 )
  {
    // Never take a block larger than what is left in ABR.
    b = min( FLA_Obj_min_dim( ABR ), b_alg );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
                        /* ************* */   /* ******************** */
                                                &A10, /**/ &A11, &A12,
                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
                           b, b, FLA_BR );
    FLA_Repart_1x2_to_1x3( TUL, /**/ TUR,       &TU0, /**/ &TU1, &TU2,
                           b, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( TVL, /**/ TVR,       &TV0, /**/ &TV1, &TV2,
                           b, FLA_RIGHT );

    /*------------------------------------------------------------*/

    // Only TU1_tl / TV1_tl are used; the other three quadrants of each
    // partitioning land in the throw-away objects.
    FLA_Part_2x2( TU1,     &TU1_tl, &none,   
                           &none2,  &none3,   b, b, FLA_TL ); 

    FLA_Part_2x2( TV1,     &TV1_tl, &none,   
                           &none2,  &none3,   b, b, FLA_TL ); 

    // [ ABR, T1 ] = FLA_Bidiag_UT_u_step_unb_var3( ABR, TU1, TV1, b );
    //FLA_Bidiag_UT_u_step_unb_var3( ABR, TU1_tl, TV1_tl );
    //FLA_Bidiag_UT_u_step_ofu_var3( ABR, TU1_tl, TV1_tl );
    // The step routine's return value is not inspected.
    FLA_Bidiag_UT_u_step_opt_var3( ABR, TU1_tl, TV1_tl );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
                                                     A10, A11, /**/ A12,
                            /* ************** */  /* ****************** */
                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,
                              FLA_TL );
    FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR,       TU0, TU1, /**/ TU2,
                              FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR,       TV0, TV1, /**/ TV2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_opt_var4(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u().

{
  // Blocked loop over the trailing submatrix ABR of A.  Each iteration
  // calls FLA_Bidiag_UT_u_step_opt_var4() on ABR (with workspace YB, ZB)
  // and, when A22 is non-empty, applies two matrix-matrix updates to A22
  // built from the temporaries U, V, Y and Z.  The temporaries are
  // created at entry and freed before returning.  Always returns
  // FLA_SUCCESS.
  FLA_Obj  ATL,   ATR,      A00, A01, A02, 
           ABL,   ABR,      A10, A11, A12,
                            A20, A21, A22;
  FLA_Obj  UT,              U0,
           UB,              U1,
                            U2;
  FLA_Obj  VT,              V0,
           VB,              V1,
                            V2;
  FLA_Obj  YT,              Y0,
           YB,              Y1,
                            Y2; 
  FLA_Obj  ZT,              Z0,
           ZB,              Z1,
                            Z2;
  FLA_Obj  TUL,   TUR,      TU0, TU1, TU2;
  FLA_Obj  TVL,   TVR,      TV0, TV1, TV2;

  // Temporary matrices owned by this routine.
  FLA_Obj  U, V, Y, Z;
  // Views used only inside one iteration.
  FLA_Obj  ABR_l, ABR_t;
  FLA_Obj  UB_l, U2_l;
  FLA_Obj  VB_l, V2_l;
  FLA_Obj  YB_l, Y2_l;
  FLA_Obj  ZB_l, Z2_l;
  FLA_Obj  TU1_tl;
  FLA_Obj  TV1_tl;
  // Scratch outputs for partitions whose contents are never read here.
  FLA_Obj  none, none2, none3;
  FLA_Obj  VB_tl,
           VB_bl;
  FLA_Datatype datatype_A;
  dim_t        m_A, n_A;
  dim_t        b_alg, b;

  // The algorithmic block size is taken from FLA_Obj_length( TU ).
  b_alg      = FLA_Obj_length( TU );

  datatype_A = FLA_Obj_datatype( A );
  m_A        = FLA_Obj_length( A );
  n_A        = FLA_Obj_width( A );

  // U and Z are m_A x b_alg; V and Y are n_A x b_alg; all share A's datatype.
  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
  FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &V );
  FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &Y );
  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );

  // Initial partitioning: size-0 leading parts for every operand.
  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,   0, 0, FLA_TL );
  FLA_Part_2x1( U,    &UT,
                      &UB,            0, FLA_TOP );
  FLA_Part_2x1( V,    &VT,
                      &VB,            0, FLA_TOP );
  FLA_Part_2x1( Y,    &YT,
                      &YB,            0, FLA_TOP );
  FLA_Part_2x1( Z,    &ZT,
                      &ZB,            0, FLA_TOP );
  FLA_Part_1x2( TU,   &TUL, &TUR,      0, FLA_LEFT ); 
  FLA_Part_1x2( TV,   &TVL, &TVR,      0, FLA_LEFT ); 

  // Keep going while ABR still has a nonzero minimum dimension.
  while ( FLA_Obj_min_dim( ABR ) > 0 )
  {
    // Never take a block larger than what is left in ABR.
    b = min( FLA_Obj_min_dim( ABR ), b_alg );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
                        /* ************* */   /* ******************** */
                                                &A10, /**/ &A11, &A12,
                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
                           b, b, FLA_BR );
    FLA_Repart_2x1_to_3x1( UT,                &U0,
                        /* ** */            /* ** */
                                              &U1,
                           UB,                &U2,        b, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( VT,                &V0,
                        /* ** */            /* ** */
                                              &V1,
                           VB,                &V2,        b, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( YT,                &Y0,
                        /* ** */            /* ** */
                                              &Y1,
                           YB,                &Y2,        b, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( ZT,                &Z0,
                        /* ** */            /* ** */
                                              &Z1,
                           ZB,                &Z2,        b, FLA_BOTTOM );
    FLA_Repart_1x2_to_1x3( TUL, /**/ TUR,       &TU0, /**/ &TU1, &TU2,
                           b, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( TVL, /**/ TVR,       &TV0, /**/ &TV1, &TV2,
                           b, FLA_RIGHT );

    /*------------------------------------------------------------*/

    // Only TU1_tl / TV1_tl are used; the other three quadrants of each
    // partitioning land in the throw-away objects.
    FLA_Part_2x2( TU1,     &TU1_tl, &none,   
                           &none2,  &none3,   b, b, FLA_TL ); 

    FLA_Part_2x2( TV1,     &TV1_tl, &none,   
                           &none2,  &none3,   b, b, FLA_TL ); 

    // Left b columns and top b rows of ABR.
    FLA_Part_1x2( ABR,    &ABR_l, &none,    b, FLA_LEFT );
    FLA_Part_2x1( ABR,    &ABR_t,
                          &none,            b, FLA_TOP );

    // Left b columns of each workspace panel.
    FLA_Part_1x2( UB,     &UB_l,  &none,    b, FLA_LEFT );
    FLA_Part_1x2( VB,     &VB_l,  &none,    b, FLA_LEFT );
    FLA_Part_1x2( YB,     &YB_l,  &none,    b, FLA_LEFT );
    FLA_Part_1x2( ZB,     &ZB_l,  &none,    b, FLA_LEFT );

    // Drop the top b rows of those panels; the remainders feed the A22
    // update below.
    FLA_Part_2x1( UB_l,   &none,
                          &U2_l,            b, FLA_TOP );
    FLA_Part_2x1( VB_l,   &none,
                          &V2_l,            b, FLA_TOP );
    FLA_Part_2x1( YB_l,   &none, 
                          &Y2_l,            b, FLA_TOP );
    FLA_Part_2x1( ZB_l,   &none,
                          &Z2_l,            b, FLA_TOP );

    // [ ABR, YB, ZB, TU1, TV1 ] = FLA_Bidiag_UT_u_step_unb_var4( ABR, TU1, TV1, b );
    //FLA_Bidiag_UT_u_step_unb_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
    //FLA_Bidiag_UT_u_step_ofu_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
    // The step routine's return value is not inspected.
    FLA_Bidiag_UT_u_step_opt_var4( ABR, YB, ZB, TU1_tl, TV1_tl );

    // The trailing update is skipped entirely when A22 has no rows.
    if ( FLA_Obj_length( A22 ) > 0 )
    {
      // Build UB from ABR, with explicit unit diagonal and zeros
      // (FLA_UNIT_DIAG is applied to UB_l itself).
      FLA_Copy( ABR_l, UB_l );
      FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_l );

      // Build VB from ABR, with explicit unit subdiagonal and zeros:
      // the first row (VB_tl) is zeroed and the unit-diagonal
      // triangularization is applied to the remaining rows (VB_bl).
      FLA_Copyt( FLA_TRANSPOSE, ABR_t, VB_l );
      FLA_Part_2x1( VB_l,   &VB_tl,
                            &VB_bl,            1, FLA_TOP );
      FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, VB_bl );
      FLA_Set( FLA_ZERO, VB_tl );

      // A22 = A22 - U2 * Y2' - Z2 * V2';
      FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                         FLA_MINUS_ONE, U2_l, Y2_l, FLA_ONE, A22 );
      FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                         FLA_MINUS_ONE, Z2_l, V2_l, FLA_ONE, A22 );
    }

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
                                                     A10, A11, /**/ A12,
                            /* ************** */  /* ****************** */
                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,
                              FLA_TL );
    FLA_Cont_with_3x1_to_2x1( &UT,                U0,
                                                  U1,
                            /* ** */           /* ** */
                              &UB,                U2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &VT,                V0,
                                                  V1,
                            /* ** */           /* ** */
                              &VB,                V2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &YT,                Y0,
                                                  Y1,
                            /* ** */           /* ** */
                              &YB,                Y2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &ZT,                Z0,
                                                  Z1,
                            /* ** */           /* ** */
                              &ZB,                Z2,     FLA_TOP );
    FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR,       TU0, TU1, /**/ TU2,
                              FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR,       TV0, TV1, /**/ TV2,
                              FLA_LEFT );
  }

  // Release the temporaries created at entry.
  FLA_Obj_free( &U );
  FLA_Obj_free( &V );
  FLA_Obj_free( &Y );
  FLA_Obj_free( &Z );

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_opt_var5(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u().

{
  // Blocked loop over the trailing submatrix ABR of A.  Each iteration
  // calls FLA_Bidiag_UT_u_step_opt_var5() on ABR (with workspace YB, ZB)
  // and, when A22 is non-empty, applies two matrix-matrix updates to A22
  // built from the temporaries U, V, Y and Z.  The temporaries are
  // created at entry and freed before returning.  Always returns
  // FLA_SUCCESS.
  FLA_Obj  ATL,   ATR,      A00, A01, A02, 
           ABL,   ABR,      A10, A11, A12,
                            A20, A21, A22;
  FLA_Obj  UT,              U0,
           UB,              U1,
                            U2;
  FLA_Obj  VT,              V0,
           VB,              V1,
                            V2;
  FLA_Obj  YT,              Y0,
           YB,              Y1,
                            Y2; 
  FLA_Obj  ZT,              Z0,
           ZB,              Z1,
                            Z2;
  FLA_Obj  TUL,   TUR,      TU0, TU1, TU2;
  FLA_Obj  TVL,   TVR,      TV0, TV1, TV2;

  // Temporary matrices owned by this routine.
  FLA_Obj  U, V, Y, Z;
  // Views used only inside one iteration.
  FLA_Obj  ABR_l, ABR_t;
  FLA_Obj  UB_l, U2_l;
  FLA_Obj  VB_l, V2_l;
  FLA_Obj  YB_l, Y2_l;
  FLA_Obj  ZB_l, Z2_l;
  FLA_Obj  TU1_tl;
  FLA_Obj  TV1_tl;
  // Scratch outputs for partitions whose contents are never read here.
  FLA_Obj  none, none2, none3;
  FLA_Obj  VB_tl,
           VB_bl;
  FLA_Datatype datatype_A;
  dim_t        m_A, n_A;
  dim_t        b_alg, b;

  // The algorithmic block size is taken from FLA_Obj_length( TU ).
  b_alg      = FLA_Obj_length( TU );

  datatype_A = FLA_Obj_datatype( A );
  m_A        = FLA_Obj_length( A );
  n_A        = FLA_Obj_width( A );

  // U and Z are m_A x b_alg; V and Y are n_A x b_alg; all share A's datatype.
  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
  FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &V );
  FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &Y );
  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );

  // Initial partitioning: size-0 leading parts for every operand.
  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,   0, 0, FLA_TL );
  FLA_Part_2x1( U,    &UT,
                      &UB,            0, FLA_TOP );
  FLA_Part_2x1( V,    &VT,
                      &VB,            0, FLA_TOP );
  FLA_Part_2x1( Y,    &YT,
                      &YB,            0, FLA_TOP );
  FLA_Part_2x1( Z,    &ZT,
                      &ZB,            0, FLA_TOP );
  FLA_Part_1x2( TU,   &TUL, &TUR,      0, FLA_LEFT ); 
  FLA_Part_1x2( TV,   &TVL, &TVR,      0, FLA_LEFT ); 

  // Keep going while ABR still has a nonzero minimum dimension.
  while ( FLA_Obj_min_dim( ABR ) > 0 )
  {
    // Never take a block larger than what is left in ABR.
    b = min( FLA_Obj_min_dim( ABR ), b_alg );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
                        /* ************* */   /* ******************** */
                                                &A10, /**/ &A11, &A12,
                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
                           b, b, FLA_BR );
    FLA_Repart_2x1_to_3x1( UT,                &U0,
                        /* ** */            /* ** */
                                              &U1,
                           UB,                &U2,        b, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( VT,                &V0,
                        /* ** */            /* ** */
                                              &V1,
                           VB,                &V2,        b, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( YT,                &Y0,
                        /* ** */            /* ** */
                                              &Y1,
                           YB,                &Y2,        b, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( ZT,                &Z0,
                        /* ** */            /* ** */
                                              &Z1,
                           ZB,                &Z2,        b, FLA_BOTTOM );
    FLA_Repart_1x2_to_1x3( TUL, /**/ TUR,       &TU0, /**/ &TU1, &TU2,
                           b, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( TVL, /**/ TVR,       &TV0, /**/ &TV1, &TV2,
                           b, FLA_RIGHT );

    /*------------------------------------------------------------*/

    // Only TU1_tl / TV1_tl are used; the other three quadrants of each
    // partitioning land in the throw-away objects.
    FLA_Part_2x2( TU1,     &TU1_tl, &none,   
                           &none2,  &none3,   b, b, FLA_TL ); 

    FLA_Part_2x2( TV1,     &TV1_tl, &none,   
                           &none2,  &none3,   b, b, FLA_TL ); 

    // Left b columns and top b rows of ABR.
    FLA_Part_1x2( ABR,    &ABR_l, &none,    b, FLA_LEFT );
    FLA_Part_2x1( ABR,    &ABR_t,
                          &none,            b, FLA_TOP );

    // Left b columns of each workspace panel.
    FLA_Part_1x2( UB,     &UB_l,  &none,    b, FLA_LEFT );
    FLA_Part_1x2( VB,     &VB_l,  &none,    b, FLA_LEFT );
    FLA_Part_1x2( YB,     &YB_l,  &none,    b, FLA_LEFT );
    FLA_Part_1x2( ZB,     &ZB_l,  &none,    b, FLA_LEFT );

    // Drop the top b rows of those panels; the remainders feed the A22
    // update below.
    FLA_Part_2x1( UB_l,   &none,
                          &U2_l,            b, FLA_TOP );
    FLA_Part_2x1( VB_l,   &none,
                          &V2_l,            b, FLA_TOP );
    FLA_Part_2x1( YB_l,   &none, 
                          &Y2_l,            b, FLA_TOP );
    FLA_Part_2x1( ZB_l,   &none,
                          &Z2_l,            b, FLA_TOP );

    // [ ABR, YB, ZB, TU1, TV1 ] = FLA_Bidiag_UT_u_step_unb_var5( ABR, TU1, TV1, b );
    //FLA_Bidiag_UT_u_step_unb_var5( ABR, YB, ZB, TU1_tl, TV1_tl );
    // The step routine's return value is not inspected.
    FLA_Bidiag_UT_u_step_opt_var5( ABR, YB, ZB, TU1_tl, TV1_tl );

    // The trailing update is skipped entirely when A22 has no rows.
    if ( FLA_Obj_length( A22 ) > 0 )
    {
      // Build UB from ABR, with explicit unit diagonal and zeros
      // (FLA_UNIT_DIAG is applied to UB_l itself).
      FLA_Copy( ABR_l, UB_l );
      FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_l );

      // Build VB from ABR, with explicit unit subdiagonal and zeros:
      // the first row (VB_tl) is zeroed and the unit-diagonal
      // triangularization is applied to the remaining rows (VB_bl).
      FLA_Copyt( FLA_TRANSPOSE, ABR_t, VB_l );
      FLA_Part_2x1( VB_l,   &VB_tl,
                            &VB_bl,            1, FLA_TOP );
      FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, VB_bl );
      FLA_Set( FLA_ZERO, VB_tl );

      // A22 = A22 - U2 * Y2' - Z2 * V2';
      FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                         FLA_MINUS_ONE, U2_l, Y2_l, FLA_ONE, A22 );
      FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                         FLA_MINUS_ONE, Z2_l, V2_l, FLA_ONE, A22 );
    }

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
                                                     A10, A11, /**/ A12,
                            /* ************** */  /* ****************** */
                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,
                              FLA_TL );
    FLA_Cont_with_3x1_to_2x1( &UT,                U0,
                                                  U1,
                            /* ** */           /* ** */
                              &UB,                U2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &VT,                V0,
                                                  V1,
                            /* ** */           /* ** */
                              &VB,                V2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &YT,                Y0,
                                                  Y1,
                            /* ** */           /* ** */
                              &YB,                Y2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &ZT,                Z0,
                                                  Z1,
                            /* ** */           /* ** */
                              &ZB,                Z2,     FLA_TOP );
    FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR,       TU0, TU1, /**/ TU2,
                              FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR,       TV0, TV1, /**/ TV2,
                              FLA_LEFT );
  }

  // Release the temporaries created at entry.
  FLA_Obj_free( &U );
  FLA_Obj_free( &V );
  FLA_Obj_free( &Y );
  FLA_Obj_free( &Z );

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_ofu_var2().

{
  // Thin wrapper: forwards A, TU and TV to
  // FLA_Bidiag_UT_u_step_ofu_var2() and relays its return value.
  FLA_Error r_val = FLA_Bidiag_UT_u_step_ofu_var2( A, TU, TV );

  return r_val;
}

References FLA_Bidiag_UT_u_step_ofu_var3().

{
  // Thin wrapper: forwards A, TU and TV to
  // FLA_Bidiag_UT_u_step_ofu_var3() and relays its return value.
  FLA_Error r_val = FLA_Bidiag_UT_u_step_ofu_var3( A, TU, TV );

  return r_val;
}

References FLA_Bidiag_UT_u_step_ofu_var4(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), and FLA_Obj_width().

{
  // Runs FLA_Bidiag_UT_u_step_ofu_var4() on A with two temporary
  // workspace matrices whose sizes are derived from A:
  //   Y is n_A x n_A and Z is m_A x n_A, both with A's datatype.
  // The step routine's return value is passed back to the caller.
  FLA_Datatype dt     = FLA_Obj_datatype( A );
  dim_t        n_rows = FLA_Obj_length( A );
  dim_t        n_cols = FLA_Obj_width( A );
  FLA_Obj      Y, Z;
  FLA_Error    status;

  FLA_Obj_create( dt, n_cols, n_cols, 0, 0, &Y );
  FLA_Obj_create( dt, n_rows, n_cols, 0, 0, &Z );

  status = FLA_Bidiag_UT_u_step_ofu_var4( A, Y, Z, TU, TV );

  // The workspaces are released before the status is returned.
  FLA_Obj_free( &Y );
  FLA_Obj_free( &Z );

  return status;
}

References FLA_Bidiag_UT_u_step_opt_var1().

Referenced by FLA_Bidiag_UT_u().

{
  // Thin wrapper: forwards A, TU and TV to
  // FLA_Bidiag_UT_u_step_opt_var1() and relays its return value.
  FLA_Error r_val = FLA_Bidiag_UT_u_step_opt_var1( A, TU, TV );

  return r_val;
}

References FLA_Bidiag_UT_u_step_opt_var2().

Referenced by FLA_Bidiag_UT_u().

{
  // Thin wrapper: forwards A, TU and TV to
  // FLA_Bidiag_UT_u_step_opt_var2() and relays its return value.
  FLA_Error r_val = FLA_Bidiag_UT_u_step_opt_var2( A, TU, TV );

  return r_val;
}

References FLA_Bidiag_UT_u_step_opt_var3().

Referenced by FLA_Bidiag_UT_u().

{
  // Thin wrapper: forwards A, TU and TV to
  // FLA_Bidiag_UT_u_step_opt_var3() and relays its return value.
  FLA_Error r_val = FLA_Bidiag_UT_u_step_opt_var3( A, TU, TV );

  return r_val;
}

References FLA_Bidiag_UT_u_step_opt_var4(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u().

{
  // Runs FLA_Bidiag_UT_u_step_opt_var4() on A with two temporary
  // workspace matrices whose sizes are derived from A:
  //   Y is n_A x n_A and Z is m_A x n_A, both with A's datatype.
  // The step routine's return value is passed back to the caller.
  FLA_Datatype dt     = FLA_Obj_datatype( A );
  dim_t        n_rows = FLA_Obj_length( A );
  dim_t        n_cols = FLA_Obj_width( A );
  FLA_Obj      Y, Z;
  FLA_Error    status;

  FLA_Obj_create( dt, n_cols, n_cols, 0, 0, &Y );
  FLA_Obj_create( dt, n_rows, n_cols, 0, 0, &Z );

  status = FLA_Bidiag_UT_u_step_opt_var4( A, Y, Z, TU, TV );

  // The workspaces are released before the status is returned.
  FLA_Obj_free( &Y );
  FLA_Obj_free( &Z );

  return status;
}

References FLA_Bidiag_UT_u_step_opt_var5(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u().

{
  // Runs FLA_Bidiag_UT_u_step_opt_var5() on A with two temporary
  // workspace matrices whose sizes are derived from A:
  //   Y is n_A x n_A and Z is m_A x n_A, both with A's datatype.
  // The step routine's return value is passed back to the caller.
  FLA_Datatype dt     = FLA_Obj_datatype( A );
  dim_t        n_rows = FLA_Obj_length( A );
  dim_t        n_cols = FLA_Obj_width( A );
  FLA_Obj      Y, Z;
  FLA_Error    status;

  FLA_Obj_create( dt, n_cols, n_cols, 0, 0, &Y );
  FLA_Obj_create( dt, n_rows, n_cols, 0, 0, &Z );

  status = FLA_Bidiag_UT_u_step_opt_var5( A, Y, Z, TU, TV );

  // The workspaces are released before the status is returned.
  FLA_Obj_free( &Y );
  FLA_Obj_free( &Z );

  return status;
}
FLA_Error FLA_Bidiag_UT_u_step_ofc_var2 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

{
  // scomplex kernel that works directly on the raw buffers of A, T and S.
  // For i = 0 .. m_TS-1 it:
  //   1. computes a left Householder transform from alpha11 / a21,
  //      storing its scalar in tau11;
  //   2. if columns remain to the right (n_ahead > 0): forms y21, updates
  //      a12t, computes a right Householder transform (scalar in sigma11),
  //      forms v21 and z21, applies the fused two-term update to A22, and
  //      fills s01;
  //   3. fills t01.
  // Scratch vectors v and y (n_A entries each) and z (m_A entries) are
  // obtained from FLA_malloc() and released with FLA_free() before
  // returning.  Always returns FLA_SUCCESS.
  scomplex* buff_1  = FLA_COMPLEX_PTR( FLA_ONE );
  scomplex* buff_0  = FLA_COMPLEX_PTR( FLA_ZERO );
  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );

  scomplex  beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  // The scratch vectors are contiguous.
  int       inc_v  = 1;
  int       inc_y  = 1;
  int       inc_z  = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Sub-views of A relative to the current diagonal element (i,i);
    // element (r,c) lives at buff_A + c*cs_A + r*rs_A.
    scomplex* a10t     = buff_A + (0  )*cs_A + (i  )*rs_A;
    scomplex* A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    scomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    scomplex* a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    scomplex* A02      = buff_A + (i+1)*cs_A + (0  )*rs_A;
    scomplex* a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
    scomplex* A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    // Column i of T: the part above the diagonal and the diagonal entry.
    scomplex* t01      = buff_T + (i  )*cs_T + (0  )*rs_T;
    scomplex* tau11    = buff_T + (i  )*cs_T + (i  )*rs_T;

    // Column i of S: the part above the diagonal and the diagonal entry.
    scomplex* s01      = buff_S + (i  )*cs_S + (0  )*rs_S;
    scomplex* sigma11  = buff_S + (i  )*cs_S + (i  )*rs_S;

    // Tails of the scratch vectors starting at index i+1.
    scomplex* v21      = buff_v + (i+1)*inc_v;

    scomplex* y21      = buff_y + (i+1)*inc_y;

    scomplex* z21      = buff_z + (i+1)*inc_z;

    // First element of a12t and the remainder to its right.
    scomplex* a12t_l   = a12t   + (0  )*cs_A + (0  )*rs_A;
    scomplex* a12t_r   = a12t   + (1  )*cs_A + (0  )*rs_A;

    // First element of v21 and the remainder below it.
    scomplex* v21_t    = v21    + (0  )*inc_v;
    scomplex* v21_b    = v21    + (1  )*inc_v;

    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = n_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    FLA_Househ2_UT_l_opc( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );

    // Everything that touches the columns to the right of column i is
    // skipped when there are none.
    if ( n_ahead > 0 )
    {
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
      bl1_ccopyv( BLIS1_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  y21,  inc_y );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_1,
                 y21, inc_y );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
      bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
      bl1_caxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_m1,
                  y21,  inc_y,
                  a12t, cs_A );

      // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
      FLA_Househ2_UT_r_opc( n_ahead - 1,
                            a12t_l,
                            a12t_r, cs_A,
                            sigma11 );

      // FLA_Set( FLA_ONE, v21_t );
      // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
      // v21_b points into buff_v, so it is strided by inc_v.
      *v21_t = *buff_1;
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  a12t_r, cs_A,
                  v21_b,  inc_v );

      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      bl1_cdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, inc_y,
                v21, inc_v,
                &beta );
      bl1_cneg1( &beta );

      // FLA_Copy( a21, z21 );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  a21, rs_A,
                  z21, inc_z );
      bl1_cgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 v21, inc_v,
                 &beta,
                 z21, inc_z );
      bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      FLA_Fused_Gerc2_opc_var1( m_ahead,
                                n_ahead,
                                buff_m1,
                                a21, rs_A,
                                y21, inc_y,
                                z21, inc_z,
                                v21, inc_v,
                                A22, rs_A, cs_A );

      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
    bl1_ccopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               a21, rs_A,
               buff_1,
               t01, rs_T );

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_step_ofc_var3 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

{
  scomplex* buff_1  = FLA_COMPLEX_PTR( FLA_ONE );
  scomplex* buff_0  = FLA_COMPLEX_PTR( FLA_ZERO );
  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );

  scomplex  alpha12;
  scomplex  minus_conj_alpha12;
  scomplex  psi11_minus_alpha12;
  scomplex  minus_inv_tau11;
  scomplex  minus_upsilon11;
  scomplex  minus_conj_nu11;
  scomplex  minus_conj_psi11;
  scomplex  minus_zeta11;
  scomplex  beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  scomplex* buff_w  = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_u  = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_v  = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_y  = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_z  = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_w   = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_y   = 1;
  int       inc_z   = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    scomplex* a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    scomplex* A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    scomplex* alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    scomplex* a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    scomplex* A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    scomplex* a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    scomplex* A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    scomplex* t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    scomplex* tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    scomplex* s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    scomplex* sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    scomplex* w21       = buff_w  + (i+1)*inc_w;

    scomplex* a12p      = buff_ap + (i+1)*inc_ap;

    scomplex* upsilon11 = buff_u  + (i  )*inc_u;
    scomplex* u21       = buff_u  + (i+1)*inc_u;

    scomplex* u21p      = buff_up + (i+1)*inc_up;

    scomplex* nu11      = buff_v  + (i  )*inc_v;
    scomplex* v21       = buff_v  + (i+1)*inc_v;

    scomplex* psi11     = buff_y  + (i  )*inc_y;
    scomplex* y21       = buff_y  + (i+1)*inc_y;

    scomplex* zeta11    = buff_z  + (i  )*inc_z;
    scomplex* z21       = buff_z  + (i+1)*inc_z;

    scomplex* a12p_t    = a12p    + (0  )*inc_ap;
    scomplex* a12p_b    = a12p    + (1  )*inc_ap;

    scomplex* v21_t     = v21     + (0  )*inc_v;
    scomplex* v21_b     = v21     + (1  )*inc_v;

    scomplex* a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    scomplex* a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    scomplex* A22_l     = A22     + (0  )*cs_A + (0  )*rs_A;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    if ( m_behind > 0 )
    {
      // FLA_Copy( upsilon11, minus_upsilon11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
      bl1_cmult3( buff_m1, upsilon11, &minus_upsilon11 );

      // FLA_Copy( zeta11, minus_zeta11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
      bl1_cmult3( buff_m1, zeta11, &minus_zeta11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
      bl1_ccopyconj( psi11, &minus_conj_psi11 );
      bl1_cscals( buff_m1, &minus_conj_psi11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
      bl1_ccopyconj( nu11, &minus_conj_nu11 );
      bl1_cscals( buff_m1, &minus_conj_nu11 );

      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  zeta11,    alpha11 );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_conj_psi11,
                  upsilon11, 1,
                  alpha11,   1 );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_conj_nu11,
                  zeta11,  1,
                  alpha11, 1 );

      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  z21, a21 );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_psi11,
                  u21, inc_u,
                  a21, rs_A );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_nu11,
                  z21, inc_z,
                  a21, rs_A );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11,    v21, a12t );
      bl1_caxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_upsilon11,
                  y21,  inc_y,
                  a12t, cs_A );
      bl1_caxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_zeta11,
                  v21,  inc_v,
                  a12t, cs_A );
    }

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_opc( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_ccopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    if ( n_ahead > 0 )
    {
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_cdiv3( buff_m1, tau11, &minus_inv_tau11 );

      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );
    }

    if ( m_behind > 0 && n_ahead > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1( m_ahead,
                                            n_ahead,
                                            tau11,
                                            buff_m1,
                                            u21, inc_u,
                                            y21, inc_y,
                                            z21, inc_z,
                                            v21, inc_v,
                                            A22, rs_A, cs_A,
                                            u21p, inc_up,
                                            a12p, inc_ap,
                                            w21,  inc_w );
                                           
                                           
    }
    else if ( n_ahead > 0 )
    {
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      FLA_Fused_Ahx_Axpy_Ax_opc_var1( m_ahead,
                                      n_ahead,
                                      tau11,
                                      buff_0,
                                      A22,  rs_A, cs_A,
                                      u21p, inc_up,
                                      a12p, inc_ap,
                                      y21,  inc_y,
                                      w21,  inc_w );
    }

    if ( n_ahead > 0 )
    {
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_caxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_1,
                  a12t, cs_A,
                  y21,  inc_y );

      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_opc( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      // FLA_Conjugate( v21_b );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_cmult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );
      bl1_cconjv( n_ahead - 1,
                  v21_b, inc_v );

      // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // FLA_Copy( u21p, u21 );
    bl1_ccopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_cdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, inc_y,
                v21, inc_v,
                &beta );
      bl1_cscals( &minus_inv_tau11, &beta );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_ccopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_cneg1( &minus_conj_alpha12 );

      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, inc_z );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  A22_l, rs_A,
                  z21,   inc_z );
      bl1_cinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, inc_z );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, inc_z );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_cinvscalv( BLIS1_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );
      bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
    bl1_ccopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               u21, inc_u,
               buff_1,
               t01, rs_T );

    if ( m_behind + 1 == b_alg && n_ahead > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      bl1_cger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u21, inc_u,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_cger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &w );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_w );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_step_ofc_var4 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), bl1_csetm(), bl1_csetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_UYx_ZVx_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

{
  /*
   * Single-precision complex ("c") body of the FLA_Bidiag_UT "u" step,
   * variant 4.  Runs m_TS iterations over the m_A x n_A matrix in buff_A
   * (row stride rs_A, column stride cs_A).
   *
   * Each iteration i:
   *   - subtracts products of earlier columns of A, Y and Z from column i
   *     of A (alpha11/a21) and from the part of row i right of the
   *     diagonal (a12t);
   *   - calls FLA_Househ2_UT_l_opc on alpha11/a21, passing tau11;
   *   - if columns remain (n_ahead > 0), builds a12p, calls
   *     FLA_Househ2s_UT_r_opc on it (passing sigma11), and fills column i
   *     of Y (y21), Z (z21), T (t01) and S (s01).
   *
   * buff_Y (n_A x m_TS) and buff_Z (m_A x m_TS) are zeroed before the loop
   * and then written column by column.  The "// FLA_..." comments kept
   * below are the object-level pseudo-code for the call(s) that follow them.
   *
   * Always returns FLA_SUCCESS.
   *
   * NOTE(review): the FLA_malloc results are used without a NULL check in
   * this function -- confirm FLA_malloc itself handles allocation failure.
   */

  // Pointers to the scalar constants 1, 0 and -1 in scomplex form.
  scomplex* buff_1  = FLA_COMPLEX_PTR( FLA_ONE );
  scomplex* buff_0  = FLA_COMPLEX_PTR( FLA_ZERO );
  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );

  // Scalar temporaries; they are (re)assigned inside the loop before use.
  scomplex  alpha12;
  scomplex  minus_conj_alpha12;
  scomplex  psi11_minus_alpha12;
  scomplex  minus_inv_tau11;
  scomplex  beta;
  scomplex  last_elem;   // saves *a01_b while it is temporarily set to one
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Workspace vectors, all unit stride.  Vectors of length m_A: w, al, u, up.
  // Vectors of length n_A: tmp, ap, v, d, e.  The pseudo-code below also
  // lists f and g, but no buffers are allocated for them here; buff_tmp has
  // no counterpart in the pseudo-code and is only handed to
  // FLA_Fused_UYx_ZVx_opc_var1.
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
  scomplex* buff_tmp = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_w  = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_al = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_u  = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_v  = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_d  = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_e  = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  int       inc_tmp = 1;
  int       inc_w   = 1;
  int       inc_al  = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_d   = 1;
  int       inc_e   = 1;

  // Zero the n_A x b_alg matrix Y and the m_A x b_alg matrix Z.
  // FLA_Set( FLA_ZERO, Y );
  // FLA_Set( FLA_ZERO, Z );
  bl1_csetm( n_A,
             b_alg,
             buff_0,
             buff_Y, rs_Y, cs_Y );
  bl1_csetm( m_A,
             b_alg,
             buff_0,
             buff_Z, rs_Z, cs_Z );

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A relative to the current diagonal element (i,i):
    // "0" = rows/columns before i, "1" = row/column i, "2" = after i.
    scomplex* a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    scomplex* A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    scomplex* a01       = buff_A  + (i  )*cs_A + (0  )*rs_A;
    scomplex* alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    scomplex* a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    scomplex* A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    scomplex* a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    scomplex* A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    // Views into Y: row i and rows below it (columns 0..i-1), and column i
    // below row i.
    scomplex* y10t      = buff_Y  + (0  )*cs_Y + (i  )*rs_Y;
    scomplex* Y20       = buff_Y  + (0  )*cs_Y + (i+1)*rs_Y;
    scomplex* y21       = buff_Y  + (i  )*cs_Y + (i+1)*rs_Y;

    // Same partitioning for Z.
    scomplex* z10t      = buff_Z  + (0  )*cs_Z + (i  )*rs_Z;
    scomplex* Z20       = buff_Z  + (0  )*cs_Z + (i+1)*rs_Z;
    scomplex* z21       = buff_Z  + (i  )*cs_Z + (i+1)*rs_Z;

    // Column i of T: part above the diagonal (t01) and diagonal entry (tau11).
    scomplex* t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    scomplex* tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    // Column i of S: part above the diagonal (s01) and diagonal entry (sigma11).
    scomplex* s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    scomplex* sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    // Workspace sub-vectors starting at index i+1.
    scomplex* tmp21     = buff_tmp + (i+1)*inc_tmp;

    scomplex* w21       = buff_w  + (i+1)*inc_w;

    scomplex* a22l      = buff_al + (i+1)*inc_al;

    scomplex* a12p      = buff_ap + (i+1)*inc_ap;

    scomplex* u21       = buff_u  + (i+1)*inc_u;

    scomplex* u21p      = buff_up + (i+1)*inc_up;

    scomplex* v21       = buff_v  + (i+1)*inc_v;

    // d0 and e0 always start at index 0; n_behind entries are used.
    scomplex* d0        = buff_d  + (0  )*inc_d;

    scomplex* e0        = buff_e  + (0  )*inc_e;

    // First element ("_t") and remaining elements ("_b") of a12p and v21.
    scomplex* a12p_t    = a12p    + (0  )*inc_ap;
    scomplex* a12p_b    = a12p    + (1  )*inc_ap;

    scomplex* v21_t     = v21     + (0  )*inc_v;
    scomplex* v21_b     = v21     + (1  )*inc_v;

    // Element (i-1, i) of A, i.e. the last entry of a01.  For i == 0 this
    // address lies one row before a01; it is only dereferenced inside the
    // m_behind > 0 guards below.
    scomplex* a01_b     = a01     + (0  )*cs_A + (i-1)*rs_A;

    // First element ("_l") and remaining elements ("_r") of row a12t.
    scomplex* a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    scomplex* a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    // Aliases used by the two gemv calls below: ABL/ZBL are the
    // (m_ahead+1) x n_behind blocks of A/Z starting at row i, and a2 is the
    // length-(m_ahead+1) part of column i of A starting at the diagonal.
    scomplex* ABL       = a10t;
    scomplex* ZBL       = z10t;

    scomplex* a2        = alpha11;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    // Temporarily overwrite the last entry of a01 with one (saving the old
    // value); it is restored after the four gemv calls below.
    if ( m_behind > 0 )
    {
      // FLA_Copy( a01_b, last_elem );
      // FLA_Set( FLA_ONE, a01_b );
      last_elem = *a01_b;
      *a01_b = *buff_1;
    }

    // Update column i of A from the diagonal down (a2).
    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01,  FLA_ONE, a2 );
    bl1_cgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ABL,  rs_A, cs_A,
               y10t, cs_Y,
               buff_1,
               a2,   rs_A );
    bl1_cgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ZBL, rs_Z, cs_Z,
               a01, rs_A,
               buff_1,
               a2,  rs_A );

    // Update row i of A right of the diagonal (a12t).
    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE,    FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
    bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               n_ahead,
               n_behind,
               buff_m1,
               Y20,  rs_Y, cs_Y,
               a10t, cs_A,
               buff_1,
               a12t, cs_A );
    bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_behind,
               n_ahead,
               buff_m1,
               A02,  rs_A, cs_A,
               z10t, cs_Z,
               buff_1,
               a12t, cs_A );

    // Put back the entry of a01 that was replaced by one above.
    if ( m_behind > 0 )
    {
      // FLA_Copy( last_elem, a01_b );
      *a01_b = last_elem;
    }

    // Left Householder step on column i; a21 is then copied into the
    // workspace vector u21p.
    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_opc( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_ccopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    // Everything in this branch needs at least one column to the right of
    // column i.
    if ( n_ahead > 0 )
    {
      // minus_inv_tau11 = -1 / tau11 (per the pseudo-code below).
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_cdiv3( buff_m1, tau11, &minus_inv_tau11 );

      // a12p = a12t + minus_inv_tau11 * a12t, stored with unit stride.
      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );

      // d0 and e0 (length n_behind) are overwritten (beta = 0) with
      // products of A20 and Z20 with u21p.
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_0,
                 d0,   inc_d );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 Z20,  rs_Z, cs_Z,
                 u21p, inc_up,
                 buff_0,
                 e0,   inc_e );

      // t01 = conj( a10t ) + d0.  When n_ahead == 0 the else branch at the
      // end of the loop body forms t01 instead.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Axpy( FLA_ONE, d0, t01 );
      bl1_ccopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  buff_1,
                  d0,  inc_d,
                  t01, rs_T );

      // y21 is reset to zero and then accumulated from Y20*d0 and A02^T*e0
      // (both with coefficient -1).
      // FLA_Set( FLA_ZERO, y21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      // FLA_Gemv( FLA_TRANSPOSE,    FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
      bl1_csetv( n_ahead,
                 buff_0,
                 y21, rs_Y );
      bl1_cgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_m1,
                 Y20, rs_Y, cs_Y,
                 d0,  inc_d,
                 buff_1,
                 y21, rs_Y );
      bl1_cgemv( BLIS1_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_m1,
                 A02, rs_A, cs_A,
                 e0,  inc_e,
                 buff_1,
                 y21, rs_Y );

      // Fused helper standing in for the three operations listed below; it
      // is given tau11 and buff_1 as scalars and A22, u21p, a12p, y21, w21
      // as operands.
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      FLA_Fused_Ahx_Axpy_Ax_opc_var1( m_ahead,
                                      n_ahead,
                                      tau11,
                                      buff_1,
                                      A22,  rs_A, cs_A,
                                      u21p, inc_up,
                                      a12p, inc_ap,
                                      y21,  rs_Y,
                                      w21,  inc_w );

      // Fused helper standing in for the operations listed below.  Note the
      // pseudo-code refers to f0/g0; the call passes tmp21 and s01 in those
      // argument positions.
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE,    FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
      // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
      // FLA_Copy( A22_l, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
      // FLA_Copy( g0, s01 );
      FLA_Fused_UYx_ZVx_opc_var1( m_ahead,
                                  n_behind,
                                  m_behind,
                                  n_ahead,
                                  buff_m1,
                                  A20, rs_A, cs_A,
                                  Y20, rs_Y, cs_Y,
                                  Z20, rs_Z, cs_Z,
                                  A02, rs_A, cs_A,
                                  A22, rs_A, cs_A,
                                  tmp21, inc_tmp, 
                                  s01,  rs_S, 
                                  a12p, inc_ap, 
                                  w21,  inc_w, 
                                  a22l, inc_al );

      // y21 += conj( a12t ).
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_caxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_1, 
                  a12t, cs_A,
                  y21,  rs_Y );

      // Right Householder step on a12p (first element plus n_ahead - 1
      // trailing elements); alpha12, psi11_minus_alpha12 and sigma11 are
      // passed by address as outputs.
      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_opc( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // Build v21 from a12p: subtract alpha12 from the first element,
      // divide the whole vector by psi11_minus_alpha12, then conjugate all
      // but the first element.
      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      // FLA_Conjugate( v21_b );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_cmult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );
      bl1_cconjv( n_ahead - 1,
                  v21_b, inc_v );

      // minus_conj_alpha12 = -conj( alpha12 ).
      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_ccopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_cneg1( &minus_conj_alpha12 );

      // Finish s01 using the first column of A02 (A02 walked with stride
      // rs_A), then scale by the conjugated inverse of psi11_minus_alpha12.
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
      bl1_caxpyv( BLIS1_CONJUGATE,
                  n_behind,
                  &minus_conj_alpha12,
                  A02, rs_A,
                  s01, rs_S );
      bl1_cinvscalv( BLIS1_CONJUGATE,
                     n_behind,
                     &psi11_minus_alpha12,
                     s01, rs_S );

      // Write the results back into row i of A: alpha12 into the first
      // element and v21_b into the rest.
      // FLA_Copy( alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // u21 receives a copy of u21p; it is read below in both branches.
    // FLA_Copy( u21p, u21 );
    bl1_ccopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // beta = dot( y21, v21 ) scaled by minus_inv_tau11 (computed in the
      // earlier n_ahead > 0 branch of this same iteration).
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_cdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, rs_Y,
                v21, inc_v,
                &beta );
      bl1_cscals( &minus_inv_tau11, &beta );

      // Form z21 (column i of Z below row i) from w21, a22l and u21.
      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, rs_Z );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  a22l, inc_al,
                  z21,  rs_Z );
      bl1_cinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, rs_Z );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, rs_Z );

      // Final scaling: y21 /= tau11 and z21 /= sigma11.
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, rs_Y );
      bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, rs_Z );
    }
    else // if ( n_ahead == 0 )
    {
      // No columns remain to the right, so the first n_ahead > 0 branch
      // (which normally forms t01) was skipped; form t01 here from a10t
      // and A20 / u21.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
      bl1_ccopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 t01, rs_T );
    }

    /*------------------------------------------------------------*/

  }

  // Release all nine workspace vectors allocated above.
  // FLA_Obj_free( &w );
  // FLA_Obj_free( &al );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &d );
  // FLA_Obj_free( &e );
  FLA_free( buff_tmp );
  FLA_free( buff_w );
  FLA_free( buff_al );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_d );
  FLA_free( buff_e );

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_step_ofd_var2 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

{
  /*
   * Double-precision real ("d") body of the FLA_Bidiag_UT "u" step,
   * variant 2.  Runs m_TS iterations over the m_A x n_A matrix in buff_A
   * (row stride rs_A, column stride cs_A).
   *
   * Each iteration i:
   *   - calls FLA_Househ2_UT_l_opd on alpha11/a21, passing tau11;
   *   - if columns remain (n_ahead > 0): forms y21, updates row a12t,
   *     calls FLA_Househ2_UT_r_opd on a12t (passing sigma11), forms v21
   *     and z21, updates A22 through FLA_Fused_Gerc2_opd_var1, and writes
   *     s01 (column i of S above the diagonal);
   *   - writes t01 (column i of T above the diagonal).
   *
   * The "// FLA_..." comments kept below are the object-level pseudo-code
   * for the call(s) that follow them.
   *
   * Always returns FLA_SUCCESS.
   *
   * NOTE(review): the FLA_malloc results are used without a NULL check in
   * this function -- confirm FLA_malloc itself handles allocation failure.
   */

  // Pointers to the scalar constants 1, 0 and -1 in double form.
  double*   buff_1  = FLA_DOUBLE_PTR( FLA_ONE );
  double*   buff_0  = FLA_DOUBLE_PTR( FLA_ZERO );
  double*   buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );

  double    beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Unit-stride workspace vectors: v and y hold n_A elements, z holds m_A.
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  double*   buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_v  = 1;
  int       inc_y  = 1;
  int       inc_z  = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A relative to the current diagonal element (i,i):
    // "0" = rows/columns before i, "1" = row/column i, "2" = after i.
    double*   a10t     = buff_A + (0  )*cs_A + (i  )*rs_A;
    double*   A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    double*   alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    double*   a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    double*   A02      = buff_A + (i+1)*cs_A + (0  )*rs_A;
    double*   a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
    double*   A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    // Column i of T and of S: part above the diagonal and diagonal entry.
    double*   t01      = buff_T + (i  )*cs_T + (0  )*rs_T;
    double*   tau11    = buff_T + (i  )*cs_T + (i  )*rs_T;

    double*   s01      = buff_S + (i  )*cs_S + (0  )*rs_S;
    double*   sigma11  = buff_S + (i  )*cs_S + (i  )*rs_S;

    // Workspace sub-vectors starting at index i+1.
    double*   v21      = buff_v + (i+1)*inc_v;

    double*   y21      = buff_y + (i+1)*inc_y;

    double*   z21      = buff_z + (i+1)*inc_z;

    // First element ("_l"/"_t") and remaining elements ("_r"/"_b") of the
    // row a12t and of the vector v21.
    double*   a12t_l   = a12t   + (0  )*cs_A + (0  )*rs_A;
    double*   a12t_r   = a12t   + (1  )*cs_A + (0  )*rs_A;

    double*   v21_t    = v21    + (0  )*inc_v;
    double*   v21_b    = v21    + (1  )*inc_v;

    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = n_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    // Left Householder step on column i.
    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    FLA_Househ2_UT_l_opd( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );

    // Everything in this branch needs at least one column to the right of
    // column i.
    if ( n_ahead > 0 )
    {
      // y21 = a12t^T + A22^T * a21.
      // FLA_Copyt( FLA_TRANSPOSE, a12t, y21 );
      // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  y21,  inc_y );
      bl1_dgemv( BLIS1_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_1,
                 y21, inc_y );

      // y21 /= tau11.
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );

      // a12t -= y21^T.
      // FLA_Axpyt( FLA_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  buff_m1,
                  y21,  inc_y,
                  a12t, cs_A );

      // Right Householder step on the updated row (first element plus
      // n_ahead - 1 trailing elements).
      // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
      FLA_Househ2_UT_r_opd( n_ahead - 1,
                            a12t_l,
                            a12t_r, cs_A,
                            sigma11 );

      // v21 = [ 1 ; a12t_r^T ].  The destination lives in buff_v, so its
      // stride is inc_v (the original passed inc_y here, which only worked
      // because both strides are 1).
      // FLA_Set( FLA_ONE, v21_t );
      // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
      *v21_t = *buff_1;
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  a12t_r, cs_A,
                  v21_b,  inc_v );

      // beta = -( v21 . y21 ).
      // FLA_Dotc( FLA_CONJUGATE, v21, y21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      bl1_ddot( BLIS1_CONJUGATE,
                n_ahead,
                v21, inc_v,
                y21, inc_y,
                &beta );
      bl1_dneg1( &beta );

      // z21 = ( A22 * v21 + beta * a21 ) / sigma11.
      // FLA_Copy( a21, z21 );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  a21, rs_A,
                  z21, inc_z );
      bl1_dgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 v21, inc_v,
                 &beta,
                 z21, inc_z );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // Fused helper standing in for the two rank-1 updates of A22 listed
      // below.
      // FLA_Ger( FLA_MINUS_ONE, a21, y21, A22 );
      // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
      FLA_Fused_Gerc2_opd_var1( m_ahead,
                                n_ahead,
                                buff_m1,
                                a21, rs_A,
                                y21, inc_y,
                                z21, inc_z,
                                v21, inc_v,
                                A22, rs_A, cs_A );

      // s01 = A02 * v21 (overwrites s01, beta = 0).
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_dgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // t01 = a10t^T + A20^T * a21; done on every iteration, including the
    // case n_ahead == 0.
    // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
    bl1_dcopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               a21, rs_A,
               buff_1,
               t01, rs_T );

    /*------------------------------------------------------------*/

  }

  // Release the three workspace vectors allocated above.
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
/*
 * FLA_Bidiag_UT_u_step_ofd_var3
 *
 * Double-precision kernel that performs m_TS iterations of the variant-3
 * bidiagonalization step (per the routine name) on the matrix held in
 * buff_A, working directly on raw buffers and strides.
 *
 * Each iteration i:
 *   - folds the vectors left in u, y, z, v by the previous iteration into
 *     alpha11, a21 and a12t (skipped on the first iteration);
 *   - calls FLA_Househ2_UT_l_opd on ( alpha11, a21 ), writing tau11;
 *   - when columns remain to the right, calls a fused kernel on A22, then
 *     FLA_Househ2s_UT_r_opd on a12p, writing sigma11, and rebuilds
 *     y21, z21, v21 and the row a12t;
 *   - fills column i of T ( t01 above tau11 ) and of S ( s01 above sigma11 );
 *   - on the last iteration applies the two pending rank-1 updates to A22
 *     explicitly with bl1_dger.
 *
 * Parameters:
 *   m_A, n_A              row / column counts of A; they size the workspace
 *                         vectors and the "ahead" partitions.
 *   m_TS                  number of iterations ( b_alg ); the original
 *                         comment equates it with the length of T.
 *   buff_A, rs_A, cs_A    base pointer and row / column strides of A;
 *                         A is updated in place.
 *   buff_T, rs_T, cs_T    base pointer and strides of T ( written ).
 *   buff_S, rs_S, cs_S    base pointer and strides of S ( written ).
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * Seven workspace vectors are obtained from FLA_malloc and released with
 * FLA_free before returning.
 * NOTE(review): the FLA_malloc results are not checked here; presumably
 * FLA_malloc handles allocation failure itself -- confirm.
 */
FLA_Error FLA_Bidiag_UT_u_step_ofd_var3 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

{
  // Pointers to the library's double-precision constants 1, 0 and -1.
  double*   buff_1  = FLA_DOUBLE_PTR( FLA_ONE );
  double*   buff_0  = FLA_DOUBLE_PTR( FLA_ZERO );
  double*   buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );

  // Scalar temporaries reused across iterations.
  double    alpha12;
  double    minus_conj_alpha12;
  double    psi11_minus_alpha12;
  double    minus_inv_tau11;
  double    minus_upsilon11;
  double    minus_conj_nu11;
  double    minus_conj_psi11;
  double    minus_zeta11;
  double    beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Workspace vectors: w, u, up, z have m_A elements; ap, v, y have n_A
  // elements. All are contiguous ( unit increment ).
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  double*   buff_w  = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_u  = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_v  = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_y  = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_z  = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_w   = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_y   = 1;
  int       inc_z   = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A around the current diagonal element ( i, i ).
    double*   a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    double*   A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    double*   alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    double*   a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    double*   A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    double*   a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    double*   A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    // Column i of T and of S: the part above the diagonal and the diagonal.
    double*   t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    double*   tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    double*   s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    double*   sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    // Views into the workspace vectors. Element i is the head of what the
    // previous iteration stored at offset i+1.
    double*   w21       = buff_w  + (i+1)*inc_w;

    double*   a12p      = buff_ap + (i+1)*inc_ap;

    double*   upsilon11 = buff_u  + (i  )*inc_u;
    double*   u21       = buff_u  + (i+1)*inc_u;

    double*   u21p      = buff_up + (i+1)*inc_up;

    double*   nu11      = buff_v  + (i  )*inc_v;
    double*   v21       = buff_v  + (i+1)*inc_v;

    double*   psi11     = buff_y  + (i  )*inc_y;
    double*   y21       = buff_y  + (i+1)*inc_y;

    double*   zeta11    = buff_z  + (i  )*inc_z;
    double*   z21       = buff_z  + (i+1)*inc_z;

    // First element / remainder splits of a12p, v21 and a12t.
    double*   a12p_t    = a12p    + (0  )*inc_ap;
    double*   a12p_b    = a12p    + (1  )*inc_ap;

    double*   v21_t     = v21     + (0  )*inc_v;
    double*   v21_b     = v21     + (1  )*inc_v;

    double*   a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    double*   a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    // First column of A22.
    double*   A22_l     = A22     + (0  )*cs_A + (0  )*rs_A;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    // From the second iteration on, fold the vectors produced by the
    // previous iteration into the current diagonal element, column and row.
    if ( m_behind > 0 )
    {
      // FLA_Copy( upsilon11, minus_upsilon11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
      bl1_dmult3( buff_m1, upsilon11, &minus_upsilon11 );

      // FLA_Copy( zeta11, minus_zeta11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
      bl1_dmult3( buff_m1, zeta11, &minus_zeta11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
      bl1_dcopyconj( psi11, &minus_conj_psi11 );
      bl1_dscals( buff_m1, &minus_conj_psi11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
      bl1_dcopyconj( nu11, &minus_conj_nu11 );
      bl1_dscals( buff_m1, &minus_conj_nu11 );

      // Single-element axpy calls update the scalar alpha11.
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  zeta11,    alpha11 );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_conj_psi11,
                  upsilon11, 1,
                  alpha11,   1 );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_conj_nu11,
                  zeta11,  1,
                  alpha11, 1 );

      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  z21, a21 );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_psi11,
                  u21, inc_u,
                  a21, rs_A );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_nu11,
                  z21, inc_z,
                  a21, rs_A );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11,    v21, a12t );
      bl1_daxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_upsilon11,
                  y21,  inc_y,
                  a12t, cs_A );
      bl1_daxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_zeta11,
                  v21,  inc_v,
                  a12t, cs_A );
    }

    // Left transform on ( alpha11, a21 ); a21 is then saved in u21p so the
    // previous u21 stays available to the fused kernel below.
    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_opd( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_dcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    if ( n_ahead > 0 )
    {
      // minus_inv_tau11 = -1 / tau11; it is reused further down for beta.
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_ddiv3( buff_m1, tau11, &minus_inv_tau11 );

      // a12p = a12t + minus_inv_tau11 * a12t
      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );
    }

    if ( m_behind > 0 && n_ahead > 0 )
    {
      // One fused kernel call stands in for the five FLA-level operations
      // listed here, including the two rank-1 updates of A22 that use the
      // previous iteration's u21, y21, z21, v21.
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1( m_ahead,
                                            n_ahead,
                                            tau11,
                                            buff_m1,
                                            u21, inc_u,
                                            y21, inc_y,
                                            z21, inc_z,
                                            v21, inc_v,
                                            A22, rs_A, cs_A,
                                            u21p, inc_up,
                                            a12p, inc_ap,
                                            w21,  inc_w );
    }
    else if ( n_ahead > 0 )
    {
      // First iteration: there is no pending rank-2 update, so the fused
      // kernel without the Gerc2 part is used.
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      FLA_Fused_Ahx_Axpy_Ax_opd_var1( m_ahead,
                                      n_ahead,
                                      tau11,
                                      buff_0,
                                      A22,  rs_A, cs_A,
                                      u21p, inc_up,
                                      a12p, inc_ap,
                                      y21,  inc_y,
                                      w21,  inc_w );
    }

    if ( n_ahead > 0 )
    {
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_daxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_1,
                  a12t, cs_A,
                  y21,  inc_y );

      // Right transform computed from a12p; it yields alpha12,
      // psi11_minus_alpha12 and sigma11.
      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_opd( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // Build v21 from a12p: shift its first element by -alpha12, scale the
      // whole vector by 1 / psi11_minus_alpha12, conjugate the tail.
      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      // FLA_Conjugate( v21_b );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_dmult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );
      bl1_dconjv( n_ahead - 1,
                  v21_b, inc_v );

      // Overwrite row a12t: alpha12 first, then the tail of v21.
      // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // The previous u21 has been consumed above; replace it with the new one.
    // FLA_Copy( u21p, u21 );
    bl1_dcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // beta = minus_inv_tau11 * dot( y21, v21 )
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_ddot( BLIS1_CONJUGATE,
                n_ahead,
                y21, inc_y,
                v21, inc_v,
                &beta );
      bl1_dscals( &minus_inv_tau11, &beta );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_dcopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_dneg1( &minus_conj_alpha12 );

      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, inc_z );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  A22_l, rs_A,
                  z21,   inc_z );
      bl1_dinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, inc_z );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, inc_z );

      // Both divisions pass BLIS1_NO_CONJUGATE, as the FLA_Inv_scalc calls
      // below specify. Conjugation is an identity on real values, so the
      // flag is not expected to affect the double-precision result.
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // s01 = A02 * v21 ( rows above the current one ).
      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // t01 = a10t' + A20' * u21
    // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
    bl1_dcopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               u21, inc_u,
               buff_1,
               t01, rs_T );

    // Last iteration only: no later iteration will consume u21, y21, z21
    // and v21, so the two rank-1 updates of A22 are applied here.
    if ( m_behind + 1 == b_alg && n_ahead > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      bl1_dger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u21, inc_u,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_dger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );
    }

    /*------------------------------------------------------------*/

  }

  // Release the workspace vectors.
  // FLA_Obj_free( &w );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_w );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
/*
 * FLA_Bidiag_UT_u_step_ofd_var4
 *
 * Double-precision kernel that performs m_TS iterations of the variant-4
 * bidiagonalization step (per the routine name) on the matrix held in
 * buff_A, working directly on raw buffers and strides.
 *
 * Before the loop, an n_A x m_TS region of Y and an m_A x m_TS region of Z
 * are set to zero. Each iteration i then:
 *   - updates the current column ( alpha11 over a21 ) and the current row
 *     a12t using the leading i columns of A, Y and Z;
 *   - calls FLA_Househ2_UT_l_opd on ( alpha11, a21 ), writing tau11;
 *   - when columns remain to the right, forms y21 ( column i of Y ), calls
 *     the fused kernels, calls FLA_Househ2s_UT_r_opd on a12p, writing
 *     sigma11, and forms z21 ( column i of Z ), t01, s01 and the new a12t;
 *   - otherwise only forms t01.
 *
 * Parameters:
 *   m_A, n_A              row / column counts of A; they size the workspace
 *                         vectors and the "ahead" partitions.
 *   m_TS                  number of iterations ( b_alg ); the original
 *                         comment equates it with the length of T.
 *   buff_A, rs_A, cs_A    base pointer and row / column strides of A;
 *                         A is updated in place.
 *   buff_Y, rs_Y, cs_Y    base pointer and strides of Y ( zeroed, then
 *                         written column by column ).
 *   buff_Z, rs_Z, cs_Z    base pointer and strides of Z ( zeroed, then
 *                         written column by column ).
 *   buff_T, rs_T, cs_T    base pointer and strides of T ( written ).
 *   buff_S, rs_S, cs_S    base pointer and strides of S ( written ).
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * Nine workspace vectors are obtained from FLA_malloc and released with
 * FLA_free before returning.
 * NOTE(review): the FLA_malloc results are not checked here; presumably
 * FLA_malloc handles allocation failure itself -- confirm.
 */
FLA_Error FLA_Bidiag_UT_u_step_ofd_var4 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), bl1_dsetm(), bl1_dsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_UYx_ZVx_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

{
  // Pointers to the library's double-precision constants 1, 0 and -1.
  double*   buff_1  = FLA_DOUBLE_PTR( FLA_ONE );
  double*   buff_0  = FLA_DOUBLE_PTR( FLA_ZERO );
  double*   buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );

  // Scalar temporaries reused across iterations.
  double    alpha12;
  double    minus_conj_alpha12;
  double    psi11_minus_alpha12;
  double    minus_inv_tau11;
  double    beta;
  double    last_elem;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Workspace vectors: w, al, u, up have m_A elements; tmp, ap, v, d, e have
  // n_A elements. All are contiguous ( unit increment ).
  // NOTE(review): the FLA-level list below names f and g, which have no
  // buffers here; tmp21 and s01 appear to take their place in the fused
  // kernel call further down -- confirm.
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
  double*   buff_tmp = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_w  = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_al = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_u  = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_v  = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_d  = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_e  = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  int       inc_tmp = 1;
  int       inc_w   = 1;
  int       inc_al  = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_d   = 1;
  int       inc_e   = 1;

  // Clear the regions of Y ( n_A x b_alg ) and Z ( m_A x b_alg ) that the
  // loop reads before it has written them.
  // FLA_Set( FLA_ZERO, Y );
  // FLA_Set( FLA_ZERO, Z );
  bl1_dsetm( n_A,
             b_alg,
             buff_0,
             buff_Y, rs_Y, cs_Y );
  bl1_dsetm( m_A,
             b_alg,
             buff_0,
             buff_Z, rs_Z, cs_Z );

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A around the current diagonal element ( i, i ).
    double*   a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    double*   A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    double*   a01       = buff_A  + (i  )*cs_A + (0  )*rs_A;
    double*   alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    double*   a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    double*   A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    double*   a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    double*   A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    // Views into Y and Z: row i, the rows below it, and column i below
    // the diagonal.
    double*   y10t      = buff_Y  + (0  )*cs_Y + (i  )*rs_Y;
    double*   Y20       = buff_Y  + (0  )*cs_Y + (i+1)*rs_Y;
    double*   y21       = buff_Y  + (i  )*cs_Y + (i+1)*rs_Y;

    double*   z10t      = buff_Z  + (0  )*cs_Z + (i  )*rs_Z;
    double*   Z20       = buff_Z  + (0  )*cs_Z + (i+1)*rs_Z;
    double*   z21       = buff_Z  + (i  )*cs_Z + (i+1)*rs_Z;

    // Column i of T and of S: the part above the diagonal and the diagonal.
    double*   t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    double*   tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    double*   s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    double*   sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    // Views into the workspace vectors.
    double*   tmp21     = buff_tmp + (i+1)*inc_tmp;

    double*   w21       = buff_w  + (i+1)*inc_w;

    double*   a22l      = buff_al + (i+1)*inc_al;

    double*   a12p      = buff_ap + (i+1)*inc_ap;

    double*   u21       = buff_u  + (i+1)*inc_u;

    double*   u21p      = buff_up + (i+1)*inc_up;

    double*   v21       = buff_v  + (i+1)*inc_v;

    // d0 and e0 always start at the beginning of their buffers.
    double*   d0        = buff_d  + (0  )*inc_d;

    double*   e0        = buff_e  + (0  )*inc_e;

    // First element / remainder splits of a12p and v21.
    double*   a12p_t    = a12p    + (0  )*inc_ap;
    double*   a12p_b    = a12p    + (1  )*inc_ap;

    double*   v21_t     = v21     + (0  )*inc_v;
    double*   v21_b     = v21     + (1  )*inc_v;

    // Last element of a01 ( row i-1 of column i ). It is dereferenced only
    // under m_behind > 0.
    // NOTE(review): for i == 0 this forms a pointer one row-stride before
    // a01; it is never dereferenced in that case, but the pointer itself
    // lies outside the column -- confirm this is acceptable.
    double*   a01_b     = a01     + (0  )*cs_A + (i-1)*rs_A;

    double*   a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    double*   a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    // Aliases used by the two gemv calls below: rows i.. of the leading
    // columns of A and Z, and column i of A from the diagonal down.
    double*   ABL       = a10t;
    double*   ZBL       = z10t;

    double*   a2        = alpha11;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    // Temporarily overwrite the last element of a01 with one for the
    // updates below; the saved value is restored afterwards.
    if ( m_behind > 0 )
    {
      // FLA_Copy( a01_b, last_elem );
      // FLA_Set( FLA_ONE, a01_b );
      last_elem = *a01_b;
      *a01_b = *buff_1;
    }

    // Update column a2 ( m_ahead + 1 rows: alpha11 and a21 together ).
    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01,  FLA_ONE, a2 );
    bl1_dgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ABL,  rs_A, cs_A,
               y10t, cs_Y,
               buff_1,
               a2,   rs_A );
    bl1_dgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ZBL, rs_Z, cs_Z,
               a01, rs_A,
               buff_1,
               a2,  rs_A );

    // Update row a12t.
    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE,    FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
    bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               n_ahead,
               n_behind,
               buff_m1,
               Y20,  rs_Y, cs_Y,
               a10t, cs_A,
               buff_1,
               a12t, cs_A );
    bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_behind,
               n_ahead,
               buff_m1,
               A02,  rs_A, cs_A,
               z10t, cs_Z,
               buff_1,
               a12t, cs_A );

    // Put back the element of a01 that was replaced by one above.
    if ( m_behind > 0 )
    {
      // FLA_Copy( last_elem, a01_b );
      *a01_b = last_elem;
    }

    // Left transform on ( alpha11, a21 ); a21 is then saved in u21p.
    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_opd( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_dcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    if ( n_ahead > 0 )
    {
      // minus_inv_tau11 = -1 / tau11; it is reused further down for beta.
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_ddiv3( buff_m1, tau11, &minus_inv_tau11 );

      // a12p = a12t + minus_inv_tau11 * a12t
      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );

      // d0 = A20' * u21p and e0 = Z20' * u21p ( each of length n_behind ).
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
      bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_0,
                 d0,   inc_d );
      bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 Z20,  rs_Z, cs_Z,
                 u21p, inc_up,
                 buff_0,
                 e0,   inc_e );

      // t01 = a10t' + d0. d0 already holds A20' * u21p, so no second
      // matrix-vector product is needed on this path.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Axpy( FLA_ONE, d0, t01 );
      bl1_dcopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  buff_1,
                  d0,  inc_d,
                  t01, rs_T );

      // y21 = -Y20 * d0 - A02^T * e0. y21 lives in column i of Y, hence
      // the rs_Y increment.
      // FLA_Set( FLA_ZERO, y21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      // FLA_Gemv( FLA_TRANSPOSE,    FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
      bl1_dsetv( n_ahead,
                 buff_0,
                 y21, rs_Y );
      bl1_dgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_m1,
                 Y20, rs_Y, cs_Y,
                 d0,  inc_d,
                 buff_1,
                 y21, rs_Y );
      bl1_dgemv( BLIS1_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_m1,
                 A02, rs_A, cs_A,
                 e0,  inc_e,
                 buff_1,
                 y21, rs_Y );

      // One fused kernel call stands in for the three FLA-level operations
      // listed here; buff_1 is passed so that y21 is accumulated into
      // rather than overwritten.
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      FLA_Fused_Ahx_Axpy_Ax_opd_var1( m_ahead,
                                      n_ahead,
                                      tau11,
                                      buff_1,
                                      A22,  rs_A, cs_A,
                                      u21p, inc_up,
                                      a12p, inc_ap,
                                      y21,  rs_Y,
                                      w21,  inc_w );

      // A second fused kernel call stands in for the operations listed
      // here. tmp21 is passed as scratch space and s01 as an output.
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE,    FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
      // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
      // FLA_Copy( A22_l, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
      // FLA_Copy( g0, s01 );
      FLA_Fused_UYx_ZVx_opd_var1( m_ahead,
                                  n_behind,
                                  m_behind,
                                  n_ahead,
                                  buff_m1,
                                  A20, rs_A, cs_A,
                                  Y20, rs_Y, cs_Y,
                                  Z20, rs_Z, cs_Z,
                                  A02, rs_A, cs_A,
                                  A22, rs_A, cs_A,
                                  tmp21, inc_tmp, 
                                  s01,  rs_S, 
                                  a12p, inc_ap, 
                                  w21,  inc_w, 
                                  a22l, inc_al );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_daxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_1, 
                  a12t, cs_A,
                  y21,  rs_Y );

      // Right transform computed from a12p; it yields alpha12,
      // psi11_minus_alpha12 and sigma11.
      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_opd( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // Build v21 from a12p: shift its first element by -alpha12, scale the
      // whole vector by 1 / psi11_minus_alpha12, conjugate the tail.
      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      // FLA_Conjugate( v21_b );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_dmult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );
      bl1_dconjv( n_ahead - 1,
                  v21_b, inc_v );

      // minus_conj_alpha12 is also needed by the z21 update in the next
      // n_ahead > 0 block.
      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_dcopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_dneg1( &minus_conj_alpha12 );

      // Finish s01 using the first column of A02 ( A02 walked with rs_A ).
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
      bl1_daxpyv( BLIS1_CONJUGATE,
                  n_behind,
                  &minus_conj_alpha12,
                  A02, rs_A,
                  s01, rs_S );
      bl1_dinvscalv( BLIS1_CONJUGATE,
                     n_behind,
                     &psi11_minus_alpha12,
                     s01, rs_S );

      // Overwrite row a12t: alpha12 first, then the tail of v21.
      // FLA_Copy( alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // FLA_Copy( u21p, u21 );
    bl1_dcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // beta = minus_inv_tau11 * dot( y21, v21 )
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_ddot( BLIS1_CONJUGATE,
                n_ahead,
                y21, rs_Y,
                v21, inc_v,
                &beta );
      bl1_dscals( &minus_inv_tau11, &beta );

      // Form z21 in column i of Z ( hence the rs_Z increment ).
      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, rs_Z );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  a22l, inc_al,
                  z21,  rs_Z );
      bl1_dinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, rs_Z );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, rs_Z );

      // Final scaling: y21 by 1 / tau11 and z21 by 1 / sigma11.
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, rs_Y );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, rs_Z );
    }
    else // if ( n_ahead == 0 )
    {
      // No columns remain to the right, so the block that normally fills
      // t01 from d0 did not run; compute t01 = a10t' + A20' * u21 here.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
      bl1_dcopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 t01, rs_T );
    }

    /*------------------------------------------------------------*/

  }

  // Release the workspace vectors.
  // FLA_Obj_free( &w );
  // FLA_Obj_free( &al );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &d );
  // FLA_Obj_free( &e );
  FLA_free( buff_tmp );
  FLA_free( buff_w );
  FLA_free( buff_al );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_d );
  FLA_free( buff_e );

  return FLA_SUCCESS;
}
/*
 * FLA_Bidiag_UT_u_step_ofs_var2
 *
 * Single-precision (float) step routine, variant 2, operating directly on
 * raw buffers and strides instead of FLA_Obj objects.  The commented-out
 * FLA_* calls that precede each group of bl1_* calls show the object-level
 * operation that the group replaces.
 *
 * Parameters
 *   m_A, n_A      Dimensions used to size the work vectors and to compute
 *                 how many rows/columns lie beyond the current index.
 *   m_TS          Number of iterations performed (used as b_alg).
 *   buff_A        Matrix A; element (r,c) is at buff_A + c*cs_A + r*rs_A.
 *                 Updated in place.
 *   rs_A, cs_A    Row and column strides of A.
 *   buff_T        Matrix T; column i receives t01 and the diagonal entry
 *                 tau11 written by the left Householder routine.
 *   rs_T, cs_T    Row and column strides of T.
 *   buff_S        Matrix S; column i receives s01 and the diagonal entry
 *                 sigma11 written by the right Householder routine.
 *   rs_S, cs_S    Row and column strides of S.
 *
 * Returns
 *   FLA_SUCCESS unconditionally.
 *
 * Three unit-stride work vectors (v, y, z) are obtained from FLA_malloc()
 * on entry and released with FLA_free() before returning.
 */
FLA_Error FLA_Bidiag_UT_u_step_ofs_var2 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

{
  // Pointers to the float values of the library constants 1, 0 and -1.
  float*    buff_1  = FLA_FLOAT_PTR( FLA_ONE );
  float*    buff_0  = FLA_FLOAT_PTR( FLA_ZERO );
  float*    buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );

  float     beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Work vectors: v and y hold n_A elements, z holds m_A elements.
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  float*    buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_v  = 1;
  int       inc_y  = 1;
  int       inc_z  = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A relative to the diagonal element (i,i):
    // "0" = rows/columns before i, "1" = row/column i, "2" = after i.
    float*    a10t     = buff_A + (0  )*cs_A + (i  )*rs_A;
    float*    A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    float*    alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    float*    a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    float*    A02      = buff_A + (i+1)*cs_A + (0  )*rs_A;
    float*    a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
    float*    A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    // Column i of T: the part above the diagonal and the diagonal entry.
    float*    t01      = buff_T + (i  )*cs_T + (0  )*rs_T;
    float*    tau11    = buff_T + (i  )*cs_T + (i  )*rs_T;

    // Column i of S: the part above the diagonal and the diagonal entry.
    float*    s01      = buff_S + (i  )*cs_S + (0  )*rs_S;
    float*    sigma11  = buff_S + (i  )*cs_S + (i  )*rs_S;

    // Trailing parts (from index i+1) of the work vectors.
    float*    v21      = buff_v + (i+1)*inc_v;

    float*    y21      = buff_y + (i+1)*inc_y;

    float*    z21      = buff_z + (i+1)*inc_z;

    // First element and remainder of a12t and of v21.
    float*    a12t_l   = a12t   + (0  )*cs_A + (0  )*rs_A;
    float*    a12t_r   = a12t   + (1  )*cs_A + (0  )*rs_A;

    float*    v21_t    = v21    + (0  )*inc_v;
    float*    v21_b    = v21    + (1  )*inc_v;

    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = n_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    FLA_Househ2_UT_l_ops( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );

    // Everything that involves columns to the right of column i is
    // skipped when there are none.
    if ( n_ahead > 0 )
    {
      // FLA_Copyt( FLA_TRANSPOSE, a12t, y21 );
      // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  y21,  inc_y );
      bl1_sgemv( BLIS1_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_1,
                 y21, inc_y );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );

      // FLA_Axpyt( FLA_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  buff_m1,
                  y21,  inc_y,
                  a12t, cs_A );

      // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
      FLA_Househ2_UT_r_ops( n_ahead - 1,
                            a12t_l,
                            a12t_r, cs_A,
                            sigma11 );

      // v21 = ( 1; a12t_r ).  The destination v21_b lives in buff_v, so
      // it is stepped with inc_v (the original passed inc_y here, which
      // only worked because both increments are 1).
      // FLA_Set( FLA_ONE, v21_t );
      // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
      *v21_t = *buff_1;
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  a12t_r, cs_A,
                  v21_b,  inc_v );

      // FLA_Dotc( FLA_CONJUGATE, v21, y21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      bl1_sdot( BLIS1_CONJUGATE,
                n_ahead,
                v21, inc_v,
                y21, inc_y,
                &beta );
      bl1_sneg1( &beta );

      // z21 starts as a copy of a21 and is then combined with A22 * v21
      // using beta as the scalar on the existing contents of z21.
      // FLA_Copy( a21, z21 );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  a21, rs_A,
                  z21, inc_z );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 v21, inc_v,
                 &beta,
                 z21, inc_z );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // One fused call stands in for the two rank-1 updates of A22
      // listed below.
      // FLA_Ger( FLA_MINUS_ONE, a21, y21, A22 );
      // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
      FLA_Fused_Gerc2_ops_var1( m_ahead,
                                n_ahead,
                                buff_m1,
                                a21, rs_A,
                                y21, inc_y,
                                z21, inc_z,
                                v21, inc_v,
                                A22, rs_A, cs_A );

      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // t01 is filled on every iteration, whether or not columns remain.
    // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
    bl1_scopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               a21, rs_A,
               buff_1,
               t01, rs_T );

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
/*
 * FLA_Bidiag_UT_u_step_ofs_var3
 *
 * Single-precision (float) step routine, variant 3, operating directly on
 * raw buffers and strides instead of FLA_Obj objects.  The commented-out
 * FLA_* calls that precede each group of bl1_* calls show the object-level
 * operation that the group replaces.
 *
 * Compared with an approach that updates A22 in every iteration, this
 * variant keeps the vectors u, y, z and v from one iteration to the next:
 *   - at the top of every iteration after the first (m_behind > 0) their
 *     contribution is subtracted from alpha11, a21 and a12t;
 *   - the matching rank-1 updates of A22 are passed to the fused kernel
 *     on iterations after the first, and are applied explicitly with
 *     bl1_sger() on the last iteration (m_behind + 1 == b_alg).
 *
 * Parameters
 *   m_A, n_A      Dimensions used to size the work vectors and to compute
 *                 how many rows/columns lie beyond the current index.
 *   m_TS          Number of iterations performed (used as b_alg).
 *   buff_A        Matrix A; element (r,c) is at buff_A + c*cs_A + r*rs_A.
 *                 Updated in place.
 *   rs_A, cs_A    Row and column strides of A.
 *   buff_T        Matrix T; column i receives t01 and the diagonal entry
 *                 tau11 written by the left Householder routine.
 *   rs_T, cs_T    Row and column strides of T.
 *   buff_S        Matrix S; column i receives s01 and the diagonal entry
 *                 sigma11 written by the right Householder routine.
 *   rs_S, cs_S    Row and column strides of S.
 *
 * Returns
 *   FLA_SUCCESS unconditionally.
 *
 * Seven unit-stride work vectors (w, ap, u, up, v, y, z) are obtained from
 * FLA_malloc() on entry and released with FLA_free() before returning.
 */
FLA_Error FLA_Bidiag_UT_u_step_ofs_var3 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sinvscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

{
  // Pointers to the float values of the library constants 1, 0 and -1.
  float*    buff_1  = FLA_FLOAT_PTR( FLA_ONE );
  float*    buff_0  = FLA_FLOAT_PTR( FLA_ZERO );
  float*    buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );

  // Scalar temporaries; each is assigned before use inside the branch
  // that consumes it.
  float     alpha12;
  float     minus_conj_alpha12;
  float     psi11_minus_alpha12;
  float     minus_inv_tau11;
  float     minus_upsilon11;
  float     minus_conj_nu11;
  float     minus_conj_psi11;
  float     minus_zeta11;
  float     beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Work vectors: w, u, up and z hold m_A elements; ap, v and y hold n_A.
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  float*    buff_w  = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_u  = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_v  = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_y  = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_z  = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_w   = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_y   = 1;
  int       inc_z   = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A relative to the diagonal element (i,i):
    // "0" = rows/columns before i, "1" = row/column i, "2" = after i.
    float*    a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    float*    A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    float*    alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    float*    a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    float*    A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    float*    a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    float*    A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    // Column i of T: the part above the diagonal and the diagonal entry.
    float*    t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    float*    tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    // Column i of S: the part above the diagonal and the diagonal entry.
    float*    s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    float*    sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    // Element i and the trailing part (from i+1) of each work vector.
    // Element i of u, v, y, z is the first element of what the previous
    // iteration addressed as u21, v21, y21, z21.
    float*    w21       = buff_w  + (i+1)*inc_w;

    float*    a12p      = buff_ap + (i+1)*inc_ap;

    float*    upsilon11 = buff_u  + (i  )*inc_u;
    float*    u21       = buff_u  + (i+1)*inc_u;

    float*    u21p      = buff_up + (i+1)*inc_up;

    float*    nu11      = buff_v  + (i  )*inc_v;
    float*    v21       = buff_v  + (i+1)*inc_v;

    float*    psi11     = buff_y  + (i  )*inc_y;
    float*    y21       = buff_y  + (i+1)*inc_y;

    float*    zeta11    = buff_z  + (i  )*inc_z;
    float*    z21       = buff_z  + (i+1)*inc_z;

    // First element and remainder of a12p, v21 and a12t.
    float*    a12p_t    = a12p    + (0  )*inc_ap;
    float*    a12p_b    = a12p    + (1  )*inc_ap;

    float*    v21_t     = v21     + (0  )*inc_v;
    float*    v21_b     = v21     + (1  )*inc_v;

    float*    a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    float*    a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    // First column of A22.
    float*    A22_l     = A22     + (0  )*cs_A + (0  )*rs_A;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    // From the second iteration on, bring alpha11, a21 and a12t up to
    // date using the u, y, z, v vectors left by the previous iteration.
    if ( m_behind > 0 )
    {
      // FLA_Copy( upsilon11, minus_upsilon11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
      bl1_smult3( buff_m1, upsilon11, &minus_upsilon11 );

      // FLA_Copy( zeta11, minus_zeta11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
      bl1_smult3( buff_m1, zeta11, &minus_zeta11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
      bl1_scopyconj( psi11, &minus_conj_psi11 );
      bl1_sscals( buff_m1, &minus_conj_psi11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
      bl1_scopyconj( nu11, &minus_conj_nu11 );
      bl1_sscals( buff_m1, &minus_conj_nu11 );

      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_upsilon11, psi11, alpha11 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_zeta11,    nu11,  alpha11 );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_upsilon11,
                  psi11,   1,
                  alpha11, 1 );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_zeta11,
                  nu11,    1,
                  alpha11, 1 );

      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  z21, a21 );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_psi11,
                  u21, inc_u,
                  a21, rs_A );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_nu11,
                  z21, inc_z,
                  a21, rs_A );

      // FLA_Axpyt( FLA_TRANSPOSE, minus_upsilon11, y21, a12t );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_zeta11,    v21, a12t );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_upsilon11,
                  y21,  inc_y,
                  a12t, cs_A );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_zeta11,
                  v21,  inc_v,
                  a12t, cs_A );
    }

    // The new a21 is kept in u21p rather than u21 because u21 (from the
    // previous iteration) is still read by the fused update below.
    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_ops( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_scopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    if ( n_ahead > 0 )
    {
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_sdiv3( buff_m1, tau11, &minus_inv_tau11 );

      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );
    }

    // The first iteration has no pending rank-1 updates, so it uses the
    // fused kernel that does not take the u/y/z/v operands.
    if ( m_behind > 0 && n_ahead > 0 )
    {
      // FLA_Ger( FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
      // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1( m_ahead,
                                            n_ahead,
                                            tau11,
                                            buff_m1,
                                            u21, inc_u,
                                            y21, inc_y,
                                            z21, inc_z,
                                            v21, inc_v,
                                            A22, rs_A, cs_A,
                                            u21p, inc_up,
                                            a12p, inc_ap,
                                            w21,  inc_w );
    }
    else if ( n_ahead > 0 )
    {
      // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      FLA_Fused_Ahx_Axpy_Ax_ops_var1( m_ahead,
                                      n_ahead,
                                      tau11,
                                      buff_0,
                                      A22,  rs_A, cs_A,
                                      u21p, inc_up,
                                      a12p, inc_ap,
                                      y21,  inc_y,
                                      w21,  inc_w );
    }

    if ( n_ahead > 0 )
    {
      // FLA_Axpyt( FLA_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  buff_1,
                  a12t, cs_A,
                  y21,  inc_y );

      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_ops( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_smult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );

      // Write the result back into row i of A: alpha12 in the first
      // position, the tail of v21 in the remaining positions.
      // FLA_Copy( alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // The previous u21 is no longer needed; replace it with the new one.
    // FLA_Copy( u21p, u21 );
    bl1_scopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_sdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, inc_y,
                v21, inc_v,
                &beta );
      bl1_sscals( &minus_inv_tau11, &beta );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_scopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_sneg1( &minus_conj_alpha12 );

      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, inc_z );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  A22_l, rs_A,
                  z21,   inc_z );
      bl1_sinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, inc_z );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, inc_z );

      // Both scalings are non-conjugating, as the FLA_Inv_scalc calls
      // below specify (the y21 scaling previously passed BLIS1_CONJUGATE,
      // contradicting them).
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // t01 is filled on every iteration, whether or not columns remain.
    // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
    bl1_scopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               u21, inc_u,
               buff_1,
               t01, rs_T );

    // Last iteration: no later iteration will fold u, y, z, v into A22,
    // so apply the two rank-1 updates here.
    if ( m_behind + 1 == b_alg && n_ahead > 0 )
    {
      // FLA_Ger( FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
      bl1_sger( BLIS1_NO_CONJUGATE,
                BLIS1_NO_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u21, inc_u,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_sger( BLIS1_NO_CONJUGATE,
                BLIS1_NO_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &w );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_w );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_step_ofs_var4 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_sconjv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), bl1_ssetm(), bl1_ssetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_UYx_ZVx_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

{
  float*    buff_1  = FLA_FLOAT_PTR( FLA_ONE );
  float*    buff_0  = FLA_FLOAT_PTR( FLA_ZERO );
  float*    buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );

  float     alpha12;
  float     minus_conj_alpha12;
  float     psi11_minus_alpha12;
  float     minus_inv_tau11;
  float     beta;
  float     last_elem;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
  float*    buff_tmp = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_w  = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_al = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_u  = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_v  = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_d  = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_e  = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  int       inc_tmp = 1;
  int       inc_w   = 1;
  int       inc_al  = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_d   = 1;
  int       inc_e   = 1;

  // FLA_Set( FLA_ZERO, Y );
  // FLA_Set( FLA_ZERO, Z );
  bl1_ssetm( n_A,
             b_alg,
             buff_0,
             buff_Y, rs_Y, cs_Y );
  bl1_ssetm( m_A,
             b_alg,
             buff_0,
             buff_Z, rs_Z, cs_Z );

  for ( i = 0; i < b_alg; ++i )
  {
    float*    a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    float*    A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    float*    a01       = buff_A  + (i  )*cs_A + (0  )*rs_A;
    float*    alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    float*    a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    float*    A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    float*    a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    float*    A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    float*    y10t      = buff_Y  + (0  )*cs_Y + (i  )*rs_Y;
    float*    Y20       = buff_Y  + (0  )*cs_Y + (i+1)*rs_Y;
    float*    y21       = buff_Y  + (i  )*cs_Y + (i+1)*rs_Y;

    float*    z10t      = buff_Z  + (0  )*cs_Z + (i  )*rs_Z;
    float*    Z20       = buff_Z  + (0  )*cs_Z + (i+1)*rs_Z;
    float*    z21       = buff_Z  + (i  )*cs_Z + (i+1)*rs_Z;

    float*    t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    float*    tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    float*    s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    float*    sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    float*    tmp21     = buff_tmp + (i+1)*inc_tmp;

    float*    w21       = buff_w  + (i+1)*inc_w;

    float*    a22l      = buff_al + (i+1)*inc_al;

    float*    a12p      = buff_ap + (i+1)*inc_ap;

    float*    u21       = buff_u  + (i+1)*inc_u;

    float*    u21p      = buff_up + (i+1)*inc_up;

    float*    v21       = buff_v  + (i+1)*inc_v;

    float*    d0        = buff_d  + (0  )*inc_d;

    float*    e0        = buff_e  + (0  )*inc_e;

    float*    a12p_t    = a12p    + (0  )*inc_ap;
    float*    a12p_b    = a12p    + (1  )*inc_ap;

    float*    v21_t     = v21     + (0  )*inc_v;
    float*    v21_b     = v21     + (1  )*inc_v;

    float*    a01_b     = a01     + (0  )*cs_A + (i-1)*rs_A;

    float*    a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    float*    a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    float*    ABL       = a10t;
    float*    ZBL       = z10t;

    float*    a2        = alpha11;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    if ( m_behind > 0 )
    {
      // FLA_Copy( a01_b, last_elem );
      // FLA_Set( FLA_ONE, a01_b );
      last_elem = *a01_b;
      *a01_b = *buff_1;
    }

    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01,  FLA_ONE, a2 );
    bl1_sgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ABL,  rs_A, cs_A,
               y10t, cs_Y,
               buff_1,
               a2,   rs_A );
    bl1_sgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ZBL, rs_Z, cs_Z,
               a01, rs_A,
               buff_1,
               a2,  rs_A );

    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE,    FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
    bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               n_ahead,
               n_behind,
               buff_m1,
               Y20,  rs_Y, cs_Y,
               a10t, cs_A,
               buff_1,
               a12t, cs_A );
    bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_behind,
               n_ahead,
               buff_m1,
               A02,  rs_A, cs_A,
               z10t, cs_Z,
               buff_1,
               a12t, cs_A );

    if ( m_behind > 0 )
    {
      // FLA_Copy( last_elem, a01_b );
      *a01_b = last_elem;
    }

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_ops( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_scopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    if ( n_ahead > 0 )
    {
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_sdiv3( buff_m1, tau11, &minus_inv_tau11 );

      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );

      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_0,
                 d0,   inc_d );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 Z20,  rs_Z, cs_Z,
                 u21p, inc_up,
                 buff_0,
                 e0,   inc_e );

      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Axpy( FLA_ONE, d0, t01 );
      bl1_scopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  buff_1,
                  d0,  inc_d,
                  t01, rs_T );

      // FLA_Set( FLA_ZERO, y21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      // FLA_Gemv( FLA_TRANSPOSE,    FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
      bl1_ssetv( n_ahead,
                 buff_0,
                 y21, rs_Y );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_m1,
                 Y20, rs_Y, cs_Y,
                 d0,  inc_d,
                 buff_1,
                 y21, rs_Y );
      bl1_sgemv( BLIS1_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_m1,
                 A02, rs_A, cs_A,
                 e0,  inc_e,
                 buff_1,
                 y21, rs_Y );

      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      FLA_Fused_Ahx_Axpy_Ax_ops_var1( m_ahead,
                                      n_ahead,
                                      tau11,
                                      buff_1,
                                      A22,  rs_A, cs_A,
                                      u21p, inc_up,
                                      a12p, inc_ap,
                                      y21,  rs_Y,
                                      w21,  inc_w );

      // FLA_Gemvc( FLA_CONJ_TRANSPOSE,    FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
      // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
      // FLA_Copy( A22_l, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
      // FLA_Copy( g0, s01 );
      FLA_Fused_UYx_ZVx_ops_var1( m_ahead,
                                  n_behind,
                                  m_behind,
                                  n_ahead,
                                  buff_m1,
                                  A20, rs_A, cs_A,
                                  Y20, rs_Y, cs_Y,
                                  Z20, rs_Z, cs_Z,
                                  A02, rs_A, cs_A,
                                  A22, rs_A, cs_A,
                                  tmp21, inc_tmp, 
                                  s01,  rs_S, 
                                  a12p, inc_ap, 
                                  w21,  inc_w, 
                                  a22l, inc_al );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_saxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_1, 
                  a12t, cs_A,
                  y21,  rs_Y );

      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_ops( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      // FLA_Conjugate( v21_b );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_smult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );
      bl1_sconjv( n_ahead - 1,
                  v21_b, inc_v );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_scopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_sneg1( &minus_conj_alpha12 );

      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
      bl1_saxpyv( BLIS1_CONJUGATE,
                  n_behind,
                  &minus_conj_alpha12,
                  A02, rs_A,
                  s01, rs_S );
      bl1_sinvscalv( BLIS1_CONJUGATE,
                     n_behind,
                     &psi11_minus_alpha12,
                     s01, rs_S );

      // FLA_Copy( alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // FLA_Copy( u21p, u21 );
    bl1_scopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_sdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, rs_Y,
                v21, inc_v,
                &beta );
      bl1_sscals( &minus_inv_tau11, &beta );

      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, rs_Z );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  a22l, inc_al,
                  z21,  rs_Z );
      bl1_sinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, rs_Z );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, rs_Z );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, rs_Y );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, rs_Z );
    }
    else // if ( n_ahead == 0 )
    {
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
      bl1_scopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 t01, rs_T );
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &w );
  // FLA_Obj_free( &al );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &d );
  // FLA_Obj_free( &e );
  FLA_free( buff_tmp );
  FLA_free( buff_w );
  FLA_free( buff_al );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_d );
  FLA_free( buff_e );

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blf_var2(), and FLA_Bidiag_UT_u_ofu_var2().

{
  // Type-dispatching front end: collects the dimensions and strides of the
  // A, T and S objects and forwards the raw buffers to the var2 step routine
  // that matches the datatype of A.  A datatype other than the four handled
  // below results in no work being done.  FLA_SUCCESS is returned in every
  // case; the return value of the typed routine is not inspected.
  const FLA_Datatype dt   = FLA_Obj_datatype( A );

  // Dimensions of A, and the length of T.
  const int          m_A  = FLA_Obj_length( A );
  const int          n_A  = FLA_Obj_width( A );
  const int          m_TS = FLA_Obj_length( T );

  // Row / column strides of each operand.
  const int          rs_A = FLA_Obj_row_stride( A );
  const int          cs_A = FLA_Obj_col_stride( A );
  const int          rs_T = FLA_Obj_row_stride( T );
  const int          cs_T = FLA_Obj_col_stride( T );
  const int          rs_S = FLA_Obj_row_stride( S );
  const int          cs_S = FLA_Obj_col_stride( S );

  if ( dt == FLA_FLOAT )
  {
    // Single-precision real.
    float* a_buf = FLA_FLOAT_PTR( A );
    float* t_buf = FLA_FLOAT_PTR( T );
    float* s_buf = FLA_FLOAT_PTR( S );

    FLA_Bidiag_UT_u_step_ofs_var2( m_A, n_A, m_TS,
                                   a_buf, rs_A, cs_A,
                                   t_buf, rs_T, cs_T,
                                   s_buf, rs_S, cs_S );
  }
  else if ( dt == FLA_DOUBLE )
  {
    // Double-precision real.
    double* a_buf = FLA_DOUBLE_PTR( A );
    double* t_buf = FLA_DOUBLE_PTR( T );
    double* s_buf = FLA_DOUBLE_PTR( S );

    FLA_Bidiag_UT_u_step_ofd_var2( m_A, n_A, m_TS,
                                   a_buf, rs_A, cs_A,
                                   t_buf, rs_T, cs_T,
                                   s_buf, rs_S, cs_S );
  }
  else if ( dt == FLA_COMPLEX )
  {
    // Single-precision complex.
    scomplex* a_buf = FLA_COMPLEX_PTR( A );
    scomplex* t_buf = FLA_COMPLEX_PTR( T );
    scomplex* s_buf = FLA_COMPLEX_PTR( S );

    FLA_Bidiag_UT_u_step_ofc_var2( m_A, n_A, m_TS,
                                   a_buf, rs_A, cs_A,
                                   t_buf, rs_T, cs_T,
                                   s_buf, rs_S, cs_S );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    // Double-precision complex.
    dcomplex* a_buf = FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex* t_buf = FLA_DOUBLE_COMPLEX_PTR( T );
    dcomplex* s_buf = FLA_DOUBLE_COMPLEX_PTR( S );

    FLA_Bidiag_UT_u_step_ofz_var2( m_A, n_A, m_TS,
                                   a_buf, rs_A, cs_A,
                                   t_buf, rs_T, cs_T,
                                   s_buf, rs_S, cs_S );
  }

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blf_var3(), and FLA_Bidiag_UT_u_ofu_var3().

{
  // Type-dispatching front end: collects the dimensions and strides of the
  // A, T and S objects and forwards the raw buffers to the var3 step routine
  // that matches the datatype of A.  A datatype other than the four handled
  // below results in no work being done.  FLA_SUCCESS is returned in every
  // case; the return value of the typed routine is not inspected.
  const FLA_Datatype dt   = FLA_Obj_datatype( A );

  // Dimensions of A, and the length of T.
  const int          m_A  = FLA_Obj_length( A );
  const int          n_A  = FLA_Obj_width( A );
  const int          m_TS = FLA_Obj_length( T );

  // Row / column strides of each operand.
  const int          rs_A = FLA_Obj_row_stride( A );
  const int          cs_A = FLA_Obj_col_stride( A );
  const int          rs_T = FLA_Obj_row_stride( T );
  const int          cs_T = FLA_Obj_col_stride( T );
  const int          rs_S = FLA_Obj_row_stride( S );
  const int          cs_S = FLA_Obj_col_stride( S );

  if ( dt == FLA_FLOAT )
  {
    // Single-precision real.
    float* a_buf = FLA_FLOAT_PTR( A );
    float* t_buf = FLA_FLOAT_PTR( T );
    float* s_buf = FLA_FLOAT_PTR( S );

    FLA_Bidiag_UT_u_step_ofs_var3( m_A, n_A, m_TS,
                                   a_buf, rs_A, cs_A,
                                   t_buf, rs_T, cs_T,
                                   s_buf, rs_S, cs_S );
  }
  else if ( dt == FLA_DOUBLE )
  {
    // Double-precision real.
    double* a_buf = FLA_DOUBLE_PTR( A );
    double* t_buf = FLA_DOUBLE_PTR( T );
    double* s_buf = FLA_DOUBLE_PTR( S );

    FLA_Bidiag_UT_u_step_ofd_var3( m_A, n_A, m_TS,
                                   a_buf, rs_A, cs_A,
                                   t_buf, rs_T, cs_T,
                                   s_buf, rs_S, cs_S );
  }
  else if ( dt == FLA_COMPLEX )
  {
    // Single-precision complex.
    scomplex* a_buf = FLA_COMPLEX_PTR( A );
    scomplex* t_buf = FLA_COMPLEX_PTR( T );
    scomplex* s_buf = FLA_COMPLEX_PTR( S );

    FLA_Bidiag_UT_u_step_ofc_var3( m_A, n_A, m_TS,
                                   a_buf, rs_A, cs_A,
                                   t_buf, rs_T, cs_T,
                                   s_buf, rs_S, cs_S );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    // Double-precision complex.
    dcomplex* a_buf = FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex* t_buf = FLA_DOUBLE_COMPLEX_PTR( T );
    dcomplex* s_buf = FLA_DOUBLE_COMPLEX_PTR( S );

    FLA_Bidiag_UT_u_step_ofz_var3( m_A, n_A, m_TS,
                                   a_buf, rs_A, cs_A,
                                   t_buf, rs_T, cs_T,
                                   s_buf, rs_S, cs_S );
  }

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blf_var4(), and FLA_Bidiag_UT_u_ofu_var4().

{
  // Type-dispatching front end: collects the dimensions and strides of the
  // A, Y, Z, T and S objects and forwards the raw buffers to the var4 step
  // routine that matches the datatype of A.  A datatype other than the four
  // handled below results in no work being done.  FLA_SUCCESS is returned in
  // every case; the return value of the typed routine is not inspected.
  const FLA_Datatype dt   = FLA_Obj_datatype( A );

  // Dimensions of A, and the length of T.
  const int          m_A  = FLA_Obj_length( A );
  const int          n_A  = FLA_Obj_width( A );
  const int          m_TS = FLA_Obj_length( T );

  // Row / column strides of each operand.
  const int          rs_A = FLA_Obj_row_stride( A );
  const int          cs_A = FLA_Obj_col_stride( A );
  const int          rs_Y = FLA_Obj_row_stride( Y );
  const int          cs_Y = FLA_Obj_col_stride( Y );
  const int          rs_Z = FLA_Obj_row_stride( Z );
  const int          cs_Z = FLA_Obj_col_stride( Z );
  const int          rs_T = FLA_Obj_row_stride( T );
  const int          cs_T = FLA_Obj_col_stride( T );
  const int          rs_S = FLA_Obj_row_stride( S );
  const int          cs_S = FLA_Obj_col_stride( S );

  if ( dt == FLA_FLOAT )
  {
    // Single-precision real.
    float* a_buf = FLA_FLOAT_PTR( A );
    float* y_buf = FLA_FLOAT_PTR( Y );
    float* z_buf = FLA_FLOAT_PTR( Z );
    float* t_buf = FLA_FLOAT_PTR( T );
    float* s_buf = FLA_FLOAT_PTR( S );

    FLA_Bidiag_UT_u_step_ofs_var4( m_A, n_A, m_TS,
                                   a_buf, rs_A, cs_A,
                                   y_buf, rs_Y, cs_Y,
                                   z_buf, rs_Z, cs_Z,
                                   t_buf, rs_T, cs_T,
                                   s_buf, rs_S, cs_S );
  }
  else if ( dt == FLA_DOUBLE )
  {
    // Double-precision real.
    double* a_buf = FLA_DOUBLE_PTR( A );
    double* y_buf = FLA_DOUBLE_PTR( Y );
    double* z_buf = FLA_DOUBLE_PTR( Z );
    double* t_buf = FLA_DOUBLE_PTR( T );
    double* s_buf = FLA_DOUBLE_PTR( S );

    FLA_Bidiag_UT_u_step_ofd_var4( m_A, n_A, m_TS,
                                   a_buf, rs_A, cs_A,
                                   y_buf, rs_Y, cs_Y,
                                   z_buf, rs_Z, cs_Z,
                                   t_buf, rs_T, cs_T,
                                   s_buf, rs_S, cs_S );
  }
  else if ( dt == FLA_COMPLEX )
  {
    // Single-precision complex.
    scomplex* a_buf = FLA_COMPLEX_PTR( A );
    scomplex* y_buf = FLA_COMPLEX_PTR( Y );
    scomplex* z_buf = FLA_COMPLEX_PTR( Z );
    scomplex* t_buf = FLA_COMPLEX_PTR( T );
    scomplex* s_buf = FLA_COMPLEX_PTR( S );

    FLA_Bidiag_UT_u_step_ofc_var4( m_A, n_A, m_TS,
                                   a_buf, rs_A, cs_A,
                                   y_buf, rs_Y, cs_Y,
                                   z_buf, rs_Z, cs_Z,
                                   t_buf, rs_T, cs_T,
                                   s_buf, rs_S, cs_S );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    // Double-precision complex.
    dcomplex* a_buf = FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex* y_buf = FLA_DOUBLE_COMPLEX_PTR( Y );
    dcomplex* z_buf = FLA_DOUBLE_COMPLEX_PTR( Z );
    dcomplex* t_buf = FLA_DOUBLE_COMPLEX_PTR( T );
    dcomplex* s_buf = FLA_DOUBLE_COMPLEX_PTR( S );

    FLA_Bidiag_UT_u_step_ofz_var4( m_A, n_A, m_TS,
                                   a_buf, rs_A, cs_A,
                                   y_buf, rs_Y, cs_Y,
                                   z_buf, rs_Z, cs_Z,
                                   t_buf, rs_T, cs_T,
                                   s_buf, rs_S, cs_S );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_step_ofz_var2 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T,
dcomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

{
  // Double-precision complex worker for the var2 step.
  //
  // Arguments, as they are used below:
  //   m_A, n_A  - row / column counts of A; they size the trailing
  //               partitions (m_ahead = m_A - i - 1, n_ahead = n_A - i - 1)
  //               and the workspace vectors.
  //   m_TS      - number of loop iterations performed (b_alg).
  //   buff_A    - matrix A, element (r,c) addressed as
  //               buff_A + c*cs_A + r*rs_A; updated in place.
  //   buff_T    - column i receives t01 above the diagonal; the diagonal
  //               entry tau11 is handed to FLA_Househ2_UT_l_opz
  //               (presumably as its output -- confirm against that routine).
  //   buff_S    - column i receives s01 above the diagonal; the diagonal
  //               entry sigma11 is handed to FLA_Househ2_UT_r_opz
  //               (presumably as its output -- confirm against that routine).
  //
  // Always returns FLA_SUCCESS.
  dcomplex* buff_1  = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* buff_0  = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );

  dcomplex  beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Contiguous (unit-stride) workspace vectors; released before returning.
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_v  = 1;
  int       inc_y  = 1;
  int       inc_z  = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Partition A, T, S and the workspace vectors around diagonal index i.
    dcomplex* a10t     = buff_A + (0  )*cs_A + (i  )*rs_A;
    dcomplex* A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    dcomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    dcomplex* a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    dcomplex* A02      = buff_A + (i+1)*cs_A + (0  )*rs_A;
    dcomplex* a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
    dcomplex* A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    dcomplex* t01      = buff_T + (i  )*cs_T + (0  )*rs_T;
    dcomplex* tau11    = buff_T + (i  )*cs_T + (i  )*rs_T;

    dcomplex* s01      = buff_S + (i  )*cs_S + (0  )*rs_S;
    dcomplex* sigma11  = buff_S + (i  )*cs_S + (i  )*rs_S;

    dcomplex* v21      = buff_v + (i+1)*inc_v;

    dcomplex* y21      = buff_y + (i+1)*inc_y;

    dcomplex* z21      = buff_z + (i+1)*inc_z;

    // First element / remainder of the row a12t and of the vector v21.
    dcomplex* a12t_l   = a12t   + (0  )*cs_A + (0  )*rs_A;
    dcomplex* a12t_r   = a12t   + (1  )*cs_A + (0  )*rs_A;

    dcomplex* v21_t    = v21    + (0  )*inc_v;
    dcomplex* v21_b    = v21    + (1  )*inc_v;

    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = n_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    FLA_Househ2_UT_l_opz( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );

    // Everything in this branch touches columns to the right of the
    // diagonal, so it is skipped once none remain.
    if ( n_ahead > 0 )
    {
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
      bl1_zcopyv( BLIS1_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  y21,  inc_y );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_1,
                 y21, inc_y );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
      bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_m1,
                  y21,  inc_y,
                  a12t, cs_A );

      // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
      FLA_Househ2_UT_r_opz( n_ahead - 1,
                            a12t_l,
                            a12t_r, cs_A,
                            sigma11 );

      // FLA_Set( FLA_ONE, v21_t );
      // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
      // v21_b is a sub-vector of buff_v, so its stride is inc_v.
      *v21_t = *buff_1;
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  a12t_r, cs_A,
                  v21_b,  inc_v );

      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      bl1_zdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, inc_y,
                v21, inc_v,
                &beta );
      bl1_zneg1( &beta );

      // FLA_Copy( a21, z21 );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  a21, rs_A,
                  z21, inc_z );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 v21, inc_v,
                 &beta,
                 z21, inc_z );
      bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      FLA_Fused_Gerc2_opz_var1( m_ahead,
                                n_ahead,
                                buff_m1,
                                a21, rs_A,
                                y21, inc_y,
                                z21, inc_z,
                                v21, inc_v,
                                A22, rs_A, cs_A );

      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // Column i of T above the diagonal is formed on every iteration,
    // regardless of n_ahead.
    // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
    bl1_zcopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               a21, rs_A,
               buff_1,
               t01, rs_T );

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_step_ofz_var3 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T,
dcomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

{
  dcomplex* buff_1  = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* buff_0  = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );

  dcomplex  alpha12;
  dcomplex  minus_conj_alpha12;
  dcomplex  psi11_minus_alpha12;
  dcomplex  minus_inv_tau11;
  dcomplex  minus_upsilon11;
  dcomplex  minus_conj_nu11;
  dcomplex  minus_conj_psi11;
  dcomplex  minus_zeta11;
  dcomplex  beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  dcomplex* buff_w  = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_u  = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_v  = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_y  = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_z  = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_w   = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_y   = 1;
  int       inc_z   = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    dcomplex* a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    dcomplex* A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    dcomplex* alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    dcomplex* a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    dcomplex* A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    dcomplex* a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    dcomplex* A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    dcomplex* t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    dcomplex* tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    dcomplex* s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    dcomplex* sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    dcomplex* w21       = buff_w  + (i+1)*inc_w;

    dcomplex* a12p      = buff_ap + (i+1)*inc_ap;

    dcomplex* upsilon11 = buff_u  + (i  )*inc_u;
    dcomplex* u21       = buff_u  + (i+1)*inc_u;

    dcomplex* u21p      = buff_up + (i+1)*inc_up;

    dcomplex* nu11      = buff_v  + (i  )*inc_v;
    dcomplex* v21       = buff_v  + (i+1)*inc_v;

    dcomplex* psi11     = buff_y  + (i  )*inc_y;
    dcomplex* y21       = buff_y  + (i+1)*inc_y;

    dcomplex* zeta11    = buff_z  + (i  )*inc_z;
    dcomplex* z21       = buff_z  + (i+1)*inc_z;

    dcomplex* a12p_t    = a12p    + (0  )*inc_ap;
    dcomplex* a12p_b    = a12p    + (1  )*inc_ap;

    dcomplex* v21_t     = v21     + (0  )*inc_v;
    dcomplex* v21_b     = v21     + (1  )*inc_v;

    dcomplex* a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    dcomplex* a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    dcomplex* A22_l     = A22     + (0  )*cs_A + (0  )*rs_A;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    if ( m_behind > 0 )
    {
      // FLA_Copy( upsilon11, minus_upsilon11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
      bl1_zmult3( buff_m1, upsilon11, &minus_upsilon11 );

      // FLA_Copy( zeta11, minus_zeta11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
      bl1_zmult3( buff_m1, zeta11, &minus_zeta11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
      bl1_zcopyconj( psi11, &minus_conj_psi11 );
      bl1_zscals( buff_m1, &minus_conj_psi11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
      bl1_zcopyconj( nu11, &minus_conj_nu11 );
      bl1_zscals( buff_m1, &minus_conj_nu11 );

      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  zeta11,    alpha11 );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_conj_psi11,
                  upsilon11, 1,
                  alpha11,   1 );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_conj_nu11,
                  zeta11,  1,
                  alpha11, 1 );

      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  z21, a21 );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_psi11,
                  u21, inc_u,
                  a21, rs_A );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_nu11,
                  z21, inc_z,
                  a21, rs_A );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11,    v21, a12t );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_upsilon11,
                  y21,  inc_y,
                  a12t, cs_A );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_zeta11,
                  v21,  inc_v,
                  a12t, cs_A );
    }

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_opz( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_zcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    if ( n_ahead > 0 )
    {
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_zdiv3( buff_m1, tau11, &minus_inv_tau11 );

      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );
    }

    if ( m_behind > 0 && n_ahead > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1( m_ahead,
                                            n_ahead,
                                            tau11,
                                            buff_m1,
                                            u21, inc_u,
                                            y21, inc_y,
                                            z21, inc_z,
                                            v21, inc_v,
                                            A22, rs_A, cs_A,
                                            u21p, inc_up,
                                            a12p, inc_ap,
                                            w21,  inc_w );
                                           
                                           
    }
    else if ( n_ahead > 0 )
    {
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      FLA_Fused_Ahx_Axpy_Ax_opz_var1( m_ahead,
                                      n_ahead,
                                      tau11,
                                      buff_0,
                                      A22,  rs_A, cs_A,
                                      u21p, inc_up,
                                      a12p, inc_ap,
                                      y21,  inc_y,
                                      w21,  inc_w );
    }

    if ( n_ahead > 0 )
    {
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_1,
                  a12t, cs_A,
                  y21,  inc_y );

      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_opz( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      // FLA_Conjugate( v21_b );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_zmult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );
      bl1_zconjv( n_ahead - 1,
                  v21_b, inc_v );

      // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // FLA_Copy( u21p, u21 );
    bl1_zcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_zdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, inc_y,
                v21, inc_v,
                &beta );
      bl1_zscals( &minus_inv_tau11, &beta );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_zcopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_zneg1( &minus_conj_alpha12 );

      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, inc_z );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  A22_l, rs_A,
                  z21,   inc_z );
      bl1_zinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, inc_z );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, inc_z );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_zinvscalv( BLIS1_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );
      bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
    bl1_zcopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               u21, inc_u,
               buff_1,
               t01, rs_T );

    if ( m_behind + 1 == b_alg && n_ahead > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u21, inc_u,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &w );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_w );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
// FLA_Bidiag_UT_u_step_ofz_var4 -- double-precision complex (dcomplex) variant.
//
// Runs b_alg = m_TS iterations over the m_A x n_A matrix stored in buff_A
// (row stride rs_A, column stride cs_A). In iteration i the routine:
//   1. subtracts from column i (alpha11/a21) and row i (a12t) of A the
//      contributions accumulated in the first i columns of Y and Z;
//   2. computes the left transform from alpha11/a21 with
//      FLA_Househ2_UT_l_opz, leaving its scalar in tau11;
//   3. if columns remain to the right (n_ahead > 0), builds y21 and w21 via
//      the fused kernels, computes the right transform with
//      FLA_Househ2s_UT_r_opz (scalar in sigma11), overwrites a12t with
//      alpha12 and the transform vector, and fills y21, z21, t01 and s01;
//   4. otherwise only t01 is formed.
//
// Parameters:
//   m_A, n_A            dimensions of A.
//   m_TS                number of iterations (b_alg); T and S are indexed up
//                       to element (b_alg-1, b_alg-1).
//   buff_A, rs_A, cs_A  matrix A, updated in place.
//   buff_Y, rs_Y, cs_Y  n_A x b_alg workspace Y, zeroed on entry.
//   buff_Z, rs_Z, cs_Z  m_A x b_alg workspace Z, zeroed on entry.
//   buff_T, rs_T, cs_T  receives t01/tau11 in column i.
//   buff_S, rs_S, cs_S  receives s01/sigma11 in column i.
//
// Returns FLA_SUCCESS. All temporary vectors are obtained with FLA_malloc
// and released with FLA_free before returning.
FLA_Error FLA_Bidiag_UT_u_step_ofz_var4 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *buff_A,
int  rs_A,
int  cs_A,
dcomplex *buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *buff_T,
int  rs_T,
int  cs_T,
dcomplex *buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), bl1_zsetm(), bl1_zsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_UYx_ZVx_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

{
  // Pointers to the library's constant scalars 1, 0 and -1.
  dcomplex* buff_1  = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* buff_0  = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );

  dcomplex  alpha12;
  dcomplex  minus_conj_alpha12;
  dcomplex  psi11_minus_alpha12;
  dcomplex  minus_inv_tau11;
  dcomplex  beta;
  dcomplex  last_elem;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
  // NOTE: the f and g objects named above have no buffers of their own here;
  // the code allocates buff_tmp instead and passes tmp21 and s01 to the
  // fused UYx_ZVx kernel below.
  dcomplex* buff_tmp = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_w  = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_al = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_u  = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_v  = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_d  = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_e  = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  // All temporary vectors are contiguous (unit stride).
  int       inc_tmp = 1;
  int       inc_w   = 1;
  int       inc_al  = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_d   = 1;
  int       inc_e   = 1;

  // FLA_Set( FLA_ZERO, Y );
  // FLA_Set( FLA_ZERO, Z );
  bl1_zsetm( n_A,
             b_alg,
             buff_0,
             buff_Y, rs_Y, cs_Y );
  bl1_zsetm( m_A,
             b_alg,
             buff_0,
             buff_Z, rs_Z, cs_Z );

  for ( i = 0; i < b_alg; ++i )
  {
    // Partition A, Y, Z, T and S around the current diagonal position (i,i).
    dcomplex* a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    dcomplex* A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    dcomplex* a01       = buff_A  + (i  )*cs_A + (0  )*rs_A;
    dcomplex* alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    dcomplex* a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    dcomplex* A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    dcomplex* a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    dcomplex* A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    dcomplex* y10t      = buff_Y  + (0  )*cs_Y + (i  )*rs_Y;
    dcomplex* Y20       = buff_Y  + (0  )*cs_Y + (i+1)*rs_Y;
    dcomplex* y21       = buff_Y  + (i  )*cs_Y + (i+1)*rs_Y;

    dcomplex* z10t      = buff_Z  + (0  )*cs_Z + (i  )*rs_Z;
    dcomplex* Z20       = buff_Z  + (0  )*cs_Z + (i+1)*rs_Z;
    dcomplex* z21       = buff_Z  + (i  )*cs_Z + (i+1)*rs_Z;

    dcomplex* t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    dcomplex* tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    dcomplex* s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    dcomplex* sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    dcomplex* tmp21     = buff_tmp + (i+1)*inc_tmp;

    dcomplex* w21       = buff_w  + (i+1)*inc_w;

    dcomplex* a22l      = buff_al + (i+1)*inc_al;

    dcomplex* a12p      = buff_ap + (i+1)*inc_ap;

    dcomplex* u21       = buff_u  + (i+1)*inc_u;

    dcomplex* u21p      = buff_up + (i+1)*inc_up;

    dcomplex* v21       = buff_v  + (i+1)*inc_v;

    dcomplex* d0        = buff_d  + (0  )*inc_d;

    dcomplex* e0        = buff_e  + (0  )*inc_e;

    dcomplex* a12p_t    = a12p    + (0  )*inc_ap;
    dcomplex* a12p_b    = a12p    + (1  )*inc_ap;

    dcomplex* v21_t     = v21     + (0  )*inc_v;
    dcomplex* v21_b     = v21     + (1  )*inc_v;

    // Last element of a01 (row i-1 of column i). For i == 0 this address lies
    // before a01 and is never dereferenced: every access below is guarded by
    // m_behind > 0.
    dcomplex* a01_b     = a01     + (0  )*cs_A + (i-1)*rs_A;

    dcomplex* a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    dcomplex* a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    // ABL / ZBL: rows i and below of the first i columns of A / Z.
    dcomplex* ABL       = a10t;
    dcomplex* ZBL       = z10t;

    // a2: column i of A from the diagonal element downwards.
    dcomplex* a2        = alpha11;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    if ( m_behind > 0 )
    {
      // Temporarily replace the last element of a01 by one; the saved value
      // is restored after the four gemv updates below.
      // FLA_Copy( a01_b, last_elem );
      // FLA_Set( FLA_ONE, a01_b );
      last_elem = *a01_b;
      *a01_b = *buff_1;
    }

    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01,  FLA_ONE, a2 );
    bl1_zgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ABL,  rs_A, cs_A,
               y10t, cs_Y,
               buff_1,
               a2,   rs_A );
    bl1_zgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ZBL, rs_Z, cs_Z,
               a01, rs_A,
               buff_1,
               a2,  rs_A );

    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE,    FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
    bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               n_ahead,
               n_behind,
               buff_m1,
               Y20,  rs_Y, cs_Y,
               a10t, cs_A,
               buff_1,
               a12t, cs_A );
    bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_behind,
               n_ahead,
               buff_m1,
               A02,  rs_A, cs_A,
               z10t, cs_Z,
               buff_1,
               a12t, cs_A );

    if ( m_behind > 0 )
    {
      // FLA_Copy( last_elem, a01_b );
      *a01_b = last_elem;
    }

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_opz( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_zcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    if ( n_ahead > 0 )
    {
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_zdiv3( buff_m1, tau11, &minus_inv_tau11 );

      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );

      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_0,
                 d0,   inc_d );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 Z20,  rs_Z, cs_Z,
                 u21p, inc_up,
                 buff_0,
                 e0,   inc_e );

      // d0 already holds A20' * u21p, so t01 is formed here with an axpy
      // rather than a second gemv.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Axpy( FLA_ONE, d0, t01 );
      bl1_zcopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  buff_1,
                  d0,  inc_d,
                  t01, rs_T );

      // FLA_Set( FLA_ZERO, y21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      // FLA_Gemv( FLA_TRANSPOSE,    FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
      bl1_zsetv( n_ahead,
                 buff_0,
                 y21, rs_Y );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_m1,
                 Y20, rs_Y, cs_Y,
                 d0,  inc_d,
                 buff_1,
                 y21, rs_Y );
      bl1_zgemv( BLIS1_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_m1,
                 A02, rs_A, cs_A,
                 e0,  inc_e,
                 buff_1,
                 y21, rs_Y );

      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      FLA_Fused_Ahx_Axpy_Ax_opz_var1( m_ahead,
                                      n_ahead,
                                      tau11,
                                      buff_1,
                                      A22,  rs_A, cs_A,
                                      u21p, inc_up,
                                      a12p, inc_ap,
                                      y21,  rs_Y,
                                      w21,  inc_w );

      // FLA_Gemvc( FLA_CONJ_TRANSPOSE,    FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
      // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
      // FLA_Copy( A22_l, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
      // FLA_Copy( g0, s01 );
      FLA_Fused_UYx_ZVx_opz_var1( m_ahead,
                                  n_behind,
                                  m_behind,
                                  n_ahead,
                                  buff_m1,
                                  A20, rs_A, cs_A,
                                  Y20, rs_Y, cs_Y,
                                  Z20, rs_Z, cs_Z,
                                  A02, rs_A, cs_A,
                                  A22, rs_A, cs_A,
                                  tmp21, inc_tmp, 
                                  s01,  rs_S, 
                                  a12p, inc_ap, 
                                  w21,  inc_w, 
                                  a22l, inc_al );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_1, 
                  a12t, cs_A,
                  y21,  rs_Y );

      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_opz( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      // FLA_Conjugate( v21_b );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_zmult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );
      bl1_zconjv( n_ahead - 1,
                  v21_b, inc_v );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_zcopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_zneg1( &minus_conj_alpha12 );

      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  n_behind,
                  &minus_conj_alpha12,
                  A02, rs_A,
                  s01, rs_S );
      bl1_zinvscalv( BLIS1_CONJUGATE,
                     n_behind,
                     &psi11_minus_alpha12,
                     s01, rs_S );

      // Overwrite row i of A to the right of the diagonal with alpha12
      // followed by v21_b.
      // FLA_Copy( alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // FLA_Copy( u21p, u21 );
    bl1_zcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // minus_inv_tau11 was set in the first n_ahead > 0 branch above, so
      // beta = -( y21 . v21 ) / tau11 after the scaling.
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_zdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, rs_Y,
                v21, inc_v,
                &beta );
      bl1_zscals( &minus_inv_tau11, &beta );

      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, rs_Z );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  a22l, inc_al,
                  z21,  rs_Z );
      bl1_zinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, rs_Z );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, rs_Z );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, rs_Y );
      bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, rs_Z );
    }
    else // if ( n_ahead == 0 )
    {
      // No columns remain to the right: d0 was not computed in this
      // iteration, so t01 is formed directly from A20 and u21.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
      bl1_zcopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 t01, rs_T );
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &w );
  // FLA_Obj_free( &al );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &d );
  // FLA_Obj_free( &e );
  FLA_free( buff_tmp );
  FLA_free( buff_w );
  FLA_free( buff_al );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_d );
  FLA_free( buff_e );

  return FLA_SUCCESS;
}
// FLA_Bidiag_UT_u_step_opc_var1 -- single-precision complex (scomplex) variant.
//
// Runs b_alg = m_TS iterations over the m_A x n_A matrix stored in buff_A
// (row stride rs_A, column stride cs_A). In iteration i the routine:
//   1. computes the left transform from alpha11/a21 with
//      FLA_Househ2_UT_l_opc, leaving its scalar in tau11;
//   2. if columns remain to the right (n_ahead > 0), applies that transform
//      to a12t/A22, computes the right transform from a12t with
//      FLA_Househ2_UT_r_opc (scalar in sigma11), applies it to A22, and
//      forms s01 from A02 and v21;
//   3. forms t01 = conj(a10t) + A20' * a21.
//
// Parameters:
//   m_A, n_A            dimensions of A.
//   m_TS                number of iterations (b_alg); T and S are indexed up
//                       to element (b_alg-1, b_alg-1).
//   buff_A, rs_A, cs_A  matrix A, updated in place.
//   buff_T, rs_T, cs_T  receives t01/tau11 in column i.
//   buff_S, rs_S, cs_S  receives s01/sigma11 in column i.
//
// Returns FLA_SUCCESS. The temporary vector v is obtained with FLA_malloc
// and released with FLA_free before returning.
FLA_Error FLA_Bidiag_UT_u_step_opc_var1 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *buff_A,
int  rs_A,
int  cs_A,
scomplex *buff_T,
int  rs_T,
int  cs_T,
scomplex *buff_S,
int  rs_S,
int  cs_S 
)

References bl1_ccopyv(), bl1_cgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

{
  // Pointers to the library's constant scalars 1 and 0.
  scomplex* buff_1  = FLA_COMPLEX_PTR( FLA_ONE );
  scomplex* buff_0  = FLA_COMPLEX_PTR( FLA_ZERO );

  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  // NOTE: only v has a buffer in this variant; no y or z is allocated or
  // used below.
  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  int       inc_v  = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Partition A, T and S around the current diagonal position (i,i).
    scomplex* a10t     = buff_A + (0  )*cs_A + (i  )*rs_A;
    scomplex* A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    scomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    scomplex* a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    scomplex* A02      = buff_A + (i+1)*cs_A + (0  )*rs_A;
    scomplex* a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
    scomplex* A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    scomplex* t01      = buff_T + (i  )*cs_T + (0  )*rs_T;
    scomplex* tau11    = buff_T + (i  )*cs_T + (i  )*rs_T;

    scomplex* s01      = buff_S + (i  )*cs_S + (0  )*rs_S;
    scomplex* sigma11  = buff_S + (i  )*cs_S + (i  )*rs_S;

    scomplex* v21      = buff_v + (i+1)*inc_v;

    // First element / remainder of row a12t.
    scomplex* a12t_l   = a12t   + (0  )*cs_A + (0  )*rs_A;
    scomplex* a12t_r   = a12t   + (1  )*cs_A + (0  )*rs_A;

    // First column / remaining columns of A22.
    scomplex* A22_l    = A22    + (0  )*cs_A + (0  )*rs_A;
    scomplex* A22_r    = A22    + (1  )*cs_A + (0  )*rs_A;

    scomplex* v21_t    = v21    + (0  )*inc_v;
    scomplex* v21_b    = v21    + (1  )*inc_v;

    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = n_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    FLA_Househ2_UT_l_opc( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );

    if ( n_ahead > 0 )
    {
      // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
      FLA_Apply_H2_UT_l_opc_var1( m_ahead,
                                  n_ahead,
                                  tau11,
                                  a21,  rs_A,
                                  a12t, cs_A,
                                  A22,  rs_A, cs_A );

      // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
      FLA_Househ2_UT_r_opc( n_ahead - 1,
                            a12t_l,
                            a12t_r, cs_A,
                            sigma11 );

      // v21 = ( 1, a12t_r ): a contiguous copy of the row vector with a
      // leading one, used by the gemv that forms s01.
      // FLA_Set( FLA_ONE, v21_t );
      // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
      *v21_t = *buff_1;
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  a12t_r, cs_A,
                  v21_b,  inc_v );

      // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
      FLA_Apply_H2_UT_r_opc_var1( m_ahead,
                                  n_ahead - 1,
                                  sigma11,
                                  v21_b, inc_v,
                                  A22_l, rs_A,
                                  A22_r, rs_A, cs_A );

      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
    bl1_ccopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               a21, rs_A,
               buff_1,
               t01, rs_T );

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &v );
  FLA_free( buff_v );

  return FLA_SUCCESS;
}
// FLA_Bidiag_UT_u_step_opc_var2 -- single-precision complex (scomplex) variant.
//
// Runs b_alg = m_TS iterations over the m_A x n_A matrix stored in buff_A
// (row stride rs_A, column stride cs_A). In iteration i the routine:
//   1. computes the left transform from alpha11/a21 with
//      FLA_Househ2_UT_l_opc, leaving its scalar in tau11;
//   2. if columns remain to the right (n_ahead > 0):
//        - forms y21 = ( conj(a12t) + A22' * a21 ) / tau11 and subtracts
//          conj(y21) from a12t;
//        - computes the right transform from a12t with FLA_Househ2_UT_r_opc
//          (scalar in sigma11) and copies it into v21 with a leading one;
//        - forms z21 = ( A22 * v21 - ( y21 . v21 ) * a21 ) / sigma11;
//        - applies the two rank-1 updates A22 -= a21 * y21' and
//          A22 -= z21 * v21';
//        - forms s01 from A02 and v21;
//   3. forms t01 = conj(a10t) + A20' * a21.
//
// Parameters:
//   m_A, n_A            dimensions of A.
//   m_TS                number of iterations (b_alg); T and S are indexed up
//                       to element (b_alg-1, b_alg-1).
//   buff_A, rs_A, cs_A  matrix A, updated in place.
//   buff_T, rs_T, cs_T  receives t01/tau11 in column i.
//   buff_S, rs_S, cs_S  receives s01/sigma11 in column i.
//
// Returns FLA_SUCCESS. The temporary vectors v, y and z are obtained with
// FLA_malloc and released with FLA_free before returning.
FLA_Error FLA_Bidiag_UT_u_step_opc_var2 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *buff_A,
int  rs_A,
int  cs_A,
scomplex *buff_T,
int  rs_T,
int  cs_T,
scomplex *buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

{
  // Pointers to the library's constant scalars 1, 0 and -1.
  scomplex* buff_1  = FLA_COMPLEX_PTR( FLA_ONE );
  scomplex* buff_0  = FLA_COMPLEX_PTR( FLA_ZERO );
  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );

  scomplex  beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  // All temporary vectors are contiguous (unit stride).
  int       inc_v  = 1;
  int       inc_y  = 1;
  int       inc_z  = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Partition A, T and S around the current diagonal position (i,i).
    scomplex* a10t     = buff_A + (0  )*cs_A + (i  )*rs_A;
    scomplex* A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    scomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    scomplex* a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    scomplex* A02      = buff_A + (i+1)*cs_A + (0  )*rs_A;
    scomplex* a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
    scomplex* A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    scomplex* t01      = buff_T + (i  )*cs_T + (0  )*rs_T;
    scomplex* tau11    = buff_T + (i  )*cs_T + (i  )*rs_T;

    scomplex* s01      = buff_S + (i  )*cs_S + (0  )*rs_S;
    scomplex* sigma11  = buff_S + (i  )*cs_S + (i  )*rs_S;

    scomplex* v21      = buff_v + (i+1)*inc_v;

    scomplex* y21      = buff_y + (i+1)*inc_y;

    scomplex* z21      = buff_z + (i+1)*inc_z;

    // First element / remainder of row a12t.
    scomplex* a12t_l   = a12t   + (0  )*cs_A + (0  )*rs_A;
    scomplex* a12t_r   = a12t   + (1  )*cs_A + (0  )*rs_A;

    scomplex* v21_t    = v21    + (0  )*inc_v;
    scomplex* v21_b    = v21    + (1  )*inc_v;

    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = n_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    FLA_Househ2_UT_l_opc( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );

    if ( n_ahead > 0 )
    {
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
      bl1_ccopyv( BLIS1_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  y21,  inc_y );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_1,
                 y21, inc_y );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
      bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
      bl1_caxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_m1,
                  y21,  inc_y,
                  a12t, cs_A );

      // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
      FLA_Househ2_UT_r_opc( n_ahead - 1,
                            a12t_l,
                            a12t_r, cs_A,
                            sigma11 );

      // v21 = ( 1, a12t_r ). The destination is part of v, so it is
      // addressed with v's own stride.
      // FLA_Set( FLA_ONE, v21_t );
      // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
      *v21_t = *buff_1;
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  a12t_r, cs_A,
                  v21_b,  inc_v );

      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      bl1_cdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, inc_y,
                v21, inc_v,
                &beta );
      bl1_cneg1( &beta );

      // FLA_Copy( a21, z21 );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  a21, rs_A,
                  z21, inc_z );
      bl1_cgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 v21, inc_v,
                 &beta,
                 z21, inc_z );
      bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // z21 above is computed from A22 before these rank-1 updates modify
      // it, so the order of the gemv and the two ger calls matters.
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      bl1_cger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                a21, rs_A,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_cger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );

      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
    bl1_ccopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               a21, rs_A,
               buff_1,
               t01, rs_T );

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_step_opc_var3 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *buff_A,
int  rs_A,
int  cs_A,
scomplex *buff_T,
int  rs_T,
int  cs_T,
scomplex *buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

{
  // Single-precision complex (scomplex) kernel that performs m_TS
  // iterations over the leading rows/columns of A. Each iteration calls
  // FLA_Househ2_UT_l_opc() on ( alpha11, a21 ), storing tau11 on the
  // diagonal of T, and (when columns remain to the right)
  // FLA_Househ2s_UT_r_opc() on a work copy of row a12t, storing sigma11
  // on the diagonal of S. Columns t01 of T and s01 of S are filled as well.
  //
  // Parameters (as used below):
  //   m_A, n_A     - dimensions used to size the work vectors and to derive
  //                  m_ahead / n_ahead.
  //   m_TS         - number of iterations (b_alg).
  //   buff_A       - matrix data; element (r,c) is at buff_A + c*cs_A + r*rs_A.
  //   buff_T, buff_S - output matrices addressed the same way with
  //                  (rs_T, cs_T) and (rs_S, cs_S).
  //
  // Returns FLA_SUCCESS unconditionally. The results of FLA_malloc() are
  // not checked in this function.
  //
  // The "// FLA_..." comments preceding each group of calls show the
  // object-based API operations that the low-level calls correspond to.

  // Pointers to the scalar constants 1, 0 and -1 as scomplex values.
  scomplex* buff_1  = FLA_COMPLEX_PTR( FLA_ONE );
  scomplex* buff_0  = FLA_COMPLEX_PTR( FLA_ZERO );
  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );

  scomplex  alpha12;
  scomplex  minus_conj_alpha12;
  scomplex  psi11_minus_alpha12;
  scomplex  minus_inv_tau11;
  scomplex  minus_upsilon11;
  scomplex  minus_conj_nu11;
  scomplex  minus_conj_psi11;
  scomplex  minus_zeta11;
  scomplex  beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Work vectors: w, u, up and z have m_A elements; ap, v and y have n_A
  // elements. All are contiguous (unit increment) and are freed before
  // returning.
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  scomplex* buff_w  = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_u  = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_v  = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_y  = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_z  = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_w   = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_y   = 1;
  int       inc_z   = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A around the current diagonal element (i,i):
    //   a10t / A20  - row i / rows i+1.. of the columns left of column i
    //   alpha11/a21 - diagonal element / rest of column i below it
    //   A02/a12t/A22 - columns i+1.. : rows above i / row i / rows i+1..
    scomplex* a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    scomplex* A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    scomplex* alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    scomplex* a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    scomplex* A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    scomplex* a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    scomplex* A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    // Column i of T and S: the part above the diagonal and the diagonal.
    scomplex* t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    scomplex* tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    scomplex* s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    scomplex* sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    // Views into the work vectors. Element i of u, v, y, z is element 0 of
    // the "21" sub-vector addressed in the previous iteration (offset i-1+1).
    scomplex* w21       = buff_w  + (i+1)*inc_w;

    scomplex* a12p      = buff_ap + (i+1)*inc_ap;

    scomplex* upsilon11 = buff_u  + (i  )*inc_u;
    scomplex* u21       = buff_u  + (i+1)*inc_u;

    scomplex* u21p      = buff_up + (i+1)*inc_up;

    scomplex* nu11      = buff_v  + (i  )*inc_v;
    scomplex* v21       = buff_v  + (i+1)*inc_v;

    scomplex* psi11     = buff_y  + (i  )*inc_y;
    scomplex* y21       = buff_y  + (i+1)*inc_y;

    scomplex* zeta11    = buff_z  + (i  )*inc_z;
    scomplex* z21       = buff_z  + (i+1)*inc_z;

    // First element ("_t" / "_l") and remainder ("_b" / "_r") of a12p, v21
    // and a12t; A22_l is the first column of A22.
    scomplex* a12p_t    = a12p    + (0  )*inc_ap;
    scomplex* a12p_b    = a12p    + (1  )*inc_ap;

    scomplex* v21_t     = v21     + (0  )*inc_v;
    scomplex* v21_b     = v21     + (1  )*inc_v;

    scomplex* a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    scomplex* a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    scomplex* A22_l     = A22     + (0  )*cs_A + (0  )*rs_A;

    // Counts of rows/columns after and before the current index.
    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    // For i > 0, first bring alpha11, a21 and a12t up to date using the
    // u, y, z, v entries left in the work vectors by the previous iteration
    // (none of buff_u/buff_y/buff_z/buff_v has been overwritten yet at this
    // point of the current iteration).
    if ( m_behind > 0 )
    {
      // Negated / conjugated copies of the scalars, used as axpy
      // coefficients below.
      // FLA_Copy( upsilon11, minus_upsilon11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
      bl1_cmult3( buff_m1, upsilon11, &minus_upsilon11 );

      // FLA_Copy( zeta11, minus_zeta11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
      bl1_cmult3( buff_m1, zeta11, &minus_zeta11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
      bl1_ccopyconj( psi11, &minus_conj_psi11 );
      bl1_cscals( buff_m1, &minus_conj_psi11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
      bl1_ccopyconj( nu11, &minus_conj_nu11 );
      bl1_cscals( buff_m1, &minus_conj_nu11 );

      // Update of the single element alpha11 (vector length 1).
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  zeta11,    alpha11 );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_conj_psi11,
                  upsilon11, 1,
                  alpha11,   1 );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_conj_nu11,
                  zeta11,  1,
                  alpha11, 1 );

      // Update of the column below the diagonal.
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  z21, a21 );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_psi11,
                  u21, inc_u,
                  a21, rs_A );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_nu11,
                  z21, inc_z,
                  a21, rs_A );

      // Update of the row right of the diagonal (traversed with stride cs_A).
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11,    v21, a12t );
      bl1_caxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_upsilon11,
                  y21,  inc_y,
                  a12t, cs_A );
      bl1_caxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_zeta11,
                  v21,  inc_v,
                  a12t, cs_A );
    }

    // Left Householder step on ( alpha11, a21 ); afterwards a21 is saved
    // into u21p so that u21 (still holding the previous iteration's data)
    // is left untouched until the copy further down.
    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_opc( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_ccopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    if ( n_ahead > 0 )
    {
      // minus_inv_tau11 = -1 / tau11 (reused later in this iteration).
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_cdiv3( buff_m1, tau11, &minus_inv_tau11 );

      // a12p = a12t + minus_inv_tau11 * a12t, stored contiguously.
      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );
    }

    // For i > 0, apply the two rank-1 updates to A22 using the vectors from
    // the previous iteration. This must happen before y21, v21, z21 and u21
    // are overwritten below.
    if ( m_behind > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      bl1_cger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u21, inc_u,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_cger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );
    }

    if ( n_ahead > 0 )
    {
      // y21 is overwritten here with a product of A22 and the new u21p.
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_0,
                 y21,  inc_y );

      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      bl1_caxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  y21,  inc_y,
                  a12p, inc_ap );

      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      bl1_cgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 a12p, inc_ap,
                 buff_0,
                 w21,  inc_w );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_caxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_1,
                  a12t, cs_A,
                  y21,  inc_y );

      // Right Householder step computed from the work copy a12p (first
      // element a12p_t plus n_ahead - 1 trailing elements). Outputs go to
      // the local scalars alpha12 and psi11_minus_alpha12 and to sigma11.
      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_opc( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // Build v21 from a12p using the scalars returned above; only the
      // trailing n_ahead - 1 entries are conjugated.
      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      // FLA_Conjugate( v21_b );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_cmult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );
      bl1_cconjv( n_ahead - 1,
                  v21_b, inc_v );

      // Write the result back into row i of A: alpha12 into the first
      // element, the trailing part of v21 into the rest.
      // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // Only now is u21 replaced by the vector produced in this iteration.
    // FLA_Copy( u21p, u21 );
    bl1_ccopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // The scaling by -1 and the division by tau11 listed in the reference
      // comment are fused into one multiplication by minus_inv_tau11.
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_cdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, inc_y,
                v21, inc_v,
                &beta );
      bl1_cscals( &minus_inv_tau11, &beta );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_ccopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_cneg1( &minus_conj_alpha12 );

      // Assemble z21 from w21, the first column of A22 and u21.
      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, inc_z );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  A22_l, rs_A,
                  z21,   inc_z );
      bl1_cinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, inc_z );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, inc_z );

      // NOTE(review): the first call below passes BLIS1_CONJUGATE for the
      // tau11 scaling although the reference comment says FLA_NO_CONJUGATE.
      // The two agree only if tau11 has zero imaginary part - confirm.
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_cinvscalv( BLIS1_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );
      bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // Column i of S above the diagonal (m_behind entries).
      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // Column i of T above the diagonal (n_behind entries): conjugated copy
    // of a10t, then accumulate the A20 / u21 product on top of it.
    // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
    bl1_ccopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               u21, inc_u,
               buff_1,
               t01, rs_T );

    // On the last iteration there is no following pass to apply the pending
    // rank-1 updates to A22, so they are applied here with the vectors
    // computed in this iteration.
    if ( m_behind + 1 == b_alg && n_ahead > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      bl1_cger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u21, inc_u,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_cger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );
    }

    /*------------------------------------------------------------*/

  }

  // Release every work vector allocated at the top of the function.
  // FLA_Obj_free( &w );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_w );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_step_opc_var4 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *buff_A,
int  rs_A,
int  cs_A,
scomplex *buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *buff_T,
int  rs_T,
int  cs_T,
scomplex *buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), bl1_csetm(), bl1_csetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

{
  // Single-precision complex (scomplex) kernel that performs m_TS
  // iterations over the leading rows/columns of A. Unlike the variant that
  // keeps single work vectors, this one accumulates one column of Y and one
  // column of Z per iteration and uses the previously stored columns,
  // together with A, to bring the current column/row of A up to date
  // before the Householder steps.
  //
  // Parameters (as used below):
  //   m_A, n_A     - dimensions used to size the work vectors, to clear
  //                  Y and Z, and to derive m_ahead / n_ahead.
  //   m_TS         - number of iterations (b_alg).
  //   buff_A       - matrix data; element (r,c) is at buff_A + c*cs_A + r*rs_A.
  //   buff_Y, buff_Z - n_A x b_alg and m_A x b_alg matrices (sizes taken
  //                  from the bl1_csetm() calls below), zeroed on entry and
  //                  filled column by column.
  //   buff_T, buff_S - output matrices; tau11 / sigma11 go on the diagonal,
  //                  t01 / s01 above it.
  //
  // Returns FLA_SUCCESS unconditionally. The results of FLA_malloc() are
  // not checked in this function.
  //
  // The "// FLA_..." comments preceding each group of calls show the
  // object-based API operations that the low-level calls correspond to.

  // Pointers to the scalar constants 1, 0 and -1 as scomplex values.
  scomplex* buff_1  = FLA_COMPLEX_PTR( FLA_ONE );
  scomplex* buff_0  = FLA_COMPLEX_PTR( FLA_ZERO );
  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );

  scomplex  alpha12;
  scomplex  minus_conj_alpha12;
  scomplex  psi11_minus_alpha12;
  scomplex  minus_inv_tau11;
  scomplex  beta;
  scomplex  last_elem;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Work vectors: w, al, u, up, f and g have m_A elements; ap, v, d and e
  // have n_A elements. All are contiguous (unit increment) and are freed
  // before returning.
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
  scomplex* buff_w  = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_al = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_u  = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_v  = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_d  = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_e  = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_f  = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_g  = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_w   = 1;
  int       inc_al  = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_d   = 1;
  int       inc_e   = 1;
  int       inc_f   = 1;
  int       inc_g   = 1;

  // Clear the caller-provided Y and Z before any column is accumulated.
  // FLA_Set( FLA_ZERO, Y );
  // FLA_Set( FLA_ZERO, Z );
  bl1_csetm( n_A,
             b_alg,
             buff_0,
             buff_Y, rs_Y, cs_Y );
  bl1_csetm( m_A,
             b_alg,
             buff_0,
             buff_Z, rs_Z, cs_Z );

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A around the current diagonal element (i,i):
    //   a10t / A20  - row i / rows i+1.. of the columns left of column i
    //   a01/alpha11/a21 - column i: above / on / below the diagonal
    //   A02/a12t/A22 - columns i+1.. : rows above i / row i / rows i+1..
    scomplex* a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    scomplex* A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    scomplex* a01       = buff_A  + (i  )*cs_A + (0  )*rs_A;
    scomplex* alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    scomplex* a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    scomplex* A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    scomplex* a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    scomplex* A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    // Views into Y and Z: row i and rows i+1.. of the columns already
    // filled (y10t/Y20, z10t/Z20), and the column being filled (y21, z21).
    scomplex* y10t      = buff_Y  + (0  )*cs_Y + (i  )*rs_Y;
    scomplex* Y20       = buff_Y  + (0  )*cs_Y + (i+1)*rs_Y;
    scomplex* y21       = buff_Y  + (i  )*cs_Y + (i+1)*rs_Y;

    scomplex* z10t      = buff_Z  + (0  )*cs_Z + (i  )*rs_Z;
    scomplex* Z20       = buff_Z  + (0  )*cs_Z + (i+1)*rs_Z;
    scomplex* z21       = buff_Z  + (i  )*cs_Z + (i+1)*rs_Z;

    // Column i of T and S: the part above the diagonal and the diagonal.
    scomplex* t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    scomplex* tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    scomplex* s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    scomplex* sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    // Views into the work vectors at offset i+1.
    scomplex* w21       = buff_w  + (i+1)*inc_w;

    scomplex* a22l      = buff_al + (i+1)*inc_al;

    scomplex* a12p      = buff_ap + (i+1)*inc_ap;

    scomplex* u21       = buff_u  + (i+1)*inc_u;

    scomplex* u21p      = buff_up + (i+1)*inc_up;

    scomplex* v21       = buff_v  + (i+1)*inc_v;

    // d, e, f and g are always addressed from their first element; the
    // calls below use n_behind / m_behind of their entries.
    scomplex* d0        = buff_d  + (0  )*inc_d;

    scomplex* e0        = buff_e  + (0  )*inc_e;

    scomplex* f0        = buff_f  + (0  )*inc_f;

    scomplex* g0        = buff_g  + (0  )*inc_g;

    // First element ("_t" / "_l") and remainder ("_b" / "_r") sub-views.
    scomplex* a12p_t    = a12p    + (0  )*inc_ap;
    scomplex* a12p_b    = a12p    + (1  )*inc_ap;

    scomplex* v21_t     = v21     + (0  )*inc_v;
    scomplex* v21_b     = v21     + (1  )*inc_v;

    // Last element of a01 (row i-1 of column i). For i == 0 this address
    // lies before a01; it is only dereferenced under m_behind > 0 below.
    scomplex* a01_b     = a01     + (0  )*cs_A + (i-1)*rs_A;

    scomplex* a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    scomplex* a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    scomplex* A02_l     = A02     + (0  )*cs_A + (0  )*rs_A;

    scomplex* A22_l     = A22     + (0  )*cs_A + (0  )*rs_A;

    scomplex* Y20_t     = Y20     + (0  )*cs_Y + (0  )*rs_Y;

    // Aliases: ABL / ZBL start at row i of A / Z (rows i.. of the left
    // columns); a2 starts at alpha11 (alpha11 followed by a21).
    scomplex* ABL       = a10t;
    scomplex* ZBL       = z10t;

    scomplex* a2        = alpha11;

    // Counts of rows/columns after and before the current index.
    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    // Temporarily replace the last element of a01 with one for the updates
    // that read a01 / A02 below; the saved value is restored afterwards.
    if ( m_behind > 0 )
    {
      // FLA_Copy( a01_b, last_elem );
      // FLA_Set( FLA_ONE, a01_b );
      last_elem = *a01_b;
      *a01_b = *buff_1;
    }

    // Update the current column (alpha11 and a21 together, m_ahead + 1
    // entries) from the previously stored columns of A/Y and Z/A.
    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01,  FLA_ONE, a2 );
    bl1_cgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ABL,  rs_A, cs_A,
               y10t, cs_Y,
               buff_1,
               a2,   rs_A );
    bl1_cgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ZBL, rs_Z, cs_Z,
               a01, rs_A,
               buff_1,
               a2,  rs_A );

    // Update the current row a12t (n_ahead entries, stride cs_A).
    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE,    FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
    bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               n_ahead,
               n_behind,
               buff_m1,
               Y20,  rs_Y, cs_Y,
               a10t, cs_A,
               buff_1,
               a12t, cs_A );
    bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_behind,
               n_ahead,
               buff_m1,
               A02,  rs_A, cs_A,
               z10t, cs_Z,
               buff_1,
               a12t, cs_A );

    // Put back the element of A that was overwritten with one above.
    if ( m_behind > 0 )
    {
      // FLA_Copy( last_elem, a01_b );
      *a01_b = last_elem;
    }

    // Left Householder step on ( alpha11, a21 ); the resulting a21 is
    // saved in u21p.
    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_opc( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_ccopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    if ( n_ahead > 0 )
    {
      // minus_inv_tau11 = -1 / tau11 (reused later in this iteration).
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_cdiv3( buff_m1, tau11, &minus_inv_tau11 );

      // a12p = a12t + minus_inv_tau11 * a12t, stored contiguously.
      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );

      // d0 and e0 (n_behind entries each): products of A20 and Z20 with u21p.
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_0,
                 d0,   inc_d );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 Z20,  rs_Z, cs_Z,
                 u21p, inc_up,
                 buff_0,
                 e0,   inc_e );

      // t01 reuses d0 here instead of recomputing the A20 product; the
      // n_ahead == 0 branch further down computes t01 with a gemv instead.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Axpy( FLA_ONE, d0, t01 );
      bl1_ccopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  buff_1,
                  d0,  inc_d,
                  t01, rs_T );

      // Start column i of Y: zero it, then subtract the contributions of
      // the previously stored columns via d0 and e0.
      // FLA_Set( FLA_ZERO, y21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      // FLA_Gemv( FLA_TRANSPOSE,    FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
      bl1_csetv( n_ahead,
                 buff_0,
                 y21, rs_Y );
      bl1_cgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_m1,
                 Y20, rs_Y, cs_Y,
                 d0,  inc_d,
                 buff_1,
                 y21, rs_Y );
      bl1_cgemv( BLIS1_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_m1,
                 A02, rs_A, cs_A,
                 e0,  inc_e,
                 buff_1,
                 y21, rs_Y );

      // Add the A22 / u21p product on top (beta = 1, so it accumulates).
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_1,
                 y21,  rs_Y );

      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      bl1_caxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  y21,  rs_Y,
                  a12p, inc_ap );

      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      bl1_cgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 a12p, inc_ap,
                 buff_0,
                 w21,  inc_w );

      // f0 and g0: products of Y20 and A02 with a12p, used to correct w21.
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE,    FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
      // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_1,
                 Y20,  rs_Y, cs_Y,
                 a12p, inc_ap,
                 buff_0,
                 f0,   inc_f );
      bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02,  rs_A, cs_A,
                 a12p, inc_ap,
                 buff_0,
                 g0,   inc_g );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
      bl1_cgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 A20, rs_A, cs_A,
                 f0,  inc_f,
                 buff_1,
                 w21, inc_w );
      bl1_cgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 Z20, rs_Z, cs_Z,
                 g0,  inc_g,
                 buff_1,
                 w21, inc_w );

      // a22l: a corrected copy of the first column of A22, built in work
      // storage so that A22 itself is not modified here.
      // FLA_Copy( A22_l, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  A22_l, rs_A,
                  a22l,  inc_al );
      bl1_cgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 A20,   rs_A, cs_A,
                 Y20_t, cs_Y,
                 buff_1,
                 a22l,  inc_al );
      bl1_cgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 Z20,   rs_Z, cs_Z,
                 A02_l, rs_A,
                 buff_1,
                 a22l,  inc_al );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_caxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_1, 
                  a12t, cs_A,
                  y21,  rs_Y );

      // Right Householder step computed from the work copy a12p (first
      // element a12p_t plus n_ahead - 1 trailing elements). Outputs go to
      // the local scalars alpha12 and psi11_minus_alpha12 and to sigma11.
      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_opc( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // Build v21 from a12p using the scalars returned above; only the
      // trailing n_ahead - 1 entries are conjugated.
      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      // FLA_Conjugate( v21_b );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_cmult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );
      bl1_cconjv( n_ahead - 1,
                  v21_b, inc_v );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_ccopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_cneg1( &minus_conj_alpha12 );

      // s01 is derived from g0 and the first column of A02. This reads
      // A02_l before the a12t write-back below.
      // FLA_Copy( g0, s01 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  g0,  inc_g,
                  s01, rs_S );
      bl1_caxpyv( BLIS1_CONJUGATE,
                  n_behind,
                  &minus_conj_alpha12,
                  A02_l, rs_A,
                  s01,   rs_S );
      bl1_cinvscalv( BLIS1_CONJUGATE,
                     n_behind,
                     &psi11_minus_alpha12,
                     s01, rs_S );

      // Write the result back into row i of A: alpha12 into the first
      // element, the trailing part of v21 into the rest.
      // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // FLA_Copy( u21p, u21 );
    bl1_ccopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // The scaling by -1 and the division by tau11 listed in the reference
      // comment are fused into one multiplication by minus_inv_tau11.
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_cdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, rs_Y,
                v21, inc_v,
                &beta );
      bl1_cscals( &minus_inv_tau11, &beta );

      // Assemble column i of Z from w21, the corrected column a22l and u21.
      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, rs_Z );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  a22l, inc_al,
                  z21,  rs_Z );
      bl1_cinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, rs_Z );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, rs_Z );

      // NOTE(review): the first call below passes BLIS1_CONJUGATE for the
      // tau11 scaling although the reference comment says FLA_NO_CONJUGATE.
      // The two agree only if tau11 has zero imaginary part - confirm.
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_cinvscalv( BLIS1_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, rs_Y );
      bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, rs_Z );
    }
    else // if ( n_ahead == 0 )
    {
      // No columns remain to the right, so the n_ahead > 0 block above
      // (which also fills t01) was skipped; compute t01 directly here.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
      bl1_ccopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 t01, rs_T );
    }

    /*------------------------------------------------------------*/

  }

  // Release every work vector allocated at the top of the function.
  // FLA_Obj_free( &w );
  // FLA_Obj_free( &al );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &d );
  // FLA_Obj_free( &e );
  // FLA_Obj_free( &f );
  // FLA_Obj_free( &g );
  FLA_free( buff_w );
  FLA_free( buff_al );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_d );
  FLA_free( buff_e );
  FLA_free( buff_f );
  FLA_free( buff_g );

  return FLA_SUCCESS;
}
/*
 * FLA_Bidiag_UT_u_step_opc_var5
 *
 * Single-precision complex (scomplex) kernel that executes m_TS iterations
 * of the "variant 5" bidiagonal-reduction step on the matrix held in buff_A.
 * Each iteration i:
 *   - applies the updates accumulated in Y and Z to the current column
 *     (a2 = [alpha11; a21]) and the current row (a12t) of A,
 *   - computes a left reflector from alpha11/a21 with FLA_Househ2_UT_l_opc
 *     (scalar stored in tau11), and, when columns remain to the right,
 *     a right reflector from a12t with FLA_Househ2_UT_r_opc (scalar stored
 *     in sigma11),
 *   - fills column i of Y, Z, T (t01) and S (s01).
 *
 * Parameters
 *   m_A, n_A            row / column counts used to size the partitions
 *                       (m_ahead = m_A - i - 1, n_ahead = n_A - i - 1).
 *   m_TS                number of iterations to run (b_alg).
 *   buff_A, rs_A, cs_A  base pointer and row/column strides of A; element
 *                       (r,c) is addressed as buff_A + c*cs_A + r*rs_A.
 *   buff_Y, rs_Y, cs_Y  workspace Y; its leading n_A x m_TS part is set to
 *                       zero on entry and then filled one column per step.
 *   buff_Z, rs_Z, cs_Z  workspace Z; its leading m_A x m_TS part is set to
 *                       zero on entry and then filled one column per step.
 *   buff_T, rs_T, cs_T  receives t01 and tau11 in column i at step i.
 *   buff_S, rs_S, cs_S  receives s01 and sigma11 in column i at step i
 *                       (only when n_ahead > 0).
 *
 * Returns FLA_SUCCESS unconditionally; the results of the six FLA_malloc
 * calls are not checked here.
 *
 * NOTE: the buffer parameters are pointers (the body performs pointer
 * arithmetic on them and uses sizeof( *buff_A )).
 */
FLA_Error FLA_Bidiag_UT_u_step_opc_var5 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), bl1_csetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

{
  // Pointers to the library's constant scalars 1, 0 and -1.
  scomplex* buff_1  = FLA_COMPLEX_PTR( FLA_ONE );
  scomplex* buff_0  = FLA_COMPLEX_PTR( FLA_ZERO );
  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );

  scomplex  beta;
  scomplex  last_elem;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Temporary work vectors (unit stride), released at the end of the
  // function.
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
  scomplex* buff_u  = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_v  = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_d  = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_e  = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  scomplex* buff_f  = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  scomplex* buff_g  = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_u   = 1;
  int       inc_v   = 1;
  int       inc_d   = 1;
  int       inc_e   = 1;
  int       inc_f   = 1;
  int       inc_g   = 1;

  // FLA_Set( FLA_ZERO, Y );
  // FLA_Set( FLA_ZERO, Z );
  bl1_csetm( n_A,
             b_alg,
             buff_0,
             buff_Y, rs_Y, cs_Y );
  bl1_csetm( m_A,
             b_alg,
             buff_0,
             buff_Z, rs_Z, cs_Z );

  for ( i = 0; i < b_alg; ++i )
  {
    // Partition views of A, Y, Z, T and S around the diagonal position
    // (i,i); all are plain pointer offsets into the caller's buffers.
    scomplex* a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    scomplex* A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    scomplex* a01       = buff_A  + (i  )*cs_A + (0  )*rs_A;
    scomplex* alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    scomplex* a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    scomplex* A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    scomplex* a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    scomplex* A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    scomplex* y10t      = buff_Y  + (0  )*cs_Y + (i  )*rs_Y;
    scomplex* Y20       = buff_Y  + (0  )*cs_Y + (i+1)*rs_Y;
    scomplex* y21       = buff_Y  + (i  )*cs_Y + (i+1)*rs_Y;

    scomplex* z10t      = buff_Z  + (0  )*cs_Z + (i  )*rs_Z;
    scomplex* Z20       = buff_Z  + (0  )*cs_Z + (i+1)*rs_Z;
    scomplex* z21       = buff_Z  + (i  )*cs_Z + (i+1)*rs_Z;

    scomplex* t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    scomplex* tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    scomplex* s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    scomplex* sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    scomplex* u21       = buff_u  + (i+1)*inc_u;

    scomplex* v21       = buff_v  + (i+1)*inc_v;

    scomplex* d0        = buff_d  + (0  )*inc_d;

    scomplex* e0        = buff_e  + (0  )*inc_e;

    scomplex* f0        = buff_f  + (0  )*inc_f;

    scomplex* g0        = buff_g  + (0  )*inc_g;

    scomplex* v21_t     = v21     + (0  )*inc_v;
    scomplex* v21_b     = v21     + (1  )*inc_v;

    // Last element of a01. For i == 0 this address lies before a01; it is
    // only dereferenced under the m_behind > 0 guards below.
    scomplex* a01_b     = a01     + (0  )*cs_A + (i-1)*rs_A;

    scomplex* a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    scomplex* a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    // ABL / ZBL: rows i.. of the first i columns of A / Z.
    scomplex* ABL       = a10t;
    scomplex* ZBL       = z10t;

    // a2: column i of A from the diagonal down ([alpha11; a21]).
    scomplex* a2        = alpha11;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    // Temporarily overwrite the last element of a01 with one for the
    // updates below; the saved value is restored afterwards.
    if ( m_behind > 0 )
    {
      // FLA_Copy( a01_b, last_elem );
      // FLA_Set( FLA_ONE, a01_b );
      last_elem = *a01_b;
      *a01_b = *buff_1;
    }

    // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01,  FLA_ONE, a2 );
    bl1_cgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ABL,  rs_A, cs_A,
               y10t, cs_Y,
               buff_1,
               a2,   rs_A );
    bl1_cgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ZBL, rs_Z, cs_Z,
               a01, rs_A,
               buff_1,
               a2,  rs_A );

    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE,    FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
    bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               n_ahead,
               n_behind,
               buff_m1,
               Y20,  rs_Y, cs_Y,
               a10t, cs_A,
               buff_1,
               a12t, cs_A );
    bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_behind,
               n_ahead,
               buff_m1,
               A02,  rs_A, cs_A,
               z10t, cs_Z,
               buff_1,
               a12t, cs_A );

    if ( m_behind > 0 )
    {
      // FLA_Copy( last_elem, a01_b );
      *a01_b = last_elem;
    }

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21 );
    FLA_Househ2_UT_l_opc( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_ccopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21, rs_A,
                u21, inc_u );

    if ( n_ahead > 0 )
    {
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
      bl1_ccopyv( BLIS1_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  y21,  rs_Y );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 y21, rs_Y );

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_0,
                 d0,  inc_d );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 Z20, rs_Z, cs_Z,
                 u21, inc_u,
                 buff_0,
                 e0,  inc_e );

      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Axpy( FLA_ONE, d0, t01 );
      bl1_ccopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_caxpyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  buff_1,
                  d0,  inc_d,
                  t01, rs_T );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      // FLA_Gemv( FLA_TRANSPOSE,    FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
      bl1_cgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_m1,
                 Y20, rs_Y, cs_Y,
                 d0,  inc_d,
                 buff_1,
                 y21, rs_Y );
      bl1_cgemv( BLIS1_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_m1,
                 A02, rs_A, cs_A,
                 e0,  inc_e,
                 buff_1,
                 y21, rs_Y );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
      bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, rs_Y );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
      bl1_caxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_m1,
                  y21,  rs_Y,
                  a12t, cs_A );

      // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
      FLA_Househ2_UT_r_opc( n_ahead - 1,
                            a12t_l,
                            a12t_r, cs_A,
                            sigma11 );

      // v21 = [ 1; a12t_r^T ]: explicit copy of the right reflector vector
      // with its leading element set to one.
      // FLA_Set( FLA_ONE, v21_t );
      // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
      *v21_t = *buff_1;
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  a12t_r, cs_A,
                  v21_b,  inc_v );

      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      bl1_cdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, rs_Y,
                v21, inc_v,
                &beta );
      bl1_cscals( buff_m1, &beta );

      // FLA_Copy( u21, z21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  u21, inc_u,
                  z21, rs_Z );
      bl1_cgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 v21, inc_v,
                 &beta,
                 z21, rs_Z );

      // FLA_Gemvc( FLA_CONJ_TRANSPOSE,    FLA_ONE, Y20, v21, FLA_ZERO, f0 );
      // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 m_behind,
                 buff_1,
                 Y20, rs_Y, cs_Y,
                 v21, inc_v,
                 buff_0,
                 f0,  inc_f );
      bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 g0,  inc_g );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
      bl1_cgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 A20, rs_A, cs_A,
                 f0,  inc_f,
                 buff_1,
                 z21, rs_Z );
      bl1_cgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 Z20, rs_Z, cs_Z,
                 g0,  inc_g,
                 buff_1,
                 z21, rs_Z );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, rs_Z );

      // FLA_Copy( g0, s01 );
      bl1_ccopyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  g0,  inc_g,
                  s01, rs_S );
    }
    else // if ( n_ahead == 0 )
    {
      // No columns remain to the right: only t01 is formed.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
      bl1_ccopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 t01, rs_T );
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &u );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &d );
  // FLA_Obj_free( &e );
  // FLA_Obj_free( &f );
  // FLA_Obj_free( &g );
  FLA_free( buff_u );
  FLA_free( buff_v );
  FLA_free( buff_d );
  FLA_free( buff_e );
  FLA_free( buff_f );
  FLA_free( buff_g );

  return FLA_SUCCESS;
}
/*
 * FLA_Bidiag_UT_u_step_opd_var1
 *
 * Double-precision kernel that runs m_TS iterations of the "variant 1"
 * bidiagonal-reduction step on the matrix in buff_A.  At step k it
 *   1. builds a left reflector from the diagonal element and the column
 *      below it (FLA_Househ2_UT_l_opd), writing its scalar to T(k,k);
 *   2. if columns remain to the right, applies that reflector to the
 *      trailing part of A, builds a right reflector from the row to the
 *      right of the diagonal (FLA_Househ2_UT_r_opd, scalar in S(k,k)),
 *      applies it to the trailing part, and fills S(0:k-1,k);
 *   3. fills T(0:k-1,k).
 *
 * Matrices are addressed as base + c*cs + r*rs.  m_A / n_A size the
 * partitions.  Always returns FLA_SUCCESS; the FLA_malloc result is not
 * checked here.
 */
FLA_Error FLA_Bidiag_UT_u_step_opd_var1 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_dcopyv(), bl1_dgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

{
  // Library constants 1.0 and 0.0.
  double*   one     = FLA_DOUBLE_PTR( FLA_ONE );
  double*   zero    = FLA_DOUBLE_PTR( FLA_ZERO );

  // Number of steps to perform (object API: FLA_Obj_length( T )).
  const int n_steps = m_TS;

  // Unit-stride scratch vector of n_A doubles that holds an explicit copy
  // of the current right reflector vector; released before returning.
  double*   v_buf   = ( double* ) FLA_malloc( n_A * sizeof( double ) );
  const int v_inc   = 1;

  int       k;

  for ( k = 0; k < n_steps; k++ )
  {
    // Partition extents relative to the diagonal position (k,k).
    const int rows_below = m_A - k - 1;   // m_ahead
    const int cols_right = n_A - k - 1;   // n_ahead
    const int n_done     = k;             // m_behind == n_behind

    // Views into A, derived from one another (FLAME names in comments).
    double*   row_left   = buff_A + k*rs_A;          // a10t
    double*   diag       = row_left + k*cs_A;        // alpha11
    double*   blk_bl     = row_left + rs_A;          // A20
    double*   col_below  = diag + rs_A;              // a21
    double*   blk_tr     = buff_A + (k+1)*cs_A;      // A02
    double*   row_right  = diag + cs_A;              // a12t  (= a12t_l)
    double*   row_tail   = row_right + cs_A;         // a12t_r
    double*   trailing   = row_right + rs_A;         // A22   (= A22_l)
    double*   trail_tail = trailing + cs_A;          // A22_r

    // Column k of T and S, and their diagonal entries.
    double*   t_col      = buff_T + k*cs_T;          // t01
    double*   t_diag     = t_col + k*rs_T;           // tau11
    double*   s_col      = buff_S + k*cs_S;          // s01
    double*   s_diag     = s_col + k*rs_S;           // sigma11

    // Slot of the scratch vector used at this step.
    double*   v_cur      = v_buf + (k+1)*v_inc;      // v21   (= v21_t)
    double*   v_rest     = v_cur + v_inc;            // v21_b

    /*------------------------------------------------------------*/

    // Left reflector: FLA_Househ2_UT( FLA_LEFT, alpha11, a21, tau11 ).
    FLA_Househ2_UT_l_opd( rows_below,
                          diag,
                          col_below, rs_A,
                          t_diag );

    if ( cols_right > 0 )
    {
      // Apply the left reflector to [ a12t; A22 ].
      FLA_Apply_H2_UT_l_opd_var1( rows_below,
                                  cols_right,
                                  t_diag,
                                  col_below, rs_A,
                                  row_right, cs_A,
                                  trailing,  rs_A, cs_A );

      // Right reflector from the row to the right of the diagonal.
      FLA_Househ2_UT_r_opd( cols_right - 1,
                            row_right,
                            row_tail, cs_A,
                            s_diag );

      // v21 = [ 1; a12t_r^T ].
      *v_cur = *one;
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  cols_right - 1,
                  row_tail, cs_A,
                  v_rest,   v_inc );

      // Apply the right reflector to [ A22_l  A22_r ].
      FLA_Apply_H2_UT_r_opd_var1( rows_below,
                                  cols_right - 1,
                                  s_diag,
                                  v_rest,     v_inc,
                                  trailing,   rs_A,
                                  trail_tail, rs_A, cs_A );

      // s01 = conj( A02 ) * v21.
      bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_done,
                 cols_right,
                 one,
                 blk_tr, rs_A, cs_A,
                 v_cur,  v_inc,
                 zero,
                 s_col,  rs_S );
    }

    // t01 = a10t^H + A20^H * a21.
    bl1_dcopyv( BLIS1_CONJUGATE,
                n_done,
                row_left, cs_A,
                t_col,    rs_T );
    bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               rows_below,
               n_done,
               one,
               blk_bl,    rs_A, cs_A,
               col_below, rs_A,
               one,
               t_col,     rs_T );

    /*------------------------------------------------------------*/

  }

  // Release the scratch vector (object API: FLA_Obj_free( &v )).
  FLA_free( v_buf );

  return FLA_SUCCESS;
}
/*
 * FLA_Bidiag_UT_u_step_opd_var2
 *
 * Double-precision kernel that runs m_TS iterations of the "variant 2"
 * bidiagonal-reduction step on the matrix in buff_A.  Each iteration i:
 *   - computes a left reflector from alpha11/a21 (FLA_Househ2_UT_l_opd),
 *     storing its scalar in tau11;
 *   - when columns remain to the right (n_ahead > 0): forms y21, updates
 *     the row a12t, computes a right reflector from a12t
 *     (FLA_Househ2_UT_r_opd, scalar in sigma11), forms v21 and z21,
 *     applies two rank-1 updates to A22 (bl1_dger), and fills s01;
 *   - fills t01.
 *
 * Parameters
 *   m_A, n_A            row / column counts used to size the partitions.
 *   m_TS                number of iterations to run (b_alg).
 *   buff_A, rs_A, cs_A  base pointer and row/column strides of A; element
 *                       (r,c) is addressed as buff_A + c*cs_A + r*rs_A.
 *   buff_T, rs_T, cs_T  receives t01 and tau11 in column i at step i.
 *   buff_S, rs_S, cs_S  receives s01 and sigma11 in column i at step i
 *                       (only when n_ahead > 0).
 *
 * Returns FLA_SUCCESS unconditionally; the results of the FLA_malloc calls
 * are not checked here.
 */
FLA_Error FLA_Bidiag_UT_u_step_opd_var2 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

{
  // Pointers to the library's constant scalars 1, 0 and -1.
  double*   buff_1  = FLA_DOUBLE_PTR( FLA_ONE );
  double*   buff_0  = FLA_DOUBLE_PTR( FLA_ZERO );
  double*   buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );

  double    beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Temporary work vectors (unit stride), released at the end of the
  // function.
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  double*   buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_v  = 1;
  int       inc_y  = 1;
  int       inc_z  = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Partition views of A, T and S around the diagonal position (i,i),
    // plus the matching slots of the work vectors.
    double*   a10t     = buff_A + (0  )*cs_A + (i  )*rs_A;
    double*   A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    double*   alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    double*   a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    double*   A02      = buff_A + (i+1)*cs_A + (0  )*rs_A;
    double*   a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
    double*   A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    double*   t01      = buff_T + (i  )*cs_T + (0  )*rs_T;
    double*   tau11    = buff_T + (i  )*cs_T + (i  )*rs_T;

    double*   s01      = buff_S + (i  )*cs_S + (0  )*rs_S;
    double*   sigma11  = buff_S + (i  )*cs_S + (i  )*rs_S;

    double*   v21      = buff_v + (i+1)*inc_v;

    double*   y21      = buff_y + (i+1)*inc_y;

    double*   z21      = buff_z + (i+1)*inc_z;

    double*   a12t_l   = a12t   + (0  )*cs_A + (0  )*rs_A;
    double*   a12t_r   = a12t   + (1  )*cs_A + (0  )*rs_A;

    double*   v21_t    = v21    + (0  )*inc_v;
    double*   v21_b    = v21    + (1  )*inc_v;

    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = n_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    FLA_Househ2_UT_l_opd( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );

    if ( n_ahead > 0 )
    {
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
      bl1_dcopyv( BLIS1_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  y21,  inc_y );
      bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_1,
                 y21, inc_y );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
      bl1_daxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_m1,
                  y21,  inc_y,
                  a12t, cs_A );

      // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
      FLA_Househ2_UT_r_opd( n_ahead - 1,
                            a12t_l,
                            a12t_r, cs_A,
                            sigma11 );

      // v21 = [ 1; a12t_r^T ].  v21_b is a view into buff_v, so it is
      // strided by inc_v.
      // FLA_Set( FLA_ONE, v21_t );
      // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
      *v21_t = *buff_1;
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  a12t_r, cs_A,
                  v21_b,  inc_v );

      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      bl1_ddot( BLIS1_CONJUGATE,
                n_ahead,
                y21, inc_y,
                v21, inc_v,
                &beta );
      bl1_dneg1( &beta );

      // FLA_Copy( a21, z21 );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  a21, rs_A,
                  z21, inc_z );
      bl1_dgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 v21, inc_v,
                 &beta,
                 z21, inc_z );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // Two rank-1 updates of the trailing block:
      // A22 := A22 - a21 * y21' - z21 * v21'.
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      bl1_dger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                a21, rs_A,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_dger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );

      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
    bl1_dcopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               a21, rs_A,
               buff_1,
               t01, rs_T );

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
/*
 * FLA_Bidiag_UT_u_step_opd_var3
 *
 * Double-precision worker that runs b_alg = m_TS iterations of an unblocked
 * loop over the matrix stored in buff_A, updating it in place.  In iteration
 * i it calls FLA_Househ2_UT_l_opd() on column i (alpha11 / a21) and, while
 * columns remain to the right (n_ahead > 0), FLA_Househ2s_UT_r_opd() on a
 * working copy (a12p) of row i (a12t).  The "// FLA_..." comments in the
 * body are the object-API operations that each bl1_* call stands for.
 * (Going by its name this is "variant 3" of an upper bidiagonal reduction --
 * the name is the only evidence for that here.)
 *
 * The rank-1 updates of the trailing block A22 produced in iteration i are
 * not applied immediately: they are applied at the top of iteration i+1
 * (the "m_behind > 0" sections), or at the bottom of the last iteration.
 *
 * Parameters:
 *   m_A, n_A            Row / column counts of A; m_ahead and n_ahead are
 *                       derived from them as m_A - i - 1 and n_A - i - 1.
 *   m_TS                Number of loop iterations (b_alg).
 *   buff_A, rs_A, cs_A  Matrix A; element (r,c) is addressed as
 *                       buff_A + r*rs_A + c*cs_A.  Overwritten in place.
 *   buff_T, rs_T, cs_T  Matrix T; iteration i writes t01 (rows 0..i-1 of
 *                       column i) and passes tau11 (element (i,i)) to
 *                       FLA_Househ2_UT_l_opd().
 *   buff_S, rs_S, cs_S  Matrix S; when n_ahead > 0, iteration i writes s01
 *                       (rows 0..i-1 of column i) and passes sigma11
 *                       (element (i,i)) to FLA_Househ2s_UT_r_opd().
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * Seven temporary vectors (w, ap, u, up, v, y, z) are obtained from
 * FLA_malloc() and released with FLA_free() before returning.
 * NOTE(review): the FLA_malloc() results are not checked in this function;
 * presumably FLA_malloc() handles allocation failure itself -- confirm.
 */
FLA_Error FLA_Bidiag_UT_u_step_opd_var3 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

{
  // Pointers to the library's double-precision constants 1, 0 and -1.
  double*   buff_1  = FLA_DOUBLE_PTR( FLA_ONE );
  double*   buff_0  = FLA_DOUBLE_PTR( FLA_ZERO );
  double*   buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );

  // Scalar temporaries; each is assigned inside the loop before it is read.
  double    alpha12;
  double    minus_conj_alpha12;
  double    psi11_minus_alpha12;
  double    minus_inv_tau11;
  double    minus_upsilon11;
  double    minus_conj_nu11;
  double    minus_conj_psi11;
  double    minus_zeta11;
  double    beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Temporary vectors: w, u, up and z hold m_A elements; ap, v and y hold
  // n_A elements.  All are contiguous (increment 1).
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  double*   buff_w  = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_u  = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_v  = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_y  = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_z  = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_w   = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_y   = 1;
  int       inc_z   = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A for this iteration, addressed as (col)*cs_A + (row)*rs_A:
    //   a10t    row i,    starting at column 0
    //   A20     rows i+1.., starting at column 0
    //   alpha11 element (i,i)
    //   a21     column i, rows i+1..
    //   A02     rows 0..,  starting at column i+1
    //   a12t    row i,    columns i+1..
    //   A22     rows i+1.., columns i+1..
    double*   a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    double*   A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    double*   alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    double*   a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    double*   A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    double*   a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    double*   A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    // Column i of T and of S: the part above the diagonal and the diagonal
    // element.
    double*   t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    double*   tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    double*   s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    double*   sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    // Views into the temporary vectors.  Element i of u, v, y and z
    // (upsilon11, nu11, psi11, zeta11) is the first element of the
    // corresponding "21" sub-vector of the previous iteration.
    double*   w21       = buff_w  + (i+1)*inc_w;

    double*   a12p      = buff_ap + (i+1)*inc_ap;

    double*   upsilon11 = buff_u  + (i  )*inc_u;
    double*   u21       = buff_u  + (i+1)*inc_u;

    double*   u21p      = buff_up + (i+1)*inc_up;

    double*   nu11      = buff_v  + (i  )*inc_v;
    double*   v21       = buff_v  + (i+1)*inc_v;

    double*   psi11     = buff_y  + (i  )*inc_y;
    double*   y21       = buff_y  + (i+1)*inc_y;

    double*   zeta11    = buff_z  + (i  )*inc_z;
    double*   z21       = buff_z  + (i+1)*inc_z;

    // First element ("_t" / "_l") and remainder ("_b" / "_r") of a12p, v21
    // and a12t; A22_l is the first column of A22.
    double*   a12p_t    = a12p    + (0  )*inc_ap;
    double*   a12p_b    = a12p    + (1  )*inc_ap;

    double*   v21_t     = v21     + (0  )*inc_v;
    double*   v21_b     = v21     + (1  )*inc_v;

    double*   a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    double*   a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    double*   A22_l     = A22     + (0  )*cs_A + (0  )*rs_A;

    // Rows / columns remaining past index i, and already processed before it.
    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    // From the second iteration on: apply the updates left pending by the
    // previous iteration to alpha11, a21 and a12t before they are used.
    if ( m_behind > 0 )
    {
      // FLA_Copy( upsilon11, minus_upsilon11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
      bl1_dmult3( buff_m1, upsilon11, &minus_upsilon11 );

      // FLA_Copy( zeta11, minus_zeta11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
      bl1_dmult3( buff_m1, zeta11, &minus_zeta11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
      bl1_dcopyconj( psi11, &minus_conj_psi11 );
      bl1_dscals( buff_m1, &minus_conj_psi11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
      bl1_dcopyconj( nu11, &minus_conj_nu11 );
      bl1_dscals( buff_m1, &minus_conj_nu11 );

      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  zeta11,    alpha11 );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_conj_psi11,
                  upsilon11, 1,
                  alpha11,   1 );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_conj_nu11,
                  zeta11,  1,
                  alpha11, 1 );

      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  z21, a21 );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_psi11,
                  u21, inc_u,
                  a21, rs_A );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_nu11,
                  z21, inc_z,
                  a21, rs_A );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11,    v21, a12t );
      bl1_daxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_upsilon11,
                  y21,  inc_y,
                  a12t, cs_A );
      bl1_daxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_zeta11,
                  v21,  inc_v,
                  a12t, cs_A );
    }

    // Left Householder step on column i; the resulting a21 is saved in u21p
    // because u21 still holds the previous iteration's vector, which the
    // pending A22 update below needs.
    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_opd( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_dcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    if ( n_ahead > 0 )
    {
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_ddiv3( buff_m1, tau11, &minus_inv_tau11 );

      // Start the working copy a12p of row a12t.
      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );
    }

    // Apply the previous iteration's two rank-1 updates to A22.  u21, y21,
    // z21 and v21 still hold the previous iteration's data at this point.
    if ( m_behind > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      bl1_dger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u21, inc_u,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_dger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );
    }

    if ( n_ahead > 0 )
    {
      // y21 is overwritten here with this iteration's values.
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
      bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_0,
                 y21,  inc_y );

      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      bl1_daxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  y21,  inc_y,
                  a12p, inc_ap );

      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      bl1_dgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 a12p, inc_ap,
                 buff_0,
                 w21,  inc_w );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_daxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_1,
                  a12t, cs_A,
                  y21,  inc_y );

      // Right Householder step on the working copy a12p (first element
      // a12p_t, remaining n_ahead - 1 elements a12p_b); alpha12,
      // psi11_minus_alpha12 and sigma11 are passed by address.
      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_opd( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // Build v21 from a12p, alpha12 and psi11_minus_alpha12.
      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      // FLA_Conjugate( v21_b );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_dmult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );
      bl1_dconjv( n_ahead - 1,
                  v21_b, inc_v );

      // Store the result back into row i of A: alpha12 in the first
      // position, v21_b in the remaining n_ahead - 1 positions.
      // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // The previous u21 is no longer needed; install this iteration's vector.
    // FLA_Copy( u21p, u21 );
    bl1_dcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_ddot( BLIS1_CONJUGATE,
                n_ahead,
                y21, inc_y,
                v21, inc_v,
                &beta );
      bl1_dscals( &minus_inv_tau11, &beta );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_dcopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_dneg1( &minus_conj_alpha12 );

      // z21 is overwritten here with this iteration's values.
      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, inc_z );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  A22_l, rs_A,
                  z21,   inc_z );
      bl1_dinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, inc_z );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, inc_z );

      // NOTE(review): the y21 call below passes BLIS1_CONJUGATE although the
      // reference comment says FLA_NO_CONJUGATE; for real double data this
      // presumably makes no difference -- confirm.
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_dinvscalv( BLIS1_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // Fill t01 (column i of T above the diagonal) from a10t and A20' * u21.
    // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
    bl1_dcopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               u21, inc_u,
               buff_1,
               t01, rs_T );

    // Last iteration only: apply this iteration's rank-1 updates to A22 now,
    // presumably because no later iteration exists to apply them.
    if ( m_behind + 1 == b_alg && n_ahead > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      bl1_dger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u21, inc_u,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_dger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );
    }

    /*------------------------------------------------------------*/

  }

  // Release the temporary vectors allocated above.
  // FLA_Obj_free( &w );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_w );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
/*
 * FLA_Bidiag_UT_u_step_opd_var4
 *
 * Double-precision worker that runs b_alg = m_TS iterations of an unblocked
 * loop over the matrix stored in buff_A.  In iteration i it calls
 * FLA_Househ2_UT_l_opd() on column i (alpha11 / a21) and, while columns
 * remain to the right (n_ahead > 0), FLA_Househ2s_UT_r_opd() on a working
 * copy (a12p) of row i (a12t).  The "// FLA_..." comments in the body are
 * the object-API operations that each bl1_* call stands for.
 * (Going by its name this is "variant 4" of an upper bidiagonal reduction --
 * the name is the only evidence for that here.)
 *
 * No update of the trailing block A22 is issued in this function.  Column i
 * and row i of A are brought up to date at the top of each iteration with
 * matrix-vector products against the columns of Y and Z written so far, and
 * products with A22 are corrected with extra matrix-vector products against
 * A20, Z20, Y20 and A02.
 *
 * Parameters:
 *   m_A, n_A            Row / column counts of A; m_ahead and n_ahead are
 *                       derived from them as m_A - i - 1 and n_A - i - 1.
 *   m_TS                Number of loop iterations (b_alg).
 *   buff_A, rs_A, cs_A  Matrix A; element (r,c) is addressed as
 *                       buff_A + r*rs_A + c*cs_A.  Overwritten in place.
 *   buff_Y, rs_Y, cs_Y  Matrix Y; its leading n_A x b_alg part is set to
 *                       zero on entry, then iteration i writes y21 (column
 *                       i, rows i+1..) when n_ahead > 0.
 *   buff_Z, rs_Z, cs_Z  Matrix Z; its leading m_A x b_alg part is set to
 *                       zero on entry, then iteration i writes z21 (column
 *                       i, rows i+1..) when n_ahead > 0.
 *   buff_T, rs_T, cs_T  Matrix T; iteration i writes t01 (rows 0..i-1 of
 *                       column i) and passes tau11 (element (i,i)) to
 *                       FLA_Househ2_UT_l_opd().
 *   buff_S, rs_S, cs_S  Matrix S; when n_ahead > 0, iteration i writes s01
 *                       (rows 0..i-1 of column i) and passes sigma11
 *                       (element (i,i)) to FLA_Househ2s_UT_r_opd().
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * Ten temporary vectors (w, al, ap, u, up, v, d, e, f, g) are obtained from
 * FLA_malloc() and released with FLA_free() before returning.
 * NOTE(review): the FLA_malloc() results are not checked in this function;
 * presumably FLA_malloc() handles allocation failure itself -- confirm.
 */
FLA_Error FLA_Bidiag_UT_u_step_opd_var4 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), bl1_dsetm(), bl1_dsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

{
  // Pointers to the library's double-precision constants 1, 0 and -1.
  double*   buff_1  = FLA_DOUBLE_PTR( FLA_ONE );
  double*   buff_0  = FLA_DOUBLE_PTR( FLA_ZERO );
  double*   buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );

  // Scalar temporaries; each is assigned inside the loop before it is read.
  double    alpha12;
  double    minus_conj_alpha12;
  double    psi11_minus_alpha12;
  double    minus_inv_tau11;
  double    beta;
  double    last_elem;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Temporary vectors: w, al, u, up, f and g hold m_A elements; ap, v, d
  // and e hold n_A elements.  All are contiguous (increment 1).
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
  double*   buff_w  = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_al = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_u  = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_v  = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_d  = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_e  = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_f  = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_g  = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_w   = 1;
  int       inc_al  = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_d   = 1;
  int       inc_e   = 1;
  int       inc_f   = 1;
  int       inc_g   = 1;

  // Y and Z are cleared up front, so the products taken against their
  // not-yet-written entries contribute zero.
  // FLA_Set( FLA_ZERO, Y );
  // FLA_Set( FLA_ZERO, Z );
  bl1_dsetm( n_A,
             b_alg,
             buff_0,
             buff_Y, rs_Y, cs_Y );
  bl1_dsetm( m_A,
             b_alg,
             buff_0,
             buff_Z, rs_Z, cs_Z );

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A for this iteration, addressed as (col)*cs_A + (row)*rs_A:
    //   a10t    row i,    starting at column 0
    //   A20     rows i+1.., starting at column 0
    //   a01     column i, starting at row 0
    //   alpha11 element (i,i)
    //   a21     column i, rows i+1..
    //   A02     rows 0..,  starting at column i+1
    //   a12t    row i,    columns i+1..
    //   A22     rows i+1.., columns i+1..
    double*   a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    double*   A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    double*   a01       = buff_A  + (i  )*cs_A + (0  )*rs_A;
    double*   alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    double*   a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    double*   A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    double*   a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    double*   A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    // Matching views into Y and Z (same row / column layout as above).
    double*   y10t      = buff_Y  + (0  )*cs_Y + (i  )*rs_Y;
    double*   Y20       = buff_Y  + (0  )*cs_Y + (i+1)*rs_Y;
    double*   y21       = buff_Y  + (i  )*cs_Y + (i+1)*rs_Y;

    double*   z10t      = buff_Z  + (0  )*cs_Z + (i  )*rs_Z;
    double*   Z20       = buff_Z  + (0  )*cs_Z + (i+1)*rs_Z;
    double*   z21       = buff_Z  + (i  )*cs_Z + (i+1)*rs_Z;

    // Column i of T and of S: the part above the diagonal and the diagonal
    // element.
    double*   t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    double*   tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    double*   s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    double*   sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    // Views into the temporary vectors; d0, e0, f0 and g0 always start at
    // element 0 of their buffers.
    double*   w21       = buff_w  + (i+1)*inc_w;

    double*   a22l      = buff_al + (i+1)*inc_al;

    double*   a12p      = buff_ap + (i+1)*inc_ap;

    double*   u21       = buff_u  + (i+1)*inc_u;

    double*   u21p      = buff_up + (i+1)*inc_up;

    double*   v21       = buff_v  + (i+1)*inc_v;

    double*   d0        = buff_d  + (0  )*inc_d;

    double*   e0        = buff_e  + (0  )*inc_e;

    double*   f0        = buff_f  + (0  )*inc_f;

    double*   g0        = buff_g  + (0  )*inc_g;

    // First element ("_t" / "_l") and remainder ("_b" / "_r") of a12p, v21
    // and a12t.
    double*   a12p_t    = a12p    + (0  )*inc_ap;
    double*   a12p_b    = a12p    + (1  )*inc_ap;

    double*   v21_t     = v21     + (0  )*inc_v;
    double*   v21_b     = v21     + (1  )*inc_v;

    // Last element of a01, i.e. element (i-1, i) of A.
    // NOTE(review): for i == 0 this address lies before a01; it is only
    // dereferenced under "m_behind > 0", but the pointer is still formed.
    double*   a01_b     = a01     + (0  )*cs_A + (i-1)*rs_A;

    double*   a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    double*   a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    // First column of A02 and of A22; first row of Y20.
    double*   A02_l     = A02     + (0  )*cs_A + (0  )*rs_A;

    double*   A22_l     = A22     + (0  )*cs_A + (0  )*rs_A;

    double*   Y20_t     = Y20     + (0  )*cs_Y + (0  )*rs_Y;

    // ABL / ZBL: A and Z from row i downwards, starting at column 0.
    // a2: column i of A from row i downwards (alpha11 followed by a21).
    double*   ABL       = a10t;
    double*   ZBL       = z10t;

    double*   a2        = alpha11;

    // Rows / columns remaining past index i, and already processed before it.
    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    // Temporarily replace the last element of a01 with one for the products
    // below; the saved value is restored after them.
    if ( m_behind > 0 )
    {
      // FLA_Copy( a01_b, last_elem );
      // FLA_Set( FLA_ONE, a01_b );
      last_elem = *a01_b;
      *a01_b = *buff_1;
    }

    // Bring column i of A (a2, m_ahead + 1 elements) up to date.
    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01,  FLA_ONE, a2 );
    bl1_dgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ABL,  rs_A, cs_A,
               y10t, cs_Y,
               buff_1,
               a2,   rs_A );
    bl1_dgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ZBL, rs_Z, cs_Z,
               a01, rs_A,
               buff_1,
               a2,  rs_A );

    // Bring row i of A to the right of the diagonal (a12t) up to date.
    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE,    FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
    bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               n_ahead,
               n_behind,
               buff_m1,
               Y20,  rs_Y, cs_Y,
               a10t, cs_A,
               buff_1,
               a12t, cs_A );
    bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_behind,
               n_ahead,
               buff_m1,
               A02,  rs_A, cs_A,
               z10t, cs_Z,
               buff_1,
               a12t, cs_A );

    // Put back the element of a01 that was overwritten above.
    if ( m_behind > 0 )
    {
      // FLA_Copy( last_elem, a01_b );
      *a01_b = last_elem;
    }

    // Left Householder step on column i; the resulting a21 is copied to u21p.
    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_opd( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_dcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    if ( n_ahead > 0 )
    {
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_ddiv3( buff_m1, tau11, &minus_inv_tau11 );

      // Start the working copy a12p of row a12t.
      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );

      // d0 and e0 (n_behind elements each) are reused below for t01 and y21.
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
      bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_0,
                 d0,   inc_d );
      bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 Z20,  rs_Z, cs_Z,
                 u21p, inc_up,
                 buff_0,
                 e0,   inc_e );

      // Fill t01 using d0 (the A20' * u21p product computed just above).
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Axpy( FLA_ONE, d0, t01 );
      bl1_dcopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  buff_1,
                  d0,  inc_d,
                  t01, rs_T );

      // Build y21 (column i of Y below row i), starting from zero.
      // FLA_Set( FLA_ZERO, y21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      // FLA_Gemv( FLA_TRANSPOSE,    FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
      bl1_dsetv( n_ahead,
                 buff_0,
                 y21, rs_Y );
      bl1_dgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_m1,
                 Y20, rs_Y, cs_Y,
                 d0,  inc_d,
                 buff_1,
                 y21, rs_Y );
      bl1_dgemv( BLIS1_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_m1,
                 A02, rs_A, cs_A,
                 e0,  inc_e,
                 buff_1,
                 y21, rs_Y );

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
      bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_1,
                 y21,  rs_Y );

      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      bl1_daxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  y21,  rs_Y,
                  a12p, inc_ap );

      // w21 starts as A22 * a12p and is corrected below with f0 and g0.
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      bl1_dgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 a12p, inc_ap,
                 buff_0,
                 w21,  inc_w );

      // FLA_Gemvc( FLA_CONJ_TRANSPOSE,    FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
      // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
      bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_1,
                 Y20,  rs_Y, cs_Y,
                 a12p, inc_ap,
                 buff_0,
                 f0,   inc_f );
      bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02,  rs_A, cs_A,
                 a12p, inc_ap,
                 buff_0,
                 g0,   inc_g );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
      bl1_dgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 A20, rs_A, cs_A,
                 f0,  inc_f,
                 buff_1,
                 w21, inc_w );
      bl1_dgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 Z20, rs_Z, cs_Z,
                 g0,  inc_g,
                 buff_1,
                 w21, inc_w );

      // a22l: a corrected copy of the first column of A22, kept in the
      // temporary buffer so that A22 itself is left untouched.
      // FLA_Copy( A22_l, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  A22_l, rs_A,
                  a22l,  inc_al );
      bl1_dgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 A20,   rs_A, cs_A,
                 Y20_t, cs_Y,
                 buff_1,
                 a22l,  inc_al );
      bl1_dgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 Z20,   rs_Z, cs_Z,
                 A02_l, rs_A,
                 buff_1,
                 a22l,  inc_al );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_daxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_1, 
                  a12t, cs_A,
                  y21,  rs_Y );

      // Right Householder step on the working copy a12p (first element
      // a12p_t, remaining n_ahead - 1 elements a12p_b); alpha12,
      // psi11_minus_alpha12 and sigma11 are passed by address.
      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_opd( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // Build v21 from a12p, alpha12 and psi11_minus_alpha12.
      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      // FLA_Conjugate( v21_b );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_dmult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );
      bl1_dconjv( n_ahead - 1,
                  v21_b, inc_v );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_dcopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_dneg1( &minus_conj_alpha12 );

      // Fill s01 (column i of S above the diagonal) from g0 and A02_l.
      // FLA_Copy( g0, s01 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  g0,  inc_g,
                  s01, rs_S );
      bl1_daxpyv( BLIS1_CONJUGATE,
                  n_behind,
                  &minus_conj_alpha12,
                  A02_l, rs_A,
                  s01,   rs_S );
      bl1_dinvscalv( BLIS1_CONJUGATE,
                     n_behind,
                     &psi11_minus_alpha12,
                     s01, rs_S );

      // Store the result back into row i of A: alpha12 in the first
      // position, v21_b in the remaining n_ahead - 1 positions.
      // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // FLA_Copy( u21p, u21 );
    bl1_dcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_ddot( BLIS1_CONJUGATE,
                n_ahead,
                y21, rs_Y,
                v21, inc_v,
                &beta );
      bl1_dscals( &minus_inv_tau11, &beta );

      // Build z21 (column i of Z below row i).
      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, rs_Z );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  a22l, inc_al,
                  z21,  rs_Z );
      bl1_dinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, rs_Z );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, rs_Z );

      // NOTE(review): the y21 call below passes BLIS1_CONJUGATE although the
      // reference comment says FLA_NO_CONJUGATE; for real double data this
      // presumably makes no difference -- confirm.
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_dinvscalv( BLIS1_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, rs_Y );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, rs_Z );
    }
    else // if ( n_ahead == 0 )
    {
      // No columns remain, so the n_ahead > 0 section that fills t01 via d0
      // did not run; t01 is computed directly here instead.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
      bl1_dcopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 t01, rs_T );
    }

    /*------------------------------------------------------------*/

  }

  // Release the temporary vectors allocated above.
  // FLA_Obj_free( &w );
  // FLA_Obj_free( &al );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &d );
  // FLA_Obj_free( &e );
  // FLA_Obj_free( &f );
  // FLA_Obj_free( &g );
  FLA_free( buff_w );
  FLA_free( buff_al );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_d );
  FLA_free( buff_e );
  FLA_free( buff_f );
  FLA_free( buff_g );

  return FLA_SUCCESS;
}
// FLA_Bidiag_UT_u_step_opd_var5
//
// Double-precision, raw-buffer routine that runs m_TS iterations of the
// loop below over A (buff_A, addressed with row stride rs_A and column
// stride cs_A), filling Y, Z, T and S as it goes.
//
// What each iteration i does, as visible in the code:
//   1. Updates column i of A (alpha11 and below) and row i to the right of
//      the diagonal (a12t) with gemv products that involve the rows/columns
//      of A, Y and Z from earlier iterations.
//   2. Calls FLA_Househ2_UT_l_opd on ( alpha11, a21 ) with tau11, and copies
//      a21 into the scratch vector u21.
//   3. If columns remain ( n_ahead > 0 ): builds y21, subtracts it from a12t,
//      calls FLA_Househ2_UT_r_opd on ( a12t_l, a12t_r ) with sigma11, builds
//      v21 and z21, and writes t01 and s01.
//      Otherwise only t01 is written.
//
// Parameters:
//   m_A, n_A          sizes used for A ( m_ahead = m_A - i - 1,
//                     n_ahead = n_A - i - 1 ) and for the scratch vectors.
//   m_TS              number of iterations ( b_alg ).
//   buff_A, rs_A, cs_A  matrix A; modified in place.
//   buff_Y, rs_Y, cs_Y  set to zero on entry via bl1_dsetm( n_A, b_alg, ... ),
//                       then y21 is stored in column i.
//   buff_Z, rs_Z, cs_Z  set to zero on entry via bl1_dsetm( m_A, b_alg, ... ),
//                       then z21 is stored in column i.
//   buff_T, rs_T, cs_T  column i receives t01 and tau11.
//   buff_S, rs_S, cs_S  column i receives s01 and sigma11.
//
// Returns FLA_SUCCESS unconditionally.
//
// The "// FLA_..." comments in the body appear to give the object-based API
// call that each raw-buffer call stands for (NOTE(review): confirm against
// the unblocked object-based variant).
FLA_Error FLA_Bidiag_UT_u_step_opd_var5 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), bl1_dsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

{
  // Pointers to the double-precision constants 1, 0 and -1.
  double*   buff_1  = FLA_DOUBLE_PTR( FLA_ONE );
  double*   buff_0  = FLA_DOUBLE_PTR( FLA_ZERO );
  double*   buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );

  double    beta;
  // Holds the element of A that is temporarily overwritten with one below;
  // only assigned and read when m_behind > 0.
  double    last_elem;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
  // Scratch vectors, all unit stride. The FLA_malloc results are used
  // without a NULL check in this function; all six are released with
  // FLA_free before returning.
  double*   buff_u  = ( double*   ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_v  = ( double*   ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_d  = ( double*   ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_e  = ( double*   ) FLA_malloc( n_A * sizeof( *buff_A ) );
  double*   buff_f  = ( double*   ) FLA_malloc( m_A * sizeof( *buff_A ) );
  double*   buff_g  = ( double*   ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_u   = 1;
  int       inc_v   = 1;
  int       inc_d   = 1;
  int       inc_e   = 1;
  int       inc_f   = 1;
  int       inc_g   = 1;

  // FLA_Set( FLA_ZERO, Y );
  // FLA_Set( FLA_ZERO, Z );
  bl1_dsetm( n_A,
             b_alg,
             buff_0,
             buff_Y, rs_Y, cs_Y );
  bl1_dsetm( m_A,
             b_alg,
             buff_0,
             buff_Z, rs_Z, cs_Z );

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A around the diagonal element ( i, i ): names ending in
    // 0 / 1 / 2 address the part before / at / after index i.
    double*   a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    double*   A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    double*   a01       = buff_A  + (i  )*cs_A + (0  )*rs_A;
    double*   alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    double*   a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    double*   A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    double*   a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    double*   A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    double*   y10t      = buff_Y  + (0  )*cs_Y + (i  )*rs_Y;
    double*   Y20       = buff_Y  + (0  )*cs_Y + (i+1)*rs_Y;
    double*   y21       = buff_Y  + (i  )*cs_Y + (i+1)*rs_Y;

    double*   z10t      = buff_Z  + (0  )*cs_Z + (i  )*rs_Z;
    double*   Z20       = buff_Z  + (0  )*cs_Z + (i+1)*rs_Z;
    double*   z21       = buff_Z  + (i  )*cs_Z + (i+1)*rs_Z;

    double*   t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    double*   tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    double*   s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    double*   sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    double*   u21       = buff_u  + (i+1)*inc_u;

    double*   v21       = buff_v  + (i+1)*inc_v;

    double*   d0        = buff_d  + (0  )*inc_d;

    double*   e0        = buff_e  + (0  )*inc_e;

    double*   f0        = buff_f  + (0  )*inc_f;

    double*   g0        = buff_g  + (0  )*inc_g;

    double*   v21_t     = v21     + (0  )*inc_v;
    double*   v21_b     = v21     + (1  )*inc_v;

    // Element ( i-1, i ) of A, i.e. the last entry of a01. For i == 0 this
    // address lies one row before a01; it is only dereferenced inside the
    // m_behind > 0 guards below.
    double*   a01_b     = a01     + (0  )*cs_A + (i-1)*rs_A;

    double*   a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    double*   a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    // ABL / ZBL start at row i and are used with m_ahead + 1 rows below;
    // a2 is column i of A from the diagonal element downwards.
    double*   ABL       = a10t;
    double*   ZBL       = z10t;

    double*   a2        = alpha11;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    // Temporarily replace the last element of a01 with one for the four
    // gemv updates that follow; the saved value is restored right after them.
    if ( m_behind > 0 )
    {
      // FLA_Copy( a01_b, last_elem );
      // FLA_Set( FLA_ONE, a01_b );
      last_elem = *a01_b;
      *a01_b = *buff_1;
    }

    // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01,  FLA_ONE, a2 );
    bl1_dgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ABL,  rs_A, cs_A,
               y10t, cs_Y,
               buff_1,
               a2,   rs_A );
    bl1_dgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ZBL, rs_Z, cs_Z,
               a01, rs_A,
               buff_1,
               a2,  rs_A );

    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE,    FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
    bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               n_ahead,
               n_behind,
               buff_m1,
               Y20,  rs_Y, cs_Y,
               a10t, cs_A,
               buff_1,
               a12t, cs_A );
    bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_behind,
               n_ahead,
               buff_m1,
               A02,  rs_A, cs_A,
               z10t, cs_Z,
               buff_1,
               a12t, cs_A );

    // Put back the element of A that was overwritten with one above.
    if ( m_behind > 0 )
    {
      // FLA_Copy( last_elem, a01_b );
      *a01_b = last_elem;
    }

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21 );
    FLA_Househ2_UT_l_opd( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_dcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21, rs_A,
                u21, inc_u );

    if ( n_ahead > 0 )
    {
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
      bl1_dcopyv( BLIS1_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  y21,  rs_Y );
      bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 y21, rs_Y );

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
      bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_0,
                 d0,  inc_d );
      bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 Z20, rs_Z, cs_Z,
                 u21, inc_u,
                 buff_0,
                 e0,  inc_e );

      // d0 is reused here, so t01 is formed with a copy plus an axpy
      // instead of the copy plus gemv used in the n_ahead == 0 branch.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Axpy( FLA_ONE, d0, t01 );
      bl1_dcopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_daxpyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  buff_1,
                  d0,  inc_d,
                  t01, rs_T );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      // FLA_Gemv( FLA_TRANSPOSE,    FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
      bl1_dgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_m1,
                 Y20, rs_Y, cs_Y,
                 d0,  inc_d,
                 buff_1,
                 y21, rs_Y );
      bl1_dgemv( BLIS1_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_m1,
                 A02, rs_A, cs_A,
                 e0,  inc_e,
                 buff_1,
                 y21, rs_Y );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, rs_Y );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
      bl1_daxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_m1,
                  y21,  rs_Y,
                  a12t, cs_A );

      // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
      FLA_Househ2_UT_r_opd( n_ahead - 1,
                            a12t_l,
                            a12t_r, cs_A,
                            sigma11 );

      // v21 = ( 1, a12t_r ): first entry set to one, the rest copied from
      // the part of row i to the right of a12t_l.
      // FLA_Set( FLA_ONE, v21_t );
      // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
      *v21_t = *buff_1;
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  a12t_r, cs_A,
                  v21_b,  inc_v );

      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      bl1_ddot( BLIS1_CONJUGATE,
                n_ahead,
                y21, rs_Y,
                v21, inc_v,
                &beta );
      bl1_dscals( buff_m1, &beta );

      // z21 starts as a copy of u21 and is then passed to gemv as the
      // output vector with beta as its scaling argument.
      // FLA_Copy( u21, z21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  u21, inc_u,
                  z21, rs_Z );
      bl1_dgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 v21, inc_v,
                 &beta,
                 z21, rs_Z );

      // FLA_Gemvc( FLA_CONJ_TRANSPOSE,    FLA_ONE, Y20, v21, FLA_ZERO, f0 );
      // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
      bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 m_behind,
                 buff_1,
                 Y20, rs_Y, cs_Y,
                 v21, inc_v,
                 buff_0,
                 f0,  inc_f );
      bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 g0,  inc_g );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
      bl1_dgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 A20, rs_A, cs_A,
                 f0,  inc_f,
                 buff_1,
                 z21, rs_Z );
      bl1_dgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 Z20, rs_Z, cs_Z,
                 g0,  inc_g,
                 buff_1,
                 z21, rs_Z );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_dinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, rs_Z );

      // FLA_Copy( g0, s01 );
      bl1_dcopyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  g0,  inc_g,
                  s01, rs_S );
    }
    else // if ( n_ahead == 0 )
    {
      // No columns remain to the right: only t01 is computed; y21, z21,
      // sigma11 and s01 are left untouched in this iteration.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
      bl1_dcopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 t01, rs_T );
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &u );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &d );
  // FLA_Obj_free( &e );
  // FLA_Obj_free( &f );
  // FLA_Obj_free( &g );
  FLA_free( buff_u );
  FLA_free( buff_v );
  FLA_free( buff_d );
  FLA_free( buff_e );
  FLA_free( buff_f );
  FLA_free( buff_g );

  return FLA_SUCCESS;
}
// FLA_Bidiag_UT_u_step_ops_var1
//
// Single-precision, raw-buffer routine that runs m_TS iterations of the
// loop below over A (buff_A, addressed with row stride rs_A and column
// stride cs_A), writing column i of T and S in iteration i.
//
// What each iteration i does, as visible in the code:
//   1. Calls FLA_Househ2_UT_l_ops on ( alpha11, a21 ) with tau11.
//   2. If columns remain ( n_ahead > 0 ): calls FLA_Apply_H2_UT_l_ops_var1
//      on ( a12t, A22 ), calls FLA_Househ2_UT_r_ops on ( a12t_l, a12t_r )
//      with sigma11, builds v21 = ( 1, a12t_r ), calls
//      FLA_Apply_H2_UT_r_ops_var1 on ( A22_l, A22_r ), and computes s01
//      from A02 and v21.
//   3. Always computes t01 from a10t, A20 and a21.
//
// Parameters:
//   m_A, n_A            sizes used for A ( m_ahead = m_A - i - 1,
//                       n_ahead = n_A - i - 1 ); n_A also sizes scratch v.
//   m_TS                number of iterations ( b_alg ).
//   buff_A, rs_A, cs_A  matrix A; modified in place.
//   buff_T, rs_T, cs_T  column i receives t01 and tau11.
//   buff_S, rs_S, cs_S  column i receives s01 and sigma11 when n_ahead > 0.
//
// Returns FLA_SUCCESS unconditionally.
FLA_Error FLA_Bidiag_UT_u_step_ops_var1 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_scopyv(), bl1_sgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

{
  // Pointers to the single-precision constants 1 and 0.
  float*    buff_1  = FLA_FLOAT_PTR( FLA_ONE );
  float*    buff_0  = FLA_FLOAT_PTR( FLA_ZERO );

  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  // Only v is allocated in this variant; the y and z lines above have no
  // counterpart below. The FLA_malloc result is used without a NULL check
  // in this function and is released with FLA_free before returning.
  float*    buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  int       inc_v  = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A around the diagonal element ( i, i ): names ending in
    // 0 / 1 / 2 address the part before / at / after index i.
    float*    a10t     = buff_A + (0  )*cs_A + (i  )*rs_A;
    float*    A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    float*    alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    float*    a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    float*    A02      = buff_A + (i+1)*cs_A + (0  )*rs_A;
    float*    a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
    float*    A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    float*    t01      = buff_T + (i  )*cs_T + (0  )*rs_T;
    float*    tau11    = buff_T + (i  )*cs_T + (i  )*rs_T;

    float*    s01      = buff_S + (i  )*cs_S + (0  )*rs_S;
    float*    sigma11  = buff_S + (i  )*cs_S + (i  )*rs_S;

    float*    v21      = buff_v + (i+1)*inc_v;

    // _l / _r split off the first column from the remaining columns;
    // _t / _b split off the first entry of a vector from the rest.
    float*    a12t_l   = a12t   + (0  )*cs_A + (0  )*rs_A;
    float*    a12t_r   = a12t   + (1  )*cs_A + (0  )*rs_A;

    float*    A22_l    = A22    + (0  )*cs_A + (0  )*rs_A;
    float*    A22_r    = A22    + (1  )*cs_A + (0  )*rs_A;

    float*    v21_t    = v21    + (0  )*inc_v;
    float*    v21_b    = v21    + (1  )*inc_v;

    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = n_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    FLA_Househ2_UT_l_ops( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );

    if ( n_ahead > 0 )
    {
      // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
      FLA_Apply_H2_UT_l_ops_var1( m_ahead,
                                  n_ahead,
                                  tau11,
                                  a21,  rs_A,
                                  a12t, cs_A,
                                  A22,  rs_A, cs_A );

      // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
      FLA_Househ2_UT_r_ops( n_ahead - 1,
                            a12t_l,
                            a12t_r, cs_A,
                            sigma11 );

      // v21 = ( 1, a12t_r ): first entry set to one, the rest copied from
      // the part of row i to the right of a12t_l.
      // FLA_Set( FLA_ONE, v21_t );
      // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
      *v21_t = *buff_1;
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  a12t_r, cs_A,
                  v21_b,  inc_v );

      // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
      FLA_Apply_H2_UT_r_ops_var1( m_ahead,
                                  n_ahead - 1,
                                  sigma11,
                                  v21_b, inc_v,
                                  A22_l, rs_A,
                                  A22_r, rs_A, cs_A );

      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // t01 is computed in every iteration, including the one where
    // n_ahead == 0 and the block above is skipped.
    // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
    bl1_scopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               a21, rs_A,
               buff_1,
               t01, rs_T );

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &v );
  FLA_free( buff_v );

  return FLA_SUCCESS;
}
// FLA_Bidiag_UT_u_step_ops_var2
//
// Single-precision, raw-buffer routine that runs m_TS iterations of the
// loop below over A (buff_A, addressed with row stride rs_A and column
// stride cs_A), writing column i of T and S in iteration i.
//
// What each iteration i does, as visible in the code:
//   1. Calls FLA_Househ2_UT_l_ops on ( alpha11, a21 ) with tau11.
//   2. If columns remain ( n_ahead > 0 ): builds y21 from a12t, A22 and a21
//      and divides it by tau11, subtracts y21 from a12t, calls
//      FLA_Househ2_UT_r_ops on ( a12t_l, a12t_r ) with sigma11, builds
//      v21 = ( 1, a12t_r ), builds z21 from a21, A22 and v21 and divides it
//      by sigma11, applies two rank-1 updates ( bl1_sger ) to A22, and
//      computes s01 from A02 and v21.
//   3. Always computes t01 from a10t, A20 and a21.
//
// Parameters:
//   m_A, n_A            sizes used for A ( m_ahead = m_A - i - 1,
//                       n_ahead = n_A - i - 1 ) and for the scratch vectors.
//   m_TS                number of iterations ( b_alg ).
//   buff_A, rs_A, cs_A  matrix A; modified in place.
//   buff_T, rs_T, cs_T  column i receives t01 and tau11.
//   buff_S, rs_S, cs_S  column i receives s01 and sigma11 when n_ahead > 0.
//
// Returns FLA_SUCCESS unconditionally.
FLA_Error FLA_Bidiag_UT_u_step_ops_var2 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

{
  // Pointers to the single-precision constants 1, 0 and -1.
  float*    buff_1  = FLA_FLOAT_PTR( FLA_ONE );
  float*    buff_0  = FLA_FLOAT_PTR( FLA_ZERO );
  float*    buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );

  float     beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  // Scratch vectors, all unit stride. The FLA_malloc results are used
  // without a NULL check in this function; all three are released with
  // FLA_free before returning.
  float*    buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_v  = 1;
  int       inc_y  = 1;
  int       inc_z  = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A around the diagonal element ( i, i ): names ending in
    // 0 / 1 / 2 address the part before / at / after index i.
    float*    a10t     = buff_A + (0  )*cs_A + (i  )*rs_A;
    float*    A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    float*    alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    float*    a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    float*    A02      = buff_A + (i+1)*cs_A + (0  )*rs_A;
    float*    a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
    float*    A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    float*    t01      = buff_T + (i  )*cs_T + (0  )*rs_T;
    float*    tau11    = buff_T + (i  )*cs_T + (i  )*rs_T;

    float*    s01      = buff_S + (i  )*cs_S + (0  )*rs_S;
    float*    sigma11  = buff_S + (i  )*cs_S + (i  )*rs_S;

    float*    v21      = buff_v + (i+1)*inc_v;

    float*    y21      = buff_y + (i+1)*inc_y;

    float*    z21      = buff_z + (i+1)*inc_z;

    // _l / _r split off the first column from the remaining columns;
    // _t / _b split off the first entry of a vector from the rest.
    float*    a12t_l   = a12t   + (0  )*cs_A + (0  )*rs_A;
    float*    a12t_r   = a12t   + (1  )*cs_A + (0  )*rs_A;

    float*    v21_t    = v21    + (0  )*inc_v;
    float*    v21_b    = v21    + (1  )*inc_v;

    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = n_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    FLA_Househ2_UT_l_ops( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );

    if ( n_ahead > 0 )
    {
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
      bl1_scopyv( BLIS1_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  y21,  inc_y );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_1,
                 y21, inc_y );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
      bl1_saxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_m1,
                  y21,  inc_y,
                  a12t, cs_A );

      // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
      FLA_Househ2_UT_r_ops( n_ahead - 1,
                            a12t_l,
                            a12t_r, cs_A,
                            sigma11 );

      // v21 = ( 1, a12t_r ): first entry set to one, the rest copied from
      // the part of row i to the right of a12t_l. v21_b is a slice of
      // buff_v, so its stride is inc_v (not inc_y).
      // FLA_Set( FLA_ONE, v21_t );
      // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
      *v21_t = *buff_1;
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  a12t_r, cs_A,
                  v21_b,  inc_v );

      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      bl1_sdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, inc_y,
                v21, inc_v,
                &beta );
      bl1_sneg1( &beta );

      // z21 starts as a copy of a21 and is then passed to gemv as the
      // output vector with beta as its scaling argument, before being
      // divided by sigma11.
      // FLA_Copy( a21, z21 );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  a21, rs_A,
                  z21, inc_z );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 v21, inc_v,
                 &beta,
                 z21, inc_z );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // Both rank-1 updates of A22 come after y21 and z21 are complete,
      // because the gemv calls above read A22 before it is modified.
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      bl1_sger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                a21, rs_A,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_sger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );

      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // t01 is computed in every iteration, including the one where
    // n_ahead == 0 and the block above is skipped.
    // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
    bl1_scopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               a21, rs_A,
               buff_1,
               t01, rs_T );

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_step_ops_var3 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_sconjv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

{
  float*    buff_1  = FLA_FLOAT_PTR( FLA_ONE );
  float*    buff_0  = FLA_FLOAT_PTR( FLA_ZERO );
  float*    buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );

  float     alpha12;
  float     minus_conj_alpha12;
  float     psi11_minus_alpha12;
  float     minus_inv_tau11;
  float     minus_upsilon11;
  float     minus_conj_nu11;
  float     minus_conj_psi11;
  float     minus_zeta11;
  float     beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  float*    buff_w  = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_u  = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_v  = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_y  = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_z  = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_w   = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_y   = 1;
  int       inc_z   = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    float*    a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    float*    A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    float*    alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    float*    a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    float*    A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    float*    a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    float*    A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    float*    t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    float*    tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    float*    s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    float*    sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    float*    w21       = buff_w  + (i+1)*inc_w;

    float*    a12p      = buff_ap + (i+1)*inc_ap;

    float*    upsilon11 = buff_u  + (i  )*inc_u;
    float*    u21       = buff_u  + (i+1)*inc_u;

    float*    u21p      = buff_up + (i+1)*inc_up;

    float*    nu11      = buff_v  + (i  )*inc_v;
    float*    v21       = buff_v  + (i+1)*inc_v;

    float*    psi11     = buff_y  + (i  )*inc_y;
    float*    y21       = buff_y  + (i+1)*inc_y;

    float*    zeta11    = buff_z  + (i  )*inc_z;
    float*    z21       = buff_z  + (i+1)*inc_z;

    float*    a12p_t    = a12p    + (0  )*inc_ap;
    float*    a12p_b    = a12p    + (1  )*inc_ap;

    float*    v21_t     = v21     + (0  )*inc_v;
    float*    v21_b     = v21     + (1  )*inc_v;

    float*    a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    float*    a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    float*    A22_l     = A22     + (0  )*cs_A + (0  )*rs_A;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    if ( m_behind > 0 )
    {
      // FLA_Copy( upsilon11, minus_upsilon11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
      bl1_smult3( buff_m1, upsilon11, &minus_upsilon11 );

      // FLA_Copy( zeta11, minus_zeta11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
      bl1_smult3( buff_m1, zeta11, &minus_zeta11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
      bl1_scopyconj( psi11, &minus_conj_psi11 );
      bl1_sscals( buff_m1, &minus_conj_psi11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
      bl1_scopyconj( nu11, &minus_conj_nu11 );
      bl1_sscals( buff_m1, &minus_conj_nu11 );

      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  zeta11,    alpha11 );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_conj_psi11,
                  upsilon11, 1,
                  alpha11,   1 );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_conj_nu11,
                  zeta11,  1,
                  alpha11, 1 );

      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  z21, a21 );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_psi11,
                  u21, inc_u,
                  a21, rs_A );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_nu11,
                  z21, inc_z,
                  a21, rs_A );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11,    v21, a12t );
      bl1_saxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_upsilon11,
                  y21,  inc_y,
                  a12t, cs_A );
      bl1_saxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_zeta11,
                  v21,  inc_v,
                  a12t, cs_A );
    }

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_ops( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_scopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    if ( n_ahead > 0 )
    {
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_sdiv3( buff_m1, tau11, &minus_inv_tau11 );

      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );
    }

    if ( m_behind > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      bl1_sger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u21, inc_u,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_sger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );
    }

    if ( n_ahead > 0 )
    {
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_0,
                 y21,  inc_y );

      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      bl1_saxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  y21,  inc_y,
                  a12p, inc_ap );

      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 a12p, inc_ap,
                 buff_0,
                 w21,  inc_w );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_saxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_1,
                  a12t, cs_A,
                  y21,  inc_y );

      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_ops( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      // FLA_Conjugate( v21_b );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_smult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );
      bl1_sconjv( n_ahead - 1,
                  v21_b, inc_v );

      // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // FLA_Copy( u21p, u21 );
    bl1_scopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_sdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, inc_y,
                v21, inc_v,
                &beta );
      bl1_sscals( &minus_inv_tau11, &beta );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_scopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_sneg1( &minus_conj_alpha12 );

      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, inc_z );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  A22_l, rs_A,
                  z21,   inc_z );
      bl1_sinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, inc_z );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, inc_z );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_sinvscalv( BLIS1_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
    bl1_scopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               u21, inc_u,
               buff_1,
               t01, rs_T );

    if ( m_behind + 1 == b_alg && n_ahead > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      bl1_sger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u21, inc_u,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_sger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &w );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_w );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
// FLA_Bidiag_UT_u_step_ops_var4
//
// Single-precision real worker that performs b_alg = m_TS iterations of what
// its name indicates is the "variant 4" step of an upper bidiagonal reduction.
// Each iteration i updates A in place and fills column i of Y, Z, T and S.
//
// Parameters (all matrices are addressed as buff + c*cs + r*rs):
//   m_A, n_A          - row / column counts used for A; at iteration i the
//                       "ahead" extents are m_A - i - 1 and n_A - i - 1.
//   m_TS              - number of iterations to run (copied into b_alg).
//   buff_A, rs_A, cs_A - matrix being reduced; overwritten in place.
//   buff_Y, rs_Y, cs_Y - n_A x b_alg workspace, zeroed at the top of this
//                        routine; column i (rows i+1..) receives y21.
//   buff_Z, rs_Z, cs_Z - m_A x b_alg workspace, zeroed likewise; column i
//                        (rows i+1..) receives z21.
//   buff_T, rs_T, cs_T - column i receives t01 (rows 0..i-1); tau11 (row i)
//                        is handed to FLA_Househ2_UT_l_ops, presumably as
//                        its output -- TODO confirm against that routine.
//   buff_S, rs_S, cs_S - column i receives s01 and sigma11, but only on
//                        iterations where n_ahead > 0.
//
// Returns FLA_SUCCESS unconditionally.
//
// The commented-out FLA_* calls preserved throughout the body show the
// object-based operation that each bl1_* call below stands in for.
//
// NOTE(review): the FLA_malloc results are used without a NULL check in this
// function; confirm that FLA_malloc itself handles allocation failure.
FLA_Error FLA_Bidiag_UT_u_step_ops_var4 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_sconjv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), bl1_ssetm(), bl1_ssetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

{
  // Float views of the FLA_ONE / FLA_ZERO / FLA_MINUS_ONE constant objects,
  // passed by address as the alpha/beta scalars of the bl1_* calls below.
  float*    buff_1  = FLA_FLOAT_PTR( FLA_ONE );
  float*    buff_0  = FLA_FLOAT_PTR( FLA_ZERO );
  float*    buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );

  // Per-iteration scalars. last_elem is only assigned and read under the
  // same m_behind > 0 guard, so it is never read uninitialized.
  float     alpha12;
  float     minus_conj_alpha12;
  float     psi11_minus_alpha12;
  float     minus_inv_tau11;
  float     beta;
  float     last_elem;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Temporary work vectors, all unit stride. w, al, u, up, f, g hold m_A
  // entries; ap, v, d, e hold n_A entries. They are released at the end of
  // the function.
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
  float*    buff_w  = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_al = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_u  = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_v  = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_d  = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_e  = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_f  = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_g  = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_w   = 1;
  int       inc_al  = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_d   = 1;
  int       inc_e   = 1;
  int       inc_f   = 1;
  int       inc_g   = 1;

  // Y (n_A x b_alg) and Z (m_A x b_alg) are cleared before the loop; the
  // loop reads their earlier columns (Y20, Z20, y10t, z10t) as accumulated
  // updates and writes one new column of each per iteration.
  // FLA_Set( FLA_ZERO, Y );
  // FLA_Set( FLA_ZERO, Z );
  bl1_ssetm( n_A,
             b_alg,
             buff_0,
             buff_Y, rs_Y, cs_Y );
  bl1_ssetm( m_A,
             b_alg,
             buff_0,
             buff_Z, rs_Z, cs_Z );

  for ( i = 0; i < b_alg; ++i )
  {
    // Partition views for iteration i. In the names, index 0 refers to the
    // rows/columns before i, index 1 to row/column i itself, and index 2 to
    // the rows/columns after i (e.g. A20 starts at row i+1, column 0).
    float*    a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    float*    A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    float*    a01       = buff_A  + (i  )*cs_A + (0  )*rs_A;
    float*    alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    float*    a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    float*    A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    float*    a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    float*    A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    float*    y10t      = buff_Y  + (0  )*cs_Y + (i  )*rs_Y;
    float*    Y20       = buff_Y  + (0  )*cs_Y + (i+1)*rs_Y;
    float*    y21       = buff_Y  + (i  )*cs_Y + (i+1)*rs_Y;

    float*    z10t      = buff_Z  + (0  )*cs_Z + (i  )*rs_Z;
    float*    Z20       = buff_Z  + (0  )*cs_Z + (i+1)*rs_Z;
    float*    z21       = buff_Z  + (i  )*cs_Z + (i+1)*rs_Z;

    float*    t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    float*    tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    float*    s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    float*    sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    // Views into the temporary vectors. The "21" views start at entry i+1;
    // d0/e0/f0/g0 always start at entry 0 and are used with length n_behind
    // or m_behind.
    float*    w21       = buff_w  + (i+1)*inc_w;

    float*    a22l      = buff_al + (i+1)*inc_al;

    float*    a12p      = buff_ap + (i+1)*inc_ap;

    float*    u21       = buff_u  + (i+1)*inc_u;

    float*    u21p      = buff_up + (i+1)*inc_up;

    float*    v21       = buff_v  + (i+1)*inc_v;

    float*    d0        = buff_d  + (0  )*inc_d;

    float*    e0        = buff_e  + (0  )*inc_e;

    float*    f0        = buff_f  + (0  )*inc_f;

    float*    g0        = buff_g  + (0  )*inc_g;

    // _t is the first entry and _b the remainder of the corresponding vector.
    float*    a12p_t    = a12p    + (0  )*inc_ap;
    float*    a12p_b    = a12p    + (1  )*inc_ap;

    float*    v21_t     = v21     + (0  )*inc_v;
    float*    v21_b     = v21     + (1  )*inc_v;

    // Element (i-1, i) of A, i.e. the slot where the previous iteration
    // stored alpha12 through a12t_l. Only dereferenced when m_behind > 0.
    float*    a01_b     = a01     + (0  )*cs_A + (i-1)*rs_A;

    // _l is the first entry of a12t, _r the entries to its right.
    float*    a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    float*    a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    // First column of A02 / A22 / and first row of Y20 (same base address,
    // accessed below with a vector stride).
    float*    A02_l     = A02     + (0  )*cs_A + (0  )*rs_A;

    float*    A22_l     = A22     + (0  )*cs_A + (0  )*rs_A;

    float*    Y20_t     = Y20     + (0  )*cs_Y + (0  )*rs_Y;

    // ABL / ZBL start at row i; used with m_ahead + 1 rows they cover rows
    // i through m_A - 1 of the first n_behind columns of A and Z.
    float*    ABL       = a10t;
    float*    ZBL       = z10t;

    // a2 starts at alpha11; with stride rs_A and length m_ahead + 1 it is
    // alpha11 stacked on top of a21.
    float*    a2        = alpha11;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    // Temporarily replace a01_b with one while a01 and A02 are read by the
    // updates below; the saved value is put back afterwards.
    // NOTE(review): presumably this exposes the implicit unit leading entry
    // of the previous right Householder vector -- confirm.
    if ( m_behind > 0 )
    {
      // FLA_Copy( a01_b, last_elem );
      // FLA_Set( FLA_ONE, a01_b );
      last_elem = *a01_b;
      *a01_b = *buff_1;
    }

    // Bring the current column (alpha11 and a21) up to date using the
    // columns of Y and Z produced by earlier iterations.
    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01,  FLA_ONE, a2 );
    bl1_sgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ABL,  rs_A, cs_A,
               y10t, cs_Y,
               buff_1,
               a2,   rs_A );
    bl1_sgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ZBL, rs_Z, cs_Z,
               a01, rs_A,
               buff_1,
               a2,  rs_A );

    // Bring the current row a12t up to date in the same way.
    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE,    FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
    bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               n_ahead,
               n_behind,
               buff_m1,
               Y20,  rs_Y, cs_Y,
               a10t, cs_A,
               buff_1,
               a12t, cs_A );
    bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_behind,
               n_ahead,
               buff_m1,
               A02,  rs_A, cs_A,
               z10t, cs_Z,
               buff_1,
               a12t, cs_A );

    // Undo the temporary overwrite made at the top of the iteration.
    if ( m_behind > 0 )
    {
      // FLA_Copy( last_elem, a01_b );
      *a01_b = last_elem;
    }

    // Left Householder step on (alpha11, a21); a21 is then copied into the
    // work vector u21p so later products read it with unit stride.
    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_ops( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_scopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    // Everything in this branch needs at least one column to the right of
    // the current one.
    if ( n_ahead > 0 )
    {
      // minus_inv_tau11 is built from buff_m1 and tau11 (per the FLA calls
      // below: -1 scaled by the inverse of tau11).
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_sdiv3( buff_m1, tau11, &minus_inv_tau11 );

      // a12p starts as a12t + minus_inv_tau11 * a12t; a y21 term is added
      // further down.
      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );

      // d0 and e0 (length n_behind) are products of A20 and Z20 with u21p;
      // they are reused for t01 and for y21 below.
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_0,
                 d0,   inc_d );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 Z20,  rs_Z, cs_Z,
                 u21p, inc_up,
                 buff_0,
                 e0,   inc_e );

      // t01 = a10t (copied into column i of T) + d0.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Axpy( FLA_ONE, d0, t01 );
      bl1_scopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  buff_1,
                  d0,  inc_d,
                  t01, rs_T );

      // y21 is rebuilt from zero: first the corrections involving Y20*d0
      // and A02*e0, then the A22 product with u21p is accumulated on top.
      // FLA_Set( FLA_ZERO, y21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      // FLA_Gemv( FLA_TRANSPOSE,    FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
      bl1_ssetv( n_ahead,
                 buff_0,
                 y21, rs_Y );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_m1,
                 Y20, rs_Y, cs_Y,
                 d0,  inc_d,
                 buff_1,
                 y21, rs_Y );
      bl1_sgemv( BLIS1_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_m1,
                 A02, rs_A, cs_A,
                 e0,  inc_e,
                 buff_1,
                 y21, rs_Y );

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_1,
                 y21,  rs_Y );

      // Finish a12p by adding minus_inv_tau11 * y21.
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      bl1_saxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  y21,  rs_Y,
                  a12p, inc_ap );

      // w21 = A22 * a12p, then corrected below by the A20*f0 and Z20*g0
      // terms.
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 a12p, inc_ap,
                 buff_0,
                 w21,  inc_w );

      // f0 and g0 are products of Y20 and A02 with a12p; g0 is reused
      // further down as the starting value of s01.
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE,    FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
      // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_1,
                 Y20,  rs_Y, cs_Y,
                 a12p, inc_ap,
                 buff_0,
                 f0,   inc_f );
      bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02,  rs_A, cs_A,
                 a12p, inc_ap,
                 buff_0,
                 g0,   inc_g );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 A20, rs_A, cs_A,
                 f0,  inc_f,
                 buff_1,
                 w21, inc_w );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 Z20, rs_Z, cs_Z,
                 g0,  inc_g,
                 buff_1,
                 w21, inc_w );

      // a22l is a private, updated copy of the first column of A22 (A22
      // itself is not modified here); it is consumed when forming z21.
      // FLA_Copy( A22_l, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  A22_l, rs_A,
                  a22l,  inc_al );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 A20,   rs_A, cs_A,
                 Y20_t, cs_Y,
                 buff_1,
                 a22l,  inc_al );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 Z20,   rs_Z, cs_Z,
                 A02_l, rs_A,
                 buff_1,
                 a22l,  inc_al );

      // y21 += a12t. This must read a12t before a12t is overwritten with
      // alpha12 / v21_b at the end of this branch.
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_saxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_1, 
                  a12t, cs_A,
                  y21,  rs_Y );

      // Right Householder step computed from a12p; the call is handed the
      // addresses of alpha12, psi11_minus_alpha12 and sigma11.
      // NOTE(review): presumably all three are outputs and a12p itself is
      // left unmodified (it is copied and scaled by hand below) -- confirm
      // against FLA_Househ2s_UT_r_ops.
      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_ops( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // Build v21 from a12p using alpha12 and psi11_minus_alpha12, following
      // the FLA sequence listed below.
      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      // FLA_Conjugate( v21_b );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_smult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );
      bl1_sconjv( n_ahead - 1,
                  v21_b, inc_v );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_scopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_sneg1( &minus_conj_alpha12 );

      // s01 starts from g0, gets minus_conj_alpha12 * A02_l added, and is
      // then inverse-scaled by psi11_minus_alpha12.
      // FLA_Copy( g0, s01 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  g0,  inc_g,
                  s01, rs_S );
      bl1_saxpyv( BLIS1_CONJUGATE,
                  n_behind,
                  &minus_conj_alpha12,
                  A02_l, rs_A,
                  s01,   rs_S );
      bl1_sinvscalv( BLIS1_CONJUGATE,
                     n_behind,
                     &psi11_minus_alpha12,
                     s01, rs_S );

      // Overwrite row a12t of A: alpha12 in the first slot, v21_b in the
      // remaining n_ahead - 1 slots.
      // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // FLA_Copy( u21p, u21 );
    bl1_scopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // beta = minus_inv_tau11 * dot( y21, v21 ).
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_sdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, rs_Y,
                v21, inc_v,
                &beta );
      bl1_sscals( &minus_inv_tau11, &beta );

      // Column i of Z: start from w21, add minus_conj_alpha12 * a22l,
      // inverse-scale by psi11_minus_alpha12, then add beta * u21.
      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, rs_Z );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  a22l, inc_al,
                  z21,  rs_Z );
      bl1_sinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, rs_Z );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, rs_Z );

      // Final scaling of the new Y and Z columns by tau11 and sigma11.
      // NOTE(review): the y21 call passes BLIS1_CONJUGATE while the FLA
      // comment says FLA_NO_CONJUGATE; presumably immaterial for real
      // float data -- confirm.
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_sinvscalv( BLIS1_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, rs_Y );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, rs_Z );
    }
    else // if ( n_ahead == 0 )
    {
      // No columns remain to the right, so the first n_ahead > 0 branch did
      // not run and t01 has not been formed yet; compute it here directly
      // from a10t and A20 with u21.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
      bl1_scopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 t01, rs_T );
    }

    /*------------------------------------------------------------*/

  }

  // Release every temporary work vector allocated at the top.
  // FLA_Obj_free( &w );
  // FLA_Obj_free( &al );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &d );
  // FLA_Obj_free( &e );
  // FLA_Obj_free( &f );
  // FLA_Obj_free( &g );
  FLA_free( buff_w );
  FLA_free( buff_al );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_d );
  FLA_free( buff_e );
  FLA_free( buff_f );
  FLA_free( buff_g );

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_step_ops_var5 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), bl1_ssetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

{
  float*    buff_1  = FLA_FLOAT_PTR( FLA_ONE );
  float*    buff_0  = FLA_FLOAT_PTR( FLA_ZERO );
  float*    buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );

  float     beta;
  float     last_elem;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
  float*    buff_u  = ( float*    ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_v  = ( float*    ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_d  = ( float*    ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_e  = ( float*    ) FLA_malloc( n_A * sizeof( *buff_A ) );
  float*    buff_f  = ( float*    ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_g  = ( float*    ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_u   = 1;
  int       inc_v   = 1;
  int       inc_d   = 1;
  int       inc_e   = 1;
  int       inc_f   = 1;
  int       inc_g   = 1;

  // FLA_Set( FLA_ZERO, Y );
  // FLA_Set( FLA_ZERO, Z );
  bl1_ssetm( n_A,
             b_alg,
             buff_0,
             buff_Y, rs_Y, cs_Y );
  bl1_ssetm( m_A,
             b_alg,
             buff_0,
             buff_Z, rs_Z, cs_Z );

  for ( i = 0; i < b_alg; ++i )
  {
    float*    a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    float*    A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    float*    a01       = buff_A  + (i  )*cs_A + (0  )*rs_A;
    float*    alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    float*    a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    float*    A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    float*    a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    float*    A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    float*    y10t      = buff_Y  + (0  )*cs_Y + (i  )*rs_Y;
    float*    Y20       = buff_Y  + (0  )*cs_Y + (i+1)*rs_Y;
    float*    y21       = buff_Y  + (i  )*cs_Y + (i+1)*rs_Y;

    float*    z10t      = buff_Z  + (0  )*cs_Z + (i  )*rs_Z;
    float*    Z20       = buff_Z  + (0  )*cs_Z + (i+1)*rs_Z;
    float*    z21       = buff_Z  + (i  )*cs_Z + (i+1)*rs_Z;

    float*    t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    float*    tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    float*    s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    float*    sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    float*    u21       = buff_u  + (i+1)*inc_u;

    float*    v21       = buff_v  + (i+1)*inc_v;

    float*    d0        = buff_d  + (0  )*inc_d;

    float*    e0        = buff_e  + (0  )*inc_e;

    float*    f0        = buff_f  + (0  )*inc_f;

    float*    g0        = buff_g  + (0  )*inc_g;

    float*    v21_t     = v21     + (0  )*inc_v;
    float*    v21_b     = v21     + (1  )*inc_v;

    float*    a01_b     = a01     + (0  )*cs_A + (i-1)*rs_A;

    float*    a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    float*    a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    float*    ABL       = a10t;
    float*    ZBL       = z10t;

    float*    a2        = alpha11;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    if ( m_behind > 0 )
    {
      // FLA_Copy( a01_b, last_elem );
      // FLA_Set( FLA_ONE, a01_b );
      last_elem = *a01_b;
      *a01_b = *buff_1;
    }

    // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01,  FLA_ONE, a2 );
    bl1_sgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ABL,  rs_A, cs_A,
               y10t, cs_Y,
               buff_1,
               a2,   rs_A );
    bl1_sgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ZBL, rs_Z, cs_Z,
               a01, rs_A,
               buff_1,
               a2,  rs_A );

    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE,    FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
    bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               n_ahead,
               n_behind,
               buff_m1,
               Y20,  rs_Y, cs_Y,
               a10t, cs_A,
               buff_1,
               a12t, cs_A );
    bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_behind,
               n_ahead,
               buff_m1,
               A02,  rs_A, cs_A,
               z10t, cs_Z,
               buff_1,
               a12t, cs_A );

    if ( m_behind > 0 )
    {
      // FLA_Copy( last_elem, a01_b );
      *a01_b = last_elem;
    }

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21 );
    FLA_Househ2_UT_l_ops( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_scopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21, rs_A,
                u21, inc_u );

    if ( n_ahead > 0 )
    {
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
      bl1_scopyv( BLIS1_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  y21,  rs_Y );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 y21, rs_Y );

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_0,
                 d0,  inc_d );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 Z20, rs_Z, cs_Z,
                 u21, inc_u,
                 buff_0,
                 e0,  inc_e );

      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Axpy( FLA_ONE, d0, t01 );
      bl1_scopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  buff_1,
                  d0,  inc_d,
                  t01, rs_T );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      // FLA_Gemv( FLA_TRANSPOSE,    FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_m1,
                 Y20, rs_Y, cs_Y,
                 d0,  inc_d,
                 buff_1,
                 y21, rs_Y );
      bl1_sgemv( BLIS1_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_m1,
                 A02, rs_A, cs_A,
                 e0,  inc_e,
                 buff_1,
                 y21, rs_Y );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, rs_Y );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
      bl1_saxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_m1,
                  y21,  rs_Y,
                  a12t, cs_A );

      // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
      FLA_Househ2_UT_r_ops( n_ahead - 1,
                            a12t_l,
                            a12t_r, cs_A,
                            sigma11 );

      // FLA_Set( FLA_ONE, v21_t );
      // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
      *v21_t = *buff_1;
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  a12t_r, cs_A,
                  v21_b,  inc_v );

      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      bl1_sdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, rs_Y,
                v21, inc_v,
                &beta );
      bl1_sscals( buff_m1, &beta );

      // FLA_Copy( u21, z21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  u21, inc_u,
                  z21, rs_Z );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 v21, inc_v,
                 &beta,
                 z21, rs_Z );

      // FLA_Gemvc( FLA_CONJ_TRANSPOSE,    FLA_ONE, Y20, v21, FLA_ZERO, f0 );
      // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 m_behind,
                 buff_1,
                 Y20, rs_Y, cs_Y,
                 v21, inc_v,
                 buff_0,
                 f0,  inc_f );
      bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 g0,  inc_g );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 A20, rs_A, cs_A,
                 f0,  inc_f,
                 buff_1,
                 z21, rs_Z );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 Z20, rs_Z, cs_Z,
                 g0,  inc_g,
                 buff_1,
                 z21, rs_Z );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, rs_Z );

      // FLA_Copy( g0, s01 );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  g0,  inc_g,
                  s01, rs_S );
    }
    else // if ( n_ahead == 0 )
    {
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
      bl1_scopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 t01, rs_T );
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &u );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &d );
  // FLA_Obj_free( &e );
  // FLA_Obj_free( &f );
  // FLA_Obj_free( &g );
  FLA_free( buff_u );
  FLA_free( buff_v );
  FLA_free( buff_d );
  FLA_free( buff_e );
  FLA_free( buff_f );
  FLA_free( buff_g );

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_opc_var1(), FLA_Bidiag_UT_u_step_opd_var1(), FLA_Bidiag_UT_u_step_ops_var1(), FLA_Bidiag_UT_u_step_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blk_var1(), and FLA_Bidiag_UT_u_opt_var1().

{
  // Type-dispatching front end: unpack the object descriptors A, T and S
  // into raw dimensions, strides and typed buffers, then hand them to the
  // variant-1 kernel that matches the datatype of A.

  // The datatype of A selects the kernel.
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  // Dimensions of A, and the length of T (forwarded as m_TS).
  int          m_A      = FLA_Obj_length( A );
  int          n_A      = FLA_Obj_width( A );
  int          m_TS     = FLA_Obj_length( T );

  // Row/column strides of each operand.
  int          rs_A     = FLA_Obj_row_stride( A );
  int          cs_A     = FLA_Obj_col_stride( A );
  int          rs_T     = FLA_Obj_row_stride( T );
  int          cs_T     = FLA_Obj_col_stride( T );
  int          rs_S     = FLA_Obj_row_stride( S );
  int          cs_S     = FLA_Obj_col_stride( S );

  switch ( datatype )
  {
    case FLA_FLOAT:
      FLA_Bidiag_UT_u_step_ops_var1( m_A, n_A, m_TS,
                                     FLA_FLOAT_PTR( A ), rs_A, cs_A,
                                     FLA_FLOAT_PTR( T ), rs_T, cs_T,
                                     FLA_FLOAT_PTR( S ), rs_S, cs_S );
      break;

    case FLA_DOUBLE:
      FLA_Bidiag_UT_u_step_opd_var1( m_A, n_A, m_TS,
                                     FLA_DOUBLE_PTR( A ), rs_A, cs_A,
                                     FLA_DOUBLE_PTR( T ), rs_T, cs_T,
                                     FLA_DOUBLE_PTR( S ), rs_S, cs_S );
      break;

    case FLA_COMPLEX:
      FLA_Bidiag_UT_u_step_opc_var1( m_A, n_A, m_TS,
                                     FLA_COMPLEX_PTR( A ), rs_A, cs_A,
                                     FLA_COMPLEX_PTR( T ), rs_T, cs_T,
                                     FLA_COMPLEX_PTR( S ), rs_S, cs_S );
      break;

    case FLA_DOUBLE_COMPLEX:
      FLA_Bidiag_UT_u_step_opz_var1( m_A, n_A, m_TS,
                                     FLA_DOUBLE_COMPLEX_PTR( A ), rs_A, cs_A,
                                     FLA_DOUBLE_COMPLEX_PTR( T ), rs_T, cs_T,
                                     FLA_DOUBLE_COMPLEX_PTR( S ), rs_S, cs_S );
      break;

    default:
      // Any other datatype is not dispatched; FLA_SUCCESS is still returned.
      break;
  }

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blk_var2(), and FLA_Bidiag_UT_u_opt_var2().

{
  // Type-dispatching front end: unpack the object descriptors A, T and S
  // into raw dimensions, strides and typed buffers, then hand them to the
  // variant-2 kernel that matches the datatype of A.

  // The datatype of A selects the kernel.
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  // Dimensions of A, and the length of T (forwarded as m_TS).
  int          m_A      = FLA_Obj_length( A );
  int          n_A      = FLA_Obj_width( A );
  int          m_TS     = FLA_Obj_length( T );

  // Row/column strides of each operand.
  int          rs_A     = FLA_Obj_row_stride( A );
  int          cs_A     = FLA_Obj_col_stride( A );
  int          rs_T     = FLA_Obj_row_stride( T );
  int          cs_T     = FLA_Obj_col_stride( T );
  int          rs_S     = FLA_Obj_row_stride( S );
  int          cs_S     = FLA_Obj_col_stride( S );

  switch ( datatype )
  {
    case FLA_FLOAT:
      FLA_Bidiag_UT_u_step_ops_var2( m_A, n_A, m_TS,
                                     FLA_FLOAT_PTR( A ), rs_A, cs_A,
                                     FLA_FLOAT_PTR( T ), rs_T, cs_T,
                                     FLA_FLOAT_PTR( S ), rs_S, cs_S );
      break;

    case FLA_DOUBLE:
      FLA_Bidiag_UT_u_step_opd_var2( m_A, n_A, m_TS,
                                     FLA_DOUBLE_PTR( A ), rs_A, cs_A,
                                     FLA_DOUBLE_PTR( T ), rs_T, cs_T,
                                     FLA_DOUBLE_PTR( S ), rs_S, cs_S );
      break;

    case FLA_COMPLEX:
      FLA_Bidiag_UT_u_step_opc_var2( m_A, n_A, m_TS,
                                     FLA_COMPLEX_PTR( A ), rs_A, cs_A,
                                     FLA_COMPLEX_PTR( T ), rs_T, cs_T,
                                     FLA_COMPLEX_PTR( S ), rs_S, cs_S );
      break;

    case FLA_DOUBLE_COMPLEX:
      FLA_Bidiag_UT_u_step_opz_var2( m_A, n_A, m_TS,
                                     FLA_DOUBLE_COMPLEX_PTR( A ), rs_A, cs_A,
                                     FLA_DOUBLE_COMPLEX_PTR( T ), rs_T, cs_T,
                                     FLA_DOUBLE_COMPLEX_PTR( S ), rs_S, cs_S );
      break;

    default:
      // Any other datatype is not dispatched; FLA_SUCCESS is still returned.
      break;
  }

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blk_var3(), and FLA_Bidiag_UT_u_opt_var3().

{
  // Type-dispatching front end: unpack the object descriptors A, T and S
  // into raw dimensions, strides and typed buffers, then hand them to the
  // variant-3 kernel that matches the datatype of A.

  // The datatype of A selects the kernel.
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  // Dimensions of A, and the length of T (forwarded as m_TS).
  int          m_A      = FLA_Obj_length( A );
  int          n_A      = FLA_Obj_width( A );
  int          m_TS     = FLA_Obj_length( T );

  // Row/column strides of each operand.
  int          rs_A     = FLA_Obj_row_stride( A );
  int          cs_A     = FLA_Obj_col_stride( A );
  int          rs_T     = FLA_Obj_row_stride( T );
  int          cs_T     = FLA_Obj_col_stride( T );
  int          rs_S     = FLA_Obj_row_stride( S );
  int          cs_S     = FLA_Obj_col_stride( S );

  switch ( datatype )
  {
    case FLA_FLOAT:
      FLA_Bidiag_UT_u_step_ops_var3( m_A, n_A, m_TS,
                                     FLA_FLOAT_PTR( A ), rs_A, cs_A,
                                     FLA_FLOAT_PTR( T ), rs_T, cs_T,
                                     FLA_FLOAT_PTR( S ), rs_S, cs_S );
      break;

    case FLA_DOUBLE:
      FLA_Bidiag_UT_u_step_opd_var3( m_A, n_A, m_TS,
                                     FLA_DOUBLE_PTR( A ), rs_A, cs_A,
                                     FLA_DOUBLE_PTR( T ), rs_T, cs_T,
                                     FLA_DOUBLE_PTR( S ), rs_S, cs_S );
      break;

    case FLA_COMPLEX:
      FLA_Bidiag_UT_u_step_opc_var3( m_A, n_A, m_TS,
                                     FLA_COMPLEX_PTR( A ), rs_A, cs_A,
                                     FLA_COMPLEX_PTR( T ), rs_T, cs_T,
                                     FLA_COMPLEX_PTR( S ), rs_S, cs_S );
      break;

    case FLA_DOUBLE_COMPLEX:
      FLA_Bidiag_UT_u_step_opz_var3( m_A, n_A, m_TS,
                                     FLA_DOUBLE_COMPLEX_PTR( A ), rs_A, cs_A,
                                     FLA_DOUBLE_COMPLEX_PTR( T ), rs_T, cs_T,
                                     FLA_DOUBLE_COMPLEX_PTR( S ), rs_S, cs_S );
      break;

    default:
      // Any other datatype is not dispatched; FLA_SUCCESS is still returned.
      break;
  }

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blk_var4(), and FLA_Bidiag_UT_u_opt_var4().

{
  // Type-dispatching front end: unpack the object descriptors A, Y, Z, T
  // and S into raw dimensions, strides and typed buffers, then hand them to
  // the variant-4 kernel that matches the datatype of A.

  // The datatype of A selects the kernel.
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  // Dimensions of A, and the length of T (forwarded as m_TS).
  int          m_A      = FLA_Obj_length( A );
  int          n_A      = FLA_Obj_width( A );
  int          m_TS     = FLA_Obj_length( T );

  // Row/column strides of each operand.
  int          rs_A     = FLA_Obj_row_stride( A );
  int          cs_A     = FLA_Obj_col_stride( A );
  int          rs_Y     = FLA_Obj_row_stride( Y );
  int          cs_Y     = FLA_Obj_col_stride( Y );
  int          rs_Z     = FLA_Obj_row_stride( Z );
  int          cs_Z     = FLA_Obj_col_stride( Z );
  int          rs_T     = FLA_Obj_row_stride( T );
  int          cs_T     = FLA_Obj_col_stride( T );
  int          rs_S     = FLA_Obj_row_stride( S );
  int          cs_S     = FLA_Obj_col_stride( S );

  switch ( datatype )
  {
    case FLA_FLOAT:
      FLA_Bidiag_UT_u_step_ops_var4( m_A, n_A, m_TS,
                                     FLA_FLOAT_PTR( A ), rs_A, cs_A,
                                     FLA_FLOAT_PTR( Y ), rs_Y, cs_Y,
                                     FLA_FLOAT_PTR( Z ), rs_Z, cs_Z,
                                     FLA_FLOAT_PTR( T ), rs_T, cs_T,
                                     FLA_FLOAT_PTR( S ), rs_S, cs_S );
      break;

    case FLA_DOUBLE:
      FLA_Bidiag_UT_u_step_opd_var4( m_A, n_A, m_TS,
                                     FLA_DOUBLE_PTR( A ), rs_A, cs_A,
                                     FLA_DOUBLE_PTR( Y ), rs_Y, cs_Y,
                                     FLA_DOUBLE_PTR( Z ), rs_Z, cs_Z,
                                     FLA_DOUBLE_PTR( T ), rs_T, cs_T,
                                     FLA_DOUBLE_PTR( S ), rs_S, cs_S );
      break;

    case FLA_COMPLEX:
      FLA_Bidiag_UT_u_step_opc_var4( m_A, n_A, m_TS,
                                     FLA_COMPLEX_PTR( A ), rs_A, cs_A,
                                     FLA_COMPLEX_PTR( Y ), rs_Y, cs_Y,
                                     FLA_COMPLEX_PTR( Z ), rs_Z, cs_Z,
                                     FLA_COMPLEX_PTR( T ), rs_T, cs_T,
                                     FLA_COMPLEX_PTR( S ), rs_S, cs_S );
      break;

    case FLA_DOUBLE_COMPLEX:
      FLA_Bidiag_UT_u_step_opz_var4( m_A, n_A, m_TS,
                                     FLA_DOUBLE_COMPLEX_PTR( A ), rs_A, cs_A,
                                     FLA_DOUBLE_COMPLEX_PTR( Y ), rs_Y, cs_Y,
                                     FLA_DOUBLE_COMPLEX_PTR( Z ), rs_Z, cs_Z,
                                     FLA_DOUBLE_COMPLEX_PTR( T ), rs_T, cs_T,
                                     FLA_DOUBLE_COMPLEX_PTR( S ), rs_S, cs_S );
      break;

    default:
      // Any other datatype is not dispatched; FLA_SUCCESS is still returned.
      break;
  }

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_opc_var5(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blk_var5(), and FLA_Bidiag_UT_u_opt_var5().

{
  // Type-dispatching front end: unpack the object descriptors A, Y, Z, T
  // and S into raw dimensions, strides and typed buffers, then hand them to
  // the variant-5 kernel that matches the datatype of A.

  // The datatype of A selects the kernel.
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  // Dimensions of A, and the length of T (forwarded as m_TS).
  int          m_A      = FLA_Obj_length( A );
  int          n_A      = FLA_Obj_width( A );
  int          m_TS     = FLA_Obj_length( T );

  // Row/column strides of each operand.
  int          rs_A     = FLA_Obj_row_stride( A );
  int          cs_A     = FLA_Obj_col_stride( A );
  int          rs_Y     = FLA_Obj_row_stride( Y );
  int          cs_Y     = FLA_Obj_col_stride( Y );
  int          rs_Z     = FLA_Obj_row_stride( Z );
  int          cs_Z     = FLA_Obj_col_stride( Z );
  int          rs_T     = FLA_Obj_row_stride( T );
  int          cs_T     = FLA_Obj_col_stride( T );
  int          rs_S     = FLA_Obj_row_stride( S );
  int          cs_S     = FLA_Obj_col_stride( S );

  switch ( datatype )
  {
    case FLA_FLOAT:
      FLA_Bidiag_UT_u_step_ops_var5( m_A, n_A, m_TS,
                                     FLA_FLOAT_PTR( A ), rs_A, cs_A,
                                     FLA_FLOAT_PTR( Y ), rs_Y, cs_Y,
                                     FLA_FLOAT_PTR( Z ), rs_Z, cs_Z,
                                     FLA_FLOAT_PTR( T ), rs_T, cs_T,
                                     FLA_FLOAT_PTR( S ), rs_S, cs_S );
      break;

    case FLA_DOUBLE:
      FLA_Bidiag_UT_u_step_opd_var5( m_A, n_A, m_TS,
                                     FLA_DOUBLE_PTR( A ), rs_A, cs_A,
                                     FLA_DOUBLE_PTR( Y ), rs_Y, cs_Y,
                                     FLA_DOUBLE_PTR( Z ), rs_Z, cs_Z,
                                     FLA_DOUBLE_PTR( T ), rs_T, cs_T,
                                     FLA_DOUBLE_PTR( S ), rs_S, cs_S );
      break;

    case FLA_COMPLEX:
      FLA_Bidiag_UT_u_step_opc_var5( m_A, n_A, m_TS,
                                     FLA_COMPLEX_PTR( A ), rs_A, cs_A,
                                     FLA_COMPLEX_PTR( Y ), rs_Y, cs_Y,
                                     FLA_COMPLEX_PTR( Z ), rs_Z, cs_Z,
                                     FLA_COMPLEX_PTR( T ), rs_T, cs_T,
                                     FLA_COMPLEX_PTR( S ), rs_S, cs_S );
      break;

    case FLA_DOUBLE_COMPLEX:
      FLA_Bidiag_UT_u_step_opz_var5( m_A, n_A, m_TS,
                                     FLA_DOUBLE_COMPLEX_PTR( A ), rs_A, cs_A,
                                     FLA_DOUBLE_COMPLEX_PTR( Y ), rs_Y, cs_Y,
                                     FLA_DOUBLE_COMPLEX_PTR( Z ), rs_Z, cs_Z,
                                     FLA_DOUBLE_COMPLEX_PTR( T ), rs_T, cs_T,
                                     FLA_DOUBLE_COMPLEX_PTR( S ), rs_S, cs_S );
      break;

    default:
      // Any other datatype is not dispatched; FLA_SUCCESS is still returned.
      break;
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_step_opz_var1 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T,
dcomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zcopyv(), bl1_zgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

{
  // Double-complex kernel operating directly on raw buffers and strides.
  // It runs m_TS iterations; iteration i works on the partition of A around
  // element (i,i), updates A in place, and fills column i of T (t01, tau11)
  // and column i of S (s01, sigma11). Always returns FLA_SUCCESS.
  //
  // NOTE(review): what the FLA_Househ2_UT_* and FLA_Apply_H2_UT_* helpers
  // compute is not visible here; the FLA_* pseudo-calls kept in the comments
  // below are the only description available -- confirm against those
  // routines' own documentation.

  // Typed views of the library constants, used as scalars for gemv.
  dcomplex* buff_1  = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* buff_0  = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );

  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  // Only v is actually allocated in this variant: a contiguous workspace of
  // n_A elements, released just before returning.
  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  int       inc_v  = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A relative to the current diagonal element (i,i):
    // row i left of the diagonal (a10t), rows below it (A20), the diagonal
    // element (alpha11), the column below it (a21), the columns to the right
    // above row i (A02), row i right of the diagonal (a12t), and the
    // trailing block (A22).
    dcomplex* a10t     = buff_A + (0  )*cs_A + (i  )*rs_A;
    dcomplex* A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    dcomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    dcomplex* a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    dcomplex* A02      = buff_A + (i+1)*cs_A + (0  )*rs_A;
    dcomplex* a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
    dcomplex* A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    // Column i of T: the part above the diagonal and the diagonal entry.
    dcomplex* t01      = buff_T + (i  )*cs_T + (0  )*rs_T;
    dcomplex* tau11    = buff_T + (i  )*cs_T + (i  )*rs_T;

    // Column i of S: the part above the diagonal and the diagonal entry.
    dcomplex* s01      = buff_S + (i  )*cs_S + (0  )*rs_S;
    dcomplex* sigma11  = buff_S + (i  )*cs_S + (i  )*rs_S;

    // Workspace slice starting at offset i+1 of buff_v.
    dcomplex* v21      = buff_v + (i+1)*inc_v;

    // First element / remainder of row a12t.
    dcomplex* a12t_l   = a12t   + (0  )*cs_A + (0  )*rs_A;
    dcomplex* a12t_r   = a12t   + (1  )*cs_A + (0  )*rs_A;

    // First column / remaining columns of A22.
    dcomplex* A22_l    = A22    + (0  )*cs_A + (0  )*rs_A;
    dcomplex* A22_r    = A22    + (1  )*cs_A + (0  )*rs_A;

    // First element / remainder of v21.
    dcomplex* v21_t    = v21    + (0  )*inc_v;
    dcomplex* v21_b    = v21    + (1  )*inc_v;

    // Counts of rows/columns after and before index i.
    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = n_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    FLA_Househ2_UT_l_opz( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );

    // Everything involving a12t, A22, v21 and s01 is skipped once there are
    // no columns to the right of column i; s01 is then left unwritten.
    if ( n_ahead > 0 )
    {
      // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
      FLA_Apply_H2_UT_l_opz_var1( m_ahead,
                                  n_ahead,
                                  tau11,
                                  a21,  rs_A,
                                  a12t, cs_A,
                                  A22,  rs_A, cs_A );

      // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
      FLA_Househ2_UT_r_opz( n_ahead - 1,
                            a12t_l,
                            a12t_r, cs_A,
                            sigma11 );

      // FLA_Set( FLA_ONE, v21_t );
      // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
      // v21 becomes [ 1 ; a12t_r ] stored contiguously in the workspace.
      *v21_t = *buff_1;
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  a12t_r, cs_A,
                  v21_b,  inc_v );

      // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
      FLA_Apply_H2_UT_r_opz_var1( m_ahead,
                                  n_ahead - 1,
                                  sigma11,
                                  v21_b, inc_v,
                                  A22_l, rs_A,
                                  A22_r, rs_A, cs_A );

      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      // beta is zero here, so s01 is overwritten rather than accumulated.
      bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
    // t01 is first seeded with the conjugate of a10t, then the gemv
    // accumulates into it (beta is one). This runs for every i, including
    // when n_ahead is zero.
    bl1_zcopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               a21, rs_A,
               buff_1,
               t01, rs_T );

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &v );
  FLA_free( buff_v );

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_step_opz_var2 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T,
dcomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

{
  // Double-complex kernel operating directly on raw buffers and strides.
  // It runs m_TS iterations; iteration i works on the partition of A around
  // element (i,i), updates A in place, and fills column i of T (t01, tau11)
  // and column i of S (s01, sigma11). Unlike variant 1 it does not call the
  // FLA_Apply_H2_UT_* helpers; it builds the vectors y21 and z21 and updates
  // A22 with two rank-1 updates. Always returns FLA_SUCCESS.
  //
  // NOTE(review): what the FLA_Househ2_UT_* helpers compute is not visible
  // here; the FLA_* pseudo-calls kept in the comments below are the only
  // description available -- confirm against those routines' documentation.

  // Typed views of the library constants, used as BLAS-style scalars.
  dcomplex* buff_1  = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* buff_0  = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );

  dcomplex  beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  // Contiguous workspaces (unit stride), released just before returning.
  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_v  = 1;
  int       inc_y  = 1;
  int       inc_z  = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A relative to the current diagonal element (i,i):
    // row i left of the diagonal (a10t), rows below it (A20), the diagonal
    // element (alpha11), the column below it (a21), the columns to the right
    // above row i (A02), row i right of the diagonal (a12t), and the
    // trailing block (A22).
    dcomplex* a10t     = buff_A + (0  )*cs_A + (i  )*rs_A;
    dcomplex* A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    dcomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    dcomplex* a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    dcomplex* A02      = buff_A + (i+1)*cs_A + (0  )*rs_A;
    dcomplex* a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
    dcomplex* A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    // Column i of T: the part above the diagonal and the diagonal entry.
    dcomplex* t01      = buff_T + (i  )*cs_T + (0  )*rs_T;
    dcomplex* tau11    = buff_T + (i  )*cs_T + (i  )*rs_T;

    // Column i of S: the part above the diagonal and the diagonal entry.
    dcomplex* s01      = buff_S + (i  )*cs_S + (0  )*rs_S;
    dcomplex* sigma11  = buff_S + (i  )*cs_S + (i  )*rs_S;

    // Workspace slices, each starting at offset i+1 of its buffer.
    dcomplex* v21      = buff_v + (i+1)*inc_v;

    dcomplex* y21      = buff_y + (i+1)*inc_y;

    dcomplex* z21      = buff_z + (i+1)*inc_z;

    // First element / remainder of row a12t.
    dcomplex* a12t_l   = a12t   + (0  )*cs_A + (0  )*rs_A;
    dcomplex* a12t_r   = a12t   + (1  )*cs_A + (0  )*rs_A;

    // First element / remainder of v21.
    dcomplex* v21_t    = v21    + (0  )*inc_v;
    dcomplex* v21_b    = v21    + (1  )*inc_v;

    // Counts of rows/columns after and before index i.
    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = n_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    FLA_Househ2_UT_l_opz( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );

    // Everything involving a12t, A22, the workspaces and s01 is skipped once
    // there are no columns to the right of column i; s01 is then left
    // unwritten.
    if ( n_ahead > 0 )
    {
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
      // y21 is seeded with conj(a12t), then the gemv accumulates into it.
      bl1_zcopyv( BLIS1_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  y21,  inc_y );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_1,
                 y21, inc_y );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
      bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_m1,
                  y21,  inc_y,
                  a12t, cs_A );

      // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
      FLA_Househ2_UT_r_opz( n_ahead - 1,
                            a12t_l,
                            a12t_r, cs_A,
                            sigma11 );

      // FLA_Set( FLA_ONE, v21_t );
      // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
      // v21 becomes [ 1 ; a12t_r ] stored contiguously in the workspace.
      // v21_b is a slice of buff_v, so it is addressed with inc_v (the
      // stride of that buffer), not the stride of the y workspace.
      *v21_t = *buff_1;
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  a12t_r, cs_A,
                  v21_b,  inc_v );

      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      bl1_zdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, inc_y,
                v21, inc_v,
                &beta );
      bl1_zneg1( &beta );

      // FLA_Copy( a21, z21 );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      // z21 is seeded with a21; the gemv then scales that seed by beta
      // while adding A22 * v21.
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  a21, rs_A,
                  z21, inc_z );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 v21, inc_v,
                 &beta,
                 z21, inc_z );
      bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      // Two rank-1 updates of the trailing block, performed only after y21
      // and z21 have been computed from the not-yet-updated A22.
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                a21, rs_A,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );

      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      // beta is zero here, so s01 is overwritten rather than accumulated.
      bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
    // t01 is first seeded with the conjugate of a10t, then the gemv
    // accumulates into it (beta is one). This runs for every i, including
    // when n_ahead is zero.
    bl1_zcopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               a21, rs_A,
               buff_1,
               t01, rs_T );

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_step_opz_var3 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *buff_A,
int  rs_A,
int  cs_A,
dcomplex *buff_T,
int  rs_T,
int  cs_T,
dcomplex *buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

{
  // Double-complex body of the variant-3 "step" routine named in the
  // signature above. It runs b_alg = m_TS iterations over the m_A x n_A
  // matrix at buff_A. In iteration i it
  //   - calls the left Householder helper on column i (alpha11 / a21) and
  //     stores its scalar in tau11 = T(i,i);
  //   - when columns remain to the right (n_ahead > 0), calls the right
  //     Householder helper on a modified copy of row i (a12p) and stores its
  //     scalar in sigma11 = S(i,i);
  //   - fills t01 and s01, the first i entries of column i of T and of S.
  // The rank-1 updates of the trailing submatrix A22 are deferred: the
  // vectors produced in iteration i are applied to A during iteration i+1,
  // or at the very end of the last iteration.
  //
  // Parameters, as used below:
  //   m_A, n_A           - dimensions of A; they also size the work vectors.
  //   m_TS               - number of iterations performed (b_alg).
  //   buff_A, rs_A, cs_A - A's storage; element (r,c) is read/written at
  //                        buff_A + c*cs_A + r*rs_A. Updated in place.
  //   buff_T, rs_T, cs_T - T's storage, same addressing; column i is written.
  //   buff_S, rs_S, cs_S - S's storage, same addressing; column i is written.
  // Returns FLA_SUCCESS unconditionally.
  //
  // NOTE(review): the seven FLA_malloc() results below are used without a
  // NULL check in this function - confirm FLA_malloc() handles allocation
  // failure itself.
  // NOTE(review): nothing here checks m_TS against m_A or n_A; presumably
  // the caller guarantees m_TS <= min(m_A, n_A) - TODO confirm.

  // Pointers to the double-complex constants 1, 0 and -1.
  dcomplex* buff_1  = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* buff_0  = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );

  // Scalar temporaries; each is (re)computed inside the loop before use.
  dcomplex  alpha12;
  dcomplex  minus_conj_alpha12;
  dcomplex  psi11_minus_alpha12;
  dcomplex  minus_inv_tau11;
  dcomplex  minus_upsilon11;
  dcomplex  minus_conj_nu11;
  dcomplex  minus_conj_psi11;
  dcomplex  minus_zeta11;
  dcomplex  beta;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Work vectors, all unit stride: w, u, up, z hold m_A elements and
  // ap, v, y hold n_A elements. They are released at the end of the function.
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  dcomplex* buff_w  = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_u  = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_v  = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_y  = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_z  = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_w   = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_y   = 1;
  int       inc_z   = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A around the diagonal element (i,i):
    //   a10t    - row i, starting at column 0
    //   A20     - rows i+1.., starting at column 0
    //   alpha11 - element (i,i)
    //   a21     - column i, rows i+1..
    //   A02     - rows 0.., starting at column i+1
    //   a12t    - row i, columns i+1..
    //   A22     - trailing submatrix starting at (i+1,i+1)
    dcomplex* a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    dcomplex* A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    dcomplex* alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    dcomplex* a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    dcomplex* A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    dcomplex* a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    dcomplex* A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    // Column i of T: entries above the diagonal (t01) and the diagonal (tau11).
    dcomplex* t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    dcomplex* tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    // Column i of S: entries above the diagonal (s01) and the diagonal (sigma11).
    dcomplex* s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    dcomplex* sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    // Views into the work vectors. The "11" scalars sit at index i and the
    // "21" sub-vectors start at index i+1. Index i is where the previous
    // iteration's "21" sub-vector began, so upsilon11/nu11/psi11/zeta11 are
    // the leading entries of the vectors written during iteration i-1.
    dcomplex* w21       = buff_w  + (i+1)*inc_w;

    dcomplex* a12p      = buff_ap + (i+1)*inc_ap;

    dcomplex* upsilon11 = buff_u  + (i  )*inc_u;
    dcomplex* u21       = buff_u  + (i+1)*inc_u;

    dcomplex* u21p      = buff_up + (i+1)*inc_up;

    dcomplex* nu11      = buff_v  + (i  )*inc_v;
    dcomplex* v21       = buff_v  + (i+1)*inc_v;

    dcomplex* psi11     = buff_y  + (i  )*inc_y;
    dcomplex* y21       = buff_y  + (i+1)*inc_y;

    dcomplex* zeta11    = buff_z  + (i  )*inc_z;
    dcomplex* z21       = buff_z  + (i+1)*inc_z;

    // First element ("_t" / "_l") and remainder ("_b" / "_r") of a12p, v21
    // and a12t.
    dcomplex* a12p_t    = a12p    + (0  )*inc_ap;
    dcomplex* a12p_b    = a12p    + (1  )*inc_ap;

    dcomplex* v21_t     = v21     + (0  )*inc_v;
    dcomplex* v21_b     = v21     + (1  )*inc_v;

    dcomplex* a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    dcomplex* a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    // First column of A22 (same address as A22; traversed with stride rs_A).
    dcomplex* A22_l     = A22     + (0  )*cs_A + (0  )*rs_A;

    // Rows/columns remaining after, and already processed before, index i.
    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    // From the second iteration on: apply the previous iteration's deferred
    // updates to the current diagonal element, column i (a21) and row i
    // (a12t). A22 receives the same updates further below.
    if ( m_behind > 0 )
    {
      // FLA_Copy( upsilon11, minus_upsilon11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
      bl1_zmult3( buff_m1, upsilon11, &minus_upsilon11 );

      // FLA_Copy( zeta11, minus_zeta11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
      bl1_zmult3( buff_m1, zeta11, &minus_zeta11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
      bl1_zcopyconj( psi11, &minus_conj_psi11 );
      bl1_zscals( buff_m1, &minus_conj_psi11 );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
      bl1_zcopyconj( nu11, &minus_conj_nu11 );
      bl1_zscals( buff_m1, &minus_conj_nu11 );

      // Length-1 axpy calls: they update the single element alpha11.
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  zeta11,    alpha11 );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_conj_psi11,
                  upsilon11, 1,
                  alpha11,   1 );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  1,
                  &minus_conj_nu11,
                  zeta11,  1,
                  alpha11, 1 );

      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  z21, a21 );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_psi11,
                  u21, inc_u,
                  a21, rs_A );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_nu11,
                  z21, inc_z,
                  a21, rs_A );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11,    v21, a12t );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_upsilon11,
                  y21,  inc_y,
                  a12t, cs_A );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_zeta11,
                  v21,  inc_v,
                  a12t, cs_A );
    }

    // Left Householder step on column i; the helper is handed alpha11, a21
    // and tau11 by pointer. The resulting a21 is staged in u21p rather than
    // u21, because u21 still holds the previous iteration's vector, which the
    // A22 update below reads.
    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_opz( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_zcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    if ( n_ahead > 0 )
    {
      // minus_inv_tau11 = -1 / tau11 (per the FLAME-notation comment below).
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_zdiv3( buff_m1, tau11, &minus_inv_tau11 );

      // a12p = a12t + minus_inv_tau11 * a12t, stored in the unit-stride
      // workspace so that row i of A is not modified yet.
      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );
    }

    // Apply the previous iteration's two deferred rank-1 updates to A22.
    // u21, y21, z21 and v21 still hold the previous iteration's data here.
    if ( m_behind > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u21, inc_u,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );
    }

    if ( n_ahead > 0 )
    {
      // y21 is overwritten (beta = 0) with the product of the now up-to-date
      // A22, conjugate-transposed, and the new left vector u21p.
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_0,
                 y21,  inc_y );

      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  y21,  inc_y,
                  a12p, inc_ap );

      // w21 is overwritten (beta = 0); it is the starting value for z21 below.
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 a12p, inc_ap,
                 buff_0,
                 w21,  inc_w );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_1,
                  a12t, cs_A,
                  y21,  inc_y );

      // Right Householder step on the workspace row a12p. The helper is
      // handed alpha12, psi11_minus_alpha12 and sigma11 by pointer; the code
      // below reads all three scalars.
      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_opz( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // Build v21 from a12p: subtract alpha12 from the first entry, divide
      // the whole vector by psi11_minus_alpha12, then conjugate every entry
      // except the first.
      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      // FLA_Conjugate( v21_b );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_zmult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );
      bl1_zconjv( n_ahead - 1,
                  v21_b, inc_v );

      // Write the result back into row i of A: alpha12 into the first entry
      // right of the diagonal, the tail of v21 into the rest of the row.
      // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // The previous u21 has been consumed by the A22 update above, so the
    // staged vector can now replace it.
    // FLA_Copy( u21p, u21 );
    bl1_zcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // The single scaling by minus_inv_tau11 covers both the negation and
      // the division by tau11 listed in the comment below.
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_zdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, inc_y,
                v21, inc_v,
                &beta );
      bl1_zscals( &minus_inv_tau11, &beta );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_zcopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_zneg1( &minus_conj_alpha12 );

      // z21 is assembled from w21, the first column of A22 and u21.
      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, inc_z );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  A22_l, rs_A,
                  z21,   inc_z );
      bl1_zinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, inc_z );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, inc_z );

      // NOTE(review): the FLAME-notation comment says FLA_NO_CONJUGATE for
      // the tau11 scaling, but the call passes BLIS1_CONJUGATE. This is
      // harmless if tau11 is always real - confirm against the Householder
      // helper.
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_zinvscalv( BLIS1_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, inc_y );
      bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, inc_z );

      // s01 is overwritten (beta = 0); the product uses the m_behind rows of
      // A02.
      // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
      bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 s01, rs_S );
    }

    // t01 starts as the conjugate of row i left of the diagonal, then the
    // gemv (beta = 1) accumulates the A20 / u21 product onto it.
    // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
    bl1_zcopyv( BLIS1_CONJUGATE,
                n_behind,
                a10t, cs_A,
                t01,  rs_T );
    bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_1,
               A20, rs_A, cs_A,
               u21, inc_u,
               buff_1,
               t01, rs_T );

    // Last iteration only (i + 1 == b_alg): no later iteration will apply
    // this iteration's deferred rank-1 updates, so apply them to A22 now.
    if ( m_behind + 1 == b_alg && n_ahead > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u21, inc_u,
                y21, inc_y,
                A22, rs_A, cs_A );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z21, inc_z,
                v21, inc_v,
                A22, rs_A, cs_A );
    }

    /*------------------------------------------------------------*/

  }

  // Release the seven work vectors allocated above.
  // FLA_Obj_free( &w );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_w );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_step_opz_var4 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *buff_A,
int  rs_A,
int  cs_A,
dcomplex *buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *buff_T,
int  rs_T,
int  cs_T,
dcomplex *buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), bl1_zsetm(), bl1_zsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

{
  dcomplex* buff_1  = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* buff_0  = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );

  dcomplex  alpha12;
  dcomplex  minus_conj_alpha12;
  dcomplex  psi11_minus_alpha12;
  dcomplex  minus_inv_tau11;
  dcomplex  beta;
  dcomplex  last_elem;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
  dcomplex* buff_w  = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_al = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_u  = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_v  = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_d  = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_e  = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_f  = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_g  = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_w   = 1;
  int       inc_al  = 1;
  int       inc_ap  = 1;
  int       inc_u   = 1;
  int       inc_up  = 1;
  int       inc_v   = 1;
  int       inc_d   = 1;
  int       inc_e   = 1;
  int       inc_f   = 1;
  int       inc_g   = 1;

  // FLA_Set( FLA_ZERO, Y );
  // FLA_Set( FLA_ZERO, Z );
  bl1_zsetm( n_A,
             b_alg,
             buff_0,
             buff_Y, rs_Y, cs_Y );
  bl1_zsetm( m_A,
             b_alg,
             buff_0,
             buff_Z, rs_Z, cs_Z );

  for ( i = 0; i < b_alg; ++i )
  {
    dcomplex* a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    dcomplex* A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    dcomplex* a01       = buff_A  + (i  )*cs_A + (0  )*rs_A;
    dcomplex* alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    dcomplex* a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    dcomplex* A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    dcomplex* a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    dcomplex* A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    dcomplex* y10t      = buff_Y  + (0  )*cs_Y + (i  )*rs_Y;
    dcomplex* Y20       = buff_Y  + (0  )*cs_Y + (i+1)*rs_Y;
    dcomplex* y21       = buff_Y  + (i  )*cs_Y + (i+1)*rs_Y;

    dcomplex* z10t      = buff_Z  + (0  )*cs_Z + (i  )*rs_Z;
    dcomplex* Z20       = buff_Z  + (0  )*cs_Z + (i+1)*rs_Z;
    dcomplex* z21       = buff_Z  + (i  )*cs_Z + (i+1)*rs_Z;

    dcomplex* t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    dcomplex* tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    dcomplex* s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    dcomplex* sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    dcomplex* w21       = buff_w  + (i+1)*inc_w;

    dcomplex* a22l      = buff_al + (i+1)*inc_al;

    dcomplex* a12p      = buff_ap + (i+1)*inc_ap;

    dcomplex* u21       = buff_u  + (i+1)*inc_u;

    dcomplex* u21p      = buff_up + (i+1)*inc_up;

    dcomplex* v21       = buff_v  + (i+1)*inc_v;

    dcomplex* d0        = buff_d  + (0  )*inc_d;

    dcomplex* e0        = buff_e  + (0  )*inc_e;

    dcomplex* f0        = buff_f  + (0  )*inc_f;

    dcomplex* g0        = buff_g  + (0  )*inc_g;

    dcomplex* a12p_t    = a12p    + (0  )*inc_ap;
    dcomplex* a12p_b    = a12p    + (1  )*inc_ap;

    dcomplex* v21_t     = v21     + (0  )*inc_v;
    dcomplex* v21_b     = v21     + (1  )*inc_v;

    dcomplex* a01_b     = a01     + (0  )*cs_A + (i-1)*rs_A;

    dcomplex* a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    dcomplex* a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    dcomplex* A02_l     = A02     + (0  )*cs_A + (0  )*rs_A;

    dcomplex* A22_l     = A22     + (0  )*cs_A + (0  )*rs_A;

    dcomplex* Y20_t     = Y20     + (0  )*cs_Y + (0  )*rs_Y;

    dcomplex* ABL       = a10t;
    dcomplex* ZBL       = z10t;

    dcomplex* a2        = alpha11;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    if ( m_behind > 0 )
    {
      // FLA_Copy( a01_b, last_elem );
      // FLA_Set( FLA_ONE, a01_b );
      last_elem = *a01_b;
      *a01_b = *buff_1;
    }

    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01,  FLA_ONE, a2 );
    bl1_zgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ABL,  rs_A, cs_A,
               y10t, cs_Y,
               buff_1,
               a2,   rs_A );
    bl1_zgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ZBL, rs_Z, cs_Z,
               a01, rs_A,
               buff_1,
               a2,  rs_A );

    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE,    FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
    bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               n_ahead,
               n_behind,
               buff_m1,
               Y20,  rs_Y, cs_Y,
               a10t, cs_A,
               buff_1,
               a12t, cs_A );
    bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_behind,
               n_ahead,
               buff_m1,
               A02,  rs_A, cs_A,
               z10t, cs_Z,
               buff_1,
               a12t, cs_A );

    if ( m_behind > 0 )
    {
      // FLA_Copy( last_elem, a01_b );
      *a01_b = last_elem;
    }

    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21p );
    FLA_Househ2_UT_l_opz( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_zcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21,  rs_A,
                u21p, inc_up );

    if ( n_ahead > 0 )
    {
      // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
      bl1_zdiv3( buff_m1, tau11, &minus_inv_tau11 );

      // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  a12p, inc_ap );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  a12t, cs_A,
                  a12p, inc_ap );

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_0,
                 d0,   inc_d );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 Z20,  rs_Z, cs_Z,
                 u21p, inc_up,
                 buff_0,
                 e0,   inc_e );

      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Axpy( FLA_ONE, d0, t01 );
      bl1_zcopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  buff_1,
                  d0,  inc_d,
                  t01, rs_T );

      // FLA_Set( FLA_ZERO, y21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      // FLA_Gemv( FLA_TRANSPOSE,    FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
      bl1_zsetv( n_ahead,
                 buff_0,
                 y21, rs_Y );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_m1,
                 Y20, rs_Y, cs_Y,
                 d0,  inc_d,
                 buff_1,
                 y21, rs_Y );
      bl1_zgemv( BLIS1_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_m1,
                 A02, rs_A, cs_A,
                 e0,  inc_e,
                 buff_1,
                 y21, rs_Y );

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 u21p, inc_up,
                 buff_1,
                 y21,  rs_Y );

      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  &minus_inv_tau11,
                  y21,  rs_Y,
                  a12p, inc_ap );

      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22,  rs_A, cs_A,
                 a12p, inc_ap,
                 buff_0,
                 w21,  inc_w );

      // FLA_Gemvc( FLA_CONJ_TRANSPOSE,    FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
      // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_1,
                 Y20,  rs_Y, cs_Y,
                 a12p, inc_ap,
                 buff_0,
                 f0,   inc_f );
      bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02,  rs_A, cs_A,
                 a12p, inc_ap,
                 buff_0,
                 g0,   inc_g );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 A20, rs_A, cs_A,
                 f0,  inc_f,
                 buff_1,
                 w21, inc_w );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 Z20, rs_Z, cs_Z,
                 g0,  inc_g,
                 buff_1,
                 w21, inc_w );

      // FLA_Copy( A22_l, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
      // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  A22_l, rs_A,
                  a22l,  inc_al );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 A20,   rs_A, cs_A,
                 Y20_t, cs_Y,
                 buff_1,
                 a22l,  inc_al );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 Z20,   rs_Z, cs_Z,
                 A02_l, rs_A,
                 buff_1,
                 a22l,  inc_al );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_1, 
                  a12t, cs_A,
                  y21,  rs_Y );

      // FLA_Househ2s_UT( FLA_RIGHT,
      //                  a12p_t,
      //                  a12p_b,
      //                  alpha12, psi11_minus_alpha12, sigma11 );
      FLA_Househ2s_UT_r_opz( n_ahead - 1,
                             a12p_t,
                             a12p_b, inc_ap,
                             &alpha12,
                             &psi11_minus_alpha12,
                             sigma11 );

      // FLA_Copy( a12p, v21 );
      // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      // FLA_Conjugate( v21_b );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead,
                  a12p, inc_ap,
                  v21,  inc_v );
      bl1_zmult4( buff_m1, &alpha12, v21_t, v21_t );
      bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     &psi11_minus_alpha12,
                     v21, inc_v );
      bl1_zconjv( n_ahead - 1,
                  v21_b, inc_v );

      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
      bl1_zcopyconj( &alpha12, &minus_conj_alpha12 );
      bl1_zneg1( &minus_conj_alpha12 );

      // FLA_Copy( g0, s01 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  g0,  inc_g,
                  s01, rs_S );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  n_behind,
                  &minus_conj_alpha12,
                  A02_l, rs_A,
                  s01,   rs_S );
      bl1_zinvscalv( BLIS1_CONJUGATE,
                     n_behind,
                     &psi11_minus_alpha12,
                     s01, rs_S );

      // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
      // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
      *a12t_l = alpha12;
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  v21_b,  inc_v,
                  a12t_r, cs_A );
    }

    // FLA_Copy( u21p, u21 );
    bl1_zcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                u21p, inc_up,
                u21,  inc_u );

    if ( n_ahead > 0 )
    {
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
      bl1_zdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, rs_Y,
                v21, inc_v,
                &beta );
      bl1_zscals( &minus_inv_tau11, &beta );

      // FLA_Copy( w21, z21 );
      // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
      // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      // FLA_Axpy( beta, u21, z21 );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, rs_Z );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_alpha12,
                  a22l, inc_al,
                  z21,  rs_Z );
      bl1_zinvscalv( BLIS1_CONJUGATE,
                     m_ahead,
                     &psi11_minus_alpha12,
                     z21, rs_Z );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  u21, inc_u,
                  z21, rs_Z );

      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_zinvscalv( BLIS1_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, rs_Y );
      bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, rs_Z );
    }
    else // if ( n_ahead == 0 )
    {
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
      bl1_zcopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 t01, rs_T );
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &w );
  // FLA_Obj_free( &al );
  // FLA_Obj_free( &ap );
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &up );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &d );
  // FLA_Obj_free( &e );
  // FLA_Obj_free( &f );
  // FLA_Obj_free( &g );
  FLA_free( buff_w );
  FLA_free( buff_al );
  FLA_free( buff_ap );
  FLA_free( buff_u );
  FLA_free( buff_up );
  FLA_free( buff_v );
  FLA_free( buff_d );
  FLA_free( buff_e );
  FLA_free( buff_f );
  FLA_free( buff_g );

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_step_opz_var5 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *buff_A,
int  rs_A,
int  cs_A,
dcomplex *buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *buff_T,
int  rs_T,
int  cs_T,
dcomplex *buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), bl1_zsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

{
  // Double-complex (dcomplex) body of FLA_Bidiag_UT_u_step_opz_var5.
  //
  // Runs b_alg = m_TS iterations over the leading rows/columns of A. In each
  // iteration i it:
  //   1. applies the updates accumulated in Y and Z to column i of A
  //      (alpha11 and a21) and to row i right of the diagonal (a12t);
  //   2. computes a left Householder transform from alpha11/a21 and stores
  //      its scalar in tau11;
  //   3. if columns remain to the right (n_ahead > 0), forms y21, computes a
  //      right Householder transform from a12t (scalar stored in sigma11),
  //      forms z21, and fills column i of T (t01) and of S (s01);
  //   4. otherwise only fills t01.
  //
  // Parameters (as used below):
  //   m_A, n_A         - row/column counts used to size the work vectors and
  //                      to compute m_ahead / n_ahead.
  //   m_TS             - number of iterations performed (b_alg).
  //   buff_A,rs_A,cs_A - matrix A with row/column strides; updated in place.
  //   buff_Y,rs_Y,cs_Y - n_A x b_alg workspace, zeroed on entry, column i
  //                      receives y21.
  //   buff_Z,rs_Z,cs_Z - m_A x b_alg workspace, zeroed on entry, column i
  //                      receives z21.
  //   buff_T,rs_T,cs_T - receives tau11 on the diagonal and t01 above it.
  //   buff_S,rs_S,cs_S - receives sigma11 on the diagonal and s01 above it.
  //
  // Returns FLA_SUCCESS. Six temporary vectors are obtained with FLA_malloc
  // and released with FLA_free before returning.

  // Scalar constants 1, 0 and -1 in dcomplex form.
  dcomplex* buff_1  = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* buff_0  = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );

  dcomplex  beta;
  dcomplex  last_elem;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_TS;

  // Unit-stride work vectors: u has m_A entries, v/d/e have n_A entries,
  // f/g have m_A entries.
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
  dcomplex* buff_u  = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_v  = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_d  = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_e  = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
  dcomplex* buff_f  = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_g  = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_u   = 1;
  int       inc_v   = 1;
  int       inc_d   = 1;
  int       inc_e   = 1;
  int       inc_f   = 1;
  int       inc_g   = 1;

  // Clear the first b_alg columns of Y (n_A rows) and Z (m_A rows).
  // FLA_Set( FLA_ZERO, Y );
  // FLA_Set( FLA_ZERO, Z );
  bl1_zsetm( n_A,
             b_alg,
             buff_0,
             buff_Y, rs_Y, cs_Y );
  bl1_zsetm( m_A,
             b_alg,
             buff_0,
             buff_Z, rs_Z, cs_Z );

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A around the current diagonal element (row i, column i).
    dcomplex* a10t      = buff_A  + (0  )*cs_A + (i  )*rs_A;
    dcomplex* A20       = buff_A  + (0  )*cs_A + (i+1)*rs_A;
    dcomplex* a01       = buff_A  + (i  )*cs_A + (0  )*rs_A;
    dcomplex* alpha11   = buff_A  + (i  )*cs_A + (i  )*rs_A;
    dcomplex* a21       = buff_A  + (i  )*cs_A + (i+1)*rs_A;
    dcomplex* A02       = buff_A  + (i+1)*cs_A + (0  )*rs_A;
    dcomplex* a12t      = buff_A  + (i+1)*cs_A + (i  )*rs_A;
    dcomplex* A22       = buff_A  + (i+1)*cs_A + (i+1)*rs_A;

    // Matching views into Y and Z.
    dcomplex* y10t      = buff_Y  + (0  )*cs_Y + (i  )*rs_Y;
    dcomplex* Y20       = buff_Y  + (0  )*cs_Y + (i+1)*rs_Y;
    dcomplex* y21       = buff_Y  + (i  )*cs_Y + (i+1)*rs_Y;

    dcomplex* z10t      = buff_Z  + (0  )*cs_Z + (i  )*rs_Z;
    dcomplex* Z20       = buff_Z  + (0  )*cs_Z + (i+1)*rs_Z;
    dcomplex* z21       = buff_Z  + (i  )*cs_Z + (i+1)*rs_Z;

    // Column i of T and S: the part above the diagonal and the diagonal entry.
    dcomplex* t01       = buff_T  + (i  )*cs_T + (0  )*rs_T;
    dcomplex* tau11     = buff_T  + (i  )*cs_T + (i  )*rs_T;

    dcomplex* s01       = buff_S  + (i  )*cs_S + (0  )*rs_S;
    dcomplex* sigma11   = buff_S  + (i  )*cs_S + (i  )*rs_S;

    // Work-vector views; u21 and v21 start at element i+1, the others at 0.
    dcomplex* u21       = buff_u  + (i+1)*inc_u;

    dcomplex* v21       = buff_v  + (i+1)*inc_v;

    dcomplex* d0        = buff_d  + (0  )*inc_d;

    dcomplex* e0        = buff_e  + (0  )*inc_e;

    dcomplex* f0        = buff_f  + (0  )*inc_f;

    dcomplex* g0        = buff_g  + (0  )*inc_g;

    // First element of v21 and the remainder.
    dcomplex* v21_t     = v21     + (0  )*inc_v;
    dcomplex* v21_b     = v21     + (1  )*inc_v;

    // Last element of a01 (row i-1 of column i).
    // NOTE(review): for i == 0 this address is formed with index -1; it is
    // only dereferenced inside the m_behind > 0 guards below.
    dcomplex* a01_b     = a01     + (0  )*cs_A + (i-1)*rs_A;

    // First element of a12t and the remainder.
    dcomplex* a12t_l    = a12t    + (0  )*cs_A + (0  )*rs_A;
    dcomplex* a12t_r    = a12t    + (1  )*cs_A + (0  )*rs_A;

    // Bottom-left panels starting at row i, and the column [alpha11; a21].
    dcomplex* ABL       = a10t;
    dcomplex* ZBL       = z10t;

    dcomplex* a2        = alpha11;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;
    int       m_behind  = i;
    int       n_behind  = i;

    /*------------------------------------------------------------*/

    // Temporarily overwrite the last element of a01 with one for the
    // updates below; the saved value is restored afterwards.
    if ( m_behind > 0 )
    {
      // FLA_Copy( a01_b, last_elem );
      // FLA_Set( FLA_ONE, a01_b );
      last_elem = *a01_b;
      *a01_b = *buff_1;
    }

    // Update column a2 = [alpha11; a21] (m_ahead + 1 entries):
    //   a2 = a2 - ABL * conj(y10t) - ZBL * conj(a01)
    // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01,  FLA_ONE, a2 );
    bl1_zgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ABL,  rs_A, cs_A,
               y10t, cs_Y,
               buff_1,
               a2,   rs_A );
    bl1_zgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ZBL, rs_Z, cs_Z,
               a01, rs_A,
               buff_1,
               a2,  rs_A );

    // Update row a12t using Y20/a10t and A02/z10t.
    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    // FLA_Gemv( FLA_CONJ_TRANSPOSE,    FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
    bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               n_ahead,
               n_behind,
               buff_m1,
               Y20,  rs_Y, cs_Y,
               a10t, cs_A,
               buff_1,
               a12t, cs_A );
    bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_behind,
               n_ahead,
               buff_m1,
               A02,  rs_A, cs_A,
               z10t, cs_Z,
               buff_1,
               a12t, cs_A );

    // Put back the element of a01 that was replaced by one above.
    if ( m_behind > 0 )
    {
      // FLA_Copy( last_elem, a01_b );
      *a01_b = last_elem;
    }

    // Left Householder transform from alpha11/a21; scalar goes to tau11.
    // The resulting a21 is also copied into the work vector u21.
    // FLA_Househ2_UT( FLA_LEFT,
    //                 alpha11,
    //                 a21, tau11 );
    // FLA_Copy( a21, u21 );
    FLA_Househ2_UT_l_opz( m_ahead,
                          alpha11,
                          a21, rs_A,
                          tau11 );
    bl1_zcopyv( BLIS1_NO_CONJUGATE,
                m_ahead,
                a21, rs_A,
                u21, inc_u );

    if ( n_ahead > 0 )
    {
      // y21 = conj(a12t)^T + A22' * u21
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
      bl1_zcopyv( BLIS1_CONJUGATE,
                  n_ahead,
                  a12t, cs_A,
                  y21,  rs_Y );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 y21, rs_Y );

      // d0 = A20' * u21 and e0 = Z20' * u21 (each n_behind entries).
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_0,
                 d0,  inc_d );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 Z20, rs_Z, cs_Z,
                 u21, inc_u,
                 buff_0,
                 e0,  inc_e );

      // t01 = conj(a10t)^T + d0, reusing d0 instead of a second A20' * u21.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Axpy( FLA_ONE, d0, t01 );
      bl1_zcopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  buff_1,
                  d0,  inc_d,
                  t01, rs_T );

      // y21 = y21 - Y20 * d0 - A02^T * e0
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      // FLA_Gemv( FLA_TRANSPOSE,    FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 n_behind,
                 buff_m1,
                 Y20, rs_Y, cs_Y,
                 d0,  inc_d,
                 buff_1,
                 y21, rs_Y );
      bl1_zgemv( BLIS1_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_m1,
                 A02, rs_A, cs_A,
                 e0,  inc_e,
                 buff_1,
                 y21, rs_Y );

      // y21 = y21 / tau11
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
      bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                     n_ahead,
                     tau11,
                     y21, rs_Y );

      // a12t = a12t - conj(y21)^T
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  n_ahead,
                  buff_m1,
                  y21,  rs_Y,
                  a12t, cs_A );

      // Right Householder transform from a12t_l/a12t_r; scalar goes to sigma11.
      // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
      FLA_Househ2_UT_r_opz( n_ahead - 1,
                            a12t_l,
                            a12t_r, cs_A,
                            sigma11 );

      // v21 = [ 1; a12t_r^T ]
      // FLA_Set( FLA_ONE, v21_t );
      // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
      *v21_t = *buff_1;
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_ahead - 1,
                  a12t_r, cs_A,
                  v21_b,  inc_v );

      // beta = - y21' * v21
      // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      // FLA_Scal( FLA_MINUS_ONE, beta );
      bl1_zdot( BLIS1_CONJUGATE,
                n_ahead,
                y21, rs_Y,
                v21, inc_v,
                &beta );
      bl1_zscals( buff_m1, &beta );

      // z21 = A22 * v21 + beta * u21
      // FLA_Copy( u21, z21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  u21, inc_u,
                  z21, rs_Z );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 v21, inc_v,
                 &beta,
                 z21, rs_Z );

      // f0 = Y20' * v21 and g0 = conj(A02) * v21 (each m_behind entries).
      // FLA_Gemvc( FLA_CONJ_TRANSPOSE,    FLA_ONE, Y20, v21, FLA_ZERO, f0 );
      // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 n_ahead,
                 m_behind,
                 buff_1,
                 Y20, rs_Y, cs_Y,
                 v21, inc_v,
                 buff_0,
                 f0,  inc_f );
      bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 v21, inc_v,
                 buff_0,
                 g0,  inc_g );

      // z21 = z21 - A20 * f0 - Z20 * g0
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 A20, rs_A, cs_A,
                 f0,  inc_f,
                 buff_1,
                 z21, rs_Z );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 Z20, rs_Z, cs_Z,
                 g0,  inc_g,
                 buff_1,
                 z21, rs_Z );

      // z21 = z21 / sigma11
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
      bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     sigma11,
                     z21, rs_Z );

      // s01 = g0, reusing the conj(A02) * v21 product computed above.
      // FLA_Copy( g0, s01 );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  g0,  inc_g,
                  s01, rs_S );
    }
    else // if ( n_ahead == 0 )
    {
      // No columns remain to the right: only t01 is formed, directly as
      // t01 = conj(a10t)^T + A20' * u21.
      // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
      bl1_zcopyv( BLIS1_CONJUGATE,
                  n_behind,
                  a10t, cs_A,
                  t01,  rs_T );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 u21, inc_u,
                 buff_1,
                 t01, rs_T );
    }

    /*------------------------------------------------------------*/

  }

  // Release the six work vectors allocated at the top.
  // FLA_Obj_free( &u );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &d );
  // FLA_Obj_free( &e );
  // FLA_Obj_free( &f );
  // FLA_Obj_free( &g );
  FLA_free( buff_u );
  FLA_free( buff_v );
  FLA_free( buff_d );
  FLA_free( buff_e );
  FLA_free( buff_f );
  FLA_free( buff_g );

  return FLA_SUCCESS;
}

References FLA_Apply_H2_UT(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt(), FLA_Gemv(), FLA_Househ2_UT(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_unb_var1().

{
  // Body of an unblocked, FLAME-notation step routine operating on the
  // objects A, T and S. The signature is not shown in this listing;
  // presumably this is FLA_Bidiag_UT_u_step_unb_var1 -- confirm against the
  // source file.
  //
  // The loop advances one row/column at a time until the top-left partition
  // of A has b_alg = length(T) rows. Each iteration computes a left
  // Householder transform (scalar in tau11) and, when columns remain to the
  // right, applies it, computes a right Householder transform (scalar in
  // sigma11), applies that to A22, and fills s01. t01 is filled in every
  // iteration. Returns FLA_SUCCESS.

  // 3x3 partitionings of A, T and S, and a 3x1 partitioning of the
  // temporary vector v.
  FLA_Obj  ATL,   ATR,      A00,  a01,     A02, 
           ABL,   ABR,      a10t, alpha11, a12t,
                            A20,  a21,     A22;
  FLA_Obj  TTL,   TTR,      T00,  t01,   T02, 
           TBL,   TBR,      t10t, tau11, t12t,
                            T20,  t21,   T22;
  FLA_Obj  STL,   STR,      S00,  s01,     S02, 
           SBL,   SBR,      s10t, sigma11, s12t,
                            S20,  s21,     S22;
  FLA_Obj  vT,              v01,
           vB,              nu11,
                            v21;
  FLA_Obj  v;

  // Sub-views: first element / remainder of a12t and v21, and first
  // column / remainder of A22.
  FLA_Obj  a12t_l, a12t_r;
  FLA_Obj  A22_l, A22_r;
  FLA_Obj  v21_t,
           v21_b;

  FLA_Datatype datatype_A;
  dim_t        n_A;
  dim_t        b_alg;


  // Number of iterations to perform.
  b_alg      = FLA_Obj_length( T );

  datatype_A = FLA_Obj_datatype( A );
  n_A        = FLA_Obj_width( A );

  // Temporary column vector with one entry per column of A; freed below.
  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );

  // Start with empty top-left (and top) partitions.
  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_TL );
  FLA_Part_2x2( T,    &TTL, &TTR,
                      &TBL, &TBR,     0, 0, FLA_TL );
  FLA_Part_2x2( S,    &STL, &STR,
                      &SBL, &SBR,     0, 0, FLA_TL );
  FLA_Part_2x1( v,    &vT, 
                      &vB,            0, FLA_TOP );

  while ( FLA_Obj_length( ATL ) < b_alg )
  {
    // Expose the next row and column of A, T, S and the next element of v.
    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,
                        /* ************* */   /* ************************** */
                                                &a10t, /**/ &alpha11, &a12t,
                           ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,
                           1, 1, FLA_BR );
    FLA_Repart_2x2_to_3x3( TTL, /**/ TTR,       &T00,  /**/ &t01,   &T02,
                        /* ************* */   /* ************************** */
                                                &t10t, /**/ &tau11, &t12t,
                           TBL, /**/ TBR,       &T20,  /**/ &t21,   &T22,
                           1, 1, FLA_BR );
    FLA_Repart_2x2_to_3x3( STL, /**/ STR,       &S00,  /**/ &s01,     &S02,
                        /* ************* */   /* ************************** */
                                                &s10t, /**/ &sigma11, &s12t,
                           SBL, /**/ SBR,       &S20,  /**/ &s21,     &S22,
                           1, 1, FLA_BR );
    FLA_Repart_2x1_to_3x1( vT,                &v01, 
                        /* ** */            /* ***** */
                                              &nu11, 
                           vB,                &v21,        1, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    // [ alpha11_new, u21, tau11 ] = House2( alpha11, a21 );
    FLA_Househ2_UT( FLA_LEFT,
                    alpha11,
                    a21, tau11 );

    // The right-hand transform is only formed when columns remain in A22.
    if ( FLA_Obj_width( A22 ) > 0 )
    {
      FLA_Part_1x2( a12t,    &a12t_l, &a12t_r,      1, FLA_LEFT );
      FLA_Part_1x2( A22,     &A22_l,  &A22_r,       1, FLA_LEFT );
      FLA_Part_2x1( v21,     &v21_t, 
                             &v21_b,            1, FLA_TOP );

      // Apply H from the left to a12t and A22.
      FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );

      // [ a12t_l_new, v12t_r, sigma11 ] = House2( a12t_l, a12t_r );
      FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );

      // v21_t = 1;
      // v21_b = a12t_r;
      FLA_Set( FLA_ONE, v21_t );
      FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );

      // Apply H from the right to A22.
      FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );

      // s01 = conj(V02) * v21;
      FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
    }

    // t01 = a10t' + U20' * u21;
    FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
    FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );

    /*------------------------------------------------------------*/

    // Move the exposed row/column into the top-left partitions.
    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,
                                                     a10t, alpha11, /**/ a12t,
                            /* ************** */  /* ************************ */
                              &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,
                              FLA_TL );
    FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR,       T00,  t01,   /**/ T02,
                                                     t10t, tau11, /**/ t12t,
                            /* ************** */  /* ************************ */
                              &TBL, /**/ &TBR,       T20,  t21,   /**/ T22,
                              FLA_TL );
    FLA_Cont_with_3x3_to_2x2( &STL, /**/ &STR,       S00,  s01,     /**/ S02,
                                                     s10t, sigma11, /**/ s12t,
                            /* ************** */  /* ************************ */
                              &SBL, /**/ &SBR,       S20,  s21,     /**/ S22,
                              FLA_TL );
    FLA_Cont_with_3x1_to_2x1( &vT,                v01, 
                                                  nu11, 
                            /* ** */           /* ***** */
                              &vB,                v21,     FLA_TOP );
  }

  // Release the temporary vector.
  FLA_Obj_free( &v );

  return FLA_SUCCESS;
}

References FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Gerc(), FLA_Househ2_UT(), FLA_Inv_scalc(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_unb_var2().

{
  // Body of an unblocked, FLAME-notation step routine operating on the
  // objects A, T and S. The signature is not shown in this listing;
  // presumably this is FLA_Bidiag_UT_u_step_unb_var2 -- confirm against the
  // source file.
  //
  // The loop advances one row/column at a time until the top-left partition
  // of A has b_alg = length(T) rows. Each iteration computes a left
  // Householder transform (scalar in tau11). When columns remain to the
  // right it forms y21, updates a12t, computes a right Householder transform
  // (scalar in sigma11), forms z21, applies both rank-1 updates to A22 and
  // fills s01. t01 is filled in every iteration. Returns FLA_SUCCESS.

  // 3x3 partitionings of A, T and S.
  FLA_Obj  ATL,   ATR,      A00,  a01,     A02, 
           ABL,   ABR,      a10t, alpha11, a12t,
                            A20,  a21,     A22;
  FLA_Obj  TTL,   TTR,      T00,  t01,   T02, 
           TBL,   TBR,      t10t, tau11, t12t,
                            T20,  t21,   T22;
  FLA_Obj  STL,   STR,      S00,  s01,     S02, 
           SBL,   SBR,      s10t, sigma11, s12t,
                            S20,  s21,     S22;
  // 3x1 partitionings of the temporary vectors y, z and v.
  FLA_Obj  yT,              y01,
           yB,              psi11,
                            y21;
  FLA_Obj  zT,              z01,
           zB,              zeta11,
                            z21;
  FLA_Obj  vT,              v01,
           vB,              nu11,
                            v21;
  FLA_Obj  v, y, z;

  // 1x1 temporary holding -y21' * v21.
  FLA_Obj  beta;

  // First element / remainder of a12t and v21.
  FLA_Obj  a12t_l, a12t_r;
  FLA_Obj  v21_t,
           v21_b;

  FLA_Datatype datatype_A;
  dim_t        m_A, n_A;
  dim_t        b_alg;


  // Number of iterations to perform.
  b_alg      = FLA_Obj_length( T );

  datatype_A = FLA_Obj_datatype( A );
  m_A        = FLA_Obj_length( A );
  n_A        = FLA_Obj_width( A );

  // Temporaries: v and y have one entry per column of A, z one per row.
  // All four objects are freed before returning.
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &beta );
  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );

  // Start with empty top-left (and top) partitions.
  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_TL );
  FLA_Part_2x2( T,    &TTL, &TTR,
                      &TBL, &TBR,     0, 0, FLA_TL );
  FLA_Part_2x2( S,    &STL, &STR,
                      &SBL, &SBR,     0, 0, FLA_TL );
  FLA_Part_2x1( v,    &vT, 
                      &vB,            0, FLA_TOP );
  FLA_Part_2x1( y,    &yT, 
                      &yB,            0, FLA_TOP );
  FLA_Part_2x1( z,    &zT, 
                      &zB,            0, FLA_TOP );

  while ( FLA_Obj_length( ATL ) < b_alg )
  {
    // Expose the next row and column of A, T, S and the next element of
    // v, y and z.
    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,
                        /* ************* */   /* ************************** */
                                                &a10t, /**/ &alpha11, &a12t,
                           ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,
                           1, 1, FLA_BR );
    FLA_Repart_2x2_to_3x3( TTL, /**/ TTR,       &T00,  /**/ &t01,   &T02,
                        /* ************* */   /* ************************** */
                                                &t10t, /**/ &tau11, &t12t,
                           TBL, /**/ TBR,       &T20,  /**/ &t21,   &T22,
                           1, 1, FLA_BR );
    FLA_Repart_2x2_to_3x3( STL, /**/ STR,       &S00,  /**/ &s01,     &S02,
                        /* ************* */   /* ************************** */
                                                &s10t, /**/ &sigma11, &s12t,
                           SBL, /**/ SBR,       &S20,  /**/ &s21,     &S22,
                           1, 1, FLA_BR );
    FLA_Repart_2x1_to_3x1( vT,                &v01, 
                        /* ** */            /* ***** */
                                              &nu11, 
                           vB,                &v21,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( yT,                &y01, 
                        /* ** */            /* ***** */
                                              &psi11, 
                           yB,                &y21,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( zT,                &z01, 
                        /* ** */            /* ***** */
                                              &zeta11, 
                           zB,                &z21,        1, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    // [ alpha11_new, u21, tau11 ] = House2( alpha11, a21 );
    FLA_Househ2_UT( FLA_LEFT,
                    alpha11,
                    a21, tau11 );

    // The right-hand transform is only formed when columns remain in A22.
    if ( FLA_Obj_width( A22 ) > 0 )
    {
      // y21' = a12t + u21' * A22;
      // y21  = conj(a12t) + A22' * u21;
      FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
      FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );

      // y21 = y21 / tau11;
      FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );

      // a12t = a12t - conj(y21)^T;
      FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );

      FLA_Part_1x2( a12t,    &a12t_l, &a12t_r,      1, FLA_LEFT );
      FLA_Part_2x1( v21,     &v21_t, 
                             &v21_b,            1, FLA_TOP );

      // [ a12t_l, v12t_b, sigma11 ] = House2( a12t_l, a12t_r );
      FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );

      // v21_t = 1;
      // v21_b = a12t_r^T;
      FLA_Set( FLA_ONE, v21_t );
      FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );

      // beta = - y21' * v21;
      FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      FLA_Scal( FLA_MINUS_ONE, beta );

      // z21 = ( A22 - u21 * y21' ) * v21 / sigma11;
      //     = ( A22 * v21 - u21 * y21' * v21 ) / sigma11;
      //     = ( A22 * v21 + beta * u21 ) / sigma11;
      // z21 is seeded with u21 (stored in a21) so the Gemvc below can scale
      // it by beta while adding A22 * v21.
      FLA_Copy( a21, z21 );
      FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
      FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );

      // A22 = A22 - u21 * y21' - z21 * v21';
      FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
      FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );

      // s01 = conj(V02) * v21;
      FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
    }

    // t01 = a10t' + U20' * u21;
    FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
    FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );

    /*------------------------------------------------------------*/

    // Move the exposed row/column into the top-left partitions.
    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,
                                                     a10t, alpha11, /**/ a12t,
                            /* ************** */  /* ************************ */
                              &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,
                              FLA_TL );
    FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR,       T00,  t01,   /**/ T02,
                                                     t10t, tau11, /**/ t12t,
                            /* ************** */  /* ************************ */
                              &TBL, /**/ &TBR,       T20,  t21,   /**/ T22,
                              FLA_TL );
    FLA_Cont_with_3x3_to_2x2( &STL, /**/ &STR,       S00,  s01,     /**/ S02,
                                                     s10t, sigma11, /**/ s12t,
                            /* ************** */  /* ************************ */
                              &SBL, /**/ &SBR,       S20,  s21,     /**/ S22,
                              FLA_TL );
    FLA_Cont_with_3x1_to_2x1( &vT,                v01, 
                                                  nu11, 
                            /* ** */           /* ***** */
                              &vB,                v21,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &yT,                y01, 
                                                  psi11, 
                            /* ** */           /* ***** */
                              &yB,                y21,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &zT,                z01, 
                                                  zeta11, 
                            /* ** */           /* ***** */
                              &zB,                z21,     FLA_TOP );
  }

  // Release the temporaries created above.
  FLA_Obj_free( &beta );
  FLA_Obj_free( &v );
  FLA_Obj_free( &y );
  FLA_Obj_free( &z );

  return FLA_SUCCESS;
}

References FLA_Axpy(), FLA_Axpyt(), FLA_Conjugate(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Gerc(), FLA_Househ2_UT(), FLA_Househ2s_UT(), FLA_Inv_scalc(), FLA_MINUS_ONE, FLA_Mult_add(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_unb_var3().

{
  FLA_Obj  ATL,   ATR,      A00,  a01,     A02, 
           ABL,   ABR,      a10t, alpha11, a12t,
                            A20,  a21,     A22;
  FLA_Obj  TTL,   TTR,      T00,  t01,   T02, 
           TBL,   TBR,      t10t, tau11, t12t,
                            T20,  t21,   T22;
  FLA_Obj  STL,   STR,      S00,  s01,     S02, 
           SBL,   SBR,      s10t, sigma11, s12t,
                            S20,  s21,     S22;
  FLA_Obj  wT,              w01,
           wB,              omega11,
                            w21;
  FLA_Obj  apT,             a01p,
           apB,             alpha11p,
                            a12p;
  FLA_Obj  uT,              u01,
           uB,              upsilon11,
                            u21;
  FLA_Obj  uTp,             u01p,
           uBp,             upsilon11p,
                            u21p;
  FLA_Obj  vT,              v01,
           vB,              nu11,
                            v21;
  FLA_Obj  yT,              y01,
           yB,              psi11,
                            y21;
  FLA_Obj  zT,              z01,
           zB,              zeta11,
                            z21;
  FLA_Obj  w, ap, u, up, v, y, z;

  FLA_Obj  minus_inv_tau11;
  FLA_Obj  beta;
  FLA_Obj  alpha12;
  FLA_Obj  minus_conj_alpha12;
  FLA_Obj  psi11_minus_alpha12;
  FLA_Obj  minus_upsilon11;
  FLA_Obj  minus_conj_nu11;
  FLA_Obj  minus_conj_psi11;
  FLA_Obj  minus_zeta11;

  FLA_Obj  a12t_l, a12t_r;
  FLA_Obj  a12p_t,
           a12p_b;
  FLA_Obj  A22_l, A22_r;
  FLA_Obj  v21_t,
           v21_b;

  FLA_Datatype datatype_A;
  dim_t        m_A, n_A;
  dim_t        b_alg;


  b_alg      = FLA_Obj_length( T );

  datatype_A = FLA_Obj_datatype( A );
  m_A        = FLA_Obj_length( A );
  n_A        = FLA_Obj_width( A );

  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &minus_inv_tau11 );
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &beta );
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &alpha12 );
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &minus_conj_alpha12 );
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &psi11_minus_alpha12 );
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &minus_upsilon11 );
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &minus_conj_nu11 );
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &minus_conj_psi11 );
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &minus_zeta11 );
  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_TL );
  FLA_Part_2x2( T,    &TTL, &TTR,
                      &TBL, &TBR,     0, 0, FLA_TL );
  FLA_Part_2x2( S,    &STL, &STR,
                      &SBL, &SBR,     0, 0, FLA_TL );
  FLA_Part_2x1( w,    &wT, 
                      &wB,            0, FLA_TOP );
  FLA_Part_2x1( ap,   &apT, 
                      &apB,           0, FLA_TOP );
  FLA_Part_2x1( u,    &uT, 
                      &uB,            0, FLA_TOP );
  FLA_Part_2x1( up,   &uTp, 
                      &uBp,           0, FLA_TOP );
  FLA_Part_2x1( v,    &vT, 
                      &vB,            0, FLA_TOP );
  FLA_Part_2x1( y,    &yT, 
                      &yB,            0, FLA_TOP );
  FLA_Part_2x1( z,    &zT, 
                      &zB,            0, FLA_TOP );

  while ( FLA_Obj_length( ATL ) < b_alg )
  {
    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,
                        /* ************* */   /* ************************** */
                                                &a10t, /**/ &alpha11, &a12t,
                           ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,
                           1, 1, FLA_BR );
    FLA_Repart_2x2_to_3x3( TTL, /**/ TTR,       &T00,  /**/ &t01,   &T02,
                        /* ************* */   /* ************************** */
                                                &t10t, /**/ &tau11, &t12t,
                           TBL, /**/ TBR,       &T20,  /**/ &t21,   &T22,
                           1, 1, FLA_BR );
    FLA_Repart_2x2_to_3x3( STL, /**/ STR,       &S00,  /**/ &s01,     &S02,
                        /* ************* */   /* ************************** */
                                                &s10t, /**/ &sigma11, &s12t,
                           SBL, /**/ SBR,       &S20,  /**/ &s21,     &S22,
                           1, 1, FLA_BR );
    FLA_Repart_2x1_to_3x1( wT,                &w01, 
                        /* ** */            /* ***** */
                                              &omega11, 
                           wB,                &w21,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( apT,               &a01p, 
                        /* ** */            /* ***** */
                                              &alpha11p, 
                           apB,               &a12p,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( uT,                &u01, 
                        /* ** */            /* ***** */
                                              &upsilon11, 
                           uB,                &u21,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( uTp,               &u01p, 
                        /* ** */            /* ***** */
                                              &upsilon11p, 
                           uBp,               &u21p,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( vT,                &v01, 
                        /* ** */            /* ***** */
                                              &nu11, 
                           vB,                &v21,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( yT,                &y01, 
                        /* ** */            /* ***** */
                                              &psi11, 
                           yB,                &y21,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( zT,                &z01, 
                        /* ** */            /* ***** */
                                              &zeta11, 
                           zB,                &z21,        1, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    if ( FLA_Obj_length( ATL ) > 0 )
    {
      FLA_Copy( upsilon11, minus_upsilon11 );
      FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );

      FLA_Copy( zeta11, minus_zeta11 );
      FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );

      FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
      FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );

      FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
      FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );

      // alpha11 = alpha11 - upsilon11 * conj(psi11) - zeta11 * conj(nu1);
      FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
      FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  zeta11,    alpha11 );

      // a21 = a21 - u21 * conj(psi11) - z21 * conj(nu11);
      FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
      FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11,  z21, a21 );

      // a12t = a12t - upsilon11 * y21' - zeta11 * v21';
      FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
      FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11,    v21, a12t );
    }

    // [ alpha11, u21p, tau11 ] = House2( alpha11, a21 );
    FLA_Househ2_UT( FLA_LEFT,
                    alpha11,
                    a21, tau11 );
    FLA_Copy( a21, u21p );

    if ( FLA_Obj_width( A22 ) > 0 )
    {
      // minus_inv_tau11 = - 1 / tau11;
      FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );

      // a12p = ( tau11 - 1 ) * a12t^T / tau11;
      //      = a12t^T - ( 1 / tau11 ) * a12t^T;
      FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
    }

    if ( FLA_Obj_length( ATL ) > 0 )
    {
      // A22 = A22 - u21 * y21' - z21 * v21';
      FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
    }

    if ( FLA_Obj_width( A22 ) > 0 )
    {
      // y21 = A22' * u21p;
      FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );

      // a12p = a12p - conj(y21) / tau11;
      FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );

      // w21 = A22 * conj(a12p);
      FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );

      // y21 = y21 + conj(a12t)^T;
      FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );

      FLA_Part_1x2( a12t,    &a12t_l, &a12t_r,      1, FLA_LEFT );
      FLA_Part_2x1( v21,     &v21_t, 
                             &v21_b,            1, FLA_TOP );
      FLA_Part_2x1( a12p,    &a12p_t, 
                             &a12p_b,           1, FLA_TOP );

      // [ alpha12, psi11_minus_alpha12, sigma11 ] = House2s( a12p_t, a12p_b );
      FLA_Househ2s_UT( FLA_RIGHT,
                       a12p_t,
                       a12p_b,
                       alpha12, psi11_minus_alpha12, sigma11 );

      // v21 = conj( ( a12p - alpha12 * e0 ) / ( psi11 - alpha12 ) );
      FLA_Copy( a12p, v21 );
      FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      FLA_Conjugate( v21_b );

      // a12t_l = alpha12;
      // a12t_r = v21_b^T;
      FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
      FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
    }

    // u21 = u21p;
    FLA_Copy( u21p, u21 );

    if ( FLA_Obj_width( A22 ) > 0 )
    {
      // beta = - y21' * v21 / tau11;
      FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      FLA_Scal( FLA_MINUS_ONE, beta );
      FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );

      FLA_Part_1x2( A22,    &A22_l, &A22_r,      1, FLA_LEFT );

      // minus_conj_alpha12 = - conj(alpha12);
      FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );

      // z21 = ( w21 - conj(alpha12) * A22 * e0 ) / conj(psi11 - alpha12) + beta * u21;
      FLA_Copy( w21, z21 );
      FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
      FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      FLA_Axpy( beta, u21, z21 );

      // y21 = y21 / tau11;
      // z21 = z21 / sigma11;
      FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );

      // s01 = conj(V02) * v21;
      FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
    }

    // t01 = a10t' + U20' * u21;
    FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
    FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );

    // Update A22 if this is the last iteration; this is needed when we're
    // being called from the blocked routine so A22 is left in a valid state.
    if ( FLA_Obj_length( ATL ) + 1 == b_alg &&
         FLA_Obj_width( A22 ) > 0 )
    {
      // A22 = A22 - u21 * y21' - z21 * v21';
      FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
      FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
    }

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,
                                                     a10t, alpha11, /**/ a12t,
                            /* ************** */  /* ************************ */
                              &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,
                              FLA_TL );
    FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR,       T00,  t01,   /**/ T02,
                                                     t10t, tau11, /**/ t12t,
                            /* ************** */  /* ************************ */
                              &TBL, /**/ &TBR,       T20,  t21,   /**/ T22,
                              FLA_TL );
    FLA_Cont_with_3x3_to_2x2( &STL, /**/ &STR,       S00,  s01,     /**/ S02,
                                                     s10t, sigma11, /**/ s12t,
                            /* ************** */  /* ************************ */
                              &SBL, /**/ &SBR,       S20,  s21,     /**/ S22,
                              FLA_TL );
    FLA_Cont_with_3x1_to_2x1( &wT,                w01, 
                                                  omega11, 
                            /* ** */           /* ***** */
                              &wB,                w21,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &apT,               a01p, 
                                                  alpha11p, 
                            /* ** */           /* ***** */
                              &apB,               a12p,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &uT,                u01, 
                                                  upsilon11, 
                            /* ** */           /* ***** */
                              &uB,                u21,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &uTp,               u01p, 
                                                  upsilon11p, 
                            /* ** */           /* ***** */
                              &uBp,               u21p,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &vT,                v01, 
                                                  nu11, 
                            /* ** */           /* ***** */
                              &vB,                v21,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &yT,                y01, 
                                                  psi11, 
                            /* ** */           /* ***** */
                              &yB,                y21,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &zT,                z01, 
                                                  zeta11, 
                            /* ** */           /* ***** */
                              &zB,                z21,     FLA_TOP );
  }

  FLA_Obj_free( &minus_inv_tau11 );
  FLA_Obj_free( &beta );
  FLA_Obj_free( &alpha12 );
  FLA_Obj_free( &minus_conj_alpha12 );
  FLA_Obj_free( &psi11_minus_alpha12 );
  FLA_Obj_free( &minus_upsilon11 );
  FLA_Obj_free( &minus_conj_nu11 );
  FLA_Obj_free( &minus_conj_psi11 );
  FLA_Obj_free( &minus_zeta11 );
  FLA_Obj_free( &w );
  FLA_Obj_free( &ap );
  FLA_Obj_free( &u );
  FLA_Obj_free( &up );
  FLA_Obj_free( &v );
  FLA_Obj_free( &y );
  FLA_Obj_free( &z );

  return FLA_SUCCESS;
}

References FLA_Axpy(), FLA_Axpyt(), FLA_Conjugate(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Househ2_UT(), FLA_Househ2s_UT(), FLA_Inv_scalc(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Mult_add(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_unb_var4().

{
  // Unblocked "step" of a reduction to upper bidiagonal form that performs
  // FLA_Obj_length( T ) iterations over A.
  //
  // NOTE(review): A, Y, Z, T and S are not declared in this block; they are
  // presumably the FLA_Obj parameters of the enclosing function -- confirm
  // against the signature.
  //
  // Each iteration:
  //   1. brings the current column ( alpha11; a21 ) and the current row a12t
  //      of A up to date using the columns of Y and Z computed so far;
  //   2. computes the left Householder transform with FLA_Househ2_UT(),
  //      leaving its vector in a21 and its scalar in tau11;
  //   3. computes the right Householder transform with FLA_Househ2s_UT(),
  //      then stores alpha12 and the vector v21 into a12t, and the scalar
  //      into sigma11;
  //   4. fills in the next column of Y (y21), Z (z21), T (t01) and S (s01).
  //
  // A22 is only ever read in this routine; the rank-update terms
  // "- U20 * Y20' - Z20 * V20'" are applied on the fly wherever A22 is used.
  // Y and Z are zeroed on entry and then overwritten column by column.
  // The routine always returns FLA_SUCCESS.

  // Partition views of the matrix operands.
  FLA_Obj  ATL,   ATR,      A00,  a01,     A02, 
           ABL,   ABR,      a10t, alpha11, a12t,
                            A20,  a21,     A22;
  FLA_Obj  YTL,   YTR,      Y00,  y01,   Y02, 
           YBL,   YBR,      y10t, psi11, y12t,
                            Y20,  y21,   Y22;
  FLA_Obj  ZTL,   ZTR,      Z00,  z01,    Z02, 
           ZBL,   ZBR,      z10t, zeta11, z12t,
                            Z20,  z21,    Z22;
  FLA_Obj  TTL,   TTR,      T00,  t01,   T02, 
           TBL,   TBR,      t10t, tau11, t12t,
                            T20,  t21,   T22;
  FLA_Obj  STL,   STR,      S00,  s01,     S02, 
           SBL,   SBR,      s10t, sigma11, s12t,
                            S20,  s21,     S22;

  // Partition views of the temporary work vectors created below.
  FLA_Obj  wT,              w01,
           wB,              omega11,
                            w21;
  FLA_Obj  alT,             a01l,
           alB,             alpha11l,
                            a22l;
  FLA_Obj  apT,             a01p,
           apB,             alpha11p,
                            a12p;
  FLA_Obj  uT,              u01,
           uB,              upsilon11,
                            u21;
  FLA_Obj  uTp,             u01p,
           uBp,             upsilon11p,
                            u21p;
  FLA_Obj  vT,              v01,
           vB,              nu11,
                            v21;
  FLA_Obj  dT,              d0,
           dB,              delta1,
                            d2;
  FLA_Obj  eT,              e0,
           eB,              epsilon1,
                            e2;
  FLA_Obj  fT,              f0,
           fB,              phi1,
                            f2;
  FLA_Obj  gT,              g0,
           gB,              ghi1,
                            g2;

  // Temporary work vectors (owned by this routine, freed before returning).
  FLA_Obj  w, al, ap, u, up, v;
  FLA_Obj  d, e, f, g;

  // Temporary 1x1 scalars (owned by this routine, freed before returning).
  // Only scalars that are actually used by this variant are allocated.
  FLA_Obj  minus_inv_tau11;
  FLA_Obj  last_elem;
  FLA_Obj  beta;
  FLA_Obj  alpha12;
  FLA_Obj  minus_conj_alpha12;
  FLA_Obj  psi11_minus_alpha12;

  // Sub-views taken inside the loop body.
  FLA_Obj  a01_t,
           a01_b;
  FLA_Obj  A02_l, A02_r;
  FLA_Obj  a12t_l, a12t_r;
  FLA_Obj  a12p_t,
           a12p_b;
  FLA_Obj  A22_l, A22_r;
  FLA_Obj  v21_t,
           v21_b;
  FLA_Obj  Y20_t,
           Y20_b;
  FLA_Obj  a2;

  FLA_Datatype datatype_A;
  dim_t        m_A, n_A;
  dim_t        b_alg;


  // The number of iterations is dictated by the length of T.
  b_alg      = FLA_Obj_length( T );

  datatype_A = FLA_Obj_datatype( A );
  m_A        = FLA_Obj_length( A );
  n_A        = FLA_Obj_width( A );

  // Scalars.
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &minus_inv_tau11 );
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &last_elem );
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &beta );
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &alpha12 );
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &minus_conj_alpha12 );
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &psi11_minus_alpha12 );

  // Work vectors, sized by the length (m_A) or width (n_A) of A.
  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );

  // Y and Z are accumulated from scratch; the first iteration relies on the
  // not-yet-computed parts contributing nothing.
  FLA_Set( FLA_ZERO, Y );
  FLA_Set( FLA_ZERO, Z );

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_TL );
  FLA_Part_2x2( Y,    &YTL, &YTR,
                      &YBL, &YBR,     0, 0, FLA_TL );
  FLA_Part_2x2( Z,    &ZTL, &ZTR,
                      &ZBL, &ZBR,     0, 0, FLA_TL );
  FLA_Part_2x2( T,    &TTL, &TTR,
                      &TBL, &TBR,     0, 0, FLA_TL );
  FLA_Part_2x2( S,    &STL, &STR,
                      &SBL, &SBR,     0, 0, FLA_TL );
  FLA_Part_2x1( w,    &wT, 
                      &wB,            0, FLA_TOP );
  FLA_Part_2x1( al,   &alT, 
                      &alB,           0, FLA_TOP );
  FLA_Part_2x1( ap,   &apT, 
                      &apB,           0, FLA_TOP );
  FLA_Part_2x1( u,    &uT, 
                      &uB,            0, FLA_TOP );
  FLA_Part_2x1( up,   &uTp, 
                      &uBp,           0, FLA_TOP );
  FLA_Part_2x1( v,    &vT, 
                      &vB,            0, FLA_TOP );
  FLA_Part_2x1( d,    &dT,
                      &dB,            0, FLA_TOP );
  FLA_Part_2x1( e,    &eT, 
                      &eB,            0, FLA_TOP );
  FLA_Part_2x1( f,    &fT,
                      &fB,            0, FLA_TOP );
  FLA_Part_2x1( g,    &gT,
                      &gB,            0, FLA_TOP );

  while ( FLA_Obj_length( ATL ) < b_alg )
  {
    // Expose the next row/column of every operand.
    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,
                        /* ************* */   /* ************************** */
                                                &a10t, /**/ &alpha11, &a12t,
                           ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,
                           1, 1, FLA_BR );
    FLA_Repart_2x2_to_3x3( YTL, /**/ YTR,       &Y00,  /**/ &y01,   &Y02,
                        /* ************* */   /* ************************ */
                                                &y10t, /**/ &psi11, &y12t,
                           YBL, /**/ YBR,       &Y20,  /**/ &y21,   &Y22,
                           1, 1, FLA_BR );
    FLA_Repart_2x2_to_3x3( ZTL, /**/ ZTR,       &Z00,  /**/ &z01,    &Z02,
                        /* ************* */   /* ************************* */
                                                &z10t, /**/ &zeta11, &z12t,
                           ZBL, /**/ ZBR,       &Z20,  /**/ &z21,    &Z22,
                           1, 1, FLA_BR );
    FLA_Repart_2x2_to_3x3( TTL, /**/ TTR,       &T00,  /**/ &t01,   &T02,
                        /* ************* */   /* ************************** */
                                                &t10t, /**/ &tau11, &t12t,
                           TBL, /**/ TBR,       &T20,  /**/ &t21,   &T22,
                           1, 1, FLA_BR );
    FLA_Repart_2x2_to_3x3( STL, /**/ STR,       &S00,  /**/ &s01,     &S02,
                        /* ************* */   /* ************************** */
                                                &s10t, /**/ &sigma11, &s12t,
                           SBL, /**/ SBR,       &S20,  /**/ &s21,     &S22,
                           1, 1, FLA_BR );
    FLA_Repart_2x1_to_3x1( wT,                &w01, 
                        /* ** */            /* ***** */
                                              &omega11, 
                           wB,                &w21,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( alT,               &a01l, 
                        /* ** */            /* ***** */
                                              &alpha11l, 
                           alB,               &a22l,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( apT,               &a01p, 
                        /* ** */            /* ***** */
                                              &alpha11p, 
                           apB,               &a12p,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( uT,                &u01, 
                        /* ** */            /* ***** */
                                              &upsilon11, 
                           uB,                &u21,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( uTp,               &u01p, 
                        /* ** */            /* ***** */
                                              &upsilon11p, 
                           uBp,               &u21p,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( vT,                &v01, 
                        /* ** */            /* ***** */
                                              &nu11, 
                           vB,                &v21,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( dT,                &d0,
                        /* ** */            /* ****** */
                                              &delta1,
                           dB,                &d2,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( eT,                &e0,
                        /* ** */            /* ******** */
                                              &epsilon1,
                           eB,                &e2,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( fT,                &f0,
                        /* ** */            /* **** */
                                              &phi1,
                           fB,                &f2,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( gT,                &g0,
                        /* ** */            /* **** */
                                              &ghi1,
                           gB,                &g2,        1, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    // Save last element of a01 and set it to one so we can use a01 as
    // v10t^T in subsequent computations. We will restore a01_b later on.
    // Also note: V20^T is stored in A02.
    if ( FLA_Obj_length( ATL ) > 0 )
    {
      FLA_Part_2x1( a01,    &a01_t,
                            &a01_b,            1, FLA_BOTTOM );
      FLA_Copy( a01_b, last_elem );
      FLA_Set( FLA_ONE, a01_b );
    }
    
    // Treat ( alpha11; a21 ) as one column so both can be updated with a
    // single matrix-vector product against ABL / ZBL.
    FLA_Merge_2x1( alpha11,
                   a21,      &a2 );

    // alpha11 = alpha11 - u10t * y10t' - z10t * v10t';
    // a21     = a21     - U20  * y10t' - Z20  * v10t';
    FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01,  FLA_ONE, a2 );

    // a12t = a12t - u10t * Y20' - z10t * V20';
    FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    FLA_Gemv( FLA_CONJ_TRANSPOSE,    FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
    
    // Restore last element of a01.
    if ( FLA_Obj_length( ATL ) > 0 )
    {
      FLA_Copy( last_elem, a01_b );
    }

    // [ alpha11, u21p, tau11 ] = House2( alpha11, a21 );
    FLA_Househ2_UT( FLA_LEFT,
                    alpha11,
                    a21, tau11 );
    FLA_Copy( a21, u21p );

    // Everything that involves the trailing columns is skipped once A22 has
    // no columns left.
    if ( FLA_Obj_width( A22 ) > 0 )
    {
      // minus_inv_tau11 = - 1 / tau11;
      FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
      FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );

      // a12p = ( tau11 - 1 ) * a12t^T / tau11;
      //      = a12t^T - ( 1 / tau11 ) * a12t^T;
      FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
      FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );

      // y21 = - Y20 * ( U20' * u21p ) - V20 * ( Z20' * u21p );
      // (d0 and e0 below are the local work-vector partitions.)
      FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
      FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );

      FLA_Set( FLA_ZERO, y21 );
      FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      FLA_Gemv( FLA_TRANSPOSE,    FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );

      // t01 = a10t' + U20' * u21;
      // d0 already holds U20' * u21p, so it is reused instead of recomputed.
      FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      FLA_Axpy( FLA_ONE, d0, t01 );

      // y21 = y21 + A22' * u21p;
      FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );

      // a12p = a12p - conj(y21) / tau11;
      FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );

      // w21 = A22 * conj(a12p);
      FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );

      // w21 = w21 - U20 * ( Y20' * conj(a12p) ) - Z20 * ( V20' * conj(a12p) );
      FLA_Gemvc( FLA_CONJ_TRANSPOSE,    FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
      FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );

      FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
      FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );

      // Isolate the first column of A22 and A02 and the first row of Y20.
      FLA_Part_1x2( A22,    &A22_l, &A22_r,     1, FLA_LEFT );
      FLA_Part_2x1( Y20,    &Y20_t,
                            &Y20_b,             1, FLA_TOP );
      FLA_Part_1x2( A02,    &A02_l, &A02_r,     1, FLA_LEFT );

      // a22l = A22 * e0 - U20 * ( Y20' * e0 ) - Z20 * ( V20' * e0 );
      // (In this formula e0 denotes the first unit vector, i.e. the products
      // select the first column / row; it is not the local partition e0.)
      FLA_Copy( A22_l, a22l );
      FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
      FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );

      // y21 = y21 + conj(a12t)^T;
      FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );

      FLA_Part_1x2( a12t,    &a12t_l, &a12t_r,      1, FLA_LEFT );
      FLA_Part_2x1( v21,     &v21_t, 
                             &v21_b,            1, FLA_TOP );
      FLA_Part_2x1( a12p,    &a12p_t, 
                             &a12p_b,           1, FLA_TOP );

      // [ alpha12, psi11_minus_alpha12, sigma11 ] = House2s( a12p_t, a12p_b );
      FLA_Househ2s_UT( FLA_RIGHT,
                       a12p_t,
                       a12p_b,
                       alpha12, psi11_minus_alpha12, sigma11 );

      // v21 = conj( ( a12p - alpha12 * e0 ) / ( psi11 - alpha12 ) );
      // (e0 is again the first unit vector: only v21_t receives -alpha12.)
      FLA_Copy( a12p, v21 );
      FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
      FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
      FLA_Conjugate( v21_b );

      // minus_conj_alpha12 = - conj(alpha12);
      FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
      FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );

      // s01 = V20' * v21;
      //     = conj(V02) * v21;
      //     = conj(V02) * conj( ( a12p - alpha12 * e0 ) / ( psi11 - alpha12 ) );
      //     = conj(V02) * ( conj(a12p) - conj(alpha12) * e0 ) / conj( psi11 - alpha12 ) );
      //     = ( conj(V02) * conj(a12p) - conj(V02) * conj(alpha12) * e0 ) / conj( psi11 - alpha12 );
      //     = ( g0 - conj(V02) * conj(alpha12) * e0 ) / conj( psi11 - alpha12 );
      FLA_Copy( g0, s01 );
      FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
      FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );

      // a12t_l = alpha12;
      // a12t_r = v21_b^T;
      FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
      FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
    }

    // u21 = u21p;
    FLA_Copy( u21p, u21 );

    if ( FLA_Obj_width( A22 ) > 0 )
    {
      // beta = - y21' * v21 / tau11;
      FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      FLA_Scal( FLA_MINUS_ONE, beta );
      FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );

      // z21 = ( w21 - conj(alpha12) * a22l ) / conj(psi11 - alpha12) + beta * u21;
      FLA_Copy( w21, z21 );
      FLA_Axpy( minus_conj_alpha12, a22l, z21 );
      FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
      FLA_Axpy( beta, u21, z21 );

      // y21 = y21 / tau11;
      // z21 = z21 / sigma11;
      FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11,   y21 );
      FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
    }
    else // if ( FLA_Obj_width( A22 ) == 0 )
    {
      // The branch above that normally fills t01 was skipped, so compute it
      // directly here.
      // t01 = a10t' + U20' * u21;
      FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
    }

    /*------------------------------------------------------------*/

    // Fold the processed row/column back into the "done" quadrants.
    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,
                                                     a10t, alpha11, /**/ a12t,
                            /* ************** */  /* ************************ */
                              &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,
                              FLA_TL );
    FLA_Cont_with_3x3_to_2x2( &YTL, /**/ &YTR,       Y00,  y01,   /**/ Y02,
                                                     y10t, psi11, /**/ y12t,
                            /* ************** */  /* ********************** */
                              &YBL, /**/ &YBR,       Y20,  y21,   /**/ Y22,
                              FLA_TL );
    FLA_Cont_with_3x3_to_2x2( &ZTL, /**/ &ZTR,       Z00,  z01,    /**/ Z02,
                                                     z10t, zeta11, /**/ z12t,
                            /* ************** */  /* *********************** */
                              &ZBL, /**/ &ZBR,       Z20,  z21,    /**/ Z22,
                              FLA_TL );
    FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR,       T00,  t01,   /**/ T02,
                                                     t10t, tau11, /**/ t12t,
                            /* ************** */  /* ************************ */
                              &TBL, /**/ &TBR,       T20,  t21,   /**/ T22,
                              FLA_TL );
    FLA_Cont_with_3x3_to_2x2( &STL, /**/ &STR,       S00,  s01,     /**/ S02,
                                                     s10t, sigma11, /**/ s12t,
                            /* ************** */  /* ************************ */
                              &SBL, /**/ &SBR,       S20,  s21,     /**/ S22,
                              FLA_TL );
    FLA_Cont_with_3x1_to_2x1( &wT,                w01, 
                                                  omega11, 
                            /* ** */           /* ***** */
                              &wB,                w21,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &alT,               a01l, 
                                                  alpha11l, 
                            /* ** */           /* ***** */
                              &alB,               a22l,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &apT,               a01p, 
                                                  alpha11p, 
                            /* ** */           /* ***** */
                              &apB,               a12p,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &uT,                u01, 
                                                  upsilon11, 
                            /* ** */           /* ***** */
                              &uB,                u21,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &uTp,               u01p, 
                                                  upsilon11p, 
                            /* ** */           /* ***** */
                              &uBp,               u21p,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &vT,                v01, 
                                                  nu11, 
                            /* ** */           /* ***** */
                              &vB,                v21,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &dT,                d0,
                                                  delta1,
                            /* ** */           /* ****** */
                              &dB,                d2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &eT,                e0,
                                                  epsilon1,
                            /* ** */           /* ******** */
                              &eB,                e2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &fT,                f0,
                                                  phi1,
                            /* ** */           /* **** */
                              &fB,                f2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &gT,                g0,
                                                  ghi1,
                            /* ** */           /* **** */
                              &gB,                g2,     FLA_TOP );
  }

  // Release every temporary created above.
  FLA_Obj_free( &minus_inv_tau11 );
  FLA_Obj_free( &last_elem );
  FLA_Obj_free( &beta );
  FLA_Obj_free( &alpha12 );
  FLA_Obj_free( &minus_conj_alpha12 );
  FLA_Obj_free( &psi11_minus_alpha12 );
  FLA_Obj_free( &w );
  FLA_Obj_free( &al );
  FLA_Obj_free( &ap );
  FLA_Obj_free( &u );
  FLA_Obj_free( &up );
  FLA_Obj_free( &v );
  FLA_Obj_free( &d );
  FLA_Obj_free( &e );
  FLA_Obj_free( &f );
  FLA_Obj_free( &g );

  return FLA_SUCCESS;
}

References FLA_Axpy(), FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Househ2_UT(), FLA_Inv_scalc(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_unb_var5().

{
  /*
     Body of an unblocked bidiagonal-reduction step; this listing files it
     under "Referenced by FLA_Bidiag_UT_u_unb_var5()". The signature is not
     shown here, but the code below uses the FLA_Obj operands A, Y, Z, T
     and S.

     Y and Z are set to zero on entry. The loop then advances one diagonal
     position per iteration until FLA_Obj_length( ATL ) reaches
     b_alg = FLA_Obj_length( T ). Each iteration:
       - applies the updates accumulated in Y and Z to the current column
         ( alpha11; a21 ) and to the current row a12t of A;
       - computes a left Householder transform from ( alpha11; a21 ) and
         stores its scalar in tau11 (diagonal entry of T);
       - if columns remain to the right ( FLA_Obj_width( A22 ) > 0 ): forms
         y21, computes a right Householder transform from a12t with its
         scalar in sigma11 (diagonal entry of S), forms z21, and fills t01
         and s01; otherwise only t01 is filled.

     Always returns FLA_SUCCESS; the return values of the FLA_* calls are
     not inspected.
  */
  FLA_Obj  ATL,   ATR,      A00,  a01,     A02,
           ABL,   ABR,      a10t, alpha11, a12t,
                            A20,  a21,     A22;
  FLA_Obj  YTL,   YTR,      Y00,  y01,   Y02,
           YBL,   YBR,      y10t, psi11, y12t,
                            Y20,  y21,   Y22;
  FLA_Obj  ZTL,   ZTR,      Z00,  z01,    Z02,
           ZBL,   ZBR,      z10t, zeta11, z12t,
                            Z20,  z21,    Z22;
  FLA_Obj  TTL,   TTR,      T00,  t01,   T02,
           TBL,   TBR,      t10t, tau11, t12t,
                            T20,  t21,   T22;
  FLA_Obj  STL,   STR,      S00,  s01,     S02,
           SBL,   SBR,      s10t, sigma11, s12t,
                            S20,  s21,     S22;
  FLA_Obj  uT,              u01,
           uB,              upsilon11,
                            u21;
  FLA_Obj  vT,              v01,
           vB,              nu11,
                            v21;
  FLA_Obj  dT,              d0,
           dB,              delta1,
                            d2;
  FLA_Obj  eT,              e0,
           eB,              epsilon1,
                            e2;
  FLA_Obj  fT,              f0,
           fB,              phi1,
                            f2;
  FLA_Obj  gT,              g0,
           gB,              ghi1,
                            g2;
  FLA_Obj  u, v;
  FLA_Obj  d, e, f, g;

  // Scalar temporaries. (The former minus_upsilon11 / minus_zeta11 objects
  // were created and freed without ever being used, so they are gone.)
  FLA_Obj  last_elem;
  FLA_Obj  beta;

  FLA_Obj  a01_t,
           a01_b;
  FLA_Obj  a12t_l, a12t_r;
  FLA_Obj  v21_t,
           v21_b;
  FLA_Obj  a2;

  FLA_Datatype datatype_A;
  dim_t        m_A, n_A;
  dim_t        b_alg;


  // Number of steps to perform is taken from the length of T.
  b_alg      = FLA_Obj_length( T );

  datatype_A = FLA_Obj_datatype( A );
  m_A        = FLA_Obj_length( A );
  n_A        = FLA_Obj_width( A );

  // Temporary workspace: two scalars, plus column vectors sized by the
  // dimensions of A (u, f, g have m_A rows; v, d, e have n_A rows).
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &last_elem );
  FLA_Obj_create( datatype_A, 1,   1, 0, 0, &beta );
  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );

  // Y and Z accumulate the pending updates; start them from zero.
  FLA_Set( FLA_ZERO, Y );
  FLA_Set( FLA_ZERO, Z );

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_TL );
  FLA_Part_2x2( Y,    &YTL, &YTR,
                      &YBL, &YBR,     0, 0, FLA_TL );
  FLA_Part_2x2( Z,    &ZTL, &ZTR,
                      &ZBL, &ZBR,     0, 0, FLA_TL );
  FLA_Part_2x2( T,    &TTL, &TTR,
                      &TBL, &TBR,     0, 0, FLA_TL );
  FLA_Part_2x2( S,    &STL, &STR,
                      &SBL, &SBR,     0, 0, FLA_TL );
  FLA_Part_2x1( u,    &uT,
                      &uB,            0, FLA_TOP );
  FLA_Part_2x1( v,    &vT,
                      &vB,            0, FLA_TOP );
  FLA_Part_2x1( d,    &dT,
                      &dB,            0, FLA_TOP );
  FLA_Part_2x1( e,    &eT,
                      &eB,            0, FLA_TOP );
  FLA_Part_2x1( f,    &fT,
                      &fB,            0, FLA_TOP );
  FLA_Part_2x1( g,    &gT,
                      &gB,            0, FLA_TOP );

  while ( FLA_Obj_length( ATL ) < b_alg )
  {
    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,
                        /* ************* */   /* ************************** */
                                                &a10t, /**/ &alpha11, &a12t,
                           ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,
                           1, 1, FLA_BR );
    FLA_Repart_2x2_to_3x3( YTL, /**/ YTR,       &Y00,  /**/ &y01,   &Y02,
                        /* ************* */   /* ************************ */
                                                &y10t, /**/ &psi11, &y12t,
                           YBL, /**/ YBR,       &Y20,  /**/ &y21,   &Y22,
                           1, 1, FLA_BR );
    FLA_Repart_2x2_to_3x3( ZTL, /**/ ZTR,       &Z00,  /**/ &z01,    &Z02,
                        /* ************* */   /* ************************* */
                                                &z10t, /**/ &zeta11, &z12t,
                           ZBL, /**/ ZBR,       &Z20,  /**/ &z21,    &Z22,
                           1, 1, FLA_BR );
    FLA_Repart_2x2_to_3x3( TTL, /**/ TTR,       &T00,  /**/ &t01,   &T02,
                        /* ************* */   /* ************************** */
                                                &t10t, /**/ &tau11, &t12t,
                           TBL, /**/ TBR,       &T20,  /**/ &t21,   &T22,
                           1, 1, FLA_BR );
    FLA_Repart_2x2_to_3x3( STL, /**/ STR,       &S00,  /**/ &s01,     &S02,
                        /* ************* */   /* ************************** */
                                                &s10t, /**/ &sigma11, &s12t,
                           SBL, /**/ SBR,       &S20,  /**/ &s21,     &S22,
                           1, 1, FLA_BR );
    FLA_Repart_2x1_to_3x1( uT,                &u01,
                        /* ** */            /* ***** */
                                              &upsilon11,
                           uB,                &u21,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( vT,                &v01,
                        /* ** */            /* ***** */
                                              &nu11,
                           vB,                &v21,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( dT,                &d0,
                        /* ** */            /* ****** */
                                              &delta1,
                           dB,                &d2,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( eT,                &e0,
                        /* ** */            /* ******** */
                                              &epsilon1,
                           eB,                &e2,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( fT,                &f0,
                        /* ** */            /* **** */
                                              &phi1,
                           fB,                &f2,        1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( gT,                &g0,
                        /* ** */            /* **** */
                                              &ghi1,
                           gB,                &g2,        1, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    // Save last element of a01 and set it to one so we can use a01 as
    // v10t^T in subsequent computations. We will restore a01_b later on.
    // Also note: V20^T is stored in A02.
    if ( FLA_Obj_length( ATL ) > 0 )
    {
      FLA_Part_2x1( a01,    &a01_t,
                            &a01_b,            1, FLA_BOTTOM );
      FLA_Copy( a01_b, last_elem );
      FLA_Set( FLA_ONE, a01_b );
    }

    // View alpha11 stacked on top of a21 as the single column a2.
    FLA_Merge_2x1( alpha11,
                   a21,      &a2 );

    // alpha11 = alpha11 - u10t * y10t' - z10t * v10t';
    // a21     = a21     - U20  * y10t' - Z20  * v10t';
    FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01,  FLA_ONE, a2 );

    // a12t = a12t - u10t * Y20' - z10t * V20';
    FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    FLA_Gemv( FLA_CONJ_TRANSPOSE,    FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );

    // Restore last element of a01.
    if ( FLA_Obj_length( ATL ) > 0 )
    {
      FLA_Copy( last_elem, a01_b );
    }

    // [ alpha11, u21, tau11 ] = House2( alpha11, a21 );
    FLA_Househ2_UT( FLA_LEFT,
                    alpha11,
                    a21, tau11 );
    FLA_Copy( a21, u21 );

    if ( FLA_Obj_width( A22 ) > 0 )
    {
      // y21' = a12t + u21' * A22;
      // y21  = conj(a12t) + A22' * u21;
      FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
      FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );

      // y21 = y21 - Y20 * ( U20' * u21 ) - V20 * ( Z20' * u21 );
      // The two inner products are kept in d0 and e0 so d0 can be reused
      // for t01 below.
      FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
      FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );

      // t01 = a10t' + U20' * u21;
      FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      FLA_Axpy( FLA_ONE, d0, t01 );

      FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      FLA_Gemv( FLA_TRANSPOSE,    FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );

      // y21 = y21 / tau11;
      FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );

      // a12t = a12t - conj(y21)^T;
      FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );

      FLA_Part_1x2( a12t,    &a12t_l, &a12t_r,      1, FLA_LEFT );
      FLA_Part_2x1( v21,     &v21_t,
                             &v21_b,            1, FLA_TOP );

      // [ a12t_l, v21_b, sigma11 ] = House2( a12t_l, a12t_r );
      FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );

      // v21_t = 1;
      // v21_b = a12t_r^T;
      FLA_Set( FLA_ONE, v21_t );
      FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );

      // beta = - y21' * v21;
      FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
      FLA_Scal( FLA_MINUS_ONE, beta );

      // z21 = A22 * v21 + beta * u21;
      FLA_Copy( u21, z21 );
      FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );

      // z21 = z21 - U20 * ( Y20' * v21 ) - Z20 * ( V20' * v21 );
      // The two inner products are kept in f0 and g0 so g0 can be reused
      // for s01 below.
      FLA_Gemv( FLA_CONJ_TRANSPOSE,    FLA_ONE, Y20, v21, FLA_ZERO, f0 );
      FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );

      FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
      FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );

      // z21 = z21 / sigma11;
      FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );

      // s01 = conj(V02) * v21;
      FLA_Copy( g0, s01 );
    }
    else // if ( FLA_Obj_width( A22 ) == 0 )
    {
      // No columns remain to the right, so only the T column is updated.
      // t01 = a10t' + U20' * u21;
      FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
      FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
    }

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,
                                                     a10t, alpha11, /**/ a12t,
                            /* ************** */  /* ************************ */
                              &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,
                              FLA_TL );
    FLA_Cont_with_3x3_to_2x2( &YTL, /**/ &YTR,       Y00,  y01,   /**/ Y02,
                                                     y10t, psi11, /**/ y12t,
                            /* ************** */  /* ********************** */
                              &YBL, /**/ &YBR,       Y20,  y21,   /**/ Y22,
                              FLA_TL );
    FLA_Cont_with_3x3_to_2x2( &ZTL, /**/ &ZTR,       Z00,  z01,    /**/ Z02,
                                                     z10t, zeta11, /**/ z12t,
                            /* ************** */  /* *********************** */
                              &ZBL, /**/ &ZBR,       Z20,  z21,    /**/ Z22,
                              FLA_TL );
    FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR,       T00,  t01,   /**/ T02,
                                                     t10t, tau11, /**/ t12t,
                            /* ************** */  /* ************************ */
                              &TBL, /**/ &TBR,       T20,  t21,   /**/ T22,
                              FLA_TL );
    FLA_Cont_with_3x3_to_2x2( &STL, /**/ &STR,       S00,  s01,     /**/ S02,
                                                     s10t, sigma11, /**/ s12t,
                            /* ************** */  /* ************************ */
                              &SBL, /**/ &SBR,       S20,  s21,     /**/ S22,
                              FLA_TL );
    FLA_Cont_with_3x1_to_2x1( &uT,                u01,
                                                  upsilon11,
                            /* ** */           /* ***** */
                              &uB,                u21,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &vT,                v01,
                                                  nu11,
                            /* ** */           /* ***** */
                              &vB,                v21,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &dT,                d0,
                                                  delta1,
                            /* ** */           /* ****** */
                              &dB,                d2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &eT,                e0,
                                                  epsilon1,
                            /* ** */           /* ******** */
                              &eB,                e2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &fT,                f0,
                                                  phi1,
                            /* ** */           /* **** */
                              &fB,                f2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &gT,                g0,
                                                  ghi1,
                            /* ** */           /* **** */
                              &gB,                g2,     FLA_TOP );
  }

  // Release every temporary object created above.
  FLA_Obj_free( &last_elem );
  FLA_Obj_free( &beta );
  FLA_Obj_free( &u );
  FLA_Obj_free( &v );
  FLA_Obj_free( &d );
  FLA_Obj_free( &e );
  FLA_Obj_free( &f );
  FLA_Obj_free( &g );

  return FLA_SUCCESS;
}

References FLA_Bidiag_UT_u_step_unb_var1().

Referenced by FLA_Bidiag_UT_u().

{
  // Thin wrapper: forwards A, TU and TV unchanged to the variant-1
  // unblocked step routine and returns its FLA_Error result as-is.
  return FLA_Bidiag_UT_u_step_unb_var1( A, TU, TV );
}

References FLA_Bidiag_UT_u_step_unb_var2().

Referenced by FLA_Bidiag_UT_u().

{
  // Thin wrapper: forwards A, TU and TV unchanged to the variant-2
  // unblocked step routine and returns its FLA_Error result as-is.
  return FLA_Bidiag_UT_u_step_unb_var2( A, TU, TV );
}

References FLA_Bidiag_UT_u_step_unb_var3().

Referenced by FLA_Bidiag_UT_u().

{
  // Thin wrapper: forwards A, TU and TV unchanged to the variant-3
  // unblocked step routine and returns its FLA_Error result as-is.
  return FLA_Bidiag_UT_u_step_unb_var3( A, TU, TV );
}

References FLA_Bidiag_UT_u_step_unb_var4(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u().

{
  /*
     Runs the variant-4 unblocked step on A using freshly created
     workspace objects Y and Z, releases the workspace, and returns the
     step routine's FLA_Error result.
  */
  const FLA_Datatype dt_A   = FLA_Obj_datatype( A );
  const dim_t        rows_A = FLA_Obj_length( A );
  const dim_t        cols_A = FLA_Obj_width( A );
  FLA_Obj            Y, Z;
  FLA_Error          status;

  // Workspace: Y is cols_A x cols_A, Z is rows_A x cols_A, both with the
  // datatype of A.
  FLA_Obj_create( dt_A, cols_A, cols_A, 0, 0, &Y );
  FLA_Obj_create( dt_A, rows_A, cols_A, 0, 0, &Z );

  status = FLA_Bidiag_UT_u_step_unb_var4( A, Y, Z, TU, TV );

  FLA_Obj_free( &Y );
  FLA_Obj_free( &Z );

  return status;
}

References FLA_Bidiag_UT_u_step_unb_var5(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u().

{
  /*
     Runs the variant-5 unblocked step on A using freshly created
     workspace objects Y and Z, releases the workspace, and returns the
     step routine's FLA_Error result.
  */
  const FLA_Datatype dt_A   = FLA_Obj_datatype( A );
  const dim_t        rows_A = FLA_Obj_length( A );
  const dim_t        cols_A = FLA_Obj_width( A );
  FLA_Obj            Y, Z;
  FLA_Error          status;

  // Workspace: Y is cols_A x cols_A, Z is rows_A x cols_A, both with the
  // datatype of A.
  FLA_Obj_create( dt_A, cols_A, cols_A, 0, 0, &Y );
  FLA_Obj_create( dt_A, rows_A, cols_A, 0, 0, &Z );

  status = FLA_Bidiag_UT_u_step_unb_var5( A, Y, Z, TU, TV );

  FLA_Obj_free( &Y );
  FLA_Obj_free( &Z );

  return status;
}
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1 ( int  m_A,
int  n_A,
scomplex *  buff_tau,
scomplex *  buff_beta,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_a,
int  inc_a,
scomplex *  buff_y,
int  inc_y,
scomplex *  buff_w,
int  inc_w 
)

References bl1_caxpyv(), bl1_cdots(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

{
/*
   Single-precision complex kernel. Effective computation:
   y = beta * y + A' * u;
   a = a - conj(y) / tau;
   w = A * conj(a);
   A is traversed one column at a time; w is cleared first and then
   accumulated column by column.
*/
  scomplex* const p_one       = FLA_COMPLEX_PTR( FLA_ONE );
  scomplex* const p_zero      = FLA_COMPLEX_PTR( FLA_ZERO );
  scomplex* const p_minus_one = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
  scomplex        neg_inv_tau;
  int             j;

  // w = 0;
  bl1_csetv( m_A,
             p_zero,
             buff_w, inc_w );

  // neg_inv_tau = -1 / tau;
  bl1_cdiv3( p_minus_one, buff_tau, &neg_inv_tau );

  for ( j = 0; j < n_A; ++j )
  {
    // Column j of A and the matching entries of y and a.
    scomplex* col_j   = buff_A + j*cs_A;
    scomplex* psi_j   = buff_y + j*inc_y;
    scomplex* alpha_j = buff_a + j*inc_a;
    scomplex  psi_conj;
    scomplex  alpha_conj;

    // psi_j = beta * psi_j + col_j' * u;
    bl1_cdots( BLIS1_CONJUGATE,
               m_A,
               p_one,
               col_j,  rs_A,
               buff_u, inc_u,
               buff_beta,
               psi_j );

    // alpha_j = alpha_j - conj(psi_j) / tau;
    bl1_ccopyconj( psi_j, &psi_conj );
    bl1_cmult4( &neg_inv_tau, &psi_conj, alpha_j, alpha_j );

    // w = w + conj(alpha_j) * col_j;
    bl1_ccopyconj( alpha_j, &alpha_conj );
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                &alpha_conj,
                col_j,  rs_A,
                buff_w, inc_w );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1 ( int  m_A,
int  n_A,
double *  buff_tau,
double *  buff_beta,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_u,
int  inc_u,
double *  buff_a,
int  inc_a,
double *  buff_y,
int  inc_y,
double *  buff_w,
int  inc_w 
)

References bl1_d0(), bl1_daxpyv(), bl1_daxpyv2b(), bl1_ddot(), bl1_ddotsv2(), bl1_dm1(), bl1_dsetv(), BLIS1_CONJUGATE, and BLIS1_NO_CONJUGATE.

Referenced by FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

{
/*
   Double-precision real kernel. Effective computation:
   y = beta * y + A' * u;
   a = a - conj(y) / tau;
   w = A * conj(a);
   Columns of A are processed two at a time; a trailing single column is
   handled separately when n_A is odd.
*/
  double    d_zero      = bl1_d0();
  double    d_minus_one = bl1_dm1();
  double*   restrict u       = buff_u;
  double*   restrict w       = buff_w;
  double*   restrict beta    = buff_beta;
  double*   restrict col_a   = buff_A;
  double*   restrict col_b   = buff_A + cs_A;
  double*   restrict psi_a   = buff_y;
  double*   restrict psi_b   = buff_y + inc_y;
  double*   restrict alpha_a = buff_a;
  double*   restrict alpha_b = buff_a + inc_a;

  double    neg_inv_tau;
  int       pairs_left;

  // Strides that move every cursor forward by two columns / two entries.
  const int two_cs_A  = 2*cs_A;
  const int two_inc_y = 2*inc_y;
  const int two_inc_a = 2*inc_a;

  // w = 0;
  bl1_dsetv( m_A,
             &d_zero,
             buff_w, inc_w );

  // neg_inv_tau = -1 / tau;
  bl1_ddiv3( &d_minus_one, buff_tau, &neg_inv_tau );

  for ( pairs_left = n_A / 2; pairs_left > 0; --pairs_left )
  {
    // ( psi_a, psi_b ) = beta * ( psi_a, psi_b ) + ( col_a' * u, col_b' * u );
    bl1_ddotsv2( BLIS1_CONJUGATE,
                 m_A,
                 col_a, rs_A,
                 col_b, rs_A,
                 u,     inc_u,
                 beta,
                 psi_a,
                 psi_b );

    // alpha = alpha - psi / tau, for both entries of the pair.
    bl1_dmult4( &neg_inv_tau, psi_a, alpha_a, alpha_a );
    bl1_dmult4( &neg_inv_tau, psi_b, alpha_b, alpha_b );

    // w = w + alpha_a * col_a + alpha_b * col_b;
    bl1_daxpyv2b( m_A,
                  alpha_a,
                  alpha_b,
                  col_a, rs_A,
                  col_b, rs_A,
                  w,     inc_w );

    col_a   += two_cs_A;
    col_b   += two_cs_A;
    psi_a   += two_inc_y;
    psi_b   += two_inc_y;
    alpha_a += two_inc_a;
    alpha_b += two_inc_a;
  }

  // Odd column count: one column is left over, addressed by col_a.
  if ( n_A % 2 == 1 )
  {
    double   dot_au;

    // psi_a = beta * psi_a + col_a' * u;
    bl1_ddot( BLIS1_CONJUGATE,
              m_A,
              col_a, rs_A,
              u,     inc_u,
              &dot_au );
    bl1_dscals( buff_beta, psi_a );
    bl1_dadd3( psi_a, &dot_au, psi_a );

    // alpha_a = alpha_a - psi_a / tau;
    bl1_dmult4( &neg_inv_tau, psi_a, alpha_a, alpha_a );

    // w = w + alpha_a * col_a;
    bl1_daxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                alpha_a,
                col_a, rs_A,
                w,     inc_w );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1 ( int  m_A,
int  n_A,
float *  buff_tau,
float *  buff_beta,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_u,
int  inc_u,
float *  buff_a,
int  inc_a,
float *  buff_y,
int  inc_y,
float *  buff_w,
int  inc_w 
)

References bl1_saxpyv(), bl1_sdots(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

{
/*
   Single-precision real kernel. Effective computation:
   y = beta * y + A' * u;
   a = a - conj(y) / tau;
   w = A * conj(a);
   A is traversed one column at a time; w is cleared first and then
   accumulated column by column.
*/
  float* const p_one       = FLA_FLOAT_PTR( FLA_ONE );
  float* const p_zero      = FLA_FLOAT_PTR( FLA_ZERO );
  float* const p_minus_one = FLA_FLOAT_PTR( FLA_MINUS_ONE );
  float        neg_inv_tau;
  int          j;

  // w = 0;
  bl1_ssetv( m_A,
             p_zero,
             buff_w, inc_w );

  // neg_inv_tau = -1 / tau;
  neg_inv_tau = *p_minus_one / *buff_tau;

  for ( j = 0; j < n_A; ++j )
  {
    // Column j of A and the matching entries of y and a.
    float* col_j   = buff_A + j*cs_A;
    float* psi_j   = buff_y + j*inc_y;
    float* alpha_j = buff_a + j*inc_a;

    // psi_j = beta * psi_j + col_j' * u;
    bl1_sdots( BLIS1_CONJUGATE,
               m_A,
               p_one,
               col_j,  rs_A,
               buff_u, inc_u,
               buff_beta,
               psi_j );

    // alpha_j = alpha_j - psi_j / tau;
    *alpha_j = *alpha_j + neg_inv_tau * *psi_j;

    // w = w + alpha_j * col_j;
    bl1_saxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                alpha_j,
                col_j,  rs_A,
                buff_w, inc_w );
  }

  return FLA_SUCCESS;
}

References FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

{
/*
   Datatype dispatcher. Effective computation of the selected kernel:
   y = beta * y + A' * u;
   a = a - conj(y) / tau;
   w = A * conj(a);
   Dimensions and strides are read from the objects once, then the typed
   kernel matching the datatype of A is called on the raw buffers. A
   datatype other than the four handled below performs no work. The
   kernel's return value is not inspected; FLA_SUCCESS is always returned.
*/
  const FLA_Datatype dt     = FLA_Obj_datatype( A );

  const int          rows   = FLA_Obj_length( A );
  const int          cols   = FLA_Obj_width( A );

  const int          rs     = FLA_Obj_row_stride( A );
  const int          cs     = FLA_Obj_col_stride( A );

  const int          step_u = FLA_Obj_vector_inc( u );
  const int          step_a = FLA_Obj_vector_inc( a );
  const int          step_y = FLA_Obj_vector_inc( y );
  const int          step_w = FLA_Obj_vector_inc( w );

  if ( dt == FLA_FLOAT )
  {
    float* ptr_A    = FLA_FLOAT_PTR( A );
    float* ptr_u    = FLA_FLOAT_PTR( u );
    float* ptr_a    = FLA_FLOAT_PTR( a );
    float* ptr_y    = FLA_FLOAT_PTR( y );
    float* ptr_w    = FLA_FLOAT_PTR( w );
    float* ptr_tau  = FLA_FLOAT_PTR( tau );
    float* ptr_beta = FLA_FLOAT_PTR( beta );

    FLA_Fused_Ahx_Axpy_Ax_ops_var1( rows, cols,
                                    ptr_tau, ptr_beta,
                                    ptr_A, rs, cs,
                                    ptr_u, step_u,
                                    ptr_a, step_a,
                                    ptr_y, step_y,
                                    ptr_w, step_w );
  }
  else if ( dt == FLA_DOUBLE )
  {
    double* ptr_A    = FLA_DOUBLE_PTR( A );
    double* ptr_u    = FLA_DOUBLE_PTR( u );
    double* ptr_a    = FLA_DOUBLE_PTR( a );
    double* ptr_y    = FLA_DOUBLE_PTR( y );
    double* ptr_w    = FLA_DOUBLE_PTR( w );
    double* ptr_tau  = FLA_DOUBLE_PTR( tau );
    double* ptr_beta = FLA_DOUBLE_PTR( beta );

    FLA_Fused_Ahx_Axpy_Ax_opd_var1( rows, cols,
                                    ptr_tau, ptr_beta,
                                    ptr_A, rs, cs,
                                    ptr_u, step_u,
                                    ptr_a, step_a,
                                    ptr_y, step_y,
                                    ptr_w, step_w );
  }
  else if ( dt == FLA_COMPLEX )
  {
    scomplex* ptr_A    = FLA_COMPLEX_PTR( A );
    scomplex* ptr_u    = FLA_COMPLEX_PTR( u );
    scomplex* ptr_a    = FLA_COMPLEX_PTR( a );
    scomplex* ptr_y    = FLA_COMPLEX_PTR( y );
    scomplex* ptr_w    = FLA_COMPLEX_PTR( w );
    scomplex* ptr_tau  = FLA_COMPLEX_PTR( tau );
    scomplex* ptr_beta = FLA_COMPLEX_PTR( beta );

    FLA_Fused_Ahx_Axpy_Ax_opc_var1( rows, cols,
                                    ptr_tau, ptr_beta,
                                    ptr_A, rs, cs,
                                    ptr_u, step_u,
                                    ptr_a, step_a,
                                    ptr_y, step_y,
                                    ptr_w, step_w );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* ptr_A    = FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex* ptr_u    = FLA_DOUBLE_COMPLEX_PTR( u );
    dcomplex* ptr_a    = FLA_DOUBLE_COMPLEX_PTR( a );
    dcomplex* ptr_y    = FLA_DOUBLE_COMPLEX_PTR( y );
    dcomplex* ptr_w    = FLA_DOUBLE_COMPLEX_PTR( w );
    dcomplex* ptr_tau  = FLA_DOUBLE_COMPLEX_PTR( tau );
    dcomplex* ptr_beta = FLA_DOUBLE_COMPLEX_PTR( beta );

    FLA_Fused_Ahx_Axpy_Ax_opz_var1( rows, cols,
                                    ptr_tau, ptr_beta,
                                    ptr_A, rs, cs,
                                    ptr_u, step_u,
                                    ptr_a, step_a,
                                    ptr_y, step_y,
                                    ptr_w, step_w );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  buff_tau,
dcomplex *  buff_beta,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_a,
int  inc_a,
dcomplex *  buff_y,
int  inc_y,
dcomplex *  buff_w,
int  inc_w 
)

References bl1_z0(), bl1_zaxpyv(), bl1_zaxpyv2b(), bl1_zdot(), bl1_zdotsv2(), bl1_zm1(), bl1_zsetv(), BLIS1_CONJUGATE, and BLIS1_NO_CONJUGATE.

Referenced by FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

{
/*
   Double-precision complex kernel. Effective computation:
   y = beta * y + A' * u;
   a = a - conj(y) / tau;
   w = A * conj(a);
   Columns of A are processed two at a time; a trailing single column is
   handled separately when n_A is odd.
*/
  dcomplex  z_zero      = bl1_z0();
  dcomplex  z_minus_one = bl1_zm1();
  dcomplex* restrict u       = buff_u;
  dcomplex* restrict w       = buff_w;
  dcomplex* restrict beta    = buff_beta;
  dcomplex* restrict col_a   = buff_A;
  dcomplex* restrict col_b   = buff_A + cs_A;
  dcomplex* restrict psi_a   = buff_y;
  dcomplex* restrict psi_b   = buff_y + inc_y;
  dcomplex* restrict alpha_a = buff_a;
  dcomplex* restrict alpha_b = buff_a + inc_a;

  dcomplex  neg_inv_tau;
  dcomplex  psi_a_conj;
  dcomplex  psi_b_conj;
  dcomplex  alpha_a_conj;
  dcomplex  alpha_b_conj;
  int       pairs_left;

  // Strides that move every cursor forward by two columns / two entries.
  const int two_cs_A  = 2*cs_A;
  const int two_inc_y = 2*inc_y;
  const int two_inc_a = 2*inc_a;

  // w = 0;
  bl1_zsetv( m_A,
             &z_zero,
             buff_w, inc_w );

  // neg_inv_tau = -1 / tau;
  bl1_zdiv3( &z_minus_one, buff_tau, &neg_inv_tau );

  for ( pairs_left = n_A / 2; pairs_left > 0; --pairs_left )
  {
    // ( psi_a, psi_b ) = beta * ( psi_a, psi_b ) + ( col_a' * u, col_b' * u );
    bl1_zdotsv2( BLIS1_CONJUGATE,
                 m_A,
                 col_a, rs_A,
                 col_b, rs_A,
                 u,     inc_u,
                 beta,
                 psi_a,
                 psi_b );

    // alpha = alpha - conj(psi) / tau, for both entries of the pair.
    bl1_zcopyconj( psi_a, &psi_a_conj );
    bl1_zcopyconj( psi_b, &psi_b_conj );
    bl1_zmult4( &neg_inv_tau, &psi_a_conj, alpha_a, alpha_a );
    bl1_zmult4( &neg_inv_tau, &psi_b_conj, alpha_b, alpha_b );

    // w = w + conj(alpha_a) * col_a + conj(alpha_b) * col_b;
    bl1_zcopyconj( alpha_a, &alpha_a_conj );
    bl1_zcopyconj( alpha_b, &alpha_b_conj );
    bl1_zaxpyv2b( m_A,
                  &alpha_a_conj,
                  &alpha_b_conj,
                  col_a, rs_A,
                  col_b, rs_A,
                  w,     inc_w );

    col_a   += two_cs_A;
    col_b   += two_cs_A;
    psi_a   += two_inc_y;
    psi_b   += two_inc_y;
    alpha_a += two_inc_a;
    alpha_b += two_inc_a;
  }

  // Odd column count: one column is left over, addressed by col_a.
  if ( n_A % 2 == 1 )
  {
    dcomplex dot_au;

    // psi_a = beta * psi_a + col_a' * u;
    bl1_zdot( BLIS1_CONJUGATE,
              m_A,
              col_a, rs_A,
              u,     inc_u,
              &dot_au );
    bl1_zscals( buff_beta, psi_a );
    bl1_zadd3( psi_a, &dot_au, psi_a );

    // alpha_a = alpha_a - conj(psi_a) / tau;
    bl1_zcopyconj( psi_a, &psi_a_conj );
    bl1_zmult4( &neg_inv_tau, &psi_a_conj, alpha_a, alpha_a );

    // w = w + conj(alpha_a) * col_a;
    bl1_zcopyconj( alpha_a, &alpha_a_conj );
    bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                &alpha_a_conj,
                col_a, rs_A,
                w,     inc_w );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1 ( int  m_A,
int  n_A,
scomplex *  buff_tau,
scomplex *  buff_alpha,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_y,
int  inc_y,
scomplex *  buff_z,
int  inc_z,
scomplex *  buff_v,
int  inc_v,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_up,
int  inc_up,
scomplex *  buff_a,
int  inc_a,
scomplex *  buff_w,
int  inc_w 
)

References bl1_caxpyv(), bl1_cdot(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofc_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().

{
/*
   Single-precision complex kernel, one column of A per iteration:
     1. column j of A is updated with two scaled vectors,
          A(:,j) += ( alpha * conj(y_j) ) * u + ( alpha * conj(v_j) ) * z;
     2. y_j is overwritten with the conjugated dot product of the updated
        column and up;
     3. a_j = a_j - conj(y_j) / tau;
     4. w   = w + conj(a_j) * A(:,j), with w cleared before the loop.
*/
  scomplex* const p_zero      = FLA_COMPLEX_PTR( FLA_ZERO );
  scomplex* const p_minus_one = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
  scomplex        neg_inv_tau;
  int             j;

  // w = 0;
  bl1_csetv( m_A,
             p_zero,
             buff_w, inc_w );

  // neg_inv_tau = -1 / tau;
  bl1_cdiv3( p_minus_one, buff_tau, &neg_inv_tau );

  for ( j = 0; j < n_A; ++j )
  {
    // Column j of A and the matching entries of y, v and a.
    scomplex* col_j   = buff_A + j*cs_A;
    scomplex* psi_j   = buff_y + j*inc_y;
    scomplex* nu_j    = buff_v + j*inc_v;
    scomplex* alpha_j = buff_a + j*inc_a;
    scomplex  psi_conj;
    scomplex  nu_conj;
    scomplex  alpha_conj;
    scomplex  scale_u;
    scomplex  scale_z;

    // scale_u = alpha * conj(psi_j);
    bl1_ccopyconj( psi_j, &psi_conj );
    bl1_cmult3( buff_alpha, &psi_conj, &scale_u );

    // scale_z = alpha * conj(nu_j);
    bl1_ccopyconj( nu_j, &nu_conj );
    bl1_cmult3( buff_alpha, &nu_conj, &scale_z );

    // col_j = col_j + scale_u * u;
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                &scale_u,
                buff_u, inc_u,
                col_j,  rs_A );

    // col_j = col_j + scale_z * z;
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                &scale_z,
                buff_z, inc_z,
                col_j,  rs_A );

    // psi_j = col_j' * up;
    bl1_cdot( BLIS1_CONJUGATE,
              m_A,
              col_j,   rs_A,
              buff_up, inc_up,
              psi_j );

    // alpha_j = alpha_j - conj(psi_j) / tau;
    bl1_ccopyconj( psi_j, &psi_conj );
    bl1_cmult4( &neg_inv_tau, &psi_conj, alpha_j, alpha_j );

    // w = w + conj(alpha_j) * col_j;
    bl1_ccopyconj( alpha_j, &alpha_conj );
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                &alpha_conj,
                col_j,  rs_A,
                buff_w, inc_w );
  }

  return FLA_SUCCESS;
}
/*
   Double-precision real kernel for the fused operation stated in the
   "Effective computation" comment inside the main loop:
     A = A + alpha * ( u * y' + z * v' );
     y = A' * up;
     a = a - conj(y) / tau;
     w = A * conj(a);

   buff_w is first set to zero over m_A elements. The columns of A are then
   processed two at a time (n_A / 2 iterations); when n_A is odd, the one
   remaining column is handled by the trailing "n_left == 1" branch.

   buff_y and buff_a are overwritten element by element; buff_A is updated in
   place. The function always returns FLA_SUCCESS.
*/
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1 ( int  m_A,
int  n_A,
double *  buff_tau,
double *  buff_alpha,
double *  buff_u,
int  inc_u,
double *  buff_y,
int  inc_y,
double *  buff_z,
int  inc_z,
double *  buff_v,
int  inc_v,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_up,
int  inc_up,
double *  buff_a,
int  inc_a,
double *  buff_w,
int  inc_w 
)

References bl1_d0(), bl1_daxpyv(), bl1_daxpyv2b(), bl1_ddot(), bl1_ddotsv2(), bl1_dm1(), bl1_dsetv(), BLIS1_CONJUGATE, and BLIS1_NO_CONJUGATE.

Referenced by FLA_Bidiag_UT_u_step_ofd_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().

{
  double    zero      = bl1_d0();
  double    minus_one = bl1_dm1();
  /* Loop-invariant operands: these point at the start of their vectors for
     the whole call. */
  double*   restrict u  = buff_u;
  double*   restrict up = buff_up;
  double*   restrict w = buff_w;
  double*   restrict z  = buff_z;
  double*   restrict alpha = buff_alpha;
  /* Stepping pointers: the "1" pointer addresses the current column/element,
     the "2" pointer the one immediately after it. */
  double*   restrict a1;
  double*   restrict a2;
  double*   restrict psi1;
  double*   restrict psi2;
  double*   restrict alpha1;
  double*   restrict alpha2;
  double*   restrict nu1;
  double*   restrict nu2;

  double    minus_inv_tau;
  /* The "conj" in these names mirrors the complex variants; in this real
     kernel they are filled from alpha and the unconjugated psi/nu values. */
  double    alpha_conj_psi1;
  double    alpha_conj_psi2;
  double    alpha_conj_nu1;
  double    alpha_conj_nu2;
  int       i;
  int       n_run    = n_A / 2;   /* number of two-column iterations      */
  int       n_left   = n_A % 2;   /* 1 when a single column is left over  */
  int       twocs_A  = 2*cs_A;    /* pointer steps for advancing by two   */
  int       twoinc_y = 2*inc_y;
  int       twoinc_a = 2*inc_a;
  int       twoinc_v = 2*inc_v;


  /* Clear the m_A-element accumulator w before any column contributes. */
  bl1_dsetv( m_A,
             &zero,
             buff_w, inc_w );

  /* minus_inv_tau is formed from minus_one and *buff_tau; presumably
     -1 / tau, matching "a = a - conj(y) / tau" -- bl1_ddiv3 is defined
     outside this view. */
  bl1_ddiv3( &minus_one, buff_tau, &minus_inv_tau );

  a1     = buff_A;
  a2     = buff_A + cs_A;
  psi1   = buff_y;
  psi2   = buff_y + inc_y;
  alpha1 = buff_a;
  alpha2 = buff_a + inc_a;
  nu1    = buff_v;
  nu2    = buff_v + inc_v;

  for ( i = 0; i < n_run; ++i )
  {

    /*------------------------------------------------------------*/

    /* Scalars for the rank-2 update of the two current columns. They are
       computed before psi1/psi2 are overwritten further down. */
    bl1_dmult3( alpha, psi1, &alpha_conj_psi1 );
    bl1_dmult3( alpha, psi2, &alpha_conj_psi2 );

    bl1_dmult3( alpha, nu1, &alpha_conj_nu1 );
    bl1_dmult3( alpha, nu2, &alpha_conj_nu2 );

/*
   Effective computation:
   A = A + alpha * ( u * y' + z * v' );
   y = A' * up;
   a = a - conj(y) / tau;
   w = A * conj(a);
*/
    /* Update columns a1 and a2 of A using u and z. */
    bl1_daxpyv2b( m_A,
                  &alpha_conj_psi1,
                  &alpha_conj_nu1,
                  u,  inc_u,
                  z,  inc_z,
                  a1, rs_A );
    bl1_daxpyv2b( m_A,
                  &alpha_conj_psi2,
                  &alpha_conj_nu2,
                  u,  inc_u,
                  z,  inc_z,
                  a2, rs_A );


    /* Two dot products of the updated columns against up; the results are
       written to psi1 and psi2 (the corresponding entries of y). */
    bl1_ddotsv2( BLIS1_CONJUGATE,
                 m_A,
                 a1, rs_A,
                 a2, rs_A,
                 up, inc_up,
                 &zero,
                 psi1,
                 psi2 );

    /* Update the two entries of a from the freshly computed psi values. */
    bl1_dmult4( &minus_inv_tau, psi1, alpha1, alpha1 );
    bl1_dmult4( &minus_inv_tau, psi2, alpha2, alpha2 );

    /* Accumulate the contribution of both columns into w. */
    bl1_daxpyv2b( m_A,
                  alpha1,
                  alpha2,
                  a1, rs_A,
                  a2, rs_A,
                  w,  inc_w );

    /*------------------------------------------------------------*/

    /* Advance every stepping pointer by two columns/elements. */
    a1     += twocs_A;
    a2     += twocs_A;
    psi1   += twoinc_y;
    psi2   += twoinc_y;
    alpha1 += twoinc_a;
    alpha2 += twoinc_a;
    nu1    += twoinc_v;
    nu2    += twoinc_v;
  }

  /* Odd n_A: a1/psi1/alpha1/nu1 now address the final column, which gets the
     same sequence of steps using the single-vector routines. */
  if ( n_left == 1 )
  {
    double   rho1;

    bl1_dmult3( alpha, psi1, &alpha_conj_psi1 );
    bl1_dmult3( alpha, nu1,  &alpha_conj_nu1 );

    bl1_daxpyv2b( m_A,
                  &alpha_conj_psi1,
                  &alpha_conj_nu1,
                  u,  inc_u,
                  z,  inc_z,
                  a1, rs_A );

    bl1_ddot( BLIS1_CONJUGATE,
              m_A,
              a1, rs_A,
              up, inc_up,
              &rho1 );
    /* psi1 is scaled by zero and then has rho1 added to it, rather than being
       assigned rho1 directly. */
    bl1_dscals( &zero, psi1 );
    bl1_dadd3( psi1, &rho1, psi1 );

    bl1_dmult4( &minus_inv_tau, psi1, alpha1, alpha1 );

    bl1_daxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                alpha1,
                a1, rs_A,
                w,  inc_w );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1 ( int  m_A,
int  n_A,
float *  buff_tau,
float *  buff_alpha,
float *  buff_u,
int  inc_u,
float *  buff_y,
int  inc_y,
float *  buff_z,
int  inc_z,
float *  buff_v,
int  inc_v,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_up,
int  inc_up,
float *  buff_a,
int  inc_a,
float *  buff_w,
int  inc_w 
)

References bl1_saxpyv(), bl1_sdot(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_step_ofs_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().

{
/*
   Single-precision real kernel. Effective computation:
   A = A + alpha * ( u * y' + z * v' );
   y = A' * up;
   a = a - conj(y) / tau;
   w = A * conj(a);

   A is handled one column per iteration; buff_y, buff_a and buff_A are
   updated in place and buff_w is rebuilt from zero. Always returns
   FLA_SUCCESS.
*/
  float* const  zero_ptr      = FLA_FLOAT_PTR( FLA_ZERO );
  float* const  minus_one_ptr = FLA_FLOAT_PTR( FLA_MINUS_ONE );
  float         neg_inv_tau;
  int           j;

  /* Start w from zero so the per-column contributions can be accumulated. */
  bl1_ssetv( m_A, zero_ptr, buff_w, inc_w );

  neg_inv_tau = *minus_one_ptr / *buff_tau;

  for ( j = 0; j < n_A; ++j )
  {
    float*  col     = buff_A + j*cs_A;   /* column j of A        */
    float*  psi     = buff_y + j*inc_y;  /* element j of y       */
    float*  nu      = buff_v + j*inc_v;  /* element j of v       */
    float*  alpha_j = buff_a + j*inc_a;  /* element j of a       */
    float   u_coef;
    float   z_coef;

    /* Scalars for the column update; taken before psi is overwritten. */
    u_coef = *buff_alpha * *psi;
    z_coef = *buff_alpha * *nu;

    /* Column update, first with u and then with z. */
    bl1_saxpyv( BLIS1_NO_CONJUGATE, m_A, &u_coef, buff_u, inc_u, col, rs_A );
    bl1_saxpyv( BLIS1_NO_CONJUGATE, m_A, &z_coef, buff_z, inc_z, col, rs_A );

    /* Dot product of the updated column with up, stored into y(j). */
    bl1_sdot( BLIS1_CONJUGATE, m_A, col, rs_A, buff_up, inc_up, psi );

    /* a(j) = a(j) - y(j) / tau */
    *alpha_j += neg_inv_tau * *psi;

    /* Accumulate this column, scaled by a(j), into w. */
    bl1_saxpyv( BLIS1_NO_CONJUGATE, m_A, alpha_j, col, rs_A, buff_w, inc_w );
  }

  return FLA_SUCCESS;
}

References FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

{
/*
   Effective computation:
   A = A + alpha * ( u * y' + z * v' );
   y = A' * up;
   a = a - conj(y) / tau;
   w = A * conj(a);

   This body is the datatype dispatcher: it reads the dimensions and strides
   of A and the increments of the vector operands, extracts typed buffer
   pointers, and forwards everything to the ops/opd/opc/opz kernel matching
   the datatype of A. Only the datatype of A is inspected; the other operands
   are accessed with the same pointer type.
*/
  FLA_Datatype datatype;
  int          m_A, n_A;
  int          rs_A, cs_A;
  int          inc_u, inc_y, inc_z, inc_v;
  int          inc_up, inc_a, inc_w;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  n_A      = FLA_Obj_width( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_u    = FLA_Obj_vector_inc( u );
  inc_y    = FLA_Obj_vector_inc( y );
  inc_z    = FLA_Obj_vector_inc( z );
  inc_v    = FLA_Obj_vector_inc( v );

  inc_up   = FLA_Obj_vector_inc( up );
  inc_a    = FLA_Obj_vector_inc( a );
  inc_w    = FLA_Obj_vector_inc( w );
  

  /* There is no default case: a datatype other than the four handled below
     falls through the switch and the function still returns FLA_SUCCESS
     without touching any operand. */
  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A   = FLA_FLOAT_PTR( A );
      float* buff_u   = FLA_FLOAT_PTR( u );
      float* buff_y   = FLA_FLOAT_PTR( y );
      float* buff_z   = FLA_FLOAT_PTR( z );
      float* buff_v   = FLA_FLOAT_PTR( v );
      float* buff_up  = FLA_FLOAT_PTR( up );
      float* buff_a   = FLA_FLOAT_PTR( a );
      float* buff_w   = FLA_FLOAT_PTR( w );
      float* buff_tau = FLA_FLOAT_PTR( tau );
      float* buff_alpha = FLA_FLOAT_PTR( alpha );

      FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1( m_A,
                                            n_A,
                                            buff_tau,
                                            buff_alpha,
                                            buff_u, inc_u,
                                            buff_y, inc_y,
                                            buff_z, inc_z,
                                            buff_v, inc_v,
                                            buff_A, rs_A, cs_A,
                                            buff_up, inc_up,
                                            buff_a, inc_a,
                                            buff_w, inc_w );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A   = FLA_DOUBLE_PTR( A );
      double* buff_u   = FLA_DOUBLE_PTR( u );
      double* buff_y   = FLA_DOUBLE_PTR( y );
      double* buff_z   = FLA_DOUBLE_PTR( z );
      double* buff_v   = FLA_DOUBLE_PTR( v );
      double* buff_up  = FLA_DOUBLE_PTR( up );
      double* buff_a   = FLA_DOUBLE_PTR( a );
      double* buff_w   = FLA_DOUBLE_PTR( w );
      double* buff_tau = FLA_DOUBLE_PTR( tau );
      double* buff_alpha = FLA_DOUBLE_PTR( alpha );

      FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1( m_A,
                                            n_A,
                                            buff_tau,
                                            buff_alpha,
                                            buff_u, inc_u,
                                            buff_y, inc_y,
                                            buff_z, inc_z,
                                            buff_v, inc_v,
                                            buff_A, rs_A, cs_A,
                                            buff_up, inc_up,
                                            buff_a, inc_a,
                                            buff_w, inc_w );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A   = FLA_COMPLEX_PTR( A );
      scomplex* buff_u   = FLA_COMPLEX_PTR( u );
      scomplex* buff_y   = FLA_COMPLEX_PTR( y );
      scomplex* buff_z   = FLA_COMPLEX_PTR( z );
      scomplex* buff_v   = FLA_COMPLEX_PTR( v );
      scomplex* buff_up  = FLA_COMPLEX_PTR( up );
      scomplex* buff_a   = FLA_COMPLEX_PTR( a );
      scomplex* buff_w   = FLA_COMPLEX_PTR( w );
      scomplex* buff_tau = FLA_COMPLEX_PTR( tau );
      scomplex* buff_alpha = FLA_COMPLEX_PTR( alpha );

      FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1( m_A,
                                            n_A,
                                            buff_tau,
                                            buff_alpha,
                                            buff_u, inc_u,
                                            buff_y, inc_y,
                                            buff_z, inc_z,
                                            buff_v, inc_v,
                                            buff_A, rs_A, cs_A,
                                            buff_up, inc_up,
                                            buff_a, inc_a,
                                            buff_w, inc_w );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A   = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_u   = FLA_DOUBLE_COMPLEX_PTR( u );
      dcomplex* buff_y   = FLA_DOUBLE_COMPLEX_PTR( y );
      dcomplex* buff_z   = FLA_DOUBLE_COMPLEX_PTR( z );
      dcomplex* buff_v   = FLA_DOUBLE_COMPLEX_PTR( v );
      dcomplex* buff_up  = FLA_DOUBLE_COMPLEX_PTR( up );
      dcomplex* buff_a   = FLA_DOUBLE_COMPLEX_PTR( a );
      dcomplex* buff_w   = FLA_DOUBLE_COMPLEX_PTR( w );
      dcomplex* buff_tau = FLA_DOUBLE_COMPLEX_PTR( tau );
      dcomplex* buff_alpha = FLA_DOUBLE_COMPLEX_PTR( alpha );

      FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1( m_A,
                                            n_A,
                                            buff_tau,
                                            buff_alpha,
                                            buff_u, inc_u,
                                            buff_y, inc_y,
                                            buff_z, inc_z,
                                            buff_v, inc_v,
                                            buff_A, rs_A, cs_A,
                                            buff_up, inc_up,
                                            buff_a, inc_a,
                                            buff_w, inc_w );

      break;
    }
  }

  return FLA_SUCCESS;
}
/*
   Double-precision complex kernel. Effective computation:
     A = A + alpha * ( u * y' + z * v' );
     y = A' * up;
     a = a - conj(y) / tau;
     w = A * conj(a);

   Every buff_* argument is a pointer to the first element of its operand
   (the body stores them in dcomplex* variables and offsets them by strides).
   buff_w is first set to zero over m_A elements; the columns of A are then
   processed two at a time, with one trailing column handled separately when
   n_A is odd. buff_A, buff_y and buff_a are updated in place.
   Always returns FLA_SUCCESS.
*/
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  buff_tau,
dcomplex *  buff_alpha,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_y,
int  inc_y,
dcomplex *  buff_z,
int  inc_z,
dcomplex *  buff_v,
int  inc_v,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_up,
int  inc_up,
dcomplex *  buff_a,
int  inc_a,
dcomplex *  buff_w,
int  inc_w 
)

References bl1_z0(), bl1_zaxpyv(), bl1_zaxpyv2b(), bl1_zdot(), bl1_zdotsv2(), bl1_zm1(), bl1_zsetv(), BLIS1_CONJUGATE, and BLIS1_NO_CONJUGATE.

Referenced by FLA_Bidiag_UT_u_step_ofz_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().

{
  dcomplex  zero      = bl1_z0();
  dcomplex  minus_one = bl1_zm1();
  /* Loop-invariant operands. */
  dcomplex* restrict u  = buff_u;
  dcomplex* restrict up = buff_up;
  dcomplex* restrict w = buff_w;
  dcomplex* restrict z  = buff_z;
  dcomplex* restrict alpha = buff_alpha;
  /* Stepping pointers: "1" is the current column/element, "2" the next. */
  dcomplex* restrict a1;
  dcomplex* restrict a2;
  dcomplex* restrict psi1;
  dcomplex* restrict psi2;
  dcomplex* restrict alpha1;
  dcomplex* restrict alpha2;
  dcomplex* restrict nu1;
  dcomplex* restrict nu2;

  dcomplex  minus_inv_tau;
  dcomplex  conj_psi1;
  dcomplex  conj_psi2;
  dcomplex  conj_nu1;
  dcomplex  conj_nu2;
  dcomplex  conj_alpha1;
  dcomplex  conj_alpha2;
  dcomplex  alpha_conj_psi1;
  dcomplex  alpha_conj_psi2;
  dcomplex  alpha_conj_nu1;
  dcomplex  alpha_conj_nu2;
  int       i;
  int       n_run    = n_A / 2;   /* number of two-column iterations     */
  int       n_left   = n_A % 2;   /* 1 when a single column is left over */
  int       twocs_A  = 2*cs_A;
  int       twoinc_y = 2*inc_y;
  int       twoinc_a = 2*inc_a;
  int       twoinc_v = 2*inc_v;


  /* Clear the accumulator w before any column contributes. */
  bl1_zsetv( m_A,
             &zero,
             buff_w, inc_w );

  /* Formed from minus_one and *buff_tau; used for "a = a - conj(y) / tau". */
  bl1_zdiv3( &minus_one, buff_tau, &minus_inv_tau );

  a1     = buff_A;
  a2     = buff_A + cs_A;
  psi1   = buff_y;
  psi2   = buff_y + inc_y;
  alpha1 = buff_a;
  alpha2 = buff_a + inc_a;
  nu1    = buff_v;
  nu2    = buff_v + inc_v;

  for ( i = 0; i < n_run; ++i )
  {

    /*------------------------------------------------------------*/

    /* Scalars alpha * conj(psi) and alpha * conj(nu) for both columns,
       computed before psi1/psi2 are overwritten below. */
    bl1_zcopyconj( psi1, &conj_psi1 );
    bl1_zcopyconj( psi2, &conj_psi2 );
    bl1_zmult3( alpha, &conj_psi1, &alpha_conj_psi1 );
    bl1_zmult3( alpha, &conj_psi2, &alpha_conj_psi2 );

    bl1_zcopyconj( nu1, &conj_nu1 );
    bl1_zcopyconj( nu2, &conj_nu2 );
    bl1_zmult3( alpha, &conj_nu1, &alpha_conj_nu1 );
    bl1_zmult3( alpha, &conj_nu2, &alpha_conj_nu2 );

    /* Update columns a1 and a2 of A using u and z. */
    bl1_zaxpyv2b( m_A,
                  &alpha_conj_psi1,
                  &alpha_conj_nu1,
                  u,  inc_u,
                  z,  inc_z,
                  a1, rs_A );
    bl1_zaxpyv2b( m_A,
                  &alpha_conj_psi2,
                  &alpha_conj_nu2,
                  u,  inc_u,
                  z,  inc_z,
                  a2, rs_A );


    /* Two dot products of the updated columns against up, written to
       psi1 and psi2 (the corresponding entries of y). */
    bl1_zdotsv2( BLIS1_CONJUGATE,
                 m_A,
                 a1, rs_A,
                 a2, rs_A,
                 up, inc_up,
                 &zero,
                 psi1,
                 psi2 );

    /* Update the two entries of a from the conjugated new psi values, then
       conjugate them for the accumulation into w. */
    bl1_zcopyconj( psi1, &conj_psi1 );
    bl1_zcopyconj( psi2, &conj_psi2 );
    bl1_zmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 );
    bl1_zmult4( &minus_inv_tau, &conj_psi2, alpha2, alpha2 );
    bl1_zcopyconj( alpha1, &conj_alpha1 );
    bl1_zcopyconj( alpha2, &conj_alpha2 );

    bl1_zaxpyv2b( m_A,
                  &conj_alpha1,
                  &conj_alpha2,
                  a1, rs_A,
                  a2, rs_A,
                  w,  inc_w );

    /*------------------------------------------------------------*/

    /* Advance every stepping pointer by two columns/elements. */
    a1     += twocs_A;
    a2     += twocs_A;
    psi1   += twoinc_y;
    psi2   += twoinc_y;
    alpha1 += twoinc_a;
    alpha2 += twoinc_a;
    nu1    += twoinc_v;
    nu2    += twoinc_v;
  }

  /* Odd n_A: the "1" pointers now address the final column. */
  if ( n_left == 1 )
  {
    dcomplex rho1;

    bl1_zcopyconj( psi1, &conj_psi1 );
    bl1_zmult3( alpha, &conj_psi1, &alpha_conj_psi1 );
    bl1_zcopyconj( nu1, &conj_nu1 );
    bl1_zmult3( alpha, &conj_nu1, &alpha_conj_nu1 );

    bl1_zaxpyv2b( m_A,
                  &alpha_conj_psi1,
                  &alpha_conj_nu1,
                  u,  inc_u,
                  z,  inc_z,
                  a1, rs_A );

    bl1_zdot( BLIS1_CONJUGATE,
              m_A,
              a1, rs_A,
              up, inc_up,
              &rho1 );
    /* psi1 is scaled by zero and then has rho1 added to it, rather than being
       assigned rho1 directly. */
    bl1_zscals( &zero, psi1 );
    bl1_zadd3( psi1, &rho1, psi1 );

    bl1_zcopyconj( psi1, &conj_psi1 );
    bl1_zmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 );
    bl1_zcopyconj( alpha1, &conj_alpha1 );

    bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                &conj_alpha1,
                a1, rs_A,
                w,  inc_w );
  }

  return FLA_SUCCESS;
}
/*
   Single-precision complex kernel. Effective computation:
     A = A + alpha * ( u * y' + z * v' );

   Every buff_* argument is a pointer to the first element of its operand
   (the body offsets them by strides and dereferences them). A is updated in
   place, one column per iteration. Always returns FLA_SUCCESS.
*/
FLA_Error FLA_Fused_Gerc2_opc_var1 ( int  m_A,
int  n_A,
scomplex *  buff_alpha,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_y,
int  inc_y,
scomplex *  buff_z,
int  inc_z,
scomplex *  buff_v,
int  inc_v,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_caxpyv(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofc_var2(), and FLA_Hess_UT_step_ofc_var3().

{
  int       i;

  for ( i = 0; i < n_A; ++i )
  {
    scomplex* a1       = buff_A + (i  )*cs_A + (0  )*rs_A;  /* column i of A  */
    scomplex* u        = buff_u;
    scomplex* psi1     = buff_y + (i  )*inc_y;              /* element i of y */
    scomplex* z        = buff_z;
    scomplex* nu1      = buff_v + (i  )*inc_v;              /* element i of v */
    scomplex* alpha    = buff_alpha;
    scomplex  psi1_conj;
    scomplex  nu1_conj;
    scomplex  temp1;
    scomplex  temp2;

    /*------------------------------------------------------------*/

    /* temp1 = alpha * conj( y(i) ) */
    bl1_ccopyconj( psi1, &psi1_conj );
    bl1_cmult3( alpha, &psi1_conj, &temp1 );

    /* temp2 = alpha * conj( v(i) ) */
    bl1_ccopyconj( nu1, &nu1_conj );
    bl1_cmult3( alpha, &nu1_conj, &temp2 );

    /* Column update with u scaled by temp1 ... */
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                &temp1,
                u,  inc_u,
                a1, rs_A );

    /* ... and with z scaled by temp2. */
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                &temp2,
                z,  inc_z,
                a1, rs_A );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_Gerc2_opd_var1 ( int  m_A,
int  n_A,
double *  buff_alpha,
double *  buff_u,
int  inc_u,
double *  buff_y,
int  inc_y,
double *  buff_z,
int  inc_z,
double *  buff_v,
int  inc_v,
double *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_daxpyv2b().

Referenced by FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofd_var2(), and FLA_Hess_UT_step_ofd_var3().

{
/*
   Double-precision real kernel. Effective computation:
   A = A + alpha * ( u * y' + z * v' );

   A is updated in place, one column per iteration, with both vector
   contributions applied by a single bl1_daxpyv2b call.
   Always returns FLA_SUCCESS.
*/
  int  j;

  for ( j = 0; j < n_A; ++j )
  {
    double* restrict  col    = buff_A + j*cs_A;   /* column j of A  */
    double* restrict  vec_u  = buff_u;
    double* restrict  vec_z  = buff_z;
    double* restrict  psi    = buff_y + j*inc_y;  /* element j of y */
    double* restrict  nu     = buff_v + j*inc_v;  /* element j of v */
    double* restrict  scale  = buff_alpha;
    double            coef_u;
    double            coef_z;

    /* Per-column scalars built from alpha and the j-th entries of y and v. */
    bl1_dmult3( scale, psi, &coef_u );
    bl1_dmult3( scale, nu,  &coef_z );

    bl1_daxpyv2b( m_A, &coef_u, &coef_z, vec_u, inc_u, vec_z, inc_z, col, rs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_Gerc2_ops_var1 ( int  m_A,
int  n_A,
float *  buff_alpha,
float *  buff_u,
int  inc_u,
float *  buff_y,
int  inc_y,
float *  buff_z,
int  inc_z,
float *  buff_v,
int  inc_v,
float *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_saxpyv(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofs_var2(), and FLA_Hess_UT_step_ofs_var3().

{
/*
   Single-precision real kernel. Effective computation:
   A = A + alpha * ( u * y' + z * v' );

   A is updated in place, one column per iteration, using two bl1_saxpyv
   calls per column. Always returns FLA_SUCCESS.
*/
  int  j;

  for ( j = 0; j < n_A; ++j )
  {
    float*  col = buff_A + j*cs_A;   /* column j of A  */
    float*  psi = buff_y + j*inc_y;  /* element j of y */
    float*  nu  = buff_v + j*inc_v;  /* element j of v */
    float   coef_u;
    float   coef_z;

    coef_u = *buff_alpha * *psi;
    coef_z = *buff_alpha * *nu;

    /* Column update with u first, then with z. */
    bl1_saxpyv( BLIS1_NO_CONJUGATE, m_A, &coef_u, buff_u, inc_u, col, rs_A );
    bl1_saxpyv( BLIS1_NO_CONJUGATE, m_A, &coef_z, buff_z, inc_z, col, rs_A );
  }

  return FLA_SUCCESS;
}

References FLA_Fused_Gerc2_opc_var1(), FLA_Fused_Gerc2_opd_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Fused_Gerc2_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

{
/*
   Effective computation:
   A = A + alpha * ( u * y' + z * v' );

   This body is the datatype dispatcher: it reads the dimensions and strides
   of A and the increments of u, y, z and v, extracts typed buffer pointers,
   and forwards everything to the ops/opd/opc/opz kernel matching the
   datatype of A. Only the datatype of A is inspected; the other operands are
   accessed with the same pointer type.
*/
  FLA_Datatype datatype;
  int          m_A, n_A;
  int          rs_A, cs_A;
  int          inc_u, inc_y, inc_z, inc_v;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  n_A      = FLA_Obj_width( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_u    = FLA_Obj_vector_inc( u );
  inc_y    = FLA_Obj_vector_inc( y );
  inc_z    = FLA_Obj_vector_inc( z );
  inc_v    = FLA_Obj_vector_inc( v );
  

  /* There is no default case: a datatype other than the four handled below
     falls through the switch and the function still returns FLA_SUCCESS
     without modifying A. */
  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      float* buff_u = FLA_FLOAT_PTR( u );
      float* buff_y = FLA_FLOAT_PTR( y );
      float* buff_z = FLA_FLOAT_PTR( z );
      float* buff_v = FLA_FLOAT_PTR( v );
      float* buff_alpha = FLA_FLOAT_PTR( alpha );

      FLA_Fused_Gerc2_ops_var1( m_A,
                                n_A,
                                buff_alpha,
                                buff_u, inc_u,
                                buff_y, inc_y,
                                buff_z, inc_z,
                                buff_v, inc_v,
                                buff_A, rs_A, cs_A );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      double* buff_u = FLA_DOUBLE_PTR( u );
      double* buff_y = FLA_DOUBLE_PTR( y );
      double* buff_z = FLA_DOUBLE_PTR( z );
      double* buff_v = FLA_DOUBLE_PTR( v );
      double* buff_alpha = FLA_DOUBLE_PTR( alpha );

      FLA_Fused_Gerc2_opd_var1( m_A,
                                n_A,
                                buff_alpha,
                                buff_u, inc_u,
                                buff_y, inc_y,
                                buff_z, inc_z,
                                buff_v, inc_v,
                                buff_A, rs_A, cs_A );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_u = FLA_COMPLEX_PTR( u );
      scomplex* buff_y = FLA_COMPLEX_PTR( y );
      scomplex* buff_z = FLA_COMPLEX_PTR( z );
      scomplex* buff_v = FLA_COMPLEX_PTR( v );
      scomplex* buff_alpha = FLA_COMPLEX_PTR( alpha );

      FLA_Fused_Gerc2_opc_var1( m_A,
                                n_A,
                                buff_alpha,
                                buff_u, inc_u,
                                buff_y, inc_y,
                                buff_z, inc_z,
                                buff_v, inc_v,
                                buff_A, rs_A, cs_A );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u );
      dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( y );
      dcomplex* buff_z = FLA_DOUBLE_COMPLEX_PTR( z );
      dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v );
      dcomplex* buff_alpha = FLA_DOUBLE_COMPLEX_PTR( alpha );

      FLA_Fused_Gerc2_opz_var1( m_A,
                                n_A,
                                buff_alpha,
                                buff_u, inc_u,
                                buff_y, inc_y,
                                buff_z, inc_z,
                                buff_v, inc_v,
                                buff_A, rs_A, cs_A );

      break;
    }
  }

  return FLA_SUCCESS;
}
/*
   Double-precision complex kernel. Effective computation:
     A = A + alpha * ( u * y' + z * v' );

   Every buff_* argument is a pointer to the first element of its operand
   (the body stores them in dcomplex* variables and offsets them by strides).
   A is updated in place, one column per iteration.
   Always returns FLA_SUCCESS.
*/
FLA_Error FLA_Fused_Gerc2_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  buff_alpha,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_y,
int  inc_y,
dcomplex *  buff_z,
int  inc_z,
dcomplex *  buff_v,
int  inc_v,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_zaxpyv2b().

Referenced by FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofz_var2(), and FLA_Hess_UT_step_ofz_var3().

{
  int i;

  for ( i = 0; i < n_A; ++i )
  {
    dcomplex* restrict a1       = buff_A + (i  )*cs_A + (0  )*rs_A;  /* column i of A  */
    dcomplex* restrict u        = buff_u;
    dcomplex* restrict psi1     = buff_y + (i  )*inc_y;              /* element i of y */
    dcomplex* restrict z        = buff_z;
    dcomplex* restrict nu1      = buff_v + (i  )*inc_v;              /* element i of v */
    dcomplex* restrict alpha    = buff_alpha;
    dcomplex  conj_psi1;
    dcomplex  conj_nu1;
    dcomplex  alpha_conj_psi1;
    dcomplex  alpha_conj_nu1;

    /*------------------------------------------------------------*/

    /* alpha_conj_psi1 = alpha * conj( y(i) ) */
    bl1_zcopyconj( psi1, &conj_psi1 );
    bl1_zmult3( alpha, &conj_psi1, &alpha_conj_psi1 );

    /* alpha_conj_nu1 = alpha * conj( v(i) ) */
    bl1_zcopyconj( nu1, &conj_nu1 );
    bl1_zmult3( alpha, &conj_nu1, &alpha_conj_nu1 );

    /* Apply both vector contributions to column i in one call. */
    bl1_zaxpyv2b( m_A,
                  &alpha_conj_psi1,
                  &alpha_conj_nu1,
                  u,  inc_u,
                  z,  inc_z,
                  a1, rs_A );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
/*
   Single-precision complex kernel of the fused U/Y/Z/V update.

   Every buff_* argument is a pointer to the first element of its operand
   (the body offsets them by strides and dereferences them).

   What the body does:
     - copies m_U elements from buff_A (stride rs_A) into buff_al;
     - for each i in [0, n_U):
         alpha = conj( dot( Y(:,i), a ) ),  beta = conj( dot( V(i,:), a ) ),
         both dots taken over n_V elements without conjugation;
         t(i)  = beta (stored before scaling by delta);
         gamma = conj( Y(0,i) ),  kappa = conj( V(i,0) );
         all four scalars are then scaled by *buff_delta;
         w  is updated with U(:,i) scaled by alpha and Z(:,i) scaled by beta;
         al is updated with U(:,i) scaled by gamma and Z(:,i) scaled by kappa.

   buff_temp, inc_temp, m_V and cs_A are not read by this variant.
   Always returns FLA_SUCCESS.
*/
FLA_Error FLA_Fused_UYx_ZVx_opc_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
scomplex *  buff_delta,
scomplex *  buff_U,
int  rs_U,
int  cs_U,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_V,
int  rs_V,
int  cs_V,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_temp,
int  inc_temp,
scomplex *  buff_t,
int  inc_t,
scomplex *  buff_a,
int  inc_a,
scomplex *  buff_w,
int  inc_w,
scomplex *  buff_al,
int  inc_al 
)

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Bidiag_UT_u_step_ofc_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

{
  int       i;
  int       m_A = m_U;
  int       m_Z = m_U;   /* Z is traversed with the same length as U */

  /* al := first column of A (m_A elements, stride rs_A). */
  bl1_ccopyv( BLIS1_NO_CONJUGATE,
              m_A,
              buff_A,  rs_A,
              buff_al, inc_al );

  for ( i = 0; i < n_U; ++i )
  {
    scomplex* u1       = buff_U + (i  )*cs_U + (0  )*rs_U;  /* column i of U */
    scomplex* y1       = buff_Y + (i  )*cs_Y + (0  )*rs_Y;  /* column i of Y */
    scomplex* z1       = buff_Z + (i  )*cs_Z + (0  )*rs_Z;  /* column i of Z */
    scomplex* v1       = buff_V + (0  )*cs_V + (i  )*rs_V;  /* row i of V    */
    scomplex* tau1     = buff_t + (i  )*inc_t;
    scomplex* delta    = buff_delta;
    scomplex* a        = buff_a;
    scomplex* w        = buff_w;
    scomplex* al       = buff_al;
    /* Leading elements of column i of Y and row i of V (same addresses as
       y1 and v1). */
    scomplex* psi20_l  = buff_Y + (i  )*cs_Y + (0  )*rs_Y;
    scomplex* nu20_l   = buff_V + (0  )*cs_V + (i  )*rs_V;
    scomplex  alpha;
    scomplex  beta;
    scomplex  gamma;
    scomplex  kappa;

    /*------------------------------------------------------------*/

    bl1_cdot( BLIS1_NO_CONJUGATE,
              n_V,
              y1, rs_Y,
              a,  inc_a,
              &alpha );

    bl1_cdot( BLIS1_NO_CONJUGATE,
              n_V,
              v1, cs_V,
              a,  inc_a,
              &beta );

    bl1_cconjs( &alpha );
    bl1_cconjs( &beta );
    bl1_ccopyconj( psi20_l, &gamma );
    bl1_ccopyconj( nu20_l,  &kappa );

    /* t(i) receives the conjugated, not-yet-scaled beta. */
    *tau1 = beta;

    bl1_cscals( delta, &alpha );
    bl1_cscals( delta, &beta );
    bl1_cscals( delta, &gamma );
    bl1_cscals( delta, &kappa );

    /* w is updated with column i of U and of Z. */
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_U,
                &alpha,
                u1, rs_U,
                w,  inc_w );

    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_Z,
                &beta,
                z1, rs_Z,
                w,  inc_w );

    /* al is updated with the same two columns, using gamma and kappa. */
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_U,
                &gamma,
                u1, rs_U,
                al, inc_al );

    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_Z,
                &kappa,
                z1, rs_Z,
                al,  inc_al);

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
/*
   Double-precision real kernel of the fused U/Y/Z/V update.

   What the body does:
     - copies m_U elements from buff_A (stride rs_A) into buff_al;
     - returns early if any of m_U, n_U, m_V, n_V is zero;
     - for each i in [0, n_U):
         copies row i of V (n_V elements, stride cs_V) into buff_temp;
         alpha = dot( Y(:,i), a ),  beta = dot( temp, a )  (n_V elements);
         t(i)  = beta;
         gamma = Y(0,i),  kappa = V(i,0);
         w and al are then updated from U(:,i) with (alpha, gamma) and from
         Z(:,i) with (beta, kappa) via bl1_daxmyv2.

   buff_delta is not read by this variant.
   NOTE(review): the name bl1_daxmyv2 suggests a subtracting update, i.e.
   delta fixed at -1 -- confirm against the bl1 implementation.

   Returns FLA_SUCCESS after the loop. The early exits return the literal 0
   instead; NOTE(review): confirm whether callers expect FLA_SUCCESS there.
*/
FLA_Error FLA_Fused_UYx_ZVx_opd_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
double *  buff_delta,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_V,
int  rs_V,
int  cs_V,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_temp,
int  inc_temp,
double *  buff_t,
int  inc_t,
double *  buff_a,
int  inc_a,
double *  buff_w,
int  inc_w,
double *  buff_al,
int  inc_al 
)

References bl1_d0(), bl1_daxmyv2(), bl1_dcopyv(), bl1_ddotsv2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Bidiag_UT_u_step_ofd_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

{
  double    zero = bl1_d0();
  int       i;
  int       m_A = m_U;
  int       m_Z = m_U;   /* Z is traversed with the same length as U */

  /* al := first column of A (m_A elements, stride rs_A). */
  bl1_dcopyv( BLIS1_NO_CONJUGATE,
              m_A,
              buff_A,  rs_A,
              buff_al, inc_al );

  if ( m_U == 0 || n_U == 0 ) return 0;
  if ( m_V == 0 || n_V == 0 ) return 0;

  for ( i = 0; i < n_U; ++i )
  {
    double*   restrict u1       = buff_U + (i  )*cs_U + (0  )*rs_U;  /* column i of U */
    double*   restrict y1       = buff_Y + (i  )*cs_Y + (0  )*rs_Y;  /* column i of Y */
    double*   restrict z1       = buff_Z + (i  )*cs_Z + (0  )*rs_Z;  /* column i of Z */
    double*   restrict v1       = buff_V + (0  )*cs_V + (i  )*rs_V;  /* row i of V    */
    double*   restrict tau1     = buff_t + (i  )*inc_t;
    double*   restrict t1       = buff_temp;                         /* scratch row   */
    double*   restrict a        = buff_a;
    double*   restrict w        = buff_w;
    double*   restrict al       = buff_al;
    /* Leading elements of column i of Y and row i of V (same addresses as
       y1 and v1; they are only read). */
    double*   restrict psi20_l  = buff_Y + (i  )*cs_Y + (0  )*rs_Y;
    double*   restrict nu20_l   = buff_V + (0  )*cs_V + (i  )*rs_V;
    double    alpha;
    double    beta;
    double    gamma;
    double    kappa;

    /*------------------------------------------------------------*/

    /* Gather row i of V into the scratch vector. The scratch vector is
       buff_temp, so it must be traversed with inc_temp (inc_t is the stride
       of buff_t, a different operand). */
    bl1_dcopyv( BLIS1_NO_CONJUGATE,
                n_V,
                v1, cs_V,
                t1, inc_temp );

    /* alpha = dot( y1, a ), beta = dot( t1, a ) in a single pass over a. */
    bl1_ddotsv2( BLIS1_NO_CONJUGATE,
                 n_V,
                 y1, rs_Y,
                 t1, inc_temp,
                 a,  inc_a,
                 &zero,
                 &alpha,
                 &beta );

    *tau1 = beta;

    bl1_dcopyconj( psi20_l, &gamma );
    bl1_dcopyconj( nu20_l,  &kappa );

    /* Update w (with alpha) and al (with gamma) from column i of U. */
    bl1_daxmyv2( BLIS1_NO_CONJUGATE,
                 m_U,
                 &alpha,
                 &gamma,
                 u1, rs_U,
                 w,  inc_w,
                 al, inc_al );

    /* Update w (with beta) and al (with kappa) from column i of Z. z1 points
       into Z, so it is walked with Z's own row stride rs_Z. */
    bl1_daxmyv2( BLIS1_NO_CONJUGATE,
                 m_Z,
                 &beta,
                 &kappa,
                 z1, rs_Z,
                 w,  inc_w,
                 al, inc_al );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_UYx_ZVx_ops_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
float *  buff_delta,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_V,
int  rs_V,
int  cs_V,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_temp,
int  inc_temp,
float *  buff_t,
int  inc_t,
float *  buff_a,
int  inc_a,
float *  buff_w,
int  inc_w,
float *  buff_al,
int  inc_al 
)

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Bidiag_UT_u_step_ofs_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

{
  int       i;
  int       m_A = m_U;
  int       m_Z = m_U;

  bl1_scopyv( BLIS1_NO_CONJUGATE,
              m_A,
              buff_A,  rs_A,
              buff_al, inc_al );

  for ( i = 0; i < n_U; ++i )
  {
    float*    u1       = buff_U + (i  )*cs_U + (0  )*rs_U;
    float*    y1       = buff_Y + (i  )*cs_Y + (0  )*rs_Y;
    float*    z1       = buff_Z + (i  )*cs_Z + (0  )*rs_Z;
    float*    v1       = buff_V + (0  )*cs_V + (i  )*rs_V;
    float*    tau1     = buff_t + (i  )*inc_t;
    float*    delta    = buff_delta;
    float*    a        = buff_a;
    float*    w        = buff_w;
    float*    al       = buff_al;
    float*    psi20_l  = buff_Y + (i  )*cs_Y + (0  )*rs_Y;
    float*    nu20_l   = buff_V + (0  )*cs_V + (i  )*rs_V;
    float     alpha;
    float     beta;
    float     gamma;
    float     kappa;

    /*------------------------------------------------------------*/

    bl1_sdot( BLIS1_NO_CONJUGATE,
              n_V,
              y1, rs_Y,
              a,  inc_a,
              &alpha );
    //alpha = F77_sdot( &n_V,
    //                  y1, &rs_Y,
    //                  a,  &inc_a );

    bl1_sdot( BLIS1_NO_CONJUGATE,
              n_V,
              v1, cs_V,
              a,  inc_a,
              &beta );
    //beta = F77_sdot( &n_V,
    //                 v1, &cs_V,
    //                 a,  &inc_a );

    *tau1 = beta;

    // bl1_sconjs( &alpha );
    // bl1_sconjs( &beta );
    // bl1_scopyconj( psi20_l, &gamma );
    // bl1_scopyconj( nu20_l,  &kappa );
    gamma = *psi20_l;
    kappa = *nu20_l;

    // bl1_dscals( delta, &alpha );
    // bl1_dscals( delta, &beta );
    // bl1_dscals( delta, &gamma );
    // bl1_dscals( delta, &kappa );
    alpha *= *delta;
    beta  *= *delta;
    gamma *= *delta;
    kappa *= *delta;

    bl1_saxpyv( BLIS1_NO_CONJUGATE,
                m_U,
                &alpha,
                u1, rs_U,
                w,  inc_w );
    //F77_saxpy( &m_U,
    //           &alpha,
    //           u1, &rs_U,
    //           w,  &inc_w );

    bl1_saxpyv( BLIS1_NO_CONJUGATE,
                m_Z,
                &beta,
                z1, rs_Z,
                w,  inc_w );
    //F77_saxpy( &m_Z,
    //           &beta,
    //           z1, &rs_Z,
    //           w,  &inc_w );

    bl1_saxpyv( BLIS1_NO_CONJUGATE,
                m_U,
                &gamma,
                u1, rs_U,
                al, inc_al );
    //F77_saxpy( &m_U,
    //           &gamma,
    //           u1, &rs_U,
    //           al, &inc_al );

    bl1_saxpyv( BLIS1_NO_CONJUGATE,
                m_Z,
                &kappa,
                z1, rs_Z,
                al,  inc_al );
    //F77_saxpy( &m_Z,
    //           &kappa,
    //           z1, &rs_Z,
    //           al, &inc_al );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
/*
   FLA_Fused_UYx_ZVx_opt_var1 -- object-level front end.

   References FLA_Fused_UYx_ZVx_opc_var1(), FLA_Fused_UYx_ZVx_opd_var1(), FLA_Fused_UYx_ZVx_ops_var1(), FLA_Fused_UYx_ZVx_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

   Effective computation:
     w  = w      + delta * ( U ( Y' conj(a)  ) + Z ( V' conj(a)  ) );
     al = A * e0 + delta * ( U ( Y' e0       ) + Z ( V' e0       ) );
     t  = V' conj(a);

   The routine queries dimensions and strides from the objects, then hands
   the raw buffers to the kernel matching the datatype of A:

     FLA_FLOAT           ->  FLA_Fused_UYx_ZVx_ops_var1
     FLA_DOUBLE          ->  FLA_Fused_UYx_ZVx_opd_var1
     FLA_COMPLEX         ->  FLA_Fused_UYx_ZVx_opc_var1
     FLA_DOUBLE_COMPLEX  ->  FLA_Fused_UYx_ZVx_opz_var1

   Any other datatype falls through without calling a kernel.  The kernels'
   return values are not inspected.

   Returns FLA_SUCCESS in every case.
*/
FLA_Error FLA_Fused_UYx_ZVx_opt_var1( FLA_Obj delta,
                                      FLA_Obj a,
                                      FLA_Obj U,
                                      FLA_Obj Y,
                                      FLA_Obj Z,
                                      FLA_Obj V,
                                      FLA_Obj A,
                                      FLA_Obj temp,
                                      FLA_Obj t,
                                      FLA_Obj w,
                                      FLA_Obj al )
{
  /* Only A is consulted for the element type. */
  const FLA_Datatype datatype = FLA_Obj_datatype( A );

  /* Matrix dimensions forwarded to the kernels. */
  const int m_U = FLA_Obj_length( U );
  const int n_U = FLA_Obj_width( U );
  const int m_V = FLA_Obj_length( V );
  const int n_V = FLA_Obj_width( V );

  /* Row / column strides of each matrix operand. */
  const int rs_U = FLA_Obj_row_stride( U ), cs_U = FLA_Obj_col_stride( U );
  const int rs_Y = FLA_Obj_row_stride( Y ), cs_Y = FLA_Obj_col_stride( Y );
  const int rs_Z = FLA_Obj_row_stride( Z ), cs_Z = FLA_Obj_col_stride( Z );
  const int rs_V = FLA_Obj_row_stride( V ), cs_V = FLA_Obj_col_stride( V );
  const int rs_A = FLA_Obj_row_stride( A ), cs_A = FLA_Obj_col_stride( A );

  /* Element increments of each vector operand. */
  const int inc_temp = FLA_Obj_vector_inc( temp );
  const int inc_t    = FLA_Obj_vector_inc( t );
  const int inc_a    = FLA_Obj_vector_inc( a );
  const int inc_w    = FLA_Obj_vector_inc( w );
  const int inc_al   = FLA_Obj_vector_inc( al );

  if ( datatype == FLA_FLOAT )
  {
    FLA_Fused_UYx_ZVx_ops_var1( m_U, n_U, m_V, n_V,
                                FLA_FLOAT_PTR( delta ),
                                FLA_FLOAT_PTR( U ), rs_U, cs_U,
                                FLA_FLOAT_PTR( Y ), rs_Y, cs_Y,
                                FLA_FLOAT_PTR( Z ), rs_Z, cs_Z,
                                FLA_FLOAT_PTR( V ), rs_V, cs_V,
                                FLA_FLOAT_PTR( A ), rs_A, cs_A,
                                FLA_FLOAT_PTR( temp ), inc_temp,
                                FLA_FLOAT_PTR( t ), inc_t,
                                FLA_FLOAT_PTR( a ), inc_a,
                                FLA_FLOAT_PTR( w ), inc_w,
                                FLA_FLOAT_PTR( al ), inc_al );
  }
  else if ( datatype == FLA_DOUBLE )
  {
    FLA_Fused_UYx_ZVx_opd_var1( m_U, n_U, m_V, n_V,
                                FLA_DOUBLE_PTR( delta ),
                                FLA_DOUBLE_PTR( U ), rs_U, cs_U,
                                FLA_DOUBLE_PTR( Y ), rs_Y, cs_Y,
                                FLA_DOUBLE_PTR( Z ), rs_Z, cs_Z,
                                FLA_DOUBLE_PTR( V ), rs_V, cs_V,
                                FLA_DOUBLE_PTR( A ), rs_A, cs_A,
                                FLA_DOUBLE_PTR( temp ), inc_temp,
                                FLA_DOUBLE_PTR( t ), inc_t,
                                FLA_DOUBLE_PTR( a ), inc_a,
                                FLA_DOUBLE_PTR( w ), inc_w,
                                FLA_DOUBLE_PTR( al ), inc_al );
  }
  else if ( datatype == FLA_COMPLEX )
  {
    FLA_Fused_UYx_ZVx_opc_var1( m_U, n_U, m_V, n_V,
                                FLA_COMPLEX_PTR( delta ),
                                FLA_COMPLEX_PTR( U ), rs_U, cs_U,
                                FLA_COMPLEX_PTR( Y ), rs_Y, cs_Y,
                                FLA_COMPLEX_PTR( Z ), rs_Z, cs_Z,
                                FLA_COMPLEX_PTR( V ), rs_V, cs_V,
                                FLA_COMPLEX_PTR( A ), rs_A, cs_A,
                                FLA_COMPLEX_PTR( temp ), inc_temp,
                                FLA_COMPLEX_PTR( t ), inc_t,
                                FLA_COMPLEX_PTR( a ), inc_a,
                                FLA_COMPLEX_PTR( w ), inc_w,
                                FLA_COMPLEX_PTR( al ), inc_al );
  }
  else if ( datatype == FLA_DOUBLE_COMPLEX )
  {
    FLA_Fused_UYx_ZVx_opz_var1( m_U, n_U, m_V, n_V,
                                FLA_DOUBLE_COMPLEX_PTR( delta ),
                                FLA_DOUBLE_COMPLEX_PTR( U ), rs_U, cs_U,
                                FLA_DOUBLE_COMPLEX_PTR( Y ), rs_Y, cs_Y,
                                FLA_DOUBLE_COMPLEX_PTR( Z ), rs_Z, cs_Z,
                                FLA_DOUBLE_COMPLEX_PTR( V ), rs_V, cs_V,
                                FLA_DOUBLE_COMPLEX_PTR( A ), rs_A, cs_A,
                                FLA_DOUBLE_COMPLEX_PTR( temp ), inc_temp,
                                FLA_DOUBLE_COMPLEX_PTR( t ), inc_t,
                                FLA_DOUBLE_COMPLEX_PTR( a ), inc_a,
                                FLA_DOUBLE_COMPLEX_PTR( w ), inc_w,
                                FLA_DOUBLE_COMPLEX_PTR( al ), inc_al );
  }

  return FLA_SUCCESS;
}
/*
   FLA_Fused_UYx_ZVx_opz_var1 -- double-precision complex kernel.

   References bl1_z0(), bl1_zaxmyv2(), bl1_zcopyv(), bl1_zdotsv2(), and BLIS1_NO_CONJUGATE.

   Referenced by FLA_Bidiag_UT_u_step_ofz_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

   Sequence of operations performed below:

     1. bl1_zcopyv() is invoked once with ( buff_A, rs_A ) and
        ( buff_al, inc_al ) over m_U elements (presumably seeding al with
        the first column of A -- confirm against bl1_zcopyv).

     2. If any of m_U, n_U, m_V, n_V is zero the routine returns right
        after that copy.

     3. For every index i in [0, n_U):
          bl1_zdotsv2() produces alpha and beta from column i of Y
            (stride rs_Y), row i of V (stride cs_V) and a (stride inc_a),
            over n_V elements;
          alpha and beta are conjugated in place;
          t[i] = beta;
          gamma = conj( Y[0,i] ),  kappa = conj( V[i,0] );
          bl1_zaxmyv2() is applied with ( alpha, gamma ) and column i of U
            to w and al, then with ( beta, kappa ) and column i of Z to
            w and al.

   Parameters
     m_U, n_U          row count shared by U, Z, A(:,0), w, al; loop trip count
     m_V, n_V          dimensions of V; n_V is the dot-product length
     buff_delta        pointer to the scalar delta (see the review note below)
     buff_X, rs_X,     base pointer, row stride and column stride of
       cs_X            X in { U, Y, Z, V, A }
     buff_t, inc_t     output vector t and its stride
     buff_a, inc_a     input vector a and its stride
     buff_w, inc_w     vector w (updated in place) and its stride
     buff_al, inc_al   vector al (overwritten, then updated) and its stride

   cs_A, buff_temp and inc_temp are part of the interface but are not read
   by this variant.

   NOTE(review): buff_delta is never read here, whereas the single-precision
   variant scales its coefficients by *buff_delta.  Presumably bl1_zaxmyv2
   subtracts its scaled operand, i.e. delta == -1 is hard-wired -- confirm
   against the BLIS1 kernel and the caller before relying on other deltas.

   Returns FLA_SUCCESS after the loop; the empty-dimension exits return 0.
*/
FLA_Error FLA_Fused_UYx_ZVx_opz_var1( int       m_U,
                                      int       n_U,
                                      int       m_V,
                                      int       n_V,
                                      dcomplex* buff_delta,
                                      dcomplex* buff_U, int rs_U, int cs_U,
                                      dcomplex* buff_Y, int rs_Y, int cs_Y,
                                      dcomplex* buff_Z, int rs_Z, int cs_Z,
                                      dcomplex* buff_V, int rs_V, int cs_V,
                                      dcomplex* buff_A, int rs_A, int cs_A,
                                      dcomplex* buff_temp, int inc_temp,
                                      dcomplex* buff_t, int inc_t,
                                      dcomplex* buff_a, int inc_a,
                                      dcomplex* buff_w, int inc_w,
                                      dcomplex* buff_al, int inc_al )
{
  dcomplex  zero = bl1_z0();
  int       i;
  int       m_A = m_U;
  int       m_Z = m_U;

  bl1_zcopyv( BLIS1_NO_CONJUGATE,
              m_A,
              buff_A,  rs_A,
              buff_al, inc_al );

  // NOTE(review): these exits return the literal 0 while the normal path
  // returns FLA_SUCCESS; kept as-is because the numeric value of
  // FLA_SUCCESS is not visible here -- confirm the two are meant to agree.
  if ( m_U == 0 || n_U == 0 ) return 0;
  if ( m_V == 0 || n_V == 0 ) return 0;

  for ( i = 0; i < n_U; ++i )
  {
    dcomplex* restrict u1       = buff_U + (i  )*cs_U + (0  )*rs_U;
    dcomplex* restrict y1       = buff_Y + (i  )*cs_Y + (0  )*rs_Y;
    dcomplex* restrict z1       = buff_Z + (i  )*cs_Z + (0  )*rs_Z;
    dcomplex* restrict v1       = buff_V + (0  )*cs_V + (i  )*rs_V;
    dcomplex* restrict tau1     = buff_t + (i  )*inc_t;
    dcomplex* restrict a        = buff_a;
    dcomplex* restrict w        = buff_w;
    dcomplex* restrict al       = buff_al;
    dcomplex* restrict psi20_l  = buff_Y + (i  )*cs_Y + (0  )*rs_Y;
    dcomplex* restrict nu20_l   = buff_V + (0  )*cs_V + (i  )*rs_V;
    // alpha/beta are handed to an accumulate-style kernel together with a
    // zero scale; start them at zero so no indeterminate value can be read
    // (presumably the kernel forms scale*old + dot -- confirm).
    dcomplex  alpha = zero;
    dcomplex  beta  = zero;
    dcomplex  gamma;
    dcomplex  kappa;

    /*------------------------------------------------------------*/

    bl1_zdotsv2( BLIS1_NO_CONJUGATE,
                 n_V,
                 y1, rs_Y,
                 v1, cs_V,
                 a,  inc_a,
                 &zero,
                 &alpha,
                 &beta );

    bl1_zconjs( &alpha );
    bl1_zconjs( &beta );

    *tau1 = beta;

    bl1_zcopyconj( psi20_l, &gamma );
    bl1_zcopyconj( nu20_l,  &kappa );

    bl1_zaxmyv2( BLIS1_NO_CONJUGATE,
                 m_U,
                 &alpha,
                 &gamma,
                 u1, rs_U,
                 w,  inc_w,
                 al, inc_al );

    // z1 is a column of Z, so it must be walked with Z's own row stride
    // (the previous code passed rs_U here).
    bl1_zaxmyv2( BLIS1_NO_CONJUGATE,
                 m_Z,
                 &beta,
                 &kappa,
                 z1, rs_Z,
                 w,  inc_w,
                 al, inc_al );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}