libflame  revision_anchor
Functions
FLA_Fused_UYx_ZVx_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Fused_UYx_ZVx_opt_var1 (FLA_Obj delta, FLA_Obj a, FLA_Obj U, FLA_Obj Y, FLA_Obj Z, FLA_Obj V, FLA_Obj A, FLA_Obj temp, FLA_Obj t, FLA_Obj w, FLA_Obj al)
FLA_Error FLA_Fused_UYx_ZVx_ops_var1 (int m_U, int n_U, int m_V, int n_V, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_V, int rs_V, int cs_V, float *buff_A, int rs_A, int cs_A, float *buff_temp, int inc_temp, float *buff_t, int inc_t, float *buff_a, int inc_a, float *buff_w, int inc_w, float *buff_al, int inc_al)
FLA_Error FLA_Fused_UYx_ZVx_opd_var1 (int m_U, int n_U, int m_V, int n_V, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_V, int rs_V, int cs_V, double *buff_A, int rs_A, int cs_A, double *buff_temp, int inc_temp, double *buff_t, int inc_t, double *buff_a, int inc_a, double *buff_w, int inc_w, double *buff_al, int inc_al)
FLA_Error FLA_Fused_UYx_ZVx_opc_var1 (int m_U, int n_U, int m_V, int n_V, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_temp, int inc_temp, scomplex *buff_t, int inc_t, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w, scomplex *buff_al, int inc_al)
FLA_Error FLA_Fused_UYx_ZVx_opz_var1 (int m_U, int n_U, int m_V, int n_V, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_temp, int inc_temp, dcomplex *buff_t, int inc_t, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w, dcomplex *buff_al, int inc_al)

Function Documentation

FLA_Error FLA_Fused_UYx_ZVx_opc_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
scomplex *  buff_delta,
scomplex *  buff_U,
int  rs_U,
int  cs_U,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_V,
int  rs_V,
int  cs_V,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_temp,
int  inc_temp,
scomplex *  buff_t,
int  inc_t,
scomplex *  buff_a,
int  inc_a,
scomplex *  buff_w,
int  inc_w,
scomplex *  buff_al,
int  inc_al 
)

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Bidiag_UT_u_step_ofc_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

{
/*
   Single-precision complex kernel.

   Steps performed, in order:
     1. Copy m_U elements of buff_A (stride rs_A) into buff_al (stride inc_al).
     2. For each i in [0, n_U):
          alpha = bl1_cdot( Y(:,i), a )  over n_V terms, then bl1_cconjs
          beta  = bl1_cdot( V(i,:), a )  over n_V terms, then bl1_cconjs
          t(i)  = beta                   (stored before the delta scaling)
          gamma = bl1_ccopyconj( Y(0,i) ),  kappa = bl1_ccopyconj( V(i,0) )
          alpha, beta, gamma, kappa are each passed through
          bl1_cscals with *buff_delta
          w  is updated by bl1_caxpyv with (alpha, U(:,i)) then (beta,  Z(:,i))
          al is updated by bl1_caxpyv with (gamma, U(:,i)) then (kappa, Z(:,i))

   Y(:,i) is addressed as buff_Y + i*cs_Y with stride rs_Y; V(i,:) as
   buff_V + i*rs_V with stride cs_V; U(:,i) and Z(:,i) are walked over m_U
   elements with strides rs_U and rs_Z respectively.

   NOTE(review): the exact arithmetic of the bl1_* helpers (dot, in-place
   conjugate, scale, axpy) is assumed from their names; confirm against the
   BLIS1 headers.

   m_V, cs_A, buff_temp and inc_temp are accepted but never read here.

   Returns FLA_SUCCESS unconditionally.
*/
  const int m_A = m_U;   /* number of elements copied from A into al */
  const int m_Z = m_U;   /* Z columns are walked over as many rows as U */
  int       i;

  bl1_ccopyv( BLIS1_NO_CONJUGATE,
              m_A,
              buff_A,  rs_A,
              buff_al, inc_al );

  for ( i = 0; i < n_U; ++i )
  {
    scomplex* u1       = buff_U + i*cs_U;   /* column i of U */
    scomplex* y1       = buff_Y + i*cs_Y;   /* column i of Y */
    scomplex* z1       = buff_Z + i*cs_Z;   /* column i of Z */
    scomplex* v1       = buff_V + i*rs_V;   /* row i of V    */
    scomplex* tau1     = buff_t + i*inc_t;
    scomplex* psi20_l  = y1;                /* Y(0,i) */
    scomplex* nu20_l   = v1;                /* V(i,0) */
    scomplex  alpha;
    scomplex  beta;
    scomplex  gamma;
    scomplex  kappa;

    /*------------------------------------------------------------*/

    bl1_cdot( BLIS1_NO_CONJUGATE,
              n_V,
              y1,     rs_Y,
              buff_a, inc_a,
              &alpha );

    bl1_cdot( BLIS1_NO_CONJUGATE,
              n_V,
              v1,     cs_V,
              buff_a, inc_a,
              &beta );

    bl1_cconjs( &alpha );
    bl1_cconjs( &beta );
    bl1_ccopyconj( psi20_l, &gamma );
    bl1_ccopyconj( nu20_l,  &kappa );

    /* t receives beta before delta is applied to it. */
    *tau1 = beta;

    bl1_cscals( buff_delta, &alpha );
    bl1_cscals( buff_delta, &beta );
    bl1_cscals( buff_delta, &gamma );
    bl1_cscals( buff_delta, &kappa );

    /* Updates of w. */
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_U,
                &alpha,
                u1,     rs_U,
                buff_w, inc_w );

    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_Z,
                &beta,
                z1,     rs_Z,
                buff_w, inc_w );

    /* Updates of al. */
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_U,
                &gamma,
                u1,      rs_U,
                buff_al, inc_al );

    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_Z,
                &kappa,
                z1,      rs_Z,
                buff_al, inc_al );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_UYx_ZVx_opd_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
double *  buff_delta,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_V,
int  rs_V,
int  cs_V,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_temp,
int  inc_temp,
double *  buff_t,
int  inc_t,
double *  buff_a,
int  inc_a,
double *  buff_w,
int  inc_w,
double *  buff_al,
int  inc_al 
)

References bl1_d0(), bl1_daxmyv2(), bl1_dcopyv(), bl1_ddotsv2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Bidiag_UT_u_step_ofd_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

{
/*
   Double-precision real kernel.

   Steps performed, in order:
     1. Copy m_U elements of buff_A (stride rs_A) into buff_al (stride inc_al).
     2. Return FLA_SUCCESS immediately if any of m_U, n_U, m_V, n_V is zero.
     3. For each i in [0, n_U):
          copy V(i,:) (n_V elements, stride cs_V) into buff_temp (stride inc_temp)
          bl1_ddotsv2 over Y(:,i), buff_temp and a produces alpha and beta
          t(i)  = beta
          gamma = bl1_dcopyconj( Y(0,i) ),  kappa = bl1_dcopyconj( V(i,0) )
          bl1_daxmyv2 updates w and al from U(:,i) with (alpha, gamma)
          bl1_daxmyv2 updates w and al from Z(:,i) with (beta,  kappa)

   buff_delta and cs_A are accepted but never read in this variant.
   NOTE(review): the fused bl1_daxmyv2 helper presumably subtracts its scaled
   operand, i.e. delta == -1 appears to be assumed; confirm against the BLIS1
   implementation and the callers.

   Returns FLA_SUCCESS on every path.
*/
  double    zero = bl1_d0();
  int       i;
  int       m_A = m_U;
  int       m_Z = m_U;

  bl1_dcopyv( BLIS1_NO_CONJUGATE,
              m_A,
              buff_A,  rs_A,
              buff_al, inc_al );

  /* An empty operand is not an error: report the same status as the
     normal exit path instead of a bare literal. */
  if ( m_U == 0 || n_U == 0 ) return FLA_SUCCESS;
  if ( m_V == 0 || n_V == 0 ) return FLA_SUCCESS;

  for ( i = 0; i < n_U; ++i )
  {
    double*   restrict u1       = buff_U + (i  )*cs_U + (0  )*rs_U;
    double*   restrict y1       = buff_Y + (i  )*cs_Y + (0  )*rs_Y;
    double*   restrict z1       = buff_Z + (i  )*cs_Z + (0  )*rs_Z;
    double*   restrict v1       = buff_V + (0  )*cs_V + (i  )*rs_V;
    double*   restrict tau1     = buff_t + (i  )*inc_t;
    double*   restrict t1       = buff_temp;
    double*   restrict a        = buff_a;
    double*   restrict w        = buff_w;
    double*   restrict al       = buff_al;
    double*   restrict psi20_l  = buff_Y + (i  )*cs_Y + (0  )*rs_Y;
    double*   restrict nu20_l   = buff_V + (0  )*cs_V + (i  )*rs_V;
    /* Zero-initialized because they are handed to bl1_ddotsv2 together
       with a zero scaling factor; if the helper scales the incoming
       values, it must not see indeterminate data. */
    double    alpha = zero;
    double    beta  = zero;
    double    gamma;
    double    kappa;

    /*------------------------------------------------------------*/

    /* Stage row i of V in the scratch vector, using the scratch
       vector's own increment. */
    bl1_dcopyv( BLIS1_NO_CONJUGATE,
                n_V,
                v1, cs_V,
                t1, inc_temp );

    bl1_ddotsv2( BLIS1_NO_CONJUGATE,
                 n_V,
                 y1, rs_Y,
                 t1, inc_temp,
                 a,  inc_a,
                 &zero,
                 &alpha,
                 &beta );

    *tau1 = beta;

    bl1_dcopyconj( psi20_l, &gamma );
    bl1_dcopyconj( nu20_l,  &kappa );

    bl1_daxmyv2( BLIS1_NO_CONJUGATE,
                 m_U,
                 &alpha,
                 &gamma,
                 u1, rs_U,
                 w,  inc_w,
                 al, inc_al );

    /* z1 points into Z, so it is walked with Z's row stride. */
    bl1_daxmyv2( BLIS1_NO_CONJUGATE,
                 m_Z,
                 &beta,
                 &kappa,
                 z1, rs_Z,
                 w,  inc_w,
                 al, inc_al );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_UYx_ZVx_ops_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
float *  buff_delta,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_V,
int  rs_V,
int  cs_V,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_temp,
int  inc_temp,
float *  buff_t,
int  inc_t,
float *  buff_a,
int  inc_a,
float *  buff_w,
int  inc_w,
float *  buff_al,
int  inc_al 
)

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Bidiag_UT_u_step_ofs_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

{
/*
   Single-precision real kernel.

   First copies m_U elements of buff_A (stride rs_A) into buff_al (stride
   inc_al). Then, for every column index k in [0, n_U):
     - forms two n_V-term products with a via bl1_sdot: one against
       Y(:,k) (stride rs_Y), one against V(k,:) (stride cs_V);
     - stores the unscaled V product in t(k);
     - multiplies both products, Y(0,k) and V(k,0) by *buff_delta;
     - issues four bl1_saxpyv calls over m_U elements: w with U(:,k) then
       Z(:,k), followed by al with U(:,k) then Z(:,k).

   m_V, cs_A, buff_temp and inc_temp are not read. Always returns
   FLA_SUCCESS.
*/
  const int n_rows = m_U;
  int       k;

  bl1_scopyv( BLIS1_NO_CONJUGATE,
              n_rows,
              buff_A,  rs_A,
              buff_al, inc_al );

  for ( k = 0; k < n_U; ++k )
  {
    float* u_col = buff_U + k*cs_U;
    float* y_col = buff_Y + k*cs_Y;
    float* z_col = buff_Z + k*cs_Z;
    float* v_row = buff_V + k*rs_V;
    float  y_dot_a;
    float  v_dot_a;
    float  y_head;
    float  v_head;
    float  scale;
    float  w_coef_u;
    float  w_coef_z;
    float  al_coef_u;
    float  al_coef_z;

    bl1_sdot( BLIS1_NO_CONJUGATE,
              n_V,
              y_col,  rs_Y,
              buff_a, inc_a,
              &y_dot_a );

    bl1_sdot( BLIS1_NO_CONJUGATE,
              n_V,
              v_row,  cs_V,
              buff_a, inc_a,
              &v_dot_a );

    /* t keeps the raw product; scaling is applied only afterwards. */
    buff_t[ k*inc_t ] = v_dot_a;

    /* Leading entries of the current Y column and V row. */
    y_head = *y_col;
    v_head = *v_row;

    scale     = *buff_delta;
    w_coef_u  = y_dot_a * scale;
    w_coef_z  = v_dot_a * scale;
    al_coef_u = y_head  * scale;
    al_coef_z = v_head  * scale;

    /* Accumulate into w. */
    bl1_saxpyv( BLIS1_NO_CONJUGATE,
                n_rows,
                &w_coef_u,
                u_col,  rs_U,
                buff_w, inc_w );

    bl1_saxpyv( BLIS1_NO_CONJUGATE,
                n_rows,
                &w_coef_z,
                z_col,  rs_Z,
                buff_w, inc_w );

    /* Accumulate into al. */
    bl1_saxpyv( BLIS1_NO_CONJUGATE,
                n_rows,
                &al_coef_u,
                u_col,   rs_U,
                buff_al, inc_al );

    bl1_saxpyv( BLIS1_NO_CONJUGATE,
                n_rows,
                &al_coef_z,
                z_col,   rs_Z,
                buff_al, inc_al );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_UYx_ZVx_opt_var1 ( FLA_Obj  delta,
FLA_Obj  a,
FLA_Obj  U,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  V,
FLA_Obj  A,
FLA_Obj  temp,
FLA_Obj  t,
FLA_Obj  w,
FLA_Obj  al 
)

References FLA_Fused_UYx_ZVx_opc_var1(), FLA_Fused_UYx_ZVx_opd_var1(), FLA_Fused_UYx_ZVx_ops_var1(), FLA_Fused_UYx_ZVx_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

{
/*
   Object-level front end: unpacks dimensions, strides and buffers from the
   FLA_Obj arguments and forwards them to the kernel matching A's datatype.

   Operation carried out by the kernels:
     w  = w      + delta * ( U ( Y' conj(a)  ) + Z ( V' conj(a)  ) );
     al = A * e0 + delta * ( U ( Y' e0       ) + Z ( V' e0       ) );
     t  = V' conj(a);

   The datatype is taken from A only. A datatype that matches none of the
   four cases performs no work. The kernel's own return value is discarded;
   this function always returns FLA_SUCCESS.
*/
  const FLA_Datatype datatype = FLA_Obj_datatype( A );

  /* Matrix dimensions. */
  const int m_U = FLA_Obj_length( U );
  const int n_U = FLA_Obj_width( U );
  const int m_V = FLA_Obj_length( V );
  const int n_V = FLA_Obj_width( V );

  /* Row and column strides of each matrix operand. */
  const int rs_U = FLA_Obj_row_stride( U );
  const int cs_U = FLA_Obj_col_stride( U );
  const int rs_Y = FLA_Obj_row_stride( Y );
  const int cs_Y = FLA_Obj_col_stride( Y );
  const int rs_Z = FLA_Obj_row_stride( Z );
  const int cs_Z = FLA_Obj_col_stride( Z );
  const int rs_V = FLA_Obj_row_stride( V );
  const int cs_V = FLA_Obj_col_stride( V );
  const int rs_A = FLA_Obj_row_stride( A );
  const int cs_A = FLA_Obj_col_stride( A );

  /* Increments of each vector operand. */
  const int inc_temp = FLA_Obj_vector_inc( temp );
  const int inc_t    = FLA_Obj_vector_inc( t );
  const int inc_a    = FLA_Obj_vector_inc( a );
  const int inc_w    = FLA_Obj_vector_inc( w );
  const int inc_al   = FLA_Obj_vector_inc( al );

  switch ( datatype )
  {
    case FLA_FLOAT:
      FLA_Fused_UYx_ZVx_ops_var1( m_U, n_U, m_V, n_V,
                                  FLA_FLOAT_PTR( delta ),
                                  FLA_FLOAT_PTR( U ),    rs_U, cs_U,
                                  FLA_FLOAT_PTR( Y ),    rs_Y, cs_Y,
                                  FLA_FLOAT_PTR( Z ),    rs_Z, cs_Z,
                                  FLA_FLOAT_PTR( V ),    rs_V, cs_V,
                                  FLA_FLOAT_PTR( A ),    rs_A, cs_A,
                                  FLA_FLOAT_PTR( temp ), inc_temp,
                                  FLA_FLOAT_PTR( t ),    inc_t,
                                  FLA_FLOAT_PTR( a ),    inc_a,
                                  FLA_FLOAT_PTR( w ),    inc_w,
                                  FLA_FLOAT_PTR( al ),   inc_al );
      break;

    case FLA_DOUBLE:
      FLA_Fused_UYx_ZVx_opd_var1( m_U, n_U, m_V, n_V,
                                  FLA_DOUBLE_PTR( delta ),
                                  FLA_DOUBLE_PTR( U ),    rs_U, cs_U,
                                  FLA_DOUBLE_PTR( Y ),    rs_Y, cs_Y,
                                  FLA_DOUBLE_PTR( Z ),    rs_Z, cs_Z,
                                  FLA_DOUBLE_PTR( V ),    rs_V, cs_V,
                                  FLA_DOUBLE_PTR( A ),    rs_A, cs_A,
                                  FLA_DOUBLE_PTR( temp ), inc_temp,
                                  FLA_DOUBLE_PTR( t ),    inc_t,
                                  FLA_DOUBLE_PTR( a ),    inc_a,
                                  FLA_DOUBLE_PTR( w ),    inc_w,
                                  FLA_DOUBLE_PTR( al ),   inc_al );
      break;

    case FLA_COMPLEX:
      FLA_Fused_UYx_ZVx_opc_var1( m_U, n_U, m_V, n_V,
                                  FLA_COMPLEX_PTR( delta ),
                                  FLA_COMPLEX_PTR( U ),    rs_U, cs_U,
                                  FLA_COMPLEX_PTR( Y ),    rs_Y, cs_Y,
                                  FLA_COMPLEX_PTR( Z ),    rs_Z, cs_Z,
                                  FLA_COMPLEX_PTR( V ),    rs_V, cs_V,
                                  FLA_COMPLEX_PTR( A ),    rs_A, cs_A,
                                  FLA_COMPLEX_PTR( temp ), inc_temp,
                                  FLA_COMPLEX_PTR( t ),    inc_t,
                                  FLA_COMPLEX_PTR( a ),    inc_a,
                                  FLA_COMPLEX_PTR( w ),    inc_w,
                                  FLA_COMPLEX_PTR( al ),   inc_al );
      break;

    case FLA_DOUBLE_COMPLEX:
      FLA_Fused_UYx_ZVx_opz_var1( m_U, n_U, m_V, n_V,
                                  FLA_DOUBLE_COMPLEX_PTR( delta ),
                                  FLA_DOUBLE_COMPLEX_PTR( U ),    rs_U, cs_U,
                                  FLA_DOUBLE_COMPLEX_PTR( Y ),    rs_Y, cs_Y,
                                  FLA_DOUBLE_COMPLEX_PTR( Z ),    rs_Z, cs_Z,
                                  FLA_DOUBLE_COMPLEX_PTR( V ),    rs_V, cs_V,
                                  FLA_DOUBLE_COMPLEX_PTR( A ),    rs_A, cs_A,
                                  FLA_DOUBLE_COMPLEX_PTR( temp ), inc_temp,
                                  FLA_DOUBLE_COMPLEX_PTR( t ),    inc_t,
                                  FLA_DOUBLE_COMPLEX_PTR( a ),    inc_a,
                                  FLA_DOUBLE_COMPLEX_PTR( w ),    inc_w,
                                  FLA_DOUBLE_COMPLEX_PTR( al ),   inc_al );
      break;
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_UYx_ZVx_opz_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
dcomplex *  buff_delta,
dcomplex *  buff_U,
int  rs_U,
int  cs_U,
dcomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_V,
int  rs_V,
int  cs_V,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_temp,
int  inc_temp,
dcomplex *  buff_t,
int  inc_t,
dcomplex *  buff_a,
int  inc_a,
dcomplex *  buff_w,
int  inc_w,
dcomplex *  buff_al,
int  inc_al 
)

References bl1_z0(), bl1_zaxmyv2(), bl1_zcopyv(), bl1_zdotsv2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Bidiag_UT_u_step_ofz_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

{
/*
   Double-precision complex kernel.

   Steps performed, in order:
     1. Copy m_U elements of buff_A (stride rs_A) into buff_al (stride inc_al).
     2. Return FLA_SUCCESS immediately if any of m_U, n_U, m_V, n_V is zero.
     3. For each i in [0, n_U):
          bl1_zdotsv2 over Y(:,i) (stride rs_Y), V(i,:) (stride cs_V) and a
          produces alpha and beta, which are then passed through bl1_zconjs
          t(i)  = beta
          gamma = bl1_zcopyconj( Y(0,i) ),  kappa = bl1_zcopyconj( V(i,0) )
          bl1_zaxmyv2 updates w and al from U(:,i) with (alpha, gamma)
          bl1_zaxmyv2 updates w and al from Z(:,i) with (beta,  kappa)

   buff_delta, cs_A, buff_temp and inc_temp are accepted but never read in
   this variant.
   NOTE(review): the fused bl1_zaxmyv2 helper presumably subtracts its scaled
   operand, i.e. delta == -1 appears to be assumed; confirm against the BLIS1
   implementation and the callers.

   Returns FLA_SUCCESS on every path.
*/
  dcomplex  zero = bl1_z0();
  int       i;
  int       m_A = m_U;
  int       m_Z = m_U;

  bl1_zcopyv( BLIS1_NO_CONJUGATE,
              m_A,
              buff_A,  rs_A,
              buff_al, inc_al );

  /* An empty operand is not an error: report the same status as the
     normal exit path instead of a bare literal. */
  if ( m_U == 0 || n_U == 0 ) return FLA_SUCCESS;
  if ( m_V == 0 || n_V == 0 ) return FLA_SUCCESS;

  for ( i = 0; i < n_U; ++i )
  {
    dcomplex* restrict u1       = buff_U + (i  )*cs_U + (0  )*rs_U;
    dcomplex* restrict y1       = buff_Y + (i  )*cs_Y + (0  )*rs_Y;
    dcomplex* restrict z1       = buff_Z + (i  )*cs_Z + (0  )*rs_Z;
    dcomplex* restrict v1       = buff_V + (0  )*cs_V + (i  )*rs_V;
    dcomplex* restrict tau1     = buff_t + (i  )*inc_t;
    dcomplex* restrict a        = buff_a;
    dcomplex* restrict w        = buff_w;
    dcomplex* restrict al       = buff_al;
    dcomplex* restrict psi20_l  = buff_Y + (i  )*cs_Y + (0  )*rs_Y;
    dcomplex* restrict nu20_l   = buff_V + (0  )*cs_V + (i  )*rs_V;
    /* Zero-initialized because they are handed to bl1_zdotsv2 together
       with a zero scaling factor; if the helper scales the incoming
       values, it must not see indeterminate data. */
    dcomplex  alpha = zero;
    dcomplex  beta  = zero;
    dcomplex  gamma;
    dcomplex  kappa;

    /*------------------------------------------------------------*/

    bl1_zdotsv2( BLIS1_NO_CONJUGATE,
                 n_V,
                 y1, rs_Y,
                 v1, cs_V,
                 a,  inc_a,
                 &zero,
                 &alpha,
                 &beta );

    bl1_zconjs( &alpha );
    bl1_zconjs( &beta );

    *tau1 = beta;

    bl1_zcopyconj( psi20_l, &gamma );
    bl1_zcopyconj( nu20_l,  &kappa );

    bl1_zaxmyv2( BLIS1_NO_CONJUGATE,
                 m_U,
                 &alpha,
                 &gamma,
                 u1, rs_U,
                 w,  inc_w,
                 al, inc_al );

    /* z1 points into Z, so it is walked with Z's row stride. */
    bl1_zaxmyv2( BLIS1_NO_CONJUGATE,
                 m_Z,
                 &beta,
                 &kappa,
                 z1, rs_Z,
                 w,  inc_w,
                 al, inc_al );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}