libflame  revision_anchor
Functions
FLA_Apply_pivots_ln.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Apply_pivots_ln_blk_var1 (FLA_Obj p, FLA_Obj A, fla_appiv_t *cntl)
FLA_Error FLA_Apply_pivots_ln_blk_var2 (FLA_Obj p, FLA_Obj A, fla_appiv_t *cntl)
FLA_Error FLA_Apply_pivots_ln_opt_var1 (FLA_Obj p, FLA_Obj A)
FLA_Error FLA_Apply_pivots_ln_opi_var1 (int n, int *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
FLA_Error FLA_Apply_pivots_ln_ops_var1 (int n, float *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
FLA_Error FLA_Apply_pivots_ln_opd_var1 (int n, double *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
FLA_Error FLA_Apply_pivots_ln_opc_var1 (int n, scomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
FLA_Error FLA_Apply_pivots_ln_opz_var1 (int n, dcomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)

Function Documentation

FLA_Error FLA_Apply_pivots_ln_blk_var1 ( FLA_Obj  p,
FLA_Obj  A,
fla_appiv_t *  cntl
)

References FLA_Apply_pivots_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Determine_blocksize(), FLA_Obj_width(), FLA_Part_1x2(), and FLA_Repart_1x2_to_1x3().

Referenced by FLA_Apply_pivots_ln().

{
  FLA_Obj A_left, A_right;
  FLA_Obj A_before, A_panel, A_after;
  dim_t   panel_width;

  // Split A into left/right column partitions; the loop below runs until
  // the left partition has grown to the full width of A.
  FLA_Part_1x2( A, &A_left, &A_right, 0, FLA_LEFT );

  for ( ; ; )
  {
    if ( !( FLA_Obj_width( A_left ) < FLA_Obj_width( A ) ) )
      break;

    // Panel width comes from the blocksize stored in the control tree.
    panel_width = FLA_Determine_blocksize( A_right, FLA_RIGHT,
                                           FLA_Cntl_blocksize( cntl ) );

    // Expose the next column panel between the two partitions.
    FLA_Repart_1x2_to_1x3( A_left, A_right,
                           &A_before, &A_panel, &A_after,
                           panel_width, FLA_RIGHT );

    // The whole pivot vector p is applied to the exposed column panel,
    // using the sub-control tree for the inner operation.
    FLA_Apply_pivots_internal( FLA_LEFT, FLA_NO_TRANSPOSE, p, A_panel,
                               FLA_Cntl_sub_appiv( cntl ) );

    // Fold the processed panel into the left partition.
    FLA_Cont_with_1x3_to_1x2( &A_left, &A_right,
                              A_before, A_panel, A_after,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}

FLA_Error FLA_Apply_pivots_ln_blk_var2 ( FLA_Obj  p,
FLA_Obj  A,
fla_appiv_t *  cntl
)

References FLA_Apply_pivots_internal(), FLA_Cont_with_3x1_to_2x1(), FLA_Determine_blocksize(), FLA_Obj_length(), FLA_Part_2x1(), and FLA_Repart_2x1_to_3x1().

Referenced by FLA_Apply_pivots_ln().

{
  FLA_Obj A_top, A_bottom;
  FLA_Obj A_above, A_block, A_below;

  FLA_Obj p_top, p_bottom;
  FLA_Obj p_above, p_block, p_below;

  dim_t   block_len;

  // A and p are partitioned top/bottom in lock step; the loop below runs
  // until the top partition of A has grown to the full length of A.
  FLA_Part_2x1( A, &A_top,
                   &A_bottom, 0, FLA_TOP );
  FLA_Part_2x1( p, &p_top,
                   &p_bottom, 0, FLA_TOP );

  for ( ; ; )
  {
    if ( !( FLA_Obj_length( A_top ) < FLA_Obj_length( A ) ) )
      break;

    // Block length comes from the blocksize stored in the control tree.
    block_len = FLA_Determine_blocksize( A_bottom, FLA_BOTTOM,
                                         FLA_Cntl_blocksize( cntl ) );

    // Expose the next row block of A and the matching block of p.
    FLA_Repart_2x1_to_3x1( A_top,    &A_above,
                                     &A_block,
                           A_bottom, &A_below, block_len, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( p_top,    &p_above,
                                     &p_block,
                           p_bottom, &p_below, block_len, FLA_BOTTOM );

    // The exposed pivot block is applied to the entire bottom partition
    // of A (A_bottom), not only to the exposed row block.
    FLA_Apply_pivots_internal( FLA_LEFT, FLA_NO_TRANSPOSE, p_block, A_bottom,
                               FLA_Cntl_sub_appiv( cntl ) );

    // Fold the processed blocks into the top partitions.
    FLA_Cont_with_3x1_to_2x1( &A_top,    A_above,
                                         A_block,
                              &A_bottom, A_below, FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &p_top,    p_above,
                                         p_block,
                              &p_bottom, p_below, FLA_TOP );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_pivots_ln_opc_var1 ( int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
int  k1,
int  k2,
int *  p,
int  incp 
)

Referenced by FLA_Apply_pivots_ln_opt_var1(), FLA_Apply_pivots_lt_opt_var1(), FLA_Apply_pivots_rn_opt_var1(), FLA_Apply_pivots_rt_opt_var1(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), and FLA_LU_piv_opc_var5().

{
    // A positive incp walks rows k1..k2 upward; any other incp walks them
    // downward from k2 to k1. In both cases the pivot vector is indexed
    // with the negation-adjusted (non-negative) stride.
    const int forward    = ( incp > 0 );
    const int row_first  = forward ? k1     : k2;
    const int row_stop   = forward ? k2 + 1 : k1 - 1;
    const int row_step   = forward ? 1      : -1;
    const int piv_stride = forward ? incp   : -1*incp;
    int       row, col;

    if ( a_rs != 1 && a_rs >= a_cs )
    {
        // Row stride is neither one nor smaller than the column stride:
        // complete each whole-row interchange before taking the next pivot.
        for ( row = row_first; row != row_stop; row += row_step )
        {
            scomplex* this_row  = a + (                      row )*a_rs;
            // Pivot entries are relative to the current row, so add row
            // to obtain the absolute row index.
            scomplex* pivot_row = a + ( p[row*piv_stride] + row )*a_rs;

            for ( col = 0; col < n; col++ )
            {
                scomplex* here  = this_row  + col*a_cs;
                scomplex* there = pivot_row + col*a_cs;
                scomplex  held  = *there;

                *there = *here;
                *here  = held;
            }
        }

        return FLA_SUCCESS;
    }

    // Row stride is one, or smaller than the column stride: run the full
    // sequence of interchanges on one column before moving to the next.
    for ( col = 0; col < n; col++ )
    {
        scomplex* col_top = a + col*a_cs;

        for ( row = row_first; row != row_stop; row += row_step )
        {
            scomplex* here  = col_top + (                      row )*a_rs;
            // Pivot entries are relative to the current row, so add row
            // to obtain the absolute row index.
            scomplex* there = col_top + ( p[row*piv_stride] + row )*a_rs;
            scomplex  held  = *there;

            *there = *here;
            *here  = held;
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_pivots_ln_opd_var1 ( int  n,
double *  a,
int  a_rs,
int  a_cs,
int  k1,
int  k2,
int *  p,
int  incp 
)

Referenced by FLA_Apply_pivots_ln_opt_var1(), FLA_Apply_pivots_lt_opt_var1(), FLA_Apply_pivots_rn_opt_var1(), FLA_Apply_pivots_rt_opt_var1(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), and FLA_LU_piv_opd_var5().

{
    // A positive incp walks rows k1..k2 upward; any other incp walks them
    // downward from k2 to k1. In both cases the pivot vector is indexed
    // with the negation-adjusted (non-negative) stride.
    const int forward    = ( incp > 0 );
    const int row_first  = forward ? k1     : k2;
    const int row_stop   = forward ? k2 + 1 : k1 - 1;
    const int row_step   = forward ? 1      : -1;
    const int piv_stride = forward ? incp   : -1*incp;
    int       row, col;

    if ( a_rs != 1 && a_rs >= a_cs )
    {
        // Row stride is neither one nor smaller than the column stride:
        // complete each whole-row interchange before taking the next pivot.
        for ( row = row_first; row != row_stop; row += row_step )
        {
            double* this_row  = a + (                      row )*a_rs;
            // Pivot entries are relative to the current row, so add row
            // to obtain the absolute row index.
            double* pivot_row = a + ( p[row*piv_stride] + row )*a_rs;

            for ( col = 0; col < n; col++ )
            {
                double* here  = this_row  + col*a_cs;
                double* there = pivot_row + col*a_cs;
                double  held  = *there;

                *there = *here;
                *here  = held;
            }
        }

        return FLA_SUCCESS;
    }

    // Row stride is one, or smaller than the column stride: run the full
    // sequence of interchanges on one column before moving to the next.
    for ( col = 0; col < n; col++ )
    {
        double* col_top = a + col*a_cs;

        for ( row = row_first; row != row_stop; row += row_step )
        {
            double* here  = col_top + (                      row )*a_rs;
            // Pivot entries are relative to the current row, so add row
            // to obtain the absolute row index.
            double* there = col_top + ( p[row*piv_stride] + row )*a_rs;
            double  held  = *there;

            *there = *here;
            *here  = held;
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_pivots_ln_opi_var1 ( int  n,
int *  a,
int  a_rs,
int  a_cs,
int  k1,
int  k2,
int *  p,
int  incp 
)

Referenced by FLA_Apply_pivots_ln_opt_var1(), FLA_Apply_pivots_lt_opt_var1(), FLA_Apply_pivots_rn_opt_var1(), and FLA_Apply_pivots_rt_opt_var1().

{
    // A positive incp walks rows k1..k2 upward; any other incp walks them
    // downward from k2 to k1. In both cases the pivot vector is indexed
    // with the negation-adjusted (non-negative) stride.
    const int forward    = ( incp > 0 );
    const int row_first  = forward ? k1     : k2;
    const int row_stop   = forward ? k2 + 1 : k1 - 1;
    const int row_step   = forward ? 1      : -1;
    const int piv_stride = forward ? incp   : -1*incp;
    int       row, col;

    if ( a_rs != 1 && a_rs >= a_cs )
    {
        // Row stride is neither one nor smaller than the column stride:
        // complete each whole-row interchange before taking the next pivot.
        for ( row = row_first; row != row_stop; row += row_step )
        {
            int* this_row  = a + (                      row )*a_rs;
            // Pivot entries are relative to the current row, so add row
            // to obtain the absolute row index.
            int* pivot_row = a + ( p[row*piv_stride] + row )*a_rs;

            for ( col = 0; col < n; col++ )
            {
                int* here  = this_row  + col*a_cs;
                int* there = pivot_row + col*a_cs;
                int  held  = *there;

                *there = *here;
                *here  = held;
            }
        }

        return FLA_SUCCESS;
    }

    // Row stride is one, or smaller than the column stride: run the full
    // sequence of interchanges on one column before moving to the next.
    for ( col = 0; col < n; col++ )
    {
        int* col_top = a + col*a_cs;

        for ( row = row_first; row != row_stop; row += row_step )
        {
            int* here  = col_top + (                      row )*a_rs;
            // Pivot entries are relative to the current row, so add row
            // to obtain the absolute row index.
            int* there = col_top + ( p[row*piv_stride] + row )*a_rs;
            int  held  = *there;

            *there = *here;
            *here  = held;
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_pivots_ln_ops_var1 ( int  n,
float *  a,
int  a_rs,
int  a_cs,
int  k1,
int  k2,
int *  p,
int  incp 
)

Referenced by FLA_Apply_pivots_ln_opt_var1(), FLA_Apply_pivots_lt_opt_var1(), FLA_Apply_pivots_rn_opt_var1(), FLA_Apply_pivots_rt_opt_var1(), FLA_LU_piv_ops_var3(), FLA_LU_piv_ops_var4(), and FLA_LU_piv_ops_var5().

{
    // A positive incp walks rows k1..k2 upward; any other incp walks them
    // downward from k2 to k1. In both cases the pivot vector is indexed
    // with the negation-adjusted (non-negative) stride.
    const int forward    = ( incp > 0 );
    const int row_first  = forward ? k1     : k2;
    const int row_stop   = forward ? k2 + 1 : k1 - 1;
    const int row_step   = forward ? 1      : -1;
    const int piv_stride = forward ? incp   : -1*incp;
    int       row, col;

    if ( a_rs != 1 && a_rs >= a_cs )
    {
        // Row stride is neither one nor smaller than the column stride:
        // complete each whole-row interchange before taking the next pivot.
        for ( row = row_first; row != row_stop; row += row_step )
        {
            float* this_row  = a + (                      row )*a_rs;
            // Pivot entries are relative to the current row, so add row
            // to obtain the absolute row index.
            float* pivot_row = a + ( p[row*piv_stride] + row )*a_rs;

            for ( col = 0; col < n; col++ )
            {
                float* here  = this_row  + col*a_cs;
                float* there = pivot_row + col*a_cs;
                float  held  = *there;

                *there = *here;
                *here  = held;
            }
        }

        return FLA_SUCCESS;
    }

    // Row stride is one, or smaller than the column stride: run the full
    // sequence of interchanges on one column before moving to the next.
    for ( col = 0; col < n; col++ )
    {
        float* col_top = a + col*a_cs;

        for ( row = row_first; row != row_stop; row += row_step )
        {
            float* here  = col_top + (                      row )*a_rs;
            // Pivot entries are relative to the current row, so add row
            // to obtain the absolute row index.
            float* there = col_top + ( p[row*piv_stride] + row )*a_rs;
            float  held  = *there;

            *there = *here;
            *here  = held;
        }
    }

    return FLA_SUCCESS;
}

FLA_Error FLA_Apply_pivots_ln_opt_var1 ( FLA_Obj  p,
FLA_Obj  A
)

References FLA_Apply_pivots_ln_opc_var1(), FLA_Apply_pivots_ln_opd_var1(), FLA_Apply_pivots_ln_opi_var1(), FLA_Apply_pivots_ln_ops_var1(), FLA_Apply_pivots_ln_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), and FLA_Obj_width().

Referenced by FLA_Apply_pivots_ln().

{
  // Gather everything the typed kernels need from the two objects.
  FLA_Datatype dt_A       = FLA_Obj_datatype( A );
  int          num_cols   = FLA_Obj_width( A );
  int          row_stride = FLA_Obj_row_stride( A );
  int          col_stride = FLA_Obj_col_stride( A );
  int          piv_inc    = FLA_Obj_vector_inc( p );
  // Zero-based pivot range covering the whole of p: 0 .. dim(p) - 1.
  int          first_piv  = 0;
  int          last_piv   = ( int ) FLA_Obj_vector_dim( p ) - 1;

  // Dispatch on the datatype of A; the pivot buffer is always read as int.
  // A datatype matching none of the branches performs no interchange and
  // still returns FLA_SUCCESS.
  if ( dt_A == FLA_INT )
  {
    int* A_buf = FLA_INT_PTR( A );
    int* p_buf = FLA_INT_PTR( p );

    FLA_Apply_pivots_ln_opi_var1( num_cols,
                                  A_buf, row_stride, col_stride,
                                  first_piv, last_piv,
                                  p_buf, piv_inc );
  }
  else if ( dt_A == FLA_FLOAT )
  {
    float* A_buf = FLA_FLOAT_PTR( A );
    int*   p_buf = FLA_INT_PTR( p );

    FLA_Apply_pivots_ln_ops_var1( num_cols,
                                  A_buf, row_stride, col_stride,
                                  first_piv, last_piv,
                                  p_buf, piv_inc );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    double* A_buf = FLA_DOUBLE_PTR( A );
    int*    p_buf = FLA_INT_PTR( p );

    FLA_Apply_pivots_ln_opd_var1( num_cols,
                                  A_buf, row_stride, col_stride,
                                  first_piv, last_piv,
                                  p_buf, piv_inc );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex* A_buf = FLA_COMPLEX_PTR( A );
    int*      p_buf = FLA_INT_PTR( p );

    FLA_Apply_pivots_ln_opc_var1( num_cols,
                                  A_buf, row_stride, col_stride,
                                  first_piv, last_piv,
                                  p_buf, piv_inc );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* A_buf = FLA_DOUBLE_COMPLEX_PTR( A );
    int*      p_buf = FLA_INT_PTR( p );

    FLA_Apply_pivots_ln_opz_var1( num_cols,
                                  A_buf, row_stride, col_stride,
                                  first_piv, last_piv,
                                  p_buf, piv_inc );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_pivots_ln_opz_var1 ( int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
int  k1,
int  k2,
int *  p,
int  incp 
)

Referenced by FLA_Apply_pivots_ln_opt_var1(), FLA_Apply_pivots_lt_opt_var1(), FLA_Apply_pivots_rn_opt_var1(), FLA_Apply_pivots_rt_opt_var1(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), and FLA_LU_piv_opz_var5().

{
    // A positive incp walks rows k1..k2 upward; any other incp walks them
    // downward from k2 to k1. In both cases the pivot vector is indexed
    // with the negation-adjusted (non-negative) stride.
    const int forward    = ( incp > 0 );
    const int row_first  = forward ? k1     : k2;
    const int row_stop   = forward ? k2 + 1 : k1 - 1;
    const int row_step   = forward ? 1      : -1;
    const int piv_stride = forward ? incp   : -1*incp;
    int       row, col;

    if ( a_rs != 1 && a_rs >= a_cs )
    {
        // Row stride is neither one nor smaller than the column stride:
        // complete each whole-row interchange before taking the next pivot.
        for ( row = row_first; row != row_stop; row += row_step )
        {
            dcomplex* this_row  = a + (                      row )*a_rs;
            // Pivot entries are relative to the current row, so add row
            // to obtain the absolute row index.
            dcomplex* pivot_row = a + ( p[row*piv_stride] + row )*a_rs;

            for ( col = 0; col < n; col++ )
            {
                dcomplex* here  = this_row  + col*a_cs;
                dcomplex* there = pivot_row + col*a_cs;
                dcomplex  held  = *there;

                *there = *here;
                *here  = held;
            }
        }

        return FLA_SUCCESS;
    }

    // Row stride is one, or smaller than the column stride: run the full
    // sequence of interchanges on one column before moving to the next.
    for ( col = 0; col < n; col++ )
    {
        dcomplex* col_top = a + col*a_cs;

        for ( row = row_first; row != row_stop; row += row_step )
        {
            dcomplex* here  = col_top + (                      row )*a_rs;
            // Pivot entries are relative to the current row, so add row
            // to obtain the absolute row index.
            dcomplex* there = col_top + ( p[row*piv_stride] + row )*a_rs;
            dcomplex  held  = *there;

            *there = *here;
            *here  = held;
        }
    }

    return FLA_SUCCESS;
}