summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorspiros <andyspiros@gmail.com>2011-07-22 23:53:02 +0200
committerspiros <andyspiros@gmail.com>2011-07-22 23:53:02 +0200
commit47af1c14eaed0149ab6a3eb2b040d767a9fca82e (patch)
tree2e3552d44d74cda2408cc562eb2497711484c813
parentAdded 2D FFTW tests. Much work around FFTW. (diff)
downloadauto-numerical-bench-47af1c14eaed0149ab6a3eb2b040d767a9fca82e.tar.gz
auto-numerical-bench-47af1c14eaed0149ab6a3eb2b040d767a9fca82e.tar.bz2
auto-numerical-bench-47af1c14eaed0149ab6a3eb2b040d767a9fca82e.zip
Added support for parallel LU decomposition in the PBLAS module. Much
work on the distributed memory framework.
-rw-r--r--btl/actions/action_parallel_lu_decomp.hh116
-rw-r--r--btl/actions/action_parallel_matrix_vector_product.hh19
-rw-r--r--btl/generic_bench/init/init_function.hh2
-rw-r--r--btl/libs/BLACS/blacs_interface.hh37
-rw-r--r--btl/libs/LAPACK/lapack_interface.hh11
-rw-r--r--btl/libs/PBLAS/main.cpp10
-rw-r--r--btl/libs/PBLAS/pblas.h10
-rw-r--r--btl/libs/PBLAS/pblas_interface.hh24
-rw-r--r--btl/libs/PBLAS/pblas_interface_impl.hh12
-rw-r--r--pblas.py2
10 files changed, 197 insertions, 46 deletions
diff --git a/btl/actions/action_parallel_lu_decomp.hh b/btl/actions/action_parallel_lu_decomp.hh
new file mode 100644
index 0000000..4882a21
--- /dev/null
+++ b/btl/actions/action_parallel_lu_decomp.hh
@@ -0,0 +1,116 @@
+#ifndef ACTION_PARALLEL_LU_DECOMP_HH_
+#define ACTION_PARALLEL_LU_DECOMP_HH_
+
+#include "utilities.h"
+#include "init/init_function.hh"
+#include "init/init_vector.hh"
+
+#include "lapack_interface.hh"
+#include "STL_interface.hh"
+
+#include <string>
+
+template<class Interface>
+class Action_parallel_lu_decomp {
+
+public :
+
+ // Constructor
+ BTL_DONT_INLINE Action_parallel_lu_decomp( int size ) : _size(size)
+ {
+ MESSAGE("Action_parallel_lu_decomp Ctor");
+
+ int myid, procnum;
+ blacs_pinfo_(&myid, &procnum);
+ iamroot = (myid == 0);
+
+ // STL matrix and vector initialization
+ if (iamroot) {
+ init_vector<pseudo_random>(Global_A_stl, size*size);
+ }
+
+ Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, 64, 64);
+ LocalRows = desc[8];
+ LocalCols = Local_A_stl.size()/desc[8];
+
+ // Generic local matrix and vectors initialization
+ Interface::matrix_from_stl(Local_A_ref, Local_A_stl);
+ Interface::matrix_from_stl(Local_A , Local_A_stl);
+
+ _cost = 2.0*size*size*size/3.0 + static_cast<double>(size*size);
+ }
+
+
+ // Invalidate copy constructor
+ Action_parallel_lu_decomp(const Action_parallel_lu_decomp&)
+ {
+ INFOS("illegal call to Action_parallel_lu_decomp copy constructor");
+ exit(1);
+ }
+
+ // Destructor
+ ~Action_parallel_lu_decomp()
+ {
+ MESSAGE("Action_parallel_lu_decomp destructor");
+
+ // Deallocation
+ Interface::free_matrix(Local_A_ref, Local_A_stl.size());
+ Interface::free_matrix(Local_A , Local_A_stl.size());
+ }
+
+ // Action name
+ static inline std::string name()
+ {
+ return "lu_decomp_" + Interface::name();
+ }
+
+ double nb_op_base()
+ {
+ return _cost;
+ }
+
+ BTL_DONT_INLINE void initialize()
+ {
+ Interface::copy_matrix(Local_A_ref, Local_A, Local_A_stl.size());
+ }
+
+ BTL_DONT_INLINE void calculate()
+ {
+ Interface::parallel_lu_decomp(Local_A, desc);
+ }
+
+ BTL_DONT_INLINE void check_result()
+ {
+ /* Gather */
+ typename Interface::stl_matrix Global_Y;
+ typename Interface::stl_matrix Local_Y(Local_A_stl.size());
+ Interface::matrix_to_stl(Local_A, Local_Y);
+ Interface::gather_matrix(Global_Y, Local_Y, desc);
+
+ if (iamroot) {
+
+ typename Interface::gene_matrix A;
+ Interface::matrix_from_stl(A, Global_A_stl);
+ lapack_interface<typename Interface::real_type>::lu_decomp(&Global_A_stl[0], A, _size);
+ typename Interface::stl_vector correct(A, A+_size*_size);
+ typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Global_Y, correct);
+ if (error > 10e-5)
+ cerr << " { !! error : " << error << " !! } ";
+
+ Interface::free_matrix(A, _size*_size);
+ }
+ }
+
+private:
+ int _size, desc[9], LocalRows, LocalCols;
+ double _cost;
+ bool iamroot;
+
+ typename Interface::stl_matrix Global_A_stl;
+ typename Interface::stl_matrix Local_A_stl;
+ typename Interface::gene_matrix Local_A_ref;
+ typename Interface::gene_matrix Local_A;
+};
+
+
+#endif /* ACTION_PARALLEL_LU_DECOMP_HH_ */
diff --git a/btl/actions/action_parallel_matrix_vector_product.hh b/btl/actions/action_parallel_matrix_vector_product.hh
index 5c97b1d..67e64bf 100644
--- a/btl/actions/action_parallel_matrix_vector_product.hh
+++ b/btl/actions/action_parallel_matrix_vector_product.hh
@@ -10,8 +10,6 @@
#include "blas.h"
-using namespace std;
-
template<class Interface>
class Action_parallel_matrix_vector_product {
@@ -42,9 +40,9 @@ public :
init_vector<null_function>(Global_y_stl, GlobalRows);
}
- Interface::scatter_matrix(Global_A_stl, Local_A_stl, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols);
- Interface::scatter_matrix(Global_x_stl, Local_x_stl, GlobalCols, LocalXCols, BlockRows, BlockCols, LocalXRows, LocalXCols);
- Interface::scatter_matrix(Global_y_stl, Local_y_stl, GlobalRows, LocalYCols, BlockRows, BlockCols, LocalYRows, LocalYCols);
+ Interface::scatter_matrix(Global_A_stl, Local_A_stl, descA, GlobalRows, GlobalCols, BlockRows, BlockCols);
+ Interface::scatter_matrix(Global_x_stl, Local_x_stl, descX, GlobalCols, 1, BlockRows, BlockCols);
+ Interface::scatter_matrix(Global_y_stl, Local_y_stl, descY, GlobalRows, 1, BlockRows, BlockCols);
// generic local matrix and vectors initialization
@@ -54,17 +52,6 @@ public :
Interface::vector_from_stl(Local_x , Local_x_stl);
Interface::vector_from_stl(Local_y_ref, Local_y_stl);
Interface::vector_from_stl(Local_y , Local_y_stl);
-
- // Descinit
- int context = Interface::context();
- int info;
- int LDA, LDX, LDY;
- LDA = std::max(1, LocalRows);
- LDX = std::max(1, LocalXRows);
- LDY = std::max(1, LocalYRows);
- descinit_(descA, &GlobalRows, &GlobalCols, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LDA, &info);
- descinit_(descX, &GlobalCols, &iONE, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LDX, &info);
- descinit_(descY, &GlobalRows, &iONE, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LDY, &info);
}
// invalidate copy ctor
diff --git a/btl/generic_bench/init/init_function.hh b/btl/generic_bench/init/init_function.hh
index 7b3bdba..5401673 100644
--- a/btl/generic_bench/init/init_function.hh
+++ b/btl/generic_bench/init/init_function.hh
@@ -32,7 +32,7 @@ double simple_function(int index_i, int index_j)
double pseudo_random(int index)
{
- return std::rand()/double(RAND_MAX);
+ return static_cast<int>((std::rand()/double(RAND_MAX)-.5)*20);
}
double pseudo_random(int index_i, int index_j)
diff --git a/btl/libs/BLACS/blacs_interface.hh b/btl/libs/BLACS/blacs_interface.hh
index 00aafee..6932150 100644
--- a/btl/libs/BLACS/blacs_interface.hh
+++ b/btl/libs/BLACS/blacs_interface.hh
@@ -2,7 +2,13 @@
#define BTL_BLACS_INTERFACE_H
#include <vector>
+#include <algorithm>
#include "blacs.h"
+extern "C" {
+ void descinit_(int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*);
+ int numroc_(const int*, const int*, const int*, const int*, const int*);
+}
+
#include "scatter.h"
#include "gather.h"
@@ -80,6 +86,27 @@ public:
scatter(context(), GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols);
}
+ static void scatter_matrix(const stl_vector& GlobalMatrix, stl_vector& LocalMatrix,
+ int *desc,
+ const int& GlobalRows=0, const int& GlobalCols=0,
+ const int& BlockRows=0, const int& BlockCols=0
+ ) {
+ int GlobalRows_ = GlobalRows, GlobalCols_ = GlobalCols,
+ BlockRows_ = BlockRows, BlockCols_ = BlockCols,
+ LocalRows_, LocalCols_;
+ const int ctxt = context();
+ scatter(ctxt, GlobalMatrix, LocalMatrix,
+ GlobalRows_, GlobalCols_, BlockRows_, BlockCols_, LocalRows_, LocalCols_
+ );
+
+ const int iZERO = 0;
+ int info;
+ const int LLD = std::max(1, LocalRows_);
+ descinit_(desc, &GlobalRows_, &GlobalCols_, &BlockRows_, &BlockCols_,
+ &iZERO, &iZERO, &ctxt, &LLD, &info
+ );
+ }
+
static void gather_matrix(stl_vector& GlobalMatrix, const stl_vector& LocalMatrix,
int& GlobalRows, int& GlobalCols,
int& BlockRows, int& BlockCols,
@@ -88,6 +115,16 @@ public:
gather(context(), GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols);
}
+ static void gather_matrix(stl_vector& GlobalMatrix, const stl_vector& LocalMatrix,
+ int* desc
+ ) {
+ int GlobalRows = desc[2], GlobalCols = desc[3],
+ BlockRows = desc[4], BlockCols = desc[5],
+ LocalRows = desc[8], LocalCols = LocalMatrix.size()/desc[8];
+ const int ctxt = context();
+ gather(ctxt, GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols);
+ }
+
};
diff --git a/btl/libs/LAPACK/lapack_interface.hh b/btl/libs/LAPACK/lapack_interface.hh
index 33de2be..1840741 100644
--- a/btl/libs/LAPACK/lapack_interface.hh
+++ b/btl/libs/LAPACK/lapack_interface.hh
@@ -1,3 +1,6 @@
+#ifndef LAPACK_INTERFACE_HH
+#define LAPACK_INTERFACE_HH
+
#include <../BLAS/c_interface_base.h>
#include <complex>
@@ -24,8 +27,10 @@ void dsyev_(char*, char*, int*, double*, int*, double*, double*, int*, int*);
#define MAKE_STRING2(S) #S
#define MAKE_STRING(S) MAKE_STRING2(S)
-#define CAT2(A,B) A##B
-#define CAT(A,B) CAT2(A,B)
+#ifndef CAT
+# define CAT2(A,B) A##B
+# define CAT(A,B) CAT2(A,B)
+#endif
template <typename real> class lapack_interface;
@@ -51,3 +56,5 @@ static int zeroint = 0;
#include "lapack_interface_impl.hh"
#undef SCALAR
#undef SCALAR_PREFIX
+
+#endif /* LAPACK_INTERFACE_HH */
diff --git a/btl/libs/PBLAS/main.cpp b/btl/libs/PBLAS/main.cpp
index 4b64f12..a2aae2a 100644
--- a/btl/libs/PBLAS/main.cpp
+++ b/btl/libs/PBLAS/main.cpp
@@ -1,3 +1,5 @@
+#define DISTRIBUTED
+
#include "mpi.h"
#include "utilities.h"
#include "bench.hh"
@@ -5,9 +7,10 @@
#include <iostream>
//using namespace std;
-#include "blacsinit.hh"
#include "pblas_interface.hh"
+#include "blacsinit.hh"
#include "action_parallel_matrix_vector_product.hh"
+#include "action_parallel_lu_decomp.hh"
#include "action_parallel_axpy.hh"
#include <string>
@@ -19,8 +22,9 @@ int main(int argc, char **argv)
bool iamroot = blacsinit(&argc, &argv);
// distr_bench<Action_parallel_matrix_vector_product<pblas_interface<double> > >(10,MAX_MV,NB_POINT,!iamroot);
- distr_bench<Action_parallel_axpy<pblas_interface<REAL_TYPE> > >(10,MAX_MV,NB_POINT,!iamroot);
-// Action_parallel_axpy<pblas_interface<double> > action(8);
+// distr_bench<Action_parallel_axpy<pblas_interface<REAL_TYPE> > >(10,MAX_MV,NB_POINT,!iamroot);
+ distr_bench<Action_parallel_lu_decomp<pblas_interface<REAL_TYPE> > >(10,MAX_MV,NB_POINT,!iamroot);
+// Action_parallel_lu_decomp<pblas_interface<REAL_TYPE> > action(8);
// action.initialize();
// action.calculate();
// action.check_result();
diff --git a/btl/libs/PBLAS/pblas.h b/btl/libs/PBLAS/pblas.h
index adc6c91..2b5860e 100644
--- a/btl/libs/PBLAS/pblas.h
+++ b/btl/libs/PBLAS/pblas.h
@@ -6,7 +6,7 @@ extern "C" {
#endif
int numroc_(const int*, const int*, const int*, const int*, const int*);
- int descinit_(const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*);
+ void descinit_(int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*);
/* Level 1 */
@@ -42,6 +42,14 @@ extern "C" {
);
+
+ /*************
+ * Scalapack *
+ *************/
+ void psgetrf_(const int*, const int*, float*, const int*, const int*, const int*, int*, int*);
+ void pdgetrf_(const int*, const int*, double*, const int*, const int*, const int*, int*, int*);
+
+
#ifdef __cplusplus
}
#endif
diff --git a/btl/libs/PBLAS/pblas_interface.hh b/btl/libs/PBLAS/pblas_interface.hh
index b969e4e..cdfb70a 100644
--- a/btl/libs/PBLAS/pblas_interface.hh
+++ b/btl/libs/PBLAS/pblas_interface.hh
@@ -1,23 +1,3 @@
-//=====================================================
-// File : blas_interface.hh
-// Author : L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-//
-
#include "pblas.h"
#include "blacs_interface.hh"
@@ -32,7 +12,7 @@
template<class real> class pblas_interface;
-
+/*
static char notrans = 'N';
static char trans = 'T';
static char nonunit = 'N';
@@ -40,7 +20,7 @@ static char lower = 'L';
static char right = 'R';
static char left = 'L';
static int intone = 1;
-
+*/
#define SCALAR float
#define SCALAR_PREFIX s
diff --git a/btl/libs/PBLAS/pblas_interface_impl.hh b/btl/libs/PBLAS/pblas_interface_impl.hh
index b534a4e..c36c249 100644
--- a/btl/libs/PBLAS/pblas_interface_impl.hh
+++ b/btl/libs/PBLAS/pblas_interface_impl.hh
@@ -33,6 +33,7 @@ public:
real_type alpha = 1., beta = 0.;
int iONE = 1;
int myid, procnum;
+ const char notrans = 'N';
blacs_pinfo_(&myid, &procnum);
PBLAS_FUNC(gemv)(&notrans, &GlobalRows, &GlobalCols,
@@ -42,5 +43,16 @@ public:
);
}
+
+
+ static inline void parallel_lu_decomp(gene_matrix& X, const int* desc)
+ {
+ const int GlobalRows = desc[2], GlobalCols = desc[3];
+ const int iONE = 1;
+ int info;
+ std::vector<int> ipiv(desc[8] + desc[4]);
+ PBLAS_FUNC(getrf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc,
+ &ipiv[0], &info);
+ }
};
diff --git a/pblas.py b/pblas.py
index e1ec738..6f0f6cd 100644
--- a/pblas.py
+++ b/pblas.py
@@ -5,7 +5,7 @@ numproc = 4
class Module(btlbase.BTLBase):
def _initialize(self):
self.libname = "scalapack"
- self.avail = ['axpy', 'matrix_vector']
+ self.avail = ['axpy', 'matrix_vector', 'lu_decomp']
def _parse_args(self, args):
# Parse arguments