summaryrefslogtreecommitdiff
path: root/btl
diff options
context:
space:
mode:
authorspiros <andyspiros@gmail.com>2011-07-17 02:10:23 +0200
committerspiros <andyspiros@gmail.com>2011-07-17 02:10:23 +0200
commitb313e5056658a236113931275dd92c2ff366b0ba (patch)
tree21559b074a753e972b8291fc60b15375d554c290 /btl
parentmain.py executable (diff)
downloadauto-numerical-bench-b313e5056658a236113931275dd92c2ff366b0ba.tar.gz
auto-numerical-bench-b313e5056658a236113931275dd92c2ff366b0ba.tar.bz2
auto-numerical-bench-b313e5056658a236113931275dd92c2ff366b0ba.zip
Initial work on distributed-memory benchmarks.
Diffstat (limited to 'btl')
-rw-r--r--btl/actions/action_parallel_matrix_vector_product.hh172
-rw-r--r--btl/generic_bench/bench.hh20
-rw-r--r--btl/generic_bench/timers/portable_perf_analyzer.hh5
-rw-r--r--btl/libs/BLACS/blacs.h33
-rw-r--r--btl/libs/BLACS/blacs_interface.hh94
-rw-r--r--btl/libs/BLACS/gather.h13
-rw-r--r--btl/libs/BLACS/gather_impl.h116
-rw-r--r--btl/libs/BLACS/scatter.h17
-rw-r--r--btl/libs/BLACS/scatter_impl.h109
-rw-r--r--btl/libs/PBLAS/main.cpp32
-rw-r--r--btl/libs/PBLAS/pblas.h28
-rw-r--r--btl/libs/PBLAS/pblas_interface.hh59
-rw-r--r--btl/libs/PBLAS/pblas_interface_impl.hh33
13 files changed, 720 insertions, 11 deletions
diff --git a/btl/actions/action_parallel_matrix_vector_product.hh b/btl/actions/action_parallel_matrix_vector_product.hh
new file mode 100644
index 0000000..c166e01
--- /dev/null
+++ b/btl/actions/action_parallel_matrix_vector_product.hh
@@ -0,0 +1,172 @@
+//=====================================================
+// File : action_matrix_vector_product.hh
+// Author : L. Plagne <laurent.plagne@edf.fr)>
+// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef ACTION_MATRIX_VECTOR_PRODUCT
+#define ACTION_MATRIX_VECTOR_PRODUCT
+#include "utilities.h"
+#include "STL_interface.hh"
+#include <string>
+#include "init/init_function.hh"
+#include "init/init_vector.hh"
+#include "init/init_matrix.hh"
+
+using namespace std;
+
+template<class Interface>
+class Action_parallel_matrix_vector_product {
+
+public :
+
+ // Ctor
+
+ BTL_DONT_INLINE Action_parallel_matrix_vector_product( int size ):_size(size)
+ {
+ MESSAGE("Action_parallel_matrix_vector_product Ctor");
+ const int iZERO = 0, iONE = 1;
+
+ GlobalRows = _size;
+ GlobalCols = _size;
+ BlockRows = 2;
+ BlockCols= 2;
+ LocalXCols = 1;
+ LocalYCols = 1;
+
+ int myid, procnum;
+ blacs_pinfo_(&myid, &procnum);
+ iamroot = (myid == 0);
+
+ // STL matrix and vector initialization
+ if (iamroot) {
+ init_vector<pseudo_random>(Global_A_stl, GlobalRows*GlobalCols);
+ init_vector<pseudo_random>(Global_x_stl, GlobalCols);
+ init_vector<null_function>(Global_y_stl, GlobalRows);
+
+ // Compute YTest (?)
+ }
+
+ Interface::scatter_matrix(Global_A_stl, Local_A_stl, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols);
+ Interface::scatter_matrix(Global_x_stl, Local_x_stl, GlobalCols, LocalXCols, BlockRows, BlockCols, LocalXRows, LocalXCols);
+ Interface::scatter_matrix(Global_y_stl, Local_y_stl, GlobalRows, LocalYCols, BlockRows, BlockCols, LocalYRows, LocalYCols);
+
+ // generic local matrix and vectors initialization
+
+ Interface::matrix_from_stl(Local_A_ref, Local_A_stl);
+ Interface::matrix_from_stl(Local_A , Local_A_stl);
+ Interface::vector_from_stl(Local_x_ref, Local_x_stl);
+ Interface::vector_from_stl(Local_x , Local_x_stl);
+ Interface::vector_from_stl(Local_y_ref, Local_y_stl);
+ Interface::vector_from_stl(Local_y , Local_y_stl);
+
+ // Descinit
+ int context = Interface::context();
+ int info;
+ descinit_(descA, &GlobalRows, &GlobalCols, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LocalRows, &info);
+ descinit_(descX, &GlobalCols, &iONE, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LocalXRows, &info);
+ descinit_(descY, &GlobalRows, &iONE, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LocalYRows, &info);
+ }
+
+ // invalidate copy ctor
+
+ Action_parallel_matrix_vector_product( const Action_parallel_matrix_vector_product & )
+ {
+ INFOS("illegal call to Action_parallel_matrix_vector_product Copy Ctor");
+ exit(1);
+ }
+
+ // Dtor
+
+ BTL_DONT_INLINE ~Action_parallel_matrix_vector_product( void ){
+
+ MESSAGE("Action_parallel_matrix_vector_product Dtor");
+
+ // deallocation
+
+ Interface::free_matrix(Local_A_ref, GlobalRows*GlobalCols);;
+ Interface::free_vector(Local_x_ref);
+ Interface::free_vector(Local_y_ref);
+
+ Interface::free_matrix(Local_A, GlobalRows*GlobalCols);;
+ Interface::free_vector(Local_x);
+ Interface::free_vector(Local_y);
+
+ }
+
+ // action name
+
+ static inline std::string name( void )
+ {
+ return "parallel_matrix_vector_" + Interface::name();
+ }
+
+ double nb_op_base( void ){
+ return 2.0*_size*_size;
+ }
+
+ BTL_DONT_INLINE void initialize( void ){
+
+ Interface::copy_matrix(Local_A_ref,Local_A,LocalRows*LocalCols);
+ Interface::copy_vector(Local_x_ref,Local_x,LocalXRows*LocalXCols);
+ Interface::copy_vector(Local_y_ref,Local_y,LocalYRows*LocalYCols);
+
+ }
+
+ BTL_DONT_INLINE void calculate( void ) {
+ BTL_ASM_COMMENT("#begin matrix_vector_product");
+ Interface::parallel_matrix_vector_product(GlobalRows, GlobalCols, Local_A, descA, Local_x, descX, Local_y, descY);
+ BTL_ASM_COMMENT("end matrix_vector_product");
+ }
+
+ BTL_DONT_INLINE void check_result( void ){
+
+ // calculation check
+
+ // TODO
+
+ }
+
+private :
+
+ typename Interface::stl_matrix Global_A_stl;
+ typename Interface::stl_vector Global_x_stl;
+ typename Interface::stl_vector Global_y_stl;
+
+ typename Interface::stl_matrix Local_A_stl;
+ typename Interface::stl_vector Local_x_stl;
+ typename Interface::stl_vector Local_y_stl;
+
+ typename Interface::gene_matrix Local_A_ref;
+ typename Interface::gene_vector Local_x_ref;
+ typename Interface::gene_vector Local_y_ref;
+
+ typename Interface::gene_matrix Local_A;
+ typename Interface::gene_vector Local_x;
+ typename Interface::gene_vector Local_y;
+
+ bool iamroot;
+ int _size, GlobalRows, GlobalCols, LocalRows, LocalCols, BlockRows, BlockCols;
+ int LocalXRows, LocalXCols, LocalYRows, LocalYCols;
+ int descA[9], descX[9], descY[9];
+
+};
+
+
+#endif
+
+
+
diff --git a/btl/generic_bench/bench.hh b/btl/generic_bench/bench.hh
index 005c363..d9906a4 100644
--- a/btl/generic_bench/bench.hh
+++ b/btl/generic_bench/bench.hh
@@ -38,14 +38,14 @@ extern "C" void cblas_saxpy(const int, const float, const float*, const int, flo
using namespace std;
template <template<class> class Perf_Analyzer, class Action>
-BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
+BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point, bool silent = false )
{
if (BtlConfig::skipAction(Action::name()))
return;
string filename="bench_"+Action::name()+".dat";
- INFOS("starting " <<filename);
+ if (!silent) { INFOS("starting " <<filename); }
// utilities
@@ -65,7 +65,8 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
for (int i=nb_point-1;i>=0;i--)
{
//INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")");
- std::cout << " " << "size = " << tab_sizes[i] << " " << std::flush;
+ if (!silent)
+ std::cout << " " << "size = " << tab_sizes[i] << " " << std::flush;
BTL_DISABLE_SSE_EXCEPTIONS();
#ifdef HAVE_MKL
@@ -75,14 +76,14 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
}
#endif
- tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]);
- std::cout << tab_mflops[i];
+ tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i], silent);
+ if (!silent) std::cout << tab_mflops[i];
if (hasOldResults)
{
while (oldi>=0 && oldSizes[oldi]>tab_sizes[i])
--oldi;
- if (oldi>=0 && oldSizes[oldi]==tab_sizes[i])
+ if (oldi>=0 && oldSizes[oldi]==tab_sizes[i] && !silent)
{
if (oldFlops[oldi]<tab_mflops[i])
std::cout << "\t > ";
@@ -92,6 +93,7 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
}
--oldi;
}
+ if (!silent)
std::cout << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl;
}
@@ -144,17 +146,17 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
}
// dump the result in a file :
- dump_xy_file(tab_sizes,tab_mflops,filename);
+ if (!silent) dump_xy_file(tab_sizes,tab_mflops,filename);
}
// default Perf Analyzer
template <class Action>
-BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point ){
+BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point, bool silent ){
// if the rdtsc is not available :
- bench<Portable_Perf_Analyzer,Action>(size_min,size_max,nb_point);
+ bench<Portable_Perf_Analyzer,Action>(size_min,size_max,nb_point,silent);
// if the rdtsc is available :
// bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point);
diff --git a/btl/generic_bench/timers/portable_perf_analyzer.hh b/btl/generic_bench/timers/portable_perf_analyzer.hh
index fc0f316..161992f 100644
--- a/btl/generic_bench/timers/portable_perf_analyzer.hh
+++ b/btl/generic_bench/timers/portable_perf_analyzer.hh
@@ -38,7 +38,7 @@ public:
MESSAGE("Portable_Perf_Analyzer Dtor");
};
- BTL_DONT_INLINE double eval_mflops(int size)
+ BTL_DONT_INLINE double eval_mflops(int size, bool silent = false)
{
Action action(size);
@@ -56,7 +56,8 @@ public:
for (int i=1; i<BtlConfig::Instance.tries; ++i)
{
Action _action(size);
- std::cout << " " << _action.nb_op_base()*_nb_calc/(m_time_action*1e6) << " ";
+ if (!silent)
+ std::cout << " " << _action.nb_op_base()*_nb_calc/(m_time_action*1e6) << " ";
_action.initialize();
m_time_action = std::min(m_time_action, time_calculate(_action));
}
diff --git a/btl/libs/BLACS/blacs.h b/btl/libs/BLACS/blacs.h
new file mode 100644
index 0000000..f3673d8
--- /dev/null
+++ b/btl/libs/BLACS/blacs.h
@@ -0,0 +1,33 @@
+#ifndef BLACS_H_
+#define BLACS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /* BLACS declarations */
+ void blacs_pinfo_(int*, int*);
+ void blacs_get_(int*, int*, int*);
+ void blacs_gridinit_(int*, const char*, int*, int*);
+ void blacs_gridinfo_(const int*, int*, int*, int*, int*);
+ void blacs_pcoord_(const int*, const int*, int*, int*);
+ void blacs_gridexit_(int*);
+ void blacs_exit_(int*);
+ void blacs_barrier_(const int*, const char*);
+ void dgerv2d_(const int*, const int*, const int*, double*, const int*, const int*, const int*);
+ void dgesd2d_(const int*, const int*, const int*, const double*, const int*, const int*, const int*);
+ void sgerv2d_(const int*, const int*, const int*, float*, const int*, const int*, const int*);
+ void sgesd2d_(const int*, const int*, const int*, const float*, const int*, const int*, const int*);
+ void igebs2d_(const int*, const char*, const char*, const int*, const int*, const int*, const int*);
+ void igebr2d_(const int*, const char*, const char*, const int*, const int*, int*, const int*, const int*, const int*);
+ void dgebs2d_(const int*, const char*, const char*, const int*, const int*, const double*, const int*);
+ void dgebr2d_(const int*, const char*, const char*, const int*, const int*, double*, const int*, const int*, const int*);
+ void dgsum2d_(const int*, const char*, const char*, const int*, const int*, double*, const int*, const int*, const int*);
+ void igsum2d_(const int*, const char*, const char*, const int*, const int*, int*, const int*, const int*, const int*);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* BLACS_H_ */
diff --git a/btl/libs/BLACS/blacs_interface.hh b/btl/libs/BLACS/blacs_interface.hh
new file mode 100644
index 0000000..00aafee
--- /dev/null
+++ b/btl/libs/BLACS/blacs_interface.hh
@@ -0,0 +1,94 @@
+#ifndef BTL_BLACS_INTERFACE_H
+#define BTL_BLACS_INTERFACE_H
+
+#include <vector>
+#include "blacs.h"
+#include "scatter.h"
+#include "gather.h"
+
+template<typename real>
+class blacs_interface
+{
+
+public:
+ typedef real real_type;
+ typedef std::vector<real_type> stl_vector;
+ typedef stl_vector stl_matrix;
+
+ typedef real* gene_matrix;
+ typedef real* gene_vector;
+
+
+ static void free_matrix(gene_matrix & A, int N){
+ delete A;
+ }
+
+ static void free_vector(gene_vector & B){
+ delete B;
+ }
+
+ static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
+ int N = A_stl.size();
+ A = new real[N];
+ for (int j=0;j<N;j++)
+ A[j] = A_stl[j];
+ }
+
+ static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
+ int N = B_stl.size();
+ B = new real[N];
+ for (int i=0;i<N;i++)
+ B[i] = B_stl[i];
+ }
+
+ static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
+ int N = B_stl.size();
+ for (int i=0;i<N;i++)
+ B_stl[i] = B[i];
+ }
+
+ static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
+ int N = A_stl.size();
+ for (int i=0;i<N;i++)
+ A_stl[i] = A[i];
+ }
+
+ static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
+ for (int i=0;i<N;i++)
+ cible[i]=source[i];
+ }
+
+ static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
+ for (int i=0;i<N;i++)
+ cible[i]=source[i];
+ }
+
+
+public:
+ static int context() {
+ int ctxt, ignored, what = 0;
+ blacs_get_(&ignored, &what, &ctxt);
+ return ctxt;
+ }
+
+
+ static void scatter_matrix(const stl_vector& GlobalMatrix, stl_vector& LocalMatrix,
+ int& GlobalRows, int& GlobalCols,
+ int& BlockRows, int& BlockCols,
+ int& LocalRows, int& LocalCols
+ ) {
+ scatter(context(), GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols);
+ }
+
+ static void gather_matrix(stl_vector& GlobalMatrix, const stl_vector& LocalMatrix,
+ int& GlobalRows, int& GlobalCols,
+ int& BlockRows, int& BlockCols,
+ int& LocalRows, int& LocalCols
+ ) {
+ gather(context(), GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols);
+ }
+
+
+};
+
+#endif /* BTL_BLACS_INTERFACE_H */
diff --git a/btl/libs/BLACS/gather.h b/btl/libs/BLACS/gather.h
new file mode 100644
index 0000000..101d975
--- /dev/null
+++ b/btl/libs/BLACS/gather.h
@@ -0,0 +1,13 @@
+#ifndef GATHER_H_
+#define GATHER_H_
+
+//#define TYPENAME float
+//#define TYPEPREFIX s
+//#include "gather_impl.h"
+
+#define TYPENAME double
+#define TYPEPREFIX d
+#include "gather_impl.h"
+
+
+#endif /* GATHER_H_ */
diff --git a/btl/libs/BLACS/gather_impl.h b/btl/libs/BLACS/gather_impl.h
new file mode 100644
index 0000000..7683454
--- /dev/null
+++ b/btl/libs/BLACS/gather_impl.h
@@ -0,0 +1,116 @@
+#define PRFX d
+#define CAT_(x,y) x##y
+#define CAT(x,y) CAT_(x,y)
+
+#ifndef TYPENAME
+# define TYPENAME double
+# define TYPEPREFIX d
+#endif
+
+#define FUNCNAME(name) CAT(CAT(TYPEPREFIX, name),_)
+#define vector_t std::vector<TYPENAME>
+
+#include <vector>
+
+
+inline void gather(
+ const int& context, // [IN]
+ vector_t& GlobalMatrixVector, // [OUT] Only relevant for root
+ const vector_t& LocalMatrixVector, // [IN]
+ int& GlobalRows, // [OUT]
+ int& GlobalCols, // [OUT]
+ int& BlockRows, // [IN (root) / OUT (other)]
+ int& BlockCols, // [IN (root) / OUT (other)]
+ int& LocalRows, // [IN]
+ int& LocalCols, // [IN]
+ const int& rootrow = 0, // [IN]
+ const int& rootcol = 0 // [IN]
+) {
+ /* Helper variables */
+ int iONE = 1, iTWO = 2, imONE = -1;
+
+ int myid, myrow, mycol, procrows, proccols, procnum;
+ blacs_pinfo_(&myid, &procnum);
+ blacs_gridinfo_(&context, &procrows, &proccols, &myrow, &mycol);
+ bool iamroot = (myrow == rootrow && mycol == rootcol);
+ double *GlobalMatrix;
+ const double *LocalMatrix = &LocalMatrixVector[0];
+
+ /* Broadcast matrix info */
+ int binfo[2];
+ if (iamroot) {
+ binfo[0] = BlockRows;
+ binfo[1] = BlockCols;
+
+ igebs2d_(&context, "All", " ", &iTWO, &iONE, binfo, &iTWO);
+ } else {
+ igebr2d_(&context, "All", " ", &iTWO, &iONE, binfo, &iTWO,
+ &rootrow, &rootcol);
+ }
+ BlockRows = binfo[0];
+ BlockCols = binfo[1];
+
+ /* Retrieve matrix global dimensions */
+ int minfo[2];
+ minfo[0] = LocalRows; minfo[1] = LocalCols;
+ igsum2d_(&context, "Col", " ", &iONE, &iONE, minfo, &iONE, &imONE, &imONE);
+ igsum2d_(&context, "Row", " ", &iONE, &iONE, minfo+1, &iONE, &imONE, &imONE);
+ if(iamroot) {
+ GlobalRows = minfo[0]; GlobalCols = minfo[1];
+ }
+
+
+ /* Reserve space on root */
+ if (iamroot) {
+ GlobalMatrixVector.resize(GlobalRows*GlobalCols);
+ GlobalMatrix = &GlobalMatrixVector[0];
+ }
+
+ /* Gather matrix */
+ int srcr = 0, srcc = 0;
+ int SendRows, SendCols;
+ int StartRow = 0, StartCol = 0;
+ for (int r = 0; r < GlobalRows; r += BlockRows, srcr=(srcr+1)%procrows) {
+ srcc = 0;
+
+ // Is this the last row bloc?
+ SendRows = BlockRows;
+ if (GlobalRows-r < BlockRows)
+ SendRows = GlobalRows-r;
+ if (SendRows <= 0)
+ SendRows = 0;
+
+ for (int c=0; c<GlobalCols; c+=BlockCols, srcc=(srcc+1)%proccols) {
+
+ // Is this the last column block?
+ SendCols = BlockCols;
+ if (GlobalCols-c < BlockCols)
+ SendCols = GlobalCols-c;
+
+ // Send data
+ if (myrow == srcr && mycol == srcc) {
+ FUNCNAME(gesd2d) (&context, &SendRows, &SendCols,
+ LocalMatrix+LocalRows*StartCol+StartRow,
+ &LocalRows, &rootrow, &rootcol
+ );
+
+ // Adjust the next starting column
+ StartCol = (StartCol + SendCols) % LocalCols;
+ }
+
+ // Receive data
+ if (iamroot) {
+ FUNCNAME(gerv2d) (&context, &SendRows, &SendCols,
+ GlobalMatrix + GlobalRows*c + r,
+ &GlobalRows, &srcr, &srcc
+ );
+ }
+ }
+
+ // Adjust the next starting row
+ if (myrow == srcr)
+ StartRow = (StartRow + SendRows) % LocalRows;
+
+ }
+
+}
diff --git a/btl/libs/BLACS/scatter.h b/btl/libs/BLACS/scatter.h
new file mode 100644
index 0000000..5e6a76c
--- /dev/null
+++ b/btl/libs/BLACS/scatter.h
@@ -0,0 +1,17 @@
+#ifndef SCATTER_H_
+#define SCATTER_H_
+
+#define TYPENAME float
+#define TYPEPREFIX s
+#include "scatter_impl.h"
+#undef TYPENAME
+#undef TYPEPREFIX
+
+#define TYPENAME double
+#define TYPEPREFIX d
+#include "scatter_impl.h"
+#undef TYPENAME
+#undef TYPEPREFIX
+
+
+#endif /* SCATTER_H_ */
diff --git a/btl/libs/BLACS/scatter_impl.h b/btl/libs/BLACS/scatter_impl.h
new file mode 100644
index 0000000..5c82ea5
--- /dev/null
+++ b/btl/libs/BLACS/scatter_impl.h
@@ -0,0 +1,109 @@
+#define CAT_(x,y) x##y
+#define CAT(x,y) CAT_(x,y)
+
+//#ifndef TYPENAME
+//# define TYPENAME double
+//# define TYPEPREFIX d
+//#endif
+
+#define FUNCNAME(name) CAT(CAT(TYPEPREFIX, name),_)
+#define vector_t std::vector<TYPENAME>
+
+#include <vector>
+
+inline void scatter(
+ const int& context, // [IN]
+ const vector_t& GlobalMatrixVector, // [IN] Only relevant for root
+ vector_t& LocalMatrixVector, // [OUT] The space is reserved here
+ int& GlobalRows, // [IN (root) / OUT (other)]
+ int& GlobalCols, // [IN (root) / OUT (other)]
+ int& BlockRows, // [IN (root) / OUT (other)]
+ int& BlockCols, // [IN (root) / OUT (other)]
+ int& LocalRows, // [OUT]
+ int& LocalCols, // [OUT]
+ const int& rootrow = 0, // [IN]
+ const int& rootcol = 0 // [IN]
+) {
+ /* Helper variables */
+ int iZERO = 0, iONE = 1, iFOUR = 4;
+
+ int myid, myrow, mycol, procrows, proccols, procnum;
+ blacs_pinfo_(&myid, &procnum);
+ blacs_gridinfo_(&context, &procrows, &proccols, &myrow, &mycol);
+ bool iamroot = (myrow == rootrow && mycol == rootcol);
+ const TYPENAME *GlobalMatrix = &GlobalMatrixVector[0];
+ TYPENAME *LocalMatrix;
+
+ /* Broadcast matrix info */
+ int minfo[4];
+ if (iamroot) {
+ minfo[0] = GlobalRows;
+ minfo[1] = GlobalCols;
+ minfo[2] = BlockRows;
+ minfo[3] = BlockCols;
+ igebs2d_(&context, "All", " ", &iFOUR, &iONE, minfo, &iFOUR);
+ } else {
+ igebr2d_(&context, "All", " ", &iFOUR, &iONE, minfo, &iFOUR,
+ &iZERO, &iZERO);
+ }
+
+ GlobalRows = minfo[0];
+ GlobalCols = minfo[1];
+ BlockRows = minfo[2];
+ BlockCols = minfo[3];
+
+
+ /* Reserve space */
+ LocalRows = numroc_(&GlobalRows, &BlockRows, &myrow, &iZERO, &procrows);
+ LocalCols = numroc_(&GlobalCols, &BlockCols, &mycol, &iZERO, &proccols);
+ LocalMatrixVector.resize(LocalRows*LocalCols);
+ LocalMatrix = &LocalMatrixVector[0];
+
+ /* Scatter matrix */
+ int destr = 0, destc = 0;
+ int SendRows, SendCols;
+ int RecvRow = 0, RecvCol = 0;
+ for (int r = 0; r < GlobalRows; r += BlockRows, destr=(destr+1)%procrows) {
+ destc = 0;
+
+ // Is this the last row bloc?
+ SendRows = BlockRows;
+ if (GlobalRows-r < BlockRows)
+ SendRows = GlobalRows-r;
+ if (SendRows <= 0)
+ SendRows = 0;
+
+ for (int c=0; c<GlobalCols; c+=BlockCols, destc=(destc+1)%proccols) {
+
+ // Is this the last column block?
+ SendCols = BlockCols;
+ if (GlobalCols-c < BlockCols)
+ SendCols = GlobalCols-c;
+
+ // Send data
+ if (iamroot) {
+ FUNCNAME(gesd2d) (&context, &SendRows, &SendCols,
+ GlobalMatrix + GlobalRows*c + r,
+ &GlobalRows, &destr, &destc
+ );
+ }
+
+ // Rerceive data
+ if (myrow == destr && mycol == destc) {
+ FUNCNAME(gerv2d) (&context, &SendRows, &SendCols,
+ LocalMatrix+LocalRows*RecvCol+RecvRow,
+ &LocalRows, &rootrow, &rootcol
+ );
+
+ // Adjust the next starting column
+ RecvCol = (RecvCol + SendCols) % LocalCols;
+ }
+ }
+
+ // Adjust the next starting row
+ if (myrow == destr)
+ RecvRow = (RecvRow + SendRows) % LocalRows;
+
+ }
+
+}
diff --git a/btl/libs/PBLAS/main.cpp b/btl/libs/PBLAS/main.cpp
new file mode 100644
index 0000000..888f123
--- /dev/null
+++ b/btl/libs/PBLAS/main.cpp
@@ -0,0 +1,32 @@
+#include "mpi.h"
+#include "utilities.h"
+#include "bench.hh"
+
+#include <iostream>
+using namespace std;
+
+#include "pblas_interface.hh"
+#include "action_parallel_matrix_vector_product.hh"
+
+#include <string>
+
+BTL_MAIN;
+
+int main(int argc, char **argv)
+{
+ MPI_Init(&argc, &argv);
+
+ int myid, numproc, context, procrows = 2, proccols = 2, iZERO = 0;
+ blacs_pinfo_(&myid, &numproc);
+ blacs_get_(&iZERO, &iZERO, &context);
+ blacs_gridinit_(&context, "Row-major", &procrows, &proccols);
+ bool iamroot = (myid == 0);
+
+ bench<Action_parallel_matrix_vector_product<pblas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT,!iamroot);
+
+// blacs_exit_(&iZERO);
+ MPI_Finalize();
+ return 0;
+}
+
+
diff --git a/btl/libs/PBLAS/pblas.h b/btl/libs/PBLAS/pblas.h
new file mode 100644
index 0000000..4144292
--- /dev/null
+++ b/btl/libs/PBLAS/pblas.h
@@ -0,0 +1,28 @@
+#ifndef SCALAPACK_H_
+#define SCALAPACK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /* PBLAS declarations */
+ void pdgemv_(const char*, const int*, const int*,
+ const double*, const double*, const int*, const int*, const int*,
+ const double*, const int*, const int*, const int*, const int*,
+ const double*, double*, const int*, const int*, const int*, const int*);
+ void psgemv_(const char*, const int*, const int*,
+ const float*, const float*, const int*, const int*, const int*,
+ const float*, const int*, const int*, const int*, const int*,
+ const float*, float*, const int*, const int*, const int*, const int*);
+
+
+ int numroc_(const int*, const int*, const int*, const int*, const int*);
+ int descinit_(const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+
+#endif /* SCALAPACK_H_ */
diff --git a/btl/libs/PBLAS/pblas_interface.hh b/btl/libs/PBLAS/pblas_interface.hh
new file mode 100644
index 0000000..b969e4e
--- /dev/null
+++ b/btl/libs/PBLAS/pblas_interface.hh
@@ -0,0 +1,59 @@
+//=====================================================
+// File : blas_interface.hh
+// Author : L. Plagne <laurent.plagne@edf.fr)>
+// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+
+#include "pblas.h"
+#include "blacs_interface.hh"
+
+#define MAKE_STRING2(S) #S
+#define MAKE_STRING(S) MAKE_STRING2(S)
+
+#ifndef CAT
+#define CAT2(A,B) A##B
+#define CAT(A,B) CAT2(A,B)
+#endif
+
+
+template<class real> class pblas_interface;
+
+
+static char notrans = 'N';
+static char trans = 'T';
+static char nonunit = 'N';
+static char lower = 'L';
+static char right = 'R';
+static char left = 'L';
+static int intone = 1;
+
+
+#define SCALAR float
+#define SCALAR_PREFIX s
+#include "pblas_interface_impl.hh"
+#undef SCALAR
+#undef SCALAR_PREFIX
+
+
+#define SCALAR double
+#define SCALAR_PREFIX d
+#include "pblas_interface_impl.hh"
+#undef SCALAR
+#undef SCALAR_PREFIX
+
+
+
diff --git a/btl/libs/PBLAS/pblas_interface_impl.hh b/btl/libs/PBLAS/pblas_interface_impl.hh
new file mode 100644
index 0000000..14b27df
--- /dev/null
+++ b/btl/libs/PBLAS/pblas_interface_impl.hh
@@ -0,0 +1,33 @@
+
+#define PBLAS_PREFIX p
+#define PBLAS_FUNC(NAME) CAT(CAT(CAT(PBLAS_PREFIX, SCALAR_PREFIX),NAME),_)
+
+template<> class pblas_interface<SCALAR> : public blacs_interface<SCALAR>
+{
+public:
+ static inline std::string name()
+ {
+ return MAKE_STRING(PBLASNAME);
+ }
+
+ static inline void parallel_matrix_vector_product(
+ int GlobalRows, int GlobalCols,
+ gene_matrix& A, int *descA,
+ gene_vector& x, int *descX,
+ gene_vector& y, int *descY
+ )
+ {
+ real_type alpha = 1., beta = 0.;
+ int iONE = 1;
+ int myid, procnum;
+ blacs_pinfo_(&myid, &procnum);
+
+ PBLAS_FUNC(gemv)(&notrans, &GlobalRows, &GlobalCols,
+ &alpha, A, &iONE, &iONE, descA,
+ x, &iONE, &iONE, descX, &iONE,
+ &beta, y, &iONE, &iONE, descY, &iONE
+ );
+
+ }
+};
+