#include "mpi.h"
#include "Common.h"
#include "mfmemory.h"
#include "wrapperMPI.h"
Functions
void	zgemm_ (char *TRANSA, char *TRANSB, int *M, int *N, int *K, double complex *ALPHA, double complex *matJL, int *LDA, double complex *arrayz, int *LDB, double complex *BETA, double complex *arrayx, int *LDC)

void	child_general_int_spin_MPIBoost (struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1, double complex *tmp_v2, double complex *tmp_v3)
Function Documentation

◆ child_general_int_spin_MPIBoost()

void child_general_int_spin_MPIBoost	(	struct BindStruct *	X,
		double complex *	tmp_v0,
		double complex *	tmp_v1,
		double complex *	tmp_v2,
		double complex *	tmp_v3
	)
Exchange term in Spin model
Author: Mitsuaki Kawamura (The University of Tokyo); Youhei Yamaji (The University of Tokyo)
Parameters
[in,out]	X
[out]	tmp_v0	Result v0 = H v1
[in]	tmp_v1	Input vector v1 of v0 = H v1
[in,out]	tmp_v2	buffer
[in,out]	tmp_v3	buffer
Definition at line 36 of file mltplyMPIBoost.c.
References nproc, X, and zgemm_().
Referenced by mltplySpinGCBoost().
 {
 #ifdef MPI
   
   //double complex dam_pr = 0;
   // MPI_Status statusMPI;
 
   //  int ierr;
   //  int INFO;
   char TRANSA, TRANSB;
   int M, N, K, LDA, LDB, LDC;
   double complex ALPHA, BETA;  
   long unsigned int i_max;
   long unsigned int j, k, ell, iloop;
   long unsigned int i1, i2;
   long unsigned int iomp;
   long unsigned int ell4, ell5, ell6, m0, Ipart1;
   long unsigned int mi, mj, mri, mrj, mrk, mrl;
   int indj;
   long unsigned int ellrl, ellrk, ellrj, ellri, elli1, elli2, ellj1, ellj2;
   long unsigned int iSS1, iSS2, iSSL1, iSSL2;
   double complex **vecJ;
   double complex **matJ, **matJ2;
   double complex *matJL;
   double complex *matI;
   double complex **matB;
   double complex *arrayz;
   double complex *arrayx;
   double complex *arrayw;
   long unsigned int ishift1, ishift2, ishift3, ishift4, ishift5, pivot_flag, num_J_star;
   long unsigned int pow4, pow5, pow41, pow51;  
   //long unsigned int pow1, pow2, pow3, pow4, pow5, pow11, pow21, pow31, pow41, pow51; 
 
   i_max = X->Check.idim_max;
 
 /*
 //zero clear
   #pragma omp parallel for default(none) private(j) \
   shared(i_max,tmp_v0)
   for(j=0;j<i_max;j++){
     tmp_v0[j+1]=0.0;
   }
 */
 
   c_malloc2(vecJ, 3, 3); 
   c_malloc2(matJ, 4, 4); 
   c_malloc2(matJ2, 4, 4); 
   c_malloc2(matB, 2, 2); 
   c_malloc1(matJL, (64*64)); 
   c_malloc1(matI, (64*64)); 
  
 //  c_malloc1(arrayx, (64*((int)pow(2.0, 16))));
 //  c_malloc1(arrayz, (64*((int)pow(2.0, 16))));
 //  c_malloc1(arrayw, (64*((int)pow(2.0, 16))));
  
   //defmodelBoost(X->Boost.W0, X->Boost.R0, X->Boost.num_pivot, X->Boost.ishift_nspin, X->Boost.list_6spin_star, X->Boost.list_6spin_pair, 1, X->Boost.arrayJ, X->Boost.vecB);
   
   for(iloop=0; iloop < X->Boost.R0; iloop++){
 
 
     for(j=iloop*X->Boost.num_pivot; j < (iloop+1)*X->Boost.num_pivot; j++){
       
       num_J_star = (long unsigned int)X->Boost.list_6spin_star[j][0]; //(0,j) 
       ishift1    = (long unsigned int)X->Boost.list_6spin_star[j][1]; //(1,j) 
       ishift2    = (long unsigned int)X->Boost.list_6spin_star[j][2]; //(2,j) 
       ishift3    = (long unsigned int)X->Boost.list_6spin_star[j][3]; //(3,j)
       ishift4    = (long unsigned int)X->Boost.list_6spin_star[j][4]; //(4,j)
       ishift5    = (long unsigned int)X->Boost.list_6spin_star[j][5]; //(5,j)
       pivot_flag = (long unsigned int)X->Boost.list_6spin_star[j][6]; //(6,j)
       //pow1 = (int)pow(2.0,ishift1);
       //pow2 = (int)pow(2.0,ishift1+ishift2);
       //pow3 = (int)pow(2.0,ishift1+ishift2+ishift3);
       pow4 = (int)pow(2.0,ishift1+ishift2+ishift3+ishift4);
       pow5 = (int)pow(2.0,ishift1+ishift2+ishift3+ishift4+ishift5);
       //pow11= (int)pow(2.0,ishift1+1);
       //pow21= (int)pow(2.0,ishift1+ishift2+1);
       //pow31= (int)pow(2.0,ishift1+ishift2+ishift3+1);
       pow41= (int)pow(2.0,ishift1+ishift2+ishift3+ishift4+1);
       pow51= (int)pow(2.0,ishift1+ishift2+ishift3+ishift4+ishift5+1);
 
       for(k=0; k < (64*64); k++){
         matJL[k] = 0.0 + 0.0*I;
         matI[k]  = 0.0 + 0.0*I;
       }
       for(k=0; k < 64; k++){
         matI[k+64*k] = 1.0;
       }
 
       for(ell=0; ell < num_J_star; ell++){
         mi   = (long unsigned int)X->Boost.list_6spin_pair[j][0][ell]; //(1,ell,j)
         mj   = (long unsigned int)X->Boost.list_6spin_pair[j][1][ell]; //(2,ell,j)
         mri  = (long unsigned int)X->Boost.list_6spin_pair[j][2][ell]; //(3,ell,j)
         mrj  = (long unsigned int)X->Boost.list_6spin_pair[j][3][ell]; //(4,ell,j)
         mrk  = (long unsigned int)X->Boost.list_6spin_pair[j][4][ell]; //(5,ell,j)
         mrl  = (long unsigned int)X->Boost.list_6spin_pair[j][5][ell]; //(6,ell,j)
         indj = X->Boost.list_6spin_pair[j][6][ell]; //(7,ell,j)
         for(i1 = 0; i1 < 3; i1++){
           for(i2 = 0; i2 < 3; i2++){
             vecJ[i1][i2] = X->Boost.arrayJ[(indj-1)][i1][i2];
           }
         } 
         //matJSS(1,1) = vecJ(3,3)
         matJ[0][0] = vecJ[2][2];
         //matJSS(1,2)= vecJ(1,1)-vecJ(2,2)-dcmplx(0.0d0,1.0d0)*vecJ(1,2)-dcmplx(0.0d0,1.0d0)*vecJ(2,1)
         matJ[0][1] = vecJ[0][0]-vecJ[1][1]-I*vecJ[0][1]-I*vecJ[1][0];
         //matJSS(1,3)= vecJ(3,1)-dcmplx(0.0d0,1.0d0)*vecJ(3,2)
         matJ[0][2] = vecJ[2][0]-I*vecJ[2][1];
         //matJSS(1,4)= vecJ(1,3)-dcmplx(0.0d0,1.0d0)*vecJ(2,3)
         matJ[0][3] = vecJ[0][2]-I*vecJ[1][2];
         //matJSS(2,1)= vecJ(1,1)-vecJ(2,2)+dcmplx(0.0d0,1.0d0)*vecJ(1,2)+dcmplx(0.0d0,1.0d0)*vecJ(2,1)
         matJ[1][0] = vecJ[0][0]-vecJ[1][1]+I*vecJ[0][1]+I*vecJ[1][0];
         //matJSS(2,2)= vecJ(3,3)
         matJ[1][1] = vecJ[2][2];
         //matJSS(2,3)=dcmplx(-1.0d0,0.0d0)*vecJ(1,3)-dcmplx(0.0d0,1.0d0)*vecJ(2,3)
         matJ[1][2] =(-1.0)*vecJ[0][2]-I*vecJ[1][2];
         //matJSS(2,4)=dcmplx(-1.0d0,0.0d0)*vecJ(3,1)-dcmplx(0.0d0,1.0d0)*vecJ(3,2)
         matJ[1][3] =(-1.0)*vecJ[2][0]-I*vecJ[2][1];
         //matJSS(3,1)= vecJ(3,1)+dcmplx(0.0d0,1.0d0)*vecJ(3,2)
         matJ[2][0] = vecJ[2][0]+I*vecJ[2][1];
         //matJSS(3,2)=dcmplx(-1.0d0,0.0d0)*vecJ(1,3)+dcmplx(0.0d0,1.0d0)*vecJ(2,3)
         matJ[2][1] =(-1.0)*vecJ[0][2]+I*vecJ[1][2];
         //matJSS(3,3)=dcmplx(-1.0d0,0.0d0)*vecJ(3,3)
         matJ[2][2] =(-1.0)*vecJ[2][2];
         //matJSS(3,4)= vecJ(1,1)+vecJ(2,2)+dcmplx(0.0d0,1.0d0)*vecJ(1,2)-dcmplx(0.0d0,1.0d0)*vecJ(2,1)
         matJ[2][3] = vecJ[0][0]+vecJ[1][1]+I*vecJ[0][1]-I*vecJ[1][0];
         //matJSS(4,1)= vecJ(1,3)+dcmplx(0.0d0,1.0d0)*vecJ(2,3)
         matJ[3][0] = vecJ[0][2]+I*vecJ[1][2];
         //matJSS(4,2)=dcmplx(-1.0d0,0.0d0)*vecJ(3,1)+dcmplx(0.0d0,1.0d0)*vecJ(3,2)
         matJ[3][1] =(-1.0)*vecJ[2][0]+I*vecJ[2][1];
         //matJSS(4,3)= vecJ(1,1)+vecJ(2,2)-dcmplx(0.0d0,1.0d0)*vecJ(1,2)+dcmplx(0.0d0,1.0d0)*vecJ(2,1)
         matJ[3][2] = vecJ[0][0]+vecJ[1][1]-I*vecJ[0][1]+I*vecJ[1][0];
         //matJSS(4,4)=dcmplx(-1.0d0,0.0d0)*vecJ(3,3)
         matJ[3][3] =(-1.0)*vecJ[2][2];
         
         matJ2[3][3] = matJ[0][0]; 
         matJ2[3][0] = matJ[0][1]; 
         matJ2[3][1] = matJ[0][2]; 
         matJ2[3][2] = matJ[0][3]; 
         matJ2[0][3] = matJ[1][0]; 
         matJ2[0][0] = matJ[1][1]; 
         matJ2[0][1] = matJ[1][2]; 
         matJ2[0][2] = matJ[1][3]; 
         matJ2[1][3] = matJ[2][0]; 
         matJ2[1][0] = matJ[2][1];
         matJ2[1][1] = matJ[2][2]; 
         matJ2[1][2] = matJ[2][3]; 
         matJ2[2][3] = matJ[3][0]; 
         matJ2[2][0] = matJ[3][1]; 
         matJ2[2][1] = matJ[3][2]; 
         matJ2[2][2] = matJ[3][3]; 
 
         for(ellri=0; ellri<2; ellri++){
         for(ellrj=0; ellrj<2; ellrj++){
         for(ellrk=0; ellrk<2; ellrk++){
         for(ellrl=0; ellrl<2; ellrl++){
           for(elli1=0; elli1<2; elli1++){
           for(ellj1=0; ellj1<2; ellj1++){
           for(elli2=0; elli2<2; elli2++){
           for(ellj2=0; ellj2<2; ellj2++){
             
             iSSL1 = elli1*(int)pow(2,mi) + ellj1*(int)pow(2,mj) + ellri*(int)pow(2,mri) + ellrj*(int)pow(2,mrj) + ellrk*(int)pow(2,mrk) + ellrl*(int)pow(2,mrl);
             iSSL2 = elli2*(int)pow(2,mi) + ellj2*(int)pow(2,mj) + ellri*(int)pow(2,mri) + ellrj*(int)pow(2,mrj) + ellrk*(int)pow(2,mrk) + ellrl*(int)pow(2,mrl);
             iSS1  = elli1 + 2*ellj1;
             iSS2  = elli2 + 2*ellj2;
             matJL[iSSL1+64*iSSL2] += matJ2[iSS1][iSS2];
           }
           }
           }
           }
         }
         }
         }
         }
 
         
       }/* loop for ell */
 
       /* external magnetic field B */
       if(pivot_flag==1){
         matB[0][0] = + X->Boost.vecB[2]; // -BM
         matB[1][1] = - X->Boost.vecB[2]; // -BM
         //matB[0][1] = - X->Boost.vecB[0] + I*X->Boost.vecB[1]; // -BM
         //matB[1][0] = - X->Boost.vecB[0] - I*X->Boost.vecB[1]; // -BM
         matB[0][1] = - X->Boost.vecB[0] - I*X->Boost.vecB[1]; // -BM
         matB[1][0] = - X->Boost.vecB[0] + I*X->Boost.vecB[1]; // -BM
         for(ellri=0; ellri<2; ellri++){
         for(ellrj=0; ellrj<2; ellrj++){
         for(ellrk=0; ellrk<2; ellrk++){
         for(ellrl=0; ellrl<2; ellrl++){
         for(ellj1=0; ellj1<2; ellj1++){
           for(elli1=0; elli1<2; elli1++){
           for(elli2=0; elli2<2; elli2++){
             for(ellj2=0; ellj2<X->Boost.ishift_nspin; ellj2++){
               iSSL1 = elli1*(int)pow(2,ellj2) + ellj1*(int)pow(2,((ellj2+1)%6)) + ellri*(int)pow(2,((ellj2+2)%6)) + ellrj*(int)pow(2,((ellj2+3)%6)) + ellrk*(int)pow(2,((ellj2+4)%6)) + ellrl*(int)pow(2,((ellj2+5)%6));
               iSSL2 = elli2*(int)pow(2,ellj2) + ellj1*(int)pow(2,((ellj2+1)%6)) + ellri*(int)pow(2,((ellj2+2)%6)) + ellrj*(int)pow(2,((ellj2+3)%6)) + ellrk*(int)pow(2,((ellj2+4)%6)) + ellrl*(int)pow(2,((ellj2+5)%6));
               matJL[iSSL1+64*iSSL2] += matB[elli1][elli2];
             }
           } 
           } 
         }
         }
         }
         }
         }
       }
       /* external magnetic field B */
     
       iomp=i_max/(int)pow(2.0,ishift1+ishift2+ishift3+ishift4+ishift5+2);
 
       #pragma omp parallel default(none) private(arrayx,arrayz,arrayw,ell4,ell5,ell6,m0,Ipart1,TRANSA,TRANSB,M,N,K,LDA,LDB,LDC,ALPHA,BETA) \
       shared(matJL,matI,iomp,i_max,myrank,ishift1,ishift2,ishift3,ishift4,ishift5,pow4,pow5,pow41,pow51,tmp_v0,tmp_v1,tmp_v3)
       {
 
             c_malloc1(arrayx, (64*((int)pow(2.0,ishift4+ishift5-1))));
             c_malloc1(arrayz, (64*((int)pow(2.0,ishift4+ishift5-1))));
             c_malloc1(arrayw, (64*((int)pow(2.0,ishift4+ishift5-1))));
 
 #pragma omp for
         for(ell6 = 0; ell6 < iomp; ell6++){
           Ipart1=pow51*2*ell6;
           for(ell5 = 0; ell5 < (int)pow(2.0, ishift5-1); ell5++){
         for(ell4 = 0; ell4 < (int)pow(2.0, ishift4-1); ell4++){
           for(m0 = 0; m0 < 16; m0++){   
         arrayz[(0 + m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4          +pow41*ell5+Ipart1)];
         arrayz[(16+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4+pow4     +pow41*ell5+Ipart1)];
         arrayz[(32+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4+pow5     +pow41*ell5+Ipart1)];
         arrayz[(48+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)];
         tmp_v3[(1 + m0+16*ell4          +pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4          +pow41*ell5+Ipart1)];
         tmp_v3[(1 + m0+16*ell4+pow4     +pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4+pow4     +pow41*ell5+Ipart1)];
         tmp_v3[(1 + m0+16*ell4+pow5     +pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4+pow5     +pow41*ell5+Ipart1)];
         tmp_v3[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)];
         arrayx[(0 + m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v0[(1 + m0+16*ell4          +pow41*ell5+Ipart1)];
         arrayx[(16+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v0[(1 + m0+16*ell4+pow4     +pow41*ell5+Ipart1)];
         arrayx[(32+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v0[(1 + m0+16*ell4+pow5     +pow41*ell5+Ipart1)];
         arrayx[(48+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v0[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)];
           } 
         }
           }
       
       
           for(ell5 = 0; ell5 < (int)pow(2.0, ishift5-1); ell5++){
         for(ell4 = 0; ell4 < (int)pow(2.0, ishift4-1); ell4++){
           for(m0 = 0; m0 < 16; m0++){
         arrayz[(0 + m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4          +pow41*ell5+pow51+Ipart1)];
         arrayz[(16+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4+pow4     +pow41*ell5+pow51+Ipart1)];
         arrayz[(32+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4+pow5     +pow41*ell5+pow51+Ipart1)];
         arrayz[(48+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)];
         tmp_v3[(1 + m0+16*ell4          +pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4          +pow41*ell5+pow51+Ipart1)];
         tmp_v3[(1 + m0+16*ell4+pow4     +pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4+pow4     +pow41*ell5+pow51+Ipart1)];
         tmp_v3[(1 + m0+16*ell4+pow5     +pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4+pow5     +pow41*ell5+pow51+Ipart1)];
         tmp_v3[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)];
         arrayx[(0 + m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4          +pow41*ell5+pow51+Ipart1)];
         arrayx[(16+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4+pow4     +pow41*ell5+pow51+Ipart1)];
         arrayx[(32+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4+pow5     +pow41*ell5+pow51+Ipart1)];
         arrayx[(48+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)];
           }
         
         }
       } 
       
           TRANSA = 'N';
           TRANSB = 'N';
           M = 64;
           N = (int)pow(2.0, ishift4+ishift5-1);
           K = 64;
           ALPHA = 1.0;
           LDA = 64;
           LDB = 64;
           BETA = 1.0;
           LDC = 64;
       
             zgemm_(&TRANSA,&TRANSB,&M,&N,&K,&ALPHA,matJL,&LDA,arrayz,&LDB,&BETA,arrayx,&LDC);
             //zgemm_(&TRANSA,&TRANSB,&M,&N,&K,&ALPHA,matI,&LDA,arrayz,&LDB,&BETA,arrayx,&LDC);
 /*          
           for(ell5=0;ell5<(64*N);ell5++){
             arrayw[ell5]=0.0;
           }
           for(ell5=0;ell5<64;ell5++){
             for(ell4=0;ell4<64;ell4++){
               for(m0=0;m0<N;m0++){
                 arrayw[(ell5+64*m0)] += matJL[(ell5+64*ell4)]*arrayz[(ell4+64*m0)];
               }
             }
           }
           for(ell5=0;ell5<64*N;ell5++){
             arrayx[ell5] += arrayw[ell5];
           }
 */          
         
           
 
           for(ell5 = 0; ell5 < (int)pow(2.0,ishift5-1); ell5++){
           for(ell4 = 0; ell4 < (int)pow(2.0,ishift4-1); ell4++){
             for(m0 = 0; m0 < 16; m0++){
               tmp_v1[(1 + m0+16*ell4          +pow41*ell5+Ipart1)]       = arrayx[(0 + m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)))];
               tmp_v1[(1 + m0+16*ell4+pow4     +pow41*ell5+Ipart1)]       = arrayx[(16+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)))];
               tmp_v1[(1 + m0+16*ell4+pow5     +pow41*ell5+Ipart1)]       = arrayx[(32+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)))];
               tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)]       = arrayx[(48+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)))];
             }
           }
           }
           for(ell5 = 0; ell5 < (int)pow(2.0,ishift5-1); ell5++){
           for(ell4 = 0; ell4 < (int)pow(2.0,ishift4-1); ell4++){
             for(m0 = 0; m0 < 16; m0++){
               tmp_v1[(1 + m0+16*ell4          +pow41*ell5+pow51+Ipart1)] = arrayx[(0 + m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))];
               tmp_v1[(1 + m0+16*ell4+pow4     +pow41*ell5+pow51+Ipart1)] = arrayx[(16+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))];
               tmp_v1[(1 + m0+16*ell4+pow5     +pow41*ell5+pow51+Ipart1)] = arrayx[(32+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))];
               tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)] = arrayx[(48+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))];
             }
           }
           }
 
         }/* omp parallel for */
       c_free1(arrayz, (64*((int)pow(2.0,ishift4+ishift5-1))) );
       c_free1(arrayx, (64*((int)pow(2.0,ishift4+ishift5-1))) );
       c_free1(arrayw, (64*((int)pow(2.0,ishift4+ishift5-1))) );
 
       }/* omp parallel */
 
       if(pivot_flag==1){
         iomp=i_max/(int)pow(2.0,X->Boost.ishift_nspin);
         #pragma omp parallel for default(none) private(ell4,ell5,ell6,m0,Ipart1,TRANSA,TRANSB,M,N,K,LDA,LDB,LDC,ALPHA,BETA) \
         firstprivate(iomp) shared(i_max,ishift1,ishift2,ishift3,ishift4,ishift5,pow4,pow5,pow41,pow51,X,tmp_v0,tmp_v1)
         for(ell5 = 0; ell5 < iomp; ell5++ ){
           for(ell4 = 0; ell4 < (int)pow(2.0,X->Boost.ishift_nspin); ell4++){
             tmp_v0[(1 + ell5+(i_max/(int)pow(2.0,X->Boost.ishift_nspin))*ell4)] = tmp_v1[(1 + ell4+((int)pow(2.0,X->Boost.ishift_nspin))*ell5)];
           } 
         }
         iomp=i_max/(int)pow(2.0,X->Boost.ishift_nspin);
         #pragma omp parallel for default(none) private(ell4,ell5) \
         firstprivate(iomp) shared(i_max,X,tmp_v1,tmp_v3)
         for(ell5 = 0; ell5 < iomp; ell5++ ){
           for(ell4 = 0; ell4 < (int)pow(2.0,X->Boost.ishift_nspin); ell4++){
             tmp_v1[(1 + ell5+(i_max/(int)pow(2.0,X->Boost.ishift_nspin))*ell4)] = tmp_v3[(1 + ell4+((int)pow(2.0,X->Boost.ishift_nspin))*ell5)];
           } 
         }
       }
       else{ 
         #pragma omp parallel for default(none) private(ell4) \
         shared(i_max,tmp_v0,tmp_v1,tmp_v3)
         for(ell4 = 0; ell4 < i_max; ell4++ ){
           tmp_v0[1 + ell4] = tmp_v1[1 + ell4];
           tmp_v1[1 + ell4] = tmp_v3[1 + ell4];
         }
       }/* if pivot_flag */
 
     }/* loop for j */
 
     /*
     ierr = MPI_Alltoall(&tmp_v1[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v3[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD);
     ierr = MPI_Alltoall(&tmp_v0[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v2[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD);
      */
     MPI_Alltoall(&tmp_v1[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v3[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD);
     MPI_Alltoall(&tmp_v0[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v2[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD);
 
 
     iomp=(int)pow(2.0,X->Boost.W0)/nproc;
     #pragma omp parallel for default(none) private(ell4,ell5,ell6) \
     firstprivate(iomp) shared(i_max,X,nproc,tmp_v0,tmp_v1,tmp_v2,tmp_v3)
     //for(ell4 = 0; ell4 < (int)pow(2.0,X->Boost.W0)/nproc; ell4++ ){
     for(ell4 = 0; ell4 < iomp; ell4++ ){
       for(ell5 = 0; ell5 < nproc; ell5++ ){
         for(ell6 = 0; ell6 < (int)(i_max/(int)pow(2.0,X->Boost.W0)); ell6++ ){
           tmp_v1[(1 + ell6+ell5*i_max/(int)pow(2.0,X->Boost.W0)+ell4*i_max/((int)pow(2.0,X->Boost.W0)/nproc))] = tmp_v3[(1 + ell6+ell4*i_max/(int)pow(2.0,X->Boost.W0)+ell5*i_max/nproc)];
           tmp_v0[(1 + ell6+ell5*i_max/(int)pow(2.0,X->Boost.W0)+ell4*i_max/((int)pow(2.0,X->Boost.W0)/nproc))] = tmp_v2[(1 + ell6+ell4*i_max/(int)pow(2.0,X->Boost.W0)+ell5*i_max/nproc)];
         }
       }   
     }
 
 
   }/* loop for iloop */
 
 /*
   dam_pr= X_child_general_int_spin_MPIBoost
     (
      matJ, X, tmp_v0, tmp_v1);
   
   X->Large.prdct += dam_pr;
 */
 //  c_free1(arrayz, (int)pow(2.0, 16));
 //  c_free1(arrayx, (int)pow(2.0, 16));
 //  c_free1(arrayw, (int)pow(2.0, 16));
 
   c_free2(vecJ, 3, 3);
   c_free2(matJ, 4, 4);
   c_free2(matJ2, 4, 4);
   c_free2(matB, 2, 2);
   c_free1(matJL, (64*64));
   c_free1(matI, (64*64));
  
 #endif
   
 }/*void child_general_int_spin_MPIBoost*/
◆ zgemm_()

void zgemm_	(	char *	TRANSA,
		char *	TRANSB,
		int *	M,
		int *	N,
		int *	K,
		double complex *	ALPHA,
		double complex *	matJL,
		int *	LDA,
		double complex *	arrayz,
		int *	LDB,
		double complex *	BETA,
		double complex *	arrayx,
		int *	LDC
	)
Referenced by child_general_int_spin_MPIBoost().
Functions

Function Documentation

◆ child_general_int_spin_MPIBoost()

◆ zgemm_()