23 #include "mltplyCommon.h" 24 #include "mltplyHubbardCore.h" 25 #include "mltplyMPIHubbard.h" 26 #include "mltplyMPIHubbardCore.h" 28 #include "wrapperMPI.h" 37 if (org_isite + 1 >
X->Def.Nsite) {
51 long unsigned int is1_spin,
52 long unsigned int orgbit,
53 long unsigned int *offbit
55 long unsigned int ibit_tmp;
56 ibit_tmp = orgbit & is1_spin;
58 *offbit = orgbit + is1_spin;
71 long unsigned int is1_spin,
72 long unsigned int orgbit,
73 long unsigned int *offbit
75 long unsigned int ibit_tmp;
76 ibit_tmp = orgbit & is1_spin;
78 *offbit = orgbit - is1_spin;
100 long unsigned int orgbit,
101 long unsigned int *offbit
103 long unsigned int tmp_ispin;
104 long unsigned int tmp_org, tmp_off;
105 int iflgBitExist =
TRUE;
110 tmp_ispin =
X->Def.Tpow[2 * org_isite1 + org_isigma1];
112 iflgBitExist =
FALSE;
118 tmp_ispin =
X->Def.Tpow[2 * org_isite2 + org_isigma2];
120 iflgBitExist =
FALSE;
126 tmp_ispin =
X->Def.Tpow[2 * org_isite3 + org_isigma3];
128 iflgBitExist =
FALSE;
134 tmp_ispin =
X->Def.Tpow[2 * org_isite4 + org_isigma4];
136 iflgBitExist =
FALSE;
141 if(iflgBitExist !=
TRUE){
159 long unsigned int orgbit
161 long unsigned int tmp_ispin;
162 long unsigned int tmp_org, tmp_off;
163 int iflgBitExist =
TRUE;
167 tmp_ispin =
X->Def.Tpow[2 * org_isite1 + org_isigma1];
174 tmp_ispin =
X->Def.Tpow[2 * org_isite3 + org_isigma3];
176 iflgBitExist =
FALSE;
180 if(iflgBitExist !=
TRUE){
193 unsigned long int isite1,
194 unsigned long int isite2,
195 unsigned long int isite3,
196 unsigned long int isite4,
199 unsigned long int orgbit,
200 unsigned long int *offbit
202 long unsigned int diffA;
203 long unsigned int tmp_off;
204 long unsigned int tmp_ispin1, tmp_ispin2;
210 if (tmp_ispin1 == tmp_ispin2) {
211 if ((orgbit & tmp_ispin1) == 0) {
220 if (tmp_ispin2 > tmp_ispin1) diffA = tmp_ispin2 - tmp_ispin1 * 2;
221 else diffA = tmp_ispin1-tmp_ispin2*2;
223 tmp_sgn=
X_GC_CisAjt(orgbit,
X, tmp_ispin1, tmp_ispin2, tmp_ispin1+tmp_ispin2, diffA, &tmp_off);
234 if(tmp_ispin1 == tmp_ispin2){
235 if( (tmp_off & tmp_ispin1) == 0){
243 if(tmp_ispin2 > tmp_ispin1) diffA = tmp_ispin2 - tmp_ispin1*2;
244 else diffA = tmp_ispin1-tmp_ispin2*2;
245 tmp_sgn *=
X_GC_CisAjt(tmp_off,
X, tmp_ispin1, tmp_ispin2, tmp_ispin1+tmp_ispin2, diffA, offbit);
255 *offbit = *offbit%
X->Def.OrgTpow[2*
X->Def.Nsite];
268 double complex tmp_V,
270 double complex *tmp_v0,
271 double complex *tmp_v1
274 double complex dam_pr = 0.0;
276 unsigned long int tmp_ispin1;
277 unsigned long int i_max =
X->Check.idim_max;
278 unsigned long int tmp_off, j;
287 #pragma omp parallel reduction(+:dam_pr) default(none) shared(org_isite1, org_ispin1, org_isite3, org_ispin3, tmp_v0, tmp_v1) \ 288 firstprivate(i_max, tmp_V, X) private(dmv, j, tmp_off, tmp_ispin1) 291 if (org_isite1 + 1 >
X->Def.Nsite && org_isite3 + 1 >
X->Def.Nsite) {
292 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
294 for (j = 1; j <= i_max; j++) {
295 dmv = tmp_v1[j] * tmp_V;
297 dam_pr += conj(tmp_v1[j]) * dmv;
302 for (j = 1; j <= i_max; j++) {
303 dmv = tmp_v1[j] * tmp_V;
304 dam_pr += conj(tmp_v1[j]) * dmv;
308 else if (org_isite1 + 1 >
X->Def.Nsite || org_isite3 + 1 >
X->Def.Nsite) {
309 if (org_isite1 > org_isite3) tmp_ispin1 =
X->Def.Tpow[2 * org_isite3 + org_ispin3];
310 else tmp_ispin1 =
X->Def.Tpow[2 * org_isite1 + org_ispin1];
312 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
314 for (j = 1; j <= i_max; j++) {
316 dmv = tmp_v1[j] * tmp_V;
318 dam_pr += conj(tmp_v1[j]) * dmv;
324 for (j = 1; j <= i_max; j++) {
326 dmv = tmp_v1[j] * tmp_V;
327 dam_pr += conj(tmp_v1[j]) * dmv;
350 double complex tmp_V,
352 double complex *tmp_v0,
353 double complex *tmp_v1
356 double complex dam_pr = 0.0;
357 unsigned long int i_max =
X->Check.idim_max;
358 unsigned long int idim_max_buf;
359 int iCheck, ierr, Fsgn;
360 unsigned long int isite1, isite2, isite3;
361 unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
362 unsigned long int j, Asum, Adiff;
364 unsigned long int origin, tmp_off;
365 unsigned long int org_rankbit;
366 MPI_Status statusMPI;
368 iCheck =
CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite3, org_ispin3,
X, (
long unsigned int)
myrank, &origin);
369 isite1 =
X->Def.Tpow[2 * org_isite1 + org_ispin1];
370 isite2 =
X->Def.Tpow[2 * org_isite2 + org_ispin2];
371 isite3 =
X->Def.Tpow[2 * org_isite3 + org_ispin3];
373 if (iCheck ==
TRUE) {
374 tmp_isite1 =
X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
375 tmp_isite2 =
X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
376 tmp_isite3 =
X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
377 tmp_isite4 =
X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
378 Asum = tmp_isite1 + tmp_isite2;
379 if (tmp_isite2 > tmp_isite1) Adiff = tmp_isite2 - tmp_isite1 * 2;
380 else Adiff = tmp_isite1 - tmp_isite2 * 2;
383 iCheck =
CheckBit_InterAllPE(org_isite3, org_ispin3, org_isite3, org_ispin3, org_isite2, org_ispin2, org_isite1, org_ispin1,
X, (
long unsigned int)
myrank, &origin);
384 if (iCheck ==
TRUE) {
386 tmp_isite4 =
X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
387 tmp_isite3 =
X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
388 tmp_isite2 =
X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
389 tmp_isite1 =
X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
390 Asum = tmp_isite3 + tmp_isite4;
391 if (tmp_isite4 > tmp_isite3) Adiff = tmp_isite4 - tmp_isite3 * 2;
392 else Adiff = tmp_isite3 - tmp_isite4 * 2;
393 if (
X->Large.mode == M_CORR ||
X->Large.mode == M_CALCSPEC) {
406 #pragma omp parallel default(none) reduction(+:dam_pr) \ 407 firstprivate(i_max,X,Asum,Adiff,isite1,isite2, tmp_V) private(j,tmp_off) shared(tmp_v0, tmp_v1) 410 for (j = 1; j <= i_max; j++)
411 dam_pr +=
GC_CisAjt(j, tmp_v0, tmp_v1,
X, isite2, isite1, Asum, Adiff, tmp_V, &tmp_off);
413 if (
X->Large.mode != M_CORR) {
415 for (j = 1; j <= i_max; j++)
416 dam_pr +=
GC_CisAjt(j, tmp_v0, tmp_v1,
X, isite1, isite2, Asum, Adiff, tmp_V, &tmp_off);
422 ierr = MPI_Sendrecv(&
X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
423 &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
424 MPI_COMM_WORLD, &statusMPI);
426 ierr = MPI_Sendrecv(tmp_v1,
X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
427 v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
428 MPI_COMM_WORLD, &statusMPI);
431 #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, tmp_off, Fsgn, org_rankbit, Adiff) \ 432 shared(v1buf, tmp_v1, tmp_v0, myrank, origin, isite3, org_isite3, isite1, isite2, org_isite2, org_isite1) \ 433 firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4) 435 if (org_isite1 + 1 >
X->Def.Nsite && org_isite2 + 1 >
X->Def.Nsite) {
436 if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
437 else Adiff = isite1 - isite2 * 2;
441 if (org_isite3 + 1 >
X->Def.Nsite) {
442 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
444 for (j = 1; j <= idim_max_buf; j++) {
445 dmv = tmp_V *
v1buf[j];
447 dam_pr += conj(tmp_v1[j]) * dmv;
452 for (j = 1; j <= idim_max_buf; j++) {
453 dmv = tmp_V *
v1buf[j];
454 dam_pr += conj(tmp_v1[j]) * dmv;
459 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
461 for (j = 1; j <= idim_max_buf; j++) {
463 dmv = tmp_V *
v1buf[j];
465 dam_pr += conj(tmp_v1[j]) * dmv;
471 for (j = 1; j <= idim_max_buf; j++) {
473 dmv = tmp_V *
v1buf[j];
474 dam_pr += conj(tmp_v1[j]) * dmv;
481 org_rankbit =
X->Def.OrgTpow[2 *
X->Def.Nsite] * origin;
482 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
484 for (j = 1; j <= idim_max_buf; j++) {
485 if (
GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn,
X, (j - 1) + org_rankbit, &tmp_off) ==
TRUE) {
486 dmv = tmp_V *
v1buf[j] * Fsgn;
487 tmp_v0[tmp_off + 1] += dmv;
488 dam_pr += conj(tmp_v1[tmp_off + 1]) * dmv;
494 for (j = 1; j <= idim_max_buf; j++) {
495 if (
GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn,
X, (j - 1) + org_rankbit, &tmp_off) ==
TRUE) {
496 dmv = tmp_V *
v1buf[j] * Fsgn;
497 dam_pr += conj(tmp_v1[tmp_off + 1]) * dmv;
521 double complex tmp_V,
523 double complex *tmp_v0,
524 double complex *tmp_v1
527 double complex dam_pr = 0;
529 org_isite4, org_ispin4, org_isite3, org_ispin3,
530 org_isite1, org_ispin1, conj(tmp_V),
X, tmp_v0, tmp_v1);
550 double complex tmp_V,
552 double complex *tmp_v0,
553 double complex *tmp_v1
556 double complex dam_pr = 0;
557 unsigned long int i_max =
X->Check.idim_max;
558 unsigned long int idim_max_buf;
559 int iCheck, ierr, Fsgn;
560 unsigned long int isite1, isite2, isite3, isite4;
561 unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
562 unsigned long int j, Adiff, Bdiff;
564 unsigned long int origin, tmp_off, tmp_off2;
565 unsigned long int org_rankbit;
566 int iFlgHermite =
FALSE;
567 MPI_Status statusMPI;
570 org_isite3, org_ispin3, org_isite4, org_ispin4,
571 X, (
long unsigned int)
myrank, &origin);
572 isite1 =
X->Def.Tpow[2 * org_isite1 + org_ispin1];
573 isite2 =
X->Def.Tpow[2 * org_isite2 + org_ispin2];
574 isite3 =
X->Def.Tpow[2 * org_isite3 + org_ispin3];
575 isite4 =
X->Def.Tpow[2 * org_isite4 + org_ispin4];
577 if (iCheck ==
TRUE) {
578 tmp_isite1 =
X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
579 tmp_isite2 =
X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
580 tmp_isite3 =
X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
581 tmp_isite4 =
X->Def.OrgTpow[2 * org_isite4 + org_ispin4];
585 org_isite2, org_ispin2, org_isite1, org_ispin1,
586 X, (
long unsigned int)
myrank, &origin);
587 if (iCheck ==
TRUE) {
589 tmp_isite4 =
X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
590 tmp_isite3 =
X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
591 tmp_isite2 =
X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
592 tmp_isite1 =
X->Def.OrgTpow[2 * org_isite4 + org_ispin4];
594 if (
X->Large.mode == M_CORR ||
X->Large.mode == M_CALCSPEC) {
604 if (isite1 == isite4 && isite2 == isite3) {
610 else if (isite2 == isite3) {
612 if (isite4 > isite1) Adiff = isite4 - isite1 * 2;
613 else Adiff = isite1 - isite4 * 2;
615 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \ 616 firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) 617 for (j = 1; j <= i_max; j++)
618 dam_pr +=
GC_CisAjt(j - 1, tmp_v0, tmp_v1,
X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V, &tmp_off);
622 org_isite2, org_ispin2, tmp_V,
X, tmp_v0, tmp_v1);
623 if (
X->Large.mode != M_CORR) {
624 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \ 625 firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) 626 for (j = 1; j <= i_max; j++)
627 dam_pr +=
GC_CisAjt(j - 1, tmp_v0, tmp_v1,
X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V, &tmp_off);
631 org_isite1, org_ispin1, tmp_V,
X, tmp_v0, tmp_v1);
636 org_isite2, org_ispin2, tmp_V,
X, tmp_v0, tmp_v1);
637 if (
X->Large.mode != M_CORR) {
639 org_isite3, org_ispin3, tmp_V,
X, tmp_v0, tmp_v1);
645 ierr = MPI_Sendrecv(&
X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
646 &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
647 MPI_COMM_WORLD, &statusMPI);
649 ierr = MPI_Sendrecv(tmp_v1,
X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
650 v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
651 MPI_COMM_WORLD, &statusMPI);
654 if (org_isite1 + 1 >
X->Def.Nsite && org_isite2 + 1 >
X->Def.Nsite
655 && org_isite3 + 1 >
X->Def.Nsite && org_isite4 + 1 >
X->Def.Nsite) {
657 if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
658 else Adiff = isite1 - isite2 * 2;
659 if (isite4 > isite3) Bdiff = isite4 - isite3 * 2;
660 else Bdiff = isite3 - isite4 * 2;
662 if (iFlgHermite ==
FALSE) {
663 Fsgn =
X_GC_CisAjt((
long unsigned int)
myrank,
X, isite2, isite1, (isite1 + isite2), Adiff, &tmp_off2);
664 Fsgn *=
X_GC_CisAjt(tmp_off2,
X, isite4, isite3, (isite3 + isite4), Bdiff, &tmp_off);
668 Fsgn =
X_GC_CisAjt((
long unsigned int)
myrank,
X, isite3, isite4, (isite3 + isite4), Bdiff, &tmp_off2);
669 Fsgn *=
X_GC_CisAjt(tmp_off2,
X, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off);
672 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
673 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, tmp_v0) 674 for (j = 1; j <= idim_max_buf; j++) {
675 dmv = tmp_V *
v1buf[j];
677 dam_pr += conj(tmp_v1[j]) * dmv;
681 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, tmp_v0) 682 for (j = 1; j <= idim_max_buf; j++) {
683 dmv = tmp_V *
v1buf[j];
684 dam_pr += conj(tmp_v1[j]) * dmv;
689 org_rankbit =
X->Def.OrgTpow[2 *
X->Def.Nsite] * origin;
690 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
691 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, tmp_off, Fsgn) firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit) shared(v1buf, tmp_v1, tmp_v0) 692 for (j = 1; j <= idim_max_buf; j++) {
693 if (
GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn,
X, (j - 1) + org_rankbit, &tmp_off) ==
TRUE) {
694 dmv = tmp_V *
v1buf[j] * Fsgn;
695 tmp_v0[tmp_off + 1] += dmv;
696 dam_pr += conj(tmp_v1[tmp_off + 1]) * dmv;
701 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, tmp_off, Fsgn) firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit) shared(v1buf, tmp_v1, tmp_v0) 702 for (j = 1; j <= idim_max_buf; j++) {
703 if (
GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn,
X, (j - 1) + org_rankbit, &tmp_off) ==
TRUE) {
704 dmv = tmp_V *
v1buf[j] * Fsgn;
705 dam_pr += conj(tmp_v1[tmp_off + 1]) * dmv;
724 double complex tmp_V,
726 double complex *tmp_v0,
727 double complex *tmp_v1
730 double complex dam_pr = 0.0;
731 unsigned long int i_max =
X->Check.idim_max;
732 unsigned long int j, isite1, tmp_off;
736 isite1 =
X->Def.Tpow[2 * org_isite1 + org_ispin1];
737 if (org_isite1 + 1 >
X->Def.Nsite) {
740 #pragma omp parallel reduction(+:dam_pr) default(none) shared(tmp_v0, tmp_v1) \ 741 firstprivate(i_max, tmp_V, X) private(dmv, j, tmp_off) 743 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
745 for (j = 1; j <= i_max; j++) {
746 dmv = tmp_v1[j] * tmp_V;
748 dam_pr += conj(tmp_v1[j]) * dmv;
753 for (j = 1; j <= i_max; j++) {
754 dmv = tmp_v1[j] * tmp_V;
755 dam_pr += conj(tmp_v1[j]) * dmv;
761 #pragma omp parallel reduction(+:dam_pr) default(none) shared(tmp_v0, tmp_v1) \ 762 firstprivate(i_max, tmp_V, X, isite1) private(dmv, j, tmp_off) 764 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
766 for (j = 1; j <= i_max; j++) {
768 dmv = tmp_v1[j] * tmp_V;
770 dam_pr += conj(tmp_v1[j]) * dmv;
776 for (j = 1; j <= i_max; j++) {
778 dmv = tmp_v1[j] * tmp_V;
779 dam_pr += conj(tmp_v1[j]) * dmv;
800 double complex tmp_trans,
802 double complex *tmp_v0,
803 double complex *tmp_v1
806 double complex dam_pr = 0.0;
809 if (org_isite1 + 1 >
X->Def.Nsite && org_isite2 + 1 >
X->Def.Nsite) {
812 else if (org_isite1 + 1 >
X->Def.Nsite || org_isite2 + 1 >
X->Def.Nsite) {
834 double complex tmp_V,
836 double complex *tmp_v0,
837 double complex *tmp_v1
840 double complex dam_pr = 0.0;
842 unsigned long int tmp_ispin1;
843 unsigned long int i_max =
X->Check.idim_max;
844 unsigned long int tmp_off, j;
849 if (iCheck !=
TRUE)
return 0.0;
851 #pragma omp parallel reduction(+:dam_pr) default(none) \ 852 shared(tmp_v0, tmp_v1, list_1, org_isite1, org_ispin1, org_isite3, org_ispin3) \ 853 firstprivate(i_max, tmp_V, X, tmp_ispin1) private(dmv, j, tmp_off) 855 if (org_isite1 + 1 >
X->Def.Nsite && org_isite3 + 1 >
X->Def.Nsite) {
856 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
858 for (j = 1; j <= i_max; j++) {
859 dmv = tmp_v1[j] * tmp_V;
861 dam_pr += conj(tmp_v1[j]) * dmv;
866 for (j = 1; j <= i_max; j++) {
867 dmv = tmp_v1[j] * tmp_V;
868 dam_pr += conj(tmp_v1[j]) * dmv;
872 else if (org_isite1 + 1 >
X->Def.Nsite || org_isite3 + 1 >
X->Def.Nsite) {
873 if (org_isite1 > org_isite3) tmp_ispin1 =
X->Def.Tpow[2 * org_isite3 + org_ispin3];
874 else tmp_ispin1 =
X->Def.Tpow[2 * org_isite1 + org_ispin1];
876 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
878 for (j = 1; j <= i_max; j++) {
880 dmv = tmp_v1[j] * tmp_V;
882 dam_pr += conj(tmp_v1[j]) * dmv;
888 for (j = 1; j <= i_max; j++) {
890 dmv = tmp_v1[j] * tmp_V;
891 dam_pr += conj(tmp_v1[j]) * dmv;
916 double complex tmp_V,
918 double complex *tmp_v0,
919 double complex *tmp_v1
922 double complex dam_pr = 0;
923 unsigned long int i_max =
X->Check.idim_max;
924 unsigned long int idim_max_buf;
925 int iCheck, ierr, Fsgn;
926 unsigned long int isite1, isite2, isite3, isite4;
927 unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
928 unsigned long int j, Adiff, Bdiff;
930 unsigned long int origin, tmp_off, tmp_off2;
931 unsigned long int org_rankbit, ioff;
932 int iFlgHermite =
FALSE;
933 MPI_Status statusMPI;
936 org_isite3, org_ispin3, org_isite4, org_ispin4,
937 X, (
long unsigned int)
myrank, &origin);
939 isite1 =
X->Def.Tpow[2 * org_isite1 + org_ispin1];
940 isite2 =
X->Def.Tpow[2 * org_isite2 + org_ispin2];
941 isite3 =
X->Def.Tpow[2 * org_isite3 + org_ispin3];
942 isite4 =
X->Def.Tpow[2 * org_isite4 + org_ispin4];
944 if (iCheck ==
TRUE) {
945 tmp_isite1 =
X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
946 tmp_isite2 =
X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
947 tmp_isite3 =
X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
948 tmp_isite4 =
X->Def.OrgTpow[2 * org_isite4 + org_ispin4];
952 org_isite2, org_ispin2, org_isite1, org_ispin1,
953 X, (
long unsigned int)
myrank, &origin);
954 if (iCheck ==
TRUE) {
956 tmp_isite4 =
X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
957 tmp_isite3 =
X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
958 tmp_isite2 =
X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
959 tmp_isite1 =
X->Def.OrgTpow[2 * org_isite4 + org_ispin4];
961 if (
X->Large.mode == M_CORR ||
X->Large.mode == M_CALCSPEC) tmp_V = 0;
967 if (isite1 == isite4 && isite2 == isite3) {
973 else if (isite2 == isite3) {
974 if (isite4 > isite1) Adiff = isite4 - isite1 * 2;
975 else Adiff = isite1 - isite4 * 2;
978 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \ 979 firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0, list_1) 980 for (j = 1; j <= i_max; j++)
981 dam_pr +=
CisAjt(j, tmp_v0, tmp_v1,
X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V);
985 org_isite2, org_ispin2, tmp_V,
X, tmp_v0, tmp_v1);
987 if (
X->Large.mode != M_CORR) {
988 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \ 989 firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) 990 for (j = 1; j <= i_max; j++)
991 dam_pr +=
CisAjt(j, tmp_v0, tmp_v1,
X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V);
1000 if (
X->Large.mode != M_CORR)
1002 org_isite3, org_ispin3, tmp_V,
X, tmp_v0, tmp_v1);
1007 ierr = MPI_Sendrecv(&
X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
1008 &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
1009 MPI_COMM_WORLD, &statusMPI);
1012 ierr = MPI_Sendrecv(
list_1,
X->Check.idim_max + 1, MPI_UNSIGNED_LONG, origin, 0,
1013 list_1buf, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0,
1014 MPI_COMM_WORLD, &statusMPI);
1017 ierr = MPI_Sendrecv(tmp_v1,
X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1018 v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1019 MPI_COMM_WORLD, &statusMPI);
1021 if (org_isite1 + 1 >
X->Def.Nsite && org_isite2 + 1 >
X->Def.Nsite
1022 && org_isite3 + 1 >
X->Def.Nsite && org_isite4 + 1 >
X->Def.Nsite)
1024 if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
1025 else Adiff = isite1 - isite2 * 2;
1026 if (isite4 > isite3) Bdiff = isite4 - isite3 * 2;
1027 else Bdiff = isite3 - isite4 * 2;
1029 if (iFlgHermite ==
FALSE) {
1030 Fsgn =
X_GC_CisAjt((
long unsigned int)
myrank,
X, isite2, isite1, (isite1 + isite2), Adiff, &tmp_off2);
1031 Fsgn *=
X_GC_CisAjt(tmp_off2,
X, isite4, isite3, (isite3 + isite4), Bdiff, &tmp_off);
1035 Fsgn =
X_GC_CisAjt((
long unsigned int)
myrank,
X, isite3, isite4, (isite3 + isite4), Bdiff, &tmp_off2);
1036 Fsgn *=
X_GC_CisAjt(tmp_off2,
X, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off);
1040 #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, ioff) \ 1041 firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, tmp_v0, list_2_1, list_2_2, list_1buf) 1043 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
1045 for (j = 1; j <= idim_max_buf; j++) {
1047 X->Large.irght,
X->Large.ilft,
X->Large.ihfbit, &ioff) ==
TRUE)
1049 dmv = tmp_V *
v1buf[j];
1050 tmp_v0[ioff] += dmv;
1051 dam_pr += conj(tmp_v1[ioff]) * dmv;
1057 for (j = 1; j <= idim_max_buf; j++) {
1059 X->Large.irght,
X->Large.ilft,
X->Large.ihfbit, &ioff) ==
TRUE)
1061 dmv = tmp_V *
v1buf[j];
1062 dam_pr += conj(tmp_v1[ioff]) * dmv;
1070 org_rankbit =
X->Def.OrgTpow[2 *
X->Def.Nsite] * origin;
1073 #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, tmp_off, Fsgn, ioff) \ 1074 firstprivate(myrank, idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, \ 1075 org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite4, org_ispin4) \ 1076 shared(v1buf, tmp_v1, tmp_v0, list_1buf, list_2_1, list_2_2) 1078 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
1080 for (j = 1; j <= idim_max_buf; j++) {
1081 if (
GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn,
X,
1086 dmv = tmp_V *
v1buf[j] * Fsgn;
1087 tmp_v0[ioff] += dmv;
1088 dam_pr += conj(tmp_v1[ioff]) * dmv;
1095 for (j = 1; j <= idim_max_buf; j++) {
1096 if (
GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn,
X,
1100 X->Large.irght,
X->Large.ilft,
X->Large.ihfbit, &ioff) ==
TRUE)
1102 dmv = tmp_V *
v1buf[j] * Fsgn;
1103 dam_pr += conj(tmp_v1[ioff]) * dmv;
1128 double complex tmp_V,
1130 double complex *tmp_v0,
1131 double complex *tmp_v1
1134 double complex dam_pr = 0.0;
1135 unsigned long int i_max =
X->Check.idim_max;
1136 unsigned long int idim_max_buf, ioff;
1137 int iCheck, ierr, Fsgn;
1138 unsigned long int isite1, isite2, isite3;
1139 unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
1140 unsigned long int j, Asum, Adiff;
1142 unsigned long int origin, tmp_off;
1143 unsigned long int org_rankbit;
1144 MPI_Status statusMPI;
1146 iCheck =
CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite3, org_ispin3,
X, (
long unsigned int)
myrank, &origin);
1149 isite1 =
X->Def.Tpow[2 * org_isite1 + org_ispin1];
1150 isite2 =
X->Def.Tpow[2 * org_isite2 + org_ispin2];
1151 isite3 =
X->Def.Tpow[2 * org_isite3 + org_ispin3];
1153 if (iCheck ==
TRUE) {
1154 tmp_isite1 =
X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
1155 tmp_isite2 =
X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
1156 tmp_isite3 =
X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
1157 tmp_isite4 =
X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
1158 Asum = tmp_isite1 + tmp_isite2;
1159 if (tmp_isite2 > tmp_isite1) Adiff = tmp_isite2 - tmp_isite1 * 2;
1160 else Adiff = tmp_isite1 - tmp_isite2 * 2;
1163 iCheck =
CheckBit_InterAllPE(org_isite3, org_ispin3, org_isite3, org_ispin3, org_isite2, org_ispin2, org_isite1, org_ispin1,
X, (
long unsigned int)
myrank, &origin);
1164 if (iCheck ==
TRUE) {
1165 tmp_V = conj(tmp_V);
1166 tmp_isite4 =
X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
1167 tmp_isite3 =
X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
1168 tmp_isite2 =
X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
1169 tmp_isite1 =
X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
1170 Asum = tmp_isite3 + tmp_isite4;
1171 if (tmp_isite4 > tmp_isite3) Adiff = tmp_isite4 - tmp_isite3 * 2;
1172 else Adiff = tmp_isite3 - tmp_isite4 * 2;
1173 if (
X->Large.mode == M_CORR ||
X->Large.mode == M_CALCSPEC) tmp_V = 0;
1181 #pragma omp parallel default(none) reduction(+:dam_pr) \ 1182 firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp_v0, tmp_v1) 1185 for (j = 1; j <= i_max; j++)
1186 dam_pr +=
CisAjt(j, tmp_v0, tmp_v1,
X, isite1, isite2, Asum, Adiff, tmp_V);
1188 if (
X->Large.mode != M_CORR) {
1190 for (j = 1; j <= i_max; j++)
1191 dam_pr +=
CisAjt(j, tmp_v0, tmp_v1,
X, isite2, isite1, Asum, Adiff, tmp_V);
1197 ierr = MPI_Sendrecv(&
X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
1198 &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
1199 MPI_COMM_WORLD, &statusMPI);
1201 ierr = MPI_Sendrecv(
list_1,
X->Check.idim_max + 1, MPI_UNSIGNED_LONG, origin, 0,
1202 list_1buf, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0,
1203 MPI_COMM_WORLD, &statusMPI);
1206 ierr = MPI_Sendrecv(tmp_v1,
X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1207 v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1208 MPI_COMM_WORLD, &statusMPI);
1211 #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, ioff, tmp_off, Fsgn, Adiff) \ 1212 firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, isite3) \ 1213 shared(v1buf, tmp_v1, tmp_v0, list_1buf, list_2_1, list_2_2, origin, org_isite3, myrank, isite1, isite2, org_isite1, org_isite2) 1216 if (org_isite1 + 1 >
X->Def.Nsite && org_isite2 + 1 >
X->Def.Nsite) {
1217 if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
1218 else Adiff = isite1 - isite2 * 2;
1222 if (org_isite3 + 1 >
X->Def.Nsite) {
1223 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
1225 for (j = 1; j <= idim_max_buf; j++) {
1226 dmv = tmp_V *
v1buf[j];
1228 X->Large.irght,
X->Large.ilft,
X->Large.ihfbit, &ioff);
1229 tmp_v0[ioff] += dmv;
1230 dam_pr += conj(tmp_v1[ioff]) * dmv;
1235 for (j = 1; j <= idim_max_buf; j++) {
1236 dmv = tmp_V *
v1buf[j];
1238 dam_pr += conj(tmp_v1[ioff]) * dmv;
1243 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
1245 for (j = 1; j <= idim_max_buf; j++) {
1247 dmv = tmp_V *
v1buf[j];
1249 X->Large.irght,
X->Large.ilft,
X->Large.ihfbit, &ioff);
1250 tmp_v0[ioff] += dmv;
1251 dam_pr += conj(tmp_v1[ioff]) * dmv;
1257 for (j = 1; j <= idim_max_buf; j++) {
1260 dmv = tmp_V *
v1buf[j];
1262 X->Large.irght,
X->Large.ilft,
X->Large.ihfbit, &ioff);
1263 dam_pr += conj(tmp_v1[ioff]) * dmv;
1270 org_rankbit =
X->Def.OrgTpow[2 *
X->Def.Nsite] * origin;
1271 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
1273 for (j = 1; j <= idim_max_buf; j++) {
1274 if (
GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn,
X,
1276 dmv = tmp_V *
v1buf[j] * Fsgn;
1278 X->Large.irght,
X->Large.ilft,
X->Large.ihfbit, &ioff);
1279 tmp_v0[ioff] += dmv;
1280 dam_pr += conj(tmp_v1[ioff]) * dmv;
1286 for (j = 1; j <= idim_max_buf; j++) {
1287 if (
GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn,
X,
1290 dmv = tmp_V *
v1buf[j] * Fsgn;
1292 X->Large.irght,
X->Large.ilft,
X->Large.ihfbit, &ioff);
1293 dam_pr += conj(tmp_v1[ioff]) * dmv;
1317 double complex tmp_V,
1319 double complex *tmp_v0,
1320 double complex *tmp_v1
1323 double complex dam_pr = 0;
1326 org_isite4, org_ispin4, org_isite3, org_ispin3,
1327 org_isite1, org_ispin1, conj(tmp_V),
X, tmp_v0, tmp_v1);
1329 return conj(dam_pr);
1338 double complex tmp_V,
1340 double complex *tmp_v0,
1341 double complex *tmp_v1
1344 double complex dam_pr = 0.0;
1345 unsigned long int i_max =
X->Check.idim_max;
1346 unsigned long int j, isite1, tmp_off;
1350 isite1 =
X->Def.Tpow[2 * org_isite1 + org_ispin1];
1351 if (org_isite1 + 1 >
X->Def.Nsite) {
1355 #pragma omp parallel reduction(+:dam_pr) default(none) shared(tmp_v0, tmp_v1) \ 1356 firstprivate(i_max, tmp_V, X) private(dmv, j, tmp_off) 1358 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
1360 for (j = 1; j <= i_max; j++) {
1361 dmv = tmp_v1[j] * tmp_V;
1363 dam_pr += conj(tmp_v1[j]) * dmv;
1368 for (j = 1; j <= i_max; j++) {
1369 dmv = tmp_v1[j] * tmp_V;
1370 dam_pr += conj(tmp_v1[j]) * dmv;
1376 #pragma omp parallel reduction(+:dam_pr) default(none) shared(tmp_v0, tmp_v1, list_1) \ 1377 firstprivate(i_max, tmp_V, X, isite1) private(dmv, j, tmp_off) 1379 if (
X->Large.mode == M_MLTPLY ||
X->Large.mode == M_CALCSPEC) {
1381 for (j = 1; j <= i_max; j++) {
1383 dmv = tmp_v1[j] * tmp_V;
1385 dam_pr += conj(tmp_v1[j]) * dmv;
1391 for (j = 1; j <= i_max; j++) {
1393 dmv = tmp_v1[j] * tmp_V;
1394 dam_pr += conj(tmp_v1[j]) * dmv;
1415 double complex tmp_trans,
1416 double complex *tmp_v0,
1417 double complex *tmp_v1,
1418 unsigned long int idim_max,
1419 double complex *tmp_v1buf,
1420 unsigned long int *Tpow
1423 int mask2, state2, ierr, origin, bit2diff, Fsgn;
1424 unsigned long int idim_max_buf, j;
1425 MPI_Status statusMPI;
1426 double complex trans, dmv, dam_pr;
1429 mask2 = (int)Tpow[2 * org_isite + org_ispin];
1432 state2 = origin & mask2;
1440 SgnBit((
unsigned long int) (bit2diff), &Fsgn);
1442 ierr = MPI_Sendrecv(&idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
1443 &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
1444 MPI_COMM_WORLD, &statusMPI);
1447 ierr = MPI_Sendrecv(tmp_v1, idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1448 tmp_v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1449 MPI_COMM_WORLD, &statusMPI);
1452 if (state2 == mask2) {
1455 else if (state2 == 0) {
1456 trans = (double)Fsgn * tmp_trans;
1461 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) \ 1462 firstprivate(idim_max_buf, trans) shared(tmp_v1buf, tmp_v1, tmp_v0) 1463 for (j = 0; j < idim_max_buf; j++) {
1464 dmv = trans * tmp_v1buf[j + 1];
1465 tmp_v0[j + 1] += dmv;
1466 dam_pr += conj(tmp_v1[j + 1]) * dmv;
1483 double complex tmp_trans,
1484 double complex *tmp_v0,
1485 double complex *tmp_v1,
1486 unsigned long int idim_max,
1487 double complex *tmp_v1buf,
1488 unsigned long int *Tpow
1491 int mask2, state2, ierr, origin, bit2diff, Fsgn;
1492 unsigned long int idim_max_buf, j;
1493 MPI_Status statusMPI;
1494 double complex trans, dmv, dam_pr;
1497 mask2 = (int)Tpow[2 * org_isite + org_ispin];
1500 state2 = origin & mask2;
1508 SgnBit((
unsigned long int) (bit2diff), &Fsgn);
1510 ierr = MPI_Sendrecv(&idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
1511 &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
1512 MPI_COMM_WORLD, &statusMPI);
1515 ierr = MPI_Sendrecv(tmp_v1, idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1516 tmp_v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1517 MPI_COMM_WORLD, &statusMPI);
1520 if ( state2 == 0 ) trans = 0;
1521 else if (state2 == mask2) trans = (double)Fsgn * tmp_trans;
1525 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) \ 1526 firstprivate(idim_max_buf, trans) shared(tmp_v1buf, tmp_v1, tmp_v0) 1527 for (j = 0; j < idim_max_buf; j++) {
1528 dmv = trans * tmp_v1buf[j + 1];
1529 tmp_v0[j + 1] += dmv;
1530 dam_pr += conj(tmp_v1[j + 1]) * dmv;
1544 unsigned int org_ispin,
1545 double complex tmp_trans,
1546 double complex *tmp_v0,
1547 double complex *tmp_v1,
1548 double complex *tmp_v1buf,
1549 unsigned long int idim_max,
1550 long unsigned int *Tpow,
1553 long unsigned int *list_2_1_target,
1554 long unsigned int *list_2_2_target,
1555 long unsigned int _irght,
1556 long unsigned int _ilft,
1557 long unsigned int _ihfbit
1560 int mask2, state2, ierr, origin, bit2diff, Fsgn;
1561 unsigned long int idim_max_buf, j, ioff;
1562 MPI_Status statusMPI;
1563 double complex trans, dmv, dam_pr;
1566 mask2 = (int)Tpow[2 * org_isite + org_ispin];
1569 state2 = origin & mask2;
1576 SgnBit((
unsigned long int) (bit2diff), &Fsgn);
1578 ierr = MPI_Sendrecv(&idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
1579 &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
1580 MPI_COMM_WORLD, &statusMPI);
1583 ierr = MPI_Sendrecv(
list_1_org, idim_max + 1, MPI_UNSIGNED_LONG, origin, 0,
1584 list_1buf_org, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0,
1585 MPI_COMM_WORLD, &statusMPI);
1588 ierr = MPI_Sendrecv(tmp_v1, idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1589 tmp_v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1590 MPI_COMM_WORLD, &statusMPI);
1593 if (state2 == mask2) {
1596 else if (state2 == 0) {
1597 trans = (double)Fsgn * tmp_trans;
1602 #pragma omp parallel for default(none) private(j, dmv) \ 1603 firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1_target, list_2_2_target) \ 1604 shared(tmp_v1buf, tmp_v1, tmp_v0, list_1buf_org) 1605 for (j = 1; j <= idim_max_buf; j++) {
1607 _irght, _ilft, _ihfbit, &ioff);
1608 dmv = trans * tmp_v1buf[j];
1609 tmp_v0[ioff] += dmv;
1623 unsigned int org_ispin,
1624 double complex tmp_trans,
1625 double complex *tmp_v0,
1626 double complex *tmp_v1,
1627 double complex *tmp_v1buf,
1628 unsigned long int idim_max,
1629 long unsigned int *Tpow,
1632 long unsigned int *list_2_1_target,
1633 long unsigned int *list_2_2_target,
1634 long unsigned int _irght,
1635 long unsigned int _ilft,
1636 long unsigned int _ihfbit
1639 int mask2, state2, ierr, origin, bit2diff, Fsgn;
1640 unsigned long int idim_max_buf, j, ioff;
1641 MPI_Status statusMPI;
1642 double complex trans, dmv, dam_pr;
1645 mask2 = (int)Tpow[2 * org_isite + org_ispin];
1648 state2 = origin & mask2;
1655 SgnBit((
unsigned long int) (bit2diff), &Fsgn);
1656 ierr = MPI_Sendrecv(&idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
1657 &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
1658 MPI_COMM_WORLD, &statusMPI);
1661 ierr = MPI_Sendrecv(
list_1_org, idim_max + 1, MPI_UNSIGNED_LONG, origin, 0,
1662 list_1buf_org, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0,
1663 MPI_COMM_WORLD, &statusMPI);
1666 ierr = MPI_Sendrecv(tmp_v1, idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1667 tmp_v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1668 MPI_COMM_WORLD, &statusMPI);
1674 else if (state2 == mask2) {
1675 trans = (double)Fsgn * tmp_trans;
1680 #pragma omp parallel for default(none) private(j, dmv) \ 1681 firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1_target, list_2_2_target) \ 1682 shared(tmp_v1buf, tmp_v1, tmp_v0, list_1buf_org) 1683 for (j = 1; j <= idim_max_buf; j++) {
1685 _irght, _ilft, _ihfbit, &ioff);
1686 dmv = trans * tmp_v1buf[j];
1687 tmp_v0[ioff] += dmv;
int CheckBit_Cis(long unsigned int is1_spin, long unsigned int orgbit, long unsigned int *offbit)
Check that the is1_spin state is unoccupied, and compute the index of the final wavefunction associated to the creation operator \f$c_{is}^\dagger\f$.
void exitMPI(int errorcode)
MPI abort wrapper.
double complex X_child_CisAis_Hubbard_MPI(int org_isite1, int org_ispin1, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
int X_CisAis(long unsigned int list_1_j, struct BindStruct *X, long unsigned int is1_spin)
Compute the \f$c_{i\sigma}^\dagger c_{i\sigma}\f$ (number operator) term in Hubbard (canonical).
double complex X_GC_child_general_hopp_MPIdouble(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, double complex tmp_trans, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Hopping term in Hubbard + GC, when both site1 and site2 are in the inter-process region.
int CheckPE(int org_isite, struct BindStruct *X)
Check whether this site is in the inter process region or not.
double complex X_GC_child_CisAisCjtAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute the \f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{ku}\f$ term of the grandcanonical Hubbard system.
int CheckBit_InterAllPE(int org_isite1, int org_isigma1, int org_isite2, int org_isigma2, int org_isite3, int org_isigma3, int org_isite4, int org_isigma4, struct BindStruct *X, long unsigned int orgbit, long unsigned int *offbit)
Compute the index of the final wavefunction associated to \f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}\f$, and check whether this operator is relevant on this process or not.
long unsigned int * list_1buf
double complex X_child_CisAisCjtAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute the \f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{ku}\f$ term of the canonical Hubbard system.
int CheckBit_PairPE(int org_isite1, int org_isigma1, int org_isite3, int org_isigma3, struct BindStruct *X, long unsigned int orgbit)
Check the occupation of both site 1 and site 3.
double complex CisAjt(long unsigned int j, double complex *tmp_v0, double complex *tmp_v1, struct BindStruct *X, long unsigned int is1_spin, long unsigned int is2_spin, long unsigned int sum_spin, long unsigned int diff_spin, double complex tmp_V)
Compute the \f$c_{is}^\dagger c_{jt}\f$ hopping term for canonical Hubbard.
double complex X_GC_Cis_MPI(int org_isite, int org_ispin, double complex tmp_trans, double complex *tmp_v0, double complex *tmp_v1, unsigned long int idim_max, double complex *tmp_v1buf, unsigned long int *Tpow)
Single creation/annihilation operator in the inter process region for HubbardGC.
double complex X_Cis_MPI(int org_isite, unsigned int org_ispin, double complex tmp_trans, double complex *tmp_v0, double complex *tmp_v1, double complex *tmp_v1buf, unsigned long int idim_max, long unsigned int *Tpow, long unsigned int *list_1_org, long unsigned int *list_1buf_org, long unsigned int *list_2_1_target, long unsigned int *list_2_2_target, long unsigned int _irght, long unsigned int _ilft, long unsigned int _ihfbit)
Compute the \f$c_{is}^\dagger\f$ term of the canonical Hubbard system.
double complex X_child_CisAisCjtAjt_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute the \f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{jt}\f$ term of the canonical Hubbard system.
int CheckBit_Ajt(long unsigned int is1_spin, long unsigned int orgbit, long unsigned int *offbit)
Check that the is1_spin state is occupied, and compute the index of the final wavefunction associated to the annihilation operator \f$c_{jt}\f$.
long unsigned int * list_1buf_org
double complex X_Ajt_MPI(int org_isite, unsigned int org_ispin, double complex tmp_trans, double complex *tmp_v0, double complex *tmp_v1, double complex *tmp_v1buf, unsigned long int idim_max, long unsigned int *Tpow, long unsigned int *list_1_org, long unsigned int *list_1buf_org, long unsigned int *list_2_1_target, long unsigned int *list_2_2_target, long unsigned int _irght, long unsigned int _ilft, long unsigned int _ihfbit)
Compute the \f$c_{jt}\f$ term of the canonical Hubbard system.
double complex X_GC_child_general_hopp_MPIsingle(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, double complex tmp_trans, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Hopping term in Hubbard + GC, when only site2 is in the inter-process region.
double complex GC_CisAjt(long unsigned int j, double complex *tmp_v0, double complex *tmp_v1, struct BindStruct *X, long unsigned int is1_spin, long unsigned int is2_spin, long unsigned int sum_spin, long unsigned int diff_spin, double complex tmp_V, long unsigned int *tmp_off)
Compute the \f$c_{is}^\dagger c_{jt}\f$ hopping term for grandcanonical Hubbard.
double complex X_GC_child_CisAisCjtAjt_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute the \f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{jt}\f$ term of the grandcanonical Hubbard system.
double complex X_child_CisAjtCkuAlv_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute the \f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}\f$ term of the canonical Hubbard system.
double complex X_GC_child_CisAjt_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, double complex tmp_trans, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute the \f$c_{is}^\dagger c_{jt}\f$ term of the grandcanonical Hubbard system.
long unsigned int * list_1_org
double complex X_child_CisAjtCkuAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute the \f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{ku}\f$ term of the canonical Hubbard system.
long unsigned int * list_2_1
int GetOffComp(long unsigned int *_list_2_1, long unsigned int *_list_2_2, long unsigned int _ibit, const long unsigned int _irght, const long unsigned int _ilft, const long unsigned int _ihfbit, long unsigned int *_ioffComp)
Function to get the off-diagonal component of the wavefunction index.
int GetSgnInterAll(unsigned long int isite1, unsigned long int isite2, unsigned long int isite3, unsigned long int isite4, int *Fsgn, struct BindStruct *X, unsigned long int orgbit, unsigned long int *offbit)
Compute the index of the final wavefunction associated to \f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}\f$, and the Fermion sign.
double complex X_GC_Ajt_MPI(int org_isite, int org_ispin, double complex tmp_trans, double complex *tmp_v0, double complex *tmp_v1, unsigned long int idim_max, double complex *tmp_v1buf, unsigned long int *Tpow)
Single creation/annihilation operator in the inter process region for HubbardGC.
double complex X_GC_child_CisAjtCkuAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute the \f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{ku}\f$ term of the grandcanonical Hubbard system.
double complex X_GC_child_CisAis_Hubbard_MPI(int org_isite1, int org_ispin1, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute the \f$c_{is}^\dagger c_{is}\f$ term of the grandcanonical Hubbard system.
long unsigned int * list_1
int X_GC_CisAjt(long unsigned int list_1_j, struct BindStruct *X, long unsigned int is1_spin, long unsigned int is2_spin, long unsigned int sum_spin, long unsigned int diff_spin, long unsigned int *tmp_off)
Compute index of wavefunction of final state.
long unsigned int * list_2_2
double complex X_GC_child_CisAjtCkuAlv_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute the \f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}\f$ term of the grandcanonical Hubbard system.
int myrank
Process ID, defined in InitializeMPI()
void SgnBit(const long unsigned int org_bit, int *sgn)
Function to get the fermion sign of a bit pattern (64-bit).