23 #include "matrixlapack.h" 24 #include "Lanczos_EigenValue.h" 25 #include "wrapperMPI.h" 57 unsigned long int i_max_tmp;
61 double complex temp1, temp2;
62 double complex cbeta1;
63 double E[5], ebefor, E_target;
70 int int_i, int_j,
mfint[7];
74 i_max =
X->Check.idim_max;
75 k_exct =
X->Def.k_exct;
76 unsigned long int liLanczosStp;
77 liLanczosStp =
X->Def.Lanczos_max;
78 unsigned long int liLanczosStp_vec=0;
80 if (
X->Def.iReStart == RESTART_INOUT ||
X->Def.iReStart == RESTART_IN){
82 fprintf(
stdoutMPI,
" Error: Fail to read initial vectors\n");
87 fprintf(
stdoutMPI,
" Error: Fail to read TMcomponents\n");
90 if(liLanczosStp_vec !=liLanczosStp){
91 fprintf(
stdoutMPI,
" Error: Input files for vector and TMcomponents are incoorect.\n");
92 fprintf(
stdoutMPI,
" Error: Input vector %ld th stps, TMcomponents %ld th stps.\n", liLanczosStp_vec, liLanczosStp);
95 X->Def.Lanczos_restart=liLanczosStp;
98 liLanczosStp = liLanczosStp+
X->Def.Lanczos_max;
99 alpha1=
alpha[
X->Def.Lanczos_restart];
100 beta1=
beta[
X->Def.Lanczos_restart];
112 alpha1 = creal(
X->Large.prdct);
117 #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) 118 for (i = 1; i <= i_max; i++) {
119 cbeta1 += conj(
v0[i] - alpha1 *
v1[i]) * (
v0[i] - alpha1 *
v1[i]);
122 beta1 = creal(cbeta1);
126 liLanczosStp =
X->Def.Lanczos_max;
127 X->Def.Lanczos_restart =1;
134 if (i_max_tmp < liLanczosStp) {
135 liLanczosStp = i_max_tmp;
137 if (i_max_tmp < X->Def.LanczosTarget) {
138 liLanczosStp = i_max_tmp;
140 if (i_max_tmp == 1) {
146 X->Phys.Target_energy = E[k_exct];
148 fprintf(
stdoutMPI,
" LanczosStep E[1] \n");
149 fprintf(
stdoutMPI,
" stp=%d %.10lf \n", stp, E[1]);
152 fprintf(
stdoutMPI,
" LanczosStep E[1] E[2] E[3] E[4] Target:E[%d] E_Max/Nsite\n",
X->Def.LanczosTarget + 1);
153 for (stp =
X->Def.Lanczos_restart+1; stp <= liLanczosStp; stp++) {
154 #pragma omp parallel for default(none) private(i,temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1) 155 for (i = 1; i <= i_max; i++) {
157 temp2 = (
v0[i] - alpha1 *
v1[i]) / beta1;
158 v0[i] = -beta1 * temp1;
166 alpha1 = creal(
X->Large.prdct);
170 #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) 171 for (i = 1; i <= i_max; i++) {
172 cbeta1 += conj(
v0[i] - alpha1 *
v1[i]) * (
v0[i] - alpha1 *
v1[i]);
175 beta1 = creal(cbeta1);
179 Target =
X->Def.LanczosTarget;
182 d_malloc2(tmp_mat, stp, stp);
183 d_malloc1(tmp_E, stp + 1);
185 for (int_i = 0; int_i < stp; int_i++) {
186 for (int_j = 0; int_j < stp; int_j++) {
187 tmp_mat[int_i][int_j] = 0.0;
190 tmp_mat[0][0] =
alpha[1];
191 tmp_mat[0][1] =
beta[1];
192 tmp_mat[1][0] =
beta[1];
193 tmp_mat[1][1] =
alpha[2];
198 E_target = tmp_E[Target];
201 d_free1(tmp_E, stp + 1);
202 d_free2(tmp_mat, stp, stp);
206 fprintf(fp,
"LanczosStep E[1] E[2] E[3] E[4] Target:E[%d] E_Max/Nsite\n", Target + 1);
208 fprintf(
stdoutMPI,
" stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx %.10lf xxxxxxxxx \n", stp, E[1], E[2],
210 fprintf(fp,
"stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx %.10lf xxxxxxxxx \n", stp, E[1], E[2], E_target);
212 fprintf(
stdoutMPI,
" stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx xxxxxxxxx \n", stp, E[1], E[2]);
213 fprintf(fp,
"stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx xxxxxxxxx \n", stp, E[1], E[2]);
223 d_malloc2(tmp_mat, stp, stp);
224 d_malloc1(tmp_E, stp + 1);
226 for (int_i = 0; int_i < stp; int_i++) {
227 for (int_j = 0; int_j < stp; int_j++) {
228 tmp_mat[int_i][int_j] = 0.0;
231 tmp_mat[0][0] =
alpha[1];
232 tmp_mat[0][1] =
beta[1];
233 for (int_i = 1; int_i < stp - 1; int_i++) {
234 tmp_mat[int_i][int_i] =
alpha[int_i + 1];
235 tmp_mat[int_i][int_i + 1] =
beta[int_i + 1];
236 tmp_mat[int_i][int_i - 1] =
beta[int_i];
238 tmp_mat[int_i][int_i] =
alpha[int_i + 1];
239 tmp_mat[int_i][int_i - 1] =
beta[int_i];
247 E[0] = tmp_E[stp - 1];
249 E_target = tmp_E[Target];
251 d_free1(tmp_E, stp + 1);
252 d_free2(tmp_mat, stp, stp);
254 fprintf(
stdoutMPI,
" stp = %d %.10lf %.10lf %.10lf %.10lf %.10lf %.10lf\n", stp, E[1], E[2], E[3], E[4],
255 E_target, E[0] / (
double)
X->Def.NsiteMPI);
256 fprintf(fp,
"stp=%d %.10lf %.10lf %.10lf %.10lf %.10lf %.10lf\n", stp, E[1], E[2], E[3], E[4], E_target,
257 E[0] / (
double)
X->Def.NsiteMPI);
259 fprintf(
stdoutMPI,
" stp = %d %.10lf %.10lf %.10lf %.10lf xxxxxxxxx %.10lf\n", stp, E[1], E[2], E[3], E[4],
260 E[0] / (
double)
X->Def.NsiteMPI);
261 fprintf(fp,
"stp=%d %.10lf %.10lf %.10lf %.10lf xxxxxxxxx %.10lf\n", stp, E[1], E[2], E[3], E[4],
262 E[0] / (
double)
X->Def.NsiteMPI);
266 if (fabs((E_target - ebefor) / E_target) <
eps_Lanczos || fabs(
beta[stp]) < pow(10.0, -14)) {
272 d_malloc1(tmp_E, stp + 1);
277 X->Phys.Target_energy = E_target;
278 X->Phys.Target_CG_energy = tmp_E[k_exct];
280 d_free1(tmp_E, stp + 1);
288 if (
X->Def.iReStart == RESTART_INOUT ||
X->Def.iReStart == RESTART_OUT ){
289 if(stp !=
X->Def.Lanczos_restart+2) {
299 fprintf(
stdoutMPI,
" Lanczos Eigenvalue is not converged in this process.\n");
325 double complex *tmp_v1,
326 unsigned long int *liLanczos_step
332 i_max =
X->Check.idim_max;
334 unsigned long int i_max_tmp;
335 double beta1, alpha1;
336 double complex temp1, temp2;
337 double complex cbeta1;
345 if (i_max_tmp < *liLanczos_step || i_max_tmp < X->Def.LanczosTarget) {
346 *liLanczos_step = i_max_tmp;
349 if (
X->Def.Lanczos_restart == 0) {
350 #pragma omp parallel for default(none) private(i) shared(v0, v1, tmp_v1) firstprivate(i_max) 351 for (i = 1; i <= i_max; i++) {
358 alpha1 = creal(
X->Large.prdct);
361 fprintf(
stdoutMPI,
" Step / Step_max alpha beta \n");
363 #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) 364 for (i = 1; i <= i_max; i++) {
365 cbeta1 += conj(
v0[i] - alpha1 *
v1[i]) * (
v0[i] - alpha1 *
v1[i]);
368 beta1 = creal(cbeta1);
371 X->Def.Lanczos_restart = 1;
373 alpha1 =
alpha[
X->Def.Lanczos_restart];
374 beta1 =
beta[
X->Def.Lanczos_restart];
377 for (stp =
X->Def.Lanczos_restart + 1; stp <= *liLanczos_step; stp++) {
379 if (fabs(_beta[stp - 1]) < pow(10.0, -14)) {
380 *liLanczos_step = stp - 1;
384 #pragma omp parallel for default(none) private(i, temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1) 385 for (i = 1; i <= i_max; i++) {
387 temp2 = (
v0[i] - alpha1 *
v1[i]) / beta1;
388 v0[i] = -beta1 * temp1;
394 alpha1 = creal(
X->Large.prdct);
395 _alpha[stp] = alpha1;
398 #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) 399 for (i = 1; i <= i_max; i++) {
400 cbeta1 += conj(
v0[i] - alpha1 *
v1[i]) * (
v0[i] - alpha1 *
v1[i]);
403 beta1 = creal(cbeta1);
407 fprintf(
stdoutMPI,
" stp = %d / %lu %.10lf %.10lf \n", stp, *liLanczos_step, alpha1, beta1);
430 unsigned long int i_max;
432 fprintf(
stdoutMPI,
" Start: Input vectors for recalculation.\n");
438 byte_size = fread(liLanczosStp_vec,
sizeof(*liLanczosStp_vec),1,fp);
439 byte_size = fread(&i_max,
sizeof(
long int), 1, fp);
440 if(i_max !=
X->Check.idim_max){
441 fprintf(stderr,
"Error: A size of Inputvector is incorrect.\n");
444 byte_size = fread(_v0,
sizeof(complex
double),
X->Check.idim_max + 1, fp);
445 byte_size = fread(_v1,
sizeof(complex
double),
X->Check.idim_max + 1, fp);
447 fprintf(
stdoutMPI,
" End: Input vectors for recalculation.\n");
449 if (byte_size == 0) printf(
"byte_size: %d \n", (
int)byte_size);
464 double complex* tmp_v0,
465 double complex *tmp_v1,
466 unsigned long int liLanczosStp_vec){
470 fprintf(
stdoutMPI,
" Start: Output vectors for recalculation.\n");
477 fwrite(&liLanczosStp_vec,
sizeof(liLanczosStp_vec),1,fp);
478 fwrite(&
X->Check.idim_max,
sizeof(
X->Check.idim_max),1,fp);
479 fwrite(tmp_v0,
sizeof(complex
double),
X->Check.idim_max+1, fp);
480 fwrite(tmp_v1,
sizeof(complex
double),
X->Check.idim_max+1, fp);
483 fprintf(
stdoutMPI,
" End: Output vectors for recalculation.\n");
498 long int i, iv, i_max;
499 unsigned long int i_max_tmp, sum_i_max;
504 double complex cdnorm;
505 long unsigned int u_long_i;
508 i_max =
X->Check.idim_max;
510 if(
X->Def.iFlgMPI==0) {
514 sum_i_max =
X->Check.idim_max;
516 X->Large.iv = (sum_i_max / 2 +
X->Def.initial_iv) % sum_i_max + 1;
518 fprintf(
stdoutMPI,
" initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d \n\n",
initial_mode, iv, i_max,
520 #pragma omp parallel for default(none) private(i) shared(tmp_v0, tmp_v1) firstprivate(i_max) 521 for (i = 1; i <= i_max; i++) {
527 if(
X->Def.iFlgMPI==0) {
528 for (iproc = 0; iproc <
nproc; iproc++) {
530 if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) {
532 tmp_v1[iv - sum_i_max + 1] = 1.0;
533 if (
X->Def.iInitialVecType == 0) {
534 tmp_v1[iv - sum_i_max + 1] += 1.0 * I;
535 tmp_v1[iv - sum_i_max + 1] /= sqrt(2.0);
540 sum_i_max += i_max_tmp;
545 tmp_v1[iv + 1] = 1.0;
546 if (
X->Def.iInitialVecType == 0) {
547 tmp_v1[iv + 1] += 1.0 * I;
548 tmp_v1[iv + 1] /= sqrt(2.0);
553 iv =
X->Def.initial_iv;
554 fprintf(
stdoutMPI,
" initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d \n\n",
initial_mode, iv, i_max,
556 #pragma omp parallel default(none) private(i, u_long_i, mythread, dsfmt) \ 557 shared(tmp_v0, tmp_v1, iv, X, nthreads, myrank) firstprivate(i_max) 561 for (i = 1; i <= i_max; i++) {
568 mythread = omp_get_thread_num();
572 if(
X->Def.iFlgMPI==0) {
576 u_long_i = 123432 + labs(iv)+ mythread ;
578 dsfmt_init_gen_rand(&dsfmt, u_long_i);
580 if (
X->Def.iInitialVecType == 0) {
582 for (i = 1; i <= i_max; i++)
583 tmp_v1[i] = 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5) +
584 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5) * I;
587 for (i = 1; i <= i_max; i++)
588 tmp_v1[i] = 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5);
594 #pragma omp parallel for default(none) private(i) shared(tmp_v1, i_max) reduction(+: cdnorm) 595 for (i = 1; i <= i_max; i++) {
596 cdnorm += conj(tmp_v1[i]) * tmp_v1[i];
598 if(
X->Def.iFlgMPI==0) {
601 dnorm = creal(cdnorm);
603 #pragma omp parallel for default(none) private(i) shared(tmp_v1) firstprivate(i_max, dnorm) 604 for (i = 1; i <= i_max; i++) {
605 tmp_v1[i] = tmp_v1[i] / dnorm;
624 unsigned long int *_i_max,
630 unsigned long int idx, i, ivec;
631 unsigned long int i_max;
640 fgetsMPI(ctmp,
sizeof(ctmp)/
sizeof(
char), fp);
641 sscanf(ctmp,
"%ld \n", &i_max);
642 if (
X->Def.LanczosTarget >
X->Def.nvec) {
643 ivec =
X->Def.LanczosTarget + 1;
646 ivec =
X->Def.nvec + 1;
650 alpha = (
double *) realloc(
alpha,
sizeof(
double) * (i_max +
X->Def.Lanczos_max + 1));
651 beta = (
double *) realloc(
beta,
sizeof(
double) * (i_max +
X->Def.Lanczos_max + 1));
652 for (i = 0; i < ivec; i++) {
653 vec[i] = (complex
double *) realloc(
vec[i], (i_max +
X->Def.Lanczos_max + 1) *
sizeof(complex double));
657 alpha=(
double*)realloc(
alpha,
sizeof(
double)*(i_max + 1));
658 beta=(
double*)realloc(
beta,
sizeof(
double)*(i_max + 1));
659 for (i = 0; i < ivec; i++) {
660 vec[i] = (complex
double *) realloc(
vec[i], (i_max +
X->Def.Lanczos_max + 1) *
sizeof(complex double));
667 fgetsMPI(ctmp,
sizeof(ctmp)/
sizeof(
char), fp);
668 sscanf(ctmp,
"%lf \n", &dnorm);
669 while(
fgetsMPI(ctmp,
sizeof(ctmp)/
sizeof(
char), fp) != NULL){
670 sscanf(ctmp,
"%lf %lf \n", &
alpha[idx], &
beta[idx]);
707 fprintf(fp,
"%d \n",liLanczosStp);
708 fprintf(fp,
"%.10lf \n",_dnorm);
709 for( i = 1 ; i <= liLanczosStp; i++){
710 fprintf(fp,
"%.10lf %.10lf \n", _alpha[i], _beta[i]);
int childfopenALL(const char *_cPathChild, const char *_cmode, FILE **_fp)
All processes open file in output/ directory.
int mltply(struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Parent function of multiplying the wavefunction by the Hamiltonian. . First, the calculation of diago...
void StartTimer(int n)
function for initializing elapse time [start]
double complex SumMPI_dc(double complex norm)
MPI wrapper function to obtain sum of Double complex across processes.
const char * cLogLanczos_EigenValueNotConverged
unsigned long int BcastMPI_li(int root, unsigned long int idim)
MPI wrapper function to broadcast unsigned long integer across processes.
void SetInitialVector(struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Set initial vector to start the calculation for Lanczos method. .
void StopTimer(int n)
function for calculating elapse time [elapse time=StartTimer-StopTimer]
int Lanczos_GetTridiagonalMatrixComponents(struct BindStruct *X, double *_alpha, double *_beta, double complex *tmp_v1, unsigned long int *liLanczos_step)
Calculate tridiagonal matrix components by Lanczos method.
int childfopenMPI(const char *_cPathChild, const char *_cmode, FILE **_fp)
Only the root process open file in output/ directory.
const char * c_Lanczos_SpectrumStep
const char * cLanczos_EigenValueFinish
const char * cFileNameOutputRestartVec
int DSEVvalue(int xNsize, double **A, double *r)
obtain eigenvalues of real symmetric A
const char * cFileNameLanczosStep
int nthreads
Number of Threads, defined in InitializeMPI()
int nproc
Number of processors, defined in InitializeMPI()
int OutputLanczosVector(struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1, unsigned long int liLanczosStp_vec)
Output initial vectors for the restart calculation.
const char * c_OutputSpectrumRecalcvecEnd
const char * c_InputSpectrumRecalcvecStart
const char * cLogLanczos_EigenValueStart
const char * cLogLanczos_EigenValueEnd
const char * cLanczos_EigenValueStart
const char * cFileNameTridiagonalMatrixComponents
static unsigned long int mfint[7]
int ReadInitialVector(struct BindStruct *X, double complex *_v0, double complex *_v1, unsigned long int *liLanczosStp_vec)
Read initial vectors for the restart calculation.
const char * cFileNameTimeKeep
const char * c_InputSpectrumRecalcvecEnd
void vec12(double alpha[], double beta[], unsigned int ndim, double tmp_E[], struct BindStruct *X)
Diagonalize a tri-diagonal matrix and store eigenvectors into vec.
int OutputTMComponents(struct BindStruct *X, double *_alpha, double *_beta, double _dnorm, int liLanczosStp)
Output tridiagonal matrix components obtained by the Lanczos method.
int Lanczos_EigenValue(struct BindStruct *X)
Main function for calculating eigen values by Lanczos method. The energy convergence is judged by the...
const char * cLanczos_EigenValueStep
int myrank
Process ID, defined in InitializeMPI()
char * fgetsMPI(char *InputString, int maxcount, FILE *fp)
MPI file I/O (get a line, fgets) wrapper. Only the root node (myrank = 0) reads and broadcast string...
int ReadTMComponents(struct BindStruct *X, double *_dnorm, unsigned long int *_i_max, int iFlg)
Read tridiagonal matrix components obtained by the Lanczos method. .
const char * c_OutputSpectrumRecalcvecStart
unsigned long int SumMPI_li(unsigned long int idim)
MPI wrapper function to obtain sum of unsigned long integer across processes.
int TimeKeeperWithStep(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType, const int istep)
Functions for writing a time log.
int TimeKeeper(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType)
Functions for writing a time log.
FILE * stdoutMPI
File pointer to the standard output defined in InitializeMPI()