From b6a227bb5209b3e7e90402e8659b63a6bc6244f2 Mon Sep 17 00:00:00 2001
From: Stanislav Cherepanov <75815728+stanislc@users.noreply.github.com>
Date: Wed, 5 Feb 2025 17:35:54 -0500
Subject: [PATCH 1/2] Update GetFreeEnergy5.py

Update documentation and allow selection of different cutoffs
---
 alf/GetFreeEnergy5.py | 73 +++++++++++++++++++++++++++++++------------
 1 file changed, 53 insertions(+), 20 deletions(-)

diff --git a/alf/GetFreeEnergy5.py b/alf/GetFreeEnergy5.py
index 03b74fe..9a4a809 100755
--- a/alf/GetFreeEnergy5.py
+++ b/alf/GetFreeEnergy5.py
@@ -1,6 +1,6 @@
 #! /usr/bin/env python
 
-def GetFreeEnergy5(alf_info,ms,msprof):
+def GetFreeEnergy5(alf_info,ms,msprof,cutb=2.0,cutc=8.0,cutx=2.0,cuts=1.0,cutc2=1.0,cutx2=0.5,cuts2=0.5):
   """
   Perform the matrix inversion to solve for optimal bias changes
 
@@ -11,7 +11,9 @@ def GetFreeEnergy5(alf_info,ms,msprof):
   function with respect to changes in the bias potential are saved as a
   matrix in analysis[i]/multisite/C.dat and the first derivatives are
   saved in analysis[i]/multisite/V.dat. This routine should be run in
-  analysis[i]. This routine adds an additional regularization term to the
+  analysis[i].
+
+  This routine adds an additional regularization term to the
   C.dat matrix and inverts the matrix to solve the linear equation
   dictating the optimal solution. Because this represents a linear
   approximation, and because unsampled states may become dominant as
@@ -19,20 +21,59 @@ def GetFreeEnergy5(alf_info,ms,msprof):
   parameter. If these caps are exceed, all changes are scaled down to
   bring the largest change below these caps. The scaling is printed every
   cycle of ALF, and will be 1 or less. Smaller values indicate poorly
-  converged biases. Changes to the b, c, x, and s parameters are saved to
+  converged biases.
+
+  Changes to the b, c, x, and s parameters are saved to
   b.dat, c.dat, x.dat, and s.dat in the analysis[i] directory.
 
   Parameters
   ----------
-  alf_info : dict
-      Dictionary of variables alf needs to run
-  ms : int
-      Flag for whether to include intersite biases. 0 for no, 1 for c, x,
-      and s biases, 2 for just c biases. Typically taken from the first
-      element of ntersite list.
-  msprof : int
-      Flag for whether to include intersite profiles. 0 for no, 1 for yes.
-      Typically taken from the second element of ntersite list.
+  alf_info : dict
+      Dictionary of variables ALF needs to run, including:
+      - `temp` : Temperature of the system (in Kelvin).
+      - `nsubs` : List of the number of substituents (blocks) at each site.
+      - `nblocks` : Total number of blocks in the system.
+
+  ms : int
+      Flag for whether to include intersite biases:
+      - `0` for no intersite biases.
+      - `1` for including `c`, `x`, and `s` biases.
+      - `2` for including only `c` biases.
+      Typically taken from the first element of the `ntersite` list.
+
+  msprof : int
+      Flag for whether to include intersite profiles:
+      - `0` for no intersite profiles.
+      - `1` for including intersite profiles.
+      Typically taken from the second element of the `ntersite` list.
+
+  cutb : float, optional
+      Cap for fixed bias `b` parameter changes (default: `2.0`).
+      V = b(I)*[lambda(I)]
+
+  cutc : float, optional
+      Cap for intra-site quadratic `c` parameter changes (default: `8.0`).
+      V = c(I,J)*[lambda(I) * lambda(J)]
+
+  cutx : float, optional
+      Cap for intra-site end-point `x` parameter changes (default: `2.0`).
+      V = x(I,J)*[lambda(J) * (1 - exp(-5.56*lambda(I)))]
+
+  cuts : float, optional
+      Cap for intra-site diagonal-quadratic `s` parameter changes (default: `1.0`).
+      V = s(I,J)*[lambda(I) * lambda(J)]/[lambda(I) + 0.017]
+
+  cutc2 : float, optional
+      Cap for inter-site `c` parameter changes (default: `1.0`).
+
+  cutx2 : float, optional
+      Cap for inter-site `x` parameter changes (default: `0.5`).
+
+  cuts2 : float, optional
+      Cap for inter-site `s` parameter changes (default: `0.5`).
+
   """
 
   import sys, os
@@ -41,14 +82,6 @@ def GetFreeEnergy5(alf_info,ms,msprof):
   kT=0.001987*alf_info['temp']
   krest=1
 
-  cutb=2
-  cutc=8
-  cutx=2
-  cuts=1
-  cutc2=2
-  cutx2=0.5
-  cuts2=0.5
-
   Emid=np.arange(1.0/800,1,1.0/400)
   Emid2=np.arange(1.0/40,1,1.0/20)
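
For readers checking the new keyword arguments above, the following is a minimal, self-contained sketch of the capping behavior the docstring describes: if any proposed bias change exceeds its cap, every change is scaled down by a common factor so the largest change just meets its cap. This is an illustration of the documented behavior, not the code in alf/GetFreeEnergy5.py itself, and the call shown in the comment assumes GetFreeEnergy5 is exposed on the alf package in the same way alf.GetLambdas is referenced in the next patch.

    import numpy as np

    # e.g. alf.GetFreeEnergy5(alf_info, ms, msprof, cutc=4.0, cutc2=0.5)
    # would tighten only the intra-site and inter-site quadratic caps.

    def cap_scaling(changes_and_caps):
        """Common scale factor applied to all proposed bias changes.

        changes_and_caps: list of (array_of_changes, cap) pairs, e.g.
        [(db, cutb), (dc, cutc), (dx, cutx), (ds, cuts)].
        """
        scale = 1.0
        for dP, cap in changes_and_caps:
            largest = np.max(np.abs(dP))
            if largest > cap:
                scale = min(scale, cap / largest)
        return scale  # printed each ALF cycle; values below 1 indicate poor convergence

    db = np.array([0.5, -3.1])               # proposed b changes, cap cutb = 2.0
    dc = np.array([[0.0, 9.5], [9.5, 0.0]])  # proposed c changes, cap cutc = 8.0
    scale = cap_scaling([(db, 2.0), (dc, 8.0)])
    print(scale, scale * np.abs(db).max(), scale * np.abs(dc).max())
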
From 13acc398e8bcf2e2af7df8ed37f7e5ed703f00ee Mon Sep 17 00:00:00 2001
From: Stanislav Cherepanov <75815728+stanislc@users.noreply.github.com>
Date: Wed, 5 Feb 2025 18:11:47 -0500
Subject: [PATCH 2/2] Update GetEnergy.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The main purpose of reading Lambda files from the various runs is to capture
data from different execution scenarios. For example, runs “run1,” “run2,” and
“run3” may have only 1 execution, while runs “run4” and “run5” may have
multiple replicates.

a) Error handling for the nbshift files and checks that the data directories exist
b) Automatic detection of the flattening vs. production phase for each run
c) Reading of lambda files: the explicit loops over duplicates (range(ndupl)) and
   replicas (range(nreps)) are replaced by iteration over the detected
   lambda_files (see the sketch below)
d) Simplified logic for reading the relevant frames
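
Item (c) replaces fixed loop bounds with discovery of the Lambda files that are actually present. A minimal sketch of that pattern, for illustration only (the directory name is a placeholder; the real logic, including the skipE slicing and per-replica bias shifts, is in the diff below):

    import os
    import numpy as np

    data_dir = '../analysis10/data'  # placeholder for an analysis[i]/data directory

    # Discover whatever Lambda.<duplicate>.<replica>.dat files exist rather than
    # assuming fixed numbers of duplicates and replicas.
    lambda_files = sorted(f for f in os.listdir(data_dir)
                          if f.startswith('Lambda.') and f.endswith('.dat'))

    for name in lambda_files:
        dupl, rep = map(int, name.split('.')[1:3])  # parse duplicate and replica indices
        traj = np.loadtxt(os.path.join(data_dir, name))
        print(name, dupl, rep, traj.shape)
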
---
 alf/GetEnergy.py | 218 +++++++++++++++++++++++++----------------------
 1 file changed, 116 insertions(+), 102 deletions(-)

diff --git a/alf/GetEnergy.py b/alf/GetEnergy.py
index b1d6def..a2c6181 100755
--- a/alf/GetEnergy.py
+++ b/alf/GetEnergy.py
@@ -4,29 +4,36 @@ def GetEnergy(alf_info,Fi,Ff,skipE=1):
   """
   Compute energies for wham/mbar reweighting
 
-  This routine selects several simulations for reweighting with wham/mbar
-  and computes the relative bias energy from each simulation on the
-  alchemical trajectory for each simulation. Different replicas in
-  replica exchange are considered different simulations. This routine
-  should be run inside analysis[Ff]. Biases are taken from the apropriate
-  analysis[i] directory and alchemical trajectories are taken from the
-  output of the routine alf.GetLambdas in analysis[i]/data. Copies
-  alchemical trajectories from data directories analysis[Fi]/data to
-  analysis[Ff]/data into analysis[Ff]/Lambda/Lambda[*].dat and copies
-  bias energies into analysis[Ff]/Energy/ESim[*].dat . Energies are
-  computed relative to the central replica bias in the [Ff] cycle.
-
-  This routine can be called during flattening or production. During
-  flattening, Ff-Fi=4 is recommended (include 5 cycles in analysis),
-  during production Ff-Fi=0 is recommended (include 1 cycle in analysis).
-  Flattening versus production is detected by the presence of the run[Ff]
-  directory. During production this directory does not exist, instead,
-  run[Ff]a, and the other independent trial directories exist.
+  This routine selects several simulations for reweighting using WHAM/MBAR.
+  It computes the relative bias energy from each simulation along the
+  alchemical trajectory. Different replicas in replica exchange are considered
+  separate simulations.
+
+  This routine should be run inside `analysis[Ff]`. Biases are taken from the
+  appropriate `analysis[i]` directories, and alchemical trajectories are taken
+  from the output of the routine `alf.GetLambdas`, located in `analysis[i]/data`.
+  The alchemical trajectories from `analysis[Fi]/data` through `analysis[Ff]/data`
+  are copied into `analysis[Ff]/Lambda/Lambda[*].dat`, and the computed bias
+  energies are saved in `analysis[Ff]/Energy/ESim[*].dat`.
+
+  Energies are computed relative to the central replica bias in the `[Ff]` cycle.
+
+  This routine can be called during flattening or production:
+  - During **flattening**, `Ff - Fi = 4` is recommended (including 5 cycles in analysis).
+  - During **production**, `Ff - Fi = 0` is recommended (including 1 cycle in analysis).
+
+  The routine detects flattening vs. production based on the presence of the
+  `run[Ff]` directory. In production, this directory does not exist; instead,
+  `run[Ff]a` and other independent trial directories exist.
 
   Parameters
   ----------
   alf_info : dict
       Dictionary of variables alf needs to run
+      - `nblocks`: Total number of blocks in the system.
+      - `nsubs`: List of the number of substituents (blocks) at each site.
+      - `nreps`: Number of replicas.
+      - `ncentral`: Index of the central replica.
   Fi : int
       The first cycle of alf to include in analysis (inclusive)
   Ff : int
@@ -35,89 +42,96 @@ def GetEnergy(alf_info,Fi,Ff,skipE=1):
       In longer production runs the number of lambda samples may require
       significant amounts of memory to store and analyze. Only alchemical
       frames with index modulus skipE equal to skipE-1 are analyzed.
-      (default is 1 to analyze all frames)
+      Default: `1` (analyze all frames).
   """
 
-  import sys, os
-  import numpy as np
-
-  # Fi=int(sys.argv[1])
-  # Ff=int(sys.argv[2])
-  NF=Ff-Fi+1;
-
-  # if len(sys.argv)>3:
-  #   skipE=int(sys.argv[3])
-  # else:
-  #   skipE=1
-
-  alphabet='abcdefghijklmnopqrstuvwxyz'
-
-  if os.path.isdir('../run'+str(Ff)):
-    production=False
-    ndupl=1
-  else:
-    production=True
-    ndupl=0
-    for i in range(0,len(alphabet)):
-      if os.path.isdir('../run'+str(Ff)+alphabet[i]):
-        ndupl+=1
-    if ndupl==0:
-      print("Error, not flattening or production")
-      quit()
-
-  nblocks=alf_info['nblocks']
-  nsubs=alf_info['nsubs']
-  nreps=alf_info['nreps']
-  ncentral=alf_info['ncentral']
-
-  b_shift=np.loadtxt('../nbshift/b_shift.dat')
-  c_shift=np.loadtxt('../nbshift/c_shift.dat')
-  x_shift=np.loadtxt('../nbshift/x_shift.dat')
-  s_shift=np.loadtxt('../nbshift/s_shift.dat')
-
-  Lambda=[]
-  b=[]
-  c=[]
-  x=[]
-  s=[]
-  for i in range(0,NF):
-    DIR='../analysis'+str(Fi+i)
-    for j in range(0,ndupl):
-      for k in range(0,nreps):
-        if production:
-          Lambda.append(np.loadtxt(DIR+'/data/Lambda.'+str(j)+'.'+str(k)+'.dat')[(skipE-1)::skipE,:])
-        else:
-          Lambda.append(np.loadtxt(DIR+'/data/Lambda.'+str(j)+'.'+str(k)+'.dat'))
-        b.append(np.loadtxt(DIR+'/b_prev.dat')+b_shift*(k-ncentral))
-        c.append(np.loadtxt(DIR+'/c_prev.dat')+c_shift*(k-ncentral))
-        x.append(np.loadtxt(DIR+'/x_prev.dat')+x_shift*(k-ncentral))
-        s.append(np.loadtxt(DIR+'/s_prev.dat')+s_shift*(k-ncentral))
-
-  if not os.path.isdir('Lambda'):
-    os.mkdir('Lambda')
-  if not os.path.isdir('Energy'):
-    os.mkdir('Energy')
-
-  E=[]
-  for i in range(0,NF*ndupl*nreps):
-    E.append([])
-    for j in range(0,NF*ndupl*nreps):
-      bi=b[i]
-      ci=c[i]
-      xi=x[i]
-      si=s[i]
-      Lj=Lambda[j]
-      E[-1].append(np.reshape(np.dot(Lj,-bi),(-1,1)))
-      E[-1][-1]+=np.sum(np.dot(Lj,-ci)*Lj,axis=1,keepdims=True)
-      E[-1][-1]+=np.sum(np.dot(1-np.exp(-5.56*Lj),-xi)*Lj,axis=1,keepdims=True)
-      E[-1][-1]+=np.sum(np.dot(Lj/(Lj+0.017),-si)*Lj,axis=1,keepdims=True)
-
-  for i in range(0,NF*ndupl*nreps):
-    Ei=E[nreps*(NF*ndupl-1)+ncentral][i]
-    for j in range(0,NF*ndupl*nreps):
-      Ei=np.concatenate((Ei,E[j][i]),axis=1)
-    np.savetxt('Energy/ESim'+str(i+1)+'.dat',Ei,fmt='%12.5f')
-
-  for i in range(0,NF*ndupl*nreps):
-    Li=Lambda[i]
-    np.savetxt('Lambda/Lambda'+str(i+1)+'.dat',Li,fmt='%10.6f')
+  import sys, os
+  import numpy as np
+
+  NF = Ff - Fi + 1
+
+  nblocks = alf_info['nblocks']
+  nsubs = alf_info['nsubs']
+  nreps = alf_info['nreps']
+  ncentral = alf_info['ncentral']
+
+  try:
+    b_shift = np.loadtxt('../nbshift/b_shift.dat')
+    c_shift = np.loadtxt('../nbshift/c_shift.dat')
+    x_shift = np.loadtxt('../nbshift/x_shift.dat')
+    s_shift = np.loadtxt('../nbshift/s_shift.dat')
+  except FileNotFoundError as e:
+    print(f"Error loading shift files: {e}")
+    return
+
+  Lambda = []
+  b = []
+  c = []
+  x = []
+  s = []
+
+  for i in range(NF):
+    analysis_dir = f'../analysis{Fi+i}'
+    data_dir = os.path.join(analysis_dir, 'data')
+
+    if not os.path.isdir(data_dir):
+      print(f"Warning: Directory {data_dir} not found")
+      continue
+
+    # print(f"Checking directory: {data_dir}")
+
+    lambda_files = [f for f in os.listdir(data_dir) if f.startswith('Lambda.') and f.endswith('.dat')]
+    lambda_files.sort()
+
+    for lambda_file in lambda_files:
+      file_path = os.path.join(data_dir, lambda_file)
+      print(f"Loading file: {file_path}")
+      try:
+        # Extract the duplicate index j and replica index k from the filename
+        # (assuming the format Lambda.j.k.dat written by alf.GetLambdas)
+        j, k = map(int, lambda_file.split('.')[1:3])
+
+        Lambda_jk = np.loadtxt(file_path)[(skipE-1)::skipE,:]
+        b_k = np.loadtxt(os.path.join(analysis_dir, 'b_prev.dat')) + b_shift * (k - ncentral)
+        c_k = np.loadtxt(os.path.join(analysis_dir, 'c_prev.dat')) + c_shift * (k - ncentral)
+        x_k = np.loadtxt(os.path.join(analysis_dir, 'x_prev.dat')) + x_shift * (k - ncentral)
+        s_k = np.loadtxt(os.path.join(analysis_dir, 's_prev.dat')) + s_shift * (k - ncentral)
+      except Exception as e:
+        print(f"Error loading file {file_path}: {e}")
+        continue
+
+      # Append together so Lambda, b, c, x, and s always stay the same length
+      Lambda.append(Lambda_jk)
+      b.append(b_k)
+      c.append(c_k)
+      x.append(x_k)
+      s.append(s_k)
+
+  if not os.path.isdir('Lambda'):
+    os.mkdir('Lambda')
+  if not os.path.isdir('Energy'):
+    os.mkdir('Energy')
+
+  total_simulations = len(Lambda)
+  # print(f"Total simulations: {total_simulations}")
+
+  if total_simulations == 0:
+    print("Error: No Lambda files found.")
+    return
+
+  # E[i][j] holds the bias energy of simulation i's biases evaluated on the
+  # lambda trajectory of simulation j
+  E = [[] for _ in range(total_simulations)]
+
+  try:
+    for i in range(total_simulations):
+      for j in range(total_simulations):
+        bi, ci, xi, si = b[i], c[i], x[i], s[i]
+        Lj = Lambda[j]
+        E[i].append(np.reshape(np.dot(Lj,-bi),(-1,1)))
+        E[i][-1] += np.sum(np.dot(Lj,-ci)*Lj,axis=1,keepdims=True)
+        E[i][-1] += np.sum(np.dot(1-np.exp(-5.56*Lj),-xi)*Lj,axis=1,keepdims=True)
+        E[i][-1] += np.sum(np.dot(Lj/(Lj+0.017),-si)*Lj,axis=1,keepdims=True)
+  except Exception as e:
+    print(f"Error during energy calculation: {e}")
+    return
+
+  # Reference column: the central replica of the final cycle, as documented above
+  # (assumes the final cycle's last duplicate contributes nreps Lambda files)
+  ref = total_simulations - nreps + ncentral
+  for i in range(total_simulations):
+    Ei = E[ref][i]
+    for j in range(total_simulations):
+      Ei = np.concatenate((Ei,E[j][i]),axis=1)
+    np.savetxt(f'Energy/ESim{i+1}.dat', Ei, fmt='%12.5f')
+
+  for i in range(total_simulations):
+    Li = Lambda[i]
+    np.savetxt(f'Lambda/Lambda{i+1}.dat', Li, fmt='%10.6f')
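
As a cross-check on the bias functional forms documented in PATCH 1/2, here is a scalar restatement of the per-frame energy accumulated in the vectorized loop above. It is illustrative only; it spells out the same terms, with the same 5.56 and 0.017 constants, for a single lambda vector.

    import numpy as np

    def bias_energy_one_frame(lam, b, c, x, s):
        """Bias energy of one lambda frame, matching the vectorized terms above."""
        E = -np.dot(lam, b)                                          # fixed bias: b(I)*lambda(I)
        E -= np.einsum('i,ij,j->', lam, c, lam)                      # quadratic: c(I,J)*lambda(I)*lambda(J)
        E -= np.einsum('i,ij,j->', 1 - np.exp(-5.56 * lam), x, lam)  # x end-point: x(I,J)*(1-exp(-5.56*lambda(I)))*lambda(J)
        E -= np.einsum('i,ij,j->', lam / (lam + 0.017), s, lam)      # s term: s(I,J)*lambda(I)*lambda(J)/(lambda(I)+0.017)
        return E

    lam = np.array([0.90, 0.05, 0.05])   # one alchemical frame with three blocks
    b = np.array([1.0, 0.5, -0.5])
    c = np.zeros((3, 3))
    x = np.zeros((3, 3))
    s = np.zeros((3, 3))
    print(bias_energy_one_frame(lam, b, c, x, s))   # -(0.9*1.0 + 0.05*0.5 - 0.05*0.5) = -0.9
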