-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Select model feature in process_density_map script (#150)
* Add model selection mode to process_density_map; add cryofold2_setup script Using process_density_map select_model it is now possible to select a best-fitting model from the MD trajectory: 1. Using a 10-frame window RMSF is calculated for each atom 2. RMSF is converted to B-factors and saved in a PDB file 3. A map is simulated from a model using gemmi library: the simulated map accounts for atomic scattering factors and B-factors determined in the previous step 4. Optionally a mask is created from the model to exclude the solvent signal in the experimental map 5. Experimental and simulated map are compared using real-space cross-correlation or integrated FSC (iFSC, as defined by Wang et al doi:10.7554/eLife17219). The model with the best CC or iFSC is symlinked. In addition map/model FSC plots are generated for each PDB file The script cryofold2_setup is a command-line adaptation of CryoFold2 tutorial (https://github.com/ccccclw/meld/blob/master/docs/tutorial/cryofold_tutorial/setup_cryofold.py) * Update setup.py to include script cryofold2_setup * Update build-ubuntu-latest.yml Add python dependencies for process_density script: - mdtraj - gemmi - mrcfile - matplotlib - progressbar2 * Expose temperature scaling and MD-steps in cryofold2_setup, provide the MD time calculation * Fix crash with model B-factors out of range [-10 100] This is a limitation of the PDB file format enforced by `mdtraj` when writing trajectory PDB * Add python requirements to setup.py; update CHANGELOG.md * Rename temp settings add density weight It is now possible to set density weighting in cryofold2_setup; temperature settings (ramp between replicas, alphamin/max) are added and properly described in the CLI help
- Loading branch information
Showing
5 changed files
with
716 additions
and
61 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,199 @@ | ||
#!/usr/bin/env python3 | ||
__description__ = 'Set up files to run MELD/CryoFold 2. Based on https://github.com/ccccclw/meld/blob/master/docs/tutorial/cryofold_tutorial/setup_cryofold.py' | ||
# encoding: utf-8 | ||
import numpy as np | ||
import argparse | ||
import meld | ||
from meld import system | ||
from meld.comm import MPICommunicator | ||
from meld import comm, vault | ||
from meld import system | ||
from meld.remd import ladder, adaptor, leader | ||
from meld.system.scalers import LinearRamp,ConstantRamp | ||
import openmm.unit as u | ||
|
||
class MeldSetup: | ||
# a simple class to handle the MELD setup | ||
# TODO dtype checks | ||
def __init__(self, n_replicas=8, n_steps=500, block_size=20, md_steps=100, md_minimize_steps=100): | ||
''' | ||
Parameters | ||
---------- | ||
n_steps: | ||
number of replica exchange steps | ||
md_steps | ||
number of md steps in each replica exchange step , i.e. the total number of md steps is n_steps * md_steps | ||
md_minimize_steps | ||
in each md step also run this number of minimization steps (these do not count towards the total number of steps?) | ||
''' | ||
self.N_REPLICAS = n_replicas | ||
self.N_STEPS = n_steps | ||
self.BLOCK_SIZE = block_size | ||
self.MD_STEPS = md_steps | ||
self.MD_MINIMIZE_STEPS = md_steps | ||
# print the info on the total MD time: | ||
# NOTE assumes use_big_timestep=True | ||
print(f'Total REMD runtime is: {n_steps * md_steps * 3.5 / 1e6} ns ({n_steps} REMD steps)') | ||
|
||
''' NOTE not used in the script, so commented out | ||
def gen_state_templates(self, index, templates): | ||
n_templates = len(templates) | ||
print((index,n_templates,index%n_templates)) | ||
a = system.subsystem.SubSystemFromPdbFile(templates[index%n_templates]) | ||
#Note that it does not matter which forcefield we use here to build | ||
#as that information is not passed on, it is used for all the same as | ||
#in the setup part of the script | ||
b = system.builder.SystemBuilder(forcefield="ff14sbside") | ||
c = b.build_system([a]) | ||
pos = c._coordinates | ||
c._box_vectors=np.array([0.,0.,0.]) | ||
vel = np.zeros_like(pos) | ||
alpha = index / (self.N_REPLICAS - 1.0) | ||
energy = 0 | ||
return system.state.SystemState(pos, vel, alpha, energy,c._box_vectors) | ||
''' | ||
def gen_state(self, s, index): | ||
state = s.get_state_template() | ||
state.alpha = index / (self.N_REPLICAS - 1.0) | ||
return state | ||
|
||
def setup_system(self, template, denmap, forcefield='ff14sbside', implicit_solvent_model='gbNeck2', temp_min=300., temp_max=300., use_geometric_temp_scaler=False, temp_alpha_range=(0,1), denmap_weight=0.3): | ||
''' | ||
Prepare all necessary files for MELD run | ||
Parameters | ||
---------- | ||
template | ||
path to the model PDB file | ||
denmap | ||
path to the cryo-EM density map in MRC format | ||
denmap_weight | ||
global density map weight when setting up restraints; increase to 0.5-0.5 to have a higher driving force of density fitting | ||
start_temp, end_temp | ||
starting and ending temperature (in K) along the replica ladder of the REMD; both heating and cooling are possible; if start_temp == end_temp, a constant temperature is applied | ||
use_geometric_temp_scaler | ||
if True, the temperature is scaled geometrically (exponentially), otherwise linear scaling is used | ||
temp_alpha_range | ||
... see temp scaler documentation; enables placing constant temperature in some of the replicas and having a gradient in others | ||
TODO | ||
---- | ||
* adding other restraints, e.g. the positional restraints from ModelAngelo model or distance restraints from evolutionary coupling, crosslinks, etc | ||
''' | ||
#template = "./1ake_s.pdb" | ||
#template = './model.pdb' | ||
p = meld.AmberSubSystemFromPdbFile(template) | ||
build_options = meld.AmberOptions( | ||
forcefield=forcefield, | ||
implicit_solvent_model=implicit_solvent_model, | ||
use_big_timestep=True, # 3.5 fs timestep with hydrogen mass repartitioning | ||
cutoff=1.8*u.nanometer | ||
) | ||
builder = meld.AmberSystemBuilder(build_options) | ||
s = builder.build_system([p]).finalize() | ||
|
||
if temp_min == temp_max: | ||
s.temperature_scaler = meld.ConstantTemperatureScaler(temp_min * u.kelvin) | ||
else: | ||
# example: start with a higher temperature to sample more conformations | ||
# TODO alpha_min can be above 0 and alpha_max can be below 1 to have some constant temperature period | ||
# start_temp < end_temp: system is heated | ||
# start_temp > end_temp: system is cooled | ||
if use_geometric_temp_scaler: | ||
TempScalerClass = meld.GeometricTemperatureScaler | ||
else: | ||
TempScalerClass = meld.LinearTemperatureScaler | ||
s.temperature_scaler = TempScalerClass(*temp_alpha_range, temp_min * u.kelvin, temp_max * u.kelvin) | ||
|
||
n_res = s.residue_numbers[-1] | ||
all_atoms=[] | ||
map_restraints = [] | ||
dist_scaler = s.restraints.create_scaler('constant') | ||
blur_scaler = s.restraints.create_scaler( | ||
"linear_blur",alpha_min=0, alpha_max=1, | ||
min_blur=0, max_blur=2, num_replicas=self.N_REPLICAS | ||
) | ||
for i in range(len(s.residue_numbers)): | ||
if 'H' not in s.atom_names[i]: | ||
all_atoms.append(s.index.atom(resid=s.residue_numbers[i],atom_name=s.atom_names[i])) | ||
density_map=s.density.add_density(denmap, blur_scaler, 0, denmap_weight) | ||
r = s.restraints.create_restraint( | ||
"density", | ||
dist_scaler, | ||
LinearRamp(0,100,0,1), | ||
atom=all_atoms, | ||
density=density_map | ||
) | ||
map_restraints.append(r) | ||
s.restraints.add_as_always_active_list(map_restraints) | ||
|
||
|
||
# create the options | ||
# see also: https://github.com/maccallumlab/meld/blob/master/docs/tutorial/getting_started.rst#setting-up-the-system | ||
# run time of a single MD step: step_size (3.5 fs in case of use_big_timestep) * timesteps = 0.35 ps | ||
# i.e. after every 0.35 ps a replica exchange is attempted | ||
options = meld.RunOptions( | ||
timesteps = self.MD_STEPS, | ||
minimize_steps = self.MD_MINIMIZE_STEPS, | ||
) | ||
|
||
# create a store | ||
store = vault.DataStore(self.gen_state(s,0), self.N_REPLICAS, s.get_pdb_writer(), block_size=self.BLOCK_SIZE) | ||
store.initialize(mode='w') | ||
store.save_system(s) | ||
store.save_run_options(options) | ||
|
||
# create and store the remd_runner | ||
l = ladder.NearestNeighborLadder(n_trials=100) | ||
policy = adaptor.AdaptationPolicy(2.0, 50, 50) | ||
a = adaptor.EqualAcceptanceAdaptor(n_replicas=self.N_REPLICAS, adaptation_policy=policy) | ||
|
||
remd_runner = leader.LeaderReplicaExchangeRunner(self.N_REPLICAS, max_steps=self.N_STEPS, ladder=l, adaptor=a) | ||
store.save_remd_runner(remd_runner) | ||
|
||
# create and store the communicator | ||
c = comm.MPICommunicator(s.n_atoms, self.N_REPLICAS) | ||
store.save_communicator(c) | ||
|
||
# create and save the initial states | ||
states = [self.gen_state(s, i) for i in range(self.N_REPLICAS)] | ||
store.save_states(states, 0) | ||
|
||
# save data_store | ||
store.save_data_store() | ||
|
||
def get_cli(): | ||
# defines argument parser and returns commandline arguments | ||
parser = argparse.ArgumentParser(description=__description__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) | ||
parser.add_argument('map', help='Path to the cryo-EM map in MRC format') | ||
parser.add_argument('model', help='Path to the starting model in PDB format; the model needs to be roughly placed into the map density') | ||
parser.add_argument('--n-replicas', default=8, type=int, help='Number of replicas') | ||
parser.add_argument('--n-steps', default=500, type=int, help='Number of steps for each replica') | ||
parser.add_argument('--n-md-steps', default=100, type=int, help='Number of MD steps (3.5 fs) within each REMD step') | ||
parser.add_argument('--n-md-minimize-steps', default=100, type=int, help='Number of minimization steps prior to MD within each REMD step') | ||
parser.add_argument('--map-weight', default=0.3, type=float, help='Weight of the density map restraints; increase to bring up the fitting force') | ||
# NOTE this is not an essential parameter, to keep things simple block_size is always set to be the same as n-steps | ||
#parser.add_argument('--block-size', default=500, type=int, help='Number of steps for saving trajectory') | ||
# NOTE available force fields are listed here: | ||
# https://github.com/maccallumlab/meld/blob/master/docs/tutorial/getting_started.rst | ||
# TODO how to set different force fields for different parts of the model (e.g. protein or nucleic acids?) | ||
parser.add_argument('--force-field', '--ff', default='ff14sbside', choices=('ff12sb', 'ff14sbside', 'ff14sb'), help='Force field to be used in simulation') | ||
# TODO other solvent options? | ||
parser.add_argument('--implicit-solvent-model', '--ism', default='gbNeck2', help='Implicit solvent model for the simulation') | ||
# temperature settings | ||
parser.add_argument('--temp', type=float, default=300., help='Temperature of the system in Kelvins or temperature of the lowest replica if --temp-highest-replica is set') | ||
parser.add_argument('--temp-highest-replica', type=float, help='Temperature in K at the highest replica; if None, then all replicas run at constant temperature equalling start_temp') | ||
parser.add_argument('--geometric-temp-scaler', action='store_true', help='If a temperature is scaled across replicas, scale geometrically (exponentially) rather then linearly') | ||
parser.add_argument('--temp-alpha-range', nargs=2, default=(0.,1.), type=float, help='Alpha values between which the temperature is scaled; allows having replicas with constant temperature as well as replicas that have temperature scaling in one run') | ||
# parse the cli and manually set parameters | ||
args = parser.parse_args() | ||
args.block_size = args.n_steps | ||
if args.temp_highest_replica is None: | ||
args.temp_highest_replica = args.temp | ||
return args | ||
|
||
if __name__ == '__main__': | ||
args = get_cli() | ||
setup_handler = MeldSetup(n_replicas = args.n_replicas, n_steps = args.n_steps, block_size = args.block_size, md_steps = args.n_md_steps, md_minimize_steps = args.n_md_minimize_steps) | ||
setup_handler.setup_system(template = args.model, denmap = args.map, forcefield = args.force_field, implicit_solvent_model=args.implicit_solvent_model, temp_min=args.temp, temp_max=args.temp_highest_replica, use_geometric_temp_scaler=args.geometric_temp_scaler, temp_alpha_range = args.temp_alpha_range, denmap_weight=args.map_weight) | ||
|
Oops, something went wrong.