from pyrosetta import init, Pose, get_fa_scorefxn
from pyrosetta import pose_from_pdb
from pyrosetta.rosetta.core.scoring.dssp import Dssp
from pyrosetta.rosetta.protocols.loops import Loops, Loop
from pyrosetta.rosetta.protocols.loops.loop_mover.perturb import LoopMover_Perturb_KIC
from pyrosetta.rosetta.protocols.loops.loop_mover.refine import LoopMover_Refine_KIC, LoopMover_Refine_CCD
#from pyrosetta.rosetta.core.import_pose import pose_from_pdbstring as pose_from_pdb
# 1. Initialize PyRosetta
init()

# 2. Load your protein pose
pose = pose_from_pdb("test.pdb")

# Print the first and last residue index of every chain in the pose.
n_chains = pose.num_chains()
for chain_index in range(1, n_chains + 1):
    start_res, end_res = pose.chain_begin(chain_index), pose.chain_end(chain_index)
    print(f"Chain {chain_index}: residues {start_res} to {end_res}")
# Collect, per chain, every contiguous run of residues that DSSP labels 'L'.
#
# Result layout:
#   loops_by_chain[chain_index] = [
#       {"start": first_residue, "end": last_residue, "sequence": one_letter_codes},
#       ...
#   ]
# Residue numbers are the same indices returned by pose.chain_begin() /
# pose.chain_end().

# The secondary-structure string must exist before it is sliced below;
# previously it was only computed in a separate snippet, so this section
# failed with a NameError when run on its own.
# NOTE(review): assumes the string holds one character per pose residue, in
# pose order - confirm against the Dssp documentation.
secstruct = Dssp(pose).get_dssp_reduced_IG_as_L_secstruct()

loops_by_chain = {}

# Iterate over chains
n_chains = pose.num_chains()
for chain_index in range(1, n_chains + 1):
    start_res = pose.chain_begin(chain_index)
    end_res = pose.chain_end(chain_index)

    # Extract the secondary-structure substring for this chain. The string is
    # 0-indexed while residue numbers start at 1, hence the "- 1" on the start.
    chain_secstruct = secstruct[start_res - 1:end_res]

    loop_regions = []
    current_loop_start = None

    # Identify loop regions as stretches of 'L'. `i` is the residue number of
    # the character `s` because enumeration starts at start_res.
    for i, s in enumerate(chain_secstruct, start=start_res):
        if s == "L":
            if current_loop_start is None:
                current_loop_start = i
        elif current_loop_start is not None:
            # First non-'L' residue after a run: the run ended one residue back.
            loop_regions.append((current_loop_start, i - 1))
            current_loop_start = None

    # A run that is still open here extends to the end of the chain.
    if current_loop_start is not None:
        loop_regions.append((current_loop_start, end_res))

    # Attach the one-letter sequence to every loop region of this chain.
    chain_loops = [
        {
            "start": loop_start,
            "end": loop_end,
            "sequence": "".join(
                pose.residue(r).name1() for r in range(loop_start, loop_end + 1)
            ),
        }
        for loop_start, loop_end in loop_regions
    ]

    loops_by_chain[chain_index] = chain_loops
# Print the residue range covered by each chain of the pose.
n_chains = pose.num_chains()
chain_index = 1
while chain_index <= n_chains:
    start_res = pose.chain_begin(chain_index)
    end_res = pose.chain_end(chain_index)
    print(f"Chain {chain_index}: residues {start_res} to {end_res}")
    chain_index += 1
# 3. Define the loop(s) you want to remodel.
# This script remodels residues 593 to 608. The cut point should ideally lie
# inside the loop, typically near the middle.
loop_start = 593
loop_end = 608
cutpoint = 601

# The movers below take a Loops container, so wrap the single loop in one.
# (Previously `loops` was used without ever being created.)
loops = Loops()
loops.add_loop(Loop(loop_start, loop_end, cutpoint))

# 4. Set up a score function
scorefxn = get_fa_scorefxn()

# 5. Set up the loop remodeling protocol: KIC perturbation followed by KIC
# refinement, both scored with the same score function.
perturb_mover = LoopMover_Perturb_KIC(loops)
perturb_mover.set_scorefxn(scorefxn)

# The refinement mover has to exist before it is applied in step 7; it used to
# be defined only in commented-out code. LoopMover_Refine_CCD (also imported
# at the top of the file) can be substituted here to refine with CCD instead.
refine_mover = LoopMover_Refine_KIC(loops)
refine_mover.set_scorefxn(scorefxn)

# 6. Optionally: set up Monte Carlo or repeats.
# Often you do multiple trials and pick the best model.

# 7. Apply the movers: first perturbation, then refinement.
perturb_mover.apply(pose)
refine_mover.apply(pose)

# After this, the pose holds the remodeled loop region; save it to a PDB file.
pose.dump_pdb("remodeled_loop.pdb")
Raw loop
Loop predicted using ImmuneBuilder. The predicted structure has some problems in the CDRH3 region, and when it is placed in the correct position it produces clashes.
Loop reconstructed with Rosetta using the code above. Rosetta takes a long time to reconstruct the loop and the result is terrible: the loop is inserted into a very weird and unlikely position.
How to check the chains and the number of residues in each
from pyrosetta import init, pose_from_pdb
# 1. Initialize PyRosetta
init()

# 2. Load your protein pose
pose = pose_from_pdb("data/14-1_ImmuneCorrect.pdb")

# 3. Count the chains and print the residue range of each one
n_chains = pose.num_chains()
for chain_index in range(1, n_chains + 1):
    start_res, end_res = pose.chain_begin(chain_index), pose.chain_end(chain_index)
    print(f"Chain {chain_index}: residues {start_res} to {end_res}")
Chain 1: residues 1 to 322
Chain 2: residues 323 to 493
Chain 3: residues 494 to 620
Chain 4: residues 621 to 729
Get the Secondary Structure
from pyrosetta import init, pose_from_pdb
from pyrosetta.rosetta.core.scoring.dssp import Dssp

# 1. Initialize PyRosetta
init()

# 2. Load your protein pose
pose = pose_from_pdb("data/14-1_ImmuneCorrect.pdb")

# Run DSSP on the pose and keep its secondary-structure string.
# NOTE(review): judging by the method name, this is the reduced alphabet with
# the I and G classes reported as 'L' - confirm against the Dssp documentation.
dssp = Dssp(pose)
secstruct = dssp.get_dssp_reduced_IG_as_L_secstruct()