pyrosetta© Karobben

pyrosetta

Pyrosetta

Loop Regenerate

from pyrosetta import init, Pose, get_fa_scorefxn
from pyrosetta.rosetta.protocols.loops import Loops, Loop
from pyrosetta.rosetta.protocols.loops.loop_mover.perturb import LoopMover_Perturb_KIC
from pyrosetta.rosetta.protocols.loops.loop_mover.refine import LoopMover_Refine_KIC, LoopMover_Refine_CCD

#from pyrosetta.rosetta.core.import_pose import pose_from_pdbstring as pose_from_pdb
from pyrosetta import pose_from_pdb

# 1. Initialize PyRosetta
# init() must run before any pose is created.
init()

# 2. Load Your Protein Pose
pose = pose_from_pdb("test.pdb")

# Print, for every chain, the first and last residue index it covers.
# Chain indices are iterated from 1 to num_chains() inclusive; the residue
# indices are the ones reported by chain_begin()/chain_end().
n_chains = pose.num_chains()
for chain_index in range(1, n_chains + 1):
    start_res = pose.chain_begin(chain_index)
    end_res = pose.chain_end(chain_index)
    print(f"Chain {chain_index}: residues {start_res} to {end_res}")

# Collect the loop regions of every chain, keyed by chain index.
#
# Result layout:
#   loops_by_chain[chain_index] -> list of
#       {"start": first residue, "end": last residue, "sequence": one-letter codes}

# The secondary-structure string has to exist before it can be sliced below;
# it is computed here with DSSP so this snippet does not depend on a
# `secstruct` variable left over from another session.
from pyrosetta.rosetta.core.scoring.dssp import Dssp

secstruct = Dssp(pose).get_dssp_reduced_IG_as_L_secstruct()

loops_by_chain = {}

# Iterate over chains
n_chains = pose.num_chains()
for chain_index in range(1, n_chains + 1):
    start_res = pose.chain_begin(chain_index)
    end_res = pose.chain_end(chain_index)

    # Extract secondary structure substring for this chain.
    # Residue indices start at 1 while the string is 0-based, hence the -1.
    chain_secstruct = secstruct[start_res - 1:end_res]

    loop_regions = []
    current_loop_start = None

    # Identify loop regions as maximal stretches of 'L'.
    # `i` is the residue index of the character `s`.
    for i, s in enumerate(chain_secstruct, start=start_res):
        if s == 'L':
            if current_loop_start is None:
                current_loop_start = i
        elif current_loop_start is not None:
            # First non-loop residue after a stretch: the loop ended at i-1.
            loop_regions.append((current_loop_start, i - 1))
            current_loop_start = None

    # A loop that is still open here extends to the end of the chain.
    if current_loop_start is not None:
        loop_regions.append((current_loop_start, end_res))

    # Attach the one-letter sequence to each loop region and store the
    # records under this chain's index.
    loops_by_chain[chain_index] = [
        {
            "start": loop_start,
            "end": loop_end,
            "sequence": "".join(
                pose.residue(r).name1() for r in range(loop_start, loop_end + 1)
            ),
        }
        for loop_start, loop_end in loop_regions
    ]

# Print the residue range of every chain again (same summary as right after
# loading the pose) as a reference for choosing the loop residues below.
n_chains = pose.num_chains()
for chain_index in range(1, n_chains + 1):
    start_res = pose.chain_begin(chain_index)
    end_res = pose.chain_end(chain_index)
    print(f"Chain {chain_index}: residues {start_res} to {end_res}")

# 3. Define the Loop(s) You Want to Remodel
# Here the loop from residue 593 to residue 608 is remodeled (same residue
# numbering as printed by the chain summary above).
# Choose a cut point (ideally inside the loop), typically near the middle.
loop_start = 593
loop_end = 608
cutpoint = 601

# Fail early with a readable message rather than an error from deep inside
# Rosetta when the hard-coded indices do not fit the loaded structure.
if not 1 <= loop_start <= cutpoint <= loop_end:
    raise ValueError(
        f"Invalid loop definition: start={loop_start}, cut={cutpoint}, "
        f"end={loop_end} (expected 1 <= start <= cut <= end)"
    )
if loop_end > pose.size():
    raise ValueError(
        f"Loop end {loop_end} is beyond the last residue of the pose "
        f"({pose.size()})"
    )

loops = Loops()
loops.add_loop(Loop(loop_start, loop_end, cutpoint))

# 4. Set Up a Scorefunction
# The same full-atom score function is handed to both movers below.
scorefxn = get_fa_scorefxn()

# 5. Set Up the Loop Remodeling Protocol
# You have multiple options:
# Example: Use KIC Perturb and then Refine
# NOTE(review): the KIC perturb stage is usually run on a centroid-mode pose
# with a centroid score function; here it receives the full-atom pose and
# score function - confirm against the Rosetta loop-modeling documentation.
perturb_mover = LoopMover_Perturb_KIC(loops)
perturb_mover.set_scorefxn(scorefxn)

refine_mover = LoopMover_Refine_KIC(loops)
refine_mover.set_scorefxn(scorefxn)

# Alternatively, you might use CCD refinement:
# refine_mover = LoopMover_Refine_CCD(loops)
# refine_mover.set_scorefxn(scorefxn)

# 6. Optionally: Set Up Monte Carlo or Repeats
# Often you do multiple trials and pick the best model.

# 7. Apply the Movers
# Both movers modify `pose` in place: first the perturbation ...
perturb_mover.apply(pose)

# ... then the refinement.
refine_mover.apply(pose)

# After this, you should have a remodeled loop region.
# You can save the resulting structure to a PDB file:
pose.dump_pdb("remodeled_loop.pdb")
Raw loop Raw loop
Predicted loop Predicted loop using ImmuneBuilder. The predicted result has some trouble in the CDRH3 region, and if we place it in the corrected position, it clashes.
Reconstructed loop Rosetta-reconstructed loop using the code above. Rosetta takes a lot of time to reconstruct the loop and the result is terrible: the loop is inserted into a very weird and unlikely position.

How to check the Chain and the number of residues

from pyrosetta import init, pose_from_pdb

# 1. Initialize PyRosetta
init()
# 2. Load Your Protein Pose
pose = pose_from_pdb("data/14-1_ImmuneCorrect.pdb")

# 3. Count and print the result
# One line per chain: the chain index followed by the first and last residue
# index that chain covers (see the sample output below).
n_chains = pose.num_chains()
for chain_index in range(1, n_chains + 1):
    start_res = pose.chain_begin(chain_index)
    end_res = pose.chain_end(chain_index)
    print(f"Chain {chain_index}: residues {start_res} to {end_res}")
Chain 1: residues 1 to 322
Chain 2: residues 323 to 493
Chain 3: residues 494 to 620
Chain 4: residues 621 to 729

Get the Secondary Structure

from pyrosetta import init, pose_from_pdb
from pyrosetta.rosetta.core.scoring.dssp import Dssp

# 1. Start PyRosetta before touching any structure
init()

# 2. Read the structure file into a pose
structure_file = "data/14-1_ImmuneCorrect.pdb"
pose = pose_from_pdb(structure_file)

# 3. Run DSSP on the pose and fetch the secondary-structure string.
# The string is made of the letters H, E and L (see the sample output and
# the legend below); presumably one character per residue.
dssp = Dssp(pose)
secstruct = dssp.get_dssp_reduced_IG_as_L_secstruct()
LLEEEEELELLLLLLEEEELLEEEEEELLEEELEELLLLLLEEEELLELLEELLLELHHHHHHLLLLLLLL
LLLLLLLLEEELLLLLELLLLLLLELLHHHHHHHLLLELLLEEEELLLLLLLLLLEELLLLEHLHLLLLLLE
LLLLEEEEEELLLLLLLEEEEEELLLLLLEEEEEEEEELLLHHHHHHHHLLLLLLEEEEELLLEEEELLLLL
LLLLLLLLLLEEEEEEEEELLLLEEEEEELLLEEEELEEEELLELLLLLEEELLLLEEEEEELEELLLLLEL...

What does it mean?

  • H: Alpha-Helix
  • E: Beta-Strand
  • L: Loop or Irregular Region
Author

Karobben

Posted on

2024-12-20

Updated on

2024-12-21

Licensed under

Comments