Skip to content

LoadPDBFileIntoProtein

dstoeckel edited this page Mar 16, 2015 · 2 revisions

How can I load a PDB-file into a BALL Protein?

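Load the PDB file into a BALL::System, prepare the structure using the fragment database, and then print the sequence, the number of chains, and the number of residues per chain of the first protein.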

C++

#include <BALL/FORMAT/PDBFile.h>
#include <BALL/STRUCTURE/peptides.h>
#include <BALL/STRUCTURE/fragmentDB.h>
#include <BALL/KERNEL/system.h>
#include <BALL/KERNEL/chain.h>
#include <BALL/KERNEL/protein.h>

#include <iostream>

using namespace BALL;

...

// Read the PDB file into a BALL::System.
PDBFile f("myProtein.pdb");
System S;
f >> S;

// Open a fragment database.
// NOTE(review): the empty file name presumably selects BALL's default
// fragment database - confirm against the FragmentDB documentation.
FragmentDB fragment_db("");

// Normalize the atom names, i.e. convert different naming standards
// to the PDB naming scheme - just in case!
S.apply(fragment_db.normalize_names);

// Add any missing hydrogens to the residues. The data on the hydrogen
// positions stems from the fragment database; positions created this
// way are only good estimates.
S.apply(fragment_db.add_hydrogens);

// Create the bonds between the atoms (PDB files hardly ever contain a
// complete set of CONECT records).
S.apply(fragment_db.build_bonds);


// Fetch the system's first protein exactly once. The body below is
// skipped when the returned pointer is null, so the pointer is never
// dereferenced unless the lookup succeeded.
if (Protein* protein = S.getProtein(0))
{
   // print the protein's sequence
   std::cout << Peptides::GetSequence(*protein) << std::endl;

   // print the number of chains in the protein
   std::cout << "Number of chains in the protein: " << protein->countChains() << std::endl;

   // iterate over all chains; unary plus on the iterator is the
   // validity check used as the loop condition
   for (ChainIterator ch_it = protein->beginChain(); +ch_it; ++ch_it)
   {
      // print the number of residues of the current chain
      std::cout << "Number of residues: " << ch_it->countResidues() << std::endl;
   }
}

Python

import sys
from BALL import *

# Read the PDB file named by the first command-line argument into a
# BALL System.
f = PDBFile(sys.argv[1])
S = System()
f.read(S)

# Open a fragment database.
# NOTE(review): the empty file name presumably selects BALL's default
# fragment database - confirm against the FragmentDB documentation.
fdb = FragmentDB("")

# Normalize the atom names, i.e. convert different naming standards
# to the PDB naming scheme - just in case!
S.apply(fdb.normalize_names)

# Add any missing hydrogens to the residues. The data on the hydrogen
# positions stems from the fragment database; positions created this
# way are only good estimates.
S.apply(fdb.add_hydrogens)

# Create the bonds between the atoms (PDB files hardly ever contain a
# complete set of CONECT records).
S.apply(fdb.build_bonds)

# Fetch the first protein of the system.
protein = S.getProtein(0)

# Identity comparison is the reliable way to test for None; it does not
# go through any comparison operator defined on the wrapped object.
if protein is not None:
    # Print the protein's sequence. Single-argument print(...) calls
    # behave the same under Python 2 and Python 3.
    print(Peptides.GetSequence(protein))

    # Print the number of chains in the protein.
    print("Number of chains in the protein: " + str(protein.countChains()))

    # Iterate over all chains.
    for chain in chains(protein):
        # Print the number of residues of the current chain.
        print("Number of residues: " + str(chain.countResidues()))
Clone this wiki locally