Skip to content
dstoeckel edited this page Feb 20, 2015 · 5 revisions

How to get the 1 letter code AS sequence out of a .pdb file?

#include <BALL/KERNEL/chain.h>
#include <BALL/KERNEL/protein.h>
#include <BALL/KERNEL/system.h>
#include <BALL/STRUCTURE/peptides.h>
#include <BALL/FORMAT/PDBFile.h>

using namespace BALL;

//read in .pdb file containing protein
PDBFile infile("system.pdb");
System S;
infile >> S;
infile.close();

// get the BALL sequence
String ball_seq= "";

// get the protein
if (RTTI::isKindOf<Protein>(*(S.getMolecule(0))))
{
  //we take the first chain for illustration
  Protein* protein = RTTI::castTo<Protein>(*(S.getMolecule(0)));
  ChainIterator ch_it = protein->beginChain();
  for (ResidueIterator r_it = ch_it->beginResidue(); +r_it; r_it++)
  {
    if ((r_it->isAminoAcid()) && (r_it->getName() != String("ACE")))
    {
      ball_seq += Peptides::OneLetterCode(r_it->getName());
    }
  }
}
cout << "One letter code: " << ball_seq << endl;

A possible alternative would look like this:

#include <BALL/FORMAT/PDBFile.h>
#include <BALL/KERNEL/system.h>
#include <BALL/STRUCTURE/peptides.h>

#include <iostream>

using namespace BALL;

int main(int argc, char* argv[])
{
  PDBFile in(argv[1]);
  if(!in) {
    return -1;
  }
  System sys;
  in >> sys;
  in.close();
  for(ProteinIterator it = sys.beginProtein(); +it; ++it) {
    String seq = Peptides::GetSequence(*it);
    std::cout << "Sequence of protein "
              << it->getName() << ": " << seq << std::endl;
  }
  return 0;
}
Clone this wiki locally