Skip to content

Commit

Permalink
almost there
Browse files Browse the repository at this point in the history
  • Loading branch information
danpf committed Sep 13, 2019
1 parent 7c74b18 commit 082afe4
Show file tree
Hide file tree
Showing 7 changed files with 375 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@
[submodule "mmtf_spec"]
path = mmtf_spec
url = https://github.com/rcsb/mmtf
[submodule "pybind11"]
path = pybind11
url = git@github.com:pybind/pybind11.git
Binary file added bindings/4lgr.mmtf
Binary file not shown.
189 changes: 189 additions & 0 deletions bindings/bindings.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@

//#ifndef MMTF_CPP_PYBIND_BINDINGS_HH
//#define MMTF_CPP_PYBIND_BINDINGS_HH

#include <mmtf.hpp>

#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
#include <cstring>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

namespace py = pybind11;

/// Copies the contents of a NumPy array into a temporary std::vector<double>
/// and returns the fixed string "w". The copy itself is discarded.
/// NOTE(review): presumably a probe for timing the py::array argument path
/// against array2str2 -- confirm.
///
/// @param arr  any NumPy array; it is viewed (or converted) as C-contiguous float64
/// @return     always "w"
/// @throws py::type_error if the argument cannot be converted to float64
std::string
array2str1(py::array const & arr) {
    // The element count alone says nothing about dtype or layout, so normalise
    // first: copying size()*sizeof(double) bytes straight out of e.g. a float32
    // or strided array would read past the end of its buffer.
    using DenseDoubles = py::array_t<double, py::array::c_style | py::array::forcecast>;
    DenseDoubles const dense = DenseDoubles::ensure(arr);
    if (!dense) {
        throw py::type_error("array2str1: argument is not convertible to a float64 array");
    }
    std::vector<double> const array_vec(dense.data(), dense.data() + dense.size());
    (void)array_vec;  // only the cost of the copy matters; the values are unused
    return std::string("w");
}

/// Placeholder that accepts a vector of floats and always answers "x".
///
/// The input is currently ignored. A formatting implementation that joined
/// the values with ", " is kept below, disabled, for reference.
///
/// @param arr  values to (eventually) format; unused for now
/// @return     the fixed string "x"
std::string
array2str2(std::vector<float> const & arr) {
    (void)arr;
    /* Disabled implementation:
     *   if (arr.empty()) return "[]";
     *   std::stringstream ss;
     *   std::string const delim(", ");
     *   for (float const & f : arr) {
     *       ss << f << delim;
     *   }
     *   std::string ret(ss.str());
     *   ret.pop_back(); ret.pop_back();
     */
    return std::string("x");
}

/// Small sample type: a name plus a float buffer pre-filled with
/// 0, 1, ..., 999999.
struct Pet {
    /// Stores `name` and fills `bdata` with one million consecutive values.
    Pet(const std::string &name) : name(name) {
        const int kSampleCount = 1000000;
        // One allocation up front instead of repeated geometric regrowth.
        bdata.reserve(kSampleCount);
        for (int i = 0; i < kSampleCount; ++i) {
            // Every value is below 2^24, so the conversion to float is exact.
            bdata.push_back(static_cast<float>(i));
        }
    }

    /// Replaces the stored name.
    void setName(const std::string &name_) { name = name_; }

    /// Returns a reference to the stored name.
    const std::string &getName() const { return name; }

    std::string name;          ///< display name
    std::vector<float> bdata;  ///< sample payload, bdata[i] == i
};

/// Hands the storage of `m` over to a 1-D NumPy array without copying the
/// elements.
///
/// The vector is moved onto the heap and a capsule that deletes it is attached
/// as the array's base object, so the buffer is released when the array is
/// collected. The caller's vector is moved-from afterwards, which means a
/// second call on the same vector takes the empty branch.
///
/// @param m  source vector; consumed unless it is empty
/// @return   array of m.size() elements, or a default-constructed array_t<T>
///           when `m` is empty
template< typename T >
py::array
array1d_from_vector(std::vector<T> & m) {
    if (m.empty()) return py::array_t<T>();

    // Keep the heap copy in a unique_ptr until the capsule exists, so nothing
    // leaks if capsule construction throws. (C++11 build: no make_unique.)
    std::unique_ptr<std::vector<T>> holder(new std::vector<T>(std::move(m)));
    py::capsule owner(holder.get(),
                      [](void* p) { delete static_cast<std::vector<T>*>(p); });
    std::vector<T>* const storage = holder.release();  // the capsule owns it now

    return py::array_t<T>(storage->size(),  // number of elements
                          storage->data(),  // borrowed buffer
                          owner);           // base object keeping the buffer alive
}


/// Converts a rectangular vector-of-vectors into a 2-D, C-contiguous NumPy
/// array of shape (rows, cols).
///
/// Nested vectors do not form one contiguous buffer, so the rows are first
/// copied into a single flat vector. That flat vector is then owned by a
/// capsule attached as the array's base object. The source is cleared after a
/// successful copy, mirroring the moved-from state array1d_from_vector leaves
/// behind.
///
/// @param m  source rows; every row must have the length of the first one
/// @return   (rows x cols) array, or a default-constructed array_t<T> when
///           `m` is empty
/// @throws py::value_error if the rows differ in length (m is left untouched)
template< typename T >
py::array
array2d_from_vector(std::vector<std::vector<T>> & m) {
    if (m.empty()) return py::array_t<T>();

    auto const n_rows = m.size();
    auto const n_cols = m.front().size();
    for (auto const & row : m) {
        if (row.size() != n_cols) {
            throw py::value_error("array2d_from_vector: rows have different lengths");
        }
    }

    // Flatten row by row; unique_ptr guards the buffer until the capsule owns it.
    std::unique_ptr<std::vector<T>> holder(new std::vector<T>());
    holder->reserve(n_rows * n_cols);
    for (auto const & row : m) {
        holder->insert(holder->end(), row.begin(), row.end());
    }
    m.clear();

    py::capsule owner(holder.get(),
                      [](void* p) { delete static_cast<std::vector<T>*>(p); });
    std::vector<T>* const flat = holder.release();  // the capsule owns it now

    std::vector<py::ssize_t> shape{static_cast<py::ssize_t>(n_rows),
                                   static_cast<py::ssize_t>(n_cols)};
    return py::array_t<T>(std::move(shape),  // (rows, cols); default C-order strides
                          flat->data(),      // borrowed buffer
                          owner);            // base object keeping the buffer alive
}

/// Serialises the six property maps of `sd` into one msgpack map keyed by the
/// property-group name ("bondProperties", "atomProperties", ...).
///
/// @param sd  structure whose property maps are packed
/// @return    the packed msgpack payload as Python bytes
py::bytes
raw_properties(mmtf::StructureData const & sd) {
    std::map< std::string, std::map< std::string, msgpack::object > > const objs({
        {"bondProperties", sd.bondProperties },
        {"atomProperties", sd.atomProperties },
        {"groupProperties", sd.groupProperties },
        {"chainProperties", sd.chainProperties },
        {"modelProperties", sd.modelProperties },
        {"extraProperties", sd.extraProperties }});

    std::stringstream packed;
    msgpack::pack(packed, objs);

    // Build the bytes object from a std::string so the explicit length is used.
    // The const char* constructor measures with strlen and would cut the
    // binary payload at its first NUL byte.
    return py::bytes(packed.str());
}



// Python module `example`: exposes the sample Pet type, the two array2str
// helpers, and the mmtf data structures.
PYBIND11_MODULE(example, m) {
    py::class_<Pet>(m, "Pet")
        .def(py::init<const std::string &>())
        .def("setName", &Pet::setName)
        .def("getName", &Pet::getName)
        .def_readwrite("bdata", &Pet::bdata)
        // Same data as `bdata`, but returned as a NumPy array built from a
        // 1-D buffer description.
        .def("bdata2", [](Pet &pet) -> py::array {
            py::buffer_info const info(
                pet.bdata.data(),                        /* Pointer to buffer */
                sizeof(float),                           /* Size of one scalar */
                py::format_descriptor<float>::format(),  /* Python struct-style format descriptor */
                pet.bdata.size()                         /* Number of elements (1-D) */
            );
            return py::array(info);
        });
    m.def("array2str1", &array2str1, "array impl");
    m.def("array2str2", &array2str2, "vector impl");

    // mmtf::StructureData.
    //
    // Members bound with def_readwrite are ordinary attributes. Members bound
    // as methods go through array1d_from_vector / array2d_from_vector, which
    // take the data out of the StructureData: each of those accessors returns
    // the data on its first call only and an empty array afterwards.
    py::class_<mmtf::StructureData>(m, "CPPStructureData")
        .def(py::init<>())
        .def(py::init<const mmtf::StructureData &>())
        .def_readwrite("mmtfVersion", &mmtf::StructureData::mmtfVersion)
        .def_readwrite("mmtfProducer", &mmtf::StructureData::mmtfProducer)
        .def("unitCell", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.unitCell); })
        .def_readwrite("spaceGroup", &mmtf::StructureData::spaceGroup)
        .def_readwrite("structureId", &mmtf::StructureData::structureId)
        .def_readwrite("title", &mmtf::StructureData::title)
        .def_readwrite("depositionDate", &mmtf::StructureData::depositionDate)
        .def_readwrite("releaseDate", &mmtf::StructureData::releaseDate)
        .def("ncsOperatorList", [](mmtf::StructureData &sd){ return array2d_from_vector(sd.ncsOperatorList); })
        .def_readwrite("bioAssemblyList", &mmtf::StructureData::bioAssemblyList)
        .def_readwrite("entityList", &mmtf::StructureData::entityList)
        .def_readwrite("experimentalMethods", &mmtf::StructureData::experimentalMethods)
        .def_readwrite("resolution", &mmtf::StructureData::resolution)
        .def_readwrite("rFree", &mmtf::StructureData::rFree)
        .def_readwrite("rWork", &mmtf::StructureData::rWork)
        .def_readwrite("numBonds", &mmtf::StructureData::numBonds)
        .def_readwrite("numAtoms", &mmtf::StructureData::numAtoms)
        .def_readwrite("numGroups", &mmtf::StructureData::numGroups)
        .def_readwrite("numChains", &mmtf::StructureData::numChains)
        .def_readwrite("numModels", &mmtf::StructureData::numModels)
        .def_readwrite("groupList", &mmtf::StructureData::groupList)
        .def("bondAtomList", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.bondAtomList); })
        .def("bondOrderList", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.bondOrderList); })
        .def("bondResonanceList", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.bondResonanceList); })
        .def("xCoordList", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.xCoordList); })
        .def("yCoordList", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.yCoordList); })
        .def("zCoordList", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.zCoordList); })
        .def("bFactorList", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.bFactorList); })
        .def("atomIdList", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.atomIdList); })
        .def_readwrite("altLocList", &mmtf::StructureData::altLocList)
        .def("occupancyList", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.occupancyList); })
        .def("groupIdList", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.groupIdList); })
        .def("groupTypeList", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.groupTypeList); })
        .def("secStructList", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.secStructList); })
        .def_readwrite("insCodeList", &mmtf::StructureData::insCodeList)
        .def("sequenceIndexList", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.sequenceIndexList); })
        .def_readwrite("chainIdList", &mmtf::StructureData::chainIdList)
        .def_readwrite("chainNameList", &mmtf::StructureData::chainNameList)
        .def("groupsPerChain", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.groupsPerChain); })
        .def("chainsPerModel", [](mmtf::StructureData &sd){ return array1d_from_vector(sd.chainsPerModel); })
        // The six *Properties maps are delivered as one msgpack blob; they are
        // not bound as attributes (the direct bindings below stay disabled).
        .def("raw_properties", &raw_properties);
    //cl.def_readonly("msgpack_zone", &mmtf::StructureData::msgpack_zone);
    //cl.def_readwrite("bondProperties", &mmtf::StructureData::bondProperties);
    //cl.def_readwrite("atomProperties", &mmtf::StructureData::atomProperties);
    //cl.def_readwrite("groupProperties", &mmtf::StructureData::groupProperties);
    //cl.def_readwrite("chainProperties", &mmtf::StructureData::chainProperties);
    //cl.def_readwrite("modelProperties", &mmtf::StructureData::modelProperties);
    //cl.def_readwrite("extraProperties", &mmtf::StructureData::extraProperties);
    m.def("decodeFromFile", &mmtf::decodeFromFile, "decode a mmtf::StructureData from a file");

    py::class_<mmtf::BioAssembly>(m, "CPPBioAssembly")
        .def(py::init<>())
        .def(py::init<const mmtf::BioAssembly &>())
        .def_readwrite("name", &mmtf::BioAssembly::name);
    // TODO: expose the transform list of BioAssembly

    py::class_<mmtf::Transform>(m, "CPPTransform")
        .def(py::init<>())
        .def(py::init<const mmtf::Transform &>());
    // TODO: finish CPPTransform (no members are exposed yet)

    py::class_<mmtf::GroupType>(m, "CPPGroupType")
        .def(py::init<>())
        .def(py::init<const mmtf::GroupType &>());
    // TODO: finish CPPGroupType (no members are exposed yet)

    py::class_<mmtf::Entity>(m, "CPPEntity")
        .def(py::init<>())
        .def(py::init<const mmtf::Entity &>());
}


//#endif
7 changes: 7 additions & 0 deletions bindings/check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import numpy as np
import example
import time

# Smoke check for the compiled `example` extension: constructing a Pet
# exercises the module import and the Pet constructor binding.
p = example.Pet("xx")
2 changes: 2 additions & 0 deletions bindings/compile.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Build the pybind11 extension module `example` from bindings.cpp, then run
# the timing script against it.
set -e  # do not run the timing script when the build fails

# $(python3 -m pybind11 --includes) is left unquoted on purpose: it expands to
# several -I flags that must be split into separate arguments.
c++ -O3 -Wall -shared -std=c++11 -fPIC $(python3 -m pybind11 --includes) -I../pybind11/include -I../msgpack-c/include -I../include bindings.cpp -o "example$(python3-config --extension-suffix)"

# Use the same interpreter the module was built for (mmtf_t.py also relies on
# Python 3 annotation syntax).
python3 mmtf_t.py
173 changes: 173 additions & 0 deletions bindings/mmtf_t.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@

import time
import numpy as np
import mmtf
import msgpack

import example
from example import CPPStructureData as CPPSD, decodeFromFile

class StructureData:
    """Python-side holder for the fields of a ``CPPStructureData`` binding object.

    Construct with ``file_name`` to decode a file through the ``example``
    extension, or with no arguments to get an instance whose fields are all
    ``None``.

    After loading from a file, ``bioAssemblyList``, ``entityList`` and
    ``groupList`` are not set yet (their conversion is still pending); they
    exist only on instances created through :meth:`raw_init`.
    """

    # Plain attributes of the binding object; copied as-is.
    _VALUE_FIELDS = (
        "mmtfVersion", "mmtfProducer", "spaceGroup", "structureId", "title",
        "depositionDate", "releaseDate", "experimentalMethods", "resolution",
        "rFree", "rWork", "numBonds", "numAtoms", "numGroups", "numChains",
        "numModels", "altLocList", "insCodeList", "chainIdList",
        "chainNameList",
    )

    # Zero-argument accessor methods of the binding object that return arrays.
    # The binding moves the data out of the C++ object when an accessor runs,
    # so each accessor must be called exactly once per decoded object: a
    # second call returns an empty array.
    _ARRAY_FIELDS = (
        "unitCell", "ncsOperatorList", "bondAtomList", "bondOrderList",
        "bondResonanceList", "xCoordList", "yCoordList", "zCoordList",
        "bFactorList", "atomIdList", "occupancyList", "groupIdList",
        "groupTypeList", "secStructList", "sequenceIndexList",
        "groupsPerChain", "chainsPerModel",
    )

    # Keys of the msgpack map returned by ``raw_properties()``.
    _PROPERTY_FIELDS = (
        "bondProperties", "atomProperties", "groupProperties",
        "chainProperties", "modelProperties", "extraProperties",
    )

    # Fields not transferred from the binding object yet.
    _PENDING_FIELDS = ("bioAssemblyList", "entityList", "groupList")

    def __init__(self, file_name=None, file_bytes=None):
        """Load from ``file_name`` if given, else from ``file_bytes``, else start blank."""
        if file_name:
            self.init_from_file_name(file_name)
        elif file_bytes:
            self.init_from_raw_bytes(file_bytes)
        else:
            self.raw_init()

    def init_from_file_name(self, file_name: str):
        """Decode ``file_name`` with the C++ decoder and copy its fields onto self."""
        cppsd = CPPSD()
        decodeFromFile(cppsd, file_name)
        self.init_from_cppsd(cppsd)

    def init_from_raw_bytes(self, file_bytes):
        """Placeholder for decoding from an in-memory buffer.

        Raises:
            NotImplementedError: always; only ``decodeFromFile`` is available
                from the ``example`` module, so there is nothing to decode
                bytes with yet.
        """
        raise NotImplementedError(
            "StructureData cannot be built from raw bytes yet; pass file_name instead"
        )

    def init_from_cppsd(self, cppsd: 'CPPStructureData'):
        """Copy every available field from a decoded binding object onto self.

        ``cppsd`` is consumed in the process: its array accessors hand their
        data over and return empty arrays from then on.
        """
        for name in self._VALUE_FIELDS:
            setattr(self, name, getattr(cppsd, name))

        # One call per accessor -- see the note on _ARRAY_FIELDS. The results
        # are stored exactly as the binding returns them.
        for name in self._ARRAY_FIELDS:
            setattr(self, name, getattr(cppsd, name)())

        raw_properties = msgpack.unpackb(cppsd.raw_properties(), raw=False)
        for name in self._PROPERTY_FIELDS:
            setattr(self, name, raw_properties[name])

    def raw_init(self):
        """Set every known field, including the pending ones, to ``None``."""
        all_fields = (
            self._VALUE_FIELDS
            + self._ARRAY_FIELDS
            + self._PENDING_FIELDS
            + self._PROPERTY_FIELDS
        )
        for name in all_fields:
            setattr(self, name, None)



def _time_calls(label, func, repeats):
    """Call ``func`` ``repeats`` times, then print and return the elapsed seconds.

    Prints ``label``, the total time, and the time per call. The per-call
    figure is what makes runs with different ``repeats`` comparable.
    """
    start = time.perf_counter()
    for _ in range(repeats):
        func()
    total = time.perf_counter() - start
    print(label, total, total / repeats)
    return total


def _decode_only():
    """Decode the sample file into a bare C++ object, with no Python-side copy."""
    cppsd = CPPSD()
    decodeFromFile(cppsd, "4lgr.mmtf")


if __name__ == "__main__":
    # C++ decode plus transfer of every field into the Python StructureData.
    python_t = _time_calls("python", lambda: StructureData("4lgr.mmtf"), 10000)

    # Reference: the pure-Python mmtf package (fewer repeats than the others).
    python_og_t = _time_calls("python og", lambda: mmtf.parse("4lgr.mmtf"), 1000)

    # C++ decode alone.
    cpp_t = _time_calls("cpp", _decode_only, 10000)
1 change: 1 addition & 0 deletions pybind11
Submodule pybind11 added at f6c4c1

0 comments on commit 082afe4

Please sign in to comment.