forked from rcsb/mmtf-cpp
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
375 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,3 +7,6 @@ | |
[submodule "mmtf_spec"] | ||
path = mmtf_spec | ||
url = https://github.com/rcsb/mmtf | ||
[submodule "pybind11"] | ||
path = pybind11 | ||
url = git@github.com:pybind/pybind11.git |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
|
||
//#ifndef MMTF_CPP_PYBIND_BINDINGS_HH | ||
//#define MMTF_CPP_PYBIND_BINDINGS_HH | ||
|
||
#include <mmtf.hpp>

#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/stl.h>

#include <cstring>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
|
||
namespace py = pybind11; | ||
|
||
std::string | ||
array2str1(py::array const & arr) { | ||
std::vector<double> array_vec(arr.size()); | ||
std::memcpy(array_vec.data(),arr.data(),arr.size()*sizeof(double)); | ||
std::string ret("w"); | ||
return ret; | ||
} | ||
|
||
/// Benchmark helper taking its input as a std::vector<float>.
///
/// The contents of `arr` are not inspected: joining the elements into a
/// ", "-separated string is currently disabled, so the result is always the
/// placeholder "x".
///
/// @param arr  input values (currently ignored)
/// @return     the constant string "x"
std::string
array2str2(std::vector<float> const & arr) {
    return std::string("x");
}
|
||
/// Scratch type used to experiment with the bindings: a name plus a large
/// float buffer.
struct Pet {
    /// Creates a pet called `pet_name` and fills `bdata` with the values
    /// 0, 1, ..., 999999.
    Pet(const std::string &pet_name) : name(pet_name) {
        constexpr int kSampleCount = 1000000;
        // Reserve once instead of letting push_back reallocate repeatedly.
        bdata.reserve(kSampleCount);
        for (int i = 0; i < kSampleCount; ++i) {
            bdata.push_back(static_cast<float>(i));
        }
    }

    /// Replaces the stored name.
    void setName(const std::string &name_) { name = name_; }

    /// Returns the stored name.
    const std::string &getName() const { return name; }

    std::string name;          ///< display name
    std::vector<float> bdata;  ///< sample buffer filled by the constructor
};
|
||
template< typename T > | ||
py::array | ||
array1d_from_vector(std::vector<T> & m) { | ||
if (m.empty()) return py::array_t<T>(); | ||
std::vector<T>* ptr = new std::vector<T>(std::move(m)); | ||
auto capsule = py::capsule(ptr, [](void* p) { delete reinterpret_cast<std::vector<T>*>(p); }); | ||
return py::array_t<T>(ptr->size(), // shape of array | ||
ptr->data(), // c-style contiguous strides for Sequence | ||
capsule // numpy array references this parent | ||
); | ||
} | ||
|
||
|
||
template< typename T > | ||
py::array | ||
array2d_from_vector(std::vector<std::vector<T>> & m) { | ||
if (m.empty()) return py::array_t<T>(); | ||
std::vector<std::vector<T>>* ptr = new std::vector<std::vector<T>>(std::move(m)); | ||
auto capsule = py::capsule(ptr, [](void* p) { delete reinterpret_cast<std::vector<std::vector<T>>*>(p); }); | ||
return py::array_t<T>({ptr->size(), ptr->at(0).size()}, // shape of array | ||
{ptr->size()*ptr->at(0).size()*sizeof(T)}, // c-style contiguous strides for Sequence | ||
capsule // numpy array references this parent | ||
); | ||
} | ||
|
||
/// Serializes the six property maps of `sd` into one msgpack-encoded map and
/// returns it as Python `bytes`.
///
/// The outer map is keyed by "bondProperties", "atomProperties",
/// "groupProperties", "chainProperties", "modelProperties" and
/// "extraProperties".
///
/// @param sd  structure whose property maps are packed
/// @return    the packed msgpack payload
py::bytes
raw_properties(mmtf::StructureData const & sd) {
    std::map< std::string, std::map< std::string, msgpack::object > > const objs({
        {"bondProperties", sd.bondProperties },
        {"atomProperties", sd.atomProperties },
        {"groupProperties", sd.groupProperties },
        {"chainProperties", sd.chainProperties },
        {"modelProperties", sd.modelProperties },
        {"extraProperties", sd.extraProperties }});

    std::stringstream packed;
    msgpack::pack(packed, objs);

    // Construct from the std::string so the explicit length is used: msgpack
    // output is binary and may contain NUL bytes, which a bare `const char*`
    // would be truncated at.
    return py::bytes(packed.str());
}
|
||
|
||
|
||
/// Python extension module `example`.
///
/// Registers the scratch `Pet` class and the `array2str*` helpers, followed by
/// the mmtf types (`CPPStructureData`, `CPPBioAssembly`, `CPPTransform`,
/// `CPPGroupType`, `CPPEntity`) and the `decodeFromFile` function.
PYBIND11_MODULE(example, m) {
    // ---- scratch / benchmark bindings -------------------------------------
    py::class_<Pet>(m, "Pet")
        .def(py::init<const std::string &>())
        .def("setName", &Pet::setName)
        .def("getName", &Pet::getName)
        .def_readwrite("bdata", &Pet::bdata)
        .def("bdata2", [](Pet &pet) -> py::array {
            py::buffer_info info(
                pet.bdata.data(),                       /* Pointer to buffer */
                sizeof(float),                          /* Size of one scalar */
                py::format_descriptor<float>::format(), /* Python struct-style format descriptor */
                pet.bdata.size()                        /* Number of elements (1-D buffer) */
            );
            return py::array(info);
        });
    m.def("array2str1", &array2str1, "array impl");
    m.def("array2str2", &array2str2, "vector impl");

    // ---- mmtf::StructureData ------------------------------------------------
    // Scalar, string and container-of-object members are exposed as read/write
    // attributes. Numeric list members are exposed as *methods* returning numpy
    // arrays; those built with array1d_from_vector move the storage out of the
    // StructureData, so each should be called at most once per object.
    py::class_<mmtf::StructureData>(m, "CPPStructureData")
        .def(py::init<>())
        .def(py::init<mmtf::StructureData const &>())
        .def_readwrite("mmtfVersion", &mmtf::StructureData::mmtfVersion)
        .def_readwrite("mmtfProducer", &mmtf::StructureData::mmtfProducer)
        .def("unitCell", [](mmtf::StructureData &sd){return array1d_from_vector(sd.unitCell);})
        .def_readwrite("spaceGroup", &mmtf::StructureData::spaceGroup)
        .def_readwrite("structureId", &mmtf::StructureData::structureId)
        .def_readwrite("title", &mmtf::StructureData::title)
        .def_readwrite("depositionDate", &mmtf::StructureData::depositionDate)
        .def_readwrite("releaseDate", &mmtf::StructureData::releaseDate)
        .def("ncsOperatorList", [](mmtf::StructureData &sd){return array2d_from_vector(sd.ncsOperatorList);})
        .def_readwrite("bioAssemblyList", &mmtf::StructureData::bioAssemblyList)
        .def_readwrite("entityList", &mmtf::StructureData::entityList)
        .def_readwrite("experimentalMethods", &mmtf::StructureData::experimentalMethods)
        .def_readwrite("resolution", &mmtf::StructureData::resolution)
        .def_readwrite("rFree", &mmtf::StructureData::rFree)
        .def_readwrite("rWork", &mmtf::StructureData::rWork)
        .def_readwrite("numBonds", &mmtf::StructureData::numBonds)
        .def_readwrite("numAtoms", &mmtf::StructureData::numAtoms)
        .def_readwrite("numGroups", &mmtf::StructureData::numGroups)
        .def_readwrite("numChains", &mmtf::StructureData::numChains)
        .def_readwrite("numModels", &mmtf::StructureData::numModels)
        .def_readwrite("groupList", &mmtf::StructureData::groupList)
        // "unitCell" is bound once above; the former second, identical binding
        // was redundant and has been dropped.
        .def("bondAtomList", [](mmtf::StructureData &sd){return array1d_from_vector(sd.bondAtomList);})
        .def("bondOrderList", [](mmtf::StructureData &sd){return array1d_from_vector(sd.bondOrderList);})
        .def("bondResonanceList", [](mmtf::StructureData &sd){return array1d_from_vector(sd.bondResonanceList);})
        .def("xCoordList", [](mmtf::StructureData &sd){return array1d_from_vector(sd.xCoordList);})
        .def("yCoordList", [](mmtf::StructureData &sd){return array1d_from_vector(sd.yCoordList);})
        .def("zCoordList", [](mmtf::StructureData &sd){return array1d_from_vector(sd.zCoordList);})
        .def("bFactorList", [](mmtf::StructureData &sd){return array1d_from_vector(sd.bFactorList);})
        .def("atomIdList", [](mmtf::StructureData &sd){return array1d_from_vector(sd.atomIdList);})
        .def_readwrite("altLocList", &mmtf::StructureData::altLocList)
        .def("occupancyList", [](mmtf::StructureData &sd){return array1d_from_vector(sd.occupancyList);})
        .def("groupIdList", [](mmtf::StructureData &sd){return array1d_from_vector(sd.groupIdList);})
        .def("groupTypeList", [](mmtf::StructureData &sd){return array1d_from_vector(sd.groupTypeList);})
        .def("secStructList", [](mmtf::StructureData &sd){return array1d_from_vector(sd.secStructList);})
        .def_readwrite("insCodeList", &mmtf::StructureData::insCodeList)
        .def("sequenceIndexList", [](mmtf::StructureData &sd){return array1d_from_vector(sd.sequenceIndexList);})
        .def_readwrite("chainIdList", &mmtf::StructureData::chainIdList)
        .def_readwrite("chainNameList", &mmtf::StructureData::chainNameList)
        .def("groupsPerChain", [](mmtf::StructureData &sd){return array1d_from_vector(sd.groupsPerChain);})
        .def("chainsPerModel", [](mmtf::StructureData &sd){return array1d_from_vector(sd.chainsPerModel);})
        // The six *Properties maps are not bound individually; they are handed
        // to Python as a single msgpack payload instead.
        .def("raw_properties", [](mmtf::StructureData const &sd){return raw_properties(sd);});
    m.def("decodeFromFile", &mmtf::decodeFromFile, "decode a mmtf::StructureData from a file");

    // ---- supporting mmtf types ----------------------------------------------
    py::class_<mmtf::BioAssembly>(m, "CPPBioAssembly")
        .def(py::init<>())
        .def(py::init<mmtf::BioAssembly const &>())
        .def_readwrite("name", &mmtf::BioAssembly::name);
    // TODO: expose the transform list of BioAssembly.

    py::class_<mmtf::Transform>(m, "CPPTransform")
        .def(py::init<>())
        .def(py::init<mmtf::Transform const &>());
    // TODO: expose the members of Transform.

    py::class_<mmtf::GroupType>(m, "CPPGroupType")
        .def(py::init<>())
        .def(py::init<mmtf::GroupType const &>());
    // TODO: expose the members of GroupType.

    py::class_<mmtf::Entity>(m, "CPPEntity")
        .def(py::init<>())
        .def(py::init<mmtf::Entity const &>());
}
|
||
|
||
//#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
import numpy as np | ||
import example | ||
import time | ||
|
||
# Smoke test: constructing a Pet exercises the compiled `example` extension.
# (A stray bare `m` expression that followed was removed; it referenced an
# undefined name and raised NameError.)
p = example.Pet("xx")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Build the pybind11 extension module from bindings.cpp, then run the Python
# benchmark against it.
# Stop at the first failing command so a failed compile never leads to
# benchmarking a stale or missing module.
set -e
c++ -O3 -Wall -shared -std=c++11 -fPIC $(python3 -m pybind11 --includes) -I../pybind11/include -I../msgpack-c/include -I../include bindings.cpp -o "example$(python3-config --extension-suffix)"
# Run with the same interpreter family the module was built for.
python3 mmtf_t.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
|
||
import time | ||
import numpy as np | ||
import mmtf | ||
import msgpack | ||
|
||
import example | ||
from example import CPPStructureData as CPPSD, decodeFromFile | ||
|
||
class StructureData:
    """Plain-Python holder for the fields of an MMTF structure.

    An instance is filled either from a file decoded by the C++ extension
    (``file_name``) or left with every field set to ``None``. Whichever path is
    taken, every name in ``_FIELD_NAMES`` exists as an attribute afterwards.
    """

    # Fields read from the C++ object as plain attributes.
    _PLAIN_FIELDS = (
        "mmtfVersion", "mmtfProducer", "spaceGroup", "structureId", "title",
        "depositionDate", "releaseDate", "experimentalMethods", "resolution",
        "rFree", "rWork", "numBonds", "numAtoms", "numGroups", "numChains",
        "numModels", "altLocList", "insCodeList", "chainIdList",
        "chainNameList",
    )

    # Fields the C++ object exposes as zero-argument methods returning arrays.
    # Each accessor must be called exactly once: a repeated call on the same
    # object yields an empty array instead of the data.
    _ARRAY_FIELDS = (
        "unitCell", "ncsOperatorList", "bondAtomList", "bondOrderList",
        "bondResonanceList", "xCoordList", "yCoordList", "zCoordList",
        "bFactorList", "atomIdList", "occupancyList", "groupIdList",
        "groupTypeList", "secStructList", "sequenceIndexList",
        "groupsPerChain", "chainsPerModel",
    )

    # Fields not yet copied over from the C++ object; they stay None.
    # TODO: copy these once their element types are fully bound.
    _DEFERRED_FIELDS = ("bioAssemblyList", "entityList", "groupList")

    # Keys of the msgpack payload returned by ``raw_properties()``.
    _PROPERTY_FIELDS = (
        "bondProperties", "atomProperties", "groupProperties",
        "chainProperties", "modelProperties", "extraProperties",
    )

    _FIELD_NAMES = (
        _PLAIN_FIELDS + _ARRAY_FIELDS + _DEFERRED_FIELDS + _PROPERTY_FIELDS
    )

    def __init__(self, file_name=None, file_bytes=None):
        """Create a structure.

        Args:
            file_name: path of an MMTF file to decode; takes precedence.
            file_bytes: raw MMTF bytes (currently unsupported, see
                ``init_from_raw_bytes``).

        With neither argument, every field is initialised to ``None``.
        """
        if file_name:
            self.init_from_file_name(file_name)
        elif file_bytes:
            self.init_from_raw_bytes(file_bytes)
        else:
            self.raw_init()

    def init_from_file_name(self, file_name: str):
        """Decode ``file_name`` with the C++ extension and copy its fields."""
        cppsd = CPPSD()
        decodeFromFile(cppsd, file_name)
        self.init_from_cppsd(cppsd)

    def init_from_raw_bytes(self, file_bytes: bytes):
        """Placeholder for decoding from an in-memory buffer.

        Raises:
            NotImplementedError: always; only ``decodeFromFile`` is imported
                from the extension, so there is no buffer decoder to call.
        """
        raise NotImplementedError(
            "StructureData cannot be built from raw bytes yet; "
            "pass file_name instead"
        )

    def init_from_cppsd(self, cppsd: 'CPPStructureData'):
        """Copy every available field from a decoded ``CPPStructureData``.

        Array accessors on ``cppsd`` are each invoked exactly once (see
        ``_ARRAY_FIELDS``). Fields that are not copied yet are left as ``None``.
        """
        # Start from the all-None state so deferred fields always exist.
        self.raw_init()

        for field in self._PLAIN_FIELDS:
            setattr(self, field, getattr(cppsd, field))

        for field in self._ARRAY_FIELDS:
            setattr(self, field, getattr(cppsd, field)())

        # The property maps arrive as one msgpack-encoded dict.
        raw_properties = msgpack.unpackb(cppsd.raw_properties(), raw=False)
        for field in self._PROPERTY_FIELDS:
            setattr(self, field, raw_properties[field])

    def raw_init(self):
        """Set every field to ``None``."""
        for field in self._FIELD_NAMES:
            setattr(self, field, None)
|
||
|
||
|
||
def _time_loop(label, iterations, body):
    """Call ``body`` ``iterations`` times and report the elapsed wall time.

    Prints the total and the per-iteration time, so runs with different
    iteration counts can be compared directly. Returns the total in seconds.
    """
    start = time.perf_counter()
    for _ in range(iterations):
        body()
    elapsed = time.perf_counter() - start
    print(label, elapsed, "s total,", elapsed / iterations, "s/iter")
    return elapsed


def _decode_only():
    """Decode the file into the C++ object without copying into Python."""
    cppsd = CPPSD()
    decodeFromFile(cppsd, "4lgr.mmtf")


# C++ decode plus copy into the Python StructureData wrapper.
python_t = _time_loop("python", 10000, lambda: StructureData("4lgr.mmtf"))

# Reference pure-Python parser; fewer iterations are run, hence the
# per-iteration figure printed alongside the total.
python_og_t = _time_loop("python og", 1000, lambda: mmtf.parse("4lgr.mmtf"))

# C++ decode only.
cpp_t = _time_loop("cpp", 10000, _decode_only)