From 67a0554ba804603f432e015cf40a19a9a6e86beb Mon Sep 17 00:00:00 2001 From: Dave Parker Date: Wed, 23 Aug 2017 18:37:58 -0700 Subject: [PATCH 1/4] Add atom container `align` and `rmsd` methods (Zube 2914). --- .../Example 6. Align Two Molecules.ipynb | 309 ++++++++++++++++++ moldesign/_tests/test_alignments.py | 34 +- moldesign/mathutils/__init__.py | 3 +- moldesign/mathutils/align.py | 117 +++++++ moldesign/molecules/atomcollections.py | 45 +++ 5 files changed, 506 insertions(+), 2 deletions(-) create mode 100644 moldesign/_notebooks/Example 6. Align Two Molecules.ipynb create mode 100644 moldesign/mathutils/align.py diff --git a/moldesign/_notebooks/Example 6. Align Two Molecules.ipynb b/moldesign/_notebooks/Example 6. Align Two Molecules.ipynb new file mode 100644 index 0000000..88f118f --- /dev/null +++ b/moldesign/_notebooks/Example 6. Align Two Molecules.ipynb @@ -0,0 +1,309 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "About      Forum      Issues      Tutorials      Documentation\n", + "\n", + "![Molecular Design Toolkit](img/Top.png)\n", + "\n", + "\n", + "

Example 6: Align Two Molecules

\n", + "---\n", + "\n", + "\n", + "This notebook reads in two structures and calculates the transformation matrix aligning the C-alpha atoms from a subset of residues one structure to the other.\n", + "\n", + " - _Author_: [Dave Parker](https://github.com/ktbolt), Autodesk Research
\n", + " - _Created on_: August 23, 2017\n", + " - _Tags_: align, fit\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Info: OpenBabel not installed; will run in docker container\n", + "Info: OpenMM not installed; will run in docker container\n", + "Info: PDBFixer not installed; will run in docker container\n", + "Info: PySCF not installed; will run in docker container\n", + "Reading configuration from /Users/parkerda/.moldesign/moldesign.yml\n" + ] + } + ], + "source": [ + "import moldesign as mdt\n", + "from moldesign.molecules.atomcollections import AtomList\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "mol1 = mdt.from_pdb(\"1yu8\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "residues = mol1.chains[\"X\"].residues" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "residue_ids = set(range(64,73))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "src_atoms = [res.atoms[\"CA\"] for res in residues if res.pdbindex in residue_ids]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "src_atoms_list = AtomList(src_atoms)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING: Residue GLU502 (index 271, chain A) is missing expected atoms. Attempting to infer chain end\n" + ] + } + ], + "source": [ + "mol2 = mdt.from_pdb(\"3ac2\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "residues = mol2.chains[\"A\"].residues" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "residue_ids = set(range(446,455))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "dst_atoms = [res.atoms[\"CA\"] for res in residues if res.pdbindex in residue_ids]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "dst_atoms_list = AtomList(dst_atoms)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">>> src center [ 14.392 6.67377778 18.87433333]\n", + ">>> dst center [ 20.92355556 4.70444444 10.85988889]\n" + ] + } + ], + "source": [ + "rmsd_error, xform = src_atoms_list.align(dst_atoms_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.41244109986341254" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmsd_error\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 4.42719613e-01, 8.84833988e-01, -1.45148745e-01,\n", + " 1.13863353e+01],\n", + " [ 1.30739446e-02, -1.68229930e-01, -9.85661079e-01,\n", + " 2.42427092e+01],\n", + " [ -8.96564786e-01, 4.34473825e-01, -8.60469602e-02,\n", + " 2.24877465e+01],\n", + " [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", + " 1.00000000e+00]])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xform\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "src_atoms = mol1.atoms" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "coords = np.array([0.0, 0.0, 0.0, 1.0],dtype=float)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from moldesign import units as u\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + " for i in range(0,len(src_atoms)):\n", + " coords[0:3] = src_atoms[i].position.value_in(u.angstrom)\n", + " xcoords = xform.dot(coords)\n", + " src_atoms[i].x = xcoords[0] * u.angstrom\n", + " src_atoms[i].y = xcoords[1] * u.angstrom\n", + " src_atoms[i].z = xcoords[2] * u.angstrom\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/moldesign/_tests/test_alignments.py b/moldesign/_tests/test_alignments.py index 8fd072a..cffb6c8 100644 --- a/moldesign/_tests/test_alignments.py +++ b/moldesign/_tests/test_alignments.py @@ -6,10 +6,11 @@ import moldesign as mdt from moldesign import geom from moldesign.mathutils import normalized +from moldesign.molecules.atomcollections import AtomList from moldesign import units as u from .molecule_fixtures import * -from .helpers import assert_almost_equal +from .helpers import assert_almost_equal, get_data_path __PYTEST_MARK__ = 'internal' # mark all tests in this module with this label (see ./conftest.py) @@ -146,3 +147,34 @@ def test_pmi_reorientation_on_benzene(benzene): np.testing.assert_allclose(newdistmat.defunits_value(), original_distmat) +# Test the alignmnet of the atoms from two molecules. +def test_align_molecules(): + + # Get the CA atoms for chain X residues 64-72. + mol1 = mdt.read(get_data_path('1yu8.pdb')) + residues = mol1.chains["X"].residues + residue_ids = set(range(64,73)) + src_atoms = [res.atoms["CA"] for res in residues if res.pdbindex in residue_ids] + + # Get the CA atoms for chain A residues 446-454. + mol2 = mdt.read(get_data_path('3ac2.pdb')) + residues = mol2.chains["A"].residues + residue_ids = set(range(446,455)) + dst_atoms = [res.atoms["CA"] for res in residues if res.pdbindex in residue_ids] + + # Align the two atom lists. + src_atoms_list = AtomList(src_atoms) + dst_atoms_list = AtomList(dst_atoms) + rmsd_error, xform = src_atoms_list.align(dst_atoms_list) + + # Compare results to expected values. + rtol = 1e-5 + expected_rmsd_error = 0.412441 + expected_xform = np.array([(4.42719613e-01, 8.84833988e-01, -1.45148745e-01, 1.13863353e+01), \ + (1.30739446e-02, -1.68229930e-01, -9.85661079e-01, 2.42427092e+01), \ + (-8.96564786e-01, 4.34473825e-01, -8.60469602e-02, 2.24877465e+01), \ + (0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00)]) + np.testing.assert_allclose(rmsd_error, expected_rmsd_error, rtol) + np.testing.assert_allclose(xform, expected_xform, rtol) + + diff --git a/moldesign/mathutils/__init__.py b/moldesign/mathutils/__init__.py index fb97fc3..36d8c68 100644 --- a/moldesign/mathutils/__init__.py +++ b/moldesign/mathutils/__init__.py @@ -1,3 +1,4 @@ from .vectormath import * from .eigen import * -from .grids import * \ No newline at end of file +from .grids import * +from .align import * diff --git a/moldesign/mathutils/align.py b/moldesign/mathutils/align.py new file mode 100644 index 0000000..e8b804d --- /dev/null +++ b/moldesign/mathutils/align.py @@ -0,0 +1,117 @@ +from __future__ import print_function, absolute_import, division +from future.builtins import * +from future import standard_library +standard_library.install_aliases() + +# Copyright 2017 Autodesk Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from math import sqrt +from .. import units as u +from ..utils import exports + +@exports +def rmsd_align(src_points, dst_points): + """ Calculate the 4x4 transformation matrix that aligns a set of source points + to a set of destination of points. + + This function implements the method from Kabsch (Acta Cryst. (1978) A34, 827-828) that + calculate the optimal rotation matrix that minimizes the root mean squared deviation between + two paired sets of points. + + Args: + src_points(numpy.ndarray): An Nx3 array of the 3D source points. + dst_points(numpy.ndarray): An Nx3 array of the 3D destination points. + + Returns: + rmsd_error(Float): The root mean square deviation measuring the alignment error. + Xform (np.ndarray): The 4x4 array that transforms the source points to the destination points. + """ + + # Calculate point centers. + num_points = src_points.shape[0] + src_center = src_points.sum(axis=0) / num_points + dst_center = dst_points.sum(axis=0) / num_points + + # Calculate correlation matrix. + C = np.dot(np.transpose(dst_points[:]-dst_center), src_points[:]-src_center) + + # Compute singular value decomposition. + U, S, V = np.linalg.svd(C) + d = np.linalg.det(V) * np.linalg.det(U) + + # Make sure the rotation preserves orientation (det = 1). + if d < 0.0: + U[:, -1] = -U[:, -1] + + # Calculate matrix rotating src to dst. + R = np.dot(U, V) + + # Calculate the 4x4 matrix transforming src to dst. + Tsrc = np.identity(4, dtype=float) + Tsrc[0:3,3] = src_center + Tcenter = np.identity(4, dtype=float) + Tcenter[0:3,3] = dst_center - src_center + Rn = np.identity(4, dtype=float) + Rn[:3,:3] = R + M1 = np.dot(Rn, np.linalg.inv(Tsrc)) + M2 = np.dot(Tcenter, Tsrc) + Xform = np.dot(M2, M1) + print(">>> src center " + str(src_center)) + print(">>> dst center " + str(dst_center)) + + # Calculate rmsd error. + rmsd_error = 0.0 + for i in range(num_points): + cdiff = R.dot(src_points[i]-src_center) - (dst_points[i]-dst_center) + rmsd_error += cdiff.dot(cdiff) + #__for i in range(num_points) + rmsd_error = sqrt(rmsd_error / num_points) + return rmsd_error, Xform + +@exports +def calculate_rmsd(points1, points2, centered=False): + """ Calculate the root mean square deviation (RMSD) between two sets of 3D points. + + Args: + points1(numpy.ndarray): An Nx3 array of the 3D points. + points2(numpy.ndarray): An Nx3 array of the 3D points. + centered(Boolean): If true then the points are centered around (0,0,0). + + Returns: + rmsd(Float): The root mean square deviation. + """ + + if points1.shape[0] != points2.shape[0]: + raise ValueError('The number of points for calculating the RMSD between the first array %d does not equal the number of points in the second array %d' % (num_points1, num_points2)) + + # Calculate point centers. + num_points = points1.shape[0] + if centered: + center1 = np.array([0.0, 0.0, 0.0],dtype=float) + center2 = np.array([0.0, 0.0, 0.0],dtype=float) + else: + center1 = points1.sum(axis=0) / num_points + center2 = points2.sum(axis=0) / num_points + + # Calculate RMSD. + rmsd = 0.0 + center = center2 - center1 + for i in range(num_points): + cdiff = points1[i] - points2[i] + center + rmsd += cdiff.dot(cdiff) + + return sqrt(rmsd / num_points) + diff --git a/moldesign/molecules/atomcollections.py b/moldesign/molecules/atomcollections.py index fb07b22..005bbd2 100644 --- a/moldesign/molecules/atomcollections.py +++ b/moldesign/molecules/atomcollections.py @@ -25,6 +25,7 @@ import numpy as np import moldesign as mdt +from .. mathutils.align import rmsd_align, calculate_rmsd from .. import units as u from .. import utils, external, mathutils, helpers from . import toplevel @@ -432,6 +433,50 @@ def bonds_to(self, other): bonds.append(bond) return bonds + def align(self, other): + """ Aligns this list of atoms to another list of atoms. + + Args: + other (AtomContainer): The list of atoms to align to. + + Returns: + rmsd_error(Float): The root mean square deviation measuring the alignment error. + xform(np.ndarray): The 4x4 matrix that transforms this list of atoms to the input list of atoms. + """ + if len(self.atoms) != len(other.atoms): + raise ValueError('The number of atoms for fitting this atom list %d does not equal the number of atoms in the input list %d' % (len(self.atoms), len(other.atoms))) + + # Create atom coordinate arrays. + num_atoms = len(self.atoms) + self_coords = np.zeros((num_atoms, 3), dtype=float) + other_coords = np.zeros((num_atoms, 3), dtype=float) + for i in range(0,num_atoms): + self_coords[i] = self.atoms[i].position.value_in(u.angstrom) + other_coords[i] = other.atoms[i].position.value_in(u.angstrom) + + # Calculate the 4x4 matrix transforming self_coords into other_coords. + rmsd_error, xform = rmsd_align(self_coords, other_coords) + return rmsd_error, xform + + def rmsd(self, other): + """ Calculates the root mean square deviation (RMSD) between this list of atoms + and another list of atoms. + + Args: + other (AtomContainer): The list of atoms to calculate the RMSD with. + + Returns: + rmsd_error(Float): The root mean square deviation measuring the alignment error. + """ + if len(self.atoms) != len(other.atoms): + raise ValueError('The number of atoms for calculating the RMSD between this atom list %d does not equal the number of atoms in the input list %d' % (len(self.atoms), len(other.atoms))) + num_atoms = len(self.atoms) + self_coords = np.zeros((num_atoms, 3), dtype=float) + other_coords = np.zeros((num_atoms, 3), dtype=float) + for i in range(0,num_atoms): + self_coords[i] = self.atoms[i].position.value_in(u.angstrom) + other_coords[i] = other.atoms[i].position.value_in(u.angstrom) + return rmsd(self_coords, other_coords) @toplevel class AtomList(AtomContainer, list): # order is important, list will override methods otherwise From 50baa0e47bc95bdaec026fc9944f9081167d8bdb Mon Sep 17 00:00:00 2001 From: Dave Parker Date: Thu, 24 Aug 2017 10:30:51 -0700 Subject: [PATCH 2/4] Fix some comments. --- .../Example 6. Align Two Molecules.ipynb | 132 +++++++----------- 1 file changed, 52 insertions(+), 80 deletions(-) diff --git a/moldesign/_notebooks/Example 6. Align Two Molecules.ipynb b/moldesign/_notebooks/Example 6. Align Two Molecules.ipynb index 88f118f..7b480b7 100644 --- a/moldesign/_notebooks/Example 6. Align Two Molecules.ipynb +++ b/moldesign/_notebooks/Example 6. Align Two Molecules.ipynb @@ -25,16 +25,14 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "Info: OpenBabel not installed; will run in docker container\n", - "Info: OpenMM not installed; will run in docker container\n", - "Info: PDBFixer not installed; will run in docker container\n", - "Info: PySCF not installed; will run in docker container\n", "Reading configuration from /Users/parkerda/.moldesign/moldesign.yml\n" ] } @@ -82,16 +80,28 @@ "cell_type": "code", "execution_count": 5, "metadata": { - "collapsed": true + "collapsed": false }, - "outputs": [], + "outputs": [ + { + "ename": "TypeError", + "evalue": "list indices must be integers, not str", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msrc_atoms\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0matoms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"CA\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mres\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mresidues\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpdbindex\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mresidue_ids\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: list indices must be integers, not str" + ] + } + ], "source": [ "src_atoms = [res.atoms[\"CA\"] for res in residues if res.pdbindex in residue_ids]" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "collapsed": true }, @@ -102,24 +112,18 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING: Residue GLU502 (index 271, chain A) is missing expected atoms. Attempting to infer chain end\n" - ] - } - ], + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], "source": [ "mol2 = mdt.from_pdb(\"3ac2\")\n" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "collapsed": true }, @@ -130,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "collapsed": true }, @@ -141,7 +145,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "collapsed": true }, @@ -152,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "collapsed": true }, @@ -163,72 +167,40 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">>> src center [ 14.392 6.67377778 18.87433333]\n", - ">>> dst center [ 20.92355556 4.70444444 10.85988889]\n" - ] - } - ], + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], "source": [ "rmsd_error, xform = src_atoms_list.align(dst_atoms_list)" ] }, { "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.41244109986341254" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], "source": [ "rmsd_error\n" ] }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[ 4.42719613e-01, 8.84833988e-01, -1.45148745e-01,\n", - " 1.13863353e+01],\n", - " [ 1.30739446e-02, -1.68229930e-01, -9.85661079e-01,\n", - " 2.42427092e+01],\n", - " [ -8.96564786e-01, 4.34473825e-01, -8.60469602e-02,\n", - " 2.24877465e+01],\n", - " [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 1.00000000e+00]])" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], "source": [ "xform\n" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": { "collapsed": true }, @@ -239,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": { "collapsed": true }, @@ -250,7 +222,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": { "collapsed": true }, @@ -261,7 +233,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "collapsed": true }, @@ -287,21 +259,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 2", "language": "python", - "name": "python3" + "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3 + "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" + "pygments_lexer": "ipython2", + "version": "2.7.10" } }, "nbformat": 4, From bf74e9c12bd28e6c5d459f3e352dbfa972ba38b0 Mon Sep 17 00:00:00 2001 From: Dave Parker Date: Thu, 24 Aug 2017 20:07:01 -0700 Subject: [PATCH 3/4] Add visualization to the align notebook example. --- .../Example 6. Align Two Molecules.ipynb | 265 ++++++++---------- moldesign/mathutils/align.py | 2 - 2 files changed, 124 insertions(+), 143 deletions(-) diff --git a/moldesign/_notebooks/Example 6. Align Two Molecules.ipynb b/moldesign/_notebooks/Example 6. Align Two Molecules.ipynb index 7b480b7..8e840ee 100644 --- a/moldesign/_notebooks/Example 6. Align Two Molecules.ipynb +++ b/moldesign/_notebooks/Example 6. Align Two Molecules.ipynb @@ -24,227 +24,210 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 70, "metadata": { - "collapsed": false + "collapsed": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Reading configuration from /Users/parkerda/.moldesign/moldesign.yml\n" - ] - } - ], + "outputs": [], "source": [ "import moldesign as mdt\n", + "from moldesign import units as u\n", "from moldesign.molecules.atomcollections import AtomList\n", "import numpy as np" ] }, { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "mol1 = mdt.from_pdb(\"1yu8\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "residues = mol1.chains[\"X\"].residues" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true - }, - "outputs": [], + "cell_type": "markdown", + "metadata": {}, "source": [ - "residue_ids = set(range(64,73))\n" + "## Read in and display the source molecule." ] }, { "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false - }, + "execution_count": 71, + "metadata": {}, "outputs": [ { - "ename": "TypeError", - "evalue": "list indices must be integers, not str", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msrc_atoms\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0matoms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"CA\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mres\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mresidues\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpdbindex\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mresidue_ids\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m: list indices must be integers, not str" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2395475165f342f5b5d94da1027b4e26" + } + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "src_atoms = [res.atoms[\"CA\"] for res in residues if res.pdbindex in residue_ids]" + "src_mol = mdt.from_pdb(\"1yu8\")\n", + "src_mol.draw()" ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "cell_type": "markdown", + "metadata": {}, "source": [ - "src_atoms_list = AtomList(src_atoms)" + "## Extract the source molecule C-alpha atoms for residues 64-72." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 89, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ - "mol2 = mdt.from_pdb(\"3ac2\")\n" + "src_residues = src_mol.chains[\"X\"].residues\n", + "src_residue_ids = set(range(64,73))\n", + "src_atoms = [res.atoms[\"CA\"] for res in src_residues if res.pdbindex in src_residue_ids]\n", + "src_atoms_list = AtomList(src_atoms)" ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "cell_type": "markdown", + "metadata": {}, "source": [ - "residues = mol2.chains[\"A\"].residues" + "## Read in and display the destination molecule." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING: Residue GLU502 (index 271, chain A) is missing expected atoms. Attempting to infer chain end\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4948f6a1dc234e02b35d375f52ffa115" + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "residue_ids = set(range(446,455))" + "dst_mol = mdt.from_pdb(\"3ac2\")\n", + "dst_mol.draw()" ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "cell_type": "markdown", + "metadata": {}, "source": [ - "dst_atoms = [res.atoms[\"CA\"] for res in residues if res.pdbindex in residue_ids]" + "## Extract the destination molecule C-alpha atoms for residues 446-454." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 77, "metadata": { "collapsed": true }, "outputs": [], "source": [ + "dst_residues = dst_mol.chains[\"A\"].residues\n", + "dst_residue_ids = set(range(446,455))\n", + "dst_atoms = [res.atoms[\"CA\"] for res in residues if res.pdbindex in residue_ids]\n", "dst_atoms_list = AtomList(dst_atoms)" ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "rmsd_error, xform = src_atoms_list.align(dst_atoms_list)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], + "cell_type": "markdown", + "metadata": {}, "source": [ - "rmsd_error\n" + "## Calculate the transformation aligning the source atoms to destination atoms." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "rmsd = 0.412441\n", + "transformation = \n", + "[[ 1.00000000e+00 4.56737751e-16 2.21965480e-16 -7.10542736e-15]\n", + " [ -3.52269708e-16 1.00000000e+00 -6.83377612e-17 8.88178420e-15]\n", + " [ -4.55009138e-16 5.86301682e-16 1.00000000e+00 8.88178420e-15]\n", + " [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 1.00000000e+00]]\n" + ] + } + ], "source": [ - "xform\n" + "rmsd, xform = src_atoms_list.align(dst_atoms_list)\n", + "print(\"rmsd = %f\" % rmsd)\n", + "print(\"transformation = \\n\" + np.array_str(xform))" ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "cell_type": "markdown", + "metadata": {}, "source": [ - "src_atoms = mol1.atoms" + "## Transform all of the source atoms." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 79, "metadata": { "collapsed": true }, "outputs": [], "source": [ - "coords = np.array([0.0, 0.0, 0.0, 1.0],dtype=float)" + "src_atoms = src_mol.atoms\n", + "coords = np.array([0.0, 0.0, 0.0, 1.0],dtype=float)\n", + "\n", + "for i in range(0,len(src_atoms)):\n", + " coords[0:3] = src_atoms[i].position.value_in(u.angstrom)\n", + " xcoords = xform.dot(coords)\n", + " src_atoms[i].x = xcoords[0] * u.angstrom\n", + " src_atoms[i].y = xcoords[1] * u.angstrom\n", + " src_atoms[i].z = xcoords[2] * u.angstrom\n" ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "cell_type": "markdown", + "metadata": {}, "source": [ - "from moldesign import units as u\n" + "## Merge the source and destination molecules into s single molecule and display it." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING: atom indices modified due to name clashes\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "888fa1f3e38b498ea3c11fc37be561f7" + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - " for i in range(0,len(src_atoms)):\n", - " coords[0:3] = src_atoms[i].position.value_in(u.angstrom)\n", - " xcoords = xform.dot(coords)\n", - " src_atoms[i].x = xcoords[0] * u.angstrom\n", - " src_atoms[i].y = xcoords[1] * u.angstrom\n", - " src_atoms[i].z = xcoords[2] * u.angstrom\n" + "merged_mol = mdt.Molecule([src_mol, dst_mol])\n", + "merged_mol.draw()" ] }, { @@ -259,21 +242,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.10" + "pygments_lexer": "ipython3", + "version": "3.6.1" } }, "nbformat": 4, diff --git a/moldesign/mathutils/align.py b/moldesign/mathutils/align.py index e8b804d..ed940f6 100644 --- a/moldesign/mathutils/align.py +++ b/moldesign/mathutils/align.py @@ -69,8 +69,6 @@ def rmsd_align(src_points, dst_points): M1 = np.dot(Rn, np.linalg.inv(Tsrc)) M2 = np.dot(Tcenter, Tsrc) Xform = np.dot(M2, M1) - print(">>> src center " + str(src_center)) - print(">>> dst center " + str(dst_center)) # Calculate rmsd error. rmsd_error = 0.0 From ef0bc802285eafe8fce75c08eae748a1de34684c Mon Sep 17 00:00:00 2001 From: Dave Parker Date: Thu, 31 Aug 2017 12:35:13 -0700 Subject: [PATCH 4/4] Fix some style issues, and add some more checking and some additional code to support units in align functions. --- moldesign/mathutils/align.py | 100 ++++++++++++++++--------- moldesign/molecules/atomcollections.py | 21 +++--- 2 files changed, 78 insertions(+), 43 deletions(-) diff --git a/moldesign/mathutils/align.py b/moldesign/mathutils/align.py index ed940f6..35253aa 100644 --- a/moldesign/mathutils/align.py +++ b/moldesign/mathutils/align.py @@ -17,83 +17,115 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np from math import sqrt -from .. import units as u +import numpy as np +import moldesign as mdt from ..utils import exports @exports def rmsd_align(src_points, dst_points): """ Calculate the 4x4 transformation matrix that aligns a set of source points - to a set of destination of points. + to a set of destination of points. - This function implements the method from Kabsch (Acta Cryst. (1978) A34, 827-828) that - calculate the optimal rotation matrix that minimizes the root mean squared deviation between - two paired sets of points. + This function implements the method from Kabsch (Acta Cryst. (1978) A34, 827-828) that + calculate the optimal rotation matrix that minimizes the root mean squared deviation between + two paired sets of points. Args: - src_points(numpy.ndarray): An Nx3 array of the 3D source points. - dst_points(numpy.ndarray): An Nx3 array of the 3D destination points. + src_points (numpy.ndarray): An Nx3 array of the 3D source points. + dst_points (numpy.ndarray): An Nx3 array of the 3D destination points. Returns: - rmsd_error(Float): The root mean square deviation measuring the alignment error. - Xform (np.ndarray): The 4x4 array that transforms the source points to the destination points. + units.Scalar[length]: The root mean square deviation measuring the alignment error. + numpy.ndarray: The 4x4 array that transforms the source points to the destination points. """ + if (src_points.shape[0] == 0) or (dst_points.shape[0] == 0): + n = "Source" if src_points.shape[0] == 0 else "Destination" + print('WARNING: %s points array for RMSD aligning has zero length.' % n) + return 0.0, np.identity(4, dtype=float) + + if src_points.shape[0] != dst_points.shape[0]: + raise ValueError( + 'The number of points for calculating the RMSD between the first array %d \n' + 'does not equal the number of points in the second array %d' % (num_points1, num_points2)) + + if mdt.units.get_units(src_points) != mdt.units.get_units(dst_points): + raise ValueError("The source points units '%s' don't match the destination points units '%s'" % + (mdt.units.get_units(src_points), mdt.units.get_units(dst_points))) + + # Numpy matrix/vector multiplication results in a vector with dimensionless units + # even if the vector has units. To retain the vector's units multiple the result by + # the points units. + if isinstance(src_points, mdt.units.MdtQuantity): + units = mdt.units.get_units(src_points) + else: + units = 1.0 + # Calculate point centers. num_points = src_points.shape[0] src_center = src_points.sum(axis=0) / num_points dst_center = dst_points.sum(axis=0) / num_points # Calculate correlation matrix. - C = np.dot(np.transpose(dst_points[:]-dst_center), src_points[:]-src_center) + corr_mat = np.dot(np.transpose(dst_points[:]-dst_center), src_points[:]-src_center) # Compute singular value decomposition. - U, S, V = np.linalg.svd(C) - d = np.linalg.det(V) * np.linalg.det(U) + u, s, v = np.linalg.svd(corr_mat) + det = np.linalg.det(v) * np.linalg.det(u) # Make sure the rotation preserves orientation (det = 1). - if d < 0.0: - U[:, -1] = -U[:, -1] + if det < 0.0: + u[:, -1] = -u[:, -1] # Calculate matrix rotating src to dst. - R = np.dot(U, V) + rot_mat = np.dot(u, v) # Calculate the 4x4 matrix transforming src to dst. - Tsrc = np.identity(4, dtype=float) - Tsrc[0:3,3] = src_center - Tcenter = np.identity(4, dtype=float) - Tcenter[0:3,3] = dst_center - src_center - Rn = np.identity(4, dtype=float) - Rn[:3,:3] = R - M1 = np.dot(Rn, np.linalg.inv(Tsrc)) - M2 = np.dot(Tcenter, Tsrc) - Xform = np.dot(M2, M1) + tsrc = np.identity(4, dtype=float) + tsrc[0:3,3] = src_center + tcenter = np.identity(4, dtype=float) + tcenter[0:3,3] = dst_center - src_center + rn = np.identity(4, dtype=float) + rn[:3,:3] = rot_mat + m1 = np.dot(rn, np.linalg.inv(tsrc)) + m2 = np.dot(tcenter, tsrc) + xform = np.dot(m2, m1) # Calculate rmsd error. rmsd_error = 0.0 for i in range(num_points): - cdiff = R.dot(src_points[i]-src_center) - (dst_points[i]-dst_center) + cdiff = rot_mat.dot(src_points[i]-src_center)*units - (dst_points[i]-dst_center) rmsd_error += cdiff.dot(cdiff) #__for i in range(num_points) - rmsd_error = sqrt(rmsd_error / num_points) - return rmsd_error, Xform + rmsd_error = np.sqrt(rmsd_error / num_points) + return rmsd_error, xform @exports def calculate_rmsd(points1, points2, centered=False): """ Calculate the root mean square deviation (RMSD) between two sets of 3D points. Args: - points1(numpy.ndarray): An Nx3 array of the 3D points. - points2(numpy.ndarray): An Nx3 array of the 3D points. - centered(Boolean): If true then the points are centered around (0,0,0). + points1 (numpy.ndarray): An Nx3 array of the 3D points. + points2 (numpy.ndarray): An Nx3 array of the 3D points. + centered (bool): If true then the points are centered around (0,0,0). Returns: - rmsd(Float): The root mean square deviation. + units.Scalar[length]: The root mean square deviation. """ + if (points1.shape[0] == 0) or (points2.shape[0] == 0): + n = 1 if points1.shape[0] == 0 else 2 + print('WARNING: Points%d array for RMSD calculation has zero length.' % n) + return 0.0 if points1.shape[0] != points2.shape[0]: - raise ValueError('The number of points for calculating the RMSD between the first array %d does not equal the number of points in the second array %d' % (num_points1, num_points2)) + raise ValueError( + 'The number of points for calculating the RMSD between the first array %d \n' + 'does not equal the number of points in the second array %d' % (num_points1, num_points2)) + + if mdt.units.get_units(points1) != mdt.units.get_units(points2): + raise ValueError("Points1 units '%s' don't match the points2 units '%s'" % + (mdt.units.get_units(points1), mdt.units.get_units(points2))) # Calculate point centers. num_points = points1.shape[0] @@ -111,5 +143,5 @@ def calculate_rmsd(points1, points2, centered=False): cdiff = points1[i] - points2[i] + center rmsd += cdiff.dot(cdiff) - return sqrt(rmsd / num_points) + return np.sqrt(rmsd / num_points) diff --git a/moldesign/molecules/atomcollections.py b/moldesign/molecules/atomcollections.py index 33a8a27..f5476cc 100644 --- a/moldesign/molecules/atomcollections.py +++ b/moldesign/molecules/atomcollections.py @@ -441,19 +441,22 @@ def align(self, other): other (AtomContainer): The list of atoms to align to. Returns: - rmsd_error(Float): The root mean square deviation measuring the alignment error. - xform(np.ndarray): The 4x4 matrix that transforms this list of atoms to the input list of atoms. + units.Scalar[length]: The root mean square deviation measuring the alignment error. + numpy.ndarray: The 4x4 matrix that transforms this list of atoms to the input list of atoms. """ if len(self.atoms) != len(other.atoms): - raise ValueError('The number of atoms for fitting this atom list %d does not equal the number of atoms in the input list %d' % (len(self.atoms), len(other.atoms))) + raise ValueError( + 'The number of atoms for fitting this atom list %d does not equal \n' + 'the number of atoms in the input list %d' % (len(self.atoms), len(other.atoms))) # Create atom coordinate arrays. num_atoms = len(self.atoms) - self_coords = np.zeros((num_atoms, 3), dtype=float) - other_coords = np.zeros((num_atoms, 3), dtype=float) + coord_units = mdt.units.get_units(self.atoms[0].position) + self_coords = np.zeros((num_atoms, 3), dtype=float) * coord_units + other_coords = np.zeros((num_atoms, 3), dtype=float) * coord_units for i in range(0,num_atoms): - self_coords[i] = self.atoms[i].position.value_in(u.angstrom) - other_coords[i] = other.atoms[i].position.value_in(u.angstrom) + self_coords[i] = self.atoms[i].position + other_coords[i] = other.atoms[i].position # Calculate the 4x4 matrix transforming self_coords into other_coords. rmsd_error, xform = rmsd_align(self_coords, other_coords) @@ -467,7 +470,7 @@ def rmsd(self, other): other (AtomContainer): The list of atoms to calculate the RMSD with. Returns: - rmsd_error(Float): The root mean square deviation measuring the alignment error. + u.Scalar[length]: The root mean square deviation measuring the alignment error. """ if len(self.atoms) != len(other.atoms): raise ValueError('The number of atoms for calculating the RMSD between this atom list %d does not equal the number of atoms in the input list %d' % (len(self.atoms), len(other.atoms))) @@ -477,7 +480,7 @@ def rmsd(self, other): for i in range(0,num_atoms): self_coords[i] = self.atoms[i].position.value_in(u.angstrom) other_coords[i] = other.atoms[i].position.value_in(u.angstrom) - return rmsd(self_coords, other_coords) + return rmsd(self_coords, other_coords) * u.default.length @toplevel class AtomList(AtomContainer, list): # order is important, list will override methods otherwise