diff --git a/.coveragerc b/.coveragerc index 2c98cbe..5fb6c5d 100644 --- a/.coveragerc +++ b/.coveragerc @@ -23,7 +23,6 @@ exclude_lines = pragma: no cover def __repr__ def __str__ - def _repr_markdown_ raise AssertionError raise NotImplementedError if PY2 diff --git a/.gitignore b/.gitignore index 6c3a7ac..38acb0a 100644 --- a/.gitignore +++ b/.gitignore @@ -12,7 +12,8 @@ build .cloudcomputecannon __pycache__ .idea -.coverage* +.coverage +.coverage.* codeship.aes tokens moldesign/_static_data/components.ci* diff --git a/deployment/codeship_runtests.sh b/deployment/codeship_runtests.sh index 4e3e987..f22c5ad 100755 --- a/deployment/codeship_runtests.sh +++ b/deployment/codeship_runtests.sh @@ -5,7 +5,7 @@ set -e # fail immediately if any command fails VERSION="${TESTENV}.py${PYVERSION}" -PYTESTFLAGS="-n 2 --spec --disable-warnings --durations=20 --junit-xml=/opt/reports/junit.${VERSION}.xml --timeout=1800 --tb=short" +PYTESTFLAGS="-n 2 --spec --durations=20 --junit-xml=/opt/reports/junit.${VERSION}.xml --timeout=3600 --tb=short" if [ "${VERSION}" == "complete.py3" ]; then PYTESTFLAGS="--cov moldesign ${PYTESTFLAGS}" fi diff --git a/moldesign/_static_data/nist_atomic.yml b/moldesign/_static_data/nist_atomic.yml new file mode 100644 index 0000000..b609639 --- /dev/null +++ b/moldesign/_static_data/nist_atomic.yml @@ -0,0 +1,474 @@ +# This data was derived from the NIST Isotopic Mass database +# https://www.nist.gov/pml/atomic-weights-and-isotopic-compositions-relative-atomic-masses +1: +- {abundance: 0.9998857, mass: 1.007825032239, number: 1, symbol: H} +- {abundance: 0.0001157, mass: 2.0141017781212, number: 2, symbol: D} +- {abundance: 0.0, mass: 3.016049277924, number: 3, symbol: T} +2: +- {abundance: 0.999998663, mass: 4.002603254136, number: 4, symbol: He} +- {abundance: 1.343e-06, mass: 3.016029320125, number: 3, symbol: He} +3: +- {abundance: 0.92414, mass: 7.016003436645, number: 7, symbol: Li} +- {abundance: 0.07594, mass: 6.015122887416, number: 6, symbol: Li} +4: +- {abundance: 1.0, mass: 9.01218306582, number: 9, symbol: Be} +5: +- {abundance: 0.8017, mass: 11.0093053645, number: 11, symbol: B} +- {abundance: 0.1997, mass: 10.0129369541, number: 10, symbol: B} +6: +- {abundance: 0.98938, mass: 12.0, number: 12, symbol: C} +- {abundance: 0.01078, mass: 13.0033548350723, number: 13, symbol: C} +- {abundance: 0.0, mass: 14.00324198844, number: 14, symbol: C} +7: +- {abundance: 0.996362, mass: 14.003074004432, number: 14, symbol: N} +- {abundance: 0.003642, mass: 15.0001088988864, number: 15, symbol: N} +8: +- {abundance: 0.9975716, mass: 15.9949146195717, number: 16, symbol: O} +- {abundance: 0.0020514, mass: 17.9991596128676, number: 18, symbol: O} +- {abundance: 0.000381, mass: 16.9991317565069, number: 17, symbol: O} +9: +- {abundance: 1.0, mass: 18.9984031627392, number: 19, symbol: F} +10: +- {abundance: 0.90483, mass: 19.992440176217, number: 20, symbol: Ne} +- {abundance: 0.09253, mass: 21.99138511418, number: 22, symbol: Ne} +- {abundance: 0.00271, mass: 20.99384668541, number: 21, symbol: Ne} +11: +- {abundance: 1.0, mass: 22.989769282019, number: 23, symbol: Na} +12: +- {abundance: 0.78994, mass: 23.98504169714, number: 24, symbol: Mg} +- {abundance: 0.11013, mass: 25.98259296831, number: 26, symbol: Mg} +- {abundance: 0.10001, mass: 24.9858369765, number: 25, symbol: Mg} +13: +- {abundance: 1.0, mass: 26.9815385311, number: 27, symbol: Al} +14: +- {abundance: 0.9222319, mass: 27.9769265346544, number: 28, symbol: Si} +- {abundance: 0.046858, mass: 28.9764946649052, number: 29, symbol: Si} +- {abundance: 0.0309211, mass: 29.97377013623, number: 30, symbol: Si} +15: +- {abundance: 1.0, mass: 30.973761998427, number: 31, symbol: P} +16: +- {abundance: 0.949926, mass: 31.972071174414, number: 32, symbol: S} +- {abundance: 0.042524, mass: 33.96786700447, number: 34, symbol: S} +- {abundance: 0.00752, mass: 32.971458909815, number: 33, symbol: S} +- {abundance: 0.00011, mass: 35.967080712, number: 36, symbol: S} +17: +- {abundance: 0.75761, mass: 34.96885268237, number: 35, symbol: Cl} +- {abundance: 0.24241, mass: 36.96590260255, number: 37, symbol: Cl} +18: +- {abundance: 0.99603525, mass: 39.962383123724, number: 40, symbol: Ar} +- {abundance: 0.00333621, mass: 35.96754510528, number: 36, symbol: Ar} +- {abundance: 0.0006297, mass: 37.9627321121, number: 38, symbol: Ar} +19: +- {abundance: 0.93258144, mass: 38.963706486449, number: 39, symbol: K} +- {abundance: 0.06730244, mass: 40.961825257941, number: 41, symbol: K} +- {abundance: 0.0001171, mass: 39.9639981666, number: 40, symbol: K} +20: +- {abundance: 0.96941156, mass: 39.96259086322, number: 40, symbol: Ca} +- {abundance: 0.0208611, mass: 43.9554815635, number: 44, symbol: Ca} +- {abundance: 0.0064723, mass: 41.9586178316, number: 42, symbol: Ca} +- {abundance: 0.0018721, mass: 47.9525227613, number: 48, symbol: Ca} +- {abundance: 0.001351, mass: 42.9587664424, number: 43, symbol: Ca} +- {abundance: 4.3e-05, mass: 45.953689024, number: 46, symbol: Ca} +21: +- {abundance: 1.0, mass: 44.9559082877, number: 45, symbol: Sc} +22: +- {abundance: 0.73723, mass: 47.9479419838, number: 48, symbol: Ti} +- {abundance: 0.08253, mass: 45.9526277235, number: 46, symbol: Ti} +- {abundance: 0.07442, mass: 46.9517587938, number: 47, symbol: Ti} +- {abundance: 0.05412, mass: 48.9478656839, number: 49, symbol: Ti} +- {abundance: 0.05182, mass: 49.9447868939, number: 50, symbol: Ti} +23: +- {abundance: 0.997504, mass: 50.9439570494, number: 51, symbol: V} +- {abundance: 0.002504, mass: 49.9471560195, number: 50, symbol: V} +24: +- {abundance: 0.8378918, mass: 51.9405062363, number: 52, symbol: Cr} +- {abundance: 0.0950117, mass: 52.9406481562, number: 53, symbol: Cr} +- {abundance: 0.0434513, mass: 49.9460418394, number: 50, symbol: Cr} +- {abundance: 0.023657, mass: 53.9388791661, number: 54, symbol: Cr} +25: +- {abundance: 1.0, mass: 54.9380439148, number: 55, symbol: Mn} +26: +- {abundance: 0.9175436, mass: 55.9349363349, number: 56, symbol: Fe} +- {abundance: 0.0584535, mass: 53.9396089953, number: 54, symbol: Fe} +- {abundance: 0.021191, mass: 56.9353928449, number: 57, symbol: Fe} +- {abundance: 0.002824, mass: 57.9332744353, number: 58, symbol: Fe} +27: +- {abundance: 1.0, mass: 58.9331942956, number: 59, symbol: Co} +28: +- {abundance: 0.6807719, mass: 57.9353424152, number: 58, symbol: Ni} +- {abundance: 0.2622315, mass: 59.9307858852, number: 60, symbol: Ni} +- {abundance: 0.0363464, mass: 61.9283453755, number: 62, symbol: Ni} +- {abundance: 0.01139913, mass: 60.9310555752, number: 61, symbol: Ni} +- {abundance: 0.00925519, mass: 63.9279668258, number: 64, symbol: Ni} +29: +- {abundance: 0.691515, mass: 62.9295977256, number: 63, symbol: Cu} +- {abundance: 0.308515, mass: 64.9277897071, number: 65, symbol: Cu} +30: +- {abundance: 0.491775, mass: 63.9291420171, number: 64, symbol: Zn} +- {abundance: 0.277398, mass: 65.9260338194, number: 66, symbol: Zn} +- {abundance: 0.184563, mass: 67.9248445598, number: 68, symbol: Zn} +- {abundance: 0.040416, mass: 66.9271277596, number: 67, symbol: Zn} +- {abundance: 0.00611, mass: 69.925319221, number: 70, symbol: Zn} +31: +- {abundance: 0.601089, mass: 68.925573513, number: 69, symbol: Ga} +- {abundance: 0.398929, mass: 70.9247025887, number: 71, symbol: Ga} +32: +- {abundance: 0.36502, mass: 73.92117776113, number: 74, symbol: Ge} +- {abundance: 0.274532, mass: 71.92207582681, number: 72, symbol: Ge} +- {abundance: 0.205727, mass: 69.924248759, number: 70, symbol: Ge} +- {abundance: 0.077512, mass: 72.92345895661, number: 73, symbol: Ge} +- {abundance: 0.077312, mass: 75.92140272619, number: 76, symbol: Ge} +33: +- {abundance: 1.0, mass: 74.9215945795, number: 75, symbol: As} +34: +- {abundance: 0.496141, mass: 79.916521813, number: 80, symbol: Se} +- {abundance: 0.237728, mass: 77.917309282, number: 78, symbol: Se} +- {abundance: 0.093729, mass: 75.91921370417, number: 76, symbol: Se} +- {abundance: 0.087322, mass: 81.916699515, number: 82, symbol: Se} +- {abundance: 0.076316, mass: 76.91991415467, number: 77, symbol: Se} +- {abundance: 0.00894, mass: 73.92247593415, number: 74, symbol: Se} +35: +- {abundance: 0.50697, mass: 78.918337614, number: 79, symbol: Br} +- {abundance: 0.49317, mass: 80.916289714, number: 81, symbol: Br} +36: +- {abundance: 0.5698715, mass: 83.911497728244, number: 84, symbol: Kr} +- {abundance: 0.1727941, mass: 85.910610626941, number: 86, symbol: Kr} +- {abundance: 0.1159331, mass: 81.9134827394, number: 82, symbol: Kr} +- {abundance: 0.1150019, mass: 82.9141271632, number: 83, symbol: Kr} +- {abundance: 0.022861, mass: 79.9163780875, number: 80, symbol: Kr} +- {abundance: 0.003553, mass: 77.9203649476, number: 78, symbol: Kr} +37: +- {abundance: 0.72172, mass: 84.911789737954, number: 85, symbol: Rb} +- {abundance: 0.27832, mass: 86.90918053106, number: 87, symbol: Rb} +38: +- {abundance: 0.82581, mass: 87.905612512, number: 88, symbol: Sr} +- {abundance: 0.09861, mass: 85.909260612, number: 86, symbol: Sr} +- {abundance: 0.07001, mass: 86.908877512, number: 87, symbol: Sr} +- {abundance: 0.00561, mass: 83.913419113, number: 84, symbol: Sr} +39: +- {abundance: 1.0, mass: 88.905840324, number: 89, symbol: Y} +40: +- {abundance: 0.51454, mass: 89.90469772, number: 90, symbol: Zr} +- {abundance: 0.173828, mass: 93.90631082, number: 94, symbol: Zr} +- {abundance: 0.17158, mass: 91.90503472, number: 92, symbol: Zr} +- {abundance: 0.11225, mass: 90.90563962, number: 91, symbol: Zr} +- {abundance: 0.02809, mass: 95.908271421, number: 96, symbol: Zr} +41: +- {abundance: 1.0, mass: 92.90637302, number: 93, symbol: Nb} +42: +- {abundance: 0.243937, mass: 97.9054048249, number: 98, symbol: Mo} +- {abundance: 0.166715, mass: 95.9046761247, number: 96, symbol: Mo} +- {abundance: 0.158411, mass: 94.9058387747, number: 95, symbol: Mo} +- {abundance: 0.14533, mass: 91.9068079684, number: 92, symbol: Mo} +- {abundance: 0.098231, mass: 99.907471811, number: 100, symbol: Mo} +- {abundance: 0.096014, mass: 96.9060181249, number: 97, symbol: Mo} +- {abundance: 0.09159, mass: 93.9050849048, number: 94, symbol: Mo} +43: +- {abundance: 0.0, mass: 96.90636674, number: 97, symbol: Tc} +- {abundance: 0.0, mass: 97.907212436, number: 98, symbol: Tc} +- {abundance: 0.0, mass: 98.90625081, number: 99, symbol: Tc} +44: +- {abundance: 0.315514, mass: 101.904344112, number: 102, symbol: Ru} +- {abundance: 0.186227, mass: 103.905427528, number: 104, symbol: Ru} +- {abundance: 0.17062, mass: 100.905576912, number: 101, symbol: Ru} +- {abundance: 0.127614, mass: 98.905934111, number: 99, symbol: Ru} +- {abundance: 0.12607, mass: 99.904214311, number: 100, symbol: Ru} +- {abundance: 0.055414, mass: 95.9075902549, number: 96, symbol: Ru} +- {abundance: 0.01873, mass: 97.905286869, number: 98, symbol: Ru} +45: +- {abundance: 1.0, mass: 102.905498026, number: 103, symbol: Rh} +46: +- {abundance: 0.27333, mass: 105.903480412, number: 106, symbol: Pd} +- {abundance: 0.26469, mass: 107.903891612, number: 108, symbol: Pd} +- {abundance: 0.22338, mass: 104.905079612, number: 105, symbol: Pd} +- {abundance: 0.11729, mass: 109.9051722075, number: 110, symbol: Pd} +- {abundance: 0.11148, mass: 103.904030514, number: 104, symbol: Pd} +- {abundance: 0.01021, mass: 101.905602228, number: 102, symbol: Pd} +47: +- {abundance: 0.518398, mass: 106.905091626, number: 107, symbol: Ag} +- {abundance: 0.481618, mass: 108.904755314, number: 109, symbol: Ag} +48: +- {abundance: 0.287342, mass: 113.9033650943, number: 114, symbol: Cd} +- {abundance: 0.241321, mass: 111.902762876, number: 112, symbol: Cd} +- {abundance: 0.128012, mass: 110.9041828761, number: 111, symbol: Cd} +- {abundance: 0.124918, mass: 109.9030066161, number: 110, symbol: Cd} +- {abundance: 0.122212, mass: 112.9044081345, number: 113, symbol: Cd} +- {abundance: 0.074918, mass: 115.9047631517, number: 116, symbol: Cd} +- {abundance: 0.01256, mass: 105.906459912, number: 106, symbol: Cd} +- {abundance: 0.00893, mass: 107.904183412, number: 108, symbol: Cd} +49: +- {abundance: 0.95715, mass: 114.90387877612, number: 115, symbol: In} +- {abundance: 0.04295, mass: 112.9040618491, number: 113, symbol: In} +50: +- {abundance: 0.32589, mass: 119.9022016397, number: 120, symbol: Sn} +- {abundance: 0.24229, mass: 117.9016065754, number: 118, symbol: Sn} +- {abundance: 0.14549, mass: 115.901742801, number: 116, symbol: Sn} +- {abundance: 0.08594, mass: 118.9033111778, number: 119, symbol: Sn} +- {abundance: 0.07687, mass: 116.9029539852, number: 117, symbol: Sn} +- {abundance: 0.05795, mass: 123.905276611, number: 124, symbol: Sn} +- {abundance: 0.04633, mass: 121.903443826, number: 122, symbol: Sn} +- {abundance: 0.00971, mass: 111.9048238761, number: 112, symbol: Sn} +- {abundance: 0.00661, mass: 113.90278271, number: 114, symbol: Sn} +- {abundance: 0.00341, mass: 114.90334469916, number: 115, symbol: Sn} +51: +- {abundance: 0.57215, mass: 120.90381203, number: 121, symbol: Sb} +- {abundance: 0.42795, mass: 122.904213223, number: 123, symbol: Sb} +52: +- {abundance: 0.340862, mass: 129.90622274812, number: 130, symbol: Te} +- {abundance: 0.31748, mass: 127.9044612893, number: 128, symbol: Te} +- {abundance: 0.188425, mass: 125.903310916, number: 126, symbol: Te} +- {abundance: 0.070715, mass: 124.904429916, number: 125, symbol: Te} +- {abundance: 0.047414, mass: 123.902817116, number: 124, symbol: Te} +- {abundance: 0.025512, mass: 121.903043516, number: 122, symbol: Te} +- {abundance: 0.00893, mass: 122.904269816, number: 123, symbol: Te} +- {abundance: 0.00091, mass: 119.904059333, number: 120, symbol: Te} +53: +- {abundance: 1.0, mass: 126.904471939, number: 127, symbol: I} +54: +- {abundance: 0.26908633, mass: 131.904155085656, number: 132, symbol: Xe} +- {abundance: 0.26400682, mass: 128.90478086116, number: 129, symbol: Xe} +- {abundance: 0.2123243, mass: 130.9050840624, number: 131, symbol: Xe} +- {abundance: 0.10435721, mass: 133.905394669, number: 134, symbol: Xe} +- {abundance: 0.08857344, mass: 135.90721448411, number: 136, symbol: Xe} +- {abundance: 0.04071013, mass: 129.9035093491, number: 130, symbol: Xe} +- {abundance: 0.0191028, mass: 127.903531011, number: 128, symbol: Xe} +- {abundance: 0.0009523, mass: 123.905892019, number: 124, symbol: Xe} +- {abundance: 0.0008902, mass: 125.904298338, number: 126, symbol: Xe} +55: +- {abundance: 1.0, mass: 132.90545196108, number: 133, symbol: Cs} +56: +- {abundance: 0.7169842, mass: 137.9052470031, number: 138, symbol: Ba} +- {abundance: 0.1123224, mass: 136.905827143, number: 137, symbol: Ba} +- {abundance: 0.0785424, mass: 135.9045757329, number: 136, symbol: Ba} +- {abundance: 0.0659212, mass: 134.9056883829, number: 135, symbol: Ba} +- {abundance: 0.0241718, mass: 133.904508183, number: 134, symbol: Ba} +- {abundance: 0.001061, mass: 129.906320728, number: 130, symbol: Ba} +- {abundance: 0.001011, mass: 131.905061111, number: 132, symbol: Ba} +57: +- {abundance: 0.999111971, mass: 138.906356324, number: 139, symbol: La} +- {abundance: 0.000888171, mass: 137.907114937, number: 138, symbol: La} +58: +- {abundance: 0.8845051, mass: 139.905443123, number: 140, symbol: Ce} +- {abundance: 0.1111451, mass: 141.909250429, number: 142, symbol: Ce} +- {abundance: 0.002512, mass: 137.90599111, number: 138, symbol: Ce} +- {abundance: 0.001852, mass: 135.9071292141, number: 136, symbol: Ce} +59: +- {abundance: 1.0, mass: 140.907657623, number: 141, symbol: Pr} +60: +- {abundance: 0.271524, mass: 141.90772902, number: 142, symbol: Nd} +- {abundance: 0.2379819, mass: 143.91009302, number: 144, symbol: Nd} +- {abundance: 0.1718932, mass: 145.91312262, number: 146, symbol: Nd} +- {abundance: 0.1217426, mass: 142.90982002, number: 143, symbol: Nd} +- {abundance: 0.0829312, mass: 144.91257932, number: 145, symbol: Nd} +- {abundance: 0.0575621, mass: 147.916899326, number: 148, symbol: Nd} +- {abundance: 0.0563828, mass: 149.920902218, number: 150, symbol: Nd} +61: +- {abundance: 0.0, mass: 144.912755933, number: 145, symbol: Pm} +- {abundance: 0.0, mass: 146.915145019, number: 147, symbol: Pm} +62: +- {abundance: 0.267516, mass: 151.919739718, number: 152, symbol: Sm} +- {abundance: 0.227529, mass: 153.92221692, number: 154, symbol: Sm} +- {abundance: 0.149918, mass: 146.914904419, number: 147, symbol: Sm} +- {abundance: 0.13827, mass: 148.917192118, number: 149, symbol: Sm} +- {abundance: 0.11241, mass: 147.914829219, number: 148, symbol: Sm} +- {abundance: 0.07381, mass: 149.917282918, number: 150, symbol: Sm} +- {abundance: 0.03077, mass: 143.912006521, number: 144, symbol: Sm} +63: +- {abundance: 0.52196, mass: 152.921238018, number: 153, symbol: Eu} +- {abundance: 0.47816, mass: 150.919857818, number: 151, symbol: Eu} +64: +- {abundance: 0.24847, mass: 157.924112317, number: 158, symbol: Gd} +- {abundance: 0.218619, mass: 159.927062418, number: 160, symbol: Gd} +- {abundance: 0.20479, mass: 155.922131217, number: 156, symbol: Gd} +- {abundance: 0.15652, mass: 156.923968617, number: 157, symbol: Gd} +- {abundance: 0.148012, mass: 154.922630517, number: 155, symbol: Gd} +- {abundance: 0.02183, mass: 153.920874117, number: 154, symbol: Gd} +- {abundance: 0.00201, mass: 151.919799518, number: 152, symbol: Gd} +65: +- {abundance: 1.0, mass: 158.925354719, number: 159, symbol: Tb} +66: +- {abundance: 0.2826054, mass: 163.92918192, number: 164, symbol: Dy} +- {abundance: 0.2547536, mass: 161.92680562, number: 162, symbol: Dy} +- {abundance: 0.2489642, mass: 162.92873832, number: 163, symbol: Dy} +- {abundance: 0.1888942, mass: 160.92694052, number: 161, symbol: Dy} +- {abundance: 0.0232918, mass: 159.92520462, number: 160, symbol: Dy} +- {abundance: 0.000953, mass: 157.924415931, number: 158, symbol: Dy} +- {abundance: 0.000563, mass: 155.924284717, number: 156, symbol: Dy} +67: +- {abundance: 1.0, mass: 164.930328821, number: 165, symbol: Ho} +68: +- {abundance: 0.3350336, mass: 165.930299522, number: 166, symbol: Er} +- {abundance: 0.2697818, mass: 167.932376722, number: 168, symbol: Er} +- {abundance: 0.228699, mass: 166.932054622, number: 167, symbol: Er} +- {abundance: 0.1491036, mass: 169.935470226, number: 170, symbol: Er} +- {abundance: 0.016013, mass: 163.92920882, number: 164, symbol: Er} +- {abundance: 0.001395, mass: 161.92878842, number: 162, symbol: Er} +69: +- {abundance: 1.0, mass: 168.934217922, number: 169, symbol: Tm} +70: +- {abundance: 0.320268, mass: 173.938866422, number: 174, symbol: Yb} +- {abundance: 0.216813, mass: 171.936385922, number: 172, symbol: Yb} +- {abundance: 0.1610363, mass: 172.938215122, number: 173, symbol: Yb} +- {abundance: 0.140914, mass: 170.936330222, number: 171, symbol: Yb} +- {abundance: 0.1299683, mass: 175.942576424, number: 176, symbol: Yb} +- {abundance: 0.0298239, mass: 169.934766422, number: 170, symbol: Yb} +- {abundance: 0.001233, mass: 167.933889622, number: 168, symbol: Yb} +71: +- {abundance: 0.9740113, mass: 174.94077522, number: 175, symbol: Lu} +- {abundance: 0.0259913, mass: 175.94268972, number: 176, symbol: Lu} +72: +- {abundance: 0.350816, mass: 179.94655702, number: 180, symbol: Hf} +- {abundance: 0.27287, mass: 177.94370582, number: 178, symbol: Hf} +- {abundance: 0.18609, mass: 176.94322772, number: 177, symbol: Hf} +- {abundance: 0.13622, mass: 178.94582322, number: 179, symbol: Hf} +- {abundance: 0.05267, mass: 175.941407622, number: 176, symbol: Hf} +- {abundance: 0.00161, mass: 173.940046128, number: 174, symbol: Hf} +73: +- {abundance: 0.999879932, mass: 180.94799582, number: 181, symbol: Ta} +- {abundance: 0.000120132, mass: 179.947464824, number: 180, symbol: Ta} +74: +- {abundance: 0.30642, mass: 183.9509309294, number: 184, symbol: W} +- {abundance: 0.284319, mass: 185.954362817, number: 186, symbol: W} +- {abundance: 0.265016, mass: 181.9482039491, number: 182, symbol: W} +- {abundance: 0.14314, mass: 182.950222759, number: 183, symbol: W} +- {abundance: 0.00121, mass: 179.94671082, number: 180, symbol: W} +75: +- {abundance: 0.62602, mass: 186.955750116, number: 187, symbol: Re} +- {abundance: 0.37402, mass: 184.952954513, number: 185, symbol: Re} +76: +- {abundance: 0.407819, mass: 191.961477029, number: 192, symbol: Os} +- {abundance: 0.26262, mass: 189.958443717, number: 190, symbol: Os} +- {abundance: 0.16155, mass: 188.958144217, number: 189, symbol: Os} +- {abundance: 0.13248, mass: 187.955835216, number: 188, symbol: Os} +- {abundance: 0.01962, mass: 186.955747416, number: 187, symbol: Os} +- {abundance: 0.01593, mass: 185.953835016, number: 186, symbol: Os} +- {abundance: 0.00021, mass: 183.952488514, number: 184, symbol: Os} +77: +- {abundance: 0.6272, mass: 192.962921621, number: 193, symbol: Ir} +- {abundance: 0.3732, mass: 190.960589321, number: 191, symbol: Ir} +78: +- {abundance: 0.337824, mass: 194.96479171, number: 195, symbol: Pt} +- {abundance: 0.32864, mass: 193.96268091, number: 194, symbol: Pt} +- {abundance: 0.252134, mass: 195.9649520999, number: 196, symbol: Pt} +- {abundance: 0.0735613, mass: 197.967894923, number: 198, symbol: Pt} +- {abundance: 0.0078224, mass: 191.961038732, number: 192, symbol: Pt} +- {abundance: 0.000122, mass: 189.959929763, number: 190, symbol: Pt} +79: +- {abundance: 1.0, mass: 196.9665687971, number: 197, symbol: Au} +80: +- {abundance: 0.298626, mass: 201.9706434069, number: 202, symbol: Hg} +- {abundance: 0.231019, mass: 199.9683265947, number: 200, symbol: Hg} +- {abundance: 0.168722, mass: 198.9682806446, number: 199, symbol: Hg} +- {abundance: 0.13189, mass: 200.9703028469, number: 201, symbol: Hg} +- {abundance: 0.09972, mass: 197.9667686052, number: 198, symbol: Hg} +- {abundance: 0.068715, mass: 203.9734939853, number: 204, symbol: Hg} +- {abundance: 0.00151, mass: 195.965832632, number: 196, symbol: Hg} +81: +- {abundance: 0.70481, mass: 204.974427814, number: 205, symbol: Tl} +- {abundance: 0.29521, mass: 202.972344614, number: 203, symbol: Tl} +82: +- {abundance: 0.5241, mass: 207.976652513, number: 208, symbol: Pb} +- {abundance: 0.2411, mass: 205.974465713, number: 206, symbol: Pb} +- {abundance: 0.2211, mass: 206.975897313, number: 207, symbol: Pb} +- {abundance: 0.0141, mass: 203.973044013, number: 204, symbol: Pb} +83: +- {abundance: 1.0, mass: 208.980399116, number: 209, symbol: Bi} +84: +- {abundance: 0.0, mass: 208.98243082, number: 209, symbol: Po} +- {abundance: 0.0, mass: 209.982874113, number: 210, symbol: Po} +85: +- {abundance: 0.0, mass: 209.987147983, number: 210, symbol: At} +- {abundance: 0.0, mass: 210.98749663, number: 211, symbol: At} +86: +- {abundance: 0.0, mass: 210.990601173, number: 211, symbol: Rn} +- {abundance: 0.0, mass: 220.011394123, number: 220, symbol: Rn} +- {abundance: 0.0, mass: 222.017578225, number: 222, symbol: Rn} +87: +- {abundance: 0.0, mass: 223.019736025, number: 223, symbol: Fr} +88: +- {abundance: 0.0, mass: 223.018502327, number: 223, symbol: Ra} +- {abundance: 0.0, mass: 224.020212023, number: 224, symbol: Ra} +- {abundance: 0.0, mass: 226.025410325, number: 226, symbol: Ra} +- {abundance: 0.0, mass: 228.031070726, number: 228, symbol: Ra} +89: +- {abundance: 0.0, mass: 227.027752325, number: 227, symbol: Ac} +90: +- {abundance: 1.0, mass: 232.038055821, number: 232, symbol: Th} +- {abundance: 0.0, mass: 230.033134119, number: 230, symbol: Th} +91: +- {abundance: 1.0, mass: 231.035884224, number: 231, symbol: Pa} +92: +- {abundance: 0.9927421, mass: 238.05078842, number: 238, symbol: U} +- {abundance: 0.0072046, mass: 235.043930119, number: 235, symbol: U} +- {abundance: 5.45e-05, mass: 234.040952319, number: 234, symbol: U} +- {abundance: 0.0, mass: 233.039635529, number: 233, symbol: U} +- {abundance: 0.0, mass: 236.045568219, number: 236, symbol: U} +93: +- {abundance: 0.0, mass: 236.04657054, number: 236, symbol: Np} +- {abundance: 0.0, mass: 237.048173619, number: 237, symbol: Np} +94: +- {abundance: 0.0, mass: 238.049560119, number: 238, symbol: Pu} +- {abundance: 0.0, mass: 239.052163619, number: 239, symbol: Pu} +- {abundance: 0.0, mass: 240.053813819, number: 240, symbol: Pu} +- {abundance: 0.0, mass: 241.056851719, number: 241, symbol: Pu} +- {abundance: 0.0, mass: 242.05874282, number: 242, symbol: Pu} +- {abundance: 0.0, mass: 244.064205356, number: 244, symbol: Pu} +95: +- {abundance: 0.0, mass: 241.056829319, number: 241, symbol: Am} +- {abundance: 0.0, mass: 243.061381324, number: 243, symbol: Am} +96: +- {abundance: 0.0, mass: 243.061389322, number: 243, symbol: Cm} +- {abundance: 0.0, mass: 244.062752819, number: 244, symbol: Cm} +- {abundance: 0.0, mass: 245.065491522, number: 245, symbol: Cm} +- {abundance: 0.0, mass: 246.067223822, number: 246, symbol: Cm} +- {abundance: 0.0, mass: 247.070354147, number: 247, symbol: Cm} +- {abundance: 0.0, mass: 248.072349956, number: 248, symbol: Cm} +97: +- {abundance: 0.0, mass: 247.070307359, number: 247, symbol: Bk} +- {abundance: 0.0, mass: 249.074987727, number: 249, symbol: Bk} +98: +- {abundance: 0.0, mass: 249.074853923, number: 249, symbol: Cf} +- {abundance: 0.0, mass: 250.076406222, number: 250, symbol: Cf} +- {abundance: 0.0, mass: 251.079588648, number: 251, symbol: Cf} +- {abundance: 0.0, mass: 252.081627256, number: 252, symbol: Cf} +99: +- {abundance: 0.0, mass: 252.08298054, number: 252, symbol: Es} +100: +- {abundance: 0.0, mass: 257.095106169, number: 257, symbol: Fm} +101: +- {abundance: 0.0, mass: 258.09843155, number: 258, symbol: Md} +- {abundance: 0.0, mass: 260.1036534, number: 260, symbol: Md} +102: +- {abundance: 0.0, mass: 259.1010311, number: 259, symbol: 'No'} +103: +- {abundance: 0.0, mass: 262.1096122, number: 262, symbol: Lr} +104: +- {abundance: 0.0, mass: 267.1217962, number: 267, symbol: Rf} +105: +- {abundance: 0.0, mass: 268.1256757, number: 268, symbol: Db} +106: +- {abundance: 0.0, mass: 271.1339363, number: 271, symbol: Sg} +107: +- {abundance: 0.0, mass: 272.1382658, number: 272, symbol: Bh} +108: +- {abundance: 0.0, mass: 270.1342927, number: 270, symbol: Hs} +109: +- {abundance: 0.0, mass: 276.1515959, number: 276, symbol: Mt} +110: +- {abundance: 0.0, mass: 281.1645159, number: 281, symbol: Ds} +111: +- {abundance: 0.0, mass: 280.1651461, number: 280, symbol: Rg} +112: +- {abundance: 0.0, mass: 285.177126, number: 285, symbol: Cn} +113: +- {abundance: 0.0, mass: 284.1787362, number: 284, symbol: Uut} +114: +- {abundance: 0.0, mass: 289.190426, number: 289, symbol: Fl} +115: +- {abundance: 0.0, mass: 288.1927462, number: 288, symbol: Uup} +116: +- {abundance: 0.0, mass: 293.204496, number: 293, symbol: Lv} +117: +- {abundance: 0.0, mass: 292.2074675, number: 292, symbol: Uus} +118: +- {abundance: 0.0, mass: 294.2139271, number: 294, symbol: Uuo} diff --git a/moldesign/_tests/molecule_fixtures.py b/moldesign/_tests/molecule_fixtures.py index 44bd31e..9c0dc94 100644 --- a/moldesign/_tests/molecule_fixtures.py +++ b/moldesign/_tests/molecule_fixtures.py @@ -63,45 +63,68 @@ def random_atoms_from_3aid(pdb3aid): @pytest.fixture(scope='session') -def create_small_molecule(): +def cached_small_molecule(): mol = mdt.from_smiles('CNCOS(=O)C') mol.positions += 0.001*np.random.random(mol.positions.shape)*u.angstrom # move out of minimum return mol @pytest.fixture -def small_molecule(create_small_molecule): - return create_small_molecule.copy() +def small_molecule(cached_small_molecule): + return cached_small_molecule.copy() -@pytest.fixture() -def benzene(): +@pytest.fixture(scope='session') +def cached_benzene(): return mdt.from_smiles('c1ccccc1') +@pytest.fixture +def benzene(cached_benzene): + return cached_benzene.copy() + + @typedfixture('molecule') def h2(): - atom1 = mdt.Atom('H') - atom1.x = 0.5 * u.angstrom - atom2 = mdt.Atom(atnum=1) - atom2.position = [-0.5, 0.0, 0.0] * u.angstrom - h2 = mdt.Molecule([atom1, atom2], name='h2') - atom1.bond_to(atom2, 1) - return h2 + mol = mdt.Molecule([mdt.Atom('H1'), + mdt.Atom('H2')]) + mol.atoms[0].x = 0.5 * u.angstrom + mol.atoms[1].x = -0.25 * u.angstrom + mol.atoms[0].bond_to(mol.atoms[1], 1) + return mol -@pytest.fixture -def ethylene(): +@typedfixture('molecule') +def heh_plus(): + mol = mdt.Molecule([mdt.Atom('H'), + mdt.Atom('He')]) + mol.atoms[1].z = 1.0 * u.angstrom + mol.charge = 1 * u.q_e + mol.atoms[0].bond_to(mol.atoms[1], 1) + return mol + + +@pytest.fixture(scope='session') +def cached_ethylene(): return mdt.from_smiles('C=C') +@pytest.fixture +def ethylene(cached_ethylene): + return cached_ethylene.copy() + + @pytest.fixture(scope='session') +def cached_pdb1yu8(): + return mdt.read(get_data_path('1yu8.pdb')) + +@pytest.fixture def pdb1yu8(): return mdt.read(get_data_path('1yu8.pdb')) -@pytest.fixture() -def mol_from_xyz(): +@pytest.fixture(scope='session') +def cached_mol_from_xyz(): return mdt.read("""43 c1.pdb C -1.21700 1.04300 2.45300 @@ -150,8 +173,13 @@ def mol_from_xyz(): """, format='xyz') -@pytest.fixture() -def mol_from_sdf(): +@pytest.fixture +def mol_from_xyz(cached_mol_from_xyz): + return cached_mol_from_xyz.copy() + + +@pytest.fixture(scope='session') +def cached_mol_from_sdf(): return mdt.read(""" OpenBabel02271712493D @@ -181,6 +209,10 @@ def mol_from_sdf(): $$$$ """, format='sdf') +@pytest.fixture +def mol_from_sdf(cached_mol_from_sdf): + return cached_mol_from_sdf.copy() + @typedfixture('molecule') def nucleic(): @@ -193,46 +225,46 @@ def nucleic(): # Molecules with forcefields assigned - these use a session-scoped constructor w/ a copy factory @pytest.fixture(scope='session') -def create_parameterize_zeros(create_small_molecule): - return _param_small_mol(create_small_molecule.copy(), 'zero') +def cached_mol_parameterized_with_zeros(cached_small_molecule): + return _param_small_mol(cached_small_molecule.copy(), 'zero') @typedfixture('hasmodel') -def parameterize_zeros(create_parameterize_zeros): - return create_parameterize_zeros.copy() +def mol_with_zerocharge_params(cached_mol_parameterized_with_zeros): + return cached_mol_parameterized_with_zeros.copy() @pytest.fixture(scope='session') -def create_parameterize_am1bcc(create_small_molecule): +def cached_mol_parameterized_with_am1bcc(cached_small_molecule): """ We don't use this fixture directly, rather use another fixture that copies these results so that we don't have to repeatedly call tleap/antechamber """ - return _param_small_mol(create_small_molecule.copy(), 'am1-bcc') + return _param_small_mol(cached_small_molecule.copy(), 'am1-bcc') @typedfixture('hasmodel') -def parameterize_am1bcc(create_parameterize_am1bcc): - return create_parameterize_am1bcc.copy() +def mol_with_am1bcc_params(cached_mol_parameterized_with_am1bcc): + return cached_mol_parameterized_with_am1bcc.copy() @pytest.fixture(scope='session') -def create_gaff_model_gasteiger(create_small_molecule): +def cached_mol_parameterized_with_gasteiger(cached_small_molecule): """ We don't use this fixture directly, rather use another fixture that copies these results so that we don't have to repeatedly call tleap/antechamber """ - mol = create_small_molecule.copy() + mol = cached_small_molecule.copy() mol.set_energy_model(mdt.models.GaffSmallMolecule, partial_charges='gasteiger') mol.energy_model.prep() return mol @typedfixture('hasmodel') -def gaff_model_gasteiger(create_gaff_model_gasteiger): - return create_gaff_model_gasteiger.copy() +def mol_with_gast_params(cached_mol_parameterized_with_gasteiger): + return cached_mol_parameterized_with_gasteiger.copy() -def _param_small_mol(create_small_molecule, chargemodel): - mol = create_small_molecule.copy() +def _param_small_mol(cached_small_molecule, chargemodel): + mol = cached_small_molecule.copy() params = mdt.create_ff_parameters(mol, charges=chargemodel, baseff='gaff2') params.assign(mol) mol.set_energy_model(mdt.models.ForceField) @@ -240,11 +272,11 @@ def _param_small_mol(create_small_molecule, chargemodel): @pytest.fixture(scope='session') -def create_protein_default_amber_forcefield(pdb1yu8): +def cached_protein_with_default_amber_ff(cached_pdb1yu8): """ We don't use this fixture directly, rather use another fixture that copies these results so that we don't have to repeatedly call tleap/antechamber """ - mol = pdb1yu8 + mol = cached_pdb1yu8 ff = mdt.forcefields.DefaultAmber() newmol = ff.create_prepped_molecule(mol) newmol.set_energy_model(mdt.models.ForceField) @@ -252,5 +284,5 @@ def create_protein_default_amber_forcefield(pdb1yu8): @typedfixture('hasmodel') -def protein_default_amber_forcefield(create_protein_default_amber_forcefield): - return create_protein_default_amber_forcefield.copy() +def protein_default_amber_forcefield(cached_protein_with_default_amber_ff): + return cached_protein_with_default_amber_ff.copy() diff --git a/moldesign/_tests/object_fixtures.py b/moldesign/_tests/object_fixtures.py index dc91ad3..230beec 100644 --- a/moldesign/_tests/object_fixtures.py +++ b/moldesign/_tests/object_fixtures.py @@ -16,8 +16,7 @@ from .molecule_fixtures import * -registered_types = {} -registered_types.update(molecule_standards) +registered_types = {key:val[:] for key,val in molecule_standards.items()} def typedfixture(*types, **kwargs): """This is a decorator that lets us associate fixtures with one or more arbitrary types. diff --git a/moldesign/_tests/test_atom_bond_computed_properties.py b/moldesign/_tests/test_atom_bond_computed_properties.py index db538cb..b2307d6 100644 --- a/moldesign/_tests/test_atom_bond_computed_properties.py +++ b/moldesign/_tests/test_atom_bond_computed_properties.py @@ -3,11 +3,12 @@ import moldesign as mdt from moldesign import units as u -from .test_ambertools_xface import protein_default_amber_forcefield, pdb1yu8 from .molecule_fixtures import * +from . import helpers from .test_qm_xfaces import h2_rhfwfn + def test_forcefield_atom_term_access(protein_default_amber_forcefield): mol = protein_default_amber_forcefield for atom in mol.atoms: @@ -61,6 +62,11 @@ def test_atom_property_access_to_mulliken_charges(h2_rhfwfn): assert abs(atom.properties.mulliken) <= 1e-5 * u.q_e +def test_atomic_forces(h2_rhfwfn): + mol = h2_rhfwfn + helpers.assert_almost_equal(mol.atoms[0].force, -mol.atoms[1].force) + + def test_atom_properties_are_empty_dict_if_nothings_computed(h2): empty = mdt.utils.DotDict() for atom in h2.atoms: diff --git a/moldesign/_tests/test_atom_containers.py b/moldesign/_tests/test_atom_containers.py index a326499..d791112 100644 --- a/moldesign/_tests/test_atom_containers.py +++ b/moldesign/_tests/test_atom_containers.py @@ -84,7 +84,10 @@ def test_pairwise_distance_arrays(f1, f2, request): o1 = request.getfixturevalue(f1) o2 = request.getfixturevalue(f2) - array = o1.calc_distance_array(o2) + if isinstance(o1, mdt.Atom): + array = np.array([o1.calc_distance(o2)]) + else: + array = o1.calc_distance_array(o2) if o1.num_atoms * o2.num_atoms > 250: # stochastically test larger matrices pairs = ((random.randrange(0, o1.num_atoms), random.randrange(0, o2.num_atoms)) diff --git a/moldesign/_tests/test_atoms.py b/moldesign/_tests/test_atoms.py new file mode 100644 index 0000000..8efc4d5 --- /dev/null +++ b/moldesign/_tests/test_atoms.py @@ -0,0 +1,250 @@ +import pytest + +import moldesign as mdt +from .molecule_fixtures import * +from . import helpers + +__PYTEST_MARK__ = 'internal' + + +def test_create_atom_with_element_as_name(): + he_plus = mdt.Atom("He", position=np.ones(3) * u.nm, formal_charge=1*u.q_e) + assert he_plus.atomic_number == 2 + helpers.assert_almost_equal(he_plus.position, 10 * np.ones(3) * u.angstrom) + assert he_plus.name == 'He' + assert he_plus.element == 'He' + assert he_plus.formal_charge == 1 * u.q_e + + +def test_create_atom_with_uppercase_element(): + cl_minus = mdt.Atom("CL2", element='CL', formal_charge=-1) + assert cl_minus.formal_charge == -1 * u.q_e + assert cl_minus.atnum == 17 + assert cl_minus.name == 'CL2' + assert cl_minus.element == 'Cl' + + +def test_create_atom_with_unrelated_name(): + carbon_named_bob = mdt.Atom("bob", element='c') + assert carbon_named_bob.name == 'bob' + assert carbon_named_bob.atnum == 6 + assert carbon_named_bob.element == 'C' + + +def test_create_atom_with_element_name_as_first_char(): + carbon_alpha = mdt.Atom('ca') + assert carbon_alpha.name == 'ca' + assert carbon_alpha.atnum == 6 + assert carbon_alpha.element == 'C' + + +def test_create_atom_with_atomic_number(): + h = mdt.Atom('ca', atnum=1) + assert h.atnum == 1 + assert h.element == 'H' + assert h.name == 'ca' + + with pytest.raises(AssertionError): + mdt.Atom('ca', atnum=1, element='He') + + +def test_add_atom(h2): + newatom = mdt.Atom('Xe') + h2.add_atom(newatom) + assert newatom in h2.atoms + assert h2.num_atoms == 3 + assert h2.num_residues == 1 + assert h2.num_chains == 1 + assert newatom.residue is h2.residues[0] + assert newatom.chain is h2.chains[0] + + +def test_add_already_owned_atoms(h2): + h2cpy = h2.copy() + h2cpy.translate([10, 11, 12] * u.angstrom) + + h2.add_atoms(h2cpy.atoms) + + assert h2.num_atoms == 4 + assert h2cpy.num_atoms == 2 + + for atom in h2cpy.atoms: + assert atom.molecule is h2cpy + assert atom.residue is h2cpy.residues[0] + assert atom.chain is h2cpy.chains[0] + + for newatom in h2.atoms[2:]: + assert newatom.molecule is h2 + assert newatom.residue is h2.residues[1] + assert newatom.chain is h2.chains[1] + + helpers.assert_almost_equal(h2.positions[2:], + h2cpy.positions) + + +def test_atom_shortstr_3aid(pdb3aid): + assert pdb3aid.atoms[10]._shortstr() == 'CA #10 in A.GLN2' + + +def test_atom_shortstr_benzene(benzene): + for atom in benzene.atoms: + assert atom._shortstr() == '%s #%s' % (atom.name, atom.index) + + +def test_atom_bond_iterators(benzene): + atom = benzene.atoms[0] + assert atom.atnum == 6 # sanity check + atombonds = benzene.bond_graph[atom] + + assert set(atom.bonded_atoms) == set(atombonds.keys()) + assert atom.bond_graph == atombonds + + assert len(atom.heavy_bonds) == 2 + heavybonds = sorted(atom.heavy_bonds, key=lambda x:x.order) + assert len(heavybonds) == 2 + assert heavybonds[0].order == 1 and heavybonds[1].order == 2 + + assert len(atom.bonds) == 3 + hbond, csingle, cdouble = False, False, False + for bond in atom.bonds: + assert bond.a1 is atom # since "atom" has the lowest index, it's always a1 + assert bond.partner(atom) is bond.a2 + + if bond.a2.atnum == 1: + assert hbond is False + hbond = True + assert bond.order == 1 + assert len(bond.a2.heavy_bonds) == 0 + else: + assert bond.a2.atnum == 6 + if bond.order == 1: + assert csingle is False + csingle = True + assert bond == heavybonds[0] + else: + assert cdouble is False + cdouble = True + assert bond.order == 2 + assert bond == heavybonds[1] + + assert hbond and csingle and cdouble + + +def test_atom_bond_to(pdb3aid): + mol = pdb3aid + a1, a2 = mol.atoms[3], mol.atoms[321] # arbitrary atom indices + mol.ff = 'arglebargle' # changing topology should reset this + nbonds = mol.num_bonds + assert a1 not in mol.bond_graph[a2] + + a1.bond_to(a2, 3) + + assert mol.num_bonds == nbonds + 1 + assert mol.ff is None + assert_consistent_bond(mol, a1, a2, 3) + + +def test_delete_bond_with_atoms(pdb3aid): + mol = pdb3aid + a1 = mol.atoms[35] # arbitrary atom + mol.ff = 'arglebargle' # changing topology should reset this + nbonds = mol.num_bonds + a2 = a1.bonded_atoms[-1] + + mol.delete_bond(a1, a2) + + assert mol.num_bonds == nbonds - 1 + assert_not_bonded(mol, a1, a2) + + +def test_delete_bond(pdb3aid): + mol = pdb3aid + a1 = mol.atoms[89] # arbitrary atom + mol.ff = 'arglebargle' # changing topology should reset this + nbonds = mol.num_bonds + a2 = a1.bonded_atoms[-1] + bond = mdt.Bond(a1, a2) + + mol.delete_bond(bond) + + assert mol.num_bonds == nbonds - 1 + assert_not_bonded(mol, a1, a2) + + +def test_bond_object_automatically_associates_with_molecule(pdb3aid): + mol = pdb3aid + a1 = mol.atoms[511] # arbitrary atom + bond_from_atom = a1.bonds[-1] + a2 = bond_from_atom.partner(a1) + + created_bond = mdt.Bond(a2, a1) + + assert created_bond == bond_from_atom + assert created_bond.order == mol.bond_graph[a1][a2] + assert bond_from_atom.order == mol.bond_graph[a1][a2] + assert created_bond.molecule is mol + assert bond_from_atom.molecule is mol + + +def test_change_bond_order_with_bond_object(pdb3aid): + mol = pdb3aid + a1 = mol.atoms[511] # arbitrary atom + bond = a1.bonds[-1] + a2 = bond.partner(a1) + + other_bond_object = mdt.Bond(a1, a2) + oldorder = bond.order + assert oldorder == other_bond_object.order + assert a1.bond_graph[a2] == oldorder + + # First raise the order + bond.order += 1 + assert_consistent_bond(mol, a1, a2, oldorder+1) + + # Now delete the bond by setting it to None + bond.order = None + assert_not_bonded(mol, a1, a2) + + +def test_bond_object_for_unbonded_atoms(pdb3aid): + mol = pdb3aid + a1 = mol.atoms[1] + a2 = mol.atoms[-10] + bond = mdt.Bond(a1, a2) + assert bond.order is None + + bond.order = 2 + assert_consistent_bond(mol, a1, a2, 2) + + +def assert_not_bonded(mol, a1, a2): + assert mol.ff is None + assert a1 not in a2.bond_graph + assert a1 not in a2.bonded_atoms + assert a2 not in a1.bond_graph + assert a2 not in a1.bonded_atoms + assert a2 not in mol.bond_graph[a1] + assert a1 not in mol.bond_graph[a2] + + +def assert_consistent_bond(mol, a1, a2, order): + assert a1 in a2.bond_graph + assert a2 in a1.bond_graph + for bond in a1.bonds: + if bond.a2 is a2: + assert bond.order == order + break + else: + assert False, "bond not found in atom.bonds" + + for bond in a2.bonds: + if bond.a1 is a1: + assert bond.order == order + break + else: + assert False, "bond not found in atom.bonds" + + assert a1.bond_graph[a2] == a2.bond_graph[a1] == order + assert mol.bond_graph[a1][a2] == mol.bond_graph[a2][a1] == order + + diff --git a/moldesign/_tests/test_constraints.py b/moldesign/_tests/test_constraints.py index 032aa5a..a9087be 100644 --- a/moldesign/_tests/test_constraints.py +++ b/moldesign/_tests/test_constraints.py @@ -164,8 +164,8 @@ def test_dihedral_constraint_errors_at_0(four_particle_45_twist): np.testing.assert_allclose(constraint.error().value_in(u.degrees), -angle.magnitude) -def test_cannot_add_duplicate_constraints(parameterize_zeros): - mol = parameterize_zeros +def test_cannot_add_duplicate_constraints(mol_with_zerocharge_params): + mol = mol_with_zerocharge_params mol.constrain_distance(*mol.atoms[:2]) mol.constrain_angle(*mol.atoms[:3]) mol.constrain_dihedral(*mol.atoms[:4]) @@ -189,3 +189,104 @@ def test_cannot_add_duplicate_constraints(parameterize_zeros): mol.constrain_angle(*mol.atoms[1:4]) mol.constrain_dihedral(*mol.atoms[1:5]) mol.constrain_atom(mol.atoms[1]) + + +def test_degrees_of_freedom_with_constraints(benzene, h2): + water = mdt.from_smiles('[OH2]') + water.residues[0].resname = 'HOH' + + benzene.residues[0].resname = 'UNL' + h2.residues[0].resname = 'H2' + + mol = benzene.combine(h2, water, water) + + assert mol.num_atoms == 20 # assert I can count + assert mol.dynamic_dof == 3 * mol.num_atoms + + full_dof = mol.dynamic_dof + + def _constrain_direction(): + c = mdt.geom.constraints.FixedCoordinate(mol.atoms[0], np.array([0.0, 0, 1.0])) + mol.constraints.append(c) + + for constrainer, num_dof, args in ((mol.constrain_distance, 1, mol.atoms[:2]), + (mol.constrain_angle, 1, mol.atoms[:3]), + (mol.constrain_dihedral, 1, mol.atoms[:4]), + (mol.constrain_atom, 3, mol.atoms[:1]), + (mol.constrain_hbonds, 11, [True]), + (_constrain_direction, 1, [])): + constrainer(*args) + assert mol.dynamic_dof == full_dof-num_dof + mol.clear_constraints() + assert mol.dynamic_dof == full_dof + + +def test_degrees_of_freedom_with_integrator_constraints(benzene): + water = mdt.from_smiles('[OH2]') + water.residues[0].resname = 'HOH' + mol = benzene.combine(water) + + full_dof = mol.dynamic_dof + assert full_dof == 3 * mol.num_atoms + + mol.set_energy_model(mdt.models.OpenMMPotential) + mol.set_integrator(mdt.integrators.OpenMMLangevin) + + mol.integrator.params.remove_translation = False + mol.integrator.params.remove_rotation = False + mol.integrator.params.constrain_hbonds = False + mol.integrator.params.constrain_water = False + assert mol.dynamic_dof == full_dof + + mol.integrator.params.constrain_hbonds = True + mol.integrator.params.constrain_water = False + assert mol.dynamic_dof == full_dof - 8 + + mol.integrator.params.constrain_hbonds = False + mol.integrator.params.constrain_water = True + assert mol.dynamic_dof == full_dof - 3 + + mol.integrator.params.constrain_hbonds = True + mol.integrator.params.constrain_water = True + assert mol.dynamic_dof == full_dof - 9 + + mol.clear_constraints() # does nothing, constraints are in the integrator + assert mol.dynamic_dof == full_dof - 9 + + mol.integrator.params.constrain_hbonds = False + mol.integrator.params.constrain_water = False + assert mol.dynamic_dof == full_dof + + mol.integrator.params.remove_translation = True + mol.integrator.params.remove_rotation = True + mol.integrator.params.constrain_hbonds = True + mol.integrator.params.constrain_water = True + assert mol.dynamic_dof == full_dof - 9 - 6 + + + +def test_degrees_of_freedom_with_integrator_reorientation(pdb3aid): + mol = pdb3aid + + full_dof = mol.dynamic_dof + + mol.set_energy_model(mdt.models.OpenMMPotential) + mol.set_integrator(mdt.integrators.OpenMMLangevin) + + mol.integrator.params.remove_translation = False + mol.integrator.params.remove_rotation = False + mol.integrator.params.constrain_hbonds = False + mol.integrator.params.constrain_water = False + assert mol.dynamic_dof == full_dof + + mol.integrator.params.remove_translation = False + mol.integrator.params.remove_rotation = True + assert mol.dynamic_dof == full_dof - 3 + + mol.integrator.params.remove_translation = True + mol.integrator.params.remove_rotation = False + assert mol.dynamic_dof == full_dof - 3 + + mol.integrator.params.remove_translation = True + mol.integrator.params.remove_rotation = True + assert mol.dynamic_dof == full_dof - 6 diff --git a/moldesign/_tests/test_copies.py b/moldesign/_tests/test_copies.py index a71ef1d..7b0019b 100644 --- a/moldesign/_tests/test_copies.py +++ b/moldesign/_tests/test_copies.py @@ -1,4 +1,6 @@ from builtins import zip + +import itertools import numpy as np import pytest @@ -81,7 +83,7 @@ def test_copy_atoms(fixture_key, request): oldatoms = oldobj.atoms atoms = oldatoms.copy_atoms() assert isinstance(atoms, mdt.AtomList) - _test_copy_integrity(atoms, oldatoms) + assert_atom_copy_integrity(atoms, oldatoms) @pytest.mark.parametrize('fixture_key', @@ -93,25 +95,46 @@ def test_copy_atoms_in_containers(fixture_key, request): newobj = oldobj.copy() assert newobj.__class__ is oldobj.__class__ - _test_copy_integrity(newobj.atoms, oldobj.atoms) - - -def _test_copy_integrity(atoms, oldatoms): - for old, new in zip(oldatoms, atoms): - assert (old.position == new.position).all() - assert (old.velocity == new.velocity).all() - - new.x += 1.0 * u.angstrom - assert all((old.position == new.position) == [False, True, True]) - - new.px += 1.0 * u.angstrom * u.amu / u.fs - assert all((old.momentum == new.momentum) == [False, True, True]) - - assert old.residue is not new.residue - assert old.residue.name == new.residue.name - assert old.chain is not new.chain - assert old.chain.name == new.chain.name - assert (old.molecule is new.molecule is None) or (old.molecule is not new.molecule) + assert_atom_copy_integrity(newobj.atoms, oldobj.atoms) + + +def assert_atom_copy_integrity(atoms, oldatoms): + newmol = atoms[0].molecule + assert len(atoms) == len(oldatoms) + for oldatom, atom in itertools.zip_longest(oldatoms, atoms): + assert oldatom.name == atom.name + assert oldatom.atnum == atom.atnum + assert oldatom.mass == atom.mass + assert (oldatom.position == atom.position).all() + assert (oldatom.velocity == atom.velocity).all() + + atom.x += 1.0 * u.angstrom + assert all((oldatom.position == atom.position) == [False, True, True]) + + atom.px += 1.0 * u.angstrom * u.amu / u.fs + assert all((oldatom.momentum == atom.momentum) == [False, True, True]) + + assert oldatom.residue is not atom.residue + assert oldatom.residue.name == atom.residue.name + assert oldatom.chain is not atom.chain + assert oldatom.chain.name == atom.chain.name + assert (oldatom.molecule is atom.molecule is None) or (oldatom.molecule is not atom.molecule) + + assert atom.molecule is newmol # might be None + + if oldatom.residue is not None: + assert atom.residue.molecule is newmol + assert atom in atom.residue + assert atom.residue[atom.name] is atom + if oldatom.chain is not None: + assert atom.chain.molecule is newmol + assert atom in atom.chain.atoms + assert atom.residue in atom.chain + assert atom.chain[atom.residue.name] is atom.residue + if newmol is not None: + assert atom.residue is newmol.residues[atom.residue.index] + assert atom.chain is newmol.chains[atom.chain.index] + assert atom is newmol.atoms[atom.index] @pytest.mark.parametrize('fixture_key', moldesign_objects) @@ -167,8 +190,8 @@ def test_chain_rename(pdb3aid): assert newmol.chains[1].name == 'B' -@pytest.mark.parametrize('molkey', ["create_parameterize_am1bcc", - "create_protein_default_amber_forcefield"]) +@pytest.mark.parametrize('molkey', ["cached_mol_parameterized_with_zeros", + "cached_protein_with_default_amber_ff"]) def test_forcefield_copied_with_molecule(molkey, request): mol = request.getfixturevalue(molkey) m2 = mol.copy() @@ -186,8 +209,8 @@ def test_forcefield_copied_with_molecule(molkey, request): assert p1.improper_types == p2.improper_types -def test_constraints_copied_with_molecule(parameterize_zeros): - mol = parameterize_zeros +def test_constraints_copied_with_molecule(mol_with_zerocharge_params): + mol = mol_with_zerocharge_params mol.constrain_distance(*mol.atoms[:2]) mol.constrain_angle(*mol.atoms[:3]) diff --git a/moldesign/_tests/test_molecules.py b/moldesign/_tests/test_molecules.py index aab2eb0..75c9674 100644 --- a/moldesign/_tests/test_molecules.py +++ b/moldesign/_tests/test_molecules.py @@ -8,8 +8,8 @@ import moldesign.utils.classes from .object_fixtures import * -from .test_ambertools_xface import protein_default_amber_forcefield -from .test_qm_xfaces import h2_rhfwfn +from .molecule_fixtures import * +from . import helpers __PYTEST_MARK__ = 'internal' # mark all tests in this module with this label (see ./conftest.py) @@ -48,6 +48,12 @@ def test_h2_array_link(h2): assert h2.atoms[1].py == 3.0*u.default.momentum +def test_benzene_orbital_numbers(benzene): + assert benzene.num_electrons == 42 + assert benzene.homo == 20 + assert benzene.lumo == 21 + + def test_h2_set_coord_slices(h2): mol = h2.copy() mol.positions[:] = np.zeros((2,3)) * u.angstrom @@ -228,6 +234,10 @@ def test_pickling(objkey, request): assert type(y) == type(obj) +def test_degrees_of_freedom(benzene): + assert benzene.dynamic_dof == 36 + + @pytest.mark.parametrize('objkey', registered_types['equality']) def test_pickled_equality(objkey, request): obj = request.getfixturevalue(objkey) @@ -245,8 +255,8 @@ def test_pickled_equality(objkey, request): def test_h2_positions(h2): atom1, atom2 = h2.atoms assert (atom1.position == np.array([0.5, 0.0, 0.0]) * u.angstrom).all() - assert atom2.x == -0.5 * u.angstrom - assert atom1.distance(atom2) == 1.0 * u.angstrom + assert atom2.x == -0.25 * u.angstrom + assert atom1.distance(atom2) == 0.75 * u.angstrom def test_h2(h2): @@ -302,11 +312,14 @@ def test_h2_trajectory(h2_trajectory): assert frame.positions[0, 0] < -0.1 * u.angstrom -def test_markdown_reprs_work(nucleic): +@pytest.mark.parametrize('molkey', ['nucleic', 'pdb3aid', 'h2', 'mol_with_zerocharge_params']) +@pytest.mark.screening +def test_markdown_reprs_work(molkey, request): # Not bothering to test the content, just want to make sure there's no error - assert isinstance(nucleic._repr_markdown_(), basestring) - assert isinstance(nucleic.atoms[0]._repr_markdown_(), basestring) - assert isinstance(nucleic.residues[0]._repr_markdown_(), basestring) + mol = request.getfixturevalue(molkey) + assert isinstance(mol._repr_markdown_(), basestring) + assert isinstance(mol.atoms[0]._repr_markdown_(), basestring) + assert isinstance(mol.residues[0]._repr_markdown_(), basestring) def test_dna_and_hydrogen_are_different(nucleic, h2): @@ -314,6 +327,15 @@ def test_dna_and_hydrogen_are_different(nucleic, h2): assert not nucleic.is_identical(h2) +def test_attribute_error_without_simulation_setup(h2): + with pytest.raises(AttributeError): + h2.calculate() + with pytest.raises(AttributeError): + h2.minimize() + with pytest.raises(AttributeError): + h2.run(500) + + def test_changing_momenta_and_positions_makes_mols_different(nucleic): n = nucleic.copy() assert nucleic.same_topology(n) @@ -363,3 +385,42 @@ def test_changing_bonds_makes_mols_different(nucleic): n.bond_graph[atom1][atom2] = n.bond_graph[atom2][atom1] = 1 assert not nucleic.is_identical(n, verbose=True) assert not nucleic.same_topology(n, verbose=True) + + +def test_charge_from_number(h2): + h2plus = mdt.Molecule(h2, charge=1) + assert h2plus.charge == 1 * u.q_e + + h2plus.charge = 2 * u.q_e + assert h2plus.charge == 2 * u.q_e + + h2plus.charge = 3 + assert h2plus.charge == 3 * u.q_e + + +def test_energy_model_charges(h2): + h2.charge = 1 * u.q_e + assert h2.charge == 1 * u.q_e + h2.set_energy_model(mdt.models.OpenMMPotential) + + assert h2.energy_model.params.charge == 1 * u.q_e + + h2.set_energy_model(mdt.models.OpenMMPotential, charge=3*u.q_e) + # TODO: test for warning here as well + assert h2.energy_model.params.charge == 3 * u.q_e + + +def test_initialization_charges(): + a1 = mdt.Atom('Na', formal_charge=-1) + mol = mdt.Molecule([a1]) + assert mol.charge == -1 * u.q_e + + with pytest.raises(TypeError): + mdt.Atom('H', charge=3) # it needs to be "formal_charge", to distinguish from partial charge + + m2 = mdt.Molecule([a1], charge=-1) + assert m2.charge == -1 * u.q_e + + m2 = mdt.Molecule([a1], charge=-3*u.q_e) + # TODO: test for warning + assert m2.charge == -3 * u.q_e diff --git a/moldesign/_tests/test_openmm_xface.py b/moldesign/_tests/test_openmm_xface.py index a67f76b..30c5d02 100644 --- a/moldesign/_tests/test_openmm_xface.py +++ b/moldesign/_tests/test_openmm_xface.py @@ -27,8 +27,8 @@ def protein(protein_default_amber_forcefield): @pytest.fixture -def small_mol(gaff_model_gasteiger): - mol = gaff_model_gasteiger +def small_mol(mol_with_gast_params): + mol = mol_with_gast_params mol.energy_model.params.compute_platform = 'cpu' mol.energy_model.params.num_cpus = 1 mol.minimize(force_tolerance=0.5*u.eV/u.angstrom) # perform a very partial minimization diff --git a/moldesign/_tests/test_pdbfixer_xface.py b/moldesign/_tests/test_pdbfixer_xface.py index 8c0e1b4..ec5dd5a 100644 --- a/moldesign/_tests/test_pdbfixer_xface.py +++ b/moldesign/_tests/test_pdbfixer_xface.py @@ -25,7 +25,7 @@ import pytest -from .molecule_fixtures import pdb3aid, benzene, pdb1yu8 +from .molecule_fixtures import * def test_inchain_residue_mutation_in_protein(pdb3aid): diff --git a/moldesign/_tests/test_qm_xfaces.py b/moldesign/_tests/test_qm_xfaces.py index 86a4aad..a891c59 100644 --- a/moldesign/_tests/test_qm_xfaces.py +++ b/moldesign/_tests/test_qm_xfaces.py @@ -8,52 +8,23 @@ from moldesign import units as u from . import helpers - -registered_types = {} +from .molecule_fixtures import * # TODO: automated method testing based on its metadata - i.e. test to make sure parameters are # honored, test that it calcultes what it says it does, test that properties have the right # units and array shapes, etc. -def typedfixture(*types, **kwargs): - """This is a decorator that lets us associate fixtures with one or more arbitrary types. - We'll later use this type to determine what tests to run on the result""" - - def fixture_wrapper(func): - for t in types: - registered_types.setdefault(t, []).append(func.__name__) - return pytest.fixture(**kwargs)(func) - - return fixture_wrapper - - -@typedfixture('molecule', scope='function') -def h2(): - mol = mdt.Molecule([mdt.Atom('H'), - mdt.Atom('H')]) - mol.atoms[1].z = 0.75 * u.angstrom - return mol - - -@typedfixture('molecule', scope='function') -def heh_plus(): - mol = mdt.Molecule([mdt.Atom('H'), - mdt.Atom('He')]) - mol.atoms[1].z = 1.0 * u.angstrom - mol.charge = 1 * u.q_e - return mol - - # Note that this is an incomplete set of models models_to_test = list(itertools.product((mdt.models.NWChemQM, mdt.models.PySCFPotential), 'sto-3g 6-31g'.split(), 'rhf rks mp2'.split())) model_ids = ['/'.join((model.__name__, theory, basis)) for (model, theory, basis) in models_to_test] +TESTSET = ['h2', 'small_molecule', 'benzene'] + @pytest.fixture(params=models_to_test, ids=model_ids, scope='function') -@pytest.mark.screening def h2_with_model(request, h2): model, basis, theory = request.param @@ -101,11 +72,11 @@ def nwchem_dft(): @pytest.fixture def h2_rhfwfn(h2): h2.set_energy_model(mdt.models.PySCFPotential, basis='sto-3g', theory='rhf') - h2.calculate() + h2.calculate(requests=['forces']) return h2 -@pytest.mark.parametrize('objkey', registered_types['molecule']) +@pytest.mark.parametrize('objkey', TESTSET) def test_pyscf_rhf_sto3g_properties(objkey, request): mol = request.getfixturevalue(objkey) mol.set_energy_model(mdt.models.PySCFPotential, basis='sto-3g', theory='rhf') @@ -120,7 +91,7 @@ def test_pyscf_rhf_sto3g_properties(objkey, request): - mol.charge.value_in(u.q_e) -@pytest.mark.parametrize('objkey', registered_types['molecule']) +@pytest.mark.parametrize('objkey', TESTSET) def test_pyscf_rhf_sto3g_matrices(objkey, request): mol = request.getfixturevalue(objkey) mol.set_energy_model(mdt.models.PySCFPotential, basis='sto-3g', theory='rhf') @@ -154,7 +125,7 @@ def test_pyscf_casci(h2): # TODO: actually test results -@pytest.mark.parametrize('objkey', registered_types['molecule']) +@pytest.mark.parametrize('objkey', TESTSET) def test_pyscf_rhf_sto3g_forces(objkey, request): mol = request.getfixturevalue(objkey) mol.set_energy_model(mdt.models.PySCFPotential, basis='sto-3g', theory='rhf') @@ -174,6 +145,7 @@ def test_calc_eri_tensor(h2): eris[0,1,2,1] +@pytest.mark.screening def test_aobasis(h2_rhfwfn): # it's sto-3g, so structure is simple aobasis = h2_rhfwfn.wfn.aobasis diff --git a/moldesign/_tests/test_symmetry.py b/moldesign/_tests/test_symmetry.py index a2da7af..a40c9e2 100644 --- a/moldesign/_tests/test_symmetry.py +++ b/moldesign/_tests/test_symmetry.py @@ -5,7 +5,7 @@ from moldesign import units as u from moldesign.external import transformations -from .molecule_fixtures import benzene +from .molecule_fixtures import * @pytest.fixture diff --git a/moldesign/data/atomic.py b/moldesign/data/atomic.py index d824bc8..c28b629 100644 --- a/moldesign/data/atomic.py +++ b/moldesign/data/atomic.py @@ -17,43 +17,26 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import yaml from moldesign import units as u +from . import PACKAGEPATH + __all__ = 'ATOMIC_MASSES ATOMIC_NUMBERS ELEMENTS SYMBOLS'.split() -ATOMIC_NUMBERS = {'Ac': 89, 'Ag': 47, 'Al': 13, 'Am': 95, 'Ar': 18, 'As': 33, 'At': 85, 'Au': 79, - 'B': 5, 'Ba': 56, 'Be': 4, 'Bh': 107, 'Bi': 83, 'Bk': 97, 'Br': 35, 'C': 6, - 'Ca': 20, 'Cd': 48, 'Ce': 58, 'Cf': 98, 'Cl': 17, 'Cm': 96, 'Cn': 112, 'Co': 27, - 'Cr': 24, 'Cs': 55, 'Cu': 29, 'Db': 105, 'Ds': 110, 'Dy': 66, 'Er': 68, 'Es': 99, - 'Eu': 63, 'F': 9, 'Fe': 26, 'Fm': 100, 'Fr': 87, 'Ga': 31, 'Gd': 64, 'Ge': 32, - 'H': 1, 'He': 2, 'Hf': 72, 'Hg': 80, 'Ho': 67, 'Hs': 108, 'I': 53, 'In': 49, 'Ir': 77, - 'K': 19, 'Kr': 36, 'La': 57, 'Li': 3, 'Lr': 103, 'Lu': 71, 'Md': 101, 'Mg': 12, - 'Mn': 25, 'Mo': 42, 'Mt': 109, 'N': 7, 'Na': 11, 'Nb': 41, 'Nd': 60, 'Ne': 10, - 'Ni': 28, 'No': 102, 'Np': 93, 'O': 8, 'Os': 76, 'P': 15, 'Pa': 91, 'Pb': 82, - 'Pd': 46, 'Pm': 61, 'Po': 84, 'Pr': 59, 'Pt': 78, 'Pu': 94, 'Ra': 88, 'Rb': 37, - 'Re': 75, 'Rf': 104, 'Rg': 111, 'Rh': 45, 'Rn': 86, 'Ru': 44, 'S': 16, 'Sb': 51, - 'Sc': 21, 'Se': 34, 'Sg': 106, 'Si': 14, 'Sm': 62, 'Sn': 50, 'Sr': 38, 'Ta': 73, - 'Tb': 65, 'Tc': 43, 'Te': 52, 'Th': 90, 'Ti': 22, 'Tl': 81, 'Tm': 69, 'U': 92, - 'Uuh': 116, 'Uuo': 118, 'Uup': 115, 'Uuq': 114, 'Uus': 117, 'Uut': 113, 'V': 23, - 'W': 74, 'Xe': 54, 'Y': 39, 'Yb': 70, 'Zn': 30, 'Zr': 40} +with open(os.path.join(PACKAGEPATH, '_static_data', 'nist_atomic.yml'), 'r') as ymlfile: + isotopes = yaml.load(ymlfile) + +ATOMIC_NUMBERS = {record[0]['symbol']: atnum for atnum, record in isotopes.items()} ELEMENTS = {val: key for key, val in ATOMIC_NUMBERS.items()} SYMBOLS = ELEMENTS # Isotopic masses for the most abundant species of each element # from https://www.ncsu.edu/chemistry/msf/pdf/IsotopicMass_NaturalAbundance.pdf -ATOMIC_MASSES = {i: m*u.amu for i, m in zip(range(1, 55), ( - 1.007825, 4.002603, 7.016004, 9.012182, 11.009305, 12.0, 14.003074, 15.994915, 18.998403, - 19.99244, 22.98977, - 23.985042, 26.981538, 27.976927, 30.973762, 31.972071, 34.968853, 39.962383, 38.963707, - 39.962591, 44.95591, - 47.947947, 50.943964, 51.940512, 54.93805, 55.934942, 58.9332, 57.935348, 62.929601, 63.929147, - 68.925581, - 73.921178, 74.921596, 79.916522, 78.918338, 83.911507, 84.911789, 87.905614, 88.905848, - 89.904704, 92.906378, - 97.905408, 97.907216, 101.90435, 102.905504, 107.903894, 106.905093, 113.903358, 114.903878, - 119.902197, 120.903818, - 129.906223, 126.904468, 131.904154))} +ATOMIC_MASSES = {atnum: records[0]['mass']*u.amu for atnum, records in isotopes.items()} + for atnum, mass in list(ATOMIC_MASSES.items()): ATOMIC_MASSES[ELEMENTS[atnum]] = mass # index by atnum and symbol diff --git a/moldesign/fileio.py b/moldesign/fileio.py index 1bf3de4..c6cef64 100644 --- a/moldesign/fileio.py +++ b/moldesign/fileio.py @@ -321,7 +321,6 @@ def read_xyz(f): tempmol = openbabel_interface.read_stream(f, 'xyz') for atom in tempmol.atoms: atom.residue = None - atom.chain = None return mdt.Molecule(tempmol.atoms) diff --git a/moldesign/geom/constraints.py b/moldesign/geom/constraints.py index 308c560..89322cf 100644 --- a/moldesign/geom/constraints.py +++ b/moldesign/geom/constraints.py @@ -257,30 +257,34 @@ def atomgrad(self, atom=None): class HBondsConstraint(GeometryConstraint): - """ Constraints the lengths of all bonds involving hydrogen to their equilibrium forcefield - values. + """ Constrains the lengths of all bonds involving hydrogen. - Generally, this is used to signal to a molecular dynamics program to apply H-bond constraints. - - Note: - This constraint can only be applied to molecules with an assigned forcefield. + By default, the lengths will be constrained to their forcefield equilibrium values. + They can also be constrained to their current values by setting ``usecurrent=True`` Args: mol (moldesign.Molecule): Constrain all h-bonds in this molecule - values (Vector[length]): constrained bond distances for all atoms (if not given, then - these are automatically set to their current values) + usecurrent (bool): if False (default), set the constraint values to their forcefield + equilibrium values (this will fail if no forcefield is assigned). If True, constrain + the hydrogen bonds at the current values. + + Raises: + AttributeError: if usecurrent=False but no forcefield is assigned """ desc = 'hbonds' - def __init__(self, mol): + def __init__(self, mol, usecurrent=False): self.mol = mol self.bonds = [] self.subconstraints = [] for bond in self.mol.bonds: if bond.a1.atnum == 1 or bond.a2.atnum == 1: self.bonds.append(bond) - self.subconstraints.append(DistanceConstraint(bond.a1, bond.a2, - value=bond.ff.equilibrium_length)) + if usecurrent: + l = bond.length + else: + l = bond.ff.equilibrium_length + self.subconstraints.append(DistanceConstraint(bond.a1, bond.a2, value=l)) self.values = [c.current() for c in self.subconstraints] def copy(self, mol=None): @@ -383,7 +387,7 @@ def atomgrad(self, atom=None): return [self.vector] * u.ureg.dimensionless def _constraintsig(self): - return super()._constraintsig() + (self.vector,) + return super()._constraintsig() + tuple(self.vector) def get_base_constraints(constraintlist): diff --git a/moldesign/helpers/helpers.py b/moldesign/helpers/helpers.py index 2b5f026..645d427 100644 --- a/moldesign/helpers/helpers.py +++ b/moldesign/helpers/helpers.py @@ -163,7 +163,5 @@ def restore_topology(mol, topo): res.pdbindex = refres.pdbindex res.name = refres.name res.chain = chain_map[refres.chain] - for atom in res.atoms: - atom.chain = res.chain return mdt.Molecule(mol.atoms) diff --git a/moldesign/interfaces/ambertools.py b/moldesign/interfaces/ambertools.py index ba6f1c0..f8adb9e 100644 --- a/moldesign/interfaces/ambertools.py +++ b/moldesign/interfaces/ambertools.py @@ -184,8 +184,6 @@ def finish_job(job): newchain = mdt.Chain('B') for residue in mol.residues[mol.num_residues//2:]: residue.chain = newchain - for atom in residue: - atom.chain = newchain mol = mdt.Molecule(mol) mdt.helpers.assign_biopolymer_bonds(mol) @@ -280,7 +278,7 @@ def _prep_for_tleap(mol): print('INFO: disulfide bond detected. Renaming %s from CYS to CYX' % residue) sulfur.residue.resname = 'CYX' - clean._rebuild_topology() + clean._rebuild_from_atoms() return clean diff --git a/moldesign/interfaces/openbabel.py b/moldesign/interfaces/openbabel.py index 30eb808..53daa58 100644 --- a/moldesign/interfaces/openbabel.py +++ b/moldesign/interfaces/openbabel.py @@ -269,18 +269,13 @@ def pybel_to_mol(pbmol, newatoms.append(mdtatom) - newtopo = {} for ibond in range(pbmol.OBMol.NumBonds()): obbond = pbmol.OBMol.GetBond(ibond) a1 = newatom_map[obbond.GetBeginAtomIdx()] a2 = newatom_map[obbond.GetEndAtomIdx()] order = obbond.GetBondOrder() - if a1 not in newtopo: - newtopo[a1] = {} - if a2 not in newtopo: - newtopo[a2] = {} - newtopo[a1][a2] = order - newtopo[a2][a1] = order + bond = mdt.Bond(a1, a2) + bond.order = order if reorder_atoms_by_residue and primary_structure: resorder = {} @@ -288,9 +283,7 @@ def pybel_to_mol(pbmol, resorder.setdefault(atom.residue, len(resorder)) newatoms.sort(key=lambda a: resorder[a.residue]) - return mdt.Molecule(newatoms, - bond_graph=newtopo, - **kwargs) + return mdt.Molecule(newatoms, **kwargs) def from_smiles(smi, name=None): diff --git a/moldesign/interfaces/openmm.py b/moldesign/interfaces/openmm.py index a787571..8c6b2dd 100644 --- a/moldesign/interfaces/openmm.py +++ b/moldesign/interfaces/openmm.py @@ -292,7 +292,6 @@ def topology_to_mol(topo, name=None, positions=None, velocities=None, assign_bon for atom in residue.atoms(): newatom = atommap[atom] newatom.residue = newresidue - newatom.chain = newchain newresidue.add(newatom) # Bonds @@ -304,13 +303,13 @@ def topology_to_mol(topo, name=None, positions=None, velocities=None, assign_bon bonds[na1] = {} if na2 not in bonds: bonds[na2] = {} - bonds[na1][na2] = 1 - bonds[na2][na1] = 1 + b = mdt.Bond(na1, na2) + b.order = 1 if name is None: name = 'Unnamed molecule from OpenMM' - newmol = mdt.Molecule(newatoms, bond_graph=bonds, name=name) + newmol = mdt.Molecule(newatoms, name=name) if assign_bond_orders: for residue in newmol.residues: diff --git a/moldesign/interfaces/parmed_interface.py b/moldesign/interfaces/parmed_interface.py index 79c5259..bc3ee6a 100644 --- a/moldesign/interfaces/parmed_interface.py +++ b/moldesign/interfaces/parmed_interface.py @@ -179,7 +179,6 @@ def parmed_to_mdt(pmdmol): atom.residue = residue residue.add(atom) - atom.chain = chain assert patm not in atoms atoms[patm] = atom @@ -284,8 +283,6 @@ def _reassign_chains(f, mol): for residue in mol.residues: newchain = reschains[residue.resname, str(residue.pdbindex), residue.chain.name] - for atom in residue.atoms: - atom.chain = newchain residue.chain = newchain return mdt.Molecule(mol.atoms, diff --git a/moldesign/interfaces/pdbfixer_interface.py b/moldesign/interfaces/pdbfixer_interface.py index a72cc03..a65d5f7 100644 --- a/moldesign/interfaces/pdbfixer_interface.py +++ b/moldesign/interfaces/pdbfixer_interface.py @@ -113,8 +113,6 @@ def mutate_residues(mol, residue_map): residues_to_copy.append(mutant_res) mutant_res.mol = None mutant_res.chain = oldres.chain - for atom in mutant_res.atoms: - atom.chain = oldres.chain else: residues_to_copy.append(oldres) diff --git a/moldesign/models/openmm.py b/moldesign/models/openmm.py index 4fd1852..5d12949 100644 --- a/moldesign/models/openmm.py +++ b/moldesign/models/openmm.py @@ -230,13 +230,10 @@ def _set_constraints(self): for constraint in self.mol.constraints: if constraint.desc == 'position': fixed_atoms.add(constraint.atom) - self.mol.assert_atom(constraint.atom) system.setParticleMass(constraint.atom.index, 0.0) # Constrain distances between atom pairs elif constraint.desc == 'distance': - self.mol.assert_atom(constraint.a1) - self.mol.assert_atom(constraint.a2) system.addConstraint(constraint.a1.index, constraint.a2.index, opm.pint2simtk(constraint.value)) diff --git a/moldesign/molecules/README.md b/moldesign/molecules/README.md index c0f55ef..bc53d5e 100644 --- a/moldesign/molecules/README.md +++ b/moldesign/molecules/README.md @@ -1,6 +1,6 @@ ## Molecule subpackage -The `moldesign.molecule` subpackage contains class definitions for molecular data, -including atomic, molecular, biomolecular, and trajectory data structures. +The `moldesign.molecule` subpackage contains MDT's core molecular data structures. + ### Naming and indexing conventions diff --git a/moldesign/molecules/__init__.py b/moldesign/molecules/__init__.py index b4ab73f..03a9161 100644 --- a/moldesign/molecules/__init__.py +++ b/moldesign/molecules/__init__.py @@ -4,6 +4,7 @@ def toplevel(o): __all__ = [] +from .bond_graph import * from .coord_arrays import * from .atomcollections import * from .bonds import * @@ -11,5 +12,6 @@ def toplevel(o): from .biounits import * from .residue import * from .chain import * +from .primary_structure import * from .molecule import * from .trajectory import * diff --git a/moldesign/molecules/atomcollections.py b/moldesign/molecules/atomcollections.py index 381e4c0..fb07b22 100644 --- a/moldesign/molecules/atomcollections.py +++ b/moldesign/molecules/atomcollections.py @@ -75,7 +75,7 @@ def momentum(self): def velocity(self): """ u.Vector[velocity]: center of mass velocity of this object """ - return old_div(self.momentum,self.mass) + return self.momentum/self.mass @property def kinetic_energy(self): @@ -234,72 +234,29 @@ def dihedral(self, a1, a2, a3=None, a4=None): return mdt.geom.dihedral(*list(map(self._getatom, (a1, a2, a3, a4)))) def copy_atoms(self): - """ - Copy a group of atoms which may already have bonds, residues, and a parent molecule - assigned. Do so by copying only the relevant entities, and creating a "mask" with - deepcopy's memo function to stop anything else from being copied. + """ Copy a group of atoms along and relevant topological information. + + This specifically copies: + - the atoms themselves (along with their positions, momenta, and bond graphs) + - any residues they are are part of + - any chains they are part of + - any bonds between them + It does NOT copy: + - other atoms that are part of these atoms' residues + - other residues that are part of these atoms chains + - the molecule these atoms are part of Returns: AtomList: list of copied atoms """ - from . import ChildList - - oldatoms = self.atoms - old_bond_graph = {a: {} for a in self.atoms} + graph = {} + memo = {'bondgraph':graph} for atom in self.atoms: - for nbr in atom.bond_graph: - if nbr in old_bond_graph: - old_bond_graph[atom][nbr] = atom.bond_graph[nbr] - - # Figure out which bonds, residues and chains to copy - tempatoms = AtomList([copy.copy(atom) for atom in oldatoms]) - old_to_new = dict(zip(oldatoms, tempatoms)) - temp_bond_graph = {a: {} for a in tempatoms} - replaced = {} - for atom, oldatom in zip(tempatoms, oldatoms): - atom.molecule = None - atom.bond_graph = {} - - if atom.chain is not None: - if atom.chain not in replaced: - chain = copy.copy(atom.chain) - chain.molecule = None - chain.children = ChildList(chain) - replaced[atom.chain] = chain - else: - chain = replaced[atom.chain] - - atom.chain = chain - - if atom.residue is not None: - if atom.residue not in replaced: - res = copy.copy(atom.residue) - res.molecule = None - res.chain = atom.chain - res.children = ChildList(res) - - res.chain.add(res) - replaced[atom.residue] = res - else: - res = replaced[atom.residue] - - atom.residue = res - assert atom.residue.chain is atom.chain - res.add(atom) - - for oldnbr, bondorder in oldatom.bond_graph.items(): - if oldnbr not in old_to_new: continue - newnbr = old_to_new[oldnbr] - temp_bond_graph[atom][newnbr] = bondorder - - # Finally, do a deepcopy, which bring all the information along without linking it - newatoms, new_bond_graph = copy.deepcopy((tempatoms, temp_bond_graph)) - - for atom, original in zip(newatoms, oldatoms): - atom.bond_graph = new_bond_graph[atom] - atom.position = original.position - atom.momentum = original.momentum - + atom._subcopy(memo) + tempatoms = [memo[atom] for atom in self.atoms] + newatoms, newbonds = copy.deepcopy((tempatoms, graph)) + for atom in newatoms: + atom.bond_graph = newbonds[atom] return AtomList(newatoms) ########################################### @@ -421,10 +378,10 @@ def bonds(self): for atom, nbrs in bg.items(): for nbr, order in nbrs.items(): if atom.index < nbr.index or nbr not in bg: - yield mdt.Bond(atom,nbr, order) + yield mdt.Bond(atom, nbr) def get_bond(self, a1, a2): - return mdt.Bond(a1, a2, order=self.bond_graph[a1][a2]) + return mdt.Bond(a1, a2) @property def internal_bonds(self): @@ -434,7 +391,7 @@ def internal_bonds(self): for atom, nbrs in bg.items(): for nbr, order in nbrs.items(): if atom.index < nbr.index and nbr in bg: - yield mdt.Bond(atom, nbr, order) + yield mdt.Bond(atom, nbr) @property def external_bonds(self): @@ -445,7 +402,7 @@ def external_bonds(self): for atom, nbrs in bg.items(): for nbr, order in nbrs.items(): if nbr not in bg: - yield mdt.Bond(atom, nbr, order) + yield mdt.Bond(atom, nbr) @property def bonded_atoms(self): diff --git a/moldesign/molecules/atoms.py b/moldesign/molecules/atoms.py index 7bbe043..06eb918 100644 --- a/moldesign/molecules/atoms.py +++ b/moldesign/molecules/atoms.py @@ -161,19 +161,34 @@ class Atom(AtomPropertyMixin): ################################################################# # Methods for BUILDING the atom and indexing it in a molecule - def __init__(self, name=None, atnum=None, mass=None, chain=None, residue=None, + def __init__(self, name=None, atnum=None, mass=None, residue=None, formal_charge=None, pdbname=None, pdbindex=None, element=None, metadata=None, position=None, momentum=None): # Allow user to instantiate an atom as Atom(6) or Atom('C') if atnum is None and element is None: - if isinstance(name, int): - atnum = name - name = None - else: element = name + if isinstance(name, int): # Ex: name=6 becomes atnum=6 + atnum, name = name, None + elif name in data.ATOMIC_NUMBERS: # Ex: name='Na' becomes element='Na' + element, name = name, None + + # Determine the element + if atnum: + self.atnum = atnum + if element: + assert atnum == data.ATOMIC_NUMBERS[element.capitalize()], \ + "Atomic number '%s' does match specified element '%s'" % (atnum, element) + elif element: + if element.capitalize() in data.ATOMIC_NUMBERS: + self.atnum = data.ATOMIC_NUMBERS[element.capitalize()] + else: + raise KeyError("Unknown element '%s'" % element) + elif name[0].upper() in data.ATOMIC_NUMBERS: # Ex: name='Ca' -> atnum=6 + self.atnum = data.ATOMIC_NUMBERS[name[0].upper()] + else: + raise KeyError('Could not determine the atomic number of this atom. ' + 'You can set it explicitly with the "atnum" keyword.') - if element: self.atnum = data.ATOMIC_NUMBERS[element] - else: self.atnum = atnum self.name = utils.if_not_none(name, self.elem) self.pdbname = utils.if_not_none(pdbname, self.name) @@ -183,8 +198,9 @@ def __init__(self, name=None, atnum=None, mass=None, chain=None, residue=None, else: self.mass = mass self.formal_charge = utils.if_not_none(formal_charge, 0.0 * u.q_e) + if not hasattr(self.formal_charge, 'units'): + self.formal_charge *= u.q_e self.residue = residue - self.chain = chain self.molecule = None self.index = None if position is None: @@ -202,6 +218,53 @@ def __init__(self, name=None, atnum=None, mass=None, chain=None, residue=None, if metadata: self.metadata.update(metadata) + def _subcopy(self, memo=None): + """ Private data mangement method for copying the local substructure of an atom. + This is a shallow copy, and is intended to be deepcopied to avoid corrupting the original + atom's data. + + Generally, the public interface for this is the ``copy`` methods of objects like + Molecules, AtomLists, Residues etc. ... + """ + import copy + if memo is None: + memo = {'bondgraph':{}} + if self in memo: + return + + newatom = copy.copy(self) + newatom.molecule = None + newatom.residue = None + newatom.bond_graph = {} + memo[self] = newatom + + # This separates the bond graph from the atoms for serialization; otherwise it creates + # highly recursive relationships between all the atoms + memo['bondgraph'][newatom] = {} + for nbr, order in self.bond_graph.items(): + if nbr not in memo: + continue + else: + memo['bondgraph'][newatom][memo[nbr]] = order + memo['bondgraph'][memo[nbr]][newatom] = order + + if self.residue is not None: + if self.residue not in memo: + self.residue._subcopy(memo) + memo[self.residue].add(newatom) + + @property + def chain(self): + if self.residue is not None: + return self.residue.chain + else: + return None + + @chain.setter + def chain(self, val): + raise AttributeError("To assign an atom to a chain, assign it to a residue _within_ that" + "chain.") + def __str__(self): desc = '%s %s (elem %s)' % (self.__class__.__name__, self.name, self.elem) molstring = '' @@ -282,9 +345,11 @@ def bond_to(self, other, order): """ if self.molecule is other.molecule: self.bond_graph[other] = other.bond_graph[self] = order + if self.molecule is not None: + self.molecule._topology_changed() else: # allow unassigned atoms to be bonded to anything for building purposes self.bond_graph[other] = order - return Bond(self, other, order) + return Bond(self, other) @property def bond_graph(self): @@ -294,12 +359,7 @@ def bond_graph(self): if self.molecule is None: return self._bond_graph else: - self._bond_graph = None - try: - return self.molecule.bond_graph[self] - except KeyError: - self.molecule.bond_graph[self] = {} - return self.molecule.bond_graph[self] + return self.molecule.bond_graph[self] @bond_graph.setter def bond_graph(self, value): @@ -313,7 +373,7 @@ def bond_graph(self, value): def bonds(self): """ List[Bond]: list of all bonds this atom is involved in """ - return [Bond(self, nbr, order) for nbr, order in self.bond_graph.items()] + return [Bond(self, nbr) for nbr in self.bond_graph] @property def heavy_bonds(self): @@ -325,8 +385,8 @@ def heavy_bonds(self): if self.atnum == 1: return [] else: - return [Bond(self, nbr, order) - for nbr, order in self.bond_graph.items() + return [Bond(self, nbr) + for nbr in self.bond_graph if nbr.atnum > 1] @property diff --git a/moldesign/molecules/biounits.py b/moldesign/molecules/biounits.py index 8c820d7..ae3f659 100644 --- a/moldesign/molecules/biounits.py +++ b/moldesign/molecules/biounits.py @@ -52,15 +52,16 @@ def __dir__(self): + list(self._childbyname.keys())) def __getitem__(self, item): - if isinstance(item, basestring): - if item not in self._childbyname: - raise KeyError('No object in "%s" named "%s"' % (self.parent, item)) - return self._childbyname[item] - else: + if isinstance(item, (int, slice)): try: return self._childinorder[item] except IndexError: - raise IndexError("No object with index '%d' in %s" % (item, self.parent)) + raise IndexError("No object with index %d in %s" % (item, self.parent)) + else: + try: + return self._childbyname[item] + except KeyError: + raise KeyError("No object with name '%s' in %s" % (item, self.parent)) def __setitem__(self, key, val): if key in self._childbyname: @@ -136,15 +137,14 @@ def __lt__(self, other): return id(self.obj) < id(other.obj) -@toplevel -class BioContainer(AtomContainer): +class MolecularHierarchy(AtomContainer): """ Generalized storage mechanism for hierarchical representation of biomolecules, e.g. by residue, chain, etc. Permits other groupings, provided that everything is tree-like. All children of a given entity must have unique names. An individual child can be retrieved with - ``biocontainer.childname`` or ``biocontainer['childname']`` or ``biocontainer[index]`` + ``x.childname`` or ``x['childname']`` or ``x[index]`` Yields: BioContainer or mdt.Atom: this entity's children, in order @@ -188,7 +188,7 @@ def add(self, item, key=None): KeyError: if an object with this key already exists Args: - item (BioContainer or mdt.Atom): the child object to add + item (MolecularHierarchy or mdt.Atom): the child object to add key (str): Key to retrieve this item (default: ``item.name`` ) """ if key is None: @@ -236,15 +236,3 @@ def __call__(self, **kwargs): return retlist -@toplevel -class Instance(BioContainer): - """ The singleton biomolecular container for each ``Molecule``. Its children are generally - PDB chains. Users won't ever really see this object. - """ - def __str__(self): - return str(self.children) - - def __repr__(self): - return '' % str(self.children) - - diff --git a/moldesign/molecules/bond_graph.py b/moldesign/molecules/bond_graph.py new file mode 100644 index 0000000..4b578f7 --- /dev/null +++ b/moldesign/molecules/bond_graph.py @@ -0,0 +1,104 @@ +from __future__ import print_function, absolute_import, division +from future.builtins import * +from future import standard_library +standard_library.install_aliases() + +# Copyright 2017 Autodesk Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from future.moves.collections import UserDict +from future.utils import PY2 + +from .. import utils + +if not PY2: # we'll skip the type hints in Python 2 + from typing import Mapping + import moldesign as mdt # needed for the late-binding type hints + + +class AtomBondDict(utils.NewUserDict): + """ Holds the bonds for a given atom + + This object does the legwork of making sure that the bond graph remains symmetric - it makes + sure any modifications to this data are reflected on the symmetric side of the graph + """ + + if not PY2: + __metaclass__ = Mapping['mdt.Atom', int] + + def __init__(self, parent, atom): + super().__init__() + self.parent = parent + self.atom = atom + + def __setitem__(self, otheratom, order): + self.data[otheratom] = order + if self.parent is not None: + self.parent[otheratom].data[self.atom] = order + + def __delitem__(self, otheratom): + del self.data[otheratom] + if self.parent is not None: + otherdict = self.parent[otheratom] + del otherdict.data[self.atom] + + +class BondGraph(utils.NewUserDict): + """ The bond graph of a molecule. This object manages the molecular bonding topology. + + This object can generally be treated as a dict, with two important exceptions: + 1. it maintains symmetry, so that setting ``bond_graph[a][b] = o`` will automatically set + ``bond_graph[b][a] = o`` + 2. It has exactly one entry for every atom in the molecule; it does not expose any methods + for adding or deleting these entries. + """ + if not PY2: + __metaclass__ = Mapping['mdt.Atom', AtomBondDict] + + def __init__(self, molecule): + """ Initializes the object, and raises up the bond state of all atoms in the molecule + """ + super().__init__() + self.molecule = molecule + self._add_atoms(molecule.atoms) + + def _add_atoms(self, atoms): + """ Move bonds defined in the atom to this object. + + If the atom is already present in the graph, do nothing. + + This method raises state - it centralizes bonding information about the molecule in + one place. However, bonding information about atoms OUTSIDE of the molecule is left + untouched. + + Args: + atoms (List[moldesign.Atom]): atom to initialize + """ + add_atoms = [atom for atom in atoms if atom not in self] + for atom in add_atoms: + self.data[atom] = AtomBondDict(self, atom) + for atom in add_atoms: + bg = atom._bond_graph + for nbr, order in list(bg.items()): + if nbr in self: + self[atom][nbr] = order + del bg[nbr] + + def __setitem__(self, key, args): + raise NotImplementedError('Bond graphs cannot be manipulated using this method') + + def __delitem__(self, key): + raise NotImplementedError('Bond graphs cannot be manipulated using this method') + + + diff --git a/moldesign/molecules/bonds.py b/moldesign/molecules/bonds.py index 530defe..9c83636 100644 --- a/moldesign/molecules/bonds.py +++ b/moldesign/molecules/bonds.py @@ -35,7 +35,6 @@ class Bond(object): Args: a1 (Atom): First atom a2 (Atom): Second atom (the order of atoms doesn't matter) - order (int): Order of the bond Notes: Comparisons and hashes involving bonds will return True if the atoms involved in the bonds @@ -46,11 +45,11 @@ class Bond(object): Attributes: a1 (Atom): First atom in the bond; assigned so that ``self.a1.index < self.a2.index`` a2 (Atom): Second atom in the bond; assigned so that ``self.a2.index > self.a1.index`` - order (int): bond order (can be ``None``); not used in comparisons + order (int or None): order of the bond between the atoms (or None of they are not bonded) """ SYMBOLS = {1: u'-', 2: u'=', 3: u'≡'} - def __init__(self, a1, a2, order=None): + def __init__(self, a1, a2): if a1.molecule is not a2.molecule: raise ValueError('Cannot create bond for atoms in different molecules.') @@ -58,11 +57,6 @@ def __init__(self, a1, a2, order=None): a1, a2 = a2, a1 self.a1 = a1 self.a2 = a2 - if order is None: - try: self.order = self.a1.bond_graph[a2] - except KeyError: self.order = None - else: - self.order = order def __str__(self): return "%s bond between %s and %s (order: %s)" % (self.type, @@ -72,6 +66,36 @@ def __str__(self): def __repr__(self): return "<%s>" % self + @property + def order(self): + a1order = self.a1.bond_graph.get(self.a2, None) + a2order = self.a2.bond_graph.get(self.a1, None) + if a1order == a2order: + return a1order + elif a1order is None: + return a2order + elif a2order is None: + return a1order + else: # inconsistent + raise ValueError('Inconsistent bond orders between %s and %s' % (self.a1, self.a2)) + + @order.setter + def order(self, order): + if order is None: + if self.order is None: + return # already done + elif self.molecule: + self.molecule.delete_bond(self) + else: + self.a1.bond_graph.pop(self.a2, None) + self.a2.bond_graph.pop(self.a1, None) + + else: + for atom in (self.a1, self.a2): + nbr = self.partner(atom) + if atom.molecule is self.molecule: + atom.bond_graph[nbr] = order + @property def type(self): return self.a1.symbol + self.SYMBOLS.get(self.order, u'?̶') + self.a2.symbol diff --git a/moldesign/molecules/chain.py b/moldesign/molecules/chain.py index ab555c2..e2d75ce 100644 --- a/moldesign/molecules/chain.py +++ b/moldesign/molecules/chain.py @@ -21,18 +21,18 @@ import moldesign as mdt from .. import utils, data -from . import BioContainer, toplevel, HasResidues +from . import MolecularHierarchy, toplevel, HasResidues @toplevel -class Chain(BioContainer, HasResidues): +class Chain(MolecularHierarchy, HasResidues): """ Biomolecular chain class - its children are almost always residues. Attributes: parent (mdt.Molecule): the molecule this residue belongs to chain (Chain): the chain this residue belongs to """ - @utils.args_from(BioContainer) + @utils.args_from(MolecularHierarchy) def __init__(self, name=None, **kwargs): for key in ('pdbname', 'pdbindex'): val = kwargs.pop(key, None) diff --git a/moldesign/molecules/molecule.py b/moldesign/molecules/molecule.py index 6cb6eb1..7a6c3e7 100644 --- a/moldesign/molecules/molecule.py +++ b/moldesign/molecules/molecule.py @@ -29,7 +29,7 @@ from ..exceptions import NotCalculatedError from ..min.base import MinimizerBase from .properties import MolecularProperties -from . import toplevel, Residue, Chain, Instance, AtomGroup, Bond, HasResidues +from . import toplevel, PrimaryStructure, AtomGroup, Bond, HasResidues, BondGraph from ..helpers import WidgetMethod from .coord_arrays import * @@ -62,8 +62,7 @@ def constrain_atom(self, atom, pos=None): Returns: moldesign.geometry.FixedPosition: constraint object """ - from moldesign import geom - self.constraints.append(geom.FixedPosition(atom, value=pos)) + self.constraints.append(mdt.geom.FixedPosition(atom, value=pos)) self._reset_methods() return self.constraints[-1] @@ -78,9 +77,8 @@ def constrain_distance(self, atom1, atom2, dist=None): Returns: moldesign.geometry.DistanceConstraint: constraint object """ - from moldesign import geom self.constraints.append( - geom.constraints.DistanceConstraint(atom1, atom2, value=dist)) + mdt.geom.DistanceConstraint(atom1, atom2, value=dist)) self._reset_methods() return self.constraints[-1] @@ -96,9 +94,8 @@ def constrain_angle(self, atom1, atom2, atom3, angle=None): Returns: moldesign.geometry.AngleConstraint: constraint object """ - from moldesign import geom self.constraints.append( - geom.constraints.AngleConstraint(atom1, atom2, atom3, value=angle)) + mdt.geom.AngleConstraint(atom1, atom2, atom3, value=angle)) self._reset_methods() return self.constraints[-1] @@ -115,27 +112,27 @@ def constrain_dihedral(self, atom1, atom2, atom3, atom4, angle=None): Returns: moldesign.geom.AngleConstraint: constraint object """ - from moldesign import geom self.constraints.append( - geom.constraints.DihedralConstraint(atom1, atom2, atom3, atom4, value=angle)) + mdt.geom.DihedralConstraint(atom1, atom2, atom3, atom4, value=angle)) self._reset_methods() return self.constraints[-1] - def constrain_hbonds(self): - """ Constrain h-bonds in this molecule to their equilibrium forcefield values. + def constrain_hbonds(self, usecurrent=False): + """ Constrains the lengths of all bonds involving hydrogen. - For molecules with an associated forcefield, this method adds a constraint to the molecule - that fixes all the lengths of all bonds that involve at least one hydrogen atom. The bond - lengths are constrained to their equilibrium forcefield values. + By default, the lengths will be constrained to their forcefield equilibrium values. + They can also be constrained to their current values by setting ``usecurrent=True`` - Note: - This can only be applied if the molecule is associated with a forcefield. + Args: + mol (moldesign.Molecule): Constrain all h-bonds in this molecule + usecurrent (bool): if False (default), set the constraint values to their forcefield + equilibrium values (this will fail if no forcefield is assigned). If True, constrain + the hydrogen bonds at the current values. - Returns: - moldesign.geom.HBondConstraint: constraint object + Raises: + AttributeError: if usecurrent=False but no forcefield is assigned """ - from moldesign import geom - self.constraints.append(geom.constraints.HBondsConstraint(self)) + self.constraints.append(mdt.geom.HBondsConstraint(self, usecurrent)) self._reset_methods() return self.constraints[-1] @@ -164,20 +161,14 @@ def kinetic_temperature(self): def dynamic_dof(self): """ int: Count the number of spatial degrees of freedom of the system, taking into account any constraints - - Note: - If there are other DOFs not taken into account here, this quantity can be - set explicitly """ - if self._dof is not None: - return self._dof df = self.ndims if self.integrator is not None: if self.integrator.params.get('remove_translation', False): df -= 3 if self.integrator.params.get('remove_rotation', False): if self.num_atoms > 2: - df -= 2 + df -= 3 const_hbonds = const_water = False if self.integrator is not None: @@ -186,7 +177,8 @@ def dynamic_dof(self): if const_hbonds: # TODO: deal with molecular hydrogen for atom in self.atoms: - if atom.atnum == 1: df -= 1 + if atom.atnum == 1: + df -= 1 if const_water: for residue in self.residues: @@ -196,17 +188,23 @@ def dynamic_dof(self): else: df -= 3 - for constraint in self.constraints: # TODO: deal with more double-counting cases - if const_hbonds: - if isinstance(constraint, mdt.geom.DistanceConstraint): - # don't double-count constrained hbonds - if constraint.a1.atnum == 1 or constraint.a2.atnum == 1: continue + for constraint in self.constraints: # TODO: deal better with overlaps! + if (const_hbonds + and isinstance(constraint, mdt.geom.DistanceConstraint) + and (constraint.a1.atnum == 1 or constraint.a2.atnum == 1)): + continue df -= constraint.dof return df - @dynamic_dof.setter - def dynamic_dof(self, val): - self._dof = val + @property + def charge(self): + return self._charge + + @charge.setter + def charge(self, val): + if not hasattr(val, 'units'): + val = val * u.q_e + self._charge = val @property def num_electrons(self): @@ -371,46 +369,6 @@ def properties(self, val): calc_potential_energy = calculate_potential_energy calc_forces = calculate_forces - def _biomol_summary_markdown(self): - """A markdown description of biomolecular structure. - - Returns: - str: Markdown string""" - lines = [] - if len(self.residues) > 1: - lines.append('### Biopolymer chains') - seqs = [] - for chain in self.chains: - seq = chain._get_sequence(_html=True) - if not seq.strip(): # don't write anything if there's no sequence - continue - - seqs.append('**%s**: %s'%(chain.name, seq)) - lines.append('
'.join(seqs)) - - return lines - - def get_residue_table(self): - """Creates a data table summarizing this molecule's primary structure. - - Returns: - moldesign.utils.MarkdownTable""" - table = utils.MarkdownTable(*(['chain']+ - 'protein dna rna unknown water solvent ion'.split())) - for chain in self.chains: - counts = {} - unk = [] - for residue in chain.residues: - cat = residue.type - if cat == 'unknown': - unk.append(residue.name) - counts[cat] = counts.get(cat, 0)+1 - counts['chain'] = '%s'%chain.name - if 0 < len(unk) <= 4: - counts['unknown'] = ','.join(unk) - table.add_line(counts) - return table - class MolTopologyMixin(object): """ Functions for building and keeping track of bond topology and biochemical structure. @@ -434,51 +392,27 @@ def copy(self, name=None): charge=self.charge, metadata=self.metadata) newmol.properties = self.properties.copy(mol=newmol) - newmodel = self._copy_method(newmol, 'energy_model') - if newmodel is not None: + if self.energy_model is not None: + newmodel = self._copy_method('energy_model') newmol.set_energy_model(newmodel) - newintegrator = self._copy_method(newmol, 'integrator') - if newintegrator is not None: + if self.integrator is not None: + newintegrator = self._copy_method('integrator') newmol.set_integrator(newintegrator) if self.ff is not None: self.ff.copy_to(newmol) newmol.constraints = [c.copy(newmol) for c in self.constraints] return newmol - def _copy_method(self, newmol, methodname): + def _copy_method(self, methodname): method = getattr(self, methodname) - if method is None: - return None newmethod = method.__class__() newmethod.params.clear() newmethod.params.update(method.params) return newmethod - def assert_atom(self, atom): - """If passed an integer, just return self.atoms[atom]. - Otherwise, assert that the atom belongs to this molecule""" - if type(atom) is int: - atom = self.mol.atoms[atom] - else: - assert atom.molecule is self, "Atom %s does not belong to %s" % (atom, self) - return atom - - def rebuild(self): - self.chains = Instance(molecule=self) - self.residues = [] - self._rebuild_topology() - - def _rebuild_topology(self, bond_graph=None): - """ Build the molecule's bond graph based on its atoms' bonds - - Args: - bond_graph (dict): graph to build the bonds from + def _rebuild_from_atoms(self): + """ Rebuild component data structures based on atomic data """ - if bond_graph is None: - self.bond_graph = self._build_bonds(self.atoms) - else: - self.bond_graph = bond_graph - self.is_biomolecule = False self.ndims = 3 * self.num_atoms self._positions = np.zeros((self.num_atoms, 3)) * u.default.length @@ -486,35 +420,9 @@ def _rebuild_topology(self, bond_graph=None): self.masses = np.zeros(self.num_atoms) * u.default.mass self.dim_masses = u.broadcast_to(self.masses, (3, self.num_atoms)).T self._assign_atom_indices() - self._assign_residue_indices() + self.chains.rebuild_hierarchy() self._dof = None - - @staticmethod - def _build_bonds(atoms): - """ Build a bond graph describing bonds between this list of atoms - - Args: - atoms (List[moldesign.atoms.Atom]) - """ - # TODO: check atom parents - bonds = {} - - # First pass - create initial bonds - for atom in atoms: - assert atom not in bonds, 'Atom appears twice in this list' - if hasattr(atom, 'bonds') and atom.bond_graph is not None: - bonds[atom] = atom.bond_graph - else: - bonds[atom] = {} - - # Now make sure both atoms have a record of their bonds - for atom in atoms: - for nbr in bonds[atom]: - if atom in bonds[nbr]: - assert bonds[nbr][atom] == bonds[atom][nbr] - else: - bonds[nbr][atom] = bonds[atom][nbr] - return bonds + self._topology_changed() def _assign_atom_indices(self): """ @@ -531,87 +439,6 @@ def _assign_atom_indices(self): atom._index_into_molecule('_position', self.positions, idx) atom._index_into_molecule('_momentum', self.momenta, idx) - def _assign_residue_indices(self): - """ - Set up the chain/residue/atom hierarchy - """ - # TODO: consistency checks - - if self._defchain is None: - self._defchain = Chain(name=None, - index=None, - molecule=None) - - if self._defres is None: - self._defres = Residue(name=None, - index=None, - pdbindex=None, - pdbname=None, - chain=self._defchain, - molecule=None) - self._defchain.add(self._defres) - - default_residue = self._defres - default_chain = self._defchain - num_biores = 0 - conflicts = set() - - pdbatoms = [atom for atom in self.atoms if atom.pdbindex is not None] - if pdbatoms: - min_pdb_atom = min(pdbatoms, key=lambda x:x.pdbindex) - last_pdb_idx = min_pdb_atom.pdbindex - min_pdb_atom.index - 1 - else: - last_pdb_idx = 0 - - for atom in self.atoms: - if atom.pdbindex is None: - atom.pdbindex = last_pdb_idx + 1 - if last_pdb_idx is not None and atom.pdbindex <= last_pdb_idx: - atom.pdbindex = last_pdb_idx + 1 - conflicts.add('atom indices') - last_pdb_idx = atom.pdbindex - - # if atom has no chain/residue, assign defaults - if atom.residue is None: - atom.residue = default_residue - atom.chain = default_chain - atom.residue.add(atom) - - # assign the chain to this molecule if necessary - if atom.chain.molecule is None: - atom.chain.molecule = self - atom.chain.index = len(self.chains) - - assert atom.chain not in self.chains - oldname = atom.chain.name - if atom.chain.name is None and num_biores > 1: - atom.chain.name = 'A' - while atom.chain.name in self.chains: - if atom.chain.name is None: - atom.chain.name = 'A' - atom.chain.name = chr(ord(atom.chain.name)+1) - - if oldname != atom.chain.name: - conflicts.add('chain ids') - self.chains.add(atom.chain) - else: - assert atom.chain.molecule is self - - # assign the residue to this molecule - if atom.residue.molecule is None: - atom.residue.molecule = self - atom.residue.index = len(self.residues) - self.residues.append(atom.residue) - if atom.residue.type in ('dna', 'rna', 'protein'): - num_biores += 1 - else: - assert atom.chain.molecule is self - - if conflicts: - print('WARNING: %s modified due to name clashes' % ( - ', '.join(conflicts))) - self.is_biomolecule = (num_biores >= 2) - def is_identical(self, other, verbose=False): """ Test whether two molecules are "identical" @@ -652,10 +479,11 @@ def is_identical(self, other, verbose=False): return True + residues = utils.Alias('chains.residues') @property def num_residues(self): - return len(self.residues) + return len(self.chains.residues) nresidues = numresidues = num_residues @property @@ -807,7 +635,7 @@ def run(self, run_for): moldesign.trajectory.Trajectory """ if self.integrator is None: - raise ValueError('Cannot simulate; no integrator set for %s' % self) + raise AttributeError('Cannot simulate; no integrator set for %s' % self) init_time = self.time traj = self.integrator.run(run_for) @@ -831,7 +659,7 @@ def calculate(self, requests=None, wait=True, use_cache=True): MolecularProperties """ if self.energy_model is None: - raise ValueError('Cannot calculate properties; no energy model set for %s' % self) + raise AttributeError('Cannot calculate properties; no energy model set for %s' % self) if requests is None: requests = [] @@ -884,12 +712,12 @@ def set_energy_model(self, model, **params): model.mol = self model.params.update(params) - if 'charge' in model.params: - if model.params.charge is None: - model.params.charge = self.charge - elif model.params.charge != self.charge: - print("Warning: molecular charge (%d) does not match energy model's charge (%d)" % ( - self.charge, model.params.charge)) + if 'charge' not in model.params: + if self.charge != 0: + model.params.charge = self.charge + elif model.params['charge'] != self.charge: + print("Warning: molecular charge (%s) does not match energy model's charge (%s)" % ( + self.charge.value_in(u.q_e), model.params.charge.value_in(u.q_e))) model._prepped = False def set_integrator(self, integrator, **params): @@ -931,7 +759,7 @@ def minimize(self, assert_converged=False, **kwargs): moldesign.trajectory.Trajectory """ if self.energy_model is None: - raise ValueError('Cannot minimize molecule; no energy model set for %s' % self) + raise AttributeError('Cannot minimize molecule; no energy model set for %s' % self) try: trajectory = self.energy_model.minimize(**kwargs) @@ -955,6 +783,10 @@ def _reset_methods(self): if self.integrator is not None: self.integrator._prepped = False + def _topology_changed(self): + self._reset_methods() + self.ff = None + @toplevel class Molecule(AtomGroup, @@ -985,10 +817,6 @@ class Molecule(AtomGroup, leaving the original molecule untouched. name (str): name of the molecule (automatically generated if not provided) - bond_graph (dict): dictionary specifying bonds between the atoms - of the form - ``{atom1:{atom2:bond_order, atom3:bond_order}, atom2:...}`` - This structure must be symmetric; we require - ``bond_graph[atom1][atom2] == bond_graph[atom2][atom1]`` copy_atoms (bool): Create the molecule with *copies* of the passed atoms (they will be copied automatically if they already belong to another molecule) pdbname (str): Name of the PDB file @@ -1010,7 +838,7 @@ class Molecule(AtomGroup, Attributes: atoms (AtomList): List of all atoms in this molecule. - bond_graph (dict): symmetric dictionary specifying bonds between the + bond_graph (mdt.molecules.BondGraph): symmetric dictionary specifying bonds between the atoms: ``bond_graph = {atom1:{atom2:bond_order, atom3:bond_order}, atom2:...}`` @@ -1057,24 +885,27 @@ class Molecule(AtomGroup, _PERSIST_REFERENCES = True # relevant for `pyccc` RPC calls def __init__(self, atomcontainer, - name=None, bond_graph=None, + name=None, copy_atoms=False, pdbname=None, charge=None, metadata=None): super().__init__() + # initial property init + self.name = 'uninitialized molecule' + self._constraints = None + self._charge = None + self._properties = None + atoms, name = self._get_initializing_atoms(atomcontainer, name, copy_atoms) if metadata is None: metadata = getattr(atomcontainer, 'metadata', utils.DotDict()) self.atoms = atoms + self.bond_graph = BondGraph(self) self.time = getattr(atomcontainer, 'time', 0.0 * u.default.time) - self.name = 'uninitialized molecule' - self._defres = None - self._defchain = None - self._constraints = None self.pdbname = pdbname self.constraints = [] self.energy_model = None @@ -1084,16 +915,13 @@ def __init__(self, atomcontainer, if charge is not None: self.charge = charge - if not hasattr(charge, 'units'): # assume fundamental charge units if not explicitly set - self.charge *= u.q_e else: self.charge = getattr(atomcontainer, 'charge', u.unitsum(atom.formal_charge for atom in self.atoms)) # Builds the internal memory structures - self.chains = Instance(molecule=self) - self.residues = [] - self._rebuild_topology(bond_graph=bond_graph) + self.chains = PrimaryStructure(self) + self._rebuild_from_atoms() if name is not None: self.name = name @@ -1171,17 +999,11 @@ def _repr_markdown_(self): if self.integrator: lines.append('**Integrator**: %s'%str(self.integrator)) - if self.num_residues > 1: - table = self.get_residue_table() - lines.append('### Residues') - lines.append(table.markdown(replace={0: ' '})+'|') # extra '|' is bug workaround (?) - - if self.is_biomolecule: - lines.extend(self._biomol_summary_markdown()) + lines.extend(self.chains._repr_markdown_()) return '\n\n'.join(lines) - def newbond(self, a1, a2, order): + def new_bond(self, a1, a2, order): """ Create a new bond Args: @@ -1212,66 +1034,53 @@ def num_bonds(self): nbonds = num_bonds - def addatom(self, newatom): + def add_atom(self, newatom): """ Add a new atom to the molecule Args: newatom (moldesign.Atom): The atom to add (it will be copied if it already belongs to a molecule) """ - self.addatoms([newatom]) + self.add_atoms([newatom]) - def addatoms(self, newatoms): + def add_atoms(self, newatoms): """Add new atoms to this molecule. - For now, we really just rebuild the entire molecule in place. + + *Copies* of the passed atoms will be added if they already belong to another molecule. Args: newatoms (List[moldesign.Atom])) """ - self._reset_methods() - - for atom in newatoms: assert atom.molecule is None - self.atoms.extend(newatoms) + owner = newatoms[0].molecule + for atom in newatoms: + if atom.molecule is not owner: + raise ValueError('Cannot add atoms from multiple sources - add them separately.') - # symmetrize bonds between the new atoms and the pre-existing molecule - bonds = self._build_bonds(self.atoms) - for newatom in newatoms: - for nbr in bonds[newatom]: - if nbr in self.bond_graph: # i.e., it's part of the original molecule - bonds[nbr][newatom] = bonds[newatom][nbr] + if owner is not None: # copy if the atoms are already owned + newatoms = mdt.AtomList(newatoms).copy() - self._rebuild_topology(bonds) + self.atoms.extend(newatoms) + self.bond_graph._add_atoms(newatoms) + self._rebuild_from_atoms() - def deletebond(self, bond): + def delete_bond(self, bond_or_atom, a2=None): """ Remove this bond from the molecule's topology Args: - Bond: bond to remove + bond_or_atom (Bond or Atom): bond to remove (or the first of two atom) + a2 (Atom): second atom in bond (if first argument was also an atom) """ - self.bond_graph[bond.a1].pop(bond.a2) - self.bond_graph[bond.a2].pop(bond.a1) - - def _force_converged(self, tolerance): - """ Return True if the forces on this molecule: - 1) Are less than tolerance in every dimension - 2) have an RMS of less than 1/3 the tolerance value - - Args: - tolerance (units.Scalar[force]): force tolerance - - Returns: - bool: True if RMSD force is less than this quantity - """ - forces = self.calc_forces() - if forces.max() > tolerance: return False - rmsd2 = forces.dot(forces) / self.ndims - if rmsd2 > tolerance * tolerance / 3.0: return False - return True + if a2 is None: # it's a bond + a1, a2 = bond_or_atom.a1, bond_or_atom.a2 + else: # they passed 2 atoms + a1 = bond_or_atom + self.bond_graph[a1].pop(a2) + self._topology_changed() def write(self, filename=None, **kwargs): """ Write this molecule to a string or file. - This is a convenience method for :ref:`moldesign.converters.write` + Calls :ref:`moldesign.converters.write` Args: filename (str): filename to write (if not passed, write to string) diff --git a/moldesign/molecules/primary_structure.py b/moldesign/molecules/primary_structure.py new file mode 100644 index 0000000..5a4a407 --- /dev/null +++ b/moldesign/molecules/primary_structure.py @@ -0,0 +1,195 @@ +from __future__ import print_function, absolute_import, division +from future.builtins import * +from future import standard_library +standard_library.install_aliases() + +# Copyright 2017 Autodesk Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import collections + +from . import MolecularHierarchy, Residue, Chain, ChildList +from .. import utils + +# TODO: fully delegate all biochemical property management and views to this object. +# TODO: atom.residue, atom.chain, mol.residues should all delegate to PrimaryStructure + + +class PrimaryStructure(MolecularHierarchy): + """ A tree-like structure that encapsulates the biochemical organization of a ``Molecule`` + + *Most* of the state related to the biochemical hierarchy is stored + here, with one major exception: each atom stores a reference to its containing residue. That + data must be consistent with the data in this object + + Args: + mol (moldesign.Molecule): molecule this object stores hierarchy for + """ + def __init__(self, mol): + super().__init__(molecule=mol, name='root') + self._defchain = Chain(name=None, + index=None, + molecule=None) + self._defres = Residue(name=None, + index=None, + pdbindex=None, + pdbname=None, + chain=self._defchain, + molecule=None) + self.residues = None + + def __str__(self): + return str(self.children) + + def __repr__(self): + return '' % str(self.children) + + def rebuild_hierarchy(self): + """ + Set up the chain/residue/atom hierarchy + + This rebuilds the primary structure tree using each object's parents. If atoms + don't have a residue, they are assigned a default residue. If residues don't have a + chain, they're assigned a default chain. + """ + self.children = ChildList(self) + + conflicts = set() + self._fix_atom_pdbnames(conflicts) + self.residues = self._rebuild_residues() + self._rebuild_chains(conflicts) + + if conflicts: + print('WARNING: %s modified due to name clashes' % (', '.join(conflicts))) + + def _fix_atom_pdbnames(self, conflicts): + """ Rename atom pdbindices to avoid name conflicts + """ + pdbatoms = [atom for atom in self.molecule.atoms if atom.pdbindex is not None] + if pdbatoms: + min_pdb_atom = min(pdbatoms, key=lambda x: x.pdbindex) + last_pdb_idx = min_pdb_atom.pdbindex-min_pdb_atom.index-1 + else: + last_pdb_idx = 0 + + for atom in self.molecule.atoms: + # assign PDB indices + if atom.pdbindex is None: + atom.pdbindex = last_pdb_idx+1 + if last_pdb_idx is not None and atom.pdbindex <= last_pdb_idx: + atom.pdbindex = last_pdb_idx+1 + conflicts.add('atom indices') + last_pdb_idx = atom.pdbindex + + def _rebuild_residues(self): + """ Recreate list of residues + """ + num_biores = 0 + + foundresidues = collections.OrderedDict() # used as an ordered set + for atom in self.molecule.atoms: + res = atom.residue if atom.residue is not None else self._defres + + if res not in foundresidues: + res.rebuild() + res.index = len(foundresidues) + foundresidues[res] = None + if res.type in ('dna', 'rna', 'protein'): + num_biores += 1 + + if res.molecule is None: + res.molecule = self.molecule + else: + assert res.molecule is self.molecule + + if atom not in res: + res.add(atom) + + self.molecule.is_biomolecule = (num_biores >= 2) + return list(foundresidues) + + def _rebuild_chains(self, conflicts): + """ Recreate list of chains, renaming as necessary to avoid conflicts + """ + for residue in self.residues: + chain = residue.chain if residue.chain is not None else self._defchain + + if chain not in self: + chain.rebuild() + + oldname = chain.name + if chain.name is None and len(self.residues) > 1: + chain.name = 'A' + while chain.name in self: + if chain.name is None: + chain.name = 'A' + chain.name = chr(ord(chain.name)+1) + + chain.index = len(self) + self.add(chain) + if chain.name != oldname: + conflicts.add('chain ids') + + if chain.molecule is None: + chain.molecule = self.molecule + else: + assert chain.molecule is self.molecule + + if residue not in chain: + chain.add(residue) + + def _repr_markdown_(self): + """A markdown description of biomolecular structure. + + Returns: + str: Markdown string""" + lines = [] + + if len(self.residues) > 1: + table = self._get_residue_table() + lines.append('### Residues') + lines.append(table.markdown(replace={0: ' '})+'|') # extra '|' is bug workaround (?) + + lines.append('### Biopolymer chains') + seqs = [] + for chain in self: + seq = chain._get_sequence(_html=True) + if not seq.strip(): # don't write anything if there's no sequence + continue + + seqs.append('**%s**: %s' % (chain.name, seq)) + lines.append('
'.join(seqs)) + + return lines + + def _get_residue_table(self): + """Creates a data table summarizing this molecule's primary structure. + + Returns: + moldesign.utils.MarkdownTable: markdown-formatted table""" + table = utils.MarkdownTable(*(['chain']+ + 'protein dna rna unknown water solvent ion'.split())) + for chain in self: + counts = {} + unk = [] + for residue in chain.residues: + cat = residue.type + if cat == 'unknown': + unk.append(residue.name) + counts[cat] = counts.get(cat, 0)+1 + counts['chain'] = '%s'%chain.name + if 0 < len(unk) <= 4: + counts['unknown'] = ','.join(unk) + table.add_line(counts) + return table + diff --git a/moldesign/molecules/residue.py b/moldesign/molecules/residue.py index 34683a3..4bcc05e 100644 --- a/moldesign/molecules/residue.py +++ b/moldesign/molecules/residue.py @@ -22,11 +22,11 @@ import moldesign as mdt from .. import utils, data -from . import BioContainer, AtomList, toplevel +from . import MolecularHierarchy, AtomList, toplevel, ChildList @toplevel -class Residue(BioContainer): +class Residue(MolecularHierarchy): """ A biomolecular residue - most often an amino acid, a nucleic base, or a solvent molecule. In PDB structures, also often refers to non-biochemical molecules. @@ -36,18 +36,7 @@ class Residue(BioContainer): parent (mdt.Molecule): the molecule this residue belongs to chain (Chain): the chain this residue belongs to """ - - def copy(self): - """ Create a copy of this residue and all topology within. - - Returns: - Residue: copy of this residue and all its atoms. This copy will NOT - reference any parent molecule - """ - newatoms = super().copy_atoms() - return newatoms[0].residue - - @utils.args_from(BioContainer) + @utils.args_from(MolecularHierarchy) def __init__(self, **kwargs): """ Initialization Args: @@ -63,6 +52,44 @@ def __init__(self, **kwargs): self._template_name = None self._name = None + def copy(self): + """ Create a copy of this residue and all topology within. + + Returns: + Residue: copy of this residue and all its atoms. This copy will NOT + reference any parent molecule + """ + newatoms = super().copy_atoms() + return newatoms[0].residue + + def _subcopy(self, memo=None): + """ Private data mangement method for copying a residue. + Returns a shallow copy intended to be deepcopied to avoid corrupting the original + data. + + See :method:`moldesign.Residue.copy` for the public interface for copying these objects. + """ + import copy + + if memo is None: + memo = {'bondgraph':{}} + if self in memo: + return + + newresidue = copy.copy(self) + newresidue.chain = None + newresidue.molecule = None + newresidue.children = ChildList(newresidue) + + memo[self] = newresidue + if self.chain is not None: + if self.chain not in memo: + newchain = copy.copy(self.chain) + newchain.molecule = None + newchain.children = ChildList(newchain) + memo[self.chain] = newchain + memo[self.chain].add(newresidue) + @property def name(self): if self._name is not None: @@ -91,7 +118,7 @@ def markdown_summary(self): if self.molecule is None: lines = ["

Residue %s

"%self.name] else: - lines = ["

Residue %s (index %d)

"%(self.name, self.index)] + lines = ["

Residue %s (index %s)

"%(self.name, self.index)] if self.type == 'protein': lines.append('**Residue codes**: %s / %s'%(self.resname, self.code)) @@ -103,7 +130,7 @@ def markdown_summary(self): lines.append('**

Chain:** %s'%self.chain.name) - lines.append('**PDB sequence #**: %d'%self.pdbindex) + lines.append('**PDB sequence #**: %s'%self.pdbindex) terminus = None if self.type == 'dna': @@ -153,16 +180,12 @@ def add(self, atom, key=None): if atom.residue is not None: assert atom.residue is self, 'Atom already assigned to a residue' atom.residue = self - if atom.chain is None: - atom.chain = self.chain - else: - assert atom.chain == self.chain, "Atom's chain does not match residue's chain" if key is not None or atom.name not in self.children: return super().add(atom, key=key) else: return super().add(atom, key='%s%s' % (atom.name, len(self))) - add.__doc__ = BioContainer.add.__doc__ + add.__doc__ = MolecularHierarchy.add.__doc__ @property def is_n_terminal(self): diff --git a/moldesign/molecules/trajectory.py b/moldesign/molecules/trajectory.py index 948d06d..f517356 100644 --- a/moldesign/molecules/trajectory.py +++ b/moldesign/molecules/trajectory.py @@ -246,7 +246,6 @@ def __init__(self, mol, unit_system=None, first_frame=False, name=None): self.unit_system = utils.if_not_none(unit_system, mdt.units.default) self.properties = utils.DotDict() self._reset() - self._tempmol.dynamic_dof = self.mol.dynamic_dof self.name = utils.if_not_none(name, 'untitled') if first_frame: self.new_frame() diff --git a/moldesign/tools/topology.py b/moldesign/tools/topology.py index 8b1bdea..d023821 100644 --- a/moldesign/tools/topology.py +++ b/moldesign/tools/topology.py @@ -187,8 +187,6 @@ def bonded(r1, r2): def addto(chain, res): res.chain = None chain.add(res) - for atom in res: - atom.chain = chain allchains = [mdt.Chain(tempmol.chains[0].name)] for chain in tempmol.chains: diff --git a/moldesign/utils/classes.py b/moldesign/utils/classes.py index 498a7dd..8b228a9 100644 --- a/moldesign/utils/classes.py +++ b/moldesign/utils/classes.py @@ -100,6 +100,25 @@ def __repr__(self): __str__ = __repr__ +class NewUserDict(collections.MutableMapping): + """ Reimplementation of UserDict with new-style classes but without subclassing dict. + + In addition to the normal dict methods, the only addition here is the ``data`` attribute + that gives us an actual ``dict`` to store things in + + Unlike dict, pickles easily. + Unlike UserDict, uses new-style classes. + """ + def __init__(self, *args, **kwargs): + self.data = dict(*args, **kwargs) + + __delitem__ = Alias('data.__delitem__') + __setitem__ = Alias('data.__setitem__') + __getitem__ = Alias('data.__getitem__') + __len__ = Alias('data.__len__') + __iter__ = Alias('data.__iter__') + + class DotDict(object): """ An attribute-accessible dictionary that preserves insertion order """