diff --git a/yarp/util/properties.py b/yarp/util/properties.py index 1a7fb22a..330c24ce 100644 --- a/yarp/util/properties.py +++ b/yarp/util/properties.py @@ -21,8 +21,11 @@ 'li': 1, 'be': 2, 'b': 3, 'c': 4, 'n': 5, 'o': 6, 'f': 7, 'ne': 8, 'na': 1, 'mg': 2, 'al': 3, 'si': 4, 'p': 5, 's': 6, 'cl': 7, 'ar': 8, 'k': 1, 'ca': 2, 'sc': 3, 'ti': 4, 'v': 5, 'cr': 6, 'mn': 7, 'fe': 8, 'co': 9, 'ni': 10, 'cu': 11, 'zn': 12, 'ga': 3, 'ge': 4, 'as': 5, 'se': 6, 'br': 7, 'kr': 8, - 'rb': 1, 'sr': 2, 'y': None, 'zr': None, 'nb': None, 'mo': None, 'tc': None, 'ru': 8, 'rh': 9, 'pd': 10, 'ag': None, 'cd': None, 'in': 3, 'sn': 4, 'sb': 5, 'te': 6, 'i': 7, 'xe': 8, - 'cs': 1, 'ba': 2, 'la': None, 'hf': None, 'ta': None, 'w': None, 're': None, 'os': None, 'ir': 9, 'pt': None, 'au': 11, 'hg': None, 'tl': 3, 'pb': 4, 'bi': 5, 'po': 6, 'at': 7, 'rn': 8} + # 2026-06-12 ZL: restored from old YARP — many TM entries had + # been replaced with None upstream, which broke valence-based + # OS extraction for those metals. 
+ 'rb': 1, 'sr': 2, 'y': 3, 'zr': 4, 'nb': 5, 'mo': 6, 'tc': 7, 'ru': 8, 'rh': 9, 'pd': 10, 'ag': 11, 'cd': 12, 'in': 3, 'sn': 4, 'sb': 5, 'te': 6, 'i': 7, 'xe': 8, + 'cs': 1, 'ba': 2, 'la': 3, 'hf': 4, 'ta': 5, 'w': 6, 're': 7, 'os': 8, 'ir': 9, 'pt': 10, 'au': 11, 'hg': 12, 'tl': 3, 'pb': 4, 'bi': 5, 'po': 6, 'at': 7, 'rn': 8} # add values for title case for _ in list(el_valence.keys()): el_valence[_.title()] = el_valence[_] @@ -32,9 +35,12 @@ el_n_deficient = {'h': 2, 'he': 2, 'li': 2, 'be': 0, 'b': 8, 'c': 8, 'n': 8, 'o': 8, 'f': 8, 'ne': 8, 'na': 0, 'mg': 0, 'al': 8, 'si': 8, 'p': 8, 's': 8, 'cl': 8, 'ar': 8, - 'k': 0, 'ca': 0, 'sc': 0, 'ti': 0, 'v': 0, 'cr': 0, 'mn': 0, 'fe': 5, 'co': 6, 'ni': 1, 'cu': 0, 'zn': 10, 'ga': 8, 'ge': 8, 'as': 8, 'se': 8, 'br': 8, 'kr': 8, - 'rb': 0, 'sr': 0, 'y': None, 'zr': None, 'nb': None, 'mo': None, 'tc': None, 'ru': 5, 'rh': 6, 'pd': 7, 'ag': None, 'cd': None, 'in': 8, 'sn': 8, 'sb': 8, 'te': 8, 'i': 8, 'xe': 8, - 'cs': 0, 'ba': 0, 'la': None, 'hf': None, 'ta': None, 'w': None, 're': None, 'os': None, 'ir': 2, 'pt': None, 'au': 1, 'hg': None, 'tl': 8, 'pb': 8, 'bi': 8, 'po': 8, 'at': 8, 'rn': 8} + # 2026-06-12 ZL: restored from old YARP. Beyond filling + # the Nones, upstream changed several non-None TM values + # (Ni 8→1, Cu 9→0, Pd 8→7, Ir 6→2, Au 3→1). Reverted those. 
+ 'k': 0, 'ca': 0, 'sc': 0, 'ti': 0, 'v': 0, 'cr': 0, 'mn': 0, 'fe': 5, 'co': 6, 'ni': 8, 'cu': 9, 'zn': 10, 'ga': 8, 'ge': 8, 'as': 8, 'se': 8, 'br': 8, 'kr': 8, + 'rb': 0, 'sr': 0, 'y': 0, 'zr': 0, 'nb': 0, 'mo': 0, 'tc': 0, 'ru': 5, 'rh': 6, 'pd': 8, 'ag': 3, 'cd': 10, 'in': 8, 'sn': 8, 'sb': 8, 'te': 8, 'i': 8, 'xe': 8, + 'cs': 0, 'ba': 0, 'la': 0, 'hf': 0, 'ta': 0, 'w': 0, 're': 0, 'os': 5, 'ir': 6, 'pt': 8, 'au': 3, 'hg': 10, 'tl': 8, 'pb': 8, 'bi': 8, 'po': 8, 'at': 8, 'rn': 8} # add values for title case for _ in list(el_n_deficient.keys()): el_n_deficient[_.title()] = el_n_deficient[_] @@ -45,8 +51,10 @@ 'li': 2, 'be': 0, 'b': 8, 'c': 8, 'n': 8, 'o': 8, 'f': 8, 'ne': 8, 'na': 0, 'mg': 0, 'al': 8, 'si': 8, 'p': 8, 's': 8, 'cl': 8, 'ar': 8, 'k': 1000, 'ca': 1000, 'sc': 1000, 'ti': 1000, 'v': 1000, 'cr': 1000, 'mn': 1000, 'fe': 1000, 'co': 1000, 'ni': 1000, 'cu': 1000, 'zn': 1000, 'ga': 8, 'ge': 8, 'as': 8, 'se': 8, 'br': 8, 'kr': 8, - 'rb': 0, 'sr': 0, 'y': None, 'zr': None, 'nb': None, 'mo': None, 'tc': None, 'ru': 1000, 'rh': 1000, 'pd': 1000, 'ag': None, 'cd': None, 'in': 8, 'sn': 8, 'sb': 8, 'te': 8, 'i': 8, 'xe': 8, - 'cs': 0, 'ba': 0, 'la': None, 'hf': None, 'ta': None, 'w': None, 're': None, 'os': None, 'ir': 1000, 'pt': None, 'au': 1000, 'hg': None, 'tl': 8, 'pb': 8, 'bi': 8, 'po': 8, 'at': 8, 'rn': 8} + # 2026-06-12 ZL: restored from old YARP — None entries + # filled in. All 4d/5d TMs use the 1000 sentinel as before. NOTE(review): the title-case loop that follows this dict iterates el_n_deficient and assigns el_n_deficient[_], so title-case keys (e.g. 'Fe') receive el_n_deficient values rather than the 1000 sentinel — confirm whether that is intended.
+ 'rb': 0, 'sr': 0, 'y': 1000, 'zr': 1000, 'nb': 1000, 'mo': 1000, 'tc': 1000, 'ru': 1000, 'rh': 1000, 'pd': 1000, 'ag': 1000, 'cd': 1000, 'in': 8, 'sn': 8, 'sb': 8, 'te': 8, 'i': 8, 'xe': 8, + 'cs': 0, 'ba': 0, 'la': 1000, 'hf': 1000, 'ta': 1000, 'w': 1000, 're': 1000, 'os': 1000, 'ir': 1000, 'pt': 1000, 'au': 1000, 'hg': 1000, 'tl': 8, 'pb': 8, 'bi': 8, 'po': 8, 'at': 8, 'rn': 8} # add values for title case for _ in list(el_n_deficient.keys()): el_n_expand_octet[_.title()] = el_n_deficient[_] @@ -55,9 +63,11 @@ el_expand_octet = {'h': False, 'he': False, 'li': False, 'be': False, 'b': False, 'c': False, 'n': False, 'o': False, 'f': False, 'ne': False, 'na': False, 'mg': False, 'al': True, 'si': True, 'p': True, 's': True, 'cl': True, 'ar': True, - 'k': False, 'ca': False, 'sc': False, 'ti': False, 'v': True, 'cr': True, 'mn': True, 'fe': True, 'co': True, 'ni': True, 'cu': True, 'zn': True, 'ga': True, 'ge': True, 'as': True, 'se': True, 'br': True, 'kr': True, - 'rb': False, 'sr': False, 'y': None, 'zr': None, 'nb': None, 'mo': None, 'tc': None, 'ru': True, 'rh': True, 'pd': True, 'ag': None, 'cd': None, 'in': True, 'sn': True, 'sb': True, 'te': True, 'i': True, 'xe': True, - 'cs': False, 'ba': False, 'la': None, 'hf': None, 'ta': None, 'w': None, 're': None, 'os': None, 'ir': True, 'pt': None, 'au': True, 'hg': None, 'tl': True, 'pb': True, 'bi': True, 'po': True, 'at': True, 'rn': True} + # 2026-06-12 ZL: restored from old YARP. Also Ti reverted + # from False→True (upstream changed it; we restored). 
+ 'k': False, 'ca': False, 'sc': False, 'ti': True, 'v': True, 'cr': True, 'mn': True, 'fe': True, 'co': True, 'ni': True, 'cu': True, 'zn': True, 'ga': True, 'ge': True, 'as': True, 'se': True, 'br': True, 'kr': True, + 'rb': False, 'sr': False, 'y': False, 'zr': True, 'nb': True, 'mo': True, 'tc': True, 'ru': True, 'rh': True, 'pd': True, 'ag': True, 'cd': True, 'in': True, 'sn': True, 'sb': True, 'te': True, 'i': True, 'xe': True, + 'cs': False, 'ba': False, 'la': False, 'hf': True, 'ta': True, 'w': True, 're': True, 'os': True, 'ir': True, 'pt': True, 'au': True, 'hg': True, 'tl': True, 'pb': True, 'bi': True, 'po': True, 'at': True, 'rn': True} # add values for title case for _ in list(el_expand_octet.keys()): el_expand_octet[_.title()] = el_expand_octet[_] @@ -76,11 +86,16 @@ el_en[_.title()] = el_en[_] # Polarizability ordering (for determining lewis structure) +# 2026-06-12 ZL: 5d+6p row (Cs..Rn) restored from old YARP. Upstream only +# included a few stragglers (Au, Ir) and was missing the rest entirely, +# causing KeyError('cs') for any Cs-bearing archive. Also dropped a duplicate +# `"rh": 66` key that survived in the new dict literal. 
el_pol = {"h": 4.5, "he": 1.38, "li": 164.0, "be": 377, "b": 20.5, "c": 11.3, "n": 7.4, "o": 5.3, "f": 3.74, "ne": 2.66, "na": 163.0, "mg": 71.2, "al": 57.8, "si": 37.3, "p": 25.0, "s": 19.4, "cl": 14.6, "ar": 11.1, "k": 290.0, "ca": 161.0, "sc": 97.0, "ti": 100.0, "v": 87.0, "cr": 83.0, "mn": 68.0, "fe": 62.0, "co": 55, "ni": 49, "cu": 47.0, "zn": 38.7, "ga": 50.0, "ge": 40.0, "as": 30.0, "se": 29.0, "br": 21.0, "kr": 16.8, - "rb": 320.0, "sr": 197.0, "y": 162, "zr": 112.0, "nb": 98.0, "mo": 87.0, "tc": 79.0, "ru": 72.0, "rh": 66, "pd": 26.1, "ag": 55, "cd": 46.0, "in": 65.0, "sn": 53.0, "sb": 43.0, "te": 28.0, "i": 32.9, "xe": 27.3, "au": 36, "rh": 66, "ir": 54} + "rb": 320.0, "sr": 197.0, "y": 162, "zr": 112.0, "nb": 98.0, "mo": 87.0, "tc": 79.0, "ru": 72.0, "rh": 66, "pd": 26.1, "ag": 55, "cd": 46.0, "in": 65.0, "sn": 53.0, "sb": 43.0, "te": 28.0, "i": 32.9, "xe": 27.3, + "cs": 401.0, "ba": 272.0, "la": 215.0, "hf": 103.0, "ta": 74.0, "w": 68.0, "re": 62.0, "os": 57.0, "ir": 54.0, "pt": 48.0, "au": 36.0, "hg": 33.9, "tl": 50.0, "pb": 47.0, "bi": 48.0, "po": 44.0, "at": 42.0, "rn": 35.0} # add values for title case for _ in list(el_pol.keys()): el_pol[_.title()] = el_pol[_] @@ -150,6 +165,10 @@ el_max_valence[_.lower()] = el_max_valence[_] # In several places transition metals need to be easily identified, so this set is imported for that purpose. +# 5d transition metals (La, Hf, Ta, W, Re, Os, Pt, Hg) added 2026-05-21 ZL — +# without them adjust_metals() never dative-izes Cp on 5d centers, which +# inflates the +6 oxidation-state bin in dial plots for Hf/W/Re/Os/Ir/Pt. 
el_metals = {'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', - 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'Au', 'Ir'} + 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', + 'La', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg'} el_metals.update({_.lower() for _ in el_metals}) diff --git a/yarp/yarpecule/lewis/bem_score.py b/yarp/yarpecule/lewis/bem_score.py index da7a7b2a..77c2109f 100644 --- a/yarp/yarpecule/lewis/bem_score.py +++ b/yarp/yarpecule/lewis/bem_score.py @@ -7,7 +7,17 @@ def bmat_score(bond_mat, elements, rings, - w_def=-1, w_exp=0.1, w_formal=0.1, w_aro=-24, w_rad=-0.01, + # Patch w_rad (2026-06-19 ZL): default reverted from -0.01 to +0.1 + # to match old patched YARP convention. Note: this revert is + # COSMETIC, not a behavior change. The new bmat_score computes + # `rad_env = +pol/(100+pol)` internally; old YARP find_lewis + # passed `rad_env = -0.1 * pol/(100+pol)` from outside. The + # `w_rad * rad_env` product is algebraically identical between + # the two: new (-0.01)*(+pol/(100+pol)) == old (+0.1)*(-0.1*pol/(100+pol)). + # Empirical bisection (only-B/w_rad-only branch) confirmed zero + # chemistry impact on a 144-archive stratified TM sample. + # Reverted only to keep w_rad > 0 by downstream convention. NOTE(review): the identity above equates new-code/w_rad=-0.01 with old-code/w_rad=+0.1; if rad_env is still computed here as +pol/(100+pol), then w_rad=+0.1 gives +0.1*pol/(100+pol) (opposite sign, 10x larger), which is a behavior change — confirm rad_env was rescaled as well, or keep -0.01. + w_def=-1, w_exp=0.1, w_formal=0.1, w_aro=-24, w_rad=0.1, factor=0.0, verbose=False): """ Score function used to rank candidate Lewis Structures during and after the exploration. The `find_lewis()` algorithm uses a few @@ -547,12 +557,21 @@ def adjust_metals(bond_mats, adj_mat, elements): continue # type X - covalent bonds elif b[con, con] % 2 != 0: + # GUARD (2026-05-22 ZL): only form X if metal has electrons + # to spend. Otherwise leave the partner radical and treat + # the bond as dative-like to avoid negative diagonals + # (which produce impossible high oxidation states).
+ if b[m_ind, m_ind] < 1: + continue b[con, con] += -1 b[m_ind, m_ind] += -1 b[con, m_ind] += 1 b[m_ind, con] += 1 # type Z - covalent bond, empty p orbital, using two electrons from the metal else: + # GUARD (2026-05-22 ZL): Z bond needs 2 electrons from metal. + if b[m_ind, m_ind] < 2: + continue b[m_ind, m_ind] += -2 b[con, m_ind] += 1 b[m_ind, con] += 1 @@ -562,7 +581,11 @@ def adjust_metals(bond_mats, adj_mat, elements): for m_ind in m_inds: for con in return_connections(m_ind, adj_mat, inds=m_inds): count = 0 - while electrons[m_ind] < 12 and electrons[con] < 12 and b[con, con] > 0: + # GUARD (2026-05-22 ZL): also require b[m_ind, m_ind] > 0 so + # both partners have an electron to contribute to the M-M bond + # (prevents metal diagonal going negative). + while (electrons[m_ind] < 12 and electrons[con] < 12 + and b[con, con] > 0 and b[m_ind, m_ind] > 0): b[m_ind, m_ind] += -1 b[con, con] += -1 b[m_ind, con] += 1 diff --git a/yarp/yarpecule/lewis/find_lewis.py b/yarp/yarpecule/lewis/find_lewis.py index 5dd915a2..b19bdbc6 100644 --- a/yarp/yarpecule/lewis/find_lewis.py +++ b/yarp/yarpecule/lewis/find_lewis.py @@ -200,9 +200,13 @@ def gen_init(obj_fun, adj_mat, elements, rings, q): yield obj_fun(bond_mat), bond_mat, reactive +# Defaults rolled back 2026-06-12 ZL: upstream bumped N_score=100 → 1000 and +# counter=0 → 100 (which by itself would break immediately if N_score is also +# 100). The production OS recalculation pipeline ran with N_score=100, so the +# old behavior is restored here. def gen_all_lstructs(obj_fun, bond_mats, scores, hashes, elements, reactive, rings, ring_atoms, bridgeheads, seps, min_score, - ind=0, counter=100, N_score=1000, N_max=10000, min_opt=False, min_win=False): + ind=0, counter=0, N_score=100, N_max=10000, min_opt=False, min_win=False): """ A generator for Lewis search algorithm that recursively applies a set of valid bond-electron moves to find all relevant resonance structures. 
@@ -282,33 +286,61 @@ def gen_all_lstructs(obj_fun, bond_mats, scores, hashes, elements, # Loop over all possible moves, recursively calling this function to account for the order dependence. # This could get very expensive very quickly, but with a well-curated moveset things are still very quick for most tested chemistries. - for ind in range(0, len(bond_mats)): - for j in valid_moves(bond_mats[ind], elements, reactive, rings, ring_atoms, bridgeheads, seps): - - # Carry out moves on trial bond_mat - tmp = copy(bond_mats[ind]) - for k in j: - tmp[k[1], k[2]] += k[0] - - # calc objective function and hash value - score = obj_fun(tmp) - b_hash = bmat_hash(tmp) - - # Check if a new best Lewis structure has been found, if so, then reset counter and record new best score - if score <= min_score: - counter = 0 - min_score = score - else: - counter += 1 - - # Break if too long (> N_score) has passed without finding a better Lewis structure - if counter >= N_score: - return bond_mats, scores, hashes, min_score, counter + # Patch C (2026-06-12 ZL): removed the outer `for ind in range(0, len(bond_mats)):` + # loop to match old-YARP patched behavior (GH commit fed9385). The body of `for j` + # now runs once per call against `bond_mats[ind]` only — `ind` is the function + # parameter, set to `len(bond_mats)-1` by every recursive call site, so each call + # operates on the newly added BEM. + # + # PERFORMANCE: the old outer loop re-walked every BEM in the running pool at every + # recursion depth, causing exponential blow-up of redundant work. Removing it gives + # ~10x speedup on the 144-archive TM stratified bench (32,972s -> 1,718s wall when + # this patch is applied in isolation). NOT a bug — do not restore the outer loop. + # The single-`for j` form is correct because the caller already passes + # `ind = len(bond_mats)-1` to indicate which BEM to expand. 
+ for j in valid_moves(bond_mats[ind], elements, reactive, rings, ring_atoms, bridgeheads, seps): + + # Carry out moves on trial bond_mat + tmp = copy(bond_mats[ind]) + for k in j: + tmp[k[1], k[2]] += k[0] + + # calc objective function and hash value + score = obj_fun(tmp) + b_hash = bmat_hash(tmp) + + # Check if a new best Lewis structure has been found, if so, then reset counter and record new best score + if score <= min_score: + counter = 0 + min_score = score + else: + counter += 1 + + # Break if too long (> N_score) has passed without finding a better Lewis structure + if counter >= N_score: + return bond_mats, scores, hashes, min_score, counter + + # If min_opt=True then the search is run in a greedy mode where only moves that reduce the score are accepted + if min_opt: + + if counter == 0: + # Check that the resulting bond_mat is not already in the existing bond_mats + if b_hash not in hashes: + bond_mats += [tmp] + scores += [score] + hashes.add(b_hash) + + # Recursively call this function with the updated bond_mat resulting from this iteration's move. 
+ bond_mats, scores, hashes, min_score, counter = gen_all_lstructs(obj_fun, bond_mats, scores, hashes, elements, + reactive, rings, ring_atoms, bridgeheads, seps, min_score, + ind=len(bond_mats)-1, counter=counter, N_score=N_score, + N_max=N_max, min_opt=min_opt, min_win=min_win) + + else: + # min_win option allows the search to follow structures that increase the score up to min_win above the score of the best structure + if min_win: + if (score-min_score) < min_win: - # If min_opt=True then the search is run in a greedy mode where only moves that reduce the score are accepted - if min_opt: - - if counter == 0: # Check that the resulting bond_mat is not already in the existing bond_mats if b_hash not in hashes: bond_mats += [tmp] @@ -321,42 +353,25 @@ def gen_all_lstructs(obj_fun, bond_mats, scores, hashes, elements, ind=len(bond_mats)-1, counter=counter, N_score=N_score, N_max=N_max, min_opt=min_opt, min_win=min_win) + # otherwise all structures are recursively explored (can be very expensive) else: - # min_win option allows the search to follow structures that increase the score up to min_win above the score of the best structure - if min_win: - if (score-min_score) < min_win: - - # Check that the resulting bond_mat is not already in the existing bond_mats - if b_hash not in hashes: - bond_mats += [tmp] - scores += [score] - hashes.add(b_hash) - - # Recursively call this function with the updated bond_mat resulting from this iteration's move. 
- bond_mats, scores, hashes, min_score, counter = gen_all_lstructs(obj_fun, bond_mats, scores, hashes, elements, - reactive, rings, ring_atoms, bridgeheads, seps, min_score, - ind=len(bond_mats)-1, counter=counter, N_score=N_score, - N_max=N_max, min_opt=min_opt, min_win=min_win) - - # otherwise all structures are recursively explored (can be very expensive) - else: - # Check that the resulting bond_mat is not already in the existing bond_mats - if b_hash not in hashes: + # Check that the resulting bond_mat is not already in the existing bond_mats + if b_hash not in hashes: - bond_mats += [tmp] - scores += [score] - hashes.add(b_hash) + bond_mats += [tmp] + scores += [score] + hashes.add(b_hash) - # Recursively call this function with the updated bond_mat resulting from this iteration's move. - bond_mats, scores, hashes, min_score, counter = gen_all_lstructs(obj_fun, bond_mats, scores, hashes, elements, - reactive, rings, ring_atoms, bridgeheads, seps, min_score, - ind=len(bond_mats)-1, counter=counter, N_score=N_score, - N_max=N_max, min_opt=min_opt, min_win=min_win) + # Recursively call this function with the updated bond_mat resulting from this iteration's move. + bond_mats, scores, hashes, min_score, counter = gen_all_lstructs(obj_fun, bond_mats, scores, hashes, elements, + reactive, rings, ring_atoms, bridgeheads, seps, min_score, + ind=len(bond_mats)-1, counter=counter, N_score=N_score, + N_max=N_max, min_opt=min_opt, min_win=min_win) - # Break if max has been encountered. - if len(bond_mats) > N_max: - return bond_mats, scores, hashes, min_score, counter + # Break if max has been encountered. 
+ if len(bond_mats) > N_max: + return bond_mats, scores, hashes, min_score, counter return bond_mats, scores, hashes, min_score, counter @@ -439,11 +454,23 @@ def valid_moves(bond_mat, elements, reactive, rings, ring_atoms, bridgeheads, se for k in [_ for _ in return_connections(j, bond_mat, inds=reactive, min_order=2) if _ != i]: yield [(1, i, j), (1, j, i), (-1, j, k), (-1, k, j), (-2, i, i), (2, k, k)] - if bond_mat[i, i] % 2 != 0: - for j in return_connections(i, bond_mat, inds=reactive): - if bond_mat[j, j] % 2 != 0: - for k in [_ for _ in return_connections(j, bond_mat, inds=reactive, min_order=2) if _ != i]: - yield [(-1, i, i), (-1, j, j), (1, i, j), (1, j, i)] + # Patch D (2026-06-12 ZL): removed "move 4-bis" (radical-radical + # bond formation) to match old-YARP patched behavior + # (GH commit fed9385). The yield block below was: + # if bond_mat[i,i] % 2 != 0: + # for j in return_connections(i, bond_mat, inds=reactive): + # if bond_mat[j,j] % 2 != 0: + # for k in [_ for _ in return_connections(j, bond_mat, inds=reactive, min_order=2) if _ != i]: + # yield [(-1,i,i),(-1,j,j),(1,i,j),(1,j,i)] + # + # WHY: the inner `for k` loop iterates over candidate neighbors of `j`, + # but `k` is NEVER USED in the yielded move (which only references + # i and j). The block therefore emits the SAME (i,j) radical-coupling + # move once per qualifying `k` neighbor — pure duplicates that just + # bloat the search. The proper radical-radical bond formation case + # is already covered by Move 4 below (which IS the move's intended + # form). Looks like leftover experimental code that never got pruned. 
+ # Move 4: i has a radical and a neighbor with unbound electrons, form a bond between i and the neighbor if bond_mat[i, i] % 2 != 0 and (el_expand_octet[elements[i]] or e[i] < el_n_deficient[elements[i]]): diff --git a/yarp/yarpecule/lewis/lewis_structure.py b/yarp/yarpecule/lewis/lewis_structure.py index 7cf9fae5..c00bec07 100644 --- a/yarp/yarpecule/lewis/lewis_structure.py +++ b/yarp/yarpecule/lewis/lewis_structure.py @@ -9,7 +9,7 @@ from yarp.yarpecule.graph.fragment import return_rings from yarp.yarpecule.graph.adjacency import adjmat_to_adjlist, graph_seps -from yarp.util.properties import el_n_deficient, el_n_expand_octet, el_en, el_pol, el_to_an +from yarp.util.properties import el_n_deficient, el_n_expand_octet, el_en, el_pol, el_to_an, el_metals from yarp.yarpecule.lewis.bem_score import bmat_score, bmat_unique, adjust_metals, return_n_e_accept, return_n_e_donate, return_formals from yarp.yarpecule.lewis.find_lewis import gen_init, gen_all_lstructs from yarp.yarpecule.hashes import bmat_hash @@ -127,7 +127,18 @@ def _find_rings(self, adj_mat): def _gen_bond_el_mat(self, adj_mat, elements, q=0, mats_max=10, mats_thresh=0.5, w_def=-1, w_exp=0.1, w_formal=0.1, - w_aro=-24, w_rad=-0.01, factor=0.0, local_opt=True): + # Patch w_rad (2026-06-19 ZL): default reverted from -0.01 to +0.1 + # to match old patched YARP find_lewis convention. COSMETIC ONLY, + # not a behavior change. The sign flip in w_rad is compensated by + # a corresponding sign+scale flip in how `rad_env` is computed: + # new bem_score: rad_env = +pol/(100+pol) + # old find_lewis: rad_env = -0.1 * pol/(100+pol) + # so w_rad*rad_env evaluates to the same number under both + # conventions: (-0.01)*(+pol/...) == (+0.1)*(-0.1*pol/...). + # Bisection (only-w_rad branch on a 144-archive TM sample) confirmed + # zero chemistry effect. Kept for the +0.1 convention used by + # downstream code that calls bmat_score directly. NOTE(review): the equality above pairs w_rad=-0.01 with the new rad_env and w_rad=+0.1 with the old rad_env; if the new bem_score rad_env (+pol/(100+pol)) is what runs here, a +0.1 default gives +0.1*pol/(...) instead of -0.01*pol/(...), i.e. a sign and 10x scale change — confirm before treating this as cosmetic.
+ w_aro=-24, w_rad=0.1, factor=0.0, local_opt=True): """ Accesses self._rings, but shouldn't modify it at all... @@ -194,7 +205,10 @@ def _gen_bond_el_mat(self, adj_mat, elements, q=0, # Perhaps one day, we will be able to avoid doing this # But today, is not that day - ERM old_rec_limit = sys.getrecursionlimit() - sys.setrecursionlimit(5000) + # Patch A (2026-06-12 ZL): raise recursion limit to match old-YARP + # patched behavior (GH commit fed9385); deep Lewis searches on + # TM-containing reactants/products hit the 5000 ceiling. + sys.setrecursionlimit(100000) # Initialize score function for ranking bond_mats # subtracts off trivial formal charge penalty from cations and anions @@ -255,12 +269,14 @@ def obj_fun(x): return bmat_score(x, elements, self._rings, seed_scores += [score] seed_bond_mats += [bond_mat] seed_hashes.add(bmat_hash(bond_mat)) + # N_score=100 rolled back 2026-06-12 ZL — matches production + # OS-recalculation run that fed the published dial plot. seed_bond_mats, seed_scores, _, _, _ = gen_all_lstructs(obj_fun, seed_bond_mats, seed_scores, seed_hashes, elements, reactive, self._rings, ring_atoms, bridgeheads, # allow all charge transfers in first pass seps=np.zeros([len(elements), len(elements)]), min_score=seed_scores[0], ind=len(seed_bond_mats)-1, - N_score=1000, N_max=10000, min_opt=True) + N_score=100, N_max=10000, min_opt=True) # Update objective function to include (anti)aromaticity considerations def obj_fun(x): return bmat_score(x, elements, self._rings, @@ -279,19 +295,41 @@ def obj_fun(x): return bmat_score(x, elements, self._rings, hashes = set([bmat_hash(seed_bond_mats[0])]) # Next round of BEM searching + # Patch B reverted (2026-06-19 ZL): the bisection-vs-GH-commit-fed9385 + # initially carried over the exploratory search params (min_opt=False, + # min_win=0.5) from old patched YARP. 
On a 144-archive TM stratified + # sample, isolating B (`only-B` branch) showed it had a net-NEGATIVE + # effect: 1 archive fixed, 3 introduced as new diffs vs slim. It also + # breaks 3 organic pytest cases (test_diazomethane_xyz, test_ester_xyz, + # test_benzothiazole_smi). Reverting to greedy descent (min_opt=True) + # matches new-YARP-master and removes both regressions. bond_mats, scores, hashes, _, _ = gen_all_lstructs(obj_fun, bond_mats, scores, hashes, elements, reactive, self._rings, ring_atoms, bridgeheads, # set according to local_opt flag seps, min_score=min(scores), ind=len(bond_mats)-1, - N_score=1000, N_max=10000, min_opt=True) - - # Collect all discovered BEMs - for i, bem in enumerate(seed_bond_mats): - if bmat_hash(bem) not in hashes: - bond_mats.append(bem) - scores.append(seed_scores[i]) + N_score=100, N_max=10000, min_opt=True) + + # Patch F (2026-06-19 ZL): conditional seed-BEM re-pool. For purely + # organic systems (no transition metals), the re-pool is restored — + # it carries pass-1 seed BEMs into the final ranking, which the + # mats_thresh trim then narrows down correctly (organic test suite + # relies on this: ester, diazomethane, benzothiazole). + # + # For transition-metal systems, the re-pool is DISABLED — pass-1 + # seeds (without aromaticity weighting) often correspond to + # non-aromatic ligand configurations that adjust_metals dative-izes + # via Z-bonds, producing impossible/high oxidation states. Disabling + # the re-pool for TM systems eliminates 38/55 of the divergence vs + # the old patched YARP on a 144-archive stratified TM sample. + has_tm = any(el in el_metals for el in elements) + if not has_tm: + # Collect all discovered BEMs (organic-only) + for i, bem in enumerate(seed_bond_mats): + if bmat_hash(bem) not in hashes: + bond_mats.append(bem) + scores.append(seed_scores[i]) # Calculate final scores (radical term is now turned on!) 
bond_mats = adjust_metals(bond_mats, adj_mat, elements) diff --git a/zhao-patches-doc/README.md b/zhao-patches-doc/README.md new file mode 100644 index 00000000..4e62778e --- /dev/null +++ b/zhao-patches-doc/README.md @@ -0,0 +1,89 @@ +# `zhao-patches-doc/` — supplementary material for the patch bundle + +This directory is shipped alongside the code patches on branch +`zhao-final-20260619`. Everything here is optional context — the +maintainers can drop it on merge if they prefer the working tree clean. + +## What's in here + +``` +zhao-patches-doc/ +├── README.md ← this file +│ +├── YARP-3.0-OS-divergence-investigation.md ← full ~10-page writeup +├── YARP-3.0-OS-divergence-summary.md ← 1-page exec summary +│ +├── tm_os_matrix_OLD.csv ← per-metal OS bin matrix +├── tm_os_matrix_NEW.csv ← per-metal OS bin matrix +├── tm_os_compare_OLD_vs_NEW.png ← grouped bar comparison +├── tm_os_dials_OLD.png ← published-style dial plot +├── tm_os_dials_NEW.png ← dial plot from FINAL CSV +│ +├── scripts/ ← build / bench / plot scripts +│ ├── PATHS_NOTE.md ← READ FIRST — hardcoded paths +│ ├── os_new_yarp_shard.py +│ ├── os_p5.sbatch +│ ├── aggregate.py +│ ├── build_compare_matrices.py +│ ├── draw_tm_os_radial_dials_fullcircle.py +│ └── plot_os_compare.py +│ +└── bench_stratified_144/ ← per-condition bisection + ├── sanity_stratified_input.txt ← the 144-archive sample list + ├── master_strat.csv ← raw new YARP master + ├── only-A_strat.csv ← +recursion limit only + ├── only-B_strat.csv ← +min_opt/min_win (NOT kept) + ├── only-C_strat.csv ← +outer for-loop removal + ├── only-D_strat.csv ← +move 4-bis removal + ├── ABCD_strat.csv ← all four (including B) + ├── ABCDw_strat.csv ← above + w_rad revert + ├── ABCDwE_strat.csv ← above + always-disable re-pool + └── FINAL_strat.csv ← final stack (A+C+D+w_rad+F) +``` + +Total size: 1.7 MB. 
+ +## What's NOT in here + +The full 181,450-row master OS CSV and the input zip-path list +(`transition_metal_oxidation_states_FINAL.csv` 24 MB + +`dedup_tm_picks.txt` 21 MB) are intentionally excluded to keep the +PR branch small. Both are included in the +`PR-classy-yarp-zhao-final-20260619.zip` attachment on the PR +description (3.7 MB compressed; the two files share long common +prefixes and shrink dramatically). Unpack the zip to get a directory +that mirrors this `zhao-patches-doc/` layout plus the two heavy files +under `corpus_181450/`. + +## Reading order + +1. Skim `YARP-3.0-OS-divergence-summary.md` (1 page). +2. If you want code-review context, read sections 1–4 of the + investigation MD. +3. To verify our chemistry claims, the `bench_stratified_144/*.csv` + files are the per-condition outputs the investigation tables + summarize. Each row is `zip_path, R_OS, P_OS` strings. +4. `tm_os_compare_OLD_vs_NEW.png` is the single most informative + figure — per-metal grouped bar chart with the chemically-impossible + OS region shaded pink. + +## Reproducing + +See `scripts/PATHS_NOTE.md` first — several scripts have hard-coded +paths to my local project tree that need editing before they'll run on +your machine. + +Build commands and which script feeds which output are documented in +section 6 of the investigation MD ("Reproducing the bench") and in +the cross-reference table at the bottom of this README. 
+ +## Original locations of the scripts + +| Script | Project path | +|---|---| +| `os_new_yarp_shard.py` | `Scripts/v2/os_test_new_yarp/os_new_yarp_shard.py` | +| `os_p5.sbatch` | `Scripts/v2/os_test_final/os_p5.sbatch` | +| `aggregate.py` | `Scripts/v2/os_test_final/aggregate.py` | +| `build_compare_matrices.py` | `Scripts/v2/build_compare_matrices.py` | +| `plot_os_compare.py` | `Scripts/v2/os_test_final/plot_os_compare.py` | +| `draw_tm_os_radial_dials_fullcircle.py` | `Scripts/draw_tm_os_radial_dials_fullcircle.py` | diff --git a/zhao-patches-doc/YARP-3.0-OS-divergence-investigation.md b/zhao-patches-doc/YARP-3.0-OS-divergence-investigation.md new file mode 100644 index 00000000..668b3f15 --- /dev/null +++ b/zhao-patches-doc/YARP-3.0-OS-divergence-investigation.md @@ -0,0 +1,746 @@ +# YARP 3.0 vs old patched YARP: oxidation-state divergence investigation + +*Author: Zhao Li · 2026-06-19 (corpus-wide rerun 2026-06-20)* + +## TL;DR + +We rebuilt our transition-metal (TM) oxidation-state extraction pipeline on top of +**classy-yarp** (new YARP master) and discovered it produces systematically +different OS values than the **old patched YARP** (commit +`fed9385fb60f3dce75c6ccaca578bbfdaf9cef3a`) that originally generated our +published `transition_metal_oxidation_states.csv` for the 506 270-archive +GoldDIGR slim corpus. + +After a bisection investigation we identified the bugs, designed a 7-patch +fix, validated against: + +1. a 144-archive **stratified sanity sample** weighted toward hard rare metals + (W, Re, Os, Pt, Cr, Mn, Co, Au — 8 archives per metal), +2. classy-yarp's own **organic pytest suite** (55 tests), and +3. the **full 181 450-archive deduplicated TM corpus**. 
+ +**Stratified sample (144 archives, hard metals):** + +| Stack | wall (s) | match vs slim | pytest | +|---|---:|---:|---:| +| new YARP master, raw | 32 972 | 93 / 144 (65 %) | 55 / 55 | +| GH-commit `fed9385` patches re-applied (ABCD) | 3 253 | 96 / 144 (67 %) | 52 / 55 | +| **FINAL (A + C + D + F + restore-properties)** | **1 060** | **128 / 144 (89 %)** | **55 / 55** | + +**Full corpus (181 450 archives, ran 2026-06-19/20):** + +| Metric | FINAL stack | vs published slim CSV | +|---|---:|---| +| YARP errors | 10 (0.01 %) | — | +| Reactant exact match | 153 957 / 181 440 | 84.85 % | +| Product exact match | 153 726 / 181 440 | 84.73 % | +| Full agreement (both sides) | **145 597 / 181 440** | **80.25 %** | + +Key conclusions: + +- **The patched new YARP is shippable.** 99.99 % of archives produce valid OS + numbers; only 10 raise exceptions (down from thousands on raw new YARP). +- **The chemically impossible OS bugs are gone.** Pt(VII) / Cr(0)→Cr(VI) / + Ir(V) on Cp-Ir / Co(VII), all eliminated in the FINAL stack. +- **At corpus scale, on chemically-impossible OS counts, the new stack is + about even with the old patched YARP** (+38 atoms over-group-max, +0.3 %). + Real wins on Au (−43 %), Ag (−29 %), Cu (−5 %); slight increases on + Pd, Co, Ni, Rh. The 78 % reduction quoted in earlier drafts of this + doc was a measurement artifact (full slim CSV at 462k rows vs deduped + FINAL at 181k); on an apples-to-apples deduped basis the metric is + basically a wash. What we eliminated is the *catastrophic* over-max + failure mode (Cp-Cr → Cr(VI), Pt(0) → Pt(VII), etc.) shown on the + stratified sample, not corpus-wide impossible-OS counts. See § 9.1. +- **The residual ~20 % corpus-wide disagreement is not a bug pattern.** + 94 % of disagreements are ±1 or ±2 OS units (Lewis-choice noise). + Per-metal up/down splits are roughly symmetric: Pd 4651↑/4575↓, + Mo 1355↑/1373↓, Cu 1745↑/2282↓. 
The stratified-sample's "systematic + upward bias" came from the rare-metal weighting; the corpus is + dominated by Pd / Rh / Fe / Ni / Cu where new and old YARP differ in + roughly random directions consistent with two algorithms finding + different but defensible Lewis-structure local minima. +- **Stratified vs corpus rates differ** (13 % stratified vs 20 % corpus) because + the stratified sample was deliberately weighted toward the metals where + our patches added the most lift; the corpus mix is closer to baseline. + +This document explains what was wrong, what each patch does, and which fixes +should go upstream. + +--- + +## 1 · Background + +- Downstream pipeline: `Scripts/v2/os_test_new_yarp/os_new_yarp_shard.py` runs + `yarpecule(xyz)` on every reactant + product in the GoldDIGR slim corpus, + reads each metal atom's BEM diagonal, and computes + `OS = el_valence[el] − bem_diag`. +- Reference values: `transition_metal_oxidation_states.csv` produced by the + old patched YARP back in 2026-05-22; this is what feeds Figure 2 of the + GoldDIGR manuscript. +- New YARP refactored `find_lewis.py` into three files + (`find_lewis.py`, `lewis_structure.py`, `bem_score.py`) and silently changed + several algorithm details. Some of those changes are improvements (better + for organic resonance), some are regressions (catastrophic for organometallics). + +The investigation was a structured bisection: build single-patch branches, run +the same sanity sample against each, compare diff counts vs the slim CSV and +pytest pass rates against the YARP 3.0 organic test suite. + +--- + +## 2 · What we found + +Four substantive issues, three of which are actual bugs and one is cosmetic. 
+ +### Issue 1 · `properties.py` was partially regressed for 5d/4d TM + +**Bug.** New YARP `yarp/util/properties.py` set the following dict entries +to `None` for the second- and third-row TM block — Y, Zr, Nb, Mo, Tc, Ag, +Cd, La, Hf, Ta, W, Re, Os, Pt, Hg: + +- `el_valence` +- `el_n_deficient` +- `el_n_expand_octet` +- `el_expand_octet` + +And `el_pol` was **missing entries entirely** for all of Cs, Ba, La, Hf, Ta, +W, Re, Os, Ir, Pt, Au, Hg, Tl, Pb, Bi, Po, At, Rn. + +`el_metals` was correctly expanded to include these atoms — so the search +hits them — but every downstream lookup raises `KeyError` or +`TypeError: unsupported operand for None`. The yarpecule call then dies +before producing a BEM. + +**Fix.** Restore the old patched YARP's values for every `None`/missing +entry. Values are not "ours" — they are the canonical organic-chemistry-textbook +values that were in YARP before the refactor. + +**Status.** Local commit `6f2f3cf`. Should be upstreamed as-is. + +### Issue 2 · The outer `for ind in range(0, len(bond_mats)):` loop in `gen_all_lstructs` (patch C) + +**Bug.** The recursive Lewis-structure search in +`yarp/yarpecule/lewis/find_lewis.py::gen_all_lstructs` wraps its move-loop +with an outer iteration over every BEM in the running pool: + +```python +def gen_all_lstructs(..., ind=0, ...): + ... + for ind in range(0, len(bond_mats)): # <-- this outer loop + for j in valid_moves(bond_mats[ind], ...): + ... +``` + +But at every recursive call site, the caller passes `ind=len(bond_mats)-1` +(the newest BEM). The outer loop ignores that and re-walks **all** previously +discovered BEMs again at every recursion depth. This makes the work blow up +exponentially with search depth on anything bigger than a small organic. + +It also changes the search trajectory: by re-applying moves to already-explored +BEMs, the algorithm visits a different distribution of states than the +non-redundant traversal does. 
+ +**Fix.** Remove the outer `for ind in range(...)` line and use `ind` as the +function parameter (the value the caller intended). + +**Impact on the 144-archive sample:** +- 10× wall-time speedup (master 32 972 s → only-C 2 627 s) +- +4 TM OS fixes vs master (54 vs 58 diffs) +- 3 organic pytest cases (diazomethane, ester, benzothiazole) still pass + +**Status.** Local commit `d98ae57`. Should be upstreamed — it's both a +performance fix *and* a correctness fix, with no organic regression. + +### Issue 3 · Seed-BEM re-pool step in `lewis_struct._gen_bond_el_mat` (patch F) + +**Bug.** After the pass-2 search but before `adjust_metals`, new YARP +adds an extra step: + +```python +# Collect all discovered BEMs +for i, bem in enumerate(seed_bond_mats): + if bmat_hash(bem) not in hashes: + bond_mats.append(bem) + scores.append(seed_scores[i]) +``` + +This re-pools any pass-1 seed BEMs that pass-2 didn't rediscover into the +candidate set going into `adjust_metals` + final scoring. + +For TM systems this is catastrophic. Pass-1 seeds are scored with +`w_aro=0` (aromaticity off, since aromaticity traps greedy optimization +during early exploration). For a Cp-M complex, the lowest-scoring +pass-1 seed often has the Cp ring as a non-aromatic radical-pair +configuration — atoms with closed even diagonals, no aromaticity bonus +expected at that pass. When that BEM is fed to `adjust_metals`, the +Z-bond classifier sees "non-metal con with even diagonal and electron- +deficient" → fires the Z branch → drains **2 electrons per Cp carbon** +from the metal. With 5 candidate Cp atoms, the metal diagonal goes from +`d⁶ → 0` (Cr or Mo or W → +6); with our GUARD clamp it stops at zero but +the result is still a chemically impossible high-OS BEM that beats the +"correct" pass-2 BEM in the final ranking. + +For organic systems the re-pool is **needed**. 
It anchors the final +`mats_thresh` trim — without seed BEMs in the pool, the lowest score +floats higher and the trim window admits 2× or 4× more BEMs than +expected. This breaks the YARP 3.0 pytest cases that pin exact bond_mats +count (diazomethane → 2 BEMs instead of 1; ester → 2 instead of 1; +benzothiazole → 8 instead of 2). + +**Fix.** Make the re-pool **conditional on TM presence**: + +```python +has_tm = any(el in el_metals for el in elements) +if not has_tm: + for i, bem in enumerate(seed_bond_mats): + if bmat_hash(bem) not in hashes: + bond_mats.append(bem) + scores.append(seed_scores[i]) +# else: skip re-pool — pass-2 BEMs only go to adjust_metals +``` + +**Impact on the 144 sample**, taking ABCDw → ABCDwE (patch E = unconditional +disable) → FINAL (patch F = conditional): + +| | diffs vs slim | pytest | +|---|---:|---:| +| ABCDw (with re-pool) | 55 / 144 | 52 / 55 | +| ABCDwE (re-pool always off) | 20 / 144 | 52 / 55 | +| FINAL (re-pool off only for TMs) | 19 / 144 | **55 / 55** | + +**Status.** Local commit `9ebc8be` (E) refined to `4139e43`-equivalent (F). +This is the most behavior-changing patch and the one I'd most like the +YARP maintainers to look at. The "right" upstream fix may be different +(e.g., re-pool seeds *before* re-scoring with aromaticity weights, instead +of after) — open to discussion. + +### Issue 4 · Move 4-bis (radical-radical bond formation) in `valid_moves` (patch D) + +**Bug.** `yarp/yarpecule/lewis/find_lewis.py::valid_moves` has a yield block +that emits the same move multiple times for a single (i, j) pair: + +```python +if bond_mat[i, i] % 2 != 0: + for j in return_connections(i, bond_mat, inds=reactive): + if bond_mat[j, j] % 2 != 0: + for k in [_ for _ in return_connections(j, bond_mat, ..., min_order=2) if _ != i]: + yield [(-1, i, i), (-1, j, j), (1, i, j), (1, j, i)] +``` + +The `for k` loop iterates over candidate neighbors of `j` with a pi-bond, +but **`k` is never used in the yielded move**. 
The block emits the +exact same `(i, j)` radical-coupling move once per qualifying `k`. This +produces duplicate moves in the search and, for cases where multiple +`k` candidates exist, pads the search with no-ops. + +This block was present in old patched YARP too, but the patched workflow +*removed it* — it's the kind of cleanup that gets lost during refactors. + +**Fix.** Delete the entire `if bond_mat[i,i] % 2 != 0:` block. The +"real" move 4 (radical + neighbor unbound electrons) is the next block +in the function and remains intact. + +**Status.** Local commit `d98ae57` (part of the A+C+D bundle). Safe to +upstream — it's strictly dead code with no behavioral cost. Net effect +on the 144 sample: +1 TM OS fix, no organic regression. + +### Anti-finding A · `min_opt=False, min_win=0.5` on the second `gen_all_lstructs` call + +This was part of the old patched YARP's GH commit `fed9385`, switching the +final-pass search from greedy descent to "exploratory" (admit moves up to +0.5 score above the best). On the new YARP it is **net harmful**: + +- TM OS: 1 archive fixed, 3 archives introduced as new diffs vs slim +- Organic pytest: breaks `test_diazomethane_xyz`, `test_ester_xyz`, + `test_benzothiazole_smi` (all return wrong number of BEMs) + +It made it into `fed9385` as a carry-along from local development that was +never validated against the test suite. **Do not apply.** Keep the new +YARP's greedy default. + +### Anti-finding B · `w_rad` sign flip is cosmetic, not chemistry + +New YARP changed `w_rad` default from `+0.1` (old) to `-0.01` (new), with +docstring "radicals placed in favorable environments is weakly incentivized". + +This *looks* like a deliberate sign flip rewarding radicals. 
But old YARP +also flipped the sign in `rad_env`: + +```python +# old find_lewis (pre-final-pass): +rad_env = -np.sum(adj_mat * (0.1 * pol/(100+pol)), axis=1) + +# new bem_score (computed internally): +rad_env = np.sum(adj_mat * (pol/(100+pol)), axis=1) +``` + +So `w_rad × rad_env`: +- old: `+0.1 × (-0.1 × adj × pol/(100+pol)) = -0.01 × adj × pol/(100+pol)` +- new: `-0.01 × ( 1.0 × adj × pol/(100+pol)) = -0.01 × adj × pol/(100+pol)` + +**Algebraically identical.** Reverting `w_rad` to `+0.1` (with no other +change) had **zero effect** on the 144-sample diff count (55 → 55). + +In the FINAL stack we keep the revert anyway because it makes the +`w_rad >= 0` convention match downstream tooling — but it's cosmetic. +Don't lean on it for any chemistry argument. + +### Anti-finding C · The recursion limit (patch A) + +The old patched YARP raised `sys.setrecursionlimit(5000 → 100 000)`. On the +144-archive sample this was a no-op — no archive hit the 5 000 ceiling. +Kept in the FINAL stack as a safety net for very large molecules, but +not load-bearing. + +--- + +## 3 · Why the new YARP search biases toward HIGHER OS in TM cases + +This is the conceptual explanation for what was going on. Useful for the +upstream conversation. + +**The radical-environment rewriting** (anti-finding B) is *not* the cause, +despite looking suspicious — it's algebraically equivalent across old and +new. + +**The dominant cause** is the seed re-pool step (issue 3). The mechanism is: + +1. Pass-1 search runs with `w_aro=0` (aromaticity off, to avoid greedy + traps). Seeds converge to "all atoms have closed octets, no formal + charges where avoidable", which for a Cp ligand means the 5C ring is a + pi-system but each C atom shows `bond_mat[i,i] = 0` (electrons all in + sigma + pi bonds). +2. Pass-2 search turns aromaticity scoring on (`w_aro = -24`) and finds + the BEM where the Cp ring is *recognized as aromatic*. 
For Cp⁻ this + typically means one C carries a lone pair (`bond_mat[i,i] = 2`) and the + π system is complete. +3. Both BEMs survive into `adjust_metals`. Under the old algorithm, only + pass-2 survives, so only the aromatic Cp goes through adjust_metals. +4. `adjust_metals` reads each non-metal connection and asks: is this con + electron-sufficient (defs=0)? If yes → leave as L (dative, metal keeps + its electrons). If no, check `bond_mat[con,con] % 2`: odd → form + X-bond (subtract 1 from metal). Even → form Z-bond (subtract 2 from + metal). +5. **Aromatic Cp BEM**: each C has `defs=0` → all 5 bonds stay dative → + metal diagonal unchanged → low metal OS. +6. **Non-aromatic Cp BEM (from seed re-pool)**: each C has `defs>0` and + `bond_mat[c,c]=0` (even) → adjust_metals forms Z-bonds, draining 2 + electrons per Cp C from the metal until the GUARDs trip. Metal + diagonal goes from 6 (Cr⁰ / Mo⁰ / W⁰) to 0 (Cr⁶⁺ / Mo⁶⁺ / W⁶⁺). + +The re-pool was effectively letting a chemically wrong (non-aromatic Cp) +BEM into the final ranking, where it sometimes scored lower than the +correct (aromatic Cp) BEM and won. + +For organic systems there's no metal and `adjust_metals` is a no-op, so +seed BEMs can be re-pooled safely — they only affect the `mats_thresh` trim. + +--- + +## 4 · The final patch stack + +Branch: `zhao-final-20260619` at commit `2f7049d`, in the +`classy-yarp-final/` worktree. + +``` +2f7049d Drop patch B: revert min_opt=False,min_win=0.5 back to min_opt=True +9ebc8be Patch F: conditional seed-BEM re-pool (organic-only) +154d454 Revert w_rad default from -0.01 to +0.1 to match old patched YARP +d98ae57 Patches A-D to align Lewis search with old-YARP patched behavior +6f2f3cf Restore old-YARP property values for 5d/4d transition metals +``` + +Effective set of changes vs new YARP master: + +1. **properties.py** — restore values for None/missing 5d/4d TM dict entries +2. **A** — raise `sys.setrecursionlimit` to 100 000 (safety, no-op on bench) +3. 
**C** — remove the outer `for ind in range(...)` loop in `gen_all_lstructs` +4. **D** — remove the dead-code move 4-bis block in `valid_moves` +5. **F** — conditional seed-BEM re-pool (restored for organics, disabled for TMs) +6. **w_rad** — `+0.1` instead of `-0.01` (cosmetic; algebraically identical) + +**Explicitly excluded** (because bisection showed them net-harmful): + +- Patch B (`min_opt=False, min_win=0.5` on 2nd `gen_all_lstructs` call) + +--- + +## 5 · What we'd like to upstream + +In rough priority order: + +1. **Restore the 5d/4d TM properties.** Almost certainly an unintentional + refactor regression. Trivial PR. + +2. **Remove the outer `for ind in range(...)` loop in `gen_all_lstructs`.** + Performance + correctness win, no organic regression. The `ind` parameter + is already set correctly by every call site. Trivial PR with a benchmark. + +3. **Conditional seed-BEM re-pool, or remove it entirely with mats_thresh + adjusted.** This is the meaty conversation. Three options: + - **Conditional on TM presence** (our patch F) — minimum-risk. + - **Always disable** + tune `mats_thresh` to make organic tests pass without + re-pool — more invasive, possibly cleaner. + - **Re-pool before re-scoring** (rather than after) — would let aromaticity + re-rank the seeds so the non-aromatic Cp seed is *de*-prioritized + before reaching `adjust_metals`. Probably the architecturally right + fix. + I'd welcome a conversation about which direction the YARP team prefers + before sending a PR. + +4. **Remove the move 4-bis dead code in `valid_moves`.** Small hygiene PR. + +5. **Add a comment to `bmat_score`** explaining that the `w_rad / rad_env` + refactor preserves the old behavior algebraically. (Or rename one of them + to make the cancellation obvious.) 
+ +We are **not** going to ship our patches as a fork or anything — for the +GoldDIGR data deposit we're using the patched YARP locally and citing the +unmerged patches, but we want the upstream `classy-yarp` to converge to a +state where running `pip install yarp` on the published xyz files reproduces +our oxidation-state numbers within reason. + +--- + +## 6 · Reproducing the bench + +- Worktree: `/home/li1724/061226-YARP-again/Zhao-YARP/classy-yarp-final/` +- Bench input: `/tmp/sanity_stratified.txt` (144 archives, 8 / metal × 18 metals) +- Bench runner: `Scripts/v2/os_test_new_yarp/os_new_yarp_shard.py` +- Reference CSV: `/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/os_extraction/transition_metal_oxidation_states.csv` +- All bisection outputs: `Scripts/v2/os_test_new_yarp/bisect/*_strat.{csv,log}` +- pytest: `cd classy-yarp-final && pytest test/yarpecule/lewis/` + +--- + +## 7 · Residual divergences + +### 7.1 Stratified sample (144 archives, hard metals) + +19 / 144 archives (13 %) differ from slim under the FINAL stack. This +sample was weighted toward hard rare metals (W, Re, Os, Pt, Cr, Mn, Co, +Au — 8 archives per metal). Per-metal breakdown: + +| Metal | new > old | new < old | comment | +|---|---:|---:|---| +| Mo | 0 | 5 | consistent downward — probably dithiolene / non-innocent ligand class | +| Mn | 4 | 0 | all upward; one mechanism likely | +| Ni | 4 | 0 | all upward; emerged after seed-repool fix | +| Pd | 2 | 2 | mixed | +| Cr | 3 | 0 | all upward; less catastrophic than master's 17/17 | +| Pt | 1 | 2 | mixed; Pt-Pt dimer cases are hard | +| Fe | 3 | 0 | all upward | +| Re | 2 | 0 | all upward | +| Ir | 2 | 0 | all upward | +| Zr | 0 | 2 | both downward | +| Os | 0 | 1 | downward | + +On a stratified hard-metal sample, the residual pattern *looked* like a +systematic "upward bias" — Mn 4 ↑/0 ↓, Fe 3 ↑/0 ↓, Re 2 ↑/0 ↓, Ir 2 ↑/0 ↓ +— suggesting one remaining bug in seed quality or `valid_moves` ordering. 
+ +### 7.2 Full corpus (181 450 archives) + +The full-corpus comparison run (4 SLURM phases over ~24 h) tells a +different story. Per-metal diff counts and direction split: + +| Metal | total diffs | new > old | new < old | direction | +|---|---:|---:|---:|---| +| Pd | 9 226 | 4 651 | 4 575 | symmetric | +| Rh | 7 750 | 4 293 | 3 457 | slight up | +| Fe | 6 602 | 3 931 | 2 671 | slight up | +| Ni | 5 302 | 2 951 | 2 351 | slight up | +| Cu | 4 027 | 1 745 | 2 282 | down | +| Ru | 3 861 | 2 064 | 1 797 | symmetric | +| Ir | 3 847 | 2 547 | 1 300 | up | +| Co | 3 667 | 1 952 | 1 715 | symmetric | +| Mo | 2 728 | 1 355 | 1 373 | symmetric | +| Mn | 2 238 | 1 192 | 1 046 | symmetric | +| Au | 1 823 | 1 039 | 784 | slight up | +| Zr | 1 576 | 664 | 912 | down | +| Ti | 1 160 | 511 | 649 | down | +| Ag | 1 064 | 440 | 624 | down | +| Pt | 1 014 | 603 | 411 | up | + +The "systematic upward bias" pattern from the stratified sample is +**absent at corpus scale**. The dominant metals (Pd, Mo, Mn, Co, Ru) all +show near-symmetric splits. Several metals tilt slightly downward (Cu, +Zr, Ti, Ag). + +This strongly suggests the residual disagreements are **not a bug**, but +two YARP versions exploring slightly different paths through the BEM +landscape and landing on different but defensible local minima +(common when multiple Lewis structures are nearly degenerate). + +### 7.3 Why the two samples disagree so much + +| | stratified (144) | full corpus (181 450) | +|---|---:|---:| +| diff rate | 13 % | 20 % | +| Pd share of corpus diffs | n/a | 26 % | +| Rh share of corpus diffs | n/a | 22 % | +| W / Re / Os archives | 24 archives weighted in | <1 % of corpus | + +The stratified sample over-weighted exactly the metals where our patches +helped most. 
At the corpus scale, easy late TM organometallic chemistry +(Pd / Rh / Ni / Cu / Fe — the catalysis bread-and-butter) dominates, +and *those* metals never had a high diff rate in the stratified sample +(they typically just shift ±1 between BEM local minima). + +### 7.4 YARP errors + +10 of 181 450 archives (0.01 %) produced exceptions or SystemExit +during yarpecule construction under the FINAL stack. These are the +truly pathological cases — typically very large multi-metal complexes +where the BEM combinatorial search blows up. Documented separately in +section 8. + +### 7.5 Bottom line on residuals + +- **At the corpus scale, the FINAL stack reproduces the published OS + numbers on 80.25 % of TM archives.** No bug pattern in the remaining + 20 %; it's algorithm-choice noise consistent with two valid Lewis + searches converging differently. +- **All previously catastrophic high-OS bugs are eliminated.** No + Pt(VII), no Cr(VI) on neutral Cr clusters, no Cp-Ir(V), no Co(VII). +- **0.01 % YARP errors** is well within "ship it" tolerance for the + GoldDIGR data deposit. + +--- + +## 8 · The 10 YARP errors + +Of the 181 450 archives processed under the FINAL stack, **10** raised an +exception (`LewisStructureError` or `ValueError`) during yarpecule +construction. All 10 are characterized below. 
+ +| # | Archive | Side | Error | Formula | Class | +|---|---|---|---|---|---| +| 1 | `chem_200902004 / 41_0_1` | P | LewisStructureError | Ni·B₈·H₁₆ | metallaborane | +| 2 | `jp1c07253 / 50_-1_1` | P | ValueError | C₂₂·Pd₃·S₃·O₃·P₂·H₄₁ | Pd₃ sulfide cluster | +| 3 | `jp8b05759 / 19_-1_1` | P | LewisStructureError | Mo·B₁₉ | metallaborane | +| 4 | `jp803201q / 01_0_2` | P | LewisStructureError | Sc·B₄·H₁₆ | metallaborane | +| 5 | `c9cc00101h1 / 11_0_2` | P | ValueError | C₂₀·H₂₇·Ru·O₄ | Ru organometallic (radical) | +| 6 | `c9nj06335h1 / 06_-1_2` | P | LewisStructureError | Cr·B₁₆ | metallaborane | +| 7 | `c9nj06335h1 / 12_0_2` | R | LewisStructureError | Mn·B₁₆ | metallaborane | +| 8 | `c9nj06335h1 / 20_0_2` | R | LewisStructureError | Co·B₁₆ | metallaborane | +| 9 | `c9nj06335h1 / 22_0_1` | P | LewisStructureError | Ni·B₁₆ | metallaborane | +| 10 | `d0cc03656k1 / 01_0_2` | P | LewisStructureError | Ru·Pb₁₁·C₁₀·H₁₅ | Ru-Pb cluster | + +### 8.1 Pattern + +- **7 of 10 are metallaboranes** (M·Bₙ with n = 4 – 19). These are + Wade/Mingos polyhedral clusters where bonding is delocalized multi-center + (3c-2e or polyhedral skeletal bonding). The YARP Lewis search assumes + classical 2-center / 2-electron bonds with at most resonance between + them — there is no valid 2c-2e Lewis structure for a closo or nido + borane cage, so the search exhausts all moves and `gen_init` raises + `LewisStructureError("Incompatible charge state and adjacency matrix")`. +- **1 is a Pd₃ sulfide cluster** (jp1c07253). Trinuclear metals with + bridging chalcogenides similarly often need 3c-2e bonds. The + `ValueError` here is a downstream symptom of a malformed BEM emerging + from the failing search. +- **1 is a Ru·Pb₁₁ cluster** (d0cc03656k1). Heavy main-group + TM + multi-metal — same multi-center bonding issue. +- **1 is a Ru organometallic radical** (c9cc00101h1). This is the only + non-cluster failure; it also raises `ValueError`, and the multi-center + bonding explanation above does not obviously apply to it. + +### 8.2 Why this is benign + +- These 10 are **outside YARP's design scope**.
The Lewis-structure model + is fundamentally 2c-2e; clusters with delocalized skeletal bonding + cannot be expressed in that model. +- **4 of the 10 (40 %) are from one paper** (`c9nj06335h1`) studying a + series of analogous M·B₁₆ clusters. This is academic exploration of a + single anomalous bonding class, not 4 different bugs. +- **Often only one of the two sides fails.** Reactant and product + geometries have similar adjacency but slightly different bond patterns — + one half is sometimes workable. Our pipeline emits `ERR:` strings for + the failing side and OS for the working side, so downstream consumers + can still partially use these rows. +- **At 10 / 181 450 = 0.0055 %**, this is well below any noise floor + that would affect aggregate distributions like Figure 2. + +### 8.3 What to tell the YARP team + +The YARP maintainers may want to add an explicit "cluster molecule +detected, Lewis search not applicable" return value rather than raising +`LewisStructureError`, so downstream tools can branch on it. But there is +no realistic algorithmic fix — getting Wade's rule clusters into a 2c-2e +formalism would require a separate cluster-aware mode of YARP, well +beyond the scope of an OS-extraction patch. + +For our pipeline these 10 archives are quietly excluded from any +aggregate statistics (they only appear in the master CSV with `ERR:` +flags so consumers can detect them). + +--- + +## 9 · Spot-check: characterizing the corpus-wide diffs + +The 35 843 disagreement archives (with the 10 errors excluded) generate +**59 769 per-atom OS differences** (one archive can have multiple TMs). +This section breaks them down by magnitude, by sign, and by chemical +validity. + +### 9.1 Reduction in chemically impossible OS values + +The cleanest test of "is the new YARP better or worse" is: how many +atom-level OS values exceed the maximum group oxidation state for that +element (the standard chemistry constraint, e.g. 
Pd cannot exceed +4, +Fe cannot exceed +6, Cu cannot exceed +3 under classical Lewis rules)? + +The honest, apples-to-apples comparison restricts both CSVs to the same +181 450-archive deduplicated set (`Scripts/v2/os_test_new_yarp/dedup_tm_picks.txt`) +and sums atom-level OS counts across reactant + product: + +| Metal | OLD slim | NEW FINAL | Δ | direction | +|---|---:|---:|---:|---| +| Fe (group max 6) | 2 581 | 2 583 | +2 | flat | +| Pd (group max 4) | 2 483 | 2 520 | +37 | slight up | +| Cu (group max 3) | 1 474 | 1 404 | −70 | down | +| Ni (group max 4) | 1 139 | 1 188 | +49 | slight up | +| Co (group max 5) | 1 133 | 1 198 | +65 | slight up | +| Rh (group max 6) | 985 | 1 115 | +130 | up | +| Ir (group max 6) | 645 | 642 | −3 | flat | +| Zn (group max 2) | 390 | 376 | −14 | flat | +| Ag (group max 3) | 300 | 212 | **−88 (−29 %)** | **down** | +| Au (group max 5) | 161 | 91 | **−70 (−43 %)** | **down** | +| Cd (group max 2) | 94 | 90 | −4 | flat | +| Pt (group max 6) | 65 | 69 | +4 | flat | +| Hg (group max 2) | 27 | 27 | 0 | flat | +| **total** | **11 477** | **11 515** | **+38 (+0.3 %)** | **flat** | + +At the archive level (atoms restricted to apples-to-apples): + +| Type | count | meaning | +|---|---:|---| +| over-max in BOTH csvs | ~10 349 | corpus-wide oddities; not bug-related (likely cluster chemistry or unusual coordination geometries) | +| over-max in slim only | 1 124 | NEW FIX — patched stack corrected an old impossible OS | +| over-max in new only | 1 162 | true regressions in our stack | +| **net change in over-max** | **+38** | essentially zero at the 11k-over-max scale | + +**Interpretation.** At corpus scale on chemically-impossible OS counts, +the patched-new-YARP stack is **about even** with the old patched YARP. +Real wins on the heavy noble metals (Au −43 %, Ag −29 %), modest losses +on Pd/Rh/Co/Ni (+37 to +130 each). These changes nearly cancel. 
+ +This is a more sober reading than an earlier draft of this doc that +quoted a 78 % reduction; that figure compared the full 462k-row slim CSV +(with all charge/mult variants) against the 181k-row FINAL CSV and was +an apples-to-oranges artifact. + +What the patches *do* eliminate is the **catastrophic high-OS failure +mode** visible on the stratified sample — Cp-Cr → Cr(VI) on neutral +trimers, Pt(0) → Pt(VII), Cp-Ir → Ir(V). These hand-picked TM bugs +were producing one or two impossible OS values *per archive* on +specific molecule classes, which is a different and more harmful +failure than the uniform-distribution over-max baseline rate. + +### 9.2 Per-atom OS shift magnitude distribution + +How big are the disagreements when they happen? + +| \|Δ OS\| | count | % | cumulative % | +|---|---:|---:|---:| +| 1 | 34 251 | 57.31 % | 57.31 % | +| 2 | 21 899 | 36.64 % | 93.95 % | +| 3 | 1 998 | 3.34 % | 97.29 % | +| 4 | 1 396 | 2.34 % | 99.62 % | +| 5 | 75 | 0.13 % | 99.75 % | +| 6 | 100 | 0.17 % | 99.92 % | +| 7 | 7 | 0.01 % | 99.93 % | +| 8 | 36 | 0.06 % | 99.99 % | +| 9 | 2 | <0.01 % | 99.99 % | +| 10 | 5 | 0.01 % | 100.00 % | + +- **94 % of all per-atom diffs are |Δ| ≤ 2.** Classic Lewis-choice noise + and dative-vs-covalent ambiguities — both old and new values are + chemically defensible interpretations of the same geometry. +- **Mean signed Δ is +0.155 OS units** (median +1). A slight upward + drift, but vastly smaller than the rare-metal stratified sample's + apparent +4–6 unit shifts. +- **Mean |Δ| is 1.52 OS units.** That's the per-atom disagreement + scale across the entire corpus.
+ +### 9.3 Examples of the dominant ±1 diffs (Lewis-choice noise) + +Random sample at |Δ|=1 (verified all are chemically defensible +alternatives, not algorithmic bugs): + +| Archive | Side | Atom | old → new | +|---|---|---|---| +| ct3c00913 / 2003_TS_114-H | R | Mo18 | 3 → 2 (Mo-dithiolene non-innocence) | +| cctc201901439 / 20 | P | Ni0 | 1 → 2 (typical L vs X) | +| ja0c08362 / 69 | P | Y63 | 2 → 3 (Y carbene π-donation) | +| cs501687n / 35 | P | Re0 | 1 → 0 | +| cs6b02929 / 08 | R | Ru0 | 5 → 4 (anionic) | +| jo4c01682 / 3928 | R | Rh0 | 2 → 3 (Rh dimer) | + +### 9.4 |Δ|=4 and beyond + +The 1 396 atom-level |Δ|=4 cases are the most interesting "real +disagreement" tier. Sampling shows them clustering in: + +- Open-shell radicals with multiple plausible spin-localization choices +- Mixed-donor systems (C/N/O around the same metal) where adjust_metals + can flip multiple bonds at once +- Late-TM dimers where M-M bond electron partition is ambiguous + +The 36 |Δ|=8 cases are essentially all **gold cluster chemistry** +(Au₆H₆, Au₇H₂ from `jz3c03434` and `ct500068b`) — the same class as +the 10 yarpecule errors. For Au₆H₆ as a representative case: + +- Old YARP: one Au reports OS = +9 +- New YARP: same Au reports OS = +1 + +Neither value is physically meaningful — gold-hydride clusters are +delocalized polyhedral bonding where per-atom OS isn't a well-defined +quantity. But the new value (+1) is *less absurd* than the old (+9). + +### 9.4.5 Figure: regenerated dial-plot and per-metal histogram comparison + +Three figures saved alongside this doc: + +- `tm_os_dials_OLD.png` — full-circle radial dial-plot reproduction of the + published Figure 2, built from the deduped subset of the slim CSV. This is + the apples-to-apples reproduction of what fed the manuscript figure. +- `tm_os_dials_NEW.png` — same dial-plot, built from the FINAL CSV. At + this resolution the OLD and NEW look essentially identical to the eye, + consistent with the +0.3 % over-max delta. 
+- `tm_os_compare_OLD_vs_NEW.png` — paired bar chart per metal showing + OLD (navy) and NEW (gold) atom counts in each OS bin, with the + chemically-impossible region (OS > group max) shaded pink. This is + the most informative visualization for spotting actual differences. + +Observations from the bar-chart panel: + +- Most metals (Sc, Ti, V, Cr, Mn, Fe, Co, Ni, Mo, Ru, Rh, Pd, Ir, Pt, + W, Re, Os, Hf, Ta) — OLD and NEW bars overlap so closely you have + to squint to tell them apart. +- Cu: visibly lower NEW bar in the OS=4 bin (the over-max region). +- Ag, Au: visibly lower NEW bars across the high-OS tail — the −29 % + and −43 % wins are clear at the bin level. +- Zr, Ti: small downward shift in NEW for the higher-OS bins. +- Almost no metal shows NEW bars taller than OLD by more than ~5 % + in any single bin. + +The figure also confirms the spot-check narrative: corpus-wide OS +distributions are nearly unchanged between OLD and NEW, with the +patches improving the heavy-noble-metal tails and being neutral +elsewhere. + +### 9.5 Bottom line on spot-check + +- **Residual disagreement is benign noise, not a bug pattern.** 94 % of + per-atom diffs are |Δ| ≤ 2. +- **At corpus scale, on chemically-impossible OS counts, the patched + new YARP is about even** with the old patched YARP (+38 atoms, + +0.3 %). The catastrophic per-archive failures we fixed don't show + up in the corpus-wide over-max histogram because the underlying + per-corpus baseline rate (~6 % of late-TM atoms) dominates any + single-archive correction. +- **The Au and Ag tails *do* shrink visibly** (−43 % and −29 %) — these + are the metals where our patches matter most in a corpus average. +- **Dial-plot regeneration from the new FINAL CSV is expected to look + similar to the published plot** for most metals, with cleaner tails + for Au and Ag. The catastrophic single-archive bugs (Pt(VII), + Cr(VI)) are no longer present. 
diff --git a/zhao-patches-doc/YARP-3.0-OS-divergence-summary.md b/zhao-patches-doc/YARP-3.0-OS-divergence-summary.md new file mode 100644 index 00000000..810bcc25 --- /dev/null +++ b/zhao-patches-doc/YARP-3.0-OS-divergence-summary.md @@ -0,0 +1,134 @@ +# YARP 3.0 oxidation-state divergence — one-page summary + +*Zhao Li · 2026-06-19* + +## What's going on + +We rebuilt our oxidation-state (OS) extraction pipeline on the **new YARP** +refactor (classy-yarp / "YARP 3.0") and discovered it produces different +OS values than the **old YARP** version we used to publish the GoldDIGR +oxidation-state dial-plot. Several of the new YARP results were +**chemically impossible** — for example, neutral Cr-cluster complexes +reported as Cr(VI), a Pt complex assigned Pt(VII), and Cp-Ir species +shifted up by two units. + +After a structured investigation, we identified three concrete bugs in +the YARP refactor, patched them locally, and verified the patched version +recovers the published behavior on hard cases while keeping every existing +YARP test passing. + +## What we found — three real bugs in YARP 3.0 + +1. **Missing property values for 5d and 4d transition metals.** During the + refactor, several entries in YARP's elemental property tables (valence + electrons, polarizability, octet capacity) were left blank for atoms + like W, Re, Os, Pt, Mo, Hf. Any reaction touching those metals crashes + on lookup. We restored the original values from the previous YARP + version. + +2. **A redundant loop in the resonance-structure search.** The new code + re-walks every previously found Lewis structure at each search step, + making the algorithm ~10× slower and exploring a different distribution + of candidate structures. Removing the redundant loop both speeds the + pipeline up and improves agreement with the published OS numbers. + +3. 
**A "safety-net" step that misfires on organometallic species.** The + new YARP carries early-pass guess structures forward into the final + metal-ligand bond classification, even when later passes correctly + improved them. For organic molecules this is harmless and helpful. + For transition-metal complexes (especially with Cp-type ligands) it + lets a chemically wrong guess sneak through the metal-ligand bond + classifier, which then drains too many electrons from the metal and + reports a much higher oxidation state than reality. We made the + safety-net step conditional: kept for organics, disabled when a + transition metal is present. + +We also identified one change we **shouldn't** apply (an exploratory +search-mode option that hurts both organic and TM cases) and one apparent +sign flip in the scoring function that turned out to be cosmetic — the +behavior is mathematically identical to the old version. + +## What this means for GoldDIGR + +- **Published OS distribution (Figure 2 of the manuscript) is still valid.** + It was generated with the old (correctly-patched) YARP, and the + patched-new-YARP recovers the same answers on **80.25 %** of the full + 181 450-archive TM corpus, **89 %** on the stratified hard-metal + sample — and eliminates the catastrophic chemically impossible OS + values the unpatched new YARP produced (see the next bullet). +- **On chemically-impossible OS counts at corpus scale, the new stack + is about even with the old patched YARP.** It eliminates the + catastrophic single-archive failures (Pt(VII), Cr(VI), Cp-Ir(V), + Co(VII)) but the overall count of atoms above group-max changes by + only +0.3 % (11 477 → 11 515). Real wins on heavy noble metals: + Au −43 %, Ag −29 %. Other late-TM metals see small symmetric + shifts in either direction. Dial-plot regeneration is expected to + look similar to the published version, with slight improvements + in the Au / Ag tails. 
+- **For any future OS extraction on this corpus, we'll use the patched + YARP**, not the unmodified YARP 3.0 release. +- **The remaining ~20 % disagreement on the corpus** is not a bug + pattern. 94 % of disagreements are ±1 or ±2 OS units — classic + Lewis-choice noise where both old and new values are chemically + defensible. Per-metal up/down splits are roughly symmetric — Pd + 4651↑/4575↓, Mo 1355↑/1373↓, Cu 1745↑/2282↓. + +## Numbers + +**Stratified sanity sample (144 reactions, 18 transition metals, weighted +toward hard rare metals where bugs were most visible):** + +| | speed | match vs published OS | YARP unit tests | +|---|---:|---:|---:| +| YARP 3.0, raw | 1× | 65% | pass | +| YARP 3.0, patched (our version) | 10× | **89%** | pass | + +**Full corpus run (181 450 TM reactions, completed 2026-06-20):** + +| | value | +|---|---:| +| Archives processed | 181 450 | +| YARP crashes / errors | **10 (0.01 %)** | +| Full agreement with published OS values | **80.25 %** | +| Disagreements that are just ±1 or ±2 OS shifts | **94 %** of the 20 % | +| Chemically impossible OS atoms (atom-level) | OLD 11 477 → NEW 11 515 (essentially flat) | +| Reduction on heavy-noble-metal tails | Au −43 %, Ag −29 % | +| Catastrophic single-archive bugs (Pt(VII), Cr(VI), Cp-Ir(V)) | **all eliminated** | + +The full-corpus number is lower than the stratified one because the +sample was deliberately biased toward hard rare metals (W, Re, Os, Pt, +Cr…) where our patches added the most lift. The corpus is dominated +by Pd / Rh / Fe / Ni / Cu where the two YARP versions tend to disagree by +±1 OS unit on stochastic Lewis-structure choices. At corpus scale the +per-metal up/down split is roughly symmetric — i.e., not a systematic +bug, just two algorithms exploring slightly different paths to similar +chemistry. + +## Next steps + +1. 
**Mass-run the patched pipeline on all 181k unique TM reactions.** (Completed 2026-06-20; see *Numbers* above.) + Estimated SLURM cost: ~17 hours wall on 32-way parallel; no risk to the + archive data (read-only pipeline that writes a CSV). +2. **Send a short technical writeup to the YARP maintainers** so the three + bugs can be fixed upstream and the wider community benefits. The + property-table restoration and the redundant-loop removal are + uncontroversial; the safety-net conditional needs a short design + conversation. +3. **Optional follow-up on the residual disagreement (~20 % on the corpus, 11 % on the stratified sample)** if we decide the dial-plot + should be regenerated at finer agreement. Two characterizable + patterns are left (Mo-dithiolenes biased downward; first-row + mid-OS metals biased upward); each is a ~1-day investigation. + +## What we are *not* doing + +- Not forking YARP. The patches live as a documented local branch and + will be retired once upstream catches up. +- Not modifying the published archive (zip / tar.zst) files. The OS + extraction pipeline is read-only; results go to standalone CSVs. +- Not changing the published manuscript numbers based on the patched + YARP — those came from the old YARP and remain the reference. + +--- + +Full technical writeup with patch details, bisection methodology, and +evidence: `YARP-3.0-OS-divergence-investigation.md` in the same directory. 
diff --git a/zhao-patches-doc/bench_stratified_144/ABCD_strat.csv b/zhao-patches-doc/bench_stratified_144/ABCD_strat.csv new file mode 100644 index 00000000..2d4c6295 --- /dev/null +++ b/zhao-patches-doc/bench_stratified_144/ABCD_strat.csv @@ -0,0 +1,145 @@ +zip_path,reactant_metal_oxidation_states,product_metal_oxidation_states +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/17_0_1.zip,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:8,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/11_1_2.zip,W4:6;W5:6;W6:6;W7:6;W8:5;W9:6;W10:6;W11:6;W12:6;W13:6;W14:5;Ru51:1,W4:6;W5:6;W6:6;W7:6;W8:2;W9:5;W10:6;W11:6;W12:6;W13:6;W14:3;Ru51:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/32_0_1.zip,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/33_-1_2.zip,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:6;Ru50:5,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:6;Ru50:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/06_0_1.zip,Ru4:7;W6:6;W8:6;W10:6;W14:5;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6,Ru4:7;W6:6;W8:6;W10:5;W14:6;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7DT03200E/c7dt03200e1/41_0_1.zip,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:5;W63:6;W64:5;W65:6;W66:6;W67:6;W68:5;W69:6;W70:6;W71:5,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:6;W63:6;W64:6;W65:6;W66:6;W67:6;W68:6;W69:6;W70:5;W71:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.4c00304/cs4c00304_si_002/93_61-ts_0_1.zip,W0:4,W0:2 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp9054439/jp9054439_si_001/19_0_1.zip,W5:4,W5:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/44_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/01_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/33_-1_1.zip,Co0:3,Co0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/07_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/62_1_2.zip,Co0:2,Co0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CY02562F/c9cy02562f1/74_0_1.zip,Co0:5,Co0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.202104320/anie202104320-sup-0001-misc_information/12_0_1.zip,Co0:1,Co0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.8b00911/om8b00911_si_002/23_13-12-TSD_0_2.zip,Co0:4,Co0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/48_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/20_0_1.zip,Mn1:5,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/50_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/42_0_2.zip,Mn1:6,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/22_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3SC53469C/c3sc53469c1/07_0_1.zip,Mn0:5,Mn0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.1c00606/om1c00606_si_002/13_Coordinates_for_the_transition_state_isomerization_12H_to_12_0_1.zip,Mn0:5,Mn0:5 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202303861/chem202303861-sup-0001-misc_information/81_0_1.zip,Mn61:7,Mn61:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/17_0_1.zip,Cr3:6;Cr4:6,Cr3:6;Cr4:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/13_0_1.zip,Cr2:4;Cr3:6,Cr2:4;Cr3:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/19_0_1.zip,Cr0:6;Cr1:6,Cr0:6;Cr1:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/28_0_1.zip,Cr3:4;Cr4:2,Cr3:4;Cr4:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/14_0_1.zip,Cr2:1;Cr3:5,Cr2:1;Cr3:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.5b01029/om5b01029_si_002/59_TS_12c_-13e_QUARTET_0_1.zip,Cr0:0,Cr0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp111108p/jp111108p_si_001/206_0_2.zip,Cr6:1,Cr6:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50813G/c3dt50813g/08_0_1.zip,Cr11:2,Cr11:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/54_0_1.zip,Re36:4,Re36:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/53_0_2.zip,Re33:4,Re33:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2NJ00911K/d2nj00911k1/08_0_2.zip,Re13:6,Re13:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02188E/d1dt02188e1/14_0_2.zip,Re0:6,Re0:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2DT03819F/d2dt03819f1/03_-1_2.zip,Re26:7;Re34:6,Re26:7;Re34:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.201310991/anie_201310991_sm_miscellaneous_information/11_0_1.zip,Re0:7,Re0:7 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om900881x/om900881x_si_001/18_0_1.zip,Re3:0,Re3:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.9b04579/cs9b04579_si_001/25_0_1.zip,Re0:0,Re0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/25_0_1.zip,Os0:2,Os0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/24_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/01_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/11_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/02_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202101404/chem202101404-sup-0001-misc_information/51_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1SC04369B/d1sc04369b1/06_0_2.zip,Os0:1,Os0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.est.9b03509/es9b03509_si_001/33_0_2.zip,W4:6;W6:5;W8:5;W12:6;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:5;Os54:4,W4:6;W6:6;W8:6;W12:5;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:6;Os54:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/13_0_1.zip,Pt0:2,Pt0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/11_0_1.zip,Pt0:2,Pt0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/13_0_1.zip,Pt0:2;Pt1:2,Pt0:2;Pt1:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/28_0_1.zip,Pt0:2;Pt1:3,Pt0:2;Pt1:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/24_0_2.zip,Pt0:6;Pt1:5,Pt0:6;Pt1:5 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om500416n/om500416n_si_001/05_TS-RE-c_0_2.zip,Pt0:7,Pt0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00330/om0c00330_si_002/25_cis-3b_Au-assisted_C-H_OA_TS_product_0_2.zip,Au0:3;Pt2:2,Au0:3;Pt2:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00598/om0c00598_si_002/57_TS1-Pt_E_RB3LYP_0_2.zip,Pt22:3,Pt22:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/14_0_2.zip,Au12:2,Au12:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/04_0_2.zip,Au8:0,Au8:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/34_0_2.zip,Au8:0,Au8:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/40_0_2.zip,Au12:0,Au12:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/44_0_2.zip,Au1:0,Au1:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c04580/cs1c04580_si_001/14_0_2.zip,Au68:0,Au68:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00080/om0c00080_si_002/71_D-TSb1_0_2.zip,Au50:0,Au50:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02393A/d3dt02393a1/06_0_2.zip,Au17:1,Au17:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/38_0_1.zip,Fe0:3,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/83_-1_2.zip,Fe13:7,Fe13:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/243_0_1.zip,Fe3:2,Fe3:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/32_0_1.zip,Fe0:4,Fe0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/28_0_1.zip,Fe0:4,Fe0:4 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C6OB00001K/c6ob00001k1/37_0_2.zip,Fe18:1,Fe18:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202300763/chem202300763-sup-0001-misc_information/127_0_1.zip,Fe32:0,Fe32:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C5SC02012C/c5sc02012c1/33_0_1.zip,Fe0:2,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/01_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/07_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/11_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/02_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/20_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02745G/d3dt02745g1/7939_Pd-H-t_098_0_2.zip,Pd24:0,Pd24:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.9b04142/ja9b04142_si_001/51_0_2.zip,Pd27:1,Pd27:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acsomega.0c01528/ao0c01528_si_001/60_0_1.zip,Pd46:0,Pd46:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/44_0_2.zip,Ni18:1,Ni18:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/115_0_1.zip,Ni18:0,Ni18:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/107_0_1.zip,Ni11:2,Ni11:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/07_0_2.zip,Ni20:3,Ni20:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/28_0_1.zip,Ni20:0,Ni20:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.201304196/chem_201304196_sm_miscellaneous_information/04_0_1.zip,Ni9:2,Ni9:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp3052455/jp3052455_si_001/91_0_2.zip,Ni6:2,Ni6:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.2c02705/cs2c02705_si_001/117_0_2.zip,Ni0:1,Ni0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/109_Me_3P-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh15:1,Rh15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/120_6-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh45:3,Rh45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/110_1-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh42:5,Rh42:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/121_6-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh45:3,Rh45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/118_5-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh32:3,Rh32:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1126/sciadv.ado9607/sciadv.ado9607_sm/25_0_1.zip,Rh123:0,Rh123:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/ejoc.202100093/ejoc202100093-sup-0001-misc_information/38_0_1.zip,Rh28:0,Rh28:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.6b00036/om6b00036_si_002/17_TS-5_Total_0_2.zip,Rh0:2,Rh0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/164_0_1.zip,Ru68:4,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/207_0_1.zip,Ru68:3,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/208_0_1.zip,Ru68:3,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/58_0_1.zip,Ru65:4,Ru65:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/202_0_1.zip,Ru66:4,Ru66:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/cctc.201601647/cctc201601647-sup-0001-misc_information/04_0_1.zip,Ru0:4,Ru0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4CC09696G/c4cc09696g1/05_0_1.zip,Ru14:4,Ru14:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jpca.7b07226/jp7b07226_si_001/61_0_1.zip,Ru8:6;Ru9:8;Ru10:6;Ru11:8,Ru8:8;Ru9:6;Ru10:8;Ru11:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT04335H/d1dt04335h1/08_0_1.zip,Ir0:2,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/153_7-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir38:5,Ir38:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/144_OC-Ir-_Cp_OA_TS_singlet_0_1.zip,Ir15:5,Ir15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/147_Me_3P-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir15:3,Ir15:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/151_6-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir45:3,Ir45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.4c00177/om4c00177_si_002/35_TSBC-II-cb-R_Imaginary_0_1.zip,Ir0:1,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c02270/cs1c02270_si_001/14_0_1.zip,Ir0:0,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC02790A/d2sc02790a1/30_0_2.zip,Ir22:2,Ir22:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/82_0_2.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/110_0_2.zip,Mo0:1,Mo0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/12_0_1.zip,Mo0:2,Mo0:2 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/141_0_1.zip,Mo0:3,Mo0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/149_0_2.zip,Mo0:3,Mo0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.2c01967/dme.imi.irc.trj/34_Coordinates_from_ORCA-job_localscratch_gibacic.295_0_1.zip,Mo20:6,Mo20:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/450_TS-074-I_0_2.zip,Mo19:3,Mo19:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/1605_TS-312-G_0_2.zip,Mo23:4,Mo23:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/13_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/06_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/11_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/20_0_1.zip,Hf37:4,Hf37:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/07_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.chemmater.3c02836/cm3c02836_si_001/05_0_1.zip,Hf0:4,Hf0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp401443x/jp401443x_si_001/299_0_1.zip,Hf8:4,Hf8:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.7b00767/Possible-TS-XYZ/trans_12_re_Fig5_TS14_0_1.zip,Hf90:2,Hf90:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/07_0_1.zip,Ta0:1,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/03_0_1.zip,Ta0:1,Ta0:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/13_0_1.zip,Ta1:3;Os3:4,Ta1:3;Os3:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/18_0_1.zip,Ta0:5;Ir4:5,Ta0:3;Ir4:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/20_0_1.zip,Os5:4;Ta6:3,Os5:4;Ta6:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.7b01464/ic7b01464_si_001/118_0_1.zip,Ta0:5,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs400349p/cs400349p_si_001/22_0_1.zip,Ta12:5,Ta12:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om300421m/om300421m_si_004/04_0_1.zip,Ta0:0,Ta0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/19_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/06_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/15_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/20_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/22_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.0c03821/ja0c03821_si_001/14_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs5016436/cs5016436_si_001/68_0_2.zip,Zr13:3,Zr13:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om5003655/om5003655_si_002/273_Kol_tBuPh2_Zr_Me_Prn21_up_insTS.xyz_0_2.zip,Zr0:1,Zr0:1 diff --git a/zhao-patches-doc/bench_stratified_144/ABCDwE_strat.csv b/zhao-patches-doc/bench_stratified_144/ABCDwE_strat.csv new file mode 100644 index 00000000..1c42d2cc --- /dev/null +++ b/zhao-patches-doc/bench_stratified_144/ABCDwE_strat.csv @@ -0,0 +1,145 @@ 
+zip_path,reactant_metal_oxidation_states,product_metal_oxidation_states +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/17_0_1.zip,W4:6;W5:6;W6:5;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:7,W4:6;W5:6;W6:5;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/11_1_2.zip,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:6;W14:6;Ru51:2,W4:6;W5:6;W6:6;W7:6;W8:2;W9:5;W10:6;W11:6;W12:6;W13:6;W14:4;Ru51:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/32_0_1.zip,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/33_-1_2.zip,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:5;Ru50:4,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:5;Ru50:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/06_0_1.zip,Ru4:7;W6:6;W8:6;W10:6;W14:5;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6,Ru4:7;W6:6;W8:6;W10:5;W14:6;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7DT03200E/c7dt03200e1/41_0_1.zip,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:5;W63:6;W64:5;W65:6;W66:6;W67:6;W68:5;W69:6;W70:6;W71:5,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:6;W63:6;W64:6;W65:6;W66:6;W67:6;W68:6;W69:6;W70:5;W71:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.4c00304/cs4c00304_si_002/93_61-ts_0_1.zip,W0:4,W0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp9054439/jp9054439_si_001/19_0_1.zip,W5:4,W5:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/44_0_2.zip,Co0:0,Co0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/01_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/33_-1_1.zip,Co0:3,Co0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/07_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/62_1_2.zip,Co0:2,Co0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CY02562F/c9cy02562f1/74_0_1.zip,Co0:1,Co0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.202104320/anie202104320-sup-0001-misc_information/12_0_1.zip,Co0:1,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.8b00911/om8b00911_si_002/23_13-12-TSD_0_2.zip,Co0:4,Co0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/48_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/20_0_1.zip,Mn1:5,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/50_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/42_0_2.zip,Mn1:5,Mn1:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/22_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3SC53469C/c3sc53469c1/07_0_1.zip,Mn0:5,Mn0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.1c00606/om1c00606_si_002/13_Coordinates_for_the_transition_state_isomerization_12H_to_12_0_1.zip,Mn0:1,Mn0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202303861/chem202303861-sup-0001-misc_information/81_0_1.zip,Mn61:1,Mn61:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/17_0_1.zip,Cr3:6;Cr4:6,Cr3:6;Cr4:6 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/13_0_1.zip,Cr2:6;Cr3:6,Cr2:6;Cr3:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/19_0_1.zip,Cr0:0;Cr1:0,Cr0:0;Cr1:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/28_0_1.zip,Cr3:0;Cr4:2,Cr3:1;Cr4:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/14_0_1.zip,Cr2:0;Cr3:0,Cr2:0;Cr3:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.5b01029/om5b01029_si_002/59_TS_12c_-13e_QUARTET_0_1.zip,Cr0:0,Cr0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp111108p/jp111108p_si_001/206_0_2.zip,Cr6:1,Cr6:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50813G/c3dt50813g/08_0_1.zip,Cr11:0,Cr11:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/54_0_1.zip,Re36:0,Re36:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/53_0_2.zip,Re33:0,Re33:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2NJ00911K/d2nj00911k1/08_0_2.zip,Re13:6,Re13:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02188E/d1dt02188e1/14_0_2.zip,Re0:0,Re0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2DT03819F/d2dt03819f1/03_-1_2.zip,Re26:7;Re34:6,Re26:7;Re34:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.201310991/anie_201310991_sm_miscellaneous_information/11_0_1.zip,Re0:7,Re0:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om900881x/om900881x_si_001/18_0_1.zip,Re3:0,Re3:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.9b04579/cs9b04579_si_001/25_0_1.zip,Re0:0,Re0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/25_0_1.zip,Os0:2,Os0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/24_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/01_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/11_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/02_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202101404/chem202101404-sup-0001-misc_information/51_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1SC04369B/d1sc04369b1/06_0_2.zip,Os0:1,Os0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.est.9b03509/es9b03509_si_001/33_0_2.zip,W4:6;W6:5;W8:5;W12:6;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:5;Os54:4,W4:6;W6:6;W8:6;W12:5;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:6;Os54:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/13_0_1.zip,Pt0:0,Pt0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/11_0_1.zip,Pt0:0,Pt0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/13_0_1.zip,Pt0:2;Pt1:2,Pt0:1;Pt1:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/28_0_1.zip,Pt0:2;Pt1:3,Pt0:2;Pt1:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/24_0_2.zip,Pt0:6;Pt1:5,Pt0:6;Pt1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om500416n/om500416n_si_001/05_TS-RE-c_0_2.zip,Pt0:3,Pt0:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00330/om0c00330_si_002/25_cis-3b_Au-assisted_C-H_OA_TS_product_0_2.zip,Au0:3;Pt2:2,Au0:3;Pt2:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00598/om0c00598_si_002/57_TS1-Pt_E_RB3LYP_0_2.zip,Pt22:3,Pt22:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/14_0_2.zip,Au12:2,Au12:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/04_0_2.zip,Au8:0,Au8:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/34_0_2.zip,Au8:0,Au8:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/40_0_2.zip,Au12:0,Au12:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/44_0_2.zip,Au1:0,Au1:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c04580/cs1c04580_si_001/14_0_2.zip,Au68:0,Au68:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00080/om0c00080_si_002/71_D-TSb1_0_2.zip,Au50:0,Au50:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02393A/d3dt02393a1/06_0_2.zip,Au17:1,Au17:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/38_0_1.zip,Fe0:1,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/83_-1_2.zip,Fe13:3,Fe13:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/243_0_1.zip,Fe3:2,Fe3:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/32_0_1.zip,Fe0:2,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/28_0_1.zip,Fe0:2,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C6OB00001K/c6ob00001k1/37_0_2.zip,Fe18:1,Fe18:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202300763/chem202300763-sup-0001-misc_information/127_0_1.zip,Fe32:0,Fe32:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C5SC02012C/c5sc02012c1/33_0_1.zip,Fe0:2,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/01_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/07_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/11_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/02_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/20_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02745G/d3dt02745g1/7939_Pd-H-t_098_0_2.zip,Pd24:0,Pd24:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.9b04142/ja9b04142_si_001/51_0_2.zip,Pd27:1,Pd27:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acsomega.0c01528/ao0c01528_si_001/60_0_1.zip,Pd46:0,Pd46:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/44_0_2.zip,Ni18:2,Ni18:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/115_0_1.zip,Ni18:0,Ni18:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/107_0_1.zip,Ni11:2,Ni11:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/07_0_2.zip,Ni20:2,Ni20:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/28_0_1.zip,Ni20:0,Ni20:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.201304196/chem_201304196_sm_miscellaneous_information/04_0_1.zip,Ni9:2,Ni9:2 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp3052455/jp3052455_si_001/91_0_2.zip,Ni6:2,Ni6:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.2c02705/cs2c02705_si_001/117_0_2.zip,Ni0:1,Ni0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/109_Me_3P-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh15:1,Rh15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/120_6-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh45:1,Rh45:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/110_1-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh42:3,Rh42:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/121_6-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh45:1,Rh45:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/118_5-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh32:3,Rh32:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1126/sciadv.ado9607/sciadv.ado9607_sm/25_0_1.zip,Rh123:0,Rh123:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/ejoc.202100093/ejoc202100093-sup-0001-misc_information/38_0_1.zip,Rh28:0,Rh28:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.6b00036/om6b00036_si_002/17_TS-5_Total_0_2.zip,Rh0:2,Rh0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/164_0_1.zip,Ru68:2,Ru68:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/207_0_1.zip,Ru68:1,Ru68:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/208_0_1.zip,Ru68:1,Ru68:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/58_0_1.zip,Ru65:2,Ru65:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/202_0_1.zip,Ru66:2,Ru66:2 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/cctc.201601647/cctc201601647-sup-0001-misc_information/04_0_1.zip,Ru0:4,Ru0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4CC09696G/c4cc09696g1/05_0_1.zip,Ru14:4,Ru14:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jpca.7b07226/jp7b07226_si_001/61_0_1.zip,Ru8:6;Ru9:8;Ru10:6;Ru11:8,Ru8:8;Ru9:6;Ru10:8;Ru11:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT04335H/d1dt04335h1/08_0_1.zip,Ir0:3,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/153_7-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir38:3,Ir38:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/144_OC-Ir-_Cp_OA_TS_singlet_0_1.zip,Ir15:3,Ir15:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/147_Me_3P-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir15:3,Ir15:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/151_6-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir45:1,Ir45:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.4c00177/om4c00177_si_002/35_TSBC-II-cb-R_Imaginary_0_1.zip,Ir0:1,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c02270/cs1c02270_si_001/14_0_1.zip,Ir0:0,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC02790A/d2sc02790a1/30_0_2.zip,Ir22:2,Ir22:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/82_0_2.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/110_0_2.zip,Mo0:1,Mo0:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/12_0_1.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/141_0_1.zip,Mo0:3,Mo0:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/149_0_2.zip,Mo0:3,Mo0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.2c01967/dme.imi.irc.trj/34_Coordinates_from_ORCA-job_localscratch_gibacic.295_0_1.zip,Mo20:6,Mo20:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/450_TS-074-I_0_2.zip,Mo19:1,Mo19:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/1605_TS-312-G_0_2.zip,Mo23:4,Mo23:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/13_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/06_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/11_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/20_0_1.zip,Hf37:4,Hf37:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/07_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.chemmater.3c02836/cm3c02836_si_001/05_0_1.zip,Hf0:4,Hf0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp401443x/jp401443x_si_001/299_0_1.zip,Hf8:2,Hf8:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.7b00767/Possible-TS-XYZ/trans_12_re_Fig5_TS14_0_1.zip,Hf90:2,Hf90:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/07_0_1.zip,Ta0:1,Ta0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/03_0_1.zip,Ta0:1,Ta0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/13_0_1.zip,Ta1:3;Os3:4,Ta1:3;Os3:4 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/18_0_1.zip,Ta0:5;Ir4:3,Ta0:3;Ir4:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/20_0_1.zip,Os5:4;Ta6:3,Os5:4;Ta6:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.7b01464/ic7b01464_si_001/118_0_1.zip,Ta0:5,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs400349p/cs400349p_si_001/22_0_1.zip,Ta12:5,Ta12:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om300421m/om300421m_si_004/04_0_1.zip,Ta0:0,Ta0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/19_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/06_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/15_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/20_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/22_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.0c03821/ja0c03821_si_001/14_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs5016436/cs5016436_si_001/68_0_2.zip,Zr13:3,Zr13:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om5003655/om5003655_si_002/273_Kol_tBuPh2_Zr_Me_Prn21_up_insTS.xyz_0_2.zip,Zr0:1,Zr0:1 diff --git a/zhao-patches-doc/bench_stratified_144/ABCDw_strat.csv b/zhao-patches-doc/bench_stratified_144/ABCDw_strat.csv new file mode 100644 index 00000000..c25aff83 --- /dev/null +++ b/zhao-patches-doc/bench_stratified_144/ABCDw_strat.csv @@ -0,0 +1,145 @@ +zip_path,reactant_metal_oxidation_states,product_metal_oxidation_states 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/17_0_1.zip,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:8,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/11_1_2.zip,W4:6;W5:6;W6:6;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:2,W4:6;W5:6;W6:6;W7:6;W8:2;W9:6;W10:6;W11:6;W12:6;W13:6;W14:4;Ru51:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/32_0_1.zip,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/33_-1_2.zip,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:6;Ru50:5,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:6;Ru50:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/06_0_1.zip,Ru4:7;W6:6;W8:6;W10:6;W14:5;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6,Ru4:7;W6:6;W8:6;W10:5;W14:6;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7DT03200E/c7dt03200e1/41_0_1.zip,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:5;W63:6;W64:5;W65:6;W66:6;W67:6;W68:5;W69:6;W70:6;W71:5,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:6;W63:6;W64:6;W65:6;W66:6;W67:6;W68:6;W69:6;W70:5;W71:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.4c00304/cs4c00304_si_002/93_61-ts_0_1.zip,W0:4,W0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp9054439/jp9054439_si_001/19_0_1.zip,W5:4,W5:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/44_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/01_0_2.zip,Co0:0,Co0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/33_-1_1.zip,Co0:3,Co0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/07_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/62_1_2.zip,Co0:2,Co0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CY02562F/c9cy02562f1/74_0_1.zip,Co0:5,Co0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.202104320/anie202104320-sup-0001-misc_information/12_0_1.zip,Co0:1,Co0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.8b00911/om8b00911_si_002/23_13-12-TSD_0_2.zip,Co0:4,Co0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/48_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/20_0_1.zip,Mn1:5,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/50_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/42_0_2.zip,Mn1:6,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/22_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3SC53469C/c3sc53469c1/07_0_1.zip,Mn0:5,Mn0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.1c00606/om1c00606_si_002/13_Coordinates_for_the_transition_state_isomerization_12H_to_12_0_1.zip,Mn0:5,Mn0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202303861/chem202303861-sup-0001-misc_information/81_0_1.zip,Mn61:7,Mn61:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/17_0_1.zip,Cr3:6;Cr4:6,Cr3:6;Cr4:6 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/13_0_1.zip,Cr2:4;Cr3:6,Cr2:4;Cr3:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/19_0_1.zip,Cr0:6;Cr1:6,Cr0:6;Cr1:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/28_0_1.zip,Cr3:4;Cr4:2,Cr3:4;Cr4:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/14_0_1.zip,Cr2:1;Cr3:5,Cr2:1;Cr3:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.5b01029/om5b01029_si_002/59_TS_12c_-13e_QUARTET_0_1.zip,Cr0:0,Cr0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp111108p/jp111108p_si_001/206_0_2.zip,Cr6:1,Cr6:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50813G/c3dt50813g/08_0_1.zip,Cr11:2,Cr11:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/54_0_1.zip,Re36:4,Re36:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/53_0_2.zip,Re33:4,Re33:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2NJ00911K/d2nj00911k1/08_0_2.zip,Re13:6,Re13:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02188E/d1dt02188e1/14_0_2.zip,Re0:6,Re0:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2DT03819F/d2dt03819f1/03_-1_2.zip,Re26:7;Re34:6,Re26:7;Re34:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.201310991/anie_201310991_sm_miscellaneous_information/11_0_1.zip,Re0:7,Re0:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om900881x/om900881x_si_001/18_0_1.zip,Re3:0,Re3:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.9b04579/cs9b04579_si_001/25_0_1.zip,Re0:0,Re0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/25_0_1.zip,Os0:2,Os0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/24_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/01_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/11_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/02_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202101404/chem202101404-sup-0001-misc_information/51_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1SC04369B/d1sc04369b1/06_0_2.zip,Os0:1,Os0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.est.9b03509/es9b03509_si_001/33_0_2.zip,W4:6;W6:5;W8:5;W12:6;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:5;Os54:4,W4:6;W6:6;W8:6;W12:5;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:6;Os54:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/13_0_1.zip,Pt0:2,Pt0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/11_0_1.zip,Pt0:2,Pt0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/13_0_1.zip,Pt0:2;Pt1:2,Pt0:2;Pt1:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/28_0_1.zip,Pt0:2;Pt1:3,Pt0:2;Pt1:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/24_0_2.zip,Pt0:6;Pt1:5,Pt0:6;Pt1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om500416n/om500416n_si_001/05_TS-RE-c_0_2.zip,Pt0:7,Pt0:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00330/om0c00330_si_002/25_cis-3b_Au-assisted_C-H_OA_TS_product_0_2.zip,Au0:3;Pt2:2,Au0:3;Pt2:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00598/om0c00598_si_002/57_TS1-Pt_E_RB3LYP_0_2.zip,Pt22:3,Pt22:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/14_0_2.zip,Au12:2,Au12:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/04_0_2.zip,Au8:0,Au8:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/34_0_2.zip,Au8:0,Au8:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/40_0_2.zip,Au12:0,Au12:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/44_0_2.zip,Au1:0,Au1:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c04580/cs1c04580_si_001/14_0_2.zip,Au68:0,Au68:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00080/om0c00080_si_002/71_D-TSb1_0_2.zip,Au50:0,Au50:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02393A/d3dt02393a1/06_0_2.zip,Au17:1,Au17:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/38_0_1.zip,Fe0:3,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/83_-1_2.zip,Fe13:7,Fe13:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/243_0_1.zip,Fe3:2,Fe3:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/32_0_1.zip,Fe0:4,Fe0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/28_0_1.zip,Fe0:4,Fe0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C6OB00001K/c6ob00001k1/37_0_2.zip,Fe18:1,Fe18:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202300763/chem202300763-sup-0001-misc_information/127_0_1.zip,Fe32:0,Fe32:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C5SC02012C/c5sc02012c1/33_0_1.zip,Fe0:2,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/01_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/07_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/11_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/02_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/20_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02745G/d3dt02745g1/7939_Pd-H-t_098_0_2.zip,Pd24:0,Pd24:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.9b04142/ja9b04142_si_001/51_0_2.zip,Pd27:1,Pd27:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acsomega.0c01528/ao0c01528_si_001/60_0_1.zip,Pd46:0,Pd46:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/44_0_2.zip,Ni18:1,Ni18:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/115_0_1.zip,Ni18:0,Ni18:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/107_0_1.zip,Ni11:2,Ni11:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/07_0_2.zip,Ni20:3,Ni20:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/28_0_1.zip,Ni20:0,Ni20:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.201304196/chem_201304196_sm_miscellaneous_information/04_0_1.zip,Ni9:2,Ni9:2 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp3052455/jp3052455_si_001/91_0_2.zip,Ni6:2,Ni6:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.2c02705/cs2c02705_si_001/117_0_2.zip,Ni0:1,Ni0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/109_Me_3P-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh15:1,Rh15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/120_6-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh45:3,Rh45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/110_1-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh42:5,Rh42:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/121_6-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh45:3,Rh45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/118_5-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh32:3,Rh32:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1126/sciadv.ado9607/sciadv.ado9607_sm/25_0_1.zip,Rh123:0,Rh123:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/ejoc.202100093/ejoc202100093-sup-0001-misc_information/38_0_1.zip,Rh28:0,Rh28:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.6b00036/om6b00036_si_002/17_TS-5_Total_0_2.zip,Rh0:2,Rh0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/164_0_1.zip,Ru68:4,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/207_0_1.zip,Ru68:3,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/208_0_1.zip,Ru68:3,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/58_0_1.zip,Ru65:4,Ru65:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/202_0_1.zip,Ru66:4,Ru66:4 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/cctc.201601647/cctc201601647-sup-0001-misc_information/04_0_1.zip,Ru0:4,Ru0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4CC09696G/c4cc09696g1/05_0_1.zip,Ru14:4,Ru14:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jpca.7b07226/jp7b07226_si_001/61_0_1.zip,Ru8:6;Ru9:8;Ru10:6;Ru11:8,Ru8:8;Ru9:6;Ru10:8;Ru11:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT04335H/d1dt04335h1/08_0_1.zip,Ir0:2,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/153_7-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir38:5,Ir38:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/144_OC-Ir-_Cp_OA_TS_singlet_0_1.zip,Ir15:5,Ir15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/147_Me_3P-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir15:3,Ir15:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/151_6-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir45:3,Ir45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.4c00177/om4c00177_si_002/35_TSBC-II-cb-R_Imaginary_0_1.zip,Ir0:1,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c02270/cs1c02270_si_001/14_0_1.zip,Ir0:0,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC02790A/d2sc02790a1/30_0_2.zip,Ir22:2,Ir22:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/82_0_2.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/110_0_2.zip,Mo0:1,Mo0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/12_0_1.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/141_0_1.zip,Mo0:3,Mo0:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/149_0_2.zip,Mo0:3,Mo0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.2c01967/dme.imi.irc.trj/34_Coordinates_from_ORCA-job_localscratch_gibacic.295_0_1.zip,Mo20:6,Mo20:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/450_TS-074-I_0_2.zip,Mo19:3,Mo19:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/1605_TS-312-G_0_2.zip,Mo23:4,Mo23:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/13_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/06_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/11_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/20_0_1.zip,Hf37:4,Hf37:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/07_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.chemmater.3c02836/cm3c02836_si_001/05_0_1.zip,Hf0:4,Hf0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp401443x/jp401443x_si_001/299_0_1.zip,Hf8:4,Hf8:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.7b00767/Possible-TS-XYZ/trans_12_re_Fig5_TS14_0_1.zip,Hf90:2,Hf90:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/07_0_1.zip,Ta0:1,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/03_0_1.zip,Ta0:1,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/13_0_1.zip,Ta1:3;Os3:4,Ta1:3;Os3:4 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/18_0_1.zip,Ta0:5;Ir4:5,Ta0:3;Ir4:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/20_0_1.zip,Os5:4;Ta6:3,Os5:4;Ta6:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.7b01464/ic7b01464_si_001/118_0_1.zip,Ta0:5,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs400349p/cs400349p_si_001/22_0_1.zip,Ta12:5,Ta12:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om300421m/om300421m_si_004/04_0_1.zip,Ta0:0,Ta0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/19_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/06_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/15_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/20_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/22_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.0c03821/ja0c03821_si_001/14_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs5016436/cs5016436_si_001/68_0_2.zip,Zr13:3,Zr13:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om5003655/om5003655_si_002/273_Kol_tBuPh2_Zr_Me_Prn21_up_insTS.xyz_0_2.zip,Zr0:1,Zr0:1 diff --git a/zhao-patches-doc/bench_stratified_144/FINAL_strat.csv b/zhao-patches-doc/bench_stratified_144/FINAL_strat.csv new file mode 100644 index 00000000..b4b5208a --- /dev/null +++ b/zhao-patches-doc/bench_stratified_144/FINAL_strat.csv @@ -0,0 +1,145 @@ +zip_path,reactant_metal_oxidation_states,product_metal_oxidation_states 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/17_0_1.zip,W4:6;W5:6;W6:5;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:7,W4:6;W5:6;W6:5;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/11_1_2.zip,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:6;W14:6;Ru51:2,W4:6;W5:6;W6:6;W7:6;W8:2;W9:5;W10:6;W11:6;W12:6;W13:6;W14:4;Ru51:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/32_0_1.zip,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/33_-1_2.zip,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:5;Ru50:4,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:5;Ru50:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/06_0_1.zip,Ru4:7;W6:6;W8:6;W10:6;W14:5;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6,Ru4:7;W6:6;W8:6;W10:5;W14:6;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7DT03200E/c7dt03200e1/41_0_1.zip,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:5;W63:6;W64:5;W65:6;W66:6;W67:6;W68:5;W69:6;W70:6;W71:5,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:6;W63:6;W64:6;W65:6;W66:6;W67:6;W68:6;W69:6;W70:5;W71:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.4c00304/cs4c00304_si_002/93_61-ts_0_1.zip,W0:4,W0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp9054439/jp9054439_si_001/19_0_1.zip,W5:4,W5:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/44_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/01_0_2.zip,Co0:0,Co0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/33_-1_1.zip,Co0:3,Co0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/07_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/62_1_2.zip,Co0:2,Co0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CY02562F/c9cy02562f1/74_0_1.zip,Co0:1,Co0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.202104320/anie202104320-sup-0001-misc_information/12_0_1.zip,Co0:1,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.8b00911/om8b00911_si_002/23_13-12-TSD_0_2.zip,Co0:4,Co0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/48_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/20_0_1.zip,Mn1:5,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/50_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/42_0_2.zip,Mn1:5,Mn1:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/22_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3SC53469C/c3sc53469c1/07_0_1.zip,Mn0:5,Mn0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.1c00606/om1c00606_si_002/13_Coordinates_for_the_transition_state_isomerization_12H_to_12_0_1.zip,Mn0:1,Mn0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202303861/chem202303861-sup-0001-misc_information/81_0_1.zip,Mn61:1,Mn61:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/17_0_1.zip,Cr3:6;Cr4:6,Cr3:6;Cr4:6 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/13_0_1.zip,Cr2:6;Cr3:6,Cr2:6;Cr3:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/19_0_1.zip,Cr0:0;Cr1:0,Cr0:0;Cr1:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/28_0_1.zip,Cr3:0;Cr4:2,Cr3:2;Cr4:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/14_0_1.zip,Cr2:0;Cr3:0,Cr2:0;Cr3:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.5b01029/om5b01029_si_002/59_TS_12c_-13e_QUARTET_0_1.zip,Cr0:2,Cr0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp111108p/jp111108p_si_001/206_0_2.zip,Cr6:1,Cr6:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50813G/c3dt50813g/08_0_1.zip,Cr11:0,Cr11:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/54_0_1.zip,Re36:0,Re36:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/53_0_2.zip,Re33:0,Re33:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2NJ00911K/d2nj00911k1/08_0_2.zip,Re13:6,Re13:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02188E/d1dt02188e1/14_0_2.zip,Re0:0,Re0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2DT03819F/d2dt03819f1/03_-1_2.zip,Re26:7;Re34:6,Re26:7;Re34:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.201310991/anie_201310991_sm_miscellaneous_information/11_0_1.zip,Re0:7,Re0:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om900881x/om900881x_si_001/18_0_1.zip,Re3:0,Re3:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.9b04579/cs9b04579_si_001/25_0_1.zip,Re0:0,Re0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/25_0_1.zip,Os0:2,Os0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/24_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/01_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/11_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/02_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202101404/chem202101404-sup-0001-misc_information/51_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1SC04369B/d1sc04369b1/06_0_2.zip,Os0:1,Os0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.est.9b03509/es9b03509_si_001/33_0_2.zip,W4:6;W6:5;W8:5;W12:6;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:5;Os54:4,W4:6;W6:6;W8:6;W12:5;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:6;Os54:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/13_0_1.zip,Pt0:0,Pt0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/11_0_1.zip,Pt0:0,Pt0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/13_0_1.zip,Pt0:2;Pt1:2,Pt0:1;Pt1:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/28_0_1.zip,Pt0:3;Pt1:2,Pt0:2;Pt1:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/24_0_2.zip,Pt0:6;Pt1:5,Pt0:6;Pt1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om500416n/om500416n_si_001/05_TS-RE-c_0_2.zip,Pt0:3,Pt0:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00330/om0c00330_si_002/25_cis-3b_Au-assisted_C-H_OA_TS_product_0_2.zip,Au0:3;Pt2:2,Au0:3;Pt2:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00598/om0c00598_si_002/57_TS1-Pt_E_RB3LYP_0_2.zip,Pt22:3,Pt22:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/14_0_2.zip,Au12:2,Au12:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/04_0_2.zip,Au8:0,Au8:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/34_0_2.zip,Au8:0,Au8:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/40_0_2.zip,Au12:0,Au12:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/44_0_2.zip,Au1:0,Au1:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c04580/cs1c04580_si_001/14_0_2.zip,Au68:0,Au68:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00080/om0c00080_si_002/71_D-TSb1_0_2.zip,Au50:0,Au50:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02393A/d3dt02393a1/06_0_2.zip,Au17:1,Au17:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/38_0_1.zip,Fe0:1,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/83_-1_2.zip,Fe13:3,Fe13:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/243_0_1.zip,Fe3:2,Fe3:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/32_0_1.zip,Fe0:2,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/28_0_1.zip,Fe0:2,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C6OB00001K/c6ob00001k1/37_0_2.zip,Fe18:1,Fe18:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202300763/chem202300763-sup-0001-misc_information/127_0_1.zip,Fe32:0,Fe32:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C5SC02012C/c5sc02012c1/33_0_1.zip,Fe0:2,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/01_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/07_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/11_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/02_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/20_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02745G/d3dt02745g1/7939_Pd-H-t_098_0_2.zip,Pd24:0,Pd24:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.9b04142/ja9b04142_si_001/51_0_2.zip,Pd27:2,Pd27:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acsomega.0c01528/ao0c01528_si_001/60_0_1.zip,Pd46:0,Pd46:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/44_0_2.zip,Ni18:2,Ni18:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/115_0_1.zip,Ni18:0,Ni18:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/107_0_1.zip,Ni11:2,Ni11:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/07_0_2.zip,Ni20:2,Ni20:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/28_0_1.zip,Ni20:0,Ni20:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.201304196/chem_201304196_sm_miscellaneous_information/04_0_1.zip,Ni9:2,Ni9:2 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp3052455/jp3052455_si_001/91_0_2.zip,Ni6:1,Ni6:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.2c02705/cs2c02705_si_001/117_0_2.zip,Ni0:1,Ni0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/109_Me_3P-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh15:1,Rh15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/120_6-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh45:1,Rh45:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/110_1-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh42:3,Rh42:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/121_6-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh45:1,Rh45:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/118_5-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh32:3,Rh32:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1126/sciadv.ado9607/sciadv.ado9607_sm/25_0_1.zip,Rh123:0,Rh123:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/ejoc.202100093/ejoc202100093-sup-0001-misc_information/38_0_1.zip,Rh28:0,Rh28:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.6b00036/om6b00036_si_002/17_TS-5_Total_0_2.zip,Rh0:2,Rh0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/164_0_1.zip,Ru68:2,Ru68:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/207_0_1.zip,Ru68:1,Ru68:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/208_0_1.zip,Ru68:1,Ru68:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/58_0_1.zip,Ru65:2,Ru65:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/202_0_1.zip,Ru66:2,Ru66:2 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/cctc.201601647/cctc201601647-sup-0001-misc_information/04_0_1.zip,Ru0:4,Ru0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4CC09696G/c4cc09696g1/05_0_1.zip,Ru14:4,Ru14:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jpca.7b07226/jp7b07226_si_001/61_0_1.zip,Ru8:6;Ru9:8;Ru10:6;Ru11:8,Ru8:8;Ru9:6;Ru10:8;Ru11:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT04335H/d1dt04335h1/08_0_1.zip,Ir0:3,Ir0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/153_7-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir38:3,Ir38:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/144_OC-Ir-_Cp_OA_TS_singlet_0_1.zip,Ir15:3,Ir15:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/147_Me_3P-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir15:3,Ir15:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/151_6-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir45:1,Ir45:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.4c00177/om4c00177_si_002/35_TSBC-II-cb-R_Imaginary_0_1.zip,Ir0:1,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c02270/cs1c02270_si_001/14_0_1.zip,Ir0:0,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC02790A/d2sc02790a1/30_0_2.zip,Ir22:2,Ir22:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/82_0_2.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/110_0_2.zip,Mo0:1,Mo0:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/12_0_1.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/141_0_1.zip,Mo0:3,Mo0:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/149_0_2.zip,Mo0:3,Mo0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.2c01967/dme.imi.irc.trj/34_Coordinates_from_ORCA-job_localscratch_gibacic.295_0_1.zip,Mo20:6,Mo20:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/450_TS-074-I_0_2.zip,Mo19:1,Mo19:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/1605_TS-312-G_0_2.zip,Mo23:4,Mo23:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/13_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/06_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/11_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/20_0_1.zip,Hf37:4,Hf37:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/07_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.chemmater.3c02836/cm3c02836_si_001/05_0_1.zip,Hf0:4,Hf0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp401443x/jp401443x_si_001/299_0_1.zip,Hf8:2,Hf8:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.7b00767/Possible-TS-XYZ/trans_12_re_Fig5_TS14_0_1.zip,Hf90:2,Hf90:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/07_0_1.zip,Ta0:1,Ta0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/03_0_1.zip,Ta0:1,Ta0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/13_0_1.zip,Ta1:3;Os3:4,Ta1:3;Os3:4 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/18_0_1.zip,Ta0:5;Ir4:3,Ta0:3;Ir4:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/20_0_1.zip,Os5:4;Ta6:3,Os5:4;Ta6:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.7b01464/ic7b01464_si_001/118_0_1.zip,Ta0:5,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs400349p/cs400349p_si_001/22_0_1.zip,Ta12:5,Ta12:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om300421m/om300421m_si_004/04_0_1.zip,Ta0:0,Ta0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/19_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/06_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/15_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/20_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/22_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.0c03821/ja0c03821_si_001/14_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs5016436/cs5016436_si_001/68_0_2.zip,Zr13:3,Zr13:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om5003655/om5003655_si_002/273_Kol_tBuPh2_Zr_Me_Prn21_up_insTS.xyz_0_2.zip,Zr0:1,Zr0:1 diff --git a/zhao-patches-doc/bench_stratified_144/master_strat.csv b/zhao-patches-doc/bench_stratified_144/master_strat.csv new file mode 100644 index 00000000..c249eb7e --- /dev/null +++ b/zhao-patches-doc/bench_stratified_144/master_strat.csv @@ -0,0 +1,145 @@ +zip_path,reactant_metal_oxidation_states,product_metal_oxidation_states 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/17_0_1.zip,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:8,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/11_1_2.zip,W4:6;W5:6;W6:6;W7:6;W8:6;W9:6;W10:6;W11:5;W12:6;W13:6;W14:6;Ru51:5,W4:6;W5:6;W6:6;W7:6;W8:5;W9:6;W10:6;W11:6;W12:6;W13:6;W14:5;Ru51:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/32_0_1.zip,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/33_-1_2.zip,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:6;Ru50:5,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:6;Ru50:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/06_0_1.zip,Ru4:7;W6:6;W8:6;W10:6;W14:5;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6,Ru4:7;W6:6;W8:6;W10:5;W14:6;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7DT03200E/c7dt03200e1/41_0_1.zip,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:5;W63:6;W64:5;W65:6;W66:6;W67:6;W68:5;W69:6;W70:6;W71:5,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:6;W63:6;W64:6;W65:6;W66:6;W67:6;W68:6;W69:6;W70:5;W71:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.4c00304/cs4c00304_si_002/93_61-ts_0_1.zip,W0:4,W0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp9054439/jp9054439_si_001/19_0_1.zip,W5:4,W5:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/44_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/01_0_2.zip,Co0:0,Co0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/33_-1_1.zip,Co0:1,Co0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/07_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/62_1_2.zip,Co0:2,Co0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CY02562F/c9cy02562f1/74_0_1.zip,Co0:5,Co0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.202104320/anie202104320-sup-0001-misc_information/12_0_1.zip,Co0:1,Co0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.8b00911/om8b00911_si_002/23_13-12-TSD_0_2.zip,Co0:4,Co0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/48_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/20_0_1.zip,Mn1:5,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/50_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/42_0_2.zip,Mn1:6,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/22_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3SC53469C/c3sc53469c1/07_0_1.zip,Mn0:5,Mn0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.1c00606/om1c00606_si_002/13_Coordinates_for_the_transition_state_isomerization_12H_to_12_0_1.zip,Mn0:5,Mn0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202303861/chem202303861-sup-0001-misc_information/81_0_1.zip,Mn61:7,Mn61:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/17_0_1.zip,Cr3:6;Cr4:6,Cr3:6;Cr4:6 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/13_0_1.zip,Cr2:6;Cr3:6,Cr2:6;Cr3:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/19_0_1.zip,Cr0:6;Cr1:6,Cr0:6;Cr1:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/28_0_1.zip,Cr3:2;Cr4:4,Cr3:2;Cr4:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/14_0_1.zip,Cr2:4;Cr3:6,Cr2:4;Cr3:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.5b01029/om5b01029_si_002/59_TS_12c_-13e_QUARTET_0_1.zip,Cr0:0,Cr0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp111108p/jp111108p_si_001/206_0_2.zip,Cr6:1,Cr6:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50813G/c3dt50813g/08_0_1.zip,Cr11:2,Cr11:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/54_0_1.zip,Re36:4,Re36:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/53_0_2.zip,Re33:4,Re33:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2NJ00911K/d2nj00911k1/08_0_2.zip,Re13:4,Re13:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02188E/d1dt02188e1/14_0_2.zip,Re0:6,Re0:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2DT03819F/d2dt03819f1/03_-1_2.zip,Re26:6;Re34:7,Re26:6;Re34:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.201310991/anie_201310991_sm_miscellaneous_information/11_0_1.zip,Re0:7,Re0:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om900881x/om900881x_si_001/18_0_1.zip,Re3:3,Re3:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.9b04579/cs9b04579_si_001/25_0_1.zip,Re0:0,Re0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/25_0_1.zip,Os0:2,Os0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/24_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/01_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/11_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/02_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202101404/chem202101404-sup-0001-misc_information/51_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1SC04369B/d1sc04369b1/06_0_2.zip,Os0:6,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.est.9b03509/es9b03509_si_001/33_0_2.zip,W4:6;W6:5;W8:5;W12:6;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:5;Os54:4,W4:6;W6:6;W8:6;W12:5;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:6;Os54:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/13_0_1.zip,Pt0:2,Pt0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/11_0_1.zip,Pt0:2,Pt0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/13_0_1.zip,Pt0:2;Pt1:2,Pt0:2;Pt1:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/28_0_1.zip,Pt0:1;Pt1:0,Pt0:2;Pt1:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/24_0_2.zip,Pt0:6;Pt1:5,Pt0:6;Pt1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om500416n/om500416n_si_001/05_TS-RE-c_0_2.zip,Pt0:7,Pt0:5 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00330/om0c00330_si_002/25_cis-3b_Au-assisted_C-H_OA_TS_product_0_2.zip,Au0:3;Pt2:3,Au0:3;Pt2:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00598/om0c00598_si_002/57_TS1-Pt_E_RB3LYP_0_2.zip,Pt22:3,Pt22:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/14_0_2.zip,Au12:2,Au12:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/04_0_2.zip,Au8:0,Au8:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/34_0_2.zip,Au8:0,Au8:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/40_0_2.zip,Au12:0,Au12:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/44_0_2.zip,Au1:0,Au1:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c04580/cs1c04580_si_001/14_0_2.zip,Au68:0,Au68:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00080/om0c00080_si_002/71_D-TSb1_0_2.zip,Au50:0,Au50:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02393A/d3dt02393a1/06_0_2.zip,Au17:1,Au17:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/38_0_1.zip,Fe0:3,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/83_-1_2.zip,Fe13:5,Fe13:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/243_0_1.zip,Fe3:2,Fe3:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/32_0_1.zip,Fe0:4,Fe0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/28_0_1.zip,Fe0:4,Fe0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C6OB00001K/c6ob00001k1/37_0_2.zip,Fe18:1,Fe18:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202300763/chem202300763-sup-0001-misc_information/127_0_1.zip,Fe32:0,Fe32:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C5SC02012C/c5sc02012c1/33_0_1.zip,Fe0:2,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/01_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/07_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/11_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/02_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/20_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02745G/d3dt02745g1/7939_Pd-H-t_098_0_2.zip,Pd24:1,Pd24:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.9b04142/ja9b04142_si_001/51_0_2.zip,Pd27:1,Pd27:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acsomega.0c01528/ao0c01528_si_001/60_0_1.zip,Pd46:0,Pd46:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/44_0_2.zip,Ni18:1,Ni18:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/115_0_1.zip,Ni18:0,Ni18:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/107_0_1.zip,Ni11:2,Ni11:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/07_0_2.zip,Ni20:3,Ni20:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/28_0_1.zip,Ni20:0,Ni20:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.201304196/chem_201304196_sm_miscellaneous_information/04_0_1.zip,Ni9:2,Ni9:2 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp3052455/jp3052455_si_001/91_0_2.zip,Ni6:1,Ni6:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.2c02705/cs2c02705_si_001/117_0_2.zip,Ni0:1,Ni0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/109_Me_3P-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh15:1,Rh15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/120_6-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh45:3,Rh45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/110_1-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh42:5,Rh42:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/121_6-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh45:3,Rh45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/118_5-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh32:3,Rh32:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1126/sciadv.ado9607/sciadv.ado9607_sm/25_0_1.zip,Rh123:0,Rh123:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/ejoc.202100093/ejoc202100093-sup-0001-misc_information/38_0_1.zip,Rh28:0,Rh28:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.6b00036/om6b00036_si_002/17_TS-5_Total_0_2.zip,Rh0:2,Rh0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/164_0_1.zip,Ru68:4,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/207_0_1.zip,Ru68:3,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/208_0_1.zip,Ru68:3,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/58_0_1.zip,Ru65:4,Ru65:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/202_0_1.zip,Ru66:4,Ru66:4 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/cctc.201601647/cctc201601647-sup-0001-misc_information/04_0_1.zip,Ru0:4,Ru0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4CC09696G/c4cc09696g1/05_0_1.zip,Ru14:3,Ru14:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jpca.7b07226/jp7b07226_si_001/61_0_1.zip,Ru8:6;Ru9:8;Ru10:6;Ru11:8,Ru8:8;Ru9:6;Ru10:8;Ru11:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT04335H/d1dt04335h1/08_0_1.zip,Ir0:2,Ir0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/153_7-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir38:3,Ir38:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/144_OC-Ir-_Cp_OA_TS_singlet_0_1.zip,Ir15:5,Ir15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/147_Me_3P-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir15:3,Ir15:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/151_6-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir45:3,Ir45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.4c00177/om4c00177_si_002/35_TSBC-II-cb-R_Imaginary_0_1.zip,Ir0:1,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c02270/cs1c02270_si_001/14_0_1.zip,Ir0:0,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC02790A/d2sc02790a1/30_0_2.zip,Ir22:2,Ir22:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/82_0_2.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/110_0_2.zip,Mo0:3,Mo0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/12_0_1.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/141_0_1.zip,Mo0:3,Mo0:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/149_0_2.zip,Mo0:5,Mo0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.2c01967/dme.imi.irc.trj/34_Coordinates_from_ORCA-job_localscratch_gibacic.295_0_1.zip,Mo20:6,Mo20:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/450_TS-074-I_0_2.zip,Mo19:3,Mo19:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/1605_TS-312-G_0_2.zip,Mo23:4,Mo23:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/13_0_1.zip,Hf38:3,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/06_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/11_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/20_0_1.zip,Hf37:4,Hf37:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/07_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.chemmater.3c02836/cm3c02836_si_001/05_0_1.zip,Hf0:4,Hf0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp401443x/jp401443x_si_001/299_0_1.zip,Hf8:4,Hf8:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.7b00767/Possible-TS-XYZ/trans_12_re_Fig5_TS14_0_1.zip,Hf90:2,Hf90:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/07_0_1.zip,Ta0:1,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/03_0_1.zip,Ta0:1,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/13_0_1.zip,Ta1:3;Os3:4,Ta1:3;Os3:4 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/18_0_1.zip,Ta0:5;Ir4:5,Ta0:3;Ir4:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/20_0_1.zip,Os5:4;Ta6:3,Os5:4;Ta6:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.7b01464/ic7b01464_si_001/118_0_1.zip,Ta0:5,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs400349p/cs400349p_si_001/22_0_1.zip,Ta12:5,Ta12:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om300421m/om300421m_si_004/04_0_1.zip,Ta0:0,Ta0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/19_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/06_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/15_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/20_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/22_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.0c03821/ja0c03821_si_001/14_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs5016436/cs5016436_si_001/68_0_2.zip,Zr13:3,Zr13:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om5003655/om5003655_si_002/273_Kol_tBuPh2_Zr_Me_Prn21_up_insTS.xyz_0_2.zip,Zr0:1,Zr0:1 diff --git a/zhao-patches-doc/bench_stratified_144/only-A_strat.csv b/zhao-patches-doc/bench_stratified_144/only-A_strat.csv new file mode 100644 index 00000000..c249eb7e --- /dev/null +++ b/zhao-patches-doc/bench_stratified_144/only-A_strat.csv @@ -0,0 +1,145 @@ +zip_path,reactant_metal_oxidation_states,product_metal_oxidation_states 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/17_0_1.zip,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:8,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/11_1_2.zip,W4:6;W5:6;W6:6;W7:6;W8:6;W9:6;W10:6;W11:5;W12:6;W13:6;W14:6;Ru51:5,W4:6;W5:6;W6:6;W7:6;W8:5;W9:6;W10:6;W11:6;W12:6;W13:6;W14:5;Ru51:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/32_0_1.zip,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/33_-1_2.zip,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:6;Ru50:5,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:6;Ru50:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/06_0_1.zip,Ru4:7;W6:6;W8:6;W10:6;W14:5;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6,Ru4:7;W6:6;W8:6;W10:5;W14:6;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7DT03200E/c7dt03200e1/41_0_1.zip,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:5;W63:6;W64:5;W65:6;W66:6;W67:6;W68:5;W69:6;W70:6;W71:5,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:6;W63:6;W64:6;W65:6;W66:6;W67:6;W68:6;W69:6;W70:5;W71:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.4c00304/cs4c00304_si_002/93_61-ts_0_1.zip,W0:4,W0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp9054439/jp9054439_si_001/19_0_1.zip,W5:4,W5:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/44_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/01_0_2.zip,Co0:0,Co0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/33_-1_1.zip,Co0:1,Co0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/07_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/62_1_2.zip,Co0:2,Co0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CY02562F/c9cy02562f1/74_0_1.zip,Co0:5,Co0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.202104320/anie202104320-sup-0001-misc_information/12_0_1.zip,Co0:1,Co0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.8b00911/om8b00911_si_002/23_13-12-TSD_0_2.zip,Co0:4,Co0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/48_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/20_0_1.zip,Mn1:5,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/50_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/42_0_2.zip,Mn1:6,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/22_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3SC53469C/c3sc53469c1/07_0_1.zip,Mn0:5,Mn0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.1c00606/om1c00606_si_002/13_Coordinates_for_the_transition_state_isomerization_12H_to_12_0_1.zip,Mn0:5,Mn0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202303861/chem202303861-sup-0001-misc_information/81_0_1.zip,Mn61:7,Mn61:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/17_0_1.zip,Cr3:6;Cr4:6,Cr3:6;Cr4:6 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/13_0_1.zip,Cr2:6;Cr3:6,Cr2:6;Cr3:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/19_0_1.zip,Cr0:6;Cr1:6,Cr0:6;Cr1:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/28_0_1.zip,Cr3:2;Cr4:4,Cr3:2;Cr4:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/14_0_1.zip,Cr2:4;Cr3:6,Cr2:4;Cr3:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.5b01029/om5b01029_si_002/59_TS_12c_-13e_QUARTET_0_1.zip,Cr0:0,Cr0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp111108p/jp111108p_si_001/206_0_2.zip,Cr6:1,Cr6:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50813G/c3dt50813g/08_0_1.zip,Cr11:2,Cr11:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/54_0_1.zip,Re36:4,Re36:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/53_0_2.zip,Re33:4,Re33:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2NJ00911K/d2nj00911k1/08_0_2.zip,Re13:4,Re13:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02188E/d1dt02188e1/14_0_2.zip,Re0:6,Re0:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2DT03819F/d2dt03819f1/03_-1_2.zip,Re26:6;Re34:7,Re26:6;Re34:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.201310991/anie_201310991_sm_miscellaneous_information/11_0_1.zip,Re0:7,Re0:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om900881x/om900881x_si_001/18_0_1.zip,Re3:3,Re3:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.9b04579/cs9b04579_si_001/25_0_1.zip,Re0:0,Re0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/25_0_1.zip,Os0:2,Os0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/24_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/01_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/11_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/02_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202101404/chem202101404-sup-0001-misc_information/51_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1SC04369B/d1sc04369b1/06_0_2.zip,Os0:6,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.est.9b03509/es9b03509_si_001/33_0_2.zip,W4:6;W6:5;W8:5;W12:6;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:5;Os54:4,W4:6;W6:6;W8:6;W12:5;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:6;Os54:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/13_0_1.zip,Pt0:2,Pt0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/11_0_1.zip,Pt0:2,Pt0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/13_0_1.zip,Pt0:2;Pt1:2,Pt0:2;Pt1:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/28_0_1.zip,Pt0:1;Pt1:0,Pt0:2;Pt1:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/24_0_2.zip,Pt0:6;Pt1:5,Pt0:6;Pt1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om500416n/om500416n_si_001/05_TS-RE-c_0_2.zip,Pt0:7,Pt0:5 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00330/om0c00330_si_002/25_cis-3b_Au-assisted_C-H_OA_TS_product_0_2.zip,Au0:3;Pt2:3,Au0:3;Pt2:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00598/om0c00598_si_002/57_TS1-Pt_E_RB3LYP_0_2.zip,Pt22:3,Pt22:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/14_0_2.zip,Au12:2,Au12:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/04_0_2.zip,Au8:0,Au8:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/34_0_2.zip,Au8:0,Au8:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/40_0_2.zip,Au12:0,Au12:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/44_0_2.zip,Au1:0,Au1:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c04580/cs1c04580_si_001/14_0_2.zip,Au68:0,Au68:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00080/om0c00080_si_002/71_D-TSb1_0_2.zip,Au50:0,Au50:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02393A/d3dt02393a1/06_0_2.zip,Au17:1,Au17:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/38_0_1.zip,Fe0:3,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/83_-1_2.zip,Fe13:5,Fe13:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/243_0_1.zip,Fe3:2,Fe3:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/32_0_1.zip,Fe0:4,Fe0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/28_0_1.zip,Fe0:4,Fe0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C6OB00001K/c6ob00001k1/37_0_2.zip,Fe18:1,Fe18:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202300763/chem202300763-sup-0001-misc_information/127_0_1.zip,Fe32:0,Fe32:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C5SC02012C/c5sc02012c1/33_0_1.zip,Fe0:2,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/01_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/07_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/11_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/02_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/20_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02745G/d3dt02745g1/7939_Pd-H-t_098_0_2.zip,Pd24:1,Pd24:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.9b04142/ja9b04142_si_001/51_0_2.zip,Pd27:1,Pd27:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acsomega.0c01528/ao0c01528_si_001/60_0_1.zip,Pd46:0,Pd46:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/44_0_2.zip,Ni18:1,Ni18:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/115_0_1.zip,Ni18:0,Ni18:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/107_0_1.zip,Ni11:2,Ni11:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/07_0_2.zip,Ni20:3,Ni20:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/28_0_1.zip,Ni20:0,Ni20:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.201304196/chem_201304196_sm_miscellaneous_information/04_0_1.zip,Ni9:2,Ni9:2 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp3052455/jp3052455_si_001/91_0_2.zip,Ni6:1,Ni6:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.2c02705/cs2c02705_si_001/117_0_2.zip,Ni0:1,Ni0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/109_Me_3P-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh15:1,Rh15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/120_6-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh45:3,Rh45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/110_1-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh42:5,Rh42:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/121_6-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh45:3,Rh45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/118_5-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh32:3,Rh32:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1126/sciadv.ado9607/sciadv.ado9607_sm/25_0_1.zip,Rh123:0,Rh123:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/ejoc.202100093/ejoc202100093-sup-0001-misc_information/38_0_1.zip,Rh28:0,Rh28:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.6b00036/om6b00036_si_002/17_TS-5_Total_0_2.zip,Rh0:2,Rh0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/164_0_1.zip,Ru68:4,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/207_0_1.zip,Ru68:3,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/208_0_1.zip,Ru68:3,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/58_0_1.zip,Ru65:4,Ru65:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/202_0_1.zip,Ru66:4,Ru66:4 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/cctc.201601647/cctc201601647-sup-0001-misc_information/04_0_1.zip,Ru0:4,Ru0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4CC09696G/c4cc09696g1/05_0_1.zip,Ru14:3,Ru14:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jpca.7b07226/jp7b07226_si_001/61_0_1.zip,Ru8:6;Ru9:8;Ru10:6;Ru11:8,Ru8:8;Ru9:6;Ru10:8;Ru11:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT04335H/d1dt04335h1/08_0_1.zip,Ir0:2,Ir0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/153_7-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir38:3,Ir38:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/144_OC-Ir-_Cp_OA_TS_singlet_0_1.zip,Ir15:5,Ir15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/147_Me_3P-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir15:3,Ir15:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/151_6-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir45:3,Ir45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.4c00177/om4c00177_si_002/35_TSBC-II-cb-R_Imaginary_0_1.zip,Ir0:1,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c02270/cs1c02270_si_001/14_0_1.zip,Ir0:0,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC02790A/d2sc02790a1/30_0_2.zip,Ir22:2,Ir22:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/82_0_2.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/110_0_2.zip,Mo0:3,Mo0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/12_0_1.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/141_0_1.zip,Mo0:3,Mo0:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/149_0_2.zip,Mo0:5,Mo0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.2c01967/dme.imi.irc.trj/34_Coordinates_from_ORCA-job_localscratch_gibacic.295_0_1.zip,Mo20:6,Mo20:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/450_TS-074-I_0_2.zip,Mo19:3,Mo19:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/1605_TS-312-G_0_2.zip,Mo23:4,Mo23:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/13_0_1.zip,Hf38:3,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/06_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/11_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/20_0_1.zip,Hf37:4,Hf37:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/07_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.chemmater.3c02836/cm3c02836_si_001/05_0_1.zip,Hf0:4,Hf0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp401443x/jp401443x_si_001/299_0_1.zip,Hf8:4,Hf8:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.7b00767/Possible-TS-XYZ/trans_12_re_Fig5_TS14_0_1.zip,Hf90:2,Hf90:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/07_0_1.zip,Ta0:1,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/03_0_1.zip,Ta0:1,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/13_0_1.zip,Ta1:3;Os3:4,Ta1:3;Os3:4 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/18_0_1.zip,Ta0:5;Ir4:5,Ta0:3;Ir4:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/20_0_1.zip,Os5:4;Ta6:3,Os5:4;Ta6:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.7b01464/ic7b01464_si_001/118_0_1.zip,Ta0:5,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs400349p/cs400349p_si_001/22_0_1.zip,Ta12:5,Ta12:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om300421m/om300421m_si_004/04_0_1.zip,Ta0:0,Ta0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/19_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/06_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/15_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/20_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/22_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.0c03821/ja0c03821_si_001/14_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs5016436/cs5016436_si_001/68_0_2.zip,Zr13:3,Zr13:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om5003655/om5003655_si_002/273_Kol_tBuPh2_Zr_Me_Prn21_up_insTS.xyz_0_2.zip,Zr0:1,Zr0:1 diff --git a/zhao-patches-doc/bench_stratified_144/only-B_strat.csv b/zhao-patches-doc/bench_stratified_144/only-B_strat.csv new file mode 100644 index 00000000..d1329f71 --- /dev/null +++ b/zhao-patches-doc/bench_stratified_144/only-B_strat.csv @@ -0,0 +1,145 @@ +zip_path,reactant_metal_oxidation_states,product_metal_oxidation_states 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/17_0_1.zip,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:8,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/11_1_2.zip,W4:6;W5:6;W6:6;W7:6;W8:6;W9:6;W10:6;W11:5;W12:6;W13:6;W14:6;Ru51:5,W4:6;W5:6;W6:6;W7:6;W8:5;W9:6;W10:6;W11:6;W12:6;W13:6;W14:5;Ru51:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/32_0_1.zip,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/33_-1_2.zip,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:6;Ru50:5,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:6;Ru50:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/06_0_1.zip,Ru4:7;W6:6;W8:6;W10:6;W14:5;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6,Ru4:7;W6:6;W8:6;W10:5;W14:6;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7DT03200E/c7dt03200e1/41_0_1.zip,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:5;W63:6;W64:5;W65:6;W66:6;W67:6;W68:5;W69:6;W70:6;W71:5,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:6;W63:6;W64:6;W65:6;W66:6;W67:6;W68:6;W69:6;W70:5;W71:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.4c00304/cs4c00304_si_002/93_61-ts_0_1.zip,W0:4,W0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp9054439/jp9054439_si_001/19_0_1.zip,W5:4,W5:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/44_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/01_0_2.zip,Co0:0,Co0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/33_-1_1.zip,Co0:1,Co0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/07_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/62_1_2.zip,Co0:2,Co0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CY02562F/c9cy02562f1/74_0_1.zip,Co0:5,Co0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.202104320/anie202104320-sup-0001-misc_information/12_0_1.zip,Co0:1,Co0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.8b00911/om8b00911_si_002/23_13-12-TSD_0_2.zip,Co0:4,Co0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/48_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/20_0_1.zip,Mn1:5,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/50_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/42_0_2.zip,Mn1:6,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/22_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3SC53469C/c3sc53469c1/07_0_1.zip,Mn0:5,Mn0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.1c00606/om1c00606_si_002/13_Coordinates_for_the_transition_state_isomerization_12H_to_12_0_1.zip,Mn0:5,Mn0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202303861/chem202303861-sup-0001-misc_information/81_0_1.zip,Mn61:7,Mn61:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/17_0_1.zip,Cr3:6;Cr4:6,Cr3:6;Cr4:6 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/13_0_1.zip,Cr2:6;Cr3:6,Cr2:6;Cr3:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/19_0_1.zip,Cr0:6;Cr1:6,Cr0:6;Cr1:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/28_0_1.zip,Cr3:2;Cr4:4,Cr3:2;Cr4:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/14_0_1.zip,Cr2:4;Cr3:6,Cr2:4;Cr3:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.5b01029/om5b01029_si_002/59_TS_12c_-13e_QUARTET_0_1.zip,Cr0:2,Cr0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp111108p/jp111108p_si_001/206_0_2.zip,Cr6:1,Cr6:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50813G/c3dt50813g/08_0_1.zip,Cr11:2,Cr11:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/54_0_1.zip,Re36:4,Re36:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/53_0_2.zip,Re33:4,Re33:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2NJ00911K/d2nj00911k1/08_0_2.zip,Re13:4,Re13:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02188E/d1dt02188e1/14_0_2.zip,Re0:6,Re0:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2DT03819F/d2dt03819f1/03_-1_2.zip,Re26:6;Re34:7,Re26:6;Re34:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.201310991/anie_201310991_sm_miscellaneous_information/11_0_1.zip,Re0:7,Re0:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om900881x/om900881x_si_001/18_0_1.zip,Re3:2,Re3:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.9b04579/cs9b04579_si_001/25_0_1.zip,Re0:0,Re0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/25_0_1.zip,Os0:2,Os0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/24_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/01_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/11_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/02_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202101404/chem202101404-sup-0001-misc_information/51_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1SC04369B/d1sc04369b1/06_0_2.zip,Os0:2,Os0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.est.9b03509/es9b03509_si_001/33_0_2.zip,W4:6;W6:5;W8:5;W12:6;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:5;Os54:4,W4:6;W6:6;W8:6;W12:5;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:6;Os54:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/13_0_1.zip,Pt0:2,Pt0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/11_0_1.zip,Pt0:2,Pt0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/13_0_1.zip,Pt0:2;Pt1:2,Pt0:2;Pt1:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/28_0_1.zip,Pt0:0;Pt1:1,Pt0:1;Pt1:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/24_0_2.zip,Pt0:6;Pt1:5,Pt0:6;Pt1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om500416n/om500416n_si_001/05_TS-RE-c_0_2.zip,Pt0:7,Pt0:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00330/om0c00330_si_002/25_cis-3b_Au-assisted_C-H_OA_TS_product_0_2.zip,Au0:3;Pt2:3,Au0:3;Pt2:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00598/om0c00598_si_002/57_TS1-Pt_E_RB3LYP_0_2.zip,Pt22:3,Pt22:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/14_0_2.zip,Au12:2,Au12:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/04_0_2.zip,Au8:0,Au8:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/34_0_2.zip,Au8:0,Au8:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/40_0_2.zip,Au12:0,Au12:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/44_0_2.zip,Au1:0,Au1:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c04580/cs1c04580_si_001/14_0_2.zip,Au68:0,Au68:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00080/om0c00080_si_002/71_D-TSb1_0_2.zip,Au50:0,Au50:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02393A/d3dt02393a1/06_0_2.zip,Au17:1,Au17:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/38_0_1.zip,Fe0:3,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/83_-1_2.zip,Fe13:5,Fe13:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/243_0_1.zip,Fe3:2,Fe3:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/32_0_1.zip,Fe0:4,Fe0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/28_0_1.zip,Fe0:4,Fe0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C6OB00001K/c6ob00001k1/37_0_2.zip,Fe18:1,Fe18:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202300763/chem202300763-sup-0001-misc_information/127_0_1.zip,Fe32:0,Fe32:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C5SC02012C/c5sc02012c1/33_0_1.zip,Fe0:2,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/01_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/07_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/11_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/02_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/20_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02745G/d3dt02745g1/7939_Pd-H-t_098_0_2.zip,Pd24:1,Pd24:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.9b04142/ja9b04142_si_001/51_0_2.zip,Pd27:1,Pd27:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acsomega.0c01528/ao0c01528_si_001/60_0_1.zip,Pd46:0,Pd46:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/44_0_2.zip,Ni18:1,Ni18:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/115_0_1.zip,Ni18:0,Ni18:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/107_0_1.zip,Ni11:2,Ni11:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/07_0_2.zip,Ni20:3,Ni20:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/28_0_1.zip,Ni20:0,Ni20:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.201304196/chem_201304196_sm_miscellaneous_information/04_0_1.zip,Ni9:2,Ni9:2 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp3052455/jp3052455_si_001/91_0_2.zip,Ni6:2,Ni6:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.2c02705/cs2c02705_si_001/117_0_2.zip,Ni0:1,Ni0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/109_Me_3P-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh15:1,Rh15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/120_6-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh45:3,Rh45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/110_1-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh42:5,Rh42:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/121_6-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh45:3,Rh45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/118_5-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh32:3,Rh32:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1126/sciadv.ado9607/sciadv.ado9607_sm/25_0_1.zip,Rh123:0,Rh123:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/ejoc.202100093/ejoc202100093-sup-0001-misc_information/38_0_1.zip,Rh28:0,Rh28:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.6b00036/om6b00036_si_002/17_TS-5_Total_0_2.zip,Rh0:2,Rh0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/164_0_1.zip,Ru68:4,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/207_0_1.zip,Ru68:3,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/208_0_1.zip,Ru68:3,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/58_0_1.zip,Ru65:4,Ru65:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/202_0_1.zip,Ru66:4,Ru66:4 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/cctc.201601647/cctc201601647-sup-0001-misc_information/04_0_1.zip,Ru0:4,Ru0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4CC09696G/c4cc09696g1/05_0_1.zip,Ru14:3,Ru14:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jpca.7b07226/jp7b07226_si_001/61_0_1.zip,Ru8:6;Ru9:8;Ru10:6;Ru11:8,Ru8:8;Ru9:6;Ru10:8;Ru11:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT04335H/d1dt04335h1/08_0_1.zip,Ir0:2,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/153_7-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir38:3,Ir38:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/144_OC-Ir-_Cp_OA_TS_singlet_0_1.zip,Ir15:5,Ir15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/147_Me_3P-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir15:3,Ir15:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/151_6-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir45:3,Ir45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.4c00177/om4c00177_si_002/35_TSBC-II-cb-R_Imaginary_0_1.zip,Ir0:1,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c02270/cs1c02270_si_001/14_0_1.zip,Ir0:0,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC02790A/d2sc02790a1/30_0_2.zip,Ir22:1,Ir22:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/82_0_2.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/110_0_2.zip,Mo0:3,Mo0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/12_0_1.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/141_0_1.zip,Mo0:3,Mo0:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/149_0_2.zip,Mo0:5,Mo0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.2c01967/dme.imi.irc.trj/34_Coordinates_from_ORCA-job_localscratch_gibacic.295_0_1.zip,Mo20:6,Mo20:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/450_TS-074-I_0_2.zip,Mo19:3,Mo19:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/1605_TS-312-G_0_2.zip,Mo23:4,Mo23:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/13_0_1.zip,Hf38:3,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/06_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/11_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/20_0_1.zip,Hf37:4,Hf37:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/07_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.chemmater.3c02836/cm3c02836_si_001/05_0_1.zip,Hf0:4,Hf0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp401443x/jp401443x_si_001/299_0_1.zip,Hf8:4,Hf8:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.7b00767/Possible-TS-XYZ/trans_12_re_Fig5_TS14_0_1.zip,Hf90:2,Hf90:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/07_0_1.zip,Ta0:1,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/03_0_1.zip,Ta0:1,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/13_0_1.zip,Ta1:3;Os3:4,Ta1:3;Os3:4 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/18_0_1.zip,Ta0:5;Ir4:5,Ta0:3;Ir4:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/20_0_1.zip,Os5:4;Ta6:3,Os5:4;Ta6:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.7b01464/ic7b01464_si_001/118_0_1.zip,Ta0:5,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs400349p/cs400349p_si_001/22_0_1.zip,Ta12:5,Ta12:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om300421m/om300421m_si_004/04_0_1.zip,Ta0:0,Ta0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/19_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/06_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/15_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/20_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/22_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.0c03821/ja0c03821_si_001/14_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs5016436/cs5016436_si_001/68_0_2.zip,Zr13:3,Zr13:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om5003655/om5003655_si_002/273_Kol_tBuPh2_Zr_Me_Prn21_up_insTS.xyz_0_2.zip,Zr0:1,Zr0:1 diff --git a/zhao-patches-doc/bench_stratified_144/only-C_strat.csv b/zhao-patches-doc/bench_stratified_144/only-C_strat.csv new file mode 100644 index 00000000..799c99f8 --- /dev/null +++ b/zhao-patches-doc/bench_stratified_144/only-C_strat.csv @@ -0,0 +1,145 @@ +zip_path,reactant_metal_oxidation_states,product_metal_oxidation_states 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/17_0_1.zip,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:8,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/11_1_2.zip,W4:6;W5:6;W6:6;W7:6;W8:5;W9:6;W10:6;W11:6;W12:6;W13:6;W14:5;Ru51:1,W4:6;W5:6;W6:6;W7:6;W8:2;W9:5;W10:6;W11:6;W12:6;W13:6;W14:3;Ru51:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/32_0_1.zip,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/33_-1_2.zip,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:6;Ru50:5,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:6;Ru50:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/06_0_1.zip,Ru4:7;W6:6;W8:6;W10:6;W14:5;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6,Ru4:7;W6:6;W8:6;W10:5;W14:6;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7DT03200E/c7dt03200e1/41_0_1.zip,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:5;W63:6;W64:5;W65:6;W66:6;W67:6;W68:5;W69:6;W70:6;W71:5,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:6;W63:6;W64:6;W65:6;W66:6;W67:6;W68:6;W69:6;W70:5;W71:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.4c00304/cs4c00304_si_002/93_61-ts_0_1.zip,W0:4,W0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp9054439/jp9054439_si_001/19_0_1.zip,W5:4,W5:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/44_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/01_0_2.zip,Co0:0,Co0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/33_-1_1.zip,Co0:3,Co0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/07_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/62_1_2.zip,Co0:2,Co0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CY02562F/c9cy02562f1/74_0_1.zip,Co0:5,Co0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.202104320/anie202104320-sup-0001-misc_information/12_0_1.zip,Co0:1,Co0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.8b00911/om8b00911_si_002/23_13-12-TSD_0_2.zip,Co0:4,Co0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/48_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/20_0_1.zip,Mn1:5,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/50_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/42_0_2.zip,Mn1:6,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/22_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3SC53469C/c3sc53469c1/07_0_1.zip,Mn0:5,Mn0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.1c00606/om1c00606_si_002/13_Coordinates_for_the_transition_state_isomerization_12H_to_12_0_1.zip,Mn0:5,Mn0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202303861/chem202303861-sup-0001-misc_information/81_0_1.zip,Mn61:7,Mn61:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/17_0_1.zip,Cr3:6;Cr4:6,Cr3:6;Cr4:6 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/13_0_1.zip,Cr2:4;Cr3:6,Cr2:4;Cr3:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/19_0_1.zip,Cr0:6;Cr1:6,Cr0:6;Cr1:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/28_0_1.zip,Cr3:4;Cr4:2,Cr3:4;Cr4:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/14_0_1.zip,Cr2:1;Cr3:5,Cr2:1;Cr3:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.5b01029/om5b01029_si_002/59_TS_12c_-13e_QUARTET_0_1.zip,Cr0:2,Cr0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp111108p/jp111108p_si_001/206_0_2.zip,Cr6:1,Cr6:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50813G/c3dt50813g/08_0_1.zip,Cr11:2,Cr11:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/54_0_1.zip,Re36:4,Re36:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/53_0_2.zip,Re33:4,Re33:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2NJ00911K/d2nj00911k1/08_0_2.zip,Re13:4,Re13:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02188E/d1dt02188e1/14_0_2.zip,Re0:6,Re0:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2DT03819F/d2dt03819f1/03_-1_2.zip,Re26:7;Re34:6,Re26:7;Re34:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.201310991/anie_201310991_sm_miscellaneous_information/11_0_1.zip,Re0:7,Re0:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om900881x/om900881x_si_001/18_0_1.zip,Re3:0,Re3:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.9b04579/cs9b04579_si_001/25_0_1.zip,Re0:0,Re0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/25_0_1.zip,Os0:2,Os0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/24_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/01_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/11_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/02_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202101404/chem202101404-sup-0001-misc_information/51_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1SC04369B/d1sc04369b1/06_0_2.zip,Os0:1,Os0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.est.9b03509/es9b03509_si_001/33_0_2.zip,W4:6;W6:5;W8:5;W12:6;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:5;Os54:4,W4:6;W6:6;W8:6;W12:5;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:6;Os54:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/13_0_1.zip,Pt0:2,Pt0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/11_0_1.zip,Pt0:2,Pt0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/13_0_1.zip,Pt0:2;Pt1:2,Pt0:2;Pt1:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/28_0_1.zip,Pt0:1;Pt1:3,Pt0:4;Pt1:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/24_0_2.zip,Pt0:6;Pt1:5,Pt0:6;Pt1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om500416n/om500416n_si_001/05_TS-RE-c_0_2.zip,Pt0:7,Pt0:5 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00330/om0c00330_si_002/25_cis-3b_Au-assisted_C-H_OA_TS_product_0_2.zip,Au0:3;Pt2:2,Au0:3;Pt2:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00598/om0c00598_si_002/57_TS1-Pt_E_RB3LYP_0_2.zip,Pt22:3,Pt22:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/14_0_2.zip,Au12:2,Au12:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/04_0_2.zip,Au8:0,Au8:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/34_0_2.zip,Au8:0,Au8:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/40_0_2.zip,Au12:0,Au12:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/44_0_2.zip,Au1:0,Au1:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c04580/cs1c04580_si_001/14_0_2.zip,Au68:0,Au68:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00080/om0c00080_si_002/71_D-TSb1_0_2.zip,Au50:0,Au50:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02393A/d3dt02393a1/06_0_2.zip,Au17:1,Au17:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/38_0_1.zip,Fe0:3,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/83_-1_2.zip,Fe13:7,Fe13:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/243_0_1.zip,Fe3:2,Fe3:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/32_0_1.zip,Fe0:4,Fe0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/28_0_1.zip,Fe0:4,Fe0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C6OB00001K/c6ob00001k1/37_0_2.zip,Fe18:1,Fe18:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202300763/chem202300763-sup-0001-misc_information/127_0_1.zip,Fe32:0,Fe32:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C5SC02012C/c5sc02012c1/33_0_1.zip,Fe0:2,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/01_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/07_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/11_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/02_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/20_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02745G/d3dt02745g1/7939_Pd-H-t_098_0_2.zip,Pd24:0,Pd24:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.9b04142/ja9b04142_si_001/51_0_2.zip,Pd27:1,Pd27:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acsomega.0c01528/ao0c01528_si_001/60_0_1.zip,Pd46:0,Pd46:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/44_0_2.zip,Ni18:1,Ni18:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/115_0_1.zip,Ni18:0,Ni18:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/107_0_1.zip,Ni11:2,Ni11:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/07_0_2.zip,Ni20:3,Ni20:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/28_0_1.zip,Ni20:0,Ni20:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.201304196/chem_201304196_sm_miscellaneous_information/04_0_1.zip,Ni9:2,Ni9:2 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp3052455/jp3052455_si_001/91_0_2.zip,Ni6:1,Ni6:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.2c02705/cs2c02705_si_001/117_0_2.zip,Ni0:1,Ni0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/109_Me_3P-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh15:1,Rh15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/120_6-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh45:3,Rh45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/110_1-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh42:5,Rh42:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/121_6-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh45:3,Rh45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/118_5-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh32:3,Rh32:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1126/sciadv.ado9607/sciadv.ado9607_sm/25_0_1.zip,Rh123:0,Rh123:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/ejoc.202100093/ejoc202100093-sup-0001-misc_information/38_0_1.zip,Rh28:0,Rh28:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.6b00036/om6b00036_si_002/17_TS-5_Total_0_2.zip,Rh0:2,Rh0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/164_0_1.zip,Ru68:4,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/207_0_1.zip,Ru68:3,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/208_0_1.zip,Ru68:3,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/58_0_1.zip,Ru65:4,Ru65:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/202_0_1.zip,Ru66:4,Ru66:4 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/cctc.201601647/cctc201601647-sup-0001-misc_information/04_0_1.zip,Ru0:4,Ru0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4CC09696G/c4cc09696g1/05_0_1.zip,Ru14:4,Ru14:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jpca.7b07226/jp7b07226_si_001/61_0_1.zip,Ru8:6;Ru9:8;Ru10:6;Ru11:8,Ru8:8;Ru9:6;Ru10:8;Ru11:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT04335H/d1dt04335h1/08_0_1.zip,Ir0:2,Ir0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/153_7-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir38:5,Ir38:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/144_OC-Ir-_Cp_OA_TS_singlet_0_1.zip,Ir15:5,Ir15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/147_Me_3P-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir15:3,Ir15:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/151_6-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir45:3,Ir45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.4c00177/om4c00177_si_002/35_TSBC-II-cb-R_Imaginary_0_1.zip,Ir0:1,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c02270/cs1c02270_si_001/14_0_1.zip,Ir0:0,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC02790A/d2sc02790a1/30_0_2.zip,Ir22:2,Ir22:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/82_0_2.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/110_0_2.zip,Mo0:1,Mo0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/12_0_1.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/141_0_1.zip,Mo0:3,Mo0:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/149_0_2.zip,Mo0:3,Mo0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.2c01967/dme.imi.irc.trj/34_Coordinates_from_ORCA-job_localscratch_gibacic.295_0_1.zip,Mo20:6,Mo20:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/450_TS-074-I_0_2.zip,Mo19:3,Mo19:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/1605_TS-312-G_0_2.zip,Mo23:4,Mo23:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/13_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/06_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/11_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/20_0_1.zip,Hf37:4,Hf37:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/07_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.chemmater.3c02836/cm3c02836_si_001/05_0_1.zip,Hf0:4,Hf0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp401443x/jp401443x_si_001/299_0_1.zip,Hf8:4,Hf8:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.7b00767/Possible-TS-XYZ/trans_12_re_Fig5_TS14_0_1.zip,Hf90:2,Hf90:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/07_0_1.zip,Ta0:1,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/03_0_1.zip,Ta0:1,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/13_0_1.zip,Ta1:3;Os3:4,Ta1:3;Os3:4 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/18_0_1.zip,Ta0:5;Ir4:5,Ta0:3;Ir4:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/20_0_1.zip,Os5:4;Ta6:3,Os5:4;Ta6:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.7b01464/ic7b01464_si_001/118_0_1.zip,Ta0:5,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs400349p/cs400349p_si_001/22_0_1.zip,Ta12:5,Ta12:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om300421m/om300421m_si_004/04_0_1.zip,Ta0:0,Ta0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/19_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/06_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/15_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/20_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/22_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.0c03821/ja0c03821_si_001/14_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs5016436/cs5016436_si_001/68_0_2.zip,Zr13:3,Zr13:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om5003655/om5003655_si_002/273_Kol_tBuPh2_Zr_Me_Prn21_up_insTS.xyz_0_2.zip,Zr0:1,Zr0:1 diff --git a/zhao-patches-doc/bench_stratified_144/only-D_strat.csv b/zhao-patches-doc/bench_stratified_144/only-D_strat.csv new file mode 100644 index 00000000..720850fb --- /dev/null +++ b/zhao-patches-doc/bench_stratified_144/only-D_strat.csv @@ -0,0 +1,145 @@ +zip_path,reactant_metal_oxidation_states,product_metal_oxidation_states 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/17_0_1.zip,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:8,W4:6;W5:6;W6:6;W7:6;W8:6;W9:5;W10:6;W11:6;W12:6;W13:5;W14:6;Ru51:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/11_1_2.zip,W4:6;W5:6;W6:6;W7:6;W8:6;W9:6;W10:6;W11:5;W12:6;W13:6;W14:6;Ru51:5,W4:6;W5:6;W6:6;W7:6;W8:5;W9:6;W10:6;W11:6;W12:6;W13:6;W14:5;Ru51:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/32_0_1.zip,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8,W4:5;W5:6;W6:6;W7:5;W8:6;W9:6;W10:5;W11:6;W12:6;W13:6;W14:5;Ru50:8 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/33_-1_2.zip,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:6;Ru50:5,W4:6;W5:6;W6:5;W7:6;W8:6;W9:6;W10:6;W11:6;W12:6;W13:6;W14:6;Ru50:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/06_0_1.zip,Ru4:7;W6:6;W8:6;W10:6;W14:5;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6,Ru4:7;W6:6;W8:6;W10:5;W14:6;W16:6;W19:6;W23:6;W25:5;W27:5;W34:6;W39:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7DT03200E/c7dt03200e1/41_0_1.zip,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:5;W63:6;W64:5;W65:6;W66:6;W67:6;W68:5;W69:6;W70:6;W71:5,W0:6;W1:6;W2:6;W3:5;W4:6;W5:6;W6:6;W7:6;W8:6;W63:6;W64:6;W65:6;W66:6;W67:6;W68:6;W69:6;W70:5;W71:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.4c00304/cs4c00304_si_002/93_61-ts_0_1.zip,W0:4,W0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp9054439/jp9054439_si_001/19_0_1.zip,W5:4,W5:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/44_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/01_0_2.zip,Co0:0,Co0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/33_-1_1.zip,Co0:1,Co0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/07_0_2.zip,Co0:0,Co0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/62_1_2.zip,Co0:2,Co0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CY02562F/c9cy02562f1/74_0_1.zip,Co0:5,Co0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.202104320/anie202104320-sup-0001-misc_information/12_0_1.zip,Co0:1,Co0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.8b00911/om8b00911_si_002/23_13-12-TSD_0_2.zip,Co0:4,Co0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/48_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/20_0_1.zip,Mn1:5,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/50_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/42_0_2.zip,Mn1:6,Mn1:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/22_0_1.zip,Mn1:7,Mn1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3SC53469C/c3sc53469c1/07_0_1.zip,Mn0:5,Mn0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.1c00606/om1c00606_si_002/13_Coordinates_for_the_transition_state_isomerization_12H_to_12_0_1.zip,Mn0:5,Mn0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202303861/chem202303861-sup-0001-misc_information/81_0_1.zip,Mn61:7,Mn61:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/17_0_1.zip,Cr3:6;Cr4:6,Cr3:6;Cr4:6 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/13_0_1.zip,Cr2:6;Cr3:6,Cr2:6;Cr3:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/19_0_1.zip,Cr0:6;Cr1:6,Cr0:6;Cr1:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/28_0_1.zip,Cr3:2;Cr4:4,Cr3:2;Cr4:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/14_0_1.zip,Cr2:4;Cr3:6,Cr2:4;Cr3:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.5b01029/om5b01029_si_002/59_TS_12c_-13e_QUARTET_0_1.zip,Cr0:0,Cr0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp111108p/jp111108p_si_001/206_0_2.zip,Cr6:1,Cr6:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50813G/c3dt50813g/08_0_1.zip,Cr11:2,Cr11:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/54_0_1.zip,Re36:4,Re36:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/53_0_2.zip,Re33:4,Re33:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2NJ00911K/d2nj00911k1/08_0_2.zip,Re13:6,Re13:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02188E/d1dt02188e1/14_0_2.zip,Re0:6,Re0:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2DT03819F/d2dt03819f1/03_-1_2.zip,Re26:6;Re34:7,Re26:6;Re34:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.201310991/anie_201310991_sm_miscellaneous_information/11_0_1.zip,Re0:7,Re0:7 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om900881x/om900881x_si_001/18_0_1.zip,Re3:3,Re3:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.9b04579/cs9b04579_si_001/25_0_1.zip,Re0:0,Re0:0 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/25_0_1.zip,Os0:2,Os0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/24_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/01_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/11_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/02_0_2.zip,Os0:1,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202101404/chem202101404-sup-0001-misc_information/51_0_1.zip,Os0:4,Os0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1SC04369B/d1sc04369b1/06_0_2.zip,Os0:6,Os0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.est.9b03509/es9b03509_si_001/33_0_2.zip,W4:6;W6:5;W8:5;W12:6;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:5;Os54:4,W4:6;W6:6;W8:6;W12:5;W14:6;W17:6;W21:6;W23:6;W25:6;W32:6;W37:6;W51:6;Os54:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/13_0_1.zip,Pt0:2,Pt0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/11_0_1.zip,Pt0:2,Pt0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/13_0_1.zip,Pt0:2;Pt1:2,Pt0:2;Pt1:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/28_0_1.zip,Pt0:0;Pt1:1,Pt0:2;Pt1:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/24_0_2.zip,Pt0:6;Pt1:5,Pt0:6;Pt1:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om500416n/om500416n_si_001/05_TS-RE-c_0_2.zip,Pt0:7,Pt0:5 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00330/om0c00330_si_002/25_cis-3b_Au-assisted_C-H_OA_TS_product_0_2.zip,Au0:3;Pt2:3,Au0:3;Pt2:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00598/om0c00598_si_002/57_TS1-Pt_E_RB3LYP_0_2.zip,Pt22:3,Pt22:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/14_0_2.zip,Au12:2,Au12:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/04_0_2.zip,Au8:0,Au8:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/34_0_2.zip,Au8:0,Au8:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/40_0_2.zip,Au12:0,Au12:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/44_0_2.zip,Au1:0,Au1:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c04580/cs1c04580_si_001/14_0_2.zip,Au68:0,Au68:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00080/om0c00080_si_002/71_D-TSb1_0_2.zip,Au50:0,Au50:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02393A/d3dt02393a1/06_0_2.zip,Au17:1,Au17:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/38_0_1.zip,Fe0:3,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/83_-1_2.zip,Fe13:5,Fe13:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/243_0_1.zip,Fe3:2,Fe3:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/32_0_1.zip,Fe0:4,Fe0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/28_0_1.zip,Fe0:4,Fe0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C6OB00001K/c6ob00001k1/37_0_2.zip,Fe18:1,Fe18:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202300763/chem202300763-sup-0001-misc_information/127_0_1.zip,Fe32:0,Fe32:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C5SC02012C/c5sc02012c1/33_0_1.zip,Fe0:2,Fe0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/01_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/07_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/11_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/02_0_2.zip,Pd30:1,Pd30:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/20_0_2.zip,Pd30:1,Pd30:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02745G/d3dt02745g1/7939_Pd-H-t_098_0_2.zip,Pd24:1,Pd24:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.9b04142/ja9b04142_si_001/51_0_2.zip,Pd27:1,Pd27:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acsomega.0c01528/ao0c01528_si_001/60_0_1.zip,Pd46:0,Pd46:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/44_0_2.zip,Ni18:1,Ni18:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/115_0_1.zip,Ni18:0,Ni18:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/107_0_1.zip,Ni11:2,Ni11:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/07_0_2.zip,Ni20:3,Ni20:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/28_0_1.zip,Ni20:0,Ni20:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.201304196/chem_201304196_sm_miscellaneous_information/04_0_1.zip,Ni9:2,Ni9:2 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp3052455/jp3052455_si_001/91_0_2.zip,Ni6:1,Ni6:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.2c02705/cs2c02705_si_001/117_0_2.zip,Ni0:1,Ni0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/109_Me_3P-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh15:1,Rh15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/120_6-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh45:3,Rh45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/110_1-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh42:5,Rh42:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/121_6-Rh-_Cp_OA_TS_triplet_0_1.zip,Rh45:3,Rh45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/118_5-Rh-_Cp_OA_TS_singlet_0_1.zip,Rh32:3,Rh32:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1126/sciadv.ado9607/sciadv.ado9607_sm/25_0_1.zip,Rh123:0,Rh123:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/ejoc.202100093/ejoc202100093-sup-0001-misc_information/38_0_1.zip,Rh28:0,Rh28:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.6b00036/om6b00036_si_002/17_TS-5_Total_0_2.zip,Rh0:2,Rh0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/164_0_1.zip,Ru68:4,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/207_0_1.zip,Ru68:3,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/208_0_1.zip,Ru68:3,Ru68:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/58_0_1.zip,Ru65:4,Ru65:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/202_0_1.zip,Ru66:4,Ru66:4 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/cctc.201601647/cctc201601647-sup-0001-misc_information/04_0_1.zip,Ru0:4,Ru0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4CC09696G/c4cc09696g1/05_0_1.zip,Ru14:3,Ru14:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jpca.7b07226/jp7b07226_si_001/61_0_1.zip,Ru8:6;Ru9:8;Ru10:6;Ru11:8,Ru8:8;Ru9:6;Ru10:8;Ru11:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT04335H/d1dt04335h1/08_0_1.zip,Ir0:2,Ir0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/153_7-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir38:3,Ir38:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/144_OC-Ir-_Cp_OA_TS_singlet_0_1.zip,Ir15:5,Ir15:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/147_Me_3P-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir15:3,Ir15:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/151_6-Ir-_Cp_OA_TS_triplet_0_1.zip,Ir45:3,Ir45:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.4c00177/om4c00177_si_002/35_TSBC-II-cb-R_Imaginary_0_1.zip,Ir0:1,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c02270/cs1c02270_si_001/14_0_1.zip,Ir0:0,Ir0:1 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC02790A/d2sc02790a1/30_0_2.zip,Ir22:2,Ir22:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/82_0_2.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/110_0_2.zip,Mo0:3,Mo0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/12_0_1.zip,Mo0:2,Mo0:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/141_0_1.zip,Mo0:3,Mo0:3 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/149_0_2.zip,Mo0:5,Mo0:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.2c01967/dme.imi.irc.trj/34_Coordinates_from_ORCA-job_localscratch_gibacic.295_0_1.zip,Mo20:6,Mo20:6 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/450_TS-074-I_0_2.zip,Mo19:3,Mo19:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/1605_TS-312-G_0_2.zip,Mo23:4,Mo23:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/13_0_1.zip,Hf38:3,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/06_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/11_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/20_0_1.zip,Hf37:4,Hf37:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/07_0_1.zip,Hf38:4,Hf38:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.chemmater.3c02836/cm3c02836_si_001/05_0_1.zip,Hf0:4,Hf0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp401443x/jp401443x_si_001/299_0_1.zip,Hf8:4,Hf8:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.7b00767/Possible-TS-XYZ/trans_12_re_Fig5_TS14_0_1.zip,Hf90:2,Hf90:2 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/07_0_1.zip,Ta0:1,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/03_0_1.zip,Ta0:1,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/13_0_1.zip,Ta1:3;Os3:4,Ta1:3;Os3:4 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/18_0_1.zip,Ta0:5;Ir4:5,Ta0:3;Ir4:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/20_0_1.zip,Os5:4;Ta6:3,Os5:4;Ta6:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.7b01464/ic7b01464_si_001/118_0_1.zip,Ta0:5,Ta0:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs400349p/cs400349p_si_001/22_0_1.zip,Ta12:5,Ta12:5 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om300421m/om300421m_si_004/04_0_1.zip,Ta0:0,Ta0:0 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/19_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/06_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/15_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/20_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/22_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.0c03821/ja0c03821_si_001/14_0_1.zip,Zr0:4,Zr0:4 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs5016436/cs5016436_si_001/68_0_2.zip,Zr13:3,Zr13:3 +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om5003655/om5003655_si_002/273_Kol_tBuPh2_Zr_Me_Prn21_up_insTS.xyz_0_2.zip,Zr0:1,Zr0:1 diff --git a/zhao-patches-doc/bench_stratified_144/sanity_stratified_input.txt b/zhao-patches-doc/bench_stratified_144/sanity_stratified_input.txt new file mode 100644 index 00000000..815bf5dd --- /dev/null +++ b/zhao-patches-doc/bench_stratified_144/sanity_stratified_input.txt @@ -0,0 +1,144 @@ +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/17_0_1.zip 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/11_1_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/32_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/33_-1_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50666E/c3dt50666e/06_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7DT03200E/c7dt03200e1/41_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.4c00304/cs4c00304_si_002/93_61-ts_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp9054439/jp9054439_si_001/19_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/44_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/01_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/33_-1_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/07_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP01820E/d1cp01820e1/62_1_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CY02562F/c9cy02562f1/74_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.202104320/anie202104320-sup-0001-misc_information/12_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.8b00911/om8b00911_si_002/23_13-12-TSD_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/48_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/20_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/50_0_1.zip 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/42_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B302693K/b302693k/22_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3SC53469C/c3sc53469c1/07_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.1c00606/om1c00606_si_002/13_Coordinates_for_the_transition_state_isomerization_12H_to_12_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202303861/chem202303861-sup-0001-misc_information/81_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/17_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/13_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/19_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/28_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C7NJ00295E/c7nj00295e1/14_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.5b01029/om5b01029_si_002/59_TS_12c_-13e_QUARTET_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp111108p/jp111108p_si_001/206_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C3DT50813G/c3dt50813g/08_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/54_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CY01125H/d0cy01125h1/53_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2NJ00911K/d2nj00911k1/08_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02188E/d1dt02188e1/14_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2DT03819F/d2dt03819f1/03_-1_2.zip 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/anie.201310991/anie_201310991_sm_miscellaneous_information/11_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om900881x/om900881x_si_001/18_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.9b04579/cs9b04579_si_001/25_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/25_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4DT02475C/c4dt02475c1/24_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/01_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/11_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0DT02713H/d0dt02713h1/02_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202101404/chem202101404-sup-0001-misc_information/51_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1SC04369B/d1sc04369b1/06_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.est.9b03509/es9b03509_si_001/33_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/13_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C0DT01299H/c0dt01299h/11_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/13_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/28_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B901697J/b901697j/24_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om500416n/om500416n_si_001/05_TS-RE-c_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00330/om0c00330_si_002/25_cis-3b_Au-assisted_C-H_OA_TS_product_0_2.zip 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00598/om0c00598_si_002/57_TS1-Pt_E_RB3LYP_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/14_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/04_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/34_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/40_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C2OB25419K/c2ob25419k/44_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c04580/cs1c04580_si_001/14_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.0c00080/om0c00080_si_002/71_D-TSb1_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02393A/d3dt02393a1/06_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/38_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/83_-1_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/243_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/32_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/28_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C6OB00001K/c6ob00001k1/37_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.202300763/chem202300763-sup-0001-misc_information/127_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C5SC02012C/c5sc02012c1/33_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/01_0_2.zip 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/07_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/11_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/02_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4QO00347K/c4qo00347k1/20_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D3DT02745G/d3dt02745g1/7939_Pd-H-t_098_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.9b04142/ja9b04142_si_001/51_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acsomega.0c01528/ao0c01528_si_001/60_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/44_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/115_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/107_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/07_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT02486H/d1dt02486h1/28_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/chem.201304196/chem_201304196_sm_miscellaneous_information/04_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp3052455/jp3052455_si_001/91_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.2c02705/cs2c02705_si_001/117_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/109_Me_3P-Rh-_Cp_OA_TS_triplet_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/120_6-Rh-_Cp_OA_TS_singlet_0_1.zip 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/110_1-Rh-_Cp_OA_TS_singlet_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/121_6-Rh-_Cp_OA_TS_triplet_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/118_5-Rh-_Cp_OA_TS_singlet_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1126/sciadv.ado9607/sciadv.ado9607_sm/25_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/ejoc.202100093/ejoc202100093-sup-0001-misc_information/38_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.6b00036/om6b00036_si_002/17_TS-5_Total_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/164_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/207_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/208_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/58_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1CP00608H/d1cp00608h1/202_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1002/cctc.201601647/cctc201601647-sup-0001-misc_information/04_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C4CC09696G/c4cc09696g1/05_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jpca.7b07226/jp7b07226_si_001/61_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D1DT04335H/d1dt04335h1/08_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/153_7-Ir-_Cp_OA_TS_triplet_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/144_OC-Ir-_Cp_OA_TS_singlet_0_1.zip 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/147_Me_3P-Ir-_Cp_OA_TS_triplet_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D0CP04080K/d0cp04080k1/151_6-Ir-_Cp_OA_TS_triplet_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.4c00177/om4c00177_si_002/35_TSBC-II-cb-R_Imaginary_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acscatal.1c02270/cs1c02270_si_001/14_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC02790A/d2sc02790a1/30_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/82_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/110_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/12_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/141_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/C9CP00670B/c9cp00670b1/149_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.2c01967/dme.imi.irc.trj/34_Coordinates_from_ORCA-job_localscratch_gibacic.295_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/450_TS-074-I_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.jctc.3c00913/ct3c00913_si_001/1605_TS-312-G_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/13_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/06_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/11_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/20_0_1.zip 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4QO00874J/d4qo00874j1/07_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.chemmater.3c02836/cm3c02836_si_001/05_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jp401443x/jp401443x_si_001/299_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.organomet.7b00767/Possible-TS-XYZ/trans_12_re_Fig5_TS14_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/07_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D2SC01926D/d2sc01926d1/03_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/13_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/18_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/D4CC02207F/d4cc02207f1/20_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/acs.inorgchem.7b01464/ic7b01464_si_001/118_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs400349p/cs400349p_si_001/22_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om300421m/om300421m_si_004/04_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/19_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/06_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/15_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/20_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1039/B907335C/b907335c/22_0_1.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/jacs.0c03821/ja0c03821_si_001/14_0_1.zip 
+/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/cs5016436/cs5016436_si_001/68_0_2.zip +/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/10.1021/om5003655/om5003655_si_002/273_Kol_tBuPh2_Zr_Me_Prn21_up_insTS.xyz_0_2.zip diff --git a/zhao-patches-doc/scripts/PATHS_NOTE.md b/zhao-patches-doc/scripts/PATHS_NOTE.md new file mode 100644 index 00000000..8c23f9f3 --- /dev/null +++ b/zhao-patches-doc/scripts/PATHS_NOTE.md @@ -0,0 +1,75 @@ +# Hard-coded paths in these scripts + +These scripts were lifted verbatim from a working project tree. The +hard-coded paths reference my local layout and **will need to be edited +before running on another machine**. Listed here so nothing surprises +you mid-run. + +## `os_new_yarp_shard.py` (the bench worker) + +```python +NEW_YARP_PATH = os.environ.get( + "NEW_YARP_PATH", + "/home/li1724/061226-YARP-again/Zhao-YARP/classy-yarp") # ← fallback +``` + +This one is **safe**: set `NEW_YARP_PATH` in your environment (or in +the sbatch script) before invoking, and the fallback never fires. +Default in `os_p5.sbatch` already does this. + +## `os_p5.sbatch` + +```bash +cd /scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_tar_zsts # edit +PY=/home/li1724/.conda/envs/2022.10-py39/copy-classy-yarp/bin/python # edit +export NEW_YARP_PATH=/home/li1724/061226-YARP-again/Zhao-YARP/classy-yarp-final # edit +``` + +Plus the `SHARD` / `OUT` paths assume `Scripts/v2/os_test_final/{shards_p5,results_p5}/` +relative to that `cd`. + +## `build_compare_matrices.py` + +```python +ROOT = Path("/scratch/.../doi_tar_zsts") # project root +SLIM_CSV = Path("/scratch/.../doi_zips_slim/.../transition_metal_oxidation_states.csv") +FINAL_CSV = ROOT / "Scripts/v2/os_test_final/transition_metal_oxidation_states_FINAL.csv" +DEDUP_LIST = ROOT / "Scripts/v2/os_test_new_yarp/dedup_tm_picks.txt" +``` + +Five paths. 
Easiest fix: edit these to point at: +- the `FINAL.csv` we shipped in `../corpus_181450/` +- whatever you're using as the "reference" OS CSV +- a text file with the 181,450 zip paths to compare on + (we can ship `dedup_tm_picks.txt` separately if you want it). + +## `plot_os_compare.py` + +```python +ROOT = Path("/scratch/.../doi_tar_zsts") +OLD = ROOT / "Scripts/v2/os_test_final/tm_os_matrix_OLD.csv" +NEW = ROOT / "Scripts/v2/os_test_final/tm_os_matrix_NEW.csv" +OUT = ROOT / "Scripts/v2/os_test_final/tm_os_compare_OLD_vs_NEW.png" +``` + +Point at `tm_os_matrix_OLD.csv` / `tm_os_matrix_NEW.csv` shipped in +`../corpus_181450/` and you're good. + +## `aggregate.py` + +Uses **relative paths** from the CWD (`Scripts/v2/os_test_final/...`). +Run it from a project root that mirrors that layout, or edit the +constants at the top. + +## `draw_tm_os_radial_dials_fullcircle.py` + +**No hard-coded paths.** Takes a matrix CSV as positional arg, output +SVG path via `-o`. Drop-in usable. + +## Suggested clean-up (out of scope for the PR, but if it matters) + +All of the above are easy `argparse` refactors. I left them as-is +because they were build-tool scripts inside a one-off project, not +library code. If they get adopted upstream as benchmarks, the obvious +refactor is to parameterize via `--corpus-csv`, `--reference-csv`, +`--dedup-list`, etc. diff --git a/zhao-patches-doc/scripts/aggregate.py b/zhao-patches-doc/scripts/aggregate.py new file mode 100644 index 00000000..970cd5fb --- /dev/null +++ b/zhao-patches-doc/scripts/aggregate.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +"""Concatenate ALL completed shard CSVs into one master CSV. + +Sources, in priority order (later overwrites earlier on duplicate zip_path): + 1. Scripts/v2/os_test_final/results/ (phase 2: 1815-archive shards) + 2. Scripts/v2/os_test_final/results_retry/ (phase 3: 400-archive shards that cover the phase-2 timeouts) + 3. Scripts/v2/os_test_final/results_p4/, then results_p5/ (phases 4-5: retries of earlier failures) + +Reports per-phase missing/short shard counts for every tree.
+""" +from pathlib import Path +import sys + +PHASE2_RES = Path("Scripts/v2/os_test_final/results") +PHASE2_SHARD = Path("Scripts/v2/os_test_final/shards_big") +PHASE3_RES = Path("Scripts/v2/os_test_final/results_retry") +PHASE3_SHARD = Path("Scripts/v2/os_test_final/shards_retry") +PHASE4_RES = Path("Scripts/v2/os_test_final/results_p4") +PHASE4_SHARD = Path("Scripts/v2/os_test_final/shards_p4") +PHASE5_RES = Path("Scripts/v2/os_test_final/results_p5") +PHASE5_SHARD = Path("Scripts/v2/os_test_final/shards_p5") +OUT = Path("Scripts/v2/os_test_final/transition_metal_oxidation_states_FINAL.csv") + +def scan(shard_dir, results_dir, label): + rows = {} # zip_path -> data line + missing, short = [], [] + for sf in sorted(shard_dir.glob("shard_*.txt")): + rf = results_dir / f"{sf.stem}.csv" + if not rf.exists() or rf.stat().st_size == 0: + missing.append(sf.stem) + continue + expected = sum(1 for _ in sf.open()) + 1 + with rf.open() as fh: + lines = fh.readlines() + if len(lines) < expected: + short.append((sf.stem, len(lines), expected)) + continue + for line in lines[1:]: + line = line.rstrip("\n") + if not line: + continue + zp = line.split(",", 1)[0] + rows[zp] = line + print(f"[{label}] shards considered: {len(list(shard_dir.glob('shard_*.txt'))):,} " + f"rows collected: {len(rows):,} missing: {len(missing)} short: {len(short)}") + return rows, missing, short + +p2_rows, p2_missing, p2_short = scan(PHASE2_SHARD, PHASE2_RES, "phase-2") +p3_rows, p3_missing, p3_short = scan(PHASE3_SHARD, PHASE3_RES, "phase-3") +p4_rows, p4_missing, p4_short = scan(PHASE4_SHARD, PHASE4_RES, "phase-4") +p5_rows, p5_missing, p5_short = scan(PHASE5_SHARD, PHASE5_RES, "phase-5") + +# Merge: later phases win on overlap (each phase is a retry of prior failures) +merged = dict(p2_rows) +merged.update(p3_rows) +merged.update(p4_rows) +merged.update(p5_rows) + +with OUT.open("w") as fh: + fh.write("zip_path,reactant_metal_oxidation_states,product_metal_oxidation_states\n") + for zp in 
sorted(merged):
+        fh.write(merged[zp] + "\n")
+
+print(f"\nwrote: {OUT}")
+print(f"unique zip_path rows: {len(merged):,}")
+print(f"target (dedup_tm_picks): 181,450")
+print(f"coverage: {100*len(merged)/181450:.1f}%")
+
+# Report gaps for every merged phase; phases 4/5 used to be silently skipped.
+gaps = {"phase-2": (p2_missing, p2_short), "phase-3": (p3_missing, p3_short),
+        "phase-4": (p4_missing, p4_short), "phase-5": (p5_missing, p5_short)}
+if any(miss or short for miss, short in gaps.values()):
+    print()
+    for label, (miss, short) in gaps.items():
+        if miss:
+            print(f"{label} missing examples: {miss[:5]} ({len(miss)} total)")
+        if short:
+            print(f"{label} short examples: {short[:3]}")
+
+sys.exit(0 if len(merged) == 181450 else 1)
diff --git a/zhao-patches-doc/scripts/build_compare_matrices.py b/zhao-patches-doc/scripts/build_compare_matrices.py
new file mode 100644
index 00000000..43d0d841
--- /dev/null
+++ b/zhao-patches-doc/scripts/build_compare_matrices.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+"""build_compare_matrices.py
+
+Build per-metal x OS matrices in the same format as
+build_tm_os_matrix.py expects (header: Metal,-3,-2,-1,0,1,2,3,4,5,6,Total),
+but for BOTH:
+
+  1. The old slim CSV: /scratch/.../doi_zips_slim/os_extraction/transition_metal_oxidation_states.csv
+  2. The new FINAL CSV: Scripts/v2/os_test_final/transition_metal_oxidation_states_FINAL.csv
+
+NB: The full slim CSV (506k rows incl. dups) covers all charge/mult variants.
+For an apples-to-apples comparison with the FINAL CSV (181k deduped picks),
+we restrict both to the SAME archive set as dedup_tm_picks.txt, summing
+reactant + product OS atom counts per archive into the per-metal bins.
+
+OS values outside [NEG_MIN, POS_MAX] are clamped to the terminal bins.
+ +Outputs: + Scripts/v2/os_test_final/tm_os_matrix_OLD.csv (slim, restricted to dedup set) + Scripts/v2/os_test_final/tm_os_matrix_NEW.csv (FINAL, restricted to dedup set) +""" +import csv +import re +import sys +from collections import defaultdict +from pathlib import Path + +ROOT = Path("/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_tar_zsts") +SLIM_CSV = Path("/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_zips_slim/os_extraction/transition_metal_oxidation_states.csv") +FINAL_CSV = ROOT / "Scripts/v2/os_test_final/transition_metal_oxidation_states_FINAL.csv" +DEDUP_LIST = ROOT / "Scripts/v2/os_test_new_yarp/dedup_tm_picks.txt" +OUT_DIR = ROOT / "Scripts/v2/os_test_final" +OUT_OLD = OUT_DIR / "tm_os_matrix_OLD.csv" +OUT_NEW = OUT_DIR / "tm_os_matrix_NEW.csv" + +NEG_MIN, POS_MAX = -3, 6 +OS_COLS = list(range(NEG_MIN, POS_MAX + 1)) + +TM_ORDER = [ + 'Sc','Ti','V','Cr','Mn','Fe','Co','Ni','Cu','Zn', + 'Y','Zr','Nb','Mo','Tc','Ru','Rh','Pd','Ag','Cd', + 'Hf','Ta','W','Re','Os','Ir','Pt','Au','Hg', +] +TM_SET = set(TM_ORDER) +ATOM_RE = re.compile(r"([A-Z][a-z]?)(\d+):(-?\d+)") + + +def clamp(os_val: int) -> int: + if os_val < NEG_MIN: return NEG_MIN + if os_val > POS_MAX: return POS_MAX + return os_val + + +def build_matrix(csv_path: Path, allowed_paths: set, label: str): + """Sum reactant + product OS atom counts per metal for the allowed archives only.""" + bins = defaultdict(lambda: defaultdict(int)) # metal -> os -> count + n_archives = 0 + n_archive_atoms = 0 + with csv_path.open() as fh: + for row in csv.DictReader(fh): + zp = row["zip_path"] + if zp not in allowed_paths: + continue + n_archives += 1 + for side in ("reactant_metal_oxidation_states", "product_metal_oxidation_states"): + s = row.get(side, "") or "" + if not s or "ERR" in s or "SYSEXIT" in s: + continue + for m in ATOM_RE.finditer(s): + metal = m.group(1) + if metal not in TM_SET: + continue + os_val = clamp(int(m.group(3))) + bins[metal][os_val] += 1 + n_archive_atoms += 1 + + 
print(f"[{label}] archives matched: {n_archives:,} per-atom OS rows: {n_archive_atoms:,}") + return bins + + +def write_matrix(bins, out_path: Path): + with out_path.open("w", newline="") as fh: + w = csv.writer(fh) + w.writerow(["Metal"] + [str(c) for c in OS_COLS] + ["Total"]) + for metal in TM_ORDER: + row = [bins[metal].get(c, 0) for c in OS_COLS] + total = sum(row) + w.writerow([metal] + row + [total]) + print(f"wrote: {out_path}") + + +def main(): + allowed = set(p.strip() for p in DEDUP_LIST.read_text().splitlines() if p.strip()) + print(f"allowed archives (dedup_tm_picks): {len(allowed):,}\n") + + old_bins = build_matrix(SLIM_CSV, allowed, "OLD slim") + new_bins = build_matrix(FINAL_CSV, allowed, "NEW FINAL") + + write_matrix(old_bins, OUT_OLD) + write_matrix(new_bins, OUT_NEW) + + # Print over-group-max comparison + GROUP_MAX = {'Sc':3,'Ti':4,'V':5,'Cr':6,'Mn':7,'Fe':6,'Co':5,'Ni':4,'Cu':3,'Zn':2, + 'Y':3,'Zr':4,'Nb':5,'Mo':6,'Tc':7,'Ru':8,'Rh':6,'Pd':4,'Ag':3,'Cd':2, + 'La':3,'Hf':4,'Ta':5,'W':6,'Re':7,'Os':8,'Ir':6,'Pt':6,'Au':5,'Hg':2} + print("\n=== over-group-max atom-level counts (restricted dedup set) ===") + print(f" {'Metal':<6}{'OLD>max':>10}{'NEW>max':>10}{'Δ':>8}") + for m in TM_ORDER: + if m not in GROUP_MAX: + continue + old_over = sum(c for os_v, c in old_bins[m].items() if os_v > GROUP_MAX[m]) + new_over = sum(c for os_v, c in new_bins[m].items() if os_v > GROUP_MAX[m]) + if old_over or new_over: + print(f" {m:<6}{old_over:>10,}{new_over:>10,}{new_over-old_over:>+8d}") + + +if __name__ == "__main__": + main() diff --git a/zhao-patches-doc/scripts/draw_tm_os_radial_dials_fullcircle.py b/zhao-patches-doc/scripts/draw_tm_os_radial_dials_fullcircle.py new file mode 100644 index 00000000..e8c85515 --- /dev/null +++ b/zhao-patches-doc/scripts/draw_tm_os_radial_dials_fullcircle.py @@ -0,0 +1,708 @@ +#!/usr/bin/env python3 +# draw_tm_os_radial_dials_fullcircle.py +# +# Usage: +# python draw_tm_os_radial_dials_fullcircle.py tm_os_matrix_SUM.csv -o 
tm_os_dials.svg --png +# +# Optional: +# python draw_tm_os_radial_dials_fullcircle.py tm_os_matrix_SUM.csv \ +# -o tm_os_dials.svg \ +# --png \ +# --hide-legend \ +# --ignore-os-beyond-valence +# +# Input CSV format: +# Metal,-3,-2,-1,0,1,2,3,4,5,6,Total +# Sc,0,0,0,10,4,2,0,0,0,0,16 +# ... +# +# Meaning: +# - Each transition-metal cell contains a radial oxidation-state dial. +# - Nonzero OS values are spokes around the circle. +# - Spoke length = per-metal fraction of that OS. +# - Spoke color = sign of OS. +# - Center puck area = fraction of OS 0. +# - Ring opacity = oxidation-state diversity. +# - Optional flag can remove OS values with |OS| larger than neutral valence count. + +import argparse +import csv +import math +from pathlib import Path + +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.patches import Circle, Wedge, Rectangle + + +# ============================================================ +# Colors +# ============================================================ + +POS_COLOR = "#D4AF37" # positive OS, gold +NEG_COLOR = "#23364A" # negative OS, deep navy +ZERO_COLOR = "#C9D1D9" # OS 0 / neutral +RING_COLOR = "#B0B7BF" # reference ring +CELL_EDGE = "#9AA4AE" +BG_COLOR = "white" + + +# ============================================================ +# Dial geometry +# ============================================================ + +# Full-circle layout: +# spokes are evenly spaced around 360 degrees. 
+START_DEG = 90.0 # first spoke at 12 o'clock, then clockwise +WEDGE_GAP_FRAC = 0.30 # fraction of each angular sector kept empty + +# Fraction -> radial length mapping for nonzero OS spokes +INNER_R = 0.18 +OUTER_R = 0.46 +MIN_FLOOR = 0.08 # minimum visible spoke length for rare states + +# Center puck: OS 0 encoded by AREA, so radius ~ sqrt(f0) +PUCK_R_MIN = 0.06 +PUCK_R_MAX = 0.16 # keep below INNER_R +PUCK_FILL = ZERO_COLOR +PUCK_EDGE = "none" +PUCK_ALPHA = 1.0 + +PUCK_ABSENT_HOLLOW = True +PUCK_ABSENT_EDGE = ZERO_COLOR +PUCK_ABSENT_LW = 0.8 + +# Optional numeric label for OS 0 fraction +LABEL_ZERO = False +LABEL_ZERO_FMT = "{:.0%}" +LABEL_ZERO_DY = 0.03 +LABEL_ZERO_SIZE = 5 +LABEL_ZERO_COLOR = "#444444" + +# Entropy ring +ENTROPY_RING = True +ENTROPY_RING_R = 0.50 +ENTROPY_ALPHA_MIN = 0.15 +ENTROPY_ALPHA_MAX = 0.85 + + +# ============================================================ +# Oxidation-state binning +# ============================================================ + +NEG_MIN = -3 # merge all OS <= -3 into this bin +POS_MAX = +6 # merge all OS >= +6 into this bin + +SHOW_EXTREME_LABELS = True + +# Label all nonzero OS spokes +LABEL_ALL_SPOKES = True +LABEL_FONT_SIZE = 5 +LABEL_FONT_WEIGHT = "bold" +LABEL_OFFSET = 0.10 +LABEL_COLOR = "#444444" + + +# ============================================================ +# Grid geometry +# ============================================================ + +CELL_W = 1.2 +CELL_H = 1.32 # mild bump to fit the literature-OS annotation below the dial +PAD_X = 0.5 +PAD_Y = 0.5 +LABEL_MARGIN = 0.07 +COMMON_OS_DY = 0.55 # how far below dial center to place the 'lit:' label + + +# ============================================================ +# Neutral valence electron dictionary +# ============================================================ + +EL_VALENCE = { + 'h':1, 'he':2, + 'li':1, 'be':2, + 'b':3, 'c':4, 'n':5, 'o':6, 'f':7, 'ne':8, + 'na':1, 'mg':2, + 'al':3, 'si':4, 'p':5, 's':6, 'cl':7, 'ar':8, + 'k':1, 'ca':2, + 
'sc':3, 'ti':4, 'v':5, 'cr':6, 'mn':7, 'fe':8, + 'co':9, 'ni':10, 'cu':11, 'zn':12, + 'ga':3, 'ge':4, 'as':5, 'se':6, 'br':7, 'kr':8, + 'rb':1, 'sr':2, + 'y':3, 'zr':4, 'nb':5, 'mo':6, 'tc':7, 'ru':8, + 'rh':9, 'pd':10, 'ag':11, 'cd':12, + 'in':3, 'sn':4, 'sb':5, 'te':6, 'i':7, 'xe':8, + 'cs':1, 'ba':2, + 'la':3, 'hf':4, 'ta':5, 'w':6, 're':7, 'os':8, + 'ir':9, 'pt':10, 'au':11, 'hg':12, + 'tl':3, 'pb':4, 'bi':5, 'po':6, 'at':7, 'rn':8 +} + + +# ============================================================ +# Transition metal layout +# Groups: G3 G4 G5 G6 G7 G8 G9 G10 G11 G12 +# ============================================================ + +# Most common (textbook) oxidation states per element. +# Source: Cotton/Greenwood inorganic chemistry + LibreTexts; for organometallic- +# heavy contexts we list the 1-2 catalytically dominant values. +COMMON_OS = { + 'Sc': '+3', + 'Ti': '+4', 'V': '+4,+5', 'Cr': '+3,+6', 'Mn': '+2,+7', + 'Fe': '+2,+3', 'Co': '+2,+3', 'Ni': '+2', 'Cu': '+1,+2', 'Zn': '+2', + 'Y': '+3', + 'Zr': '+4', 'Nb': '+5', 'Mo': '+4,+6', 'Tc': '+4,+7', + 'Ru': '+2,+3', 'Rh': '+1,+3', 'Pd': '0,+2', 'Ag': '+1', 'Cd': '+2', + 'La': '+3', + 'Hf': '+4', 'Ta': '+5', 'W': '+4,+6', 'Re': '+5,+7', + 'Os': '+4,+8', 'Ir': '+1,+3', 'Pt': '+2,+4', 'Au': '+1,+3', 'Hg': '+2', +} +COMMON_OS_FONT_SIZE = 6 +COMMON_OS_COLOR = "#7d4a1f" # warm brown — visually distinct from spoke gold/navy + + +ROW_LABELS = ["3d", "4d", "5d"] + +ORDER_3D = ['Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn'] +ORDER_4D = ['Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd'] + +# Important: +# 5d row has a blank Group-3 placeholder so Hf aligns under Ti, +# and Au aligns with Cu/Ag. 
+ORDER_5D = [None, 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg'] + +TM_ROWS = [ORDER_3D, ORDER_4D, ORDER_5D] + + +# ============================================================ +# Helper functions +# ============================================================ + +def is_int_str(s: str) -> bool: + try: + int(s) + return True + except Exception: + return False + + +def clamp_os_bin(k: int) -> int: + """Merge oxidation states into terminal bins.""" + if k <= NEG_MIN: + return NEG_MIN + if k >= POS_MAX: + return POS_MAX + return k + + +def entropy_from_counts(counts: dict) -> float: + """Shannon entropy over nonzero oxidation-state counts.""" + total = sum(counts.values()) + if total <= 0: + return 0.0 + + p = np.array([c / total for c in counts.values() if c > 0], dtype=float) + return float(-np.sum(p * np.log2(p))) + + +def fractions(counts: dict) -> dict: + """Convert count dictionary to per-metal fractions.""" + total = sum(counts.values()) + if total <= 0: + return {} + + return {k: v / total for k, v in counts.items() if v > 0} + + +def spoke_color(k: int) -> str: + if k > 0: + return POS_COLOR + if k < 0: + return NEG_COLOR + return ZERO_COLOR + + +def order_bins_fullcircle(merged_counts: dict): + """ + Return ordered nonzero OS bins for full-circle placement. + + The order interleaves signs: + +1, -1, +2, -2, +3, -3, ... + + Only bins with nonzero counts are included. 
+ """ + present = [ + k for k, v in merged_counts.items() + if v > 0 and k != 0 + ] + + pos = sorted([k for k in present if k > 0], key=lambda k: abs(k)) + neg = sorted([k for k in present if k < 0], key=lambda k: abs(k)) + + inter = [] + i = 0 + j = 0 + + while i < len(pos) or j < len(neg): + if i < len(pos): + inter.append(pos[i]) + i += 1 + if j < len(neg): + inter.append(neg[j]) + j += 1 + + return inter + + +# ============================================================ +# Drawing +# ============================================================ + +def draw_dial(ax, cx, cy, metal_counts: dict, metal_label: str): + """ + Draw one oxidation-state dial at cell center (cx, cy). + """ + + # Merge extreme oxidation states + merged = {} + for k, v in metal_counts.items(): + kk = clamp_os_bin(k) + merged[kk] = merged.get(kk, 0) + v + + fr = fractions(merged) + + # Reference ring + ax.add_patch( + Circle( + (cx, cy), + ENTROPY_RING_R, + fill=False, + ec=RING_COLOR, + lw=0.6, + zorder=1 + ) + ) + + # Entropy ring + if ENTROPY_RING: + observed_bins = [k for k, v in merged.items() if v > 0] + n_bins = max(1, len(observed_bins)) + + e = entropy_from_counts(merged) + e_max = math.log2(n_bins) if n_bins > 1 else 0.0 + + if e_max <= 1e-12: + alpha = ENTROPY_ALPHA_MIN + else: + alpha = ENTROPY_ALPHA_MIN + ( + ENTROPY_ALPHA_MAX - ENTROPY_ALPHA_MIN + ) * (e / e_max) + + ax.add_patch( + Circle( + (cx, cy), + ENTROPY_RING_R, + fill=False, + ec=NEG_COLOR, + lw=1.2, + alpha=alpha, + zorder=2 + ) + ) + + # Center puck for OS 0: area encodes f0 + f0 = fr.get(0, 0.0) + + if f0 > 0.0: + r0 = PUCK_R_MIN + (f0 ** 0.5) * (PUCK_R_MAX - PUCK_R_MIN) + + ax.add_patch( + Circle( + (cx, cy), + r0, + fc=PUCK_FILL, + ec=PUCK_EDGE, + alpha=PUCK_ALPHA, + zorder=3 + ) + ) + + if LABEL_ZERO: + ax.text( + cx, + cy - (r0 + LABEL_ZERO_DY), + LABEL_ZERO_FMT.format(f0), + fontsize=LABEL_ZERO_SIZE, + ha="center", + va="top", + color=LABEL_ZERO_COLOR + ) + else: + if PUCK_ABSENT_HOLLOW: + ax.add_patch( + 
Circle( + (cx, cy), + PUCK_R_MIN, + fill=False, + ec=PUCK_ABSENT_EDGE, + lw=PUCK_ABSENT_LW, + zorder=3 + ) + ) + + # Nonzero spokes + bins_order = order_bins_fullcircle(merged) + n = max(1, len(bins_order)) + + sector = 360.0 / n + wedge_width = sector * (1.0 - WEDGE_GAP_FRAC) + + for idx, k in enumerate(bins_order): + f = fr.get(k, 0.0) + if f <= 0.0: + continue + + f_eff = max(f, MIN_FLOOR) + + r_inner = INNER_R + r_outer = INNER_R + f_eff * (OUTER_R - INNER_R) + + # Clockwise angular placement + angle = START_DEG - idx * sector + + theta1 = angle - wedge_width / 2.0 + theta2 = angle + wedge_width / 2.0 + + color = spoke_color(k) + + wedge = Wedge( + center=(cx, cy), + r=r_outer, + theta1=theta1, + theta2=theta2, + width=(r_outer - r_inner), + facecolor=color, + edgecolor="none", + zorder=4 + ) + + ax.add_patch(wedge) + + # Labels beside every spoke tip + if LABEL_ALL_SPOKES: + if k == NEG_MIN: + label = f"≤{NEG_MIN}" + elif k == POS_MAX: + label = f"≥{POS_MAX}" + else: + label = str(k) + + rad = math.radians(angle) + + lx = cx + (r_outer + LABEL_OFFSET) * math.cos(rad) + ly = cy + (r_outer + LABEL_OFFSET) * math.sin(rad) + + ax.text( + lx, + ly, + label, + fontsize=LABEL_FONT_SIZE, + ha="center", + va="center", + color=LABEL_COLOR, + fontweight=LABEL_FONT_WEIGHT + ) + + elif SHOW_EXTREME_LABELS and k in (NEG_MIN, POS_MAX): + label = f"≤{NEG_MIN}" if k == NEG_MIN else f"≥{POS_MAX}" + + rad = math.radians(angle) + + lx = cx + (r_outer + LABEL_OFFSET) * math.cos(rad) + ly = cy + (r_outer + LABEL_OFFSET) * math.sin(rad) + + ax.text( + lx, + ly, + label, + fontsize=LABEL_FONT_SIZE, + ha="center", + va="center", + color=LABEL_COLOR, + fontweight=LABEL_FONT_WEIGHT + ) + + # Element label at top-left + tlx = cx - CELL_W / 2.0 + LABEL_MARGIN + tly = cy + CELL_H / 2.0 - LABEL_MARGIN + + ax.text( + tlx, + tly, + metal_label, + fontsize=8, + ha="left", + va="top", + color="#111111" + ) + + # Common-OS annotation below the dial (literature reference values). 
+ # Placed at fixed offset COMMON_OS_DY below the dial center so it sits in + # the extended bottom strip of the cell and never overlaps spoke labels. + common = COMMON_OS.get(metal_label) + if common: + ax.text( + cx, + cy - COMMON_OS_DY, + f"lit: {common}", + fontsize=COMMON_OS_FONT_SIZE, + ha="center", + va="top", + color=COMMON_OS_COLOR, + style="italic", + ) + + +# ============================================================ +# Data loading +# ============================================================ + +def load_matrix(csv_path: Path, ignore_beyond_valence: bool = False): + """ + Load Metal x OS matrix. + + Returns: + metals[Metal] = {OS: count} + + If ignore_beyond_valence=True: + remove any OS k where |k| > neutral valence electrons for that element. + """ + with csv_path.open("r", encoding="utf-8") as f: + rdr = csv.reader(f) + header = next(rdr) + + idx = {h: i for i, h in enumerate(header)} + + if "Metal" not in idx: + raise ValueError("CSV must have a 'Metal' column.") + + metals = {} + + for row in rdr: + if not row: + continue + + m = row[idx["Metal"]].strip() + + if not m or m == "AllMetals": + continue + + counts = {} + + for h, i in idx.items(): + if is_int_str(h): + try: + raw = row[i].strip() + v = int(raw) if raw else 0 + except Exception: + v = 0 + + if v != 0: + counts[int(h)] = v + + if not counts: + continue + + if ignore_beyond_valence: + val = EL_VALENCE.get(m.lower()) + + if val is not None: + counts = { + k: v for k, v in counts.items() + if abs(k) <= val + } + + if counts: + metals[m] = counts + + return metals + + +# ============================================================ +# Main +# ============================================================ + +def main(): + ap = argparse.ArgumentParser( + description=( + "Draw full-circle radial oxidation-state dials " + "for transition metals." + ) + ) + + ap.add_argument( + "csv", + type=Path, + help="Summed Metal x OS CSV." 
+ ) + + ap.add_argument( + "-o", + "--out", + type=Path, + default=Path("tm_os_dials.svg"), + help="Output SVG path." + ) + + ap.add_argument( + "--png", + action="store_true", + help="Also write a PNG alongside the SVG." + ) + + ap.add_argument( + "--ignore-os-beyond-valence", + action="store_true", + help=( + "Drop any OS k where |k| is greater than the element's " + "neutral valence-electron count." + ) + ) + + ap.add_argument( + "--hide-legend", + action="store_true", + help="Do not render the legend block." + ) + + args = ap.parse_args() + + metals = load_matrix( + args.csv, + ignore_beyond_valence=args.ignore_os_beyond_valence + ) + + n_rows = 3 + n_cols = 10 + + # Add a little extra width only when legend is shown + legend_extra = 2.5 if not args.hide_legend else 0.0 + + fig_w = PAD_X * 2 + n_cols * CELL_W + legend_extra + fig_h = PAD_Y * 2 + n_rows * CELL_H + + fig, ax = plt.subplots(figsize=(fig_w, fig_h), dpi=300) + + ax.set_facecolor(BG_COLOR) + ax.set_aspect("equal") + ax.set_xlim(0, fig_w) + ax.set_ylim(0, fig_h) + ax.axis("off") + + # Draw cells and dials + for r, row in enumerate(TM_ROWS): + y = PAD_Y + (n_rows - 1 - r) * CELL_H + CELL_H / 2.0 + + for c, metal in enumerate(row): + x = PAD_X + c * CELL_W + CELL_W / 2.0 + + # Cell border + ax.add_patch( + Rectangle( + (x - CELL_W / 2.0, y - CELL_H / 2.0), + CELL_W, + CELL_H, + fill=False, + ec=CELL_EDGE, + lw=0.7, + zorder=0 + ) + ) + + # Placeholder for 5d group-3 alignment + if metal is None: + continue + + counts = metals.get(metal) + + if counts: + draw_dial(ax, x, y, counts, metal) + else: + # Empty-data hint + tlx = x - CELL_W / 2.0 + LABEL_MARGIN + tly = y + CELL_H / 2.0 - LABEL_MARGIN + + ax.text( + tlx, + tly, + metal, + fontsize=8, + ha="left", + va="top", + color="#888888" + ) + + # Row labels + for r, label in enumerate(ROW_LABELS): + ylab = PAD_Y + (n_rows - 1 - r) * CELL_H + CELL_H / 2.0 + + ax.text( + PAD_X - 0.28, + ylab, + label, + fontsize=8, + ha="right", + va="center", + 
color="#444444" + ) + + # Optional legend + if not args.hide_legend: + legend_x = PAD_X + n_cols * CELL_W + 0.25 + legend_y = PAD_Y + n_rows * CELL_H - 0.2 + + ax.text( + legend_x, + legend_y, + "Full-circle spokes\n" + "Color = sign: − navy, + gold\n" + "Length = per-metal fraction\n" + "Center disk area = fraction at OS 0\n" + "Labels = oxidation state\n" + "≤ / ≥ = merged extreme bins\n" + "Ring opacity = OS diversity", + fontsize=7, + ha="left", + va="top", + color="#222222" + ) + + # Save + args.out.parent.mkdir(parents=True, exist_ok=True) + + fig.savefig( + args.out, + bbox_inches="tight", + pad_inches=0.05 + ) + + if args.png: + png_path = args.out.with_suffix(".png") + fig.savefig( + png_path, + bbox_inches="tight", + pad_inches=0.05 + ) + + plt.close(fig) + + print(f"✓ Wrote {args.out.resolve()}") + + if args.png: + print(f"✓ Wrote {png_path.resolve()}") + + +if __name__ == "__main__": + main() diff --git a/zhao-patches-doc/scripts/os_new_yarp_shard.py b/zhao-patches-doc/scripts/os_new_yarp_shard.py new file mode 100644 index 00000000..9cc539f5 --- /dev/null +++ b/zhao-patches-doc/scripts/os_new_yarp_shard.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 +"""os_new_yarp_shard.py <shard.txt> --out <out.csv> [--timeout SEC] + +For each slim zip path in the shard: + - Extract <stem>/finished_first.xyz (reactant) and <stem>/finished_last.xyz (product) + - Parse charge from filename: <prefix>_<charge>_<int>.zip + - Run new-patched YARP on each side (via PYTHONPATH preset) to get BEM + - Compute OS = el_valence[el] - int(bem_diag[i]) for d-block atoms + - Emit one CSV row per archive: zip_path, reactant_OS, product_OS + (OS string format matches transition_metal_oxidation_states.csv: + 'El0:N;El1:M;...') + +This is READ-ONLY: it never modifies the source zips and writes only into +the --out CSV path under Scripts/v2/os_test_new_yarp/results/. + +Set NEW_YARP_PATH env var (or rely on the sbatch script's default) to point +at the patched-new-YARP checkout.
+""" +from __future__ import annotations +import argparse, contextlib, csv, os, re, sys, tempfile, time, zipfile +from pathlib import Path + +# Resolve new yarp via env var; sbatch sets it to the Zhao-YARP checkout. +NEW_YARP_PATH = os.environ.get( + "NEW_YARP_PATH", + "/home/li1724/061226-YARP-again/Zhao-YARP/classy-yarp") +sys.path.insert(0, NEW_YARP_PATH) + +# Silence yarp's import-time prints (RDKit logger warnings etc.) +with open(os.devnull, "w") as _dn, \ + contextlib.redirect_stdout(_dn), contextlib.redirect_stderr(_dn): + import yarp as yp + +# Full d-block: atomic numbers 21-30, 39-48, 57, 72-80 +TRANSITION_METALS = { + 'sc','ti','v','cr','mn','fe','co','ni','cu','zn', + 'y','zr','nb','mo','tc','ru','rh','pd','ag','cd', + 'la', + 'hf','ta','w','re','os','ir','pt','au','hg', +} + +EL_VALENCE = { + 'sc':3,'ti':4,'v':5,'cr':6,'mn':7,'fe':8,'co':9,'ni':10,'cu':11,'zn':12, + 'y':3,'zr':4,'nb':5,'mo':6,'tc':7,'ru':8,'rh':9,'pd':10,'ag':11,'cd':12, + 'la':3, + 'hf':4,'ta':5,'w':6,'re':7,'os':8,'ir':9,'pt':10,'au':11,'hg':12, +} + +CHARGE_RE = re.compile(r".*_(-?\d+)_(\d+)$") + + +def parse_charge(stem: str): + m = CHARGE_RE.match(stem) + return int(m.group(1)) if m else None + + +def rewrite_xyz_with_q(src_bytes: bytes, charge: int, tmp_path: Path): + """Read xyz bytes, replace comment line with 'q ', write to tmp_path.""" + lines = src_bytes.decode("utf-8", errors="replace").splitlines() + if len(lines) < 2: + raise ValueError("xyz too short") + lines[1] = f"q {charge}" + tmp_path.write_text("\n".join(lines) + "\n") + + +def compute_os_string(xyz_path: Path): + """Run new yarpecule on the xyz and return 'El0:OS;El1:OS;...' 
for TM atoms.""" + with open(os.devnull, "w") as dn, \ + contextlib.redirect_stdout(dn), contextlib.redirect_stderr(dn): + y = yp.yarpecule(str(xyz_path), canon=False) + parts = [] + bem = y.bond_mats[0] + for i, el in enumerate(y.elements): + el_lc = str(el).lower() + if el_lc not in TRANSITION_METALS: + continue + v = EL_VALENCE.get(el_lc) + if v is None: + continue + e = int(bem[i, i]) + os_val = v - e + # Title-case element symbol to match existing CSV: 'Ti12' + sym = el_lc[:1].upper() + el_lc[1:] + parts.append(f"{sym}{i}:{os_val}") + return ";".join(parts) + + +def process_one(zip_path_str: str, timeout_s: int): + """Returns (status, wall, zip_path, reactant_str, product_str, msg).""" + zip_path = Path(zip_path_str) + t0 = time.time() + if not zip_path.exists(): + return ("FAIL", time.time()-t0, zip_path_str, "", "", "zip not found") + + stem = zip_path.stem + charge = parse_charge(stem) + if charge is None: + return ("FAIL", time.time()-t0, zip_path_str, "", "", + f"cannot parse charge from stem={stem}") + + work = Path(tempfile.mkdtemp(prefix=f"osnew_{os.getpid()}_", dir="/tmp")) + r_str = p_str = "" + try: + with zipfile.ZipFile(zip_path) as zf: + try: + r_bytes = zf.read(f"{stem}/finished_first.xyz") + except KeyError: + r_bytes = None + try: + p_bytes = zf.read(f"{stem}/finished_last.xyz") + except KeyError: + p_bytes = None + if r_bytes is None and p_bytes is None: + return ("FAIL", time.time()-t0, zip_path_str, "", "", + "no finished_first.xyz nor finished_last.xyz") + + if r_bytes is not None: + r_xyz = work / "reactant.xyz" + try: + rewrite_xyz_with_q(r_bytes, charge, r_xyz) + r_str = compute_os_string(r_xyz) + except SystemExit: + r_str = "SYSEXIT" + except Exception as e: + r_str = f"ERR:{type(e).__name__}" + if p_bytes is not None: + p_xyz = work / "product.xyz" + try: + rewrite_xyz_with_q(p_bytes, charge, p_xyz) + p_str = compute_os_string(p_xyz) + except SystemExit: + p_str = "SYSEXIT" + except Exception as e: + p_str = f"ERR:{type(e).__name__}" 
+ + wall = time.time() - t0 + # If both empty AND have legit data, this archive simply has no TMs + # (unexpected since this is the TM-only dedup list, but possible). + return ("OK", wall, zip_path_str, r_str, p_str, "") + except Exception as e: + return ("FAIL", time.time()-t0, zip_path_str, r_str, p_str, + f"{type(e).__name__}: {e}") + finally: + import shutil + shutil.rmtree(work, ignore_errors=True) + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("shard", type=Path) + ap.add_argument("--out", type=Path, required=True, + help="Output CSV path (one row per archive).") + ap.add_argument("--timeout", type=int, default=120, + help="Per-archive timeout (currently informational; " + "yarpecule is in-process so we can't cleanly enforce).") + args = ap.parse_args() + + paths = [ln.strip() for ln in args.shard.read_text().splitlines() if ln.strip()] + print(f"shard={args.shard.name} archives={len(paths)} " + f"new_yarp={NEW_YARP_PATH} out={args.out}", flush=True) + + args.out.parent.mkdir(parents=True, exist_ok=True) + t0 = time.time() + n_ok = n_fail = 0 + with open(args.out, "w", newline="") as fh: + w = csv.writer(fh) + w.writerow(["zip_path", + "reactant_metal_oxidation_states", + "product_metal_oxidation_states"]) + for p in paths: + st, wall, path, r_s, p_s, msg = process_one(p, args.timeout) + w.writerow([path, r_s, p_s]) + if st == "OK": + n_ok += 1 + if not (n_ok % 100): + print(f" ok={n_ok}/{len(paths)} elapsed={time.time()-t0:.0f}s", + flush=True) + else: + n_fail += 1 + print(f"FAIL {wall:.2f}s {path} :: {msg}", flush=True) + print(f"done: ok={n_ok} fail={n_fail} elapsed={time.time()-t0:.0f}s", flush=True) + sys.exit(0 if n_fail == 0 else 1) + + +if __name__ == "__main__": + main() diff --git a/zhao-patches-doc/scripts/os_p5.sbatch b/zhao-patches-doc/scripts/os_p5.sbatch new file mode 100644 index 00000000..c0af6283 --- /dev/null +++ b/zhao-patches-doc/scripts/os_p5.sbatch @@ -0,0 +1,43 @@ +#!/bin/bash +#SBATCH --job-name=os_p5 +#SBATCH 
--output=Scripts/v2/os_test_final/logs/os_p5_%A_%a.out +#SBATCH --error=Scripts/v2/os_test_final/logs/os_p5_%A_%a.err +#SBATCH -A bsavoie +#SBATCH --partition=cpu +#SBATCH --qos=standby +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=8G +#SBATCH --time 04:00:00 +#SBATCH --array=1-100%1000 +#SBATCH --requeue + +set -euo pipefail +cd /scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_tar_zsts + +PY=/home/li1724/.conda/envs/2022.10-py39/copy-classy-yarp/bin/python +export NEW_YARP_PATH=/home/li1724/061226-YARP-again/Zhao-YARP/classy-yarp-final + +SHARD=$(printf "Scripts/v2/os_test_final/shards_p5/shard_%03d.txt" "$SLURM_ARRAY_TASK_ID") +OUT=$(printf "Scripts/v2/os_test_final/results_p5/shard_%03d.csv" "$SLURM_ARRAY_TASK_ID") + +if [ ! -f "$SHARD" ]; then + echo "no shard ${SLURM_ARRAY_TASK_ID}" + exit 0 +fi + +EXPECTED=$(( $(wc -l < "$SHARD") + 1 )) +if [ -s "$OUT" ]; then + HAVE=$(wc -l < "$OUT") + if [ "$HAVE" -eq "$EXPECTED" ]; then + echo "skip ${SLURM_ARRAY_TASK_ID}: complete (${HAVE} rows)" + exit 0 + fi + echo "partial (${HAVE}/${EXPECTED}), restart" + rm -f "$OUT" +fi + +echo "=== p5-shard ${SLURM_ARRAY_TASK_ID} start $(date) host=$(hostname) ===" +"$PY" Scripts/v2/os_test_new_yarp/os_new_yarp_shard.py "$SHARD" --out "$OUT" +echo "=== p5-shard ${SLURM_ARRAY_TASK_ID} end $(date) ===" diff --git a/zhao-patches-doc/scripts/plot_os_compare.py b/zhao-patches-doc/scripts/plot_os_compare.py new file mode 100644 index 00000000..d039a30f --- /dev/null +++ b/zhao-patches-doc/scripts/plot_os_compare.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +"""plot_os_compare.py + +Side-by-side per-metal OS histogram comparison: OLD slim CSV vs NEW FINAL CSV +restricted to the same 181k dedup_tm_picks set. For each metal, show grouped +bars (old vs new) per OS bin, shade the bins ABOVE the group max in light red +to highlight chemically-impossible regions, and annotate Δ = new - old. 
import csv
import re
from collections import defaultdict
from pathlib import Path

import numpy as np

ROOT = Path("/scratch/negishi/li1724/SI-Downloads/SI_Agent/doi_tar_zsts")
OLD = ROOT / "Scripts/v2/os_test_final/tm_os_matrix_OLD.csv"
NEW = ROOT / "Scripts/v2/os_test_final/tm_os_matrix_NEW.csv"
OUT = ROOT / "Scripts/v2/os_test_final/tm_os_compare_OLD_vs_NEW.png"

# Plot order of the metals: one subplot per entry.
TM_ORDER = [
    'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
    'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd',
    'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg',
]
# Highest oxidation state treated as plausible per metal; bins above this
# value are shaded in the plot.
GROUP_MAX = {'Sc': 3, 'Ti': 4, 'V': 5, 'Cr': 6, 'Mn': 7, 'Fe': 6, 'Co': 5, 'Ni': 4, 'Cu': 3, 'Zn': 2,
             'Y': 3, 'Zr': 4, 'Nb': 5, 'Mo': 6, 'Tc': 7, 'Ru': 8, 'Rh': 6, 'Pd': 4, 'Ag': 3, 'Cd': 2,
             'Hf': 4, 'Ta': 5, 'W': 6, 'Re': 7, 'Os': 8, 'Ir': 6, 'Pt': 6, 'Au': 5, 'Hg': 2}
# Oxidation-state bins shown on the x axis: -3 .. 6 inclusive.
OS_COLS = list(range(-3, 7))


def load_matrix(path):
    """Read a per-metal oxidation-state count matrix from a CSV file.

    The CSV must have a ``Metal`` column; every other column except
    ``Total`` is interpreted as an integer oxidation-state bin holding an
    integer count.

    Args:
        path: ``pathlib.Path`` of the CSV file.

    Returns:
        ``{metal: {oxidation_state: count}}`` with int keys and values.
    """
    # newline="" is the csv-module recommended way to open files for reading.
    with path.open(newline="") as fh:
        return {
            row["Metal"]: {
                int(col): int(val)
                for col, val in row.items()
                if col not in ("Metal", "Total")
            }
            for row in csv.DictReader(fh)
        }


def over_max_title(metal, gmax, over_old, over_new):
    """Build the subplot title for one metal.

    Args:
        metal: element symbol.
        gmax: highest oxidation state considered plausible for the metal.
        over_old: OLD count in bins above ``gmax``.
        over_new: NEW count in bins above ``gmax``.

    Returns:
        ``"<metal> (max OS=<gmax>)"``, followed by the over-max change
        ``old→new (signed delta)`` only when the two counts differ.
    """
    title = f"{metal} (max OS={gmax})"
    if over_old != over_new:
        # The "+" flag of the format spec already emits the sign; adding an
        # explicit prefix on top of it rendered increases as "++N".
        title += f" over-max: {over_old:,}→{over_new:,} ({over_new - over_old:+,d})"
    return title


def main():
    """Render the OLD-vs-NEW per-metal histogram grid and save it to ``OUT``."""
    # Imported here so the data helpers above stay importable on machines
    # without a plotting stack.
    import matplotlib.pyplot as plt
    from matplotlib.patches import Patch

    old = load_matrix(OLD)
    new = load_matrix(NEW)

    # 6 rows x 5 cols subplots so 29 + 1 legend cell
    nrow, ncol = 6, 5
    fig, axes = plt.subplots(nrow, ncol, figsize=(20, 22), constrained_layout=True)
    axes = axes.flatten()
    fig.suptitle("Per-metal OS distribution — OLD published slim CSV vs NEW patched-YARP FINAL CSV\n"
                 "(restricted to apples-to-apples deduplicated 181,450-archive set)",
                 fontsize=14)

    bar_w = 0.4
    x_positions = np.arange(len(OS_COLS))

    for ax, metal in zip(axes, TM_ORDER):
        old_vals = [old[metal].get(c, 0) for c in OS_COLS]
        new_vals = [new[metal].get(c, 0) for c in OS_COLS]
        gmax = GROUP_MAX.get(metal, 7)

        # Shade impossible-OS region (> gmax)
        for i, c in enumerate(OS_COLS):
            if c > gmax:
                ax.axvspan(i - 0.5, i + 0.5, color='#FFD6D6', alpha=0.55, zorder=0)

        ax.bar(x_positions - bar_w/2, old_vals, bar_w, label="OLD slim",
               color='#23364A', alpha=0.85, edgecolor='none')
        ax.bar(x_positions + bar_w/2, new_vals, bar_w, label="NEW FINAL",
               color='#D4AF37', alpha=0.95, edgecolor='none')

        over_old = sum(v for c, v in zip(OS_COLS, old_vals) if c > gmax)
        over_new = sum(v for c, v in zip(OS_COLS, new_vals) if c > gmax)

        ax.set_xticks(x_positions)
        ax.set_xticklabels([str(c) for c in OS_COLS], fontsize=8)
        ax.set_title(over_max_title(metal, gmax, over_old, over_new), fontsize=9)
        ax.tick_params(axis='y', labelsize=7)
        ax.grid(axis='y', linewidth=0.3, alpha=0.4)
        for spine in ('top', 'right'):
            ax.spines[spine].set_visible(False)

    # Hide unused subplots (this includes the cell reused for the legend).
    for ax in axes[len(TM_ORDER):]:
        ax.axis('off')

    # Use the first empty subplot for the legend
    leg_ax = axes[len(TM_ORDER)]
    leg_ax.legend(handles=[
        Patch(color='#23364A', alpha=0.85, label='OLD slim CSV'),
        Patch(color='#D4AF37', alpha=0.95, label='NEW FINAL CSV'),
        Patch(facecolor='#FFD6D6', alpha=0.55, label='OS > group max\n(chemically impossible)'),
    ], loc='center', fontsize=11, frameon=False)

    # Axis label common
    fig.supxlabel("oxidation state bin", fontsize=12)
    fig.supylabel("atom-level count (Σ reactant + product across 181,450 archives)", fontsize=12)

    fig.savefig(OUT, dpi=120, bbox_inches='tight')
    print(f"wrote: {OUT}")


if __name__ == "__main__":
    main()
b/zhao-patches-doc/tm_os_dials_NEW.png differ diff --git a/zhao-patches-doc/tm_os_dials_OLD.png b/zhao-patches-doc/tm_os_dials_OLD.png new file mode 100644 index 00000000..0ca5ed72 Binary files /dev/null and b/zhao-patches-doc/tm_os_dials_OLD.png differ diff --git a/zhao-patches-doc/tm_os_matrix_NEW.csv b/zhao-patches-doc/tm_os_matrix_NEW.csv new file mode 100644 index 00000000..81c960bd --- /dev/null +++ b/zhao-patches-doc/tm_os_matrix_NEW.csv @@ -0,0 +1,30 @@ +Metal,-3,-2,-1,0,1,2,3,4,5,6,Total +Sc,0,0,2,365,290,955,883,0,0,0,2495 +Ti,2,2,11,267,409,1402,2426,10733,0,0,15252 +V,12,8,16,277,412,600,683,945,1555,0,4508 +Cr,2,0,1,1811,796,2298,3693,890,499,9347,19337 +Mn,0,0,12,2062,2996,1150,1112,879,973,3477,12661 +Fe,7,10,18,8334,7460,10807,5553,4279,1008,4224,41700 +Co,17,9,36,4022,6139,4279,3523,984,912,1198,21119 +Ni,87,56,75,8396,10112,11228,2445,1789,417,771,35376 +Cu,174,235,322,7754,14520,4681,3008,622,575,207,32098 +Zn,3,2,98,4851,1586,1548,103,155,1,117,8464 +Y,0,0,0,327,263,398,396,0,0,0,1384 +Zr,4,24,29,308,895,1017,3823,7596,0,0,13696 +Nb,0,0,0,31,37,205,135,550,1086,0,2044 +Mo,0,0,2,308,1474,1201,11330,4858,4194,9348,32715 +Tc,12,5,4,87,18,46,22,31,141,54,420 +Ru,11,6,8,2882,4744,7387,3508,5242,940,3918,28646 +Rh,66,23,703,4714,8570,8381,11425,1699,2643,1534,39758 +Pd,152,126,153,14989,16075,25275,4029,4630,800,1720,67949 +Ag,0,44,154,1826,2417,854,765,108,65,39,6272 +Cd,0,0,5,78,33,28,14,36,17,23,234 +Hf,0,0,0,16,46,391,178,879,0,0,1510 +Ta,1,2,4,35,61,142,309,473,1455,0,2482 +W,0,1,4,744,250,532,212,703,2650,12810,17906 +Re,0,1,5,563,361,242,386,211,725,802,3296 +Os,1,3,1,190,303,508,376,707,231,430,2750 +Ir,5,5,28,1276,3502,3883,4854,1606,1026,947,17132 +Pt,66,109,103,1628,1798,4771,1042,1427,179,241,11364 +Au,22,185,255,8012,5919,3379,1702,325,138,91,20028 +Hg,0,0,13,166,37,127,25,2,0,0,370 diff --git a/zhao-patches-doc/tm_os_matrix_OLD.csv b/zhao-patches-doc/tm_os_matrix_OLD.csv new file mode 100644 index 00000000..06aee11e --- /dev/null +++ 
b/zhao-patches-doc/tm_os_matrix_OLD.csv @@ -0,0 +1,30 @@ +Metal,-3,-2,-1,0,1,2,3,4,5,6,Total +Sc,0,0,3,353,194,1109,836,0,0,0,2495 +Ti,2,2,11,276,375,1475,2337,10774,0,0,15252 +V,12,8,18,258,386,555,657,955,1659,0,4508 +Cr,2,0,0,1927,787,2134,3757,874,552,9304,19337 +Mn,0,0,14,2137,2774,1188,1248,872,987,3441,12661 +Fe,7,10,32,8895,7395,10216,6163,3840,936,4206,41700 +Co,17,9,43,4255,5865,4463,3515,967,852,1133,21119 +Ni,87,56,111,9106,9520,11169,2400,1788,378,761,35376 +Cu,174,245,320,7741,14084,4971,3089,674,589,211,32098 +Zn,3,2,100,4903,1531,1535,118,154,1,117,8464 +Y,0,0,0,221,148,595,420,0,0,0,1384 +Zr,3,15,40,210,667,1025,4252,7484,0,0,13696 +Nb,0,0,0,32,34,186,146,536,1110,0,2044 +Mo,0,0,2,344,1189,1025,12386,4048,4166,9555,32715 +Tc,14,5,3,81,13,51,28,34,132,59,420 +Ru,11,6,31,3129,4509,7504,3613,5297,725,3821,28646 +Rh,66,21,741,5307,8309,8114,11854,1588,2454,1304,39758 +Pd,155,128,228,15997,14938,25015,4384,4621,758,1725,67949 +Ag,4,58,113,1826,2359,969,643,192,67,41,6272 +Cd,0,0,8,81,26,25,22,31,18,23,234 +Hf,0,0,0,19,38,242,334,877,0,0,1510 +Ta,1,2,4,34,57,149,313,510,1412,0,2482 +W,0,1,3,794,254,532,199,717,2595,12811,17906 +Re,0,0,4,547,386,275,326,208,751,799,3296 +Os,1,3,2,276,251,490,410,678,211,428,2750 +Ir,5,3,17,2247,3202,3922,4460,1324,984,968,17132 +Pt,66,112,103,1669,1891,4689,1022,1440,140,232,11364 +Au,83,218,259,8250,5678,3354,1549,318,158,161,20028 +Hg,0,0,10,165,38,130,25,2,0,0,370