|
1336 | 1336 | "match_rep_df_sub\n"
|
1337 | 1337 | ]
|
1338 | 1338 | },
|
| 1339 | + { |
| 1340 | + "cell_type": "code", |
| 1341 | + "execution_count": 59, |
| 1342 | + "metadata": {}, |
| 1343 | + "outputs": [ |
| 1344 | + { |
| 1345 | + "name": "stdout", |
| 1346 | + "output_type": "stream", |
| 1347 | + "text": [ |
| 1348 | + "(181, 28)\n", |
| 1349 | + "(182, 28)\n" |
| 1350 | + ] |
| 1351 | + } |
| 1352 | + ], |
| 1353 | + "source": [ |
| 1354 | + "# Merge the missing profile\n", |
| 1355 | + "\n", |
| 1356 | + "# Merge dropout dataframes\n", |
| 1357 | + "df_rep = pd.read_csv(\"../../checkpoints/moa-replicating_ALT.csv\")\n", |
| 1358 | + "df_match = pd.read_csv(\"../../checkpoints/moa-matching_ALT.csv\")\n", |
| 1359 | + "\n", |
| 1360 | + "# Keep only the rows where sphering is True\n", |
| 1361 | + "df_rep = df_rep[df_rep[\"sphering\"] == True]\n", |
| 1362 | + "df_match = df_match[df_match[\"sphering\"] == True]\n", |
| 1363 | + "\n", |
| 1364 | + "# Rename columns\n", |
| 1365 | + "df_rep = df_rep.rename(columns={\"Value_95\": \"value_95_replicating\"})\n", |
| 1366 | + "df_match = df_match.rename(columns={\"Value_95\": \"value_95_matching\"})\n", |
| 1367 | + "\n", |
| 1368 | + "# Find the columns that exist only in df_rep; they are carried into the merge alongside the key columns\n", |
| 1369 | + "set1 = set(df_rep.columns)\n", |
| 1370 | + "set2 = set(df_match.columns)\n", |
| 1371 | + "rep_set = set1 - set2\n", |
| 1372 | + "merge_cols = [\"Vendor\", \"Batch\", \"Assay_Plate_Barcode\", \"sphering\"] + list(rep_set)\n", |
| 1373 | + "\n", |
| 1374 | + "missing_prof = pd.merge(df_rep[merge_cols], df_match, how=\"inner\")\n", |
| 1375 | + "\n", |
| 1376 | + "# Drop the distribution columns to reduce file size\n", |
| 1377 | + "missing_prof = missing_prof.drop([\"Null_Replicating\", \"Replicating\", \"Matching\", \"Null_Matching\"], axis=1)\n", |
| 1378 | + "missing_prof\n", |
| 1379 | + "\n", |
| 1380 | + "match_rep_df = pd.read_csv(\"../../checkpoints/match_rep_df.csv\")\n", |
| 1381 | + "print(match_rep_df.shape)\n", |
| 1382 | + "match_rep_df = pd.concat([match_rep_df, missing_prof]).reset_index(drop=True)\n", |
| 1383 | + "print(match_rep_df.shape)\n", |
| 1384 | + "match_rep_df.to_csv(\"../../checkpoints/match_rep_df.csv\", index=False)\n" |
| 1385 | + ] |
| 1386 | + }, |
1339 | 1387 | {
|
1340 | 1388 | "cell_type": "code",
|
1341 | 1389 | "execution_count": 42,
|
|
0 commit comments