From d1fad9f5ae96fc6b12bd0f4035ec49773eb85d4c Mon Sep 17 00:00:00 2001
From: Patricia-A-Apellaniz <patricia.alonsod@upm.es>
Date: Mon, 15 Jun 2026 13:32:43 +0200
Subject: [PATCH] New data input modality

---
 src/XAInyPredictor/app.py                     |  11 +-
 src/XAInyPredictor/modules/xai.py             | 209 +---
 .../shinyapp/data_exploration.py              | 118 ++-
 src/XAInyPredictor/shinyapp/data_input.py     | 687 ++++++++++++-
 src/XAInyPredictor/shinyapp/prediction.py     | 495 ++++++++-
 src/XAInyPredictor/shinyapp/www/index.js      |  94 ++
 src/XAInyPredictor/shinyapp/www/layout.css    |   3 +-
 src/XAInyPredictor/shinyapp/www/style.css     | 969 ++++++++++++++++--
 8 files changed, 2205 insertions(+), 381 deletions(-)

diff --git a/src/XAInyPredictor/app.py b/src/XAInyPredictor/app.py
index d9315b3..9c4680b 100644
--- a/src/XAInyPredictor/app.py
+++ b/src/XAInyPredictor/app.py
@@ -180,7 +180,7 @@ def build_page_header(config: dict, current_use_case: str):
                     choices=use_case_choices,
                     selected=current_use_case,
                 ),
-                style="display: flex; align-items: center; margin-left: 10px;"
+                class_="navbar-use-case-selector",
             ),
             id="div-navbar-tabs",
             class_="navigation-menu",
@@ -210,9 +210,9 @@ def build_page_header(config: dict, current_use_case: str):
 
 def build_ui(config: dict, current_use_case: str):
     page_dependencies = ui.tags.head(
-        ui.tags.link(rel="stylesheet", type="text/css", href="layout.css?v=20260608a"),
-        ui.tags.link(rel="stylesheet", type="text/css", href="style.css?v=20260608a"),
-        ui.tags.script(src="index.js?v=20260608a"),
+        ui.tags.link(rel="stylesheet", type="text/css", href="layout.css?v=20260615a"),
+        ui.tags.link(rel="stylesheet", type="text/css", href="style.css?v=20260615a"),
+        ui.tags.script(src="index.js?v=20260615a"),
         ui.tags.meta(name="description", content=config.get("description", "XAI Predictor")),
         ui.tags.meta(name="theme-color", content="#000000"),
         ui.tags.meta(name="viewport", content="width=device-width, initial-scale=1"),
@@ -283,6 +283,7 @@ async def update_navigation_labels(session: Session, config: dict):
         {
             "form": labels.get("manual_entry", "Manual Entry"),
             "file": labels.get("upload_file", "Upload File"),
+            "documents": labels.get("prepare_csv_documents", "Prepare CSV from Documents"),
             "example": labels.get("example_cohort", "Example Cohort"),
         },
     )
@@ -530,6 +531,7 @@ async def _on_startup_confirm():
             ui.update_select("use_case_selector", choices=_use_case_choices(), selected=selected_use_case)
 
             ui.modal_remove()
+            await session.send_custom_message("resetDataInputUploads", {})
             await session.send_custom_message("setUseCaseLoading", {"visible": False})
 
     @reactive.Effect
@@ -592,6 +594,7 @@ async def _confirm_switch():
             ui.notification_show(f"Switched to {new_model_data['config'].get('name', new_use_case)}", type="message")
 
             await session.send_custom_message("toggleActiveTab", {"activeTab": "data_input"})
+            await session.send_custom_message("resetDataInputUploads", {})
 
             ui.modal_remove()
             await session.send_custom_message("setUseCaseLoading", {"visible": False})
diff --git a/src/XAInyPredictor/modules/xai.py b/src/XAInyPredictor/modules/xai.py
index 045cab5..8673584 100644
--- a/src/XAInyPredictor/modules/xai.py
+++ b/src/XAInyPredictor/modules/xai.py
@@ -354,214 +354,7 @@ def analyze_patient(
         return fig_radar, fig_curve
 
 
-def analyze_patient_new(
-        patient_id,
-        df,
-        delta_train,
-        delta_test,
-        x_train,
-        y_train,
-        features_to_plot=None,
-        n_dists=3, # Reduced default for clarity
-        max_plot_curves=10,
-        show_closest_radial=True,
-        show_average_radial=True,
-        show_average_class0_radial=True,
-        show_average_class1_radial=True
-    ):
-
-    logger.debug("Analyzing patient %s with enhanced visualization.", patient_id)
-
-    # --- 1. DATA PREPARATION ---
-    
-    # Locate patient
-    patient_ids = df['ID'].astype(int).tolist()
-    if int(patient_id) not in patient_ids:
-        logger.debug("Patient %s not found.", patient_id)
-        return None, None
-        
-    patient_index = df[df['ID'] == int(patient_id)].index[0]
-    
-    # Filter Features
-    if features_to_plot and len(features_to_plot) > 0:
-        clean_feats = [feat.replace(' ', '_') for feat in features_to_plot]
-        feature_names = [x for x in clean_feats if x in delta_test.columns and x not in ["const", "pred_prob"]]
-    else:
-        feature_names = [x for x in delta_test.columns if x not in ["const", "pred_prob"]]
-        
-    # Extract data subsets
-    d_train = delta_train[feature_names].values
-    d_test_patient = delta_test.loc[patient_index, feature_names].values.flatten()
-    patient_prob = delta_test.iloc[patient_index]['pred_prob']
-    
-    # Probability
-    pred_prob = delta_test.loc[patient_index, "pred_prob"]
-    
-    # Neighbor finding (Euclidean distance in SHAP/Delta space)
-    dists = np.linalg.norm(d_train - d_test_patient, axis=1)
-    idx_closest = np.argsort(dists)[:n_dists]
-
-    # --- 2. RADAR PLOT ---
-    
-    # Setup Data for Radar
-    # We use MinMax Scaling on the DELTA (contribution) values.
-    # Min = Lowest contribution observed in training (Low Risk)
-    # Max = Highest contribution observed in training (High Risk)
-    
-    mins = d_train.min(axis=0)
-    maxs = d_train.max(axis=0)
-    ranges = maxs - mins
-    ranges[ranges == 0] = 1e-9 # Avoid division by zero
-    
-    def normalize(v):
-        return (v - mins) / ranges
-
-    # Prepare vectors to plot
-    pat_norm = normalize(d_test_patient)
-    
-    # Averages
-    class0_mask = (y_train == 0).values
-    class1_mask = (y_train == 1).values
-    
-    avg_norm = normalize(d_train.mean(axis=0))
-    avg_c0_norm = normalize(d_train[class0_mask].mean(axis=0))
-    avg_c1_norm = normalize(d_train[class1_mask].mean(axis=0))
-    
-    # Plotting
-    N = len(feature_names)
-    theta = radar_factory(N, frame='polygon')
-    
-    fig_radar, ax = plt.subplots(figsize=(9, 9), subplot_kw=dict(projection='radar'))
-    
-    # Grid lines and labels
-    ax.set_rgrids([0.2, 0.4, 0.6, 0.8], labels=[], angle=0, color="grey", alpha=0.3)
-    ax.set_varlabels([f.replace("_", " ") for f in feature_names])
-    ax.tick_params(pad=15) # Move labels out slightly
-    
-    # 1. Plot Reference Populations
-    if show_average_class0_radial:
-        ax.plot(theta, avg_c0_norm, color='#2ca02c', linewidth=2, linestyle='--', label='Avg. negative')
-        
-    if show_average_class1_radial:
-        ax.plot(theta, avg_c1_norm, color='#d62728', linewidth=2, linestyle='--', label='Avg. positive')
-        
-    if show_average_radial:
-        ax.plot(theta, avg_norm, color='grey', linewidth=2, label='Population Average')
-
-    # 2. Plot Closest Neighbors (lighter opacity)
-    if show_closest_radial:
-        for i, idx in enumerate(idx_closest):
-            neighbor_vals = normalize(d_train[idx])
-            ax.plot(theta, neighbor_vals, color='#ff7f0e', alpha=0.3, label='Similar Patients' if i == 0 else "")
-
-    # 3. Plot Selected Patient (Thick, Filled)
-    ax.plot(theta, pat_norm, color='#1f77b4', linewidth=2, label='Selected Patient')
-    ax.fill(theta, pat_norm, color='#1f77b4', alpha=0.1)
-
-    # Styling
-    title = f"Patient {patient_id} (prob = {patient_prob:.2f})"
-    ax.set_title(title, position=(0.5, 1.1), ha='center', weight='bold')
-    
-    # Improved Legend
-    legend = ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1), fontsize='small', frameon=False)
-    
-    
-    # --- 3. CURVES PLOT ---
-    
-    # Select top features (sorted by absolute contribution for this patient)
-    # This ensures we see the most relevant features first
-    abs_contribution = np.abs(d_test_patient)
-    top_indices = np.argsort(abs_contribution)[::-1][:max_plot_curves]
-    top_features = [feature_names[i] for i in top_indices]
-    
-    n_curves = len(top_features) + 1
-    
-    # Use constrained_layout for automatic nice spacing
-    fig_curve, axs = plt.subplots(n_curves, 1, figsize=(8, 3 * n_curves), constrained_layout=True)
-    
-    if n_curves == 1: axs = [axs] # Handle single plot case
-
-    # -- A. Overall Probability Gauge (Top Plot) --
-    ax_prob = axs[0]
-    
-    # Create a theoretical sigmoid background
-    x_sigmoid = np.linspace(-6, 6, 100)
-    y_sigmoid = 1 / (1 + np.exp(-x_sigmoid))
-    
-    # Plot population density of predictions
-    all_probs = delta_test['pred_prob'].values
-    ax_prob.hist(delta_train['pred_prob'], bins=30, density=False, alpha=0.15, color='grey', label='Population Dist.')
-    
-    # Plot patient marker
-    ax_prob_twin = ax_prob.twinx() # Use twin axis for probability curve vs histogram count
-    ax_prob_twin.plot([], []) # Dummy to align colors
-    ax_prob_twin.set_ylim(0, 1.1)
-    ax_prob_twin.set_yticks([0, 0.5, 1])
-    ax_prob_twin.set_ylabel("Probability")
-    
-    # Current patient line
-    ax_prob.axvline(pred_prob, color='#1f77b4', linewidth=3, linestyle='-', label=f'Patient: {pred_prob:.2f}')
-    
-    ax_prob.set_title("Overall Risk Prediction", weight='bold')
-    ax_prob.set_xlabel("Predicted Probability")
-    ax_prob.set_yticks([]) # Hide histogram counts
-    ax_prob.legend(loc='upper left')
-
-
-    # -- B. Feature Contribution Curves --
-    
-    for i, feat in enumerate(top_features):
-        ax = axs[i+1]
-        
-        # Get data
-        raw_vals = x_train[feat].values
-        shap_vals = delta_train[feat].values
-        
-        # 1. Background Density (Histogram)
-        # Shows where most patients lie for this feature
-        ax_hist = ax.twinx()
-        ax_hist.hist(raw_vals, bins=20, color='grey', alpha=0.1, density=True)
-        ax_hist.set_yticks([]) # Hide density labels
-        
-        # 2. Relationship Curve (Smoothed or scatter)
-        # We sort to draw a line
-        sort_idx = np.argsort(raw_vals)
-        ax.plot(raw_vals[sort_idx], shap_vals[sort_idx], color='black', alpha=0.4, linewidth=1, label='Risk Trend')
-        
-        # 3. Reference Points (Class Averages)
-        c0_mean_x = x_train.loc[y_train==0, feat].mean()
-        c0_mean_y = delta_train.loc[y_train==0, feat].mean()
-        c1_mean_x = x_train.loc[y_train==1, feat].mean()
-        c1_mean_y = delta_train.loc[y_train==1, feat].mean()
-        
-        ax.scatter(c0_mean_x, c0_mean_y, color='#2ca02c', s=100, marker='D', label='Avg Low Risk', zorder=3)
-        ax.scatter(c1_mean_x, c1_mean_y, color='#d62728', s=100, marker='D', label='Avg High Risk', zorder=3)
-        
-        # 4. Patient & Neighbors
-        pat_x = df.loc[patient_index, feat]
-        pat_y = delta_test.loc[patient_index, feat]
-        
-        # Neighbors
-        for n_idx in idx_closest:
-            n_x = x_train.iloc[n_idx][feat]
-            n_y = delta_train.iloc[n_idx][feat]
-            ax.scatter(n_x, n_y, color='#ff7f0e', alpha=0.6, s=30)
-            
-        # Patient (Large Dot)
-        ax.scatter(pat_x, pat_y, color='#1f77b4', s=150, edgecolors='white', linewidth=2, label='Patient', zorder=5)
-        
-        # Labels
-        ax.set_title(f"Feature: {feat.replace('_', ' ')}")
-        ax.set_xlabel(f"Value ({feat})")
-        ax.set_ylabel("Risk Contribution")
-        ax.grid(True, alpha=0.2)
-        
-        if i == 0: # Only legend on first feature plot to save space
-            ax.legend(loc='best', fontsize='small')
-
-    return fig_radar, fig_curve
-
-def radar_factory(num_vars, frame='circle'):
+def radar_factory(num_vars, frame='circle'):
     """
     Create a radar chart with `num_vars` Axes.
 
diff --git a/src/XAInyPredictor/shinyapp/data_exploration.py b/src/XAInyPredictor/shinyapp/data_exploration.py
index e2e1da5..4988594 100644
--- a/src/XAInyPredictor/shinyapp/data_exploration.py
+++ b/src/XAInyPredictor/shinyapp/data_exploration.py
@@ -13,6 +13,11 @@ def data_exploration_ui(config=None):
     text = labels.get("text", {})
     return ui.layout_sidebar(
         ui.sidebar(
+            ui.div(
+                ui.div("Cohort lens", class_="context-sidebar-eyebrow"),
+                ui.div("Compare one feature at a time against the current input or model reference population.", class_="context-sidebar-copy"),
+                class_="context-sidebar-intro",
+            ),
             ui.output_ui("feature_dropdown"),
             ui.input_select(
                 id="reference_data",
@@ -109,6 +114,28 @@ def _normalize_columns(df):
         df.columns = [col.replace('_', ' ') for col in df.columns]
         return df
 
+    def _missing_count_by_feature(df=None):
+        """Map each non-ID column of the normalized frame to its missing-value count.
+
+        When ``df`` is None the frame is read from ``global_input_data``; an
+        absent or empty frame yields an empty dict.
+        """
+        frame = _normalize_columns(global_input_data.get() if df is None else df)
+        if frame is None or frame.empty:
+            return {}
+        # Plain ints so the counts can be compared and formatted directly.
+        return {name: int(frame[name].isna().sum()) for name in frame.columns if name != "ID"}
+
+    def _feature_choice_label(feature: dict, missing_counts: dict, row_count: int) -> str:
+        """Return the feature's dropdown label, suffixed with its missing-value count (if any)."""
+        label = _feature_label(feature)
+        name = feature.get("name", label)
+        # Callers build the counts from _normalize_columns output ("_" -> " "); also try that spelling.
+        missing = missing_counts.get(name, missing_counts.get(str(name).replace("_", " "), 0))
+        if row_count and missing == row_count:
+            return f"{label} (all missing)"
+        return f"{label} ({missing} missing)" if missing else label
+
     def _selected_feature_name(df=None):
         display_name = str(input.feature_to_plot())
         col_name = display_to_name.get().get(display_name, display_name)
@@ -153,7 +180,10 @@ def _rebuild_feature_list():
         current_cfg = config_reactive.get() if config_reactive else (config_init or {})
         features = current_cfg.get("features", []) if current_cfg else []
         features = [feature for feature in features if _is_plot_feature(feature)]
-        display_to_name.set({_feature_label(f): f["name"] for f in features})
+        input_df = _normalize_columns(global_input_data.get())
+        row_count = 0 if input_df is None else len(input_df)
+        missing_counts = _missing_count_by_feature(input_df)
+        display_to_name.set({_feature_choice_label(f, missing_counts, row_count): f["name"] for f in features})
 
     @output
     @render.ui
@@ -161,14 +191,28 @@ def feature_dropdown():
         current_cfg = config_reactive.get() if config_reactive else (config_init or {})
         features = current_cfg.get("features", []) if current_cfg else []
         features = [feature for feature in features if _is_plot_feature(feature)]
-        choices = [(_feature_label(f), _feature_label(f)) for f in features]
+        input_df = _normalize_columns(global_input_data.get())
+        row_count = 0 if input_df is None else len(input_df)
+        missing_counts = _missing_count_by_feature(input_df)
+        choices = [
+            (_feature_choice_label(f, missing_counts, row_count), _feature_choice_label(f, missing_counts, row_count))
+            for f in features
+        ]
         if not choices:
             return ui.p("No features configured", style="font-size: 0.85em; color: gray;")
+        selected_feature = next(
+            (
+                _feature_choice_label(f, missing_counts, row_count)
+                for f in features
+                if missing_counts.get(f.get("name"), 0) < row_count
+            ),
+            _feature_choice_label(features[0], missing_counts, row_count) if features else None,
+        )
         return ui.input_select(
             id="feature_to_plot",
             label="Feature to plot:",
             choices=dict(choices),
-            selected=_feature_label(features[0]) if features else None,
+            selected=selected_feature,
         )
 
     @output
@@ -270,12 +314,20 @@ def feature_stats_ui():
         df = _reference_dataframe()
 
         if df is None or df.empty:
-            return ui.div("No data available", class_="feature-summary-empty")
+            return ui.div(
+                ui.div("No reference data yet", class_="feature-summary-empty-title"),
+                ui.div("Confirm a working cohort in Data Input to populate feature context.", class_="feature-summary-empty-copy"),
+                class_="feature-summary-empty",
+            )
 
         feature = _selected_feature_name(df)
 
         if feature not in df.columns:
-            return ui.div("Feature not found", class_="feature-summary-empty")
+            return ui.div(
+                ui.div("Feature not found", class_="feature-summary-empty-title"),
+                ui.div("Choose another feature or refresh the selected use case.", class_="feature-summary-empty-copy"),
+                class_="feature-summary-empty",
+            )
         
         feature_data = df[feature].dropna()
         missing = int(df[feature].isnull().sum())
@@ -289,7 +341,11 @@ def metric(label, value, class_name=""):
         
         if pd.api.types.is_numeric_dtype(feature_data):
             if feature_data.empty:
-                return ui.div("No non-missing values", class_="feature-summary-empty")
+                return ui.div(
+                    ui.div("No non-missing values", class_="feature-summary-empty-title"),
+                    ui.div(f"{missing} missing value(s) in this reference population.", class_="feature-summary-empty-copy"),
+                    class_="feature-summary-empty",
+                )
 
             return ui.div(
                 metric("Count", str(len(feature_data)), "feature-metric-primary"),
@@ -338,6 +394,22 @@ def context_plot_context():
             chips.append(ui.span(f"Value: {_format_value(patient_value)}", class_="context-chip context-chip-highlight"))
 
         notices = []
+        if feature in df.columns:
+            missing = int(df[feature].isna().sum())
+            if missing == len(df):
+                notices.append(
+                    ui.div(
+                        f"No non-missing values are available for {feature_label}. Choose another feature or complete this field before interpreting cohort context.",
+                        class_="context-small-sample context-small-sample-warning",
+                    )
+                )
+            elif missing:
+                notices.append(
+                    ui.div(
+                        f"{missing}/{len(df)} record(s) are missing {feature_label}; the plot uses only non-missing values.",
+                        class_="context-small-sample context-small-sample-warning",
+                    )
+                )
         if len(df) < 10:
             notices.append(
                 ui.div(
@@ -360,7 +432,8 @@ def plot_feature_distribution():
         if input_df is None or input_df.empty or df is None or df.empty:
             fig, ax = plt.subplots()
             ax.set_axis_off()
-            ax.text(0.5, 0.5, "No data available", ha="center", va="center")
+            ax.text(0.5, 0.55, "No confirmed cohort yet", ha="center", va="center", fontsize=15, fontweight="bold")
+            ax.text(0.5, 0.42, "Confirm records in Data Input to show feature context.", ha="center", va="center", fontsize=11, color="#405261")
             return fig
 
         feature = _selected_feature_name(df)
@@ -386,9 +459,35 @@ def plot_feature_distribution():
         # Create figure
         fig, ax = plt.subplots(figsize=(10, 5.6))
 
+        if plot_series.empty:
+            ax.set_axis_off()
+            ax.text(
+                0.5,
+                0.58,
+                f"No non-missing values available for {feature_label}",
+                ha="center",
+                va="center",
+                fontsize=15,
+                fontweight="bold",
+            )
+            ax.text(
+                0.5,
+                0.43,
+                "Choose another feature or complete this field before interpreting cohort context.",
+                ha="center",
+                va="center",
+                fontsize=11,
+                color="#405261",
+            )
+            return fig
+
         # Handle categorical vs numerical
         if pd.api.types.is_numeric_dtype(plot_series):
             numeric_values = pd.to_numeric(plot_series, errors="coerce").dropna()
+            if numeric_values.empty:
+                ax.set_axis_off()
+                ax.text(0.5, 0.5, f"No numeric values available for {feature_label}", ha="center", va="center", fontsize=15)
+                return fig
             if len(numeric_values) < 10:
                 y = [1] * len(numeric_values)
                 ax.scatter(numeric_values, y, color='#4682B4', alpha=0.75, s=80, edgecolor='black', linewidth=0.4, label="Reference records")
@@ -403,8 +502,9 @@ def plot_feature_distribution():
             ax.set_xlabel(feature_label, fontsize=12)
 
             # Add vertical line for patient value
-            ax.axvline(patient_val, color='red', linewidth=3, linestyle='--',
-                       label=f"{_text_labels()['singular_title']} {patient_id} ({_format_value(patient_val)})")
+            if pd.notna(patient_val):
+                ax.axvline(patient_val, color='red', linewidth=3, linestyle='--',
+                           label=f"{_text_labels()['singular_title']} {patient_id} ({_format_value(patient_val)})")
 
             # Add statistics text
             mean_val = numeric_values.mean()
diff --git a/src/XAInyPredictor/shinyapp/data_input.py b/src/XAInyPredictor/shinyapp/data_input.py
index 6718795..3252178 100644
--- a/src/XAInyPredictor/shinyapp/data_input.py
+++ b/src/XAInyPredictor/shinyapp/data_input.py
@@ -9,6 +9,11 @@ def _decimal_text(value):
     return str(value).replace(",", ".")
 
 
+def _slug(value):  # lowercase identifier; each non-alphanumeric run collapses to one "_"
+    normalized = "".join(char.lower() if char.isalnum() else "_" for char in str(value or "use_case"))
+    return "_".join(filter(None, normalized.split("_"))) or "use_case"
+
+
 def build_form_fields(config: dict) -> tuple:
     """Build form fields dynamically from config.
     Returns (columns, labels_dict) where columns is a list of ui.div for layout,
@@ -157,12 +162,7 @@ def data_input_ui(config=None):
                             ui.card_header(ui.output_ui("upload_entry_header")),
                             ui.div(
                                 ui.div(
-                                    ui.input_file(
-                                        "input_dataset_file",
-                                        "Upload File (TSV, CSV, Excel)",
-                                        accept=[".tsv", ".csv", ".xlsx"],
-                                        multiple=False,
-                                    ),
+                                    ui.output_ui("input_dataset_file_ui"),
                                     ui.p("CSV, TSV, or Excel file with the current use case columns.", class_="upload-template-hint"),
                                     class_="upload-file-picker",
                                 ),
@@ -189,6 +189,74 @@ def data_input_ui(config=None):
                         ),
                         value="file",
                     ),
+                    ui.nav_panel(
+                        "Prepare CSV from Documents",
+                        ui.card(
+                            ui.card_header(ui.output_ui("document_csv_header")),
+                            ui.div(
+                                ui.div(
+                                    ui.div("1", class_="document-csv-step-number"),
+                                    ui.div(
+                                        ui.div("Prepare CSV with an external tool", class_="document-csv-step-title"),
+                                        ui.p(
+                                            "Download the template and data dictionary, attach them with the source documents to an approved external tool, and use the prompt below.",
+                                            class_="upload-template-hint",
+                                        ),
+                                        ui.tags.ol(
+                                            ui.tags.li("Download the CSV template."),
+                                            ui.tags.li("Download the data dictionary."),
+                                            ui.tags.li("Attach both files together with the clinical Word/PDF/Excel documents to the external tool."),
+                                            ui.tags.li("Paste the prompt below and ask for a completed CSV plus an uncertainty review table."),
+                                            class_="document-csv-instructions",
+                                        ),
+                                        ui.div(
+                                            ui.download_button(
+                                                "download_document_input_template",
+                                                "Template",
+                                                class_="btn-default btn-sm input-template-download",
+                                            ),
+                                            ui.download_button(
+                                                "download_document_data_dictionary",
+                                                "Data dictionary",
+                                                class_="btn-default btn-sm input-template-download",
+                                            ),
+                                            class_="upload-resource-actions document-csv-actions",
+                                        ),
+                                        ui.output_ui("document_prompt_ui"),
+                                        class_="document-csv-step-body",
+                                    ),
+                                    class_="document-csv-step",
+                                ),
+                                ui.div(
+                                    ui.div("2", class_="document-csv-step-number"),
+                                    ui.div(
+                                        ui.div("Upload and technically validate the generated CSV", class_="document-csv-step-title"),
+                                        ui.output_ui("generated_csv_file_ui"),
+                                        ui.input_action_button(
+                                            "btn_validate_generated_csv",
+                                            "Validate CSV structure",
+                                            class_="btn-primary",
+                                        ),
+                                        ui.output_ui("generated_csv_validation_report"),
+                                        class_="document-csv-step-body",
+                                    ),
+                                    class_="document-csv-step",
+                                ),
+                                ui.div(
+                                    ui.div("3", class_="document-csv-step-number"),
+                                    ui.div(
+                                        ui.div("Clinical review confirmation", class_="document-csv-step-title"),
+                                        ui.output_ui("clinical_review_checklist"),
+                                        ui.output_ui("load_generated_csv_action"),
+                                        class_="document-csv-step-body",
+                                    ),
+                                    class_="document-csv-step",
+                                ),
+                                class_="document-csv-panel",
+                            ),
+                        ),
+                        value="documents",
+                    ),
                     ui.nav_panel(
                         labels.get("example_cohort", "Example Cohort"),
                         ui.card(
@@ -240,6 +308,9 @@ def server(input: Inputs, output: Outputs, session: Session, model_data, config_
     output_data = reactive.Value(None)
     output_is_custom = reactive.Value(False)
     working_source = reactive.Value("form")
+    generated_csv_df = reactive.Value(None)
+    generated_csv_report = reactive.Value(None)
+    file_input_revision = reactive.Value(0)
 
     def _current_text_labels():
         current_cfg = config_reactive.get() if config_reactive else (config_init or {})
@@ -268,15 +339,6 @@ def _current_text_labels():
             "no_data_message": form_labels.get("no_data_message", "No data uploaded."),
         }
 
-    def _input_method_choices():
-        current_cfg = config_reactive.get() if config_reactive else (config_init or {})
-        labels = current_cfg.get("labels", {})
-        return {
-            "form": labels.get("manual_entry", "Manual Entry"),
-            "file": labels.get("upload_file", "Upload File"),
-            "example": labels.get("example_cohort", "Example Cohort"),
-        }
-
     @reactive.Effect
     def _rebuild_fields():
         current_cfg = config_reactive.get() if config_reactive else config_init
@@ -296,6 +358,8 @@ def _rebuild_fields():
         labels_dict.set(new_labels)
         input_types.set(new_input_types)
         feature_cols.set(new_cols)
+        with reactive.isolate():
+            file_input_revision.set(file_input_revision.get() + 1)
         form_df.set(pd.DataFrame(columns=['ID'] + new_cols))
         id_counter.set(1)
         validation_errors.set([])
@@ -306,6 +370,8 @@ def _rebuild_fields():
         pending_signature.set(None)
         confirmed_signature.set(None)
         confirmed_ready.set(False)
+        generated_csv_df.set(None)
+        generated_csv_report.set(None)
 
         working_source.set("form")
 
@@ -412,6 +478,233 @@ def _data_dictionary_dataframe(config):
             )
         return pd.DataFrame(rows)
 
+    def _required_columns(config):
+        """Return the non-empty feature names declared in ``config``."""
+        # Tolerate a missing config, matching the guard _validation_report applies to its feature list.
+        declared = (config or {}).get("features", [])
+        return [feature.get("name") for feature in declared if feature.get("name")]
+
+    def _format_allowed_values(feature):
+        """Describe the values a feature accepts as short human-readable text."""
+        categories = feature.get("values", [])
+        if categories:
+            return ", ".join(str(item) for item in categories)
+        # No explicit categories: describe whichever numeric bounds are configured.
+        lower, upper = feature.get("min"), feature.get("max")
+        if lower is None and upper is None:
+            return "free numeric/text value"
+        if upper is None:
+            return f"numeric value greater than or equal to {lower}"
+        if lower is None:
+            return f"numeric value less than or equal to {upper}"
+        return f"numeric value between {lower} and {upper}"
+
+    def _extraction_prompt(config):
+        """Compose the plain-text prompt a user passes to an external extraction tool.
+
+        Lists the target CSV columns and one allowed-values rule per named feature.
+        """
+        app_name = config.get("name", "XAInyPredictor")
+        header_cells = ", ".join(["Participant Id"] + _required_columns(config))
+        rule_lines = []
+        for spec in config.get("features", []):
+            key = spec.get("name")
+            if key:
+                title = spec.get("display_name") or spec.get("label") or key
+                rule_lines.append(f"- {key} ({title}): {_format_allowed_values(spec)}")
+        prompt_lines = [
+            f"You are preparing clinical input data for the {app_name} app.",
+            "",
+            "I have attached:",
+            "1. The XAInyPredictor CSV template.",
+            "2. The XAInyPredictor data dictionary.",
+            "3. Clinical source documents in Word, PDF, Excel, CSV, or free-text format.",
+            "",
+            "Use the CSV template and data dictionary as the strict target format.",
+            "Use the clinical documents only as source evidence.",
+            "Extract one row per patient/candidate and return a completed CSV table.",
+            "",
+            "The CSV must use exactly these columns, in this order:",
+            header_cells,
+            "",
+            "Allowed values and field rules:",
+            *rule_lines,
+            "",
+            "Rules:",
+            "- Do not invent missing information.",
+            "- If a value is not explicitly available or cannot be inferred safely, leave the cell empty.",
+            "- Normalize clear synonyms to the allowed values.",
+            "- Use decimal points for numeric values.",
+            "- Do not add extra columns unless the template includes them.",
+            "- Do not include prediction targets or outcome columns unless they are present in the template.",
+            "- Do not include explanations before the CSV.",
+            "- After the CSV, add a separate section titled Manual review required with: Patient Id, Field, Extracted value, Source text, Reason for uncertainty.",
+        ]
+        return "\n".join(prompt_lines)
+
+    def _read_uploaded_table(file_info):
+        """Read the first uploaded file into a DataFrame; raise ValueError if absent or unsupported."""
+        if not file_info:
+            raise ValueError("No file selected.")
+        source_path = file_info[0]["datapath"]
+        probe = (file_info[0].get("name") or source_path).lower()
+        if probe.endswith(".tsv"):
+            return pd.read_csv(source_path, sep="\t")
+        if probe.endswith(".csv"):
+            return pd.read_csv(source_path)
+        if not probe.endswith(".xlsx"):
+            raise ValueError("Unsupported file format. Use CSV, TSV, or Excel.")
+        return pd.read_excel(source_path)
+
+    def _file_input_id(kind: str):
+        """Build the id of the ``kind`` file input for the active use case and current revision."""
+        cfg = config_reactive.get() if config_reactive else (config_init or {})
+        use_case_slug = _slug(cfg.get("name") or cfg.get("description") or "use_case")
+        return f"{kind}_{use_case_slug}_{file_input_revision.get()}"
+
+    def _current_file_info(kind: str):
+        """Return the value of the active ``kind`` file input, or None when no accessor exists."""
+        accessor = getattr(input, _file_input_id(kind), None)
+        if accessor is None:
+            return None
+        return accessor()
+
+    def _validation_report(df, config):
+        """Validate an uploaded table against the configured features and return a report dict."""
+        if df is None or df.empty:
+            return {
+                "is_valid": False,
+                "summary": {"Rows detected": 0, "Required columns": "0/0", "Invalid values": 0, "Missing values": 0},
+                "issues": [{"Row": "", "Record": "", "Field": "File", "Issue": "The uploaded file is empty.", "Current value": "", "Allowed values": ""}],
+            }
+        clean_df = clean_data(df.copy())
+        features = config.get("features", []) if config else []
+        required_cols = _required_columns(config)
+        missing_cols = [col for col in required_cols if col not in clean_df.columns]
+        matched_cols = [col for col in required_cols if col in clean_df.columns]
+        id_col = "Participant Id" if "Participant Id" in clean_df.columns else "ID" if "ID" in clean_df.columns else None
+        issues = []
+        invalid_count = 0
+        missing_count = 0
+
+        for col in missing_cols:
+            invalid_count += 1
+            issues.append(
+                {
+                    "Row": "",
+                    "Record": "",
+                    "Field": col,
+                    "Issue": "Missing required column",
+                    "Current value": "",
+                    "Allowed values": "Required column",
+                }
+            )
+
+        for idx, row in clean_df.iterrows():
+            record_id = row.get(id_col, idx + 1) if id_col else idx + 1
+            if row.dropna().astype(str).str.strip().eq("").all():
+                invalid_count += 1
+                issues.append(
+                    {
+                        "Row": idx + 2,  # NOTE(review): presumably the sheet row after a header line; assumes integer index labels
+                        "Record": record_id,
+                        "Field": "Row",
+                        "Issue": "Empty row",
+                        "Current value": "",
+                        "Allowed values": "At least one populated value",
+                    }
+                )
+        # Per-cell checks: empty cells only raise missing_count, so they never make the report invalid.
+        for feature in features:
+            feat_name = feature.get("name")
+            if not feat_name or feat_name not in clean_df.columns:
+                continue
+
+            display_name = feature.get("display_name") or feature.get("label") or feat_name
+            input_type = feature.get("input_type") or feature.get("type", "numeric")
+            allowed_text = _format_allowed_values(feature)
+            allowed_values = set(map(str, feature.get("values", [])))
+            for idx, value in clean_df[feat_name].items():
+                if pd.isna(value) or str(value).strip() == "":
+                    missing_count += 1
+                    issues.append(
+                        {
+                            "Row": idx + 2,
+                            "Record": clean_df.loc[idx, id_col] if id_col else idx + 1,
+                            "Field": feat_name,
+                            "Issue": "Missing value for clinical review",
+                            "Current value": "",
+                            "Allowed values": allowed_text,
+                        }
+                    )
+                    continue
+
+                if input_type == "numeric":
+                    numeric_value = pd.to_numeric(str(value).replace(",", "."), errors="coerce")
+                    if pd.isna(numeric_value):
+                        invalid_count += 1
+                        issues.append(
+                            {
+                                "Row": idx + 2,
+                                "Record": clean_df.loc[idx, id_col] if id_col else idx + 1,
+                                "Field": feat_name,
+                                "Issue": f"{display_name} must be numeric",
+                                "Current value": value,
+                                "Allowed values": allowed_text,
+                            }
+                        )
+                        continue
+                    min_val, max_val = feature.get("min"), feature.get("max")
+                    if (min_val is not None and numeric_value < min_val) or (max_val is not None and numeric_value > max_val):
+                        invalid_count += 1
+                        issues.append(
+                            {
+                                "Row": idx + 2,
+                                "Record": clean_df.loc[idx, id_col] if id_col else idx + 1,
+                                "Field": feat_name,
+                                "Issue": f"{display_name} is outside the allowed range",
+                                "Current value": value,
+                                "Allowed values": allowed_text,
+                            }
+                        )
+
+                elif input_type == "select":
+                    # Integer codes may be read back as floats (1.0) when the column also has empty cells.
+                    candidates = {str(value), str(int(value))} if isinstance(value, float) and value.is_integer() else {str(value)}
+                    if allowed_values and candidates.isdisjoint(allowed_values):
+                        invalid_count += 1
+                        issues.append(
+                            {
+                                "Row": idx + 2,
+                                "Record": clean_df.loc[idx, id_col] if id_col else idx + 1,
+                                "Field": feat_name,
+                                "Issue": f"{display_name} has an invalid category",
+                                "Current value": value,
+                                "Allowed values": allowed_text,
+                            }
+                        )
+
+        is_valid = not missing_cols and invalid_count == 0
+        return {
+            "is_valid": is_valid,
+            "summary": {
+                "Rows detected": len(clean_df),
+                "Required columns": f"{len(matched_cols)}/{len(required_cols)}",
+                "Invalid values": invalid_count,
+                "Missing values": missing_count,
+            },
+            "issues": issues,
+            "clean_df": clean_df,
+        }
+
+    def _validation_issues_dataframe(report):
+        """Tabulate a report's issues under the fixed issue-table columns."""
+        issue_columns = ["Row", "Record", "Field", "Issue", "Current value", "Allowed values"]
+        if report:
+            return pd.DataFrame(report.get("issues", []), columns=issue_columns)
+        # A falsy report still yields an empty table carrying all column headers.
+        return pd.DataFrame(columns=issue_columns)
+
     def _validation_success_message(df, config, method):
         features = config.get("features", []) if config else []
         required_cols = [feature.get("name") for feature in features if feature.get("name")]
@@ -421,6 +714,7 @@ def _validation_success_message(df, config, method):
             "form": text_labels["manual_source"],
             "file": text_labels["uploaded_source"],
             "example": text_labels["example_source"],
+            "documents": "document-prepared CSV",
         }.get(method, text_labels["set_lower"])
         return f"{len(df)} {text_labels['singular']}(s) loaded from {source}. {len(matched_cols)}/{len(required_cols)} required columns matched. No validation issues found."
 
@@ -570,6 +864,8 @@ def _reset_cohort():
         confirmed_ready.set(False)
         output_data.set(None)
         output_is_custom.set(False)
+        generated_csv_df.set(None)
+        generated_csv_report.set(None)
         working_source.set("form")
         ui.update_navset("input_method", selected="form")
         ui.notification_show(_current_text_labels()["reset_message"], type="message")
@@ -577,21 +873,14 @@ def _reset_cohort():
     @reactive.Effect
     @reactive.event(input.btn_load_file)
     def _load_file_into_set():
-        file_info = input.input_dataset_file()
+        file_info = _current_file_info("input_dataset_file")
         if not file_info:
             ui.notification_show("Select a file before loading it into the current set.", type="warning")
             return
 
         file_path = file_info[0]["datapath"]
         try:
-            if file_path.endswith(".csv"):
-                df = pd.read_csv(file_path)
-            elif file_path.endswith(".tsv"):
-                df = pd.read_csv(file_path, sep="\t")
-            elif file_path.endswith(".xlsx"):
-                df = pd.read_excel(file_path)
-            else:
-                raise ValueError("Unsupported file format!")
+            df = _read_uploaded_table(file_info)
             form_df.set(_ensure_id(clean_data(df)))
             working_source.set("file")
             ui.notification_show("File loaded into the current set.", type="message")
@@ -600,6 +889,78 @@ def _load_file_into_set():
             validation_errors.set([f"Could not read uploaded file: {e}"])
             validation_success.set(None)
 
+    @reactive.Effect
+    @reactive.event(input.btn_validate_generated_csv)
+    def _validate_generated_csv():
+        """Validate the uploaded generated CSV and publish the resulting report.
+
+        Stores the cleaned frame and the report, then notifies the user of the outcome.
+        """
+        upload = _current_file_info("generated_csv_file")
+        active_cfg = config_reactive.get() if config_reactive else config_init
+        if not upload:
+            # Nothing selected: clear any earlier validation result.
+            generated_csv_df.set(None)
+            generated_csv_report.set(None)
+            ui.notification_show("Select a generated CSV before running technical validation.", type="warning")
+            return
+
+        # Reading and validating share one guard, so any failure is reported as an unreadable file.
+        try:
+            outcome = _validation_report(_read_uploaded_table(upload), active_cfg or {})
+            generated_csv_df.set(outcome.get("clean_df"))
+            generated_csv_report.set(outcome)
+            passed = outcome["is_valid"]
+            notice = "Generated CSV passed technical validation." if passed else "Generated CSV has technical validation issues."
+            ui.notification_show(notice, type="message" if passed else "warning")
+        except Exception as exc:
+            generated_csv_df.set(None)
+            failure_issue = {
+                "Row": "",
+                "Record": "",
+                "Field": "File",
+                "Issue": f"Could not read uploaded file: {exc}",
+                "Current value": "",
+                "Allowed values": "CSV, TSV, or Excel",
+            }
+            failure_summary = {
+                "Rows detected": 0,
+                "Required columns": "0/0",
+                "Invalid values": 1,
+                "Missing values": 0,
+            }
+            generated_csv_report.set(
+                {"is_valid": False, "summary": failure_summary, "issues": [failure_issue]}
+            )
+            ui.notification_show(f"Error reading generated CSV: {exc}", type="error")
+
+    @reactive.Effect
+    @reactive.event(input.btn_load_generated_csv)
+    def _load_generated_csv():
+        """Load the validated CSV into the working set once the review checklist is complete.
+
+        Each unmet precondition shows a warning notification and returns without loading.
+        """
+        reviewed_report = generated_csv_report.get()
+        reviewed_df = generated_csv_df.get()
+        if not reviewed_report or reviewed_df is None or reviewed_df.empty:
+            ui.notification_show("Validate a generated CSV before loading it.", type="warning")
+            return
+        if not reviewed_report.get("is_valid"):
+            ui.notification_show("Resolve technical validation issues before loading the CSV.", type="warning")
+            return
+
+        # All four checklist items must be ticked, and nothing else.
+        ticked = set(input.clinical_review_checks() or [])
+        mandatory = {"source_reviewed", "uncertain_empty", "critical_fields", "clinical_responsibility"}
+        if ticked != mandatory:
+            ui.notification_show("Complete the clinical review checklist before loading the CSV.", type="warning")
+            return
+
+        form_df.set(_ensure_id(reviewed_df.copy()))
+        working_source.set("documents")
+        ui.notification_show("Clinically reviewed CSV loaded into the current set.", type="message")
+
     @reactive.Effect
     @reactive.event(input.btn_load_example)
     def _load_example_into_set():
@@ -738,6 +1099,21 @@ def download_data_dictionary():
         current_cfg = config_reactive.get() if config_reactive else config_init
         yield _data_dictionary_dataframe(current_cfg or {}).to_csv(index=False)
 
+    @render.download(filename="document_input_template.csv")
+    def download_document_input_template():
+        """Stream the input template for the active configuration as CSV text."""
+        yield _template_dataframe((config_reactive.get() if config_reactive else config_init) or {}).to_csv(index=False)
+
+    @render.download(filename="document_data_dictionary.csv")
+    def download_document_data_dictionary():
+        """Stream the data dictionary for the active configuration as CSV text."""
+        yield _data_dictionary_dataframe((config_reactive.get() if config_reactive else config_init) or {}).to_csv(index=False)
+
+    @render.download(filename="generated_csv_validation_report.csv")
+    def download_generated_csv_validation_report():
+        """Stream the issues of the latest validation report as CSV text."""
+        yield _validation_issues_dataframe(generated_csv_report.get()).to_csv(index=False)
+
     @output
     @render.ui
     def manual_entry_header():
@@ -750,6 +1126,249 @@ def upload_entry_header():
         labels = current_cfg.get("labels", {})
         return ui.tags.b(labels.get("upload_file", "Upload File"))
 
+    @output(suspend_when_hidden=False)
+    @render.ui
+    def input_dataset_file_ui():
+        """Render the structured-upload file input under its current generated id."""
+        cfg = config_reactive.get() if config_reactive else (config_init or {})
+        picker = ui.input_file(
+            _file_input_id("input_dataset_file"),
+            "Upload File (TSV, CSV, Excel)",
+            accept=[".tsv", ".csv", ".xlsx"],
+            multiple=False,
+        )
+        # NOTE(review): the marker span's purpose is not visible here; presumably styled or hidden via CSS.
+        marker = ui.span(cfg.get("name", ""), class_="file-input-use-case-marker")
+        return ui.div(picker, marker)
+
+    @output
+    @render.ui
+    def document_csv_header():  # Static bold heading text; reads no reactive values.
+        return ui.tags.b("Prepare CSV from clinical documents")
+
+    @output(suspend_when_hidden=False)
+    @render.ui
+    def generated_csv_file_ui():
+        """Render the generated-CSV file input under its current generated id."""
+        cfg = config_reactive.get() if config_reactive else (config_init or {})
+        picker = ui.input_file(
+            _file_input_id("generated_csv_file"),
+            "Upload generated CSV",
+            accept=[".csv", ".tsv", ".xlsx"],
+            multiple=False,
+        )
+        # NOTE(review): the marker span's purpose is not visible here; presumably styled or hidden via CSS.
+        marker = ui.span(cfg.get("name", ""), class_="file-input-use-case-marker")
+        return ui.div(picker, marker)
+
+    @output
+    @render.ui
+    def document_prompt_ui():
+        """Render the read-only extraction prompt with its toolbar and disclaimer."""
+        prompt_text = _extraction_prompt(config_reactive.get() if config_reactive else (config_init or {}))
+        # NOTE(review): the button has no Shiny input id; copying is presumably handled client-side.
+        copy_button = ui.tags.button(
+            "Copy prompt",
+            type="button",
+            class_="btn btn-default btn-sm document-csv-copy-prompt",
+        )
+        toolbar = ui.div(
+            ui.div("External extraction prompt", class_="document-csv-prompt-label"),
+            copy_button,
+            class_="document-csv-prompt-toolbar",
+        )
+        prompt_box = ui.tags.textarea(
+            prompt_text,
+            readonly="readonly",
+            rows="18",
+            class_="document-csv-prompt",
+        )
+        disclaimer = ui.p(
+            "XAInyPredictor does not read or interpret clinical documents. The external tool may make extraction errors. Review the generated CSV against the original source documents before uploading it.",
+            class_="document-csv-disclaimer",
+        )
+        return ui.div(toolbar, prompt_box, disclaimer, class_="document-csv-prompt-wrap")
+
+    @output
+    @render.ui
+    def generated_csv_validation_report():
+        """Render the validation status, summary counts and a preview of the first eight issues."""
+        latest = generated_csv_report.get()
+        if not latest:
+            return ui.div(
+                "No generated CSV has been technically validated yet.",
+                class_="document-csv-empty-report",
+            )
+
+        counts = latest.get("summary", {})
+        problems = latest.get("issues", [])
+        empty_cells = int(counts.get("Missing values", 0) or 0)
+        passed = latest.get("is_valid")
+        if not passed:
+            status_text = "Technical validation needs attention"
+            status_class = "document-csv-status-fail"
+            status_detail = "Resolve invalid values or missing required columns before loading this CSV."
+        elif empty_cells:
+            status_text = "Technical validation passed - clinical review required"
+            status_class = "document-csv-status-warning"
+            status_detail = (
+                f"The CSV structure and coded values are valid, but {empty_cells} empty value(s) "
+                "must be reviewed before analysis. Clinical correctness has not been validated."
+            )
+        else:
+            status_text = "Technical validation passed"
+            status_class = "document-csv-status-pass"
+            status_detail = "The CSV has the required columns and allowed coded values. Clinical correctness has not been validated."
+        shown = problems[:8]
+        issues_df = _validation_issues_dataframe(latest)
+        per_field_missing = []
+        if not issues_df.empty and "Issue" in issues_df.columns and "Field" in issues_df.columns:
+            is_missing = issues_df["Issue"].astype(str).eq("Missing value for clinical review")
+            per_field_missing = list(issues_df.loc[is_missing, "Field"].value_counts().items())
+
+        if shown:
+            issue_table = ui.tags.table(
+                ui.tags.thead(
+                    ui.tags.tr(
+                        ui.tags.th("Row"),
+                        ui.tags.th("Record"),
+                        ui.tags.th("Field"),
+                        ui.tags.th("Issue"),
+                        ui.tags.th("Current value"),
+                        ui.tags.th("Allowed values"),
+                    )
+                ),
+                ui.tags.tbody(
+                    *[
+                        ui.tags.tr(
+                            ui.tags.td(str(entry.get("Row", ""))),
+                            ui.tags.td(str(entry.get("Record", ""))),
+                            ui.tags.td(str(entry.get("Field", ""))),
+                            ui.tags.td(str(entry.get("Issue", ""))),
+                            ui.tags.td(str(entry.get("Current value", ""))),
+                            ui.tags.td(str(entry.get("Allowed values", ""))),
+                        )
+                        for entry in shown
+                    ]
+                ),
+                class_="document-csv-issue-table",
+            )
+        else:
+            issue_table = ui.p("No technical issues found.", class_="document-csv-no-issues")
+
+        hidden_count = max(0, len(problems) - len(shown))
+        missing_summary = None
+        if per_field_missing:
+            missing_summary = ui.div(
+                ui.div("Missing values by field", class_="document-csv-missing-title"),
+                ui.div(
+                    *[
+                        ui.span(f"{field}: {count}", class_="document-csv-missing-chip")
+                        for field, count in per_field_missing
+                    ],
+                    class_="document-csv-missing-chip-row",
+                ),
+                class_="document-csv-missing-summary",
+            )
+
+        return ui.div(
+            ui.div(
+                ui.div(status_text, class_="document-csv-status-title"),
+                ui.div(status_detail, class_="document-csv-status-detail"),
+                class_=f"document-csv-status {status_class}",
+            ),
+            ui.div(
+                *[
+                    ui.div(
+                        ui.div(caption, class_="document-csv-summary-label"),
+                        ui.div(str(figure), class_="document-csv-summary-value"),
+                        class_="document-csv-summary-item",
+                    )
+                    for caption, figure in counts.items()
+                ],
+                class_="document-csv-summary",
+            ),
+            missing_summary,
+            issue_table,
+            ui.p(
+                f"{hidden_count} additional issue(s) not shown." if hidden_count else "",
+                class_="document-csv-hidden-issues",
+            ),
+            ui.download_button(
+                "download_generated_csv_validation_report",
+                "Download technical validation report",
+                class_="btn-default btn-sm document-csv-report-download",
+            ),
+            class_="document-csv-report",
+        )
+
+    @output
+    @render.ui
+    def clinical_review_checklist():
+        """Render the clinical review checklist for a technically valid report.
+
+        Until the latest report is valid, a hint asking for validation is shown instead.
+        """
+        latest = generated_csv_report.get()
+        if not (latest and latest.get("is_valid")):
+            return ui.p(
+                "Run technical validation successfully before completing the clinical review checklist.",
+                class_="upload-template-hint",
+            )
+
+        # These option keys are the ones the load step requires to be ticked.
+        review_items = {
+            "source_reviewed": "I reviewed all extracted values against the source documents.",
+            "uncertain_empty": "I confirmed that uncertain or unsupported values were left empty.",
+            "critical_fields": "I confirmed the key clinical fields, including staging, metastases, resection, and risk/treatment context.",
+            "clinical_responsibility": "I understand that XAInyPredictor validates CSV structure and allowed values, not clinical correctness.",
+        }
+        intro = ui.p(
+            "Complete this checklist only after comparing the generated CSV with the original clinical source documents.",
+            class_="document-csv-checklist-intro",
+        )
+        checklist = ui.input_checkbox_group("clinical_review_checks", "", choices=review_items)
+        empty_cells = latest.get("summary", {}).get("Missing values", 0)
+        reminder = ui.p(
+            f"{empty_cells} empty value(s) require clinical review before analysis.",
+            class_="document-csv-disclaimer",
+        )
+        return ui.div(intro, checklist, reminder, class_="document-csv-checklist")
+
+    @output
+    @render.ui
+    def load_generated_csv_action():
+        """Render the control that loads the reviewed CSV.
+
+        Shows nothing without a valid report, and a disabled button until the checklist is complete.
+        """
+        latest = generated_csv_report.get()
+        if not (latest and latest.get("is_valid")):
+            return None
+
+        ticked = set(input.clinical_review_checks() or [])
+        mandatory = {
+            "source_reviewed",
+            "uncertain_empty",
+            "critical_fields",
+            "clinical_responsibility",
+        }
+        if ticked == mandatory:
+            control = ui.input_action_button(
+                "btn_load_generated_csv",
+                "Load clinically reviewed CSV into current cohort",
+                class_="btn-primary",
+            )
+        else:
+            # Plain disabled HTML button with no input id, shown as a placeholder.
+            control = ui.tags.button(
+                "Complete clinical review checklist to load CSV",
+                type="button",
+                class_="btn btn-default",
+                disabled="disabled",
+            )
+        return ui.div(control, class_="document-csv-load-action")
+
     @output
     @render.ui
     def upload_file_requirements():
@@ -810,6 +1429,14 @@ def delete_selected_action():
     @output
     @render.ui
     def reset_cohort_action():
+        df = pending_data.get()
+        if df is None or df.empty:
+            df = last_valid_data.get()
+        if df is None or df.empty:
+            df = output_data.get()
+        if df is None or df.empty:
+            return None
+
         return ui.input_action_button(
             "btn_reset_cohort",
             _current_text_labels()["reset_button"],
@@ -916,11 +1543,19 @@ def current_set_body():
         if display_df is None or display_df.empty:
             labels = _current_text_labels()
             return ui.div(
-                ui.div("No records yet", class_="empty-set-title"),
+                ui.div("Start here", class_="empty-set-eyebrow"),
+                ui.div(f"Build a working {labels['set_lower']}", class_="empty-set-title"),
                 ui.div(
-                    f"Add manually, upload a file, or load the example {labels['set_lower']}.",
+                    "Choose an input path above. You can enter records manually, upload a structured file, prepare a CSV from source documents, or load the example dataset.",
                     class_="empty-set-copy",
                 ),
+                ui.div(
+                    ui.span("Manual entry", class_="empty-set-chip"),
+                    ui.span("Structured upload", class_="empty-set-chip"),
+                    ui.span("CSV from documents", class_="empty-set-chip"),
+                    ui.span(f"Example {labels['set_lower']}", class_="empty-set-chip"),
+                    class_="empty-set-chip-row",
+                ),
                 class_="empty-set-state",
             )
 
diff --git a/src/XAInyPredictor/shinyapp/prediction.py b/src/XAInyPredictor/shinyapp/prediction.py
index 7978420..4d7606a 100644
--- a/src/XAInyPredictor/shinyapp/prediction.py
+++ b/src/XAInyPredictor/shinyapp/prediction.py
@@ -31,9 +31,17 @@ def prediction_ui(config=None):
 
     return ui.layout_sidebar(
         ui.sidebar(
+            ui.div(
+                ui.div("Stratification controls", class_="stratification-sidebar-eyebrow"),
+                ui.div(
+                    f"Select a {singular_title.lower()} and tune the decision threshold used in the stratification output.",
+                    class_="stratification-sidebar-copy",
+                ),
+                class_="stratification-sidebar-intro",
+            ),
             ui.output_ui("patient_selector_ui"),
             ui.div(
-                ui.input_numeric(
+                ui.input_text(
                     id="fnr_threshold",
                     label= ui.div(
                         "Allowed false-negative rate: ",
@@ -43,21 +51,25 @@ def prediction_ui(config=None):
                                 ui.tags.b("Understanding this threshold:"),
                                 ui.tags.p(help_texts.get("fnr_threshold", "This controls the safety margin of the model.")),
                                 ui.tags.ul(
-                                    ui.tags.li(ui.tags.b("0% False Negative Ratio:"), " " + help_texts.get("fnr_zero", "We refuse to miss any true positive patients.")),
-                                    ui.tags.li(ui.tags.b("Higher False Negative Ratio:"), " " + help_texts.get("fnr_higher", "We accept missing some positive patients for higher specificity.")),
+                                    ui.tags.li(ui.tags.b("0.00 false-negative rate:"), " " + help_texts.get("fnr_zero", "We refuse to miss any true positive patients.")),
+                                    ui.tags.li(ui.tags.b("Higher false-negative rate:"), " " + help_texts.get("fnr_higher", "We accept missing some positive patients for higher specificity.")),
                                 ),
                                 style="width: 250px;"
                             ),
                             placement="right"
                         )
                     ),
-                    value=0, min=0, max=100, step=1
+                    value=f"{float(config.get('false_negative_rate', 0)):.2f}",
                 ),
+                ui.output_ui("fnr_threshold_status"),
+                class_="manual-decimal-input",
             ),
+            class_="stratification-sidebar",
         ),
         ui.page_fluid(
             ui.div(
                 ui.div(ui.output_ui("analysis_title_header"), class_="stratification-tabset-title"),
+                ui.output_ui("input_completeness_warning"),
                 ui.navset_tab(
                 ui.nav_panel(
                     singular_title,
@@ -89,6 +101,7 @@ def prediction_ui(config=None):
                             ui.card_header(ui.output_ui("results_table_header")),
                             ui.output_data_frame("results_table_output"),
                             height="240px",
+                            class_="stratification-results-card",
                         ),
                         col_widths=12,
                     ),
@@ -208,9 +221,14 @@ def prediction_ui(config=None):
                         ui.card(
                             ui.card_header(ui.output_ui("closest_reference_header")),
                             ui.output_ui("closest_reference_patients_narrative"),
-                            ui.output_ui("closest_reference_patients_table"),
-                            height="420px",
+                            ui.output_ui("reference_similarity_feature_table"),
+                            ui.tags.details(
+                                ui.tags.summary("Technical neighbor distances"),
+                                ui.output_ui("closest_reference_patients_table"),
+                                class_="reference-distance-details",
+                            ),
                             full_screen=True,
+                            class_="reference-similarity-card",
                         ),
                         col_widths=12,
                     ),
@@ -228,6 +246,8 @@ def prediction_ui(config=None):
 @module.server
 def server(input: Inputs, output: Outputs, session: Session, global_input_data, patient_selected_id, model_data, delta_test_reactive, x_test_reactive, prob_threshold, config_init, config_reactive=None):
     selection_revision = reactive.Value(0)
+    achieved_fnr = reactive.Value(None)
+    last_fnr_config_marker = reactive.Value(None)
 
     @reactive.Calc
     def current_labels():
@@ -311,15 +331,117 @@ def _selected_entity_id():
         except (TypeError, ValueError):
             return None
 
+    def _binary_target_values(y_values, positive_class):  # encode targets as a 0/1 integer Series with a fresh 0..n-1 index
+        y_series = pd.Series(y_values).reset_index(drop=True)
+        numeric_values = pd.to_numeric(y_series, errors="coerce")
+        non_missing_numeric = numeric_values.dropna()
+        if not non_missing_numeric.empty and set(non_missing_numeric.unique()).issubset({0, 1}):
+            return numeric_values.fillna(0).astype(int)  # already 0/1 coded; missing or non-numeric entries become 0
+        return y_series.astype(str).eq(str(positive_class)).astype(int)  # otherwise 1 where the label's string form equals positive_class
+
+    def _feature_to_raw_column(feature_name: str, raw_df: pd.DataFrame | None = None) -> str:  # map a feature name to its raw-input column
+        if raw_df is None:
+            raw_df = global_input_data.get()  # default to the current reactive input frame
+        if raw_df is None:
+            return feature_name  # nothing to match against: the name is returned unchanged
+        target = str(feature_name).replace("_", " ")
+        for col in raw_df.columns:
+            if str(col).replace("_", " ") == target:  # underscores and spaces are treated as equivalent
+                return col
+        return target  # no match: the space-normalised name, which callers then test against the frame's columns
+
+    def _input_missing_summary():
+        raw_df = global_input_data.get()
+        cfg = current_config()
+        if raw_df is None or raw_df.empty:
+            return {"total": 0, "rows": 0, "by_feature": {}, "all_missing": []}
+
+        by_feature = {}
+        all_missing = []
+        total = 0
+        for feature in cfg.get("features", []):
+            name = feature.get("name")
+            if not name:
+                continue
+            col = _feature_to_raw_column(name, raw_df)
+            if col not in raw_df.columns:
+                continue
+            missing = int(raw_df[col].isna().sum())
+            by_feature[name] = missing
+            total += missing
+            if missing == len(raw_df):
+                all_missing.append(name)
+
+        return {
+            "total": total,
+            "rows": len(raw_df),
+            "by_feature": by_feature,
+            "all_missing": all_missing,
+        }
+
+    def _feature_has_observed_values(feature_name: str) -> bool:
+        raw_df = global_input_data.get()
+        if raw_df is None or raw_df.empty:
+            return True
+        col = _feature_to_raw_column(feature_name, raw_df)
+        if col not in raw_df.columns:
+            return True
+        return raw_df[col].notna().any()
+
+    def _selected_patient_observed_features(features: list[str]) -> tuple[list[str], list[str]]:
+        """Split ``features`` into (observed, excluded) for the selected row; all count as observed if no row is found."""
+        raw_df = global_input_data.get()
+        sel_id = _selected_entity_id()
+        if raw_df is None or raw_df.empty or sel_id is None or "ID" not in raw_df.columns:
+            return features, []
+        # Coerce instead of astype(int): a missing or non-numeric ID must not raise inside the plot renderers.
+        patient_rows = raw_df[pd.to_numeric(raw_df["ID"], errors="coerce") == int(sel_id)]
+        if patient_rows.empty:
+            return features, []
+
+        observed, excluded = [], []
+        patient_row = patient_rows.iloc[0]
+        for feat in features:
+            col = _feature_to_raw_column(feat, raw_df)
+            # A non-missing value in this row already implies the column has observed values.
+            if col in raw_df.columns and pd.notna(patient_row[col]):
+                observed.append(feat)
+            else:
+                excluded.append(feat)
+        return observed, excluded
+
+    def _default_features_for_visuals():
+        """Return the formula features that have observed input values, capped at the configured default count."""
+        md = model_data.get()
+        cfg = current_config()
+        if not md:
+            return []
+        features_to_plot = [feat.replace("_", " ") for feat in md.get("FEATS_IN_FORMULA", [])]
+        features_to_plot = [feat for feat in features_to_plot if _feature_has_observed_values(feat)]
+        # `.get(key, 8)` only covers a missing key; `or 8` also handles an explicit null/0, where int(None) would raise.
+        return features_to_plot[:int(cfg.get("default_selected_features") or 8)]
+
+    def _guided_empty_state(title: str, copy: str, eyebrow: str = "Waiting for confirmed input"):
+        return ui.div(
+            ui.div(eyebrow, class_="guided-empty-eyebrow"),
+            ui.div(title, class_="guided-empty-title"),
+            ui.div(copy, class_="guided-empty-copy"),
+            class_="guided-empty-state",
+        )
+
+    def _format_reference_value(value):
+        if pd.isna(value):
+            return "Missing"
+        if isinstance(value, (int, float, np.integer, np.floating)):
+            return f"{float(value):.3g}"
+        return str(value)
+
     @reactive.Calc
-    def closest_reference_patients_df():
+    def closest_reference_context():
         selection_revision.get()
         raw_df = global_input_data.get()
         delta_test = delta_test_reactive.get()
         md = model_data.get()
         sel_id = _selected_entity_id()
-        lbls = current_labels()
-        threshold = float(prob_threshold.get()) if prob_threshold.get() is not None else 0
 
         if (
             raw_df is None
@@ -358,6 +480,29 @@ def closest_reference_patients_df():
         distances = np.linalg.norm(train_matrix - patient_vector, axis=1)
         closest_positions = np.argsort(distances)[:5]
 
+        return {
+            "raw_df": raw_df,
+            "patient_pos": patient_pos,
+            "selected_patient_row": raw_df.iloc[patient_pos],
+            "delta_train": delta_train.reset_index(drop=True),
+            "distances": distances,
+            "closest_positions": closest_positions,
+            "reference_raw": (md.get("X_TRAIN_RAW") if md else None),
+        }
+
+    @reactive.Calc
+    def closest_reference_patients_df():
+        context = closest_reference_context()
+        lbls = current_labels()
+        threshold = float(prob_threshold.get()) if prob_threshold.get() is not None else 0
+
+        if not context:
+            return None
+
+        delta_train = context["delta_train"]
+        distances = context["distances"]
+        closest_positions = context["closest_positions"]
+
         rows = []
         for pos in closest_positions:
             score = float(delta_train.iloc[int(pos)]["pred_prob"])
@@ -372,6 +517,90 @@ def closest_reference_patients_df():
 
         return pd.DataFrame(rows)
 
+    @reactive.Calc
+    def reference_similarity_features_df():
+        context = closest_reference_context()
+        cfg = current_config()
+        if not context:
+            return None
+
+        reference_raw = context.get("reference_raw")
+        selected_patient_row = context.get("selected_patient_row")
+        closest_positions = context.get("closest_positions")
+        if reference_raw is None or reference_raw.empty or selected_patient_row is None:
+            return None
+
+        reference_raw = reference_raw.reset_index(drop=True)
+        closest_reference = reference_raw.iloc[list(map(int, closest_positions))]
+
+        features = [
+            feature for feature in cfg.get("features", [])
+            if feature.get("role") != "identifier" and feature.get("plot") is not False
+        ]
+        selected_features = list(input.features_to_plot() or [])
+        if selected_features:
+            selected_names = {str(feat).replace("_", " ") for feat in selected_features}
+            prioritized = [
+                feature for feature in features
+                if str(feature.get("name", "")).replace("_", " ") in selected_names
+            ]
+            remaining = [feature for feature in features if feature not in prioritized]
+            features = prioritized + remaining
+
+        rows = []
+        for feature in features:
+            name = feature.get("name")
+            if not name:
+                continue
+            selected_col = _feature_to_raw_column(name, context["raw_df"])
+            reference_col = _feature_to_raw_column(name, reference_raw)
+            if selected_col not in context["raw_df"].columns or reference_col not in closest_reference.columns:
+                continue
+
+            selected_value = selected_patient_row[selected_col]
+            reference_values = closest_reference[reference_col].dropna()
+            if reference_values.empty:
+                summary = "No non-missing values"
+                interpretation = "Not comparable"
+            elif pd.api.types.is_numeric_dtype(reference_values):
+                numeric_values = pd.to_numeric(reference_values, errors="coerce").dropna()
+                selected_numeric = pd.to_numeric(pd.Series([selected_value]), errors="coerce").iloc[0]
+                if numeric_values.empty or pd.isna(selected_numeric):
+                    summary = "No numeric comparison"
+                    interpretation = "Not comparable"
+                else:
+                    median = float(numeric_values.median())
+                    min_value = float(numeric_values.min())
+                    max_value = float(numeric_values.max())
+                    summary = f"median {median:.3g}; range {min_value:.3g}-{max_value:.3g}"
+                    if min_value <= float(selected_numeric) <= max_value:
+                        interpretation = "Within closest-reference range"
+                    elif float(selected_numeric) < min_value:
+                        interpretation = "Below closest-reference range"
+                    else:
+                        interpretation = "Above closest-reference range"
+            else:
+                selected_text = str(selected_value)
+                match_count = int(reference_values.astype(str).eq(selected_text).sum())
+                top_values = reference_values.astype(str).value_counts().head(2)
+                top_summary = ", ".join(f"{value} ({count})" for value, count in top_values.items())
+                summary = top_summary or "No categorical summary"
+                interpretation = f"Shared by {match_count}/{len(reference_values)} closest references"
+
+            rows.append(
+                {
+                    "Feature": feature.get("display_name") or feature.get("label") or name,
+                    "Selected value": _format_reference_value(selected_value),
+                    "Closest-reference summary": summary,
+                    "Interpretation": interpretation,
+                }
+            )
+
+            if len(rows) >= 10:
+                break
+
+        return pd.DataFrame(rows)
+
     @reactive.Calc
     def cohort_summary_df():
         res_df = stratification_results_df()
@@ -440,6 +669,7 @@ def _report_readme_text():
         lbls = current_labels()
         text = lbls["text"]
         closest_reference_file = f"closest_reference_{_slug_for_filename(lbls['plural'])}.csv"
+        similarity_file = f"reference_similarity_{_slug_for_filename(lbls['plural'])}.csv"
         set_summary_file = f"{_slug_for_filename(lbls['set_lower'])}_stratification_summary.csv"
         return (
             "XAInyPredictor report package\n"
@@ -450,7 +680,8 @@ def _report_readme_text():
             f"- metadata.csv: use case, threshold, {lbls['class_column'].lower()}, export timestamp, and prototype context.\n"
             f"- stratification_results.csv: {lbls['singular']}-level stratification score and assigned group.\n"
             f"- {set_summary_file}: {lbls['set_lower']}-level counts, score summary, and metadata.\n"
-            f"- {closest_reference_file}: anonymized closest-reference ranks, distances, scores, and classes for the selected {lbls['singular']} when available.\n\n"
+            f"- {similarity_file}: aggregated feature comparison between the selected {lbls['singular']} and closest references when available.\n"
+            f"- {closest_reference_file}: technical closest-reference ranks, distances, scores, and classes for the selected {lbls['singular']} when available.\n\n"
             "Prototype context:\n" +
             text.get(
                 "report_context",
@@ -466,6 +697,10 @@ def _closest_reference_filename():
         lbls = current_labels()
         return f"closest_reference_{_slug_for_filename(lbls['plural'])}.csv"
 
+    def _reference_similarity_filename():
+        lbls = current_labels()
+        return f"reference_similarity_{_slug_for_filename(lbls['plural'])}.csv"
+
     def _set_summary_filename():
         lbls = current_labels()
         return f"{_slug_for_filename(lbls['set_lower'])}_stratification_summary.csv"
@@ -533,12 +768,78 @@ def _sync_selection_with_active_data():
             patient_selected_id.set(all_ids[0])
             ui.update_selectize("local_patient_select", selected=all_ids[0])
 
+    @reactive.Effect
+    def _sync_fnr_input_with_use_case():
+        cfg = current_config()
+        marker = cfg.get("name", "default")
+        if marker == last_fnr_config_marker.get():
+            return
+        last_fnr_config_marker.set(marker)
+        ui.update_text(
+            "fnr_threshold",
+            value=f"{float(cfg.get('false_negative_rate', 0)):.2f}",
+        )
+
     @output(suspend_when_hidden=False)
     @render.ui
     def analysis_title_header():
         lbls = current_labels()
         return lbls["text"].get("analysis_title", "Stratification Analysis")
 
+    @output(suspend_when_hidden=False)
+    @render.ui
+    def input_completeness_warning():
+        summary = _input_missing_summary()
+        if not summary["total"]:
+            return None
+
+        by_feature = summary["by_feature"]
+        top_missing = sorted(
+            [(feature, count) for feature, count in by_feature.items() if count],
+            key=lambda item: item[1],
+            reverse=True,
+        )[:5]
+        top_text = ", ".join(f"{feature}: {count}" for feature, count in top_missing)
+        detail = f" Most affected fields: {top_text}." if top_text else ""
+        return ui.div(
+            ui.div("Input completeness warning", class_="input-completeness-title"),
+            ui.div(
+                f"This confirmed cohort contains {summary['total']} missing input value(s). "
+                "Predictions may use imputed values, and profile plots hide fields without observed patient values."
+                + detail,
+                class_="input-completeness-copy",
+            ),
+            class_="input-completeness-warning",
+        )
+
+    @output(suspend_when_hidden=False)
+    @render.ui
+    def fnr_threshold_status():
+        """Render allowed/achieved FNR and the decision threshold; the allowed value is clamped to [0, 1] as in _calculate_probability_threshold."""
+        threshold, achieved = prob_threshold.get(), achieved_fnr.get()
+        try:
+            target = min(max(float(input.fnr_threshold() or 0), 0), 1)
+        except (TypeError, ValueError):
+            target = 0
+        return ui.div(
+            ui.div(
+                ui.span("Allowed FNR", class_="fnr-status-label"),
+                ui.span(f"{target:.2f}", class_="fnr-status-value"),
+                class_="fnr-status-item",
+            ),
+            ui.div(
+                ui.span("Achieved FNR", class_="fnr-status-label"),
+                ui.span("N/A" if achieved is None else f"{float(achieved):.2f}", class_="fnr-status-value"),
+                class_="fnr-status-item",
+            ),
+            ui.div(
+                ui.span("Decision threshold", class_="fnr-status-label"),
+                ui.span("N/A" if threshold is None else f"{float(threshold):.3f}", class_="fnr-status-value"),
+                class_="fnr-status-item",
+            ),
+            class_="fnr-status-panel",
+        )
+
     @output(suspend_when_hidden=False)
     @render.ui
     def selected_output_header():
@@ -609,15 +910,15 @@ def closest_reference_header():
         lbls = current_labels()
         text = lbls["text"]
         return ui.div(
-            text.get("closest_reference_title", f"Closest {lbls['reference_title']} "),
+            text.get("reference_similarity_title", "Reference Similarity Summary "),
             ui.popover(
                 ui.span(ui.tags.i(class_="glyphicon glyphicon-info-sign"), "", style="color: #007bc2; cursor: pointer; font-size: 0.9em;"),
                 ui.tags.div(
-                    ui.tags.b(text.get("closest_reference_help_title", f"Closest {lbls['reference_plural']}:")),
+                    ui.tags.b(text.get("reference_similarity_help_title", "Reference similarity:")),
                     ui.tags.p(
                         text.get(
-                            "closest_reference_help",
-                            f"{lbls['reference_title']} are ranked by distance in the model contribution space, using the same feature effects that drive the explanation plots.",
+                            "reference_similarity_help",
+                            "Closest references are identified in the model contribution space. The feature table summarizes observed variables across those references so distance is not interpreted in isolation.",
                         )
                     ),
                     style="width: 260px;",
@@ -673,16 +974,28 @@ def features_to_plot_ui():
         features that appear in the formula (discarding the ones that do not appear).
         """
         md = model_data.get()
-        cfg = current_config()
         feature_names = md.get("FEATURE_ORDER_DISPLAY", []) if md else []
-        features_to_plot = md.get("FEATS_IN_FORMULA", []) if md else []
-        max_default_features = cfg.get("default_selected_features")
-        if max_default_features:
-            features_to_plot = features_to_plot[:int(max_default_features)]
+        summary = _input_missing_summary()
+        row_count = summary.get("rows", 0)
+        missing_counts = summary.get("by_feature", {})
+        features_to_plot = _default_features_for_visuals()
+
+        choices = {}
+        for feature_name in feature_names:
+            raw_name = feature_name.replace("_", " ")
+            missing = missing_counts.get(raw_name, 0)
+            if row_count and missing == row_count:
+                label = f"{feature_name} (all missing)"
+            elif missing:
+                label = f"{feature_name} ({missing} missing)"
+            else:
+                label = feature_name
+            choices[feature_name] = label
+
         return ui.input_selectize(
             id="features_to_plot",
             label="Select features to view:",
-            choices=feature_names,
+            choices=choices,
             selected=features_to_plot,
             multiple=True,
             options={
@@ -696,13 +1009,9 @@ def features_to_plot_ui():
     @reactive.event(input.btn_select_default_features)
     def _select_default_features():
         md = model_data.get()
-        cfg = current_config()
         if not md:
             return
-        features_to_plot = md.get("FEATS_IN_FORMULA", [])
-        max_default_features = cfg.get("default_selected_features", 5)
-        features_to_plot = features_to_plot[:int(max_default_features)]
-        ui.update_selectize("features_to_plot", selected=features_to_plot)
+        ui.update_selectize("features_to_plot", selected=_default_features_for_visuals())
 
     @reactive.Effect
     @reactive.event(input.btn_clear_features)
@@ -749,8 +1058,10 @@ def dynamic_plot_container():
                 )
             ),
             # Height is fixed or auto, but width adapts
+            ui.output_ui("profile_plot_missing_notice"),
             ui.output_plot("radar_plot", height="650px", width="100%"),
-            full_screen=True
+            full_screen=True,
+            class_="patient-profile-plot-card",
         )
         
         curve_card = ui.card(
@@ -769,8 +1080,10 @@ def dynamic_plot_container():
                 )
             ),
             # DYNAMIC HEIGHT APPLIED HERE
+            ui.output_ui("profile_plot_missing_notice"),
             ui.output_plot("curve_plot", height=f"{curve_height_px}px", width="100%"),
-            full_screen=True
+            full_screen=True,
+            class_="patient-profile-plot-card"
         )
 
         if mode == "radar":
@@ -780,6 +1093,31 @@ def dynamic_plot_container():
         else:
             return ui.p("Incorrect view selection!")
 
+    @output(suspend_when_hidden=False)
+    @render.ui
+    def profile_plot_missing_notice():
+        selected_features = list(input.features_to_plot() or [])
+        if not selected_features:
+            return None
+
+        _, excluded_features = _selected_patient_observed_features(selected_features)
+        if not excluded_features:
+            return None
+
+        preview = ", ".join(excluded_features[:6])
+        if len(excluded_features) > 6:
+            preview += f", and {len(excluded_features) - 6} more"
+
+        return ui.div(
+            ui.div("Observed-value filter applied", class_="profile-missing-notice-title"),
+            ui.div(
+                f"The selected patient has missing values for: {preview}. "
+                "These fields are excluded from this visual comparison to avoid displaying imputed values as observed measurements.",
+                class_="profile-missing-notice-copy",
+            ),
+            class_="profile-missing-notice",
+        )
+
     @reactive.Effect
     @reactive.event(input.local_patient_select)
     def _sync_selection():
@@ -810,40 +1148,42 @@ def _calculate_probability_threshold():
         Calculate probability threshold when the FNR threshold changes.
         """
         fnr_val = input.fnr_threshold()
-        delta_test = delta_test_reactive.get()
         md = model_data.get()
-        y_test = md.get("Y_TEST") if md else None
+        y_reference = md.get("Y_TRAIN") if md else None
+        delta_reference = md.get("D_TRAIN") if md else None
 
         if (
-            y_test is None
-            or delta_test is None
-            or delta_test.empty
-            or "pred_prob" not in delta_test.columns
+            y_reference is None
+            or delta_reference is None
+            or delta_reference.empty
+            or "pred_prob" not in delta_reference.columns
         ):
+            achieved_fnr.set(None)
             return
-        if len(y_test) != len(delta_test):
+        if len(y_reference) != len(delta_reference):
+            achieved_fnr.set(None)
             return
 
         try:
-            target_fnr = float(fnr_val) / 100
-        except ValueError:
+            target_fnr = min(max(float(fnr_val), 0), 1)
+        except (TypeError, ValueError):
+            achieved_fnr.set(None)
             return
 
-        # Encode y_test to 1/0
         lbls = current_labels()
         pos_class = lbls["positive_class"]
-        y_test_encoded = (y_test == pos_class).astype(int)
+        y_reference_encoded = _binary_target_values(y_reference, pos_class)
 
-        # Calculate probability threshold
-        y_test_encoded.sort_index(inplace=True)
-        delta_test.sort_index(inplace=True)
         threshold, fnr = threshold_for_target_fnr(
-            y_test_encoded.to_numpy(),
-            delta_test['pred_prob'].to_numpy(),
+            y_reference_encoded.to_numpy(),
+            delta_reference["pred_prob"].to_numpy(),
             target_fnr=target_fnr
         )
 
         prob_threshold.set(threshold)
+        achieved_fnr.set(fnr)
+        with reactive.isolate():
+            selection_revision.set(selection_revision.get() + 1)
 
     # --- Calculation ---
 
@@ -915,11 +1255,21 @@ def stratification_summary():
             class_col = lbls["class_column"]
 
             if res_df is None or res_df.empty or sel_id is None:
-                return ui.div(lbls["text"].get("empty_individual_summary", f"Add or upload {lbls['plural']} to review individual stratification outputs."), class_="stratification-summary-empty")
+                return _guided_empty_state(
+                    f"No {lbls['singular']} selected yet",
+                    lbls["text"].get(
+                        "empty_individual_summary",
+                        f"Confirm a working {lbls['set_lower']} in Data Input to review individual stratification outputs.",
+                    ),
+                )
 
             patient_row = res_df[res_df["ID"].astype(int) == sel_id]
             if patient_row.empty:
-                return ui.div(lbls["text"].get("selected_entity_not_found", f"Selected {lbls['singular']} not found."), class_="stratification-summary-empty")
+                return _guided_empty_state(
+                    f"Selected {lbls['singular']} not found",
+                    "Choose another record from the selector or return to Data Input to refresh the working set.",
+                    eyebrow="Selection needs attention",
+                )
 
             score = float(patient_row[prob_col].iloc[0])
             threshold = float(prob_thr) if prob_thr is not None else 0
@@ -1027,7 +1377,13 @@ def cohort_stratification_summary():
         lbls = current_labels()
 
         if res_df is None or res_df.empty:
-            return ui.div(lbls["text"].get("empty_set_summary", f"Add or upload {lbls['plural']} to summarize {lbls['set_lower']}-level stratification."), class_="stratification-summary-empty")
+            return _guided_empty_state(
+                f"No confirmed {lbls['set_lower']} yet",
+                lbls["text"].get(
+                    "empty_set_summary",
+                    f"Confirm records in Data Input to summarize {lbls['set_lower']}-level stratification.",
+                ),
+            )
 
         prob_col = lbls["probability_column"]
         class_col = lbls["class_column"]
@@ -1272,6 +1628,37 @@ def closest_reference_patients_table():
             class_="reference-candidates-table-wrap",
         )
 
+    @output(suspend_when_hidden=False)
+    @render.ui
+    def reference_similarity_feature_table():
+        feature_df = reference_similarity_features_df()
+        lbls = current_labels()
+        if feature_df is None or feature_df.empty:
+            return ui.div(
+                ui.div("No feature-level similarity summary available", class_="guided-empty-title"),
+                ui.div(
+                    f"Confirm a {lbls['set_lower']} and select a {lbls['singular']} to compare observed variables with the closest reference summaries.",
+                    class_="guided-empty-copy",
+                ),
+                class_="guided-empty-state reference-similarity-empty",
+            )
+
+        return ui.div(
+            ui.div(
+                "Feature-level comparison with closest references",
+                class_="reference-similarity-section-title",
+            ),
+            ui.div(
+                "Values are aggregated across the closest reference records, so this view explains similarity without exposing row-level reference features.",
+                class_="reference-similarity-section-copy",
+            ),
+            ui.tags.div(
+                ui.HTML(feature_df.to_html(index=False, classes="reference-similarity-table", border=0)),
+                class_="reference-similarity-table-wrap",
+            ),
+            class_="reference-similarity-feature-panel",
+        )
+
     @output(suspend_when_hidden=False)
     @render.ui
     def closest_reference_patients_narrative():
@@ -1297,7 +1684,7 @@ def closest_reference_patients_narrative():
             ui.div(
                 lbls["text"].get(
                     "closest_reference_narrative",
-                    f"The selected {lbls['singular']} is most similar to {lbls['reference_plural']} mostly assigned to {top_group} ({top_count}/{total}).{score_text}",
+                    f"The selected {lbls['singular']} is closest in the model contribution space to {lbls['reference_plural']} mostly assigned to {top_group} ({top_count}/{total}).{score_text}",
                 ).format(
                     entity=lbls["singular"],
                     reference_plural=lbls["reference_plural"],
@@ -1311,7 +1698,7 @@ def closest_reference_patients_narrative():
             ui.div(
                 lbls["text"].get(
                     "closest_reference_privacy_note",
-                    "Reference records come from the model reference population, not from the current input set. If the reference population contains private or sensitive records, expose only anonymized summaries or distances rather than row-level reference data.",
+                    "Reference records come from the model reference population, not from the current input set. Distances are technical similarity scores; interpret them together with the aggregated feature comparison below rather than as clinical similarity by themselves.",
                 ),
                 class_="reference-privacy-note",
             ),
@@ -1328,16 +1715,20 @@ def download_stratification_results():
     def download_report_package():
         res_df = stratification_results_df()
         closest_df = closest_reference_patients_df()
+        similarity_df = reference_similarity_features_df()
         if res_df is None:
             res_df = pd.DataFrame()
         if closest_df is None:
             closest_df = pd.DataFrame()
+        if similarity_df is None:
+            similarity_df = pd.DataFrame()
 
         buffer = BytesIO()
         with ZipFile(buffer, mode="w", compression=ZIP_DEFLATED) as report_zip:
             report_zip.writestr("README.txt", _report_readme_text())
             report_zip.writestr("metadata.csv", _metadata_export_df().to_csv(index=False))
             report_zip.writestr("stratification_results.csv", _attach_export_metadata(res_df).to_csv(index=False))
+            report_zip.writestr(_reference_similarity_filename(), _attach_export_metadata(similarity_df).to_csv(index=False))
             report_zip.writestr(_closest_reference_filename(), _attach_export_metadata(closest_df).to_csv(index=False))
             report_zip.writestr(_set_summary_filename(), _cohort_summary_export_df().to_csv(index=False))
 
@@ -1365,9 +1756,11 @@ def radar_plot():
         if not data: return _empty_plot("Radar data unavailable")
         
         opts = list(input.radar_plot_elements() or [])
-        feats = list(input.features_to_plot() or [])
+        feats, excluded = _selected_patient_observed_features(list(input.features_to_plot() or []))
 
         if len(feats) < 3:
+            if excluded:
+                return _empty_plot("At least 3 selected features need observed values for this patient")
             return _empty_plot("Please, select at least 3 features to view")
 
         fig_radar, _ = _run_patient_analysis(data, feats, opts)
@@ -1383,12 +1776,14 @@ def curve_plot():
         if not data: return _empty_plot("Curve data unavailable")
         
         cfg = current_config()
-        feats = list(input.features_to_plot() or [])
+        feats, excluded = _selected_patient_observed_features(list(input.features_to_plot() or []))
         max_curve_features = int(cfg.get("max_curve_features", 8))
         if len(feats) > max_curve_features:
             feats = feats[:max_curve_features]
 
         if len(feats) < 3:
+            if excluded:
+                return _empty_plot("At least 3 selected features need observed values for this patient")
             return _empty_plot("Please, select at least 3 features to view")
 
         _, fig_curve = _run_patient_analysis(data, feats)
diff --git a/src/XAInyPredictor/shinyapp/www/index.js b/src/XAInyPredictor/shinyapp/www/index.js
index 48b47c7..56f4882 100644
--- a/src/XAInyPredictor/shinyapp/www/index.js
+++ b/src/XAInyPredictor/shinyapp/www/index.js
@@ -41,6 +41,14 @@ $(() => {
     }
   });
 
+  // Rewrite decimal commas as dots in any input whose id ends with "fnr_threshold".
+  $(document).on("input change blur", "input[id$='fnr_threshold']", function () {
+    const withDots = String(this.value || "").split(",").join(".");
+    if (this.value === withDots) return;
+    this.value = withDots;
+    $(this).trigger("change");
+  });
+
   new MutationObserver((mutations) => {
     mutations.forEach((mutation) => {
       mutation.addedNodes.forEach((node) => {
@@ -68,6 +76,67 @@ $(() => {
     setUseCaseLoading(true, "Confirming data...", "Preparing the confirmed set for downstream analysis.");
   });
 
+  let fileInputScrollPosition = null; // {x, y} saved when a file input is pressed; null when nothing is pending
+  let fileInputScrollRestoreTimer = null; // handle of the timer that clears the saved position
+  // Snapshot the current window scroll offsets (falling back to pageX/YOffset, then 0).
+  const rememberFileInputScroll = () => {
+    fileInputScrollPosition = {
+      x: window.scrollX || window.pageXOffset || 0,
+      y: window.scrollY || window.pageYOffset || 0,
+    };
+  };
+  // Scroll back to the saved offsets; does nothing when no position has been remembered.
+  const restoreFileInputScroll = () => {
+    if (!fileInputScrollPosition) return;
+
+    const target = { ...fileInputScrollPosition }; // copy, so the later reset to null cannot affect pending timers
+    window.clearTimeout(fileInputScrollRestoreTimer);
+    // Re-apply the offsets at several delays; presumably the page can jump more than once after the dialog closes.
+    [0, 60, 180, 420].forEach((delay) => {
+      window.setTimeout(() => {
+        window.scrollTo(target.x, target.y);
+      }, delay);
+    });
+    // Forget the saved position after 700 ms so later focus events stop forcing the scroll.
+    fileInputScrollRestoreTimer = window.setTimeout(() => {
+      fileInputScrollPosition = null;
+    }, 700);
+  };
+  // Remember on press/click of any file input; restore on its change/focus and when the window regains focus.
+  $(document).on("pointerdown click", "input[type='file']", rememberFileInputScroll);
+  $(document).on("change focus", "input[type='file']", restoreFileInputScroll);
+  window.addEventListener("focus", restoreFileInputScroll);
+
+  // Copy the prompt text to the clipboard and briefly show the outcome on the button.
+  $(document).on("click", ".document-csv-copy-prompt", async function () {
+    const button = this;
+    const textarea = $(button).closest(".document-csv-prompt-wrap").find(".document-csv-prompt").get(0);
+    if (!textarea) return;
+    if (button.dataset.idleLabel === undefined) button.dataset.idleLabel = button.textContent; // keep the idle label across re-clicks
+    let copied = false;
+    try {
+      if (navigator.clipboard && window.isSecureContext) {
+        await navigator.clipboard.writeText(textarea.value || textarea.textContent || "");
+        copied = true;
+      } else {
+        textarea.focus();
+        textarea.select();
+        copied = document.execCommand("copy"); // reports failure by returning false, not by throwing
+        textarea.setSelectionRange(0, 0);
+      }
+    } catch (error) {
+      copied = false; // clipboard write rejected or the legacy path threw
+    }
+    button.textContent = copied ? "Copied" : "Copy failed";
+    button.classList.toggle("document-csv-copy-success", copied);
+    button.classList.toggle("document-csv-copy-failed", !copied);
+    window.clearTimeout($(button).data("copyResetTimer")); // drop the reset scheduled by an earlier click
+    $(button).data("copyResetTimer", window.setTimeout(() => {
+      button.textContent = button.dataset.idleLabel;
+      button.classList.remove("document-csv-copy-success", "document-csv-copy-failed");
+    }, 1600));
+  });
+
   Shiny.addCustomMessageHandler("setUseCaseLoading", (payload) => {
     setUseCaseLoading(payload.visible, payload.title, payload.subtitle);
     if (!payload.visible) {
@@ -76,6 +145,30 @@ $(() => {
     }
   });
 
+  Shiny.addCustomMessageHandler("resetDataInputUploads", (_payload) => { // payload is ignored
+    const resetFileControls = () => { // clears every file input on the page, not only one tab's
+      $("input[type='file']").each(function () {
+        const $container = $(this).closest(".shiny-input-container, .form-group, .input-group, .upload-file-picker, .document-csv-step-body");
+        if (window.Shiny && Shiny.unbindAll && $container.length) {
+          Shiny.unbindAll($container.get(0));
+        }
+        // Swap the input for an emptied clone while the container is unbound.
+        const clone = this.cloneNode(true);
+        clone.value = "";
+        this.replaceWith(clone);
+        // Reset the visible filename widgets to the "No file selected" state.
+        $container.find(".form-control, input[type='text']").val("").attr("placeholder", "No file selected");
+        $container.find(".custom-file-label, .file-caption-name").text("No file selected");
+        // Rebind so Shiny attaches to the replacement input.
+        if (window.Shiny && Shiny.bindAll && $container.length) {
+          Shiny.bindAll($container.get(0));
+        }
+      });
+    };
+    // Run the reset at several delays; presumably to also catch inputs rendered after the message arrives.
+    [0, 80, 250, 600, 1000].forEach((delay) => window.setTimeout(resetFileControls, delay));
+  });
+
   Shiny.addCustomMessageHandler("setStratificationTabLabels", (payload) => {
     const labels = {
       patient: payload.patient,
@@ -113,6 +206,7 @@ $(() => {
     const fallbackText = {
       form: ["Manual Entry"],
       file: ["Upload File"],
+      documents: ["Prepare CSV from Documents"],
       example: ["Example Cohort", "Example Candidate Set"],
     };
 
diff --git a/src/XAInyPredictor/shinyapp/www/layout.css b/src/XAInyPredictor/shinyapp/www/layout.css
index 775a9e2..b5cc7a3 100644
--- a/src/XAInyPredictor/shinyapp/www/layout.css
+++ b/src/XAInyPredictor/shinyapp/www/layout.css
@@ -82,7 +82,7 @@ html, body, .container.fluid {
 .navbar-top {
   display: grid;
   /* Columns: Title, flexible space, Menu, flexible space, Logo, Info */
-  grid-template-columns: minmax(220px, auto) minmax(8px, 1fr) minmax(0, auto) minmax(8px, 1fr) auto 40px;
+  grid-template-columns: minmax(220px, auto) minmax(8px, 1fr) minmax(0, clamp(760px, 54vw, 920px)) minmax(8px, 1fr) auto 40px;
   grid-template-rows: 1fr;
   align-items: center; /* Vertically center everything */
   
@@ -108,6 +108,7 @@ html, body, .container.fluid {
   align-items: center;
   gap: 6px;
   min-width: 0;
+  width: 100%;
 }
 
 .navigation-logo-right {
diff --git a/src/XAInyPredictor/shinyapp/www/style.css b/src/XAInyPredictor/shinyapp/www/style.css
index 2a6b9d3..6cd5a6c 100644
--- a/src/XAInyPredictor/shinyapp/www/style.css
+++ b/src/XAInyPredictor/shinyapp/www/style.css
@@ -1,10 +1,18 @@
+.container-fluid {
+  background:
+    linear-gradient(180deg, rgba(245, 251, 255, 0.96) 0%, rgba(255, 255, 255, 1) 220px),
+    #ffffff;
+  color: #243443;
+  font-family: "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
+}
+
 .container-fluid {
   padding-right: 0;
   padding-left: 0;
 }
 
 .navbar-top {
-  background: linear-gradient(180deg, #f5fbff 0%, #eef6fd 100%);
+  background: linear-gradient(180deg, #f7fbff 0%, #edf6fb 100%);
   border-bottom: 1px solid #dbe8f3;
   box-shadow: 0 2px 10px rgba(31, 45, 61, 0.08);
 }
@@ -30,7 +38,7 @@
 }
 
 .navbar-button.active-tab {
-  background: #007bff !important;
+  background: linear-gradient(180deg, #0878d8 0%, #0067bd 100%) !important;
   color: white !important;
   font-weight: bold;          /* Emphasize active state */
   border: 1px solid #006add;
@@ -92,7 +100,7 @@
 .navigation-title h3 {
   font-family: 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
   text-transform: uppercase;
-  color: #007bff;
+  color: #0674c8;
   font-size: 1.75rem;
   font-weight: 850;
   letter-spacing: 0;
@@ -152,18 +160,60 @@
   border: 1px solid #dbe8f3;
   border-radius: 9px;
   box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.8);
+  box-sizing: border-box;
   padding: 5px;
   min-width: 0;
+  width: 100%;
+}
+
+.card {
+  border-color: #dbe8f3;
+  border-radius: 8px;
+  box-shadow: 0 10px 24px rgba(31, 45, 61, 0.07);
+}
+
+.card-header {
+  background: linear-gradient(180deg, #ffffff 0%, #f8fbff 100%);
+  border-bottom-color: #dbe8f3;
+  color: #1f2d3d;
+  font-weight: 750;
+}
+
+.btn-primary {
+  background: linear-gradient(180deg, #0878d8 0%, #0067bd 100%);
+  border-color: #0062b3;
+  box-shadow: 0 7px 16px rgba(0, 103, 189, 0.18);
+  font-weight: 700;
+}
+
+.btn-primary:hover,
+.btn-primary:focus {
+  background: linear-gradient(180deg, #0a83ea 0%, #006fcf 100%);
+  border-color: #005da8;
+}
+
+.btn-default {
+  border-color: #b9c7d4;
+  color: #2d3b48;
+  font-weight: 650;
 }
 
 #div-navbar-tabs .form-group {
   margin-bottom: 0;
 }
 
+.navbar-use-case-selector {
+  align-items: center;
+  display: flex;
+  flex: 0 1 330px;
+  margin-left: 10px;
+  min-width: 260px;
+}
+
 #div-navbar-tabs select,
 #div-navbar-tabs .selectize-control {
-  min-width: 180px;
-  width: clamp(180px, 18vw, 260px);
+  min-width: 260px;
+  width: 100%;
 }
 
 #div-navbar-tabs .selectize-input {
@@ -174,6 +224,10 @@
   padding: 8px 34px 8px 12px;
 }
 
+#div-navbar-tabs .selectize-input > input {
+  min-width: 0 !important;
+}
+
 .loading-overlay {
   align-items: center;
   background: rgba(245, 251, 255, 0.78);
@@ -560,8 +614,8 @@ body.startup-loading #use-case-loading-overlay {
 
 .patient-visual-settings-card {
   border-color: #dbe8f3;
-  box-shadow: 0 4px 14px rgba(31, 45, 61, 0.06);
-  margin: 10px 0 18px;
+  box-shadow: 0 10px 24px rgba(31, 45, 61, 0.08);
+  margin: 12px 0 18px;
   overflow: visible;
 }
 
@@ -574,18 +628,18 @@ body.startup-loading #use-case-loading-overlay {
 
 .patient-visual-settings-card .card-body {
   overflow: visible;
-  padding: 10px 14px;
+  padding: 13px 14px;
 }
 
 .patient-visual-settings-grid {
   align-items: start;
   display: grid;
-  column-gap: 28px;
+  column-gap: 24px;
   grid-template-areas:
     "top radar"
     "features radar";
   grid-template-columns: minmax(520px, 1fr) minmax(360px, max-content);
-  row-gap: 12px;
+  row-gap: 10px;
   overflow: visible;
 }
 
@@ -596,7 +650,7 @@ body.startup-loading #use-case-loading-overlay {
 .patient-visual-controls-top {
   align-items: end;
   display: grid;
-  gap: 12px;
+  gap: 10px;
   grid-template-columns: minmax(220px, 280px) auto;
   grid-area: top;
   justify-content: start;
@@ -645,7 +699,7 @@ body.startup-loading #use-case-loading-overlay {
 .patient-visual-settings-card .feature-control-button {
   flex: 0 0 auto;
   min-width: 92px;
-  padding: 6px 10px;
+  padding: 7px 11px;
 }
 
 .feature-selector-inline {
@@ -688,17 +742,24 @@ body.startup-loading #use-case-loading-overlay {
 
 .patient-visual-setting-radar .checkbox,
 .patient-visual-setting-radar .form-check {
-  background: #f4f7fa;
+  background: #ffffff;
   border: 1px solid #dbe8f3;
   border-radius: 999px;
   color: #1f2d3d;
   display: inline-flex;
   font-size: 0.8rem;
   line-height: 1;
-  padding: 6px 9px;
+  padding: 7px 10px;
   width: fit-content;
 }
 
+.patient-visual-setting-radar .checkbox:has(input:checked),
+.patient-visual-setting-radar .form-check:has(input:checked) {
+  background: #eef7ff;
+  border-color: #b9dcff;
+  color: #174a73;
+}
+
 .patient-visual-setting-radar input[type="checkbox"] {
   margin: 0 6px 0 0;
 }
@@ -728,27 +789,83 @@ body.startup-loading #use-case-loading-overlay {
   margin-top: 4px;
 }
 
+.context-sidebar {
+  background: linear-gradient(180deg, #fbfdff 0%, #f6fbff 100%);
+}
+
+.context-sidebar-intro {
+  background: #ffffff;
+  border: 1px solid #dbe8f3;
+  border-radius: 8px;
+  margin: 2px 0 18px;
+  padding: 12px;
+}
+
+.context-sidebar-eyebrow {
+  color: #0878d8;
+  font-size: 0.74rem;
+  font-weight: 800;
+  margin-bottom: 5px;
+  text-transform: uppercase;
+}
+
+.context-sidebar-copy {
+  color: #405261;
+  font-size: 0.84rem;
+  line-height: 1.35;
+}
+
+.context-sidebar .form-group,
+.context-sidebar .shiny-input-container {
+  margin-bottom: 18px;
+}
+
+.context-sidebar label,
+.context-sidebar .control-label {
+  color: #243443;
+  font-size: 0.86rem;
+  font-weight: 750;
+}
+
+.context-sidebar .selectize-input,
+.context-sidebar select.form-control {
+  border-color: #b9c7d4;
+  border-radius: 6px;
+  min-height: 38px;
+}
+
+.context-sidebar .selectize-input.focus,
+.context-sidebar select.form-control:focus {
+  border-color: #0878d8;
+  box-shadow: 0 0 0 3px rgba(8, 120, 216, 0.13);
+}
+
 .cohort-context-page {
+  margin: 0 auto;
+  max-width: 1680px;
   padding: 18px 22px 44px;
 }
 
 .cohort-context-plot-card,
 .feature-summary-card {
   border-color: #dbe8f3;
-  box-shadow: 0 8px 22px rgba(31, 45, 61, 0.08);
+  border-radius: 8px;
+  box-shadow: 0 10px 24px rgba(31, 45, 61, 0.08);
+  overflow: hidden;
 }
 
 .cohort-context-plot-card .card-header,
 .feature-summary-card .card-header {
-  background: #fbfdff;
+  background: linear-gradient(180deg, #ffffff 0%, #f8fbff 100%);
   border-bottom-color: #dbe8f3;
   font-weight: 750;
-  padding: 11px 14px;
+  padding: 12px 14px;
 }
 
 .context-plot-context {
+  background: #fbfdff;
   border-bottom: 1px solid #e7eef5;
-  padding: 10px 12px;
+  padding: 11px 14px;
 }
 
 .context-chip-row {
@@ -758,12 +875,12 @@ body.startup-loading #use-case-loading-overlay {
 }
 
 .context-chip {
-  background: #f3f6f9;
-  border: 1px solid #d9e1e8;
+  background: #ffffff;
+  border: 1px solid #d7e5f2;
   border-radius: 999px;
   color: #405261;
   font-size: 0.8rem;
-  font-weight: 650;
+  font-weight: 700;
   line-height: 1;
   padding: 7px 10px;
 }
@@ -777,6 +894,7 @@ body.startup-loading #use-case-loading-overlay {
 .context-small-sample {
   background: #f5f9fd;
   border: 1px solid #cfe0ef;
+  border-left: 4px solid #0878d8;
   border-radius: 6px;
   color: #405261;
   font-size: 0.82rem;
@@ -785,18 +903,25 @@ body.startup-loading #use-case-loading-overlay {
   padding: 8px 10px;
 }
 
+.context-small-sample-warning {
+  background: #fff8e5;
+  border-color: #efd083;
+  color: #664d03;
+}
+
 .feature-metric-grid {
   display: grid;
-  gap: 10px;
+  gap: 8px;
   grid-template-columns: repeat(2, minmax(0, 1fr));
+  padding: 12px;
 }
 
 .feature-metric {
-  background: #f8fbff;
+  background: #ffffff;
   border: 1px solid #dbe8f3;
   border-radius: 7px;
   min-width: 0;
-  padding: 11px 12px;
+  padding: 10px 11px;
 }
 
 .feature-metric-primary {
@@ -810,7 +935,7 @@ body.startup-loading #use-case-loading-overlay {
 
 .feature-metric-label {
   color: #667789;
-  font-size: 0.76rem;
+  font-size: 0.72rem;
   font-weight: 700;
   margin-bottom: 5px;
   text-transform: uppercase;
@@ -818,22 +943,147 @@ body.startup-loading #use-case-loading-overlay {
 
 .feature-metric-value {
   color: #1f2d3d;
-  font-size: 1rem;
-  font-weight: 750;
-  line-height: 1.25;
+  font-size: 1.02rem;
+  font-weight: 800;
+  line-height: 1.2;
   overflow-wrap: anywhere;
 }
 
 .feature-summary-empty {
-  background: #f8fbff;
+  align-items: center;
+  background: linear-gradient(180deg, #f8fbff 0%, #ffffff 100%);
   border: 1px dashed #bfd5e8;
-  border-radius: 7px;
+  border-radius: 8px;
   color: #667789;
+  display: flex;
+  flex-direction: column;
   font-size: 0.9rem;
+  justify-content: center;
+  margin: 12px;
+  min-height: 118px;
   padding: 16px;
   text-align: center;
 }
 
+.feature-summary-empty-title {
+  color: #405261;
+  font-weight: 750;
+  margin-bottom: 4px;
+}
+
+.feature-summary-empty-copy {
+  color: #667789;
+  font-size: 0.84rem;
+  line-height: 1.35;
+}
+
+.input-completeness-warning,
+.profile-missing-notice {
+  background: #fff8e5;
+  border: 1px solid #efd083;
+  border-radius: 7px;
+  color: #664d03;
+  margin: 0 0 12px;
+  padding: 10px 12px;
+}
+
+.input-completeness-title,
+.profile-missing-notice-title {
+  font-weight: 750;
+  margin-bottom: 4px;
+}
+
+.input-completeness-copy,
+.profile-missing-notice-copy {
+  font-size: 0.84rem;
+  line-height: 1.35;
+}
+
+.profile-missing-notice {
+  margin: 12px 14px 0;
+}
+
+.stratification-sidebar {
+  background: linear-gradient(180deg, #fbfdff 0%, #f6fbff 100%);
+}
+
+.stratification-sidebar-intro {
+  background: #ffffff;
+  border: 1px solid #dbe8f3;
+  border-radius: 8px;
+  margin: 2px 0 18px;
+  padding: 12px;
+}
+
+.stratification-sidebar-eyebrow {
+  color: #0878d8;
+  font-size: 0.74rem;
+  font-weight: 800;
+  margin-bottom: 5px;
+  text-transform: uppercase;
+}
+
+.stratification-sidebar-copy {
+  color: #405261;
+  font-size: 0.84rem;
+  line-height: 1.35;
+}
+
+.stratification-sidebar .form-group,
+.stratification-sidebar .shiny-input-container {
+  margin-bottom: 18px;
+}
+
+.stratification-sidebar label,
+.stratification-sidebar .control-label {
+  color: #243443;
+  font-size: 0.86rem;
+  font-weight: 750;
+}
+
+.stratification-sidebar .selectize-input,
+.stratification-sidebar input.form-control {
+  border-color: #b9c7d4;
+  border-radius: 6px;
+  min-height: 38px;
+}
+
+.stratification-sidebar .selectize-input.focus,
+.stratification-sidebar input.form-control:focus {
+  border-color: #0878d8;
+  box-shadow: 0 0 0 3px rgba(8, 120, 216, 0.13);
+}
+
+.fnr-status-panel {
+  background: #ffffff;
+  border: 1px solid #dbe8f3;
+  border-radius: 8px;
+  display: grid;
+  gap: 7px;
+  margin-top: 10px;
+  padding: 10px;
+}
+
+.fnr-status-item {
+  align-items: baseline;
+  display: flex;
+  gap: 8px;
+  justify-content: space-between;
+}
+
+.fnr-status-label {
+  color: #667789;
+  font-size: 0.74rem;
+  font-weight: 800;
+  text-transform: uppercase;
+}
+
+.fnr-status-value {
+  color: #1f2d3d;
+  font-size: 0.9rem;
+  font-weight: 800;
+}
+
 .stratification-summary {
   display: grid;
   grid-template-columns: repeat(4, minmax(0, 1fr));
@@ -849,14 +1099,16 @@ body.startup-loading #use-case-loading-overlay {
 }
 
 .stratification-tabset {
-  padding-top: 6px;
+  margin: 0 auto;
+  max-width: 1680px;
+  padding: 18px 22px 44px;
 }
 
 .stratification-tabset-title {
   color: #1f2d3d;
-  font-size: 1.1rem;
+  font-size: 1.14rem;
   font-weight: 750;
-  margin: 0 0 10px;
+  margin: 0 0 12px;
 }
 
 .stratification-tabset .nav-tabs {
@@ -870,13 +1122,21 @@ body.startup-loading #use-case-loading-overlay {
   border-radius: 7px 7px 0 0;
   color: #405261;
   font-weight: 650;
-  padding: 10px 16px;
+  padding: 9px 16px;
 }
 
 .stratification-tabset .nav-tabs .nav-link.active {
-  background: #f8fbff;
-  border-color: #dbe8f3 #dbe8f3 #f8fbff;
-  color: #007bff;
+  background: linear-gradient(180deg, #ffffff 0%, #f8fbff 100%);
+  border-color: #b9dcff #b9dcff #ffffff;
+  border-top: 3px solid #0878d8;
+  color: #0674c8;
+  padding-top: 7px;
+}
+
+.stratification-tabset .nav-tabs .nav-link:not(.active):hover {
+  background: #eef7ff;
+  border-color: #dbe8f3;
+  color: #1f2d3d;
 }
 
 .stratification-tabset .tab-content {
@@ -892,7 +1152,7 @@ body.startup-loading #use-case-loading-overlay {
   flex-wrap: wrap;
   gap: 8px;
   justify-content: flex-start;
-  margin: 12px 0 10px;
+  margin: 0 0 12px;
   padding: 0;
 }
 
@@ -906,7 +1166,7 @@ body.startup-loading #use-case-loading-overlay {
 }
 
 .stratification-download-details summary {
-  background: #007bff;
+  background: linear-gradient(180deg, #0878d8 0%, #0067bd 100%);
   border: 1px solid #006add;
   border-radius: 7px;
   box-shadow: 0 8px 18px rgba(0, 123, 255, 0.22);
@@ -951,7 +1211,7 @@ body.startup-loading #use-case-loading-overlay {
   position: absolute;
   left: 0;
   top: calc(100% + 6px);
-  z-index: 30;
+  z-index: 80;
 }
 
 .stratification-download-menu .btn {
@@ -969,46 +1229,162 @@ body.startup-loading #use-case-loading-overlay {
   padding: 10px 12px;
 }
 
-.model-card .card-body {
-  padding-bottom: 14px;
+.reference-similarity-card {
+  border-color: #dbe8f3;
+  border-radius: 8px;
+  box-shadow: 0 10px 24px rgba(31, 45, 61, 0.08);
+  height: auto !important;
+  max-height: none !important;
+  overflow: hidden;
 }
 
-.model-card-grid {
-  display: grid;
-  grid-template-columns: repeat(4, minmax(0, 1fr));
-  gap: 10px;
-  margin-bottom: 12px;
+.reference-similarity-card .card-body {
+  height: auto !important;
+  max-height: none !important;
+  overflow: visible !important;
 }
 
-.model-card-item,
-.model-card-note {
-  background: #f8fbff;
-  border: 1px solid #d7e5f2;
-  border-radius: 6px;
-  padding: 10px 12px;
+.reference-similarity-card .card-header {
+  background: linear-gradient(180deg, #ffffff 0%, #f8fbff 100%);
+  border-bottom-color: #dbe8f3;
+  font-weight: 750;
+  padding: 12px 14px;
 }
 
-.model-card-item {
-  min-height: 70px;
+.reference-similarity-feature-panel {
+  padding: 0 14px 14px;
 }
 
-.model-card-label {
-  color: #5f6f7f;
-  font-size: 0.83rem;
-  font-weight: 700;
-  margin-bottom: 5px;
+.reference-similarity-section-title {
+  color: #1f2d3d;
+  font-size: 0.95rem;
+  font-weight: 800;
+  margin: 4px 0 4px;
 }
 
-.model-card-value {
-  color: #1f2d3d;
-  font-size: 0.98rem;
-  font-weight: 750;
-  line-height: 1.25;
-  overflow-wrap: anywhere;
+.reference-similarity-section-copy {
+  color: #5f6f7f;
+  font-size: 0.84rem;
+  line-height: 1.35;
+  margin-bottom: 10px;
 }
 
-.model-card-groups .model-card-value {
-  font-size: 0.94rem;
+.reference-similarity-table-wrap {
+  border: 1px solid #d7e5f2;
+  border-radius: 8px;
+  max-height: none;
+  overflow-x: auto;
+}
+
+.reference-similarity-table {
+  background: #ffffff;
+  border-collapse: collapse;
+  font-size: 0.84rem;
+  width: 100%;
+}
+
+.reference-similarity-table th,
+.reference-similarity-table td {
+  border-bottom: 1px solid #e7eef5;
+  padding: 9px 10px;
+  text-align: left;
+  vertical-align: top;
+}
+
+.reference-similarity-table th {
+  background: #f3f8fd;
+  color: #334b63;
+  font-size: 0.78rem;
+  font-weight: 800;
+}
+
+.reference-similarity-table td:last-child {
+  color: #174a73;
+  font-weight: 700;
+}
+
+.reference-distance-details {
+  margin: 0 14px 14px;
+}
+
+.reference-distance-details summary {
+  color: #405261;
+  cursor: pointer;
+  font-size: 0.84rem;
+  font-weight: 750;
+  padding: 8px 0;
+}
+
+.reference-distance-details .reference-candidates-table-wrap {
+  max-height: 260px;
+  margin-top: 4px;
+  overflow: auto;
+}
+
+.reference-similarity-empty {
+  margin: 12px 14px;
+}
+
+.model-card .card-body {
+  padding-bottom: 14px;
+}
+
+.selected-patient-stratification-card,
+.stratification-results-card,
+.patient-visual-settings-card,
+.patient-profile-plot-card {
+  border-color: #dbe8f3;
+  border-radius: 8px;
+  box-shadow: 0 10px 24px rgba(31, 45, 61, 0.08);
+  overflow: hidden;
+}
+
+.selected-patient-stratification-card .card-header,
+.stratification-results-card .card-header,
+.patient-visual-settings-card .card-header,
+.patient-profile-plot-card .card-header {
+  background: linear-gradient(180deg, #ffffff 0%, #f8fbff 100%);
+  border-bottom-color: #dbe8f3;
+  font-weight: 750;
+  padding: 12px 14px;
+}
+
+.model-card-grid {
+  display: grid;
+  grid-template-columns: repeat(4, minmax(0, 1fr));
+  gap: 10px;
+  margin-bottom: 12px;
+}
+
+.model-card-item,
+.model-card-note {
+  background: #f8fbff;
+  border: 1px solid #d7e5f2;
+  border-radius: 6px;
+  padding: 10px 12px;
+}
+
+.model-card-item {
+  min-height: 70px;
+}
+
+.model-card-label {
+  color: #5f6f7f;
+  font-size: 0.83rem;
+  font-weight: 700;
+  margin-bottom: 5px;
+}
+
+.model-card-value {
+  color: #1f2d3d;
+  font-size: 0.98rem;
+  font-weight: 750;
+  line-height: 1.25;
+  overflow-wrap: anywhere;
+}
+
+.model-card-groups .model-card-value {
+  font-size: 0.94rem;
 }
 
 .model-card-copy {
@@ -1068,6 +1444,40 @@ body.startup-loading #use-case-loading-overlay {
   padding: 12px 0;
 }
 
+.guided-empty-state {
+  align-items: flex-start;
+  background: linear-gradient(180deg, #f8fbff 0%, #ffffff 100%);
+  border: 1px dashed #bfd5e8;
+  border-radius: 8px;
+  display: flex;
+  flex-direction: column;
+  justify-content: center;
+  min-height: 118px;
+  padding: 18px 20px;
+}
+
+.guided-empty-eyebrow {
+  color: #0878d8;
+  font-size: 0.74rem;
+  font-weight: 800;
+  margin-bottom: 6px;
+  text-transform: uppercase;
+}
+
+.guided-empty-title {
+  color: #1f2d3d;
+  font-size: 1rem;
+  font-weight: 800;
+  margin-bottom: 5px;
+}
+
+.guided-empty-copy {
+  color: #5f6f7f;
+  font-size: 0.9rem;
+  line-height: 1.35;
+  max-width: 720px;
+}
+
 .stratification-interpretation {
   border-left: 4px solid #007bff;
   color: #2d3b48;
@@ -1141,33 +1551,62 @@ body.startup-loading #use-case-loading-overlay {
 }
 
 .current-set-card .card-body {
-  padding: 12px 14px 10px;
+  padding: 10px 14px;
 }
 
 .empty-set-state {
   align-items: center;
-  background: #f8fbff;
-  border: 1px dashed #bfd5e8;
-  border-radius: 7px;
+  background:
+    linear-gradient(180deg, rgba(248, 251, 255, 0.98) 0%, rgba(244, 250, 247, 0.98) 100%);
+  border: 1px dashed #abcfe4;
+  border-radius: 8px;
   display: flex;
   flex-direction: column;
   justify-content: center;
-  min-height: 94px;
-  padding: 18px;
+  min-height: 122px;
+  padding: 19px 22px;
   text-align: center;
 }
 
+.empty-set-eyebrow {
+  color: #0878d8;
+  font-size: 0.76rem;
+  font-weight: 800;
+  letter-spacing: 0;
+  margin-bottom: 6px;
+  text-transform: uppercase;
+}
+
 .empty-set-title {
   color: #1f2d3d;
-  font-size: 1rem;
-  font-weight: 750;
-  margin-bottom: 4px;
+  font-size: 1.04rem;
+  font-weight: 800;
+  margin-bottom: 5px;
 }
 
 .empty-set-copy {
   color: #5f6f7f;
   font-size: 0.86rem;
   line-height: 1.35;
+  max-width: 680px;
+}
+
+.empty-set-chip-row {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 7px;
+  justify-content: center;
+  margin-top: 12px;
+}
+
+.empty-set-chip {
+  background: #ffffff;
+  border: 1px solid #d7e5f2;
+  border-radius: 999px;
+  color: #405261;
+  font-size: 0.78rem;
+  font-weight: 700;
+  padding: 5px 9px;
 }
 
 .cohort-table-title {
@@ -1222,8 +1661,8 @@ body.startup-loading #use-case-loading-overlay {
 .data-input-method-tabs {
   background: #ffffff;
   border: 1px solid #dbe8f3;
-  border-radius: 7px;
-  box-shadow: 0 6px 18px rgba(31, 45, 61, 0.06);
+  border-radius: 8px;
+  box-shadow: 0 10px 24px rgba(31, 45, 61, 0.08);
   margin-top: 16px;
   overflow: hidden;
 }
@@ -1260,8 +1699,25 @@ body.startup-loading #use-case-loading-overlay {
 
 .data-input-method-tabs .nav-link {
   border-radius: 6px 6px 0 0;
+  color: #405261;
   font-weight: 650;
-  padding: 10px 14px;
+  margin-top: 6px;
+  padding: 9px 14px;
+  transition: background-color 0.16s ease, border-color 0.16s ease, color 0.16s ease;
+}
+
+.data-input-method-tabs .nav-link.active {
+  background: linear-gradient(180deg, #ffffff 0%, #f8fbff 100%);
+  border-color: #b9dcff #b9dcff #ffffff;
+  border-top: 3px solid #0878d8;
+  color: #0674c8;
+  padding-top: 7px;
+}
+
+.data-input-method-tabs .nav-link:not(.active):hover {
+  background: #eef7ff;
+  border-color: #dbe8f3;
+  color: #1f2d3d;
 }
 
 .data-input-method-tabs .tab-content {
@@ -1314,9 +1770,9 @@ body.startup-loading #use-case-loading-overlay {
 }
 
 .manual-entry-section {
-  background: #ffffff;
+  background: linear-gradient(180deg, #ffffff 0%, #fbfdff 100%);
   border: 1px solid #e1ebf4;
-  border-radius: 7px;
+  border-radius: 8px;
   min-width: 0;
   padding: 14px;
 }
@@ -1350,6 +1806,45 @@ body.startup-loading #use-case-loading-overlay {
   width: 100% !important;
 }
 
+.manual-entry-field input,
+.manual-entry-field select,
+.upload-file-picker input,
+.document-csv-step input,
+.form-control {
+  border-color: #b9c7d4;
+  border-radius: 6px;
+  box-shadow: none;
+  min-height: 38px;
+}
+
+.manual-entry-field input:focus,
+.manual-entry-field select:focus,
+.upload-file-picker input:focus,
+.document-csv-step input:focus,
+.form-control:focus,
+.selectize-input.focus {
+  border-color: #0878d8;
+  box-shadow: 0 0 0 3px rgba(8, 120, 216, 0.13);
+  outline: none;
+}
+
+.manual-entry-field label,
+.upload-file-picker label,
+.document-csv-step label {
+  color: #243443;
+  font-size: 0.86rem;
+  font-weight: 650;
+  margin-bottom: 5px;
+}
+
+.manual-entry-field .selectize-input {
+  border-color: #b9c7d4;
+  border-radius: 6px;
+  box-shadow: none;
+  min-height: 38px;
+  padding: 8px 34px 8px 10px;
+}
+
 .form-field-with-help {
   align-items: center;
   display: flex;
@@ -1392,6 +1887,10 @@ body.startup-loading #use-case-loading-overlay {
   max-width: 420px;
 }
 
+.file-input-use-case-marker {
+  display: none;
+}
+
 .upload-resource-actions {
   align-items: center;
   display: flex;
@@ -1459,6 +1958,300 @@ body.startup-loading #use-case-loading-overlay {
   min-width: 240px;
 }
 
+.document-csv-panel {
+  display: grid;
+  gap: 14px;
+}
+
+.document-csv-step {
+  align-items: start;
+  background: #f8fbff;
+  border: 1px solid #dbe8f3;
+  border-radius: 7px;
+  display: grid;
+  gap: 12px;
+  grid-template-columns: auto minmax(0, 1fr);
+  padding: 16px;
+}
+
+.document-csv-step-number {
+  align-items: center;
+  background: #eef7ff;
+  border: 1px solid #b9dcff;
+  border-radius: 999px;
+  color: #006add;
+  display: inline-flex;
+  font-size: 0.86rem;
+  font-weight: 800;
+  height: 28px;
+  justify-content: center;
+  width: 28px;
+}
+
+.document-csv-step-title {
+  color: #1f2d3d;
+  font-size: 0.94rem;
+  font-weight: 750;
+}
+
+.document-csv-actions {
+  justify-content: flex-start;
+  margin: 10px 0 12px;
+}
+
+.document-csv-instructions {
+  color: #334b63;
+  font-size: 0.84rem;
+  line-height: 1.4;
+  margin: 10px 0 12px;
+  padding-left: 18px;
+}
+
+.document-csv-instructions li {
+  margin: 3px 0;
+}
+
+.document-csv-prompt-wrap {
+  margin-top: 8px;
+}
+
+.document-csv-prompt-toolbar {
+  align-items: center;
+  display: flex;
+  flex-wrap: wrap;
+  gap: 8px;
+  justify-content: space-between;
+  margin-bottom: 6px;
+}
+
+.document-csv-prompt-label {
+  color: #334b63;
+  font-size: 0.82rem;
+  font-weight: 750;
+  margin: 0;
+}
+
+.document-csv-copy-prompt {
+  min-width: 104px;
+}
+
+.document-csv-copy-success {
+  border-color: #198754 !important;
+  color: #0f5132 !important;
+}
+
+.document-csv-copy-failed {
+  border-color: #dc3545 !important;
+  color: #842029 !important;
+}
+
+.document-csv-prompt {
+  background: #ffffff;
+  border: 1px solid #cfe0ef;
+  border-radius: 6px;
+  color: #1f2d3d;
+  font-family: Consolas, Monaco, "Courier New", monospace;
+  font-size: 0.78rem;
+  line-height: 1.4;
+  max-width: 100%;
+  min-height: 260px;
+  padding: 10px 12px;
+  resize: vertical;
+  width: 100%;
+}
+
+.document-csv-disclaimer,
+.document-csv-empty-report,
+.document-csv-hidden-issues,
+.document-csv-no-issues {
+  color: #5f6f7f;
+  font-size: 0.84rem;
+  line-height: 1.35;
+  margin: 8px 0 0;
+}
+
+.document-csv-report-download {
+  margin-top: 10px;
+}
+
+.document-csv-report {
+  margin-top: 12px;
+}
+
+.document-csv-status {
+  border-radius: 6px;
+  margin-bottom: 10px;
+  padding: 9px 11px;
+}
+
+.document-csv-status-title {
+  font-weight: 750;
+}
+
+.document-csv-status-detail {
+  font-size: 0.84rem;
+  font-weight: 500;
+  line-height: 1.35;
+  margin-top: 4px;
+}
+
+.document-csv-status-pass {
+  background: #f3fbf6;
+  border: 1px solid #b7dfc5;
+  color: #0f5132;
+}
+
+.document-csv-status-warning {
+  background: #fff8e5;
+  border: 1px solid #efd083;
+  color: #664d03;
+}
+
+.document-csv-status-fail {
+  background: #fff5f5;
+  border: 1px solid #f1b6bd;
+  color: #842029;
+}
+
+.document-csv-summary {
+  display: grid;
+  gap: 8px;
+  grid-template-columns: repeat(4, minmax(120px, 1fr));
+  margin-bottom: 10px;
+}
+
+.document-csv-summary-item {
+  background: #ffffff;
+  border: 1px solid #d7e5f2;
+  border-radius: 6px;
+  padding: 9px 10px;
+}
+
+.document-csv-summary-label {
+  color: #5f6f7f;
+  font-size: 0.76rem;
+  font-weight: 650;
+}
+
+.document-csv-summary-value {
+  color: #1f2d3d;
+  font-size: 1rem;
+  font-weight: 800;
+  margin-top: 3px;
+}
+
+.document-csv-missing-summary {
+  background: #ffffff;
+  border: 1px solid #d7e5f2;
+  border-radius: 6px;
+  margin: 0 0 10px;
+  padding: 10px;
+}
+
+.document-csv-missing-title {
+  color: #334b63;
+  font-size: 0.78rem;
+  font-weight: 750;
+  margin-bottom: 8px;
+  text-transform: uppercase;
+}
+
+.document-csv-missing-chip-row {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 6px;
+}
+
+.document-csv-missing-chip {
+  background: #fff8e5;
+  border: 1px solid #efd083;
+  border-radius: 999px;
+  color: #664d03;
+  font-size: 0.78rem;
+  font-weight: 650;
+  padding: 5px 8px;
+}
+
+.document-csv-issue-table {
+  background: #ffffff;
+  border: 1px solid #d7e5f2;
+  border-collapse: collapse;
+  font-size: 0.78rem;
+  width: 100%;
+}
+
+.document-csv-issue-table th,
+.document-csv-issue-table td {
+  border-bottom: 1px solid #e7eef5;
+  padding: 7px 8px;
+  text-align: left;
+  vertical-align: top;
+}
+
+.document-csv-issue-table th {
+  background: #f3f8fd;
+  color: #334b63;
+  font-weight: 750;
+}
+
+.document-csv-checklist {
+  margin-top: 18px;
+  max-width: 980px;
+  padding-top: 2px;
+}
+
+.document-csv-checklist-intro {
+  color: #334b63;
+  font-size: 0.84rem;
+  line-height: 1.35;
+  margin: 0 0 14px;
+  max-width: 900px;
+}
+
+.document-csv-checklist .form-group,
+.document-csv-checklist .shiny-input-container {
+  margin-bottom: 0;
+  max-width: none;
+  width: 100%;
+}
+
+.document-csv-checklist .control-label {
+  display: none;
+}
+
+.document-csv-checklist .checkbox,
+.document-csv-checklist .form-check {
+  margin: 0 0 13px;
+}
+
+.document-csv-checklist .checkbox label,
+.document-csv-checklist .form-check-label {
+  align-items: flex-start;
+  color: #334b63;
+  display: flex;
+  gap: 11px;
+  line-height: 1.35;
+  max-width: 920px;
+}
+
+.document-csv-checklist input[type="checkbox"] {
+  flex: 0 0 auto;
+  height: 16px;
+  min-height: 0;
+  margin-top: 3px;
+  width: 16px;
+}
+
+.document-csv-load-action {
+  display: flex;
+  justify-content: flex-end;
+  margin-top: 12px;
+}
+
+.document-csv-load-action .btn-primary {
+  min-width: 260px;
+}
+
 .use-case-summary-item {
   border: 1px solid #d7e5f2;
   border-radius: 6px;
@@ -1619,10 +2412,20 @@ body.startup-loading #use-case-loading-overlay {
     flex-direction: column;
   }
 
+  .document-csv-step,
+  .document-csv-summary {
+    grid-template-columns: 1fr;
+  }
+
+  .document-csv-step-number {
+    margin-bottom: 2px;
+  }
+
   .upload-file-actions .btn-primary,
   .data-input-method-tabs .input-template-download,
   .manual-entry-submit,
-  .example-set-actions .btn-primary {
+  .example-set-actions .btn-primary,
+  .document-csv-load-action .btn-primary {
     width: 100%;
   }