Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 14 additions & 13 deletions dsperse/src/analyzers/onnx_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,13 @@ def analyze(self, save_path:str = None) -> Dict[str, Any]:
"""
# Extract model metadata
graph = self.onnx_model.graph

# Create maps for initializers and value info
initializer_map = {init.name: init for init in graph.initializer}

# Build a comprehensive value_info map from the original full model
full_model_value_info_map = {vi.name: vi for vi in graph.value_info}
full_model_value_info_map.update({vi.name: vi for vi in graph.input})
full_model_value_info_map.update({vi.name: vi for vi in graph.output})
# full_model_value_info_map = {vi.name: vi for vi in graph.value_info}
# full_model_value_info_map.update({vi.name: vi for vi in graph.input})
# full_model_value_info_map.update({vi.name: vi for vi in graph.output})

model_input_shape = self._get_model_input_shapes(graph, initializer_map)
model_output_shape = self._get_model_output_shapes(graph)
Expand Down Expand Up @@ -368,9 +367,9 @@ def generate_slices_metadata(self, model_metadata, slice_points, slices_paths, o

# Get segment metadata
segment_metadata = self._get_segment_metadata(
model_metadata,
segment_idx,
start_idx,
model_metadata,
segment_idx,
start_idx,
end_idx,
slice_path,
output_dir
Expand Down Expand Up @@ -464,7 +463,9 @@ def _get_segment_metadata(self, model_metadata, segment_idx, start_idx, end_idx,

segment_shape = self._get_segment_shape(end_idx, model_metadata, start_idx, slice_path)

output_dir = os.path.join(os.path.dirname(output_dir), "slices", "segment_{}".format(segment_idx)) if output_dir else os.path.join(os.path.dirname(self.onnx_path), "slices", "segment_{}".format(segment_idx))
output_dir = output_dir or os.path.join(os.path.dirname(self.onnx_path), "slices")
output_dir = os.path.join(output_dir, "segment_{}".format(segment_idx))

os.makedirs(output_dir, exist_ok=True)
segment_path = os.path.abspath(os.path.join(output_dir, f"segment_{segment_idx}.onnx"))

Expand Down Expand Up @@ -500,7 +501,7 @@ def _get_segment_dependencies(self, model_metadata, start_idx, end_idx):
for output in node_info['dependencies']['output']:
output_map[output] = True

# Check inputs and add any missing to dependencies
# Check inputs and add any missing to dependencies
for input_name in node_info['dependencies']['input']:
if input_name not in output_map:
if input_name not in segment_dependencies['input']:
Expand All @@ -511,7 +512,7 @@ def _get_segment_dependencies(self, model_metadata, start_idx, end_idx):
for output in output_map:
if output not in segment_dependencies['input']:
segment_dependencies['output'].append(output)

# Filter input names to exclude weights and biases
filtered_inputs = []
for input_name in segment_dependencies['input']:
Expand All @@ -521,18 +522,18 @@ def _get_segment_dependencies(self, model_metadata, start_idx, end_idx):
# Include model inputs and intermediate tensors
if input_name in [inp.name for inp in self.onnx_model.graph.input] or input_name.startswith('/'):
filtered_inputs.append(input_name)

# If there are no inputs after filtering, include the first non-weight/bias input
if not filtered_inputs:
for input_name in segment_dependencies['input']:
if not any(pattern in input_name.lower() for pattern in ["weight", "bias"]):
filtered_inputs.append(input_name)
break

# If still no inputs, use the first input as a fallback
if not filtered_inputs and segment_dependencies['input']:
filtered_inputs.append(segment_dependencies['input'][0])

segment_dependencies['filtered_inputs'] = filtered_inputs

return segment_dependencies
Expand Down
4 changes: 2 additions & 2 deletions dsperse/src/analyzers/runner_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@
logger = logging.getLogger(__name__)

class RunnerAnalyzer:
def __init__(self, model_directory):
def __init__(self, model_directory, slices_dir=None):
"""
Args:
model_directory: Path to the model directory.
"""
self.model_directory = model_directory
self.slices_dir = Path(os.path.join(model_directory, "slices")).resolve()
self.slices_dir = Path(slices_dir or os.path.join(model_directory, "slices")).resolve()
self.slices_metadata_path = self.slices_dir / "metadata.json"

self.size_limit = 1000 * 1024 * 1024 # 1000MB
Expand Down
19 changes: 10 additions & 9 deletions dsperse/src/cli/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def _param_name_suggests_path(name: str) -> bool:
if not name:
return False
name = name.lower()
for token in ("path", "dir", "file", "model", "slices", "output", "input", "run"):
for token in ("path", "dir", "file", "model", "slices", "run"):
if token in name:
return True
return False
Expand All @@ -57,11 +57,9 @@ def _looks_like_path(value: str) -> bool:
return False


def _maybe_normalize_from_prompt(param_name: str, prompt_message: str, value: str) -> str:
def _maybe_normalize_from_prompt(param_name: str, value: str) -> str:
try:
if _param_name_suggests_path(param_name) or _looks_like_path(value) or (
prompt_message and any(t in prompt_message.lower() for t in ["path", "directory", "dir", "file"]) # heuristic
):
if _param_name_suggests_path(param_name) or _looks_like_path(value):
return normalize_path(value)
except Exception:
pass
Expand Down Expand Up @@ -111,7 +109,10 @@ def configure_logging(log_level='WARNING'):
"The answer to life, the universe, and everything is... 42 (but you need a neural network to understand why).",
"Neural networks don't actually think. They just do math really fast.",
"If you're reading this, you're awesome! Keep up the great work!",
"Dsperse: Making neural networks more transparent, one slice at a time."
"Dsperse: Making neural networks more transparent, one slice at a time.",
"Remember: With great power comes great responsibility (and large models).",
"Keep calm and slice on!",
"Why did the neural network go to school? To improve its 'weights'!",
]

def print_header():
Expand Down Expand Up @@ -262,7 +263,7 @@ def prompt_for_value(param_name, prompt_message, default=None, required=True):
logger.debug(f"Using default run name for {param_name}: {default}")
return str(default)
else:
normalized_default = _maybe_normalize_from_prompt(param_name, prompt_message, str(default))
normalized_default = _maybe_normalize_from_prompt(param_name, str(default))
logger.debug(f"Using default value for {param_name}: {normalized_default}")
return normalized_default
value = user_input.strip().strip('\'"') # Strip surrounding quotes
Expand All @@ -271,7 +272,7 @@ def prompt_for_value(param_name, prompt_message, default=None, required=True):
logger.debug(f"User provided run name for {param_name}: {value}")
return value
else:
value = _maybe_normalize_from_prompt(param_name, prompt_message, value)
value = _maybe_normalize_from_prompt(param_name, value)
logger.debug(f"User provided value for {param_name}: {value}")
return value
else:
Expand All @@ -280,7 +281,7 @@ def prompt_for_value(param_name, prompt_message, default=None, required=True):
if user_input.strip() or not required:
if user_input.strip():
value = user_input.strip().strip('\'"') # Strip surrounding quotes
value = _maybe_normalize_from_prompt(param_name, prompt_message, value)
value = _maybe_normalize_from_prompt(param_name, value)
logger.debug(f"User provided value for {param_name}: {value}")
return value
else:
Expand Down
55 changes: 24 additions & 31 deletions dsperse/src/cli/full_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def setup_parser(subparsers):
help='Path to the model file (.onnx) or directory containing the model')
full_run_parser.add_argument('--input-file', '--input', '--if', '-i', dest='input_file',
help='Path to input file for inference and compilation calibration (e.g., input.json)')
full_run_parser.add_argument('--slices-dir', '--slices-directory', '--slices-directroy', '--sd', '-s', dest='slices_dir',
full_run_parser.add_argument('--slices-dir', '--slices-path', '--slices-directory', '--sd', '-s', dest='slices_dir',
help='Optional: Pre-existing slices directory to reuse (skips slicing step)')
full_run_parser.add_argument('--layers', '-l', help='Optional: Layers to compile (e.g., "3, 20-22") passed through to compile')
# Optional: allow non-interactive mode later if desired; kept interactive by default
Expand Down Expand Up @@ -72,9 +72,10 @@ def full_run(args):

using_builtin = False
builtin_name = None
canonical_model_dir = None

# 1) Resolve inputs interactively
if (not hasattr(args, 'model_dir') or not args.model_dir) and (not hasattr(args, 'input_file') or not args.input_file):
if not getattr(args, 'model_dir', None) and not getattr(args, 'input_file', None):
# Special prompt that accepts either a filesystem location or a built-in token
choice = prompt_for_value(
'selection',
Expand All @@ -96,22 +97,22 @@ def full_run(args):
# Set args to point to sources
args.model_dir = model_onnx
args.input_file = input_json
# Output root under user's home
output_root = os.path.expanduser(os.path.join('~', 'dsperse', builtin_name))
os.makedirs(output_root, exist_ok=True)
# For downstream steps, canonical model dir should be the output root
canonical_model_dir = normalize_path(output_root)
# For downstream steps, canonical model dir should be the output root. Make one under user's home
canonical_model_dir = normalize_path(os.path.join('~', 'dsperse', builtin_name))
os.makedirs(canonical_model_dir, exist_ok=True)
print(f"{Fore.CYAN}Using built-in model '{builtin_name}'. Outputs will be saved under {canonical_model_dir}{Style.RESET_ALL}")
else:
# Treat as a user-provided file or directory path
args.model_dir = normalize_path(choice)
canonical_model_dir = _determine_model_dir(args.model_dir)
elif getattr(args, 'model_dir', None):
# If model_dir provided - normalize provided values
args.model_dir = normalize_path(args.model_dir)
else:
# Normalize provided values
if hasattr(args, 'model_dir') and args.model_dir:
args.model_dir = normalize_path(args.model_dir)
# Determine canonical model directory for downstream steps
canonical_model_dir = _determine_model_dir(args.model_dir)
# If only input_file provided
args.model_dir = os.path.dirname(normalize_path(args.input_file))

# Determine canonical model directory for downstream steps
canonical_model_dir = canonical_model_dir or _determine_model_dir(args.model_dir)

# Input file resolution
if hasattr(args, 'input_file') and args.input_file:
Expand All @@ -122,30 +123,24 @@ def full_run(args):
args.input_file = prompt_for_value('input-file', 'Enter the input file', default=default_input, required=True)
args.input_file = normalize_path(args.input_file) if args.input_file else args.input_file

# If user provided an existing slices directory, skip slicing step
# 2) Slice (unless slices-dir provided)
slices_dir = None
if hasattr(args, 'slices_dir') and args.slices_dir:
# If user provided an existing slices directory, skip slicing step
slices_dir = normalize_path(args.slices_dir)

# 2) Slice (unless slices-dir provided)
if not slices_dir:
print(f"{Fore.YELLOW}Skipping slicing step, using existing slices at: {slices_dir}{Style.RESET_ALL}")
else:
# Default slices dir depends on whether we're using a built-in selection
default_slices_dir = os.path.join(canonical_model_dir, 'slices')
slices_dir = os.path.join(canonical_model_dir, 'slices')
analysis_dir = os.path.join(canonical_model_dir, 'analysis')
try:
os.makedirs(default_slices_dir, exist_ok=True)
os.makedirs(analysis_dir, exist_ok=True)
except Exception:
pass
os.makedirs(slices_dir, exist_ok=True)
os.makedirs(analysis_dir, exist_ok=True)
# Call existing slice command; keep its logic and interactivity.
# For built-ins, we point the slicer to the built-in model file but output to ~/dsperse/{name}/slices
model_metadata_path = os.path.join(analysis_dir, 'model_metadata.json')
slice_args = Namespace(model_dir=args.model_dir, output_dir=default_slices_dir, save_file=model_metadata_path)
slice_args = Namespace(model_dir=args.model_dir, output_dir=slices_dir, save_file=model_metadata_path)
print(f"{Fore.CYAN}Step 1/5: Slicing model...{Style.RESET_ALL}")
slice_model(slice_args)
slices_dir = default_slices_dir
else:
print(f"{Fore.YELLOW}Skipping slicing step, using existing slices at: {slices_dir}{Style.RESET_ALL}")

# 3) Compile (circuitize) with calibration input
compile_args = Namespace(slices_path=slices_dir, input_file=args.input_file, layers=getattr(args, 'layers', None))
Expand All @@ -154,10 +149,8 @@ def full_run(args):

# 4) Run inference
run_root_dir = os.path.join(canonical_model_dir, 'run')
try:
os.makedirs(run_root_dir, exist_ok=True)
except Exception:
pass
os.makedirs(run_root_dir, exist_ok=True)

inference_output_path = os.path.join(run_root_dir, 'inference_results.json')
run_args = Namespace(slices_dir=slices_dir, run_metadata_path=None, input_file=args.input_file, output_file=inference_output_path)
print(f"{Fore.CYAN}Step 3/5: Running inference over slices...{Style.RESET_ALL}")
Expand Down
17 changes: 10 additions & 7 deletions dsperse/src/cli/prove.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,18 +126,20 @@ def is_run_root_dir(p):
default_model_path = os.path.dirname(os.path.dirname(latest_run_path))

# Prompt with default if found
if default_run:
candidate = prompt_for_value('run-or-run-id-dir', 'Enter run directory (runs root or a run_* directory)', default=default_run)
else:
candidate = prompt_for_value('run-or-run-id-dir', 'Enter run directory (runs root or a run_* directory)')
candidate = prompt_for_value(
'run-or-run-id-dir',
'Enter run directory (runs root or a run_* directory)',
default=default_run or None
)


# Handle run names (starts with "run_") - prepend run/ directory BEFORE normalization
if candidate and candidate.startswith('run_') and not candidate.startswith('/') and not candidate.startswith('./') and not candidate.startswith('../'):
if candidate and candidate.startswith('run_'):
# Always try current directory's run/ first (for when running from model directory)
current_run_dir = os.path.join(os.getcwd(), "run")
if os.path.exists(current_run_dir):
candidate = os.path.join(current_run_dir, candidate)
elif 'default_model_path' in locals() and default_model_path and default_model_path != os.getcwd():
elif default_model_path and default_model_path != os.getcwd():
# Use stored default model path if different from current directory
model_run_dir = os.path.join(default_model_path, "run")
candidate = os.path.join(model_run_dir, candidate)
Expand All @@ -149,14 +151,15 @@ def is_run_root_dir(p):
model_path = os.path.join(models_dir, model_name)
if os.path.isdir(model_path):
model_run_dir = os.path.join(model_path, "run")
# XXX: so we just find any model dir with `run` in it and just use that? Seems brittle.
# What if multiple models have runs? That would lead to really confusing behavior.
if os.path.exists(model_run_dir) and os.path.exists(os.path.join(model_run_dir, candidate)):
candidate = os.path.join(model_run_dir, candidate)
break
# Handle already-normalized run names (absolute paths ending with run_*)
elif candidate and candidate.startswith('/') and os.path.basename(candidate).startswith('run_'):
# Check if this is a run name that was normalized to the wrong directory
basename = os.path.basename(candidate)
dirname = os.path.dirname(candidate)

# If the directory doesn't exist but we have model directories, look there
if not os.path.exists(candidate):
Expand Down
Loading