diff --git a/Cargo.toml b/Cargo.toml index 50d57c3..a61fce3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,7 @@ windows = { version = "0.48", features = ["Win32_Graphics_Dxgi"] } default = ["metal"] metal = ["dep:core-graphics", "dep:metal"] windows = ["dep:dxgi", "winapi"] +dashboard = [] [[bin]] name = "gpu-share-vm-manager" @@ -56,3 +57,8 @@ path = "src/main.rs" [dev-dependencies] tokio = { version = "1.0", features = ["full"] } rand = "0.8" +futures = "0.3" + +[[bin]] +name = "dashboard" +path = "src/bin/dashboard.rs" diff --git a/Dockerfile b/Dockerfile index 661f6f5..34ca732 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Build stage -FROM rust:1.75-slim-bookworm as builder +FROM rust:1.75 as builder WORKDIR /usr/src/app RUN apt-get update && apt-get install -y \ @@ -22,34 +22,6 @@ COPY . ./ RUN cargo build --release --locked # Runtime stage -FROM debian:bookworm-slim - -# Install runtime dependencies -RUN apt-get update && apt-get install -y \ - libvirt0 \ - libnvidia-ml1 \ - libvirt-clients \ - && rm -rf /var/lib/apt/lists/* - -# Create non-root user -RUN useradd -ms /bin/bash appuser -WORKDIR /app -RUN chown appuser:appuser /app -USER appuser - -# Copy the built executable -COPY --from=builder /usr/src/app/target/release/gpu-share-vm-manager . 
# Minimal distroless runtime image.
# NOTE(review): the previous runtime image installed libvirt0, libnvidia-ml1
# and libvirt-clients, ran as a non-root user and started the binary with the
# `serve` argument. Confirm the binary still starts without them.
FROM gcr.io/distroless/cc-debian12
# The builder stage sets WORKDIR /usr/src/app, so cargo writes the release
# binary under /usr/src/app/target; /app/target does not exist in that stage.
COPY --from=builder /usr/src/app/target/release/gpu-share-vm-manager /app/
CMD ["/app/gpu-share-vm-manager"]
{ + Self { inner, limiter } + } } // wrapper for RateLimitLayer @@ -209,4 +217,21 @@ mod tests { .unwrap(); assert_eq!(response.status(), StatusCode::TOO_MANY_REQUESTS); } +} + + +pub struct AdaptiveRateLimiter { + base_limits: RateLimitConfig, + metrics: Arc, +} + +impl AdaptiveRateLimiter { + pub fn adjust_limits_based_on_load(&mut self) { + let current_load = self.metrics.current_load(); + if current_load > 80.0 { + self.base_limits.requests = NonZeroU32::new( + self.base_limits.requests.get() * 2 + ).unwrap(); + } + } } \ No newline at end of file diff --git a/src/bin/dashboard.rs b/src/bin/dashboard.rs index 30bb5cd..6f9cd1f 100644 --- a/src/bin/dashboard.rs +++ b/src/bin/dashboard.rs @@ -6,18 +6,17 @@ use crossterm::event::{Event, KeyCode}; use crossterm::{execute, terminal::*}; use ratatui::{prelude::*, widgets::*}; use std::sync::{Arc, Mutex}; +use gpu_share_vm_manager::dashboard; +use tokio::runtime::Runtime; +use tokio::sync::Mutex as TokioMutex; -#[tokio::main] -async fn main() -> Result<()> { - let gpupool = Arc::new(Mutex::new(GPUPool::new())); - let user_manager = Arc::new(Mutex::new(UserManager::new())); - let billing_system = Arc::new(Mutex::new(BillingSystem::new())); +fn main() -> anyhow::Result<()> { + let gpupool = Arc::new(TokioMutex::new(GPUPool::new())); + let users = Arc::new(TokioMutex::new(UserManager::new())); + let billing = Arc::new(TokioMutex::new(BillingSystem::new())); - start_dashboard( - gpupool, - user_manager, - billing_system - ).await + let rt = Runtime::new()?; + rt.block_on(dashboard::start_dashboard(gpupool, users, billing)) } pub async fn start_dashboard( diff --git a/src/dashboard/mod.rs b/src/dashboard/mod.rs index b80682d..5b89a38 100644 --- a/src/dashboard/mod.rs +++ b/src/dashboard/mod.rs @@ -1,16 +1,43 @@ use anyhow::Result; use crossterm::{event::{Event, KeyCode}, execute, terminal::*}; -use ratatui::{prelude::*, widgets::*}; +use ratatui::{ + prelude::*, + widgets::*, + style::{Color, Modifier}, + text::{Line, 
Span}, + widgets::BorderType, +}; use std::sync::Arc; -// use tokio::sync::Mutex; -use crate::{gpu::virtual_gpu::GPUPool, users::UserManager, billing::BillingSystem}; - +use crate::gpu::GPUPool; +use crate::users::UserManager; +use crate::billing::BillingSystem; pub async fn start_dashboard( gpupool: Arc>, users: Arc>, _billing: Arc> ) -> Result<()> { + // ASCII + let dante_ascii = vec![ + r#"██████╗ █████╗ ███╗ ██╗████████╗███████╗"#, + r#"██╔══██╗██╔══██╗████╗ ██║╚══██╔══╝██╔════╝"#, + r#"██║ ██║███████║██╔██╗ ██║ ██║ █████╗ "#, + r#"██║ ██║██╔══██║██║╚██╗██║ ██║ ██╔══╝ "#, + r#"██████╔╝██║ ██║██║ ╚████║ ██║ ███████╗"#, + r#"╚═════╝ ╚═╝ ╚═╝╚═╝ ╚═══╝ ╚═╝ ╚══════╝"#, + ]; + + let menu_items = vec![ + "GPU Allocation", + "User Management", + "Billing Overview", + "System Metrics", + "Cluster Nodes", + "Exit", + ]; + + let mut selected_menu = 0; + enable_raw_mode()?; let mut stdout = std::io::stdout(); execute!(stdout, EnterAlternateScreen)?; @@ -23,48 +50,105 @@ pub async fn start_dashboard( let gpupool = gpupool.try_lock().unwrap(); let users = users.try_lock().unwrap(); - let gpu_list = List::new( - gpupool.gpus.values() - .map(|gpu| { - let status = if gpu.allocated_to.is_some() { - Span::styled("Occupied", Style::new().red()) - } else { - Span::styled("Available", Style::new().green()) - }; - ListItem::new(format!( - "GPU {}: {}MB - {} Cores - {}", - gpu.id, gpu.vram_mb, gpu.compute_units, status - )) - }) - .collect::>() - ) - .block(Block::default().title("GPUs").borders(Borders::ALL)); + // Ana layout + let main_layout = Layout::default() + .direction(Direction::Vertical) + .constraints([ + Constraint::Length(8), // Header + Constraint::Min(5), // Main content + Constraint::Length(3), // Footer + ]) + .split(f.size()); + + // Header + let header_block = Block::default() + .borders(Borders::ALL) + .border_style(Style::new().fg(Color::LightBlue)) + .border_type(BorderType::Thick); - let user_list = List::new( - users.users.values() - .map(|user| { - 
ListItem::new(format!( - "{}: ${:.2}", - user.id, user.credits - )) - }) - .collect::>() - ) - .block(Block::default().title("Users").borders(Borders::ALL)); + let header_text: Vec = dante_ascii.iter() + .map(|s| Line::from(*s).style(Style::new().fg(Color::LightBlue))) + .collect(); - let chunks = Layout::default() + f.render_widget( + Paragraph::new(header_text) + .block(header_block) + .alignment(Alignment::Center), + main_layout[0] + ); + + // Ana içerik + let content_layout = Layout::default() .direction(Direction::Horizontal) - .constraints([Constraint::Percentage(50), Constraint::Percentage(50)]) - .split(f.size()); + .constraints([Constraint::Percentage(20), Constraint::Percentage(80)]) + .split(main_layout[1]); + + // Menü paneli + let menu = List::new( + menu_items.iter().enumerate().map(|(i, item)| { + let style = if i == selected_menu { + Style::new() + .fg(Color::Black) + .bg(Color::LightBlue) + .add_modifier(Modifier::BOLD) + } else { + Style::new().fg(Color::White) + }; + ListItem::new(Span::styled(format!("▶ {} ", item), style)) + }) + ).block( + Block::default() + .title("Main Menu") + .borders(Borders::ALL) + .border_style(Style::new().fg(Color::LightBlue)) + ); + + // Detay paneli + let detail_block = Block::default() + .borders(Borders::ALL) + .border_style(Style::new().fg(Color::LightBlue)) + .title(match selected_menu { + 0 => "GPU Allocation", + 1 => "User Management", + 2 => "Billing Overview", + 3 => "System Metrics", + 4 => "Cluster Nodes", + _ => "Dashboard", + }); - f.render_widget(gpu_list, chunks[0]); - f.render_widget(user_list, chunks[1]); + let detail_content = match selected_menu { + 0 => render_gpu_panel(&gpupool), + 1 => render_user_panel(&users), + // Diğer menü öğeleri için render fonksiyonları... 
+ _ => Paragraph::new(""), + }; + + f.render_widget(menu, content_layout[0]); + f.render_widget( + detail_content.block(detail_block).to_owned(), + content_layout[1] + ); + + // Footer + let footer = Paragraph::new(Line::from(vec![ + Span::styled("Q: Quit", Style::new().fg(Color::LightYellow)), + Span::raw(" | "), + Span::styled("↑↓: Navigate", Style::new().fg(Color::LightGreen)), + Span::raw(" | "), + Span::styled("Enter: Select", Style::new().fg(Color::LightMagenta)), + ])).alignment(Alignment::Center); + + f.render_widget(footer, main_layout[2]); })?; if crossterm::event::poll(std::time::Duration::from_millis(100))? { if let Event::Key(key) = crossterm::event::read()? { - if key.code == KeyCode::Char('q') { - break; + match key.code { + KeyCode::Char('q') => break, + KeyCode::Up => selected_menu = selected_menu.saturating_sub(1), + KeyCode::Down => selected_menu = (selected_menu + 1).min(menu_items.len() - 1), + KeyCode::Enter => if let Some(()) = handle_menu_selection(selected_menu) {}, + _ => {} } } } @@ -75,3 +159,48 @@ pub async fn start_dashboard( Ok(()) } +fn render_gpu_panel(gpupool: &GPUPool) -> Paragraph { + let lines: Vec = gpupool.gpus.values() + .map(|gpu| { + let status = if gpu.allocated_to.is_some() { + Span::styled("● Busy", Style::new().fg(Color::Red)) + } else { + Span::styled("○ Free", Style::new().fg(Color::Green)) + }; + + Line::from(vec![ + Span::styled(format!("GPU {} ", gpu.id), Style::new().fg(Color::Cyan)), + Span::raw(format!("VRAM: {}MB ", gpu.vram_mb)), + Span::raw(format!("Cores: {} ", gpu.compute_units)), + status, + ]) + }) + .collect(); + + Paragraph::new(lines) +} + +fn render_user_panel(users: &UserManager) -> Paragraph { + let lines: Vec = users.users.values() + .map(|user| { + Line::from(vec![ + Span::styled(user.id.to_string(), Style::new().fg(Color::Yellow)), + Span::raw(" - Credits: "), + Span::styled( + format!("${:.2}", user.credits), + Style::new().fg(Color::LightGreen) + ), + ]) + }) + .collect(); + + 
Paragraph::new(lines) +} + +fn handle_menu_selection(selected: usize) -> Option<()> { + match selected { + 5 => std::process::exit(0), + _ => None + } +} + diff --git a/src/gpu/device.rs b/src/gpu/device.rs index eb1d70f..f838e27 100644 --- a/src/gpu/device.rs +++ b/src/gpu/device.rs @@ -289,6 +289,7 @@ struct UeventInfo { device_id: String, subsystem_id: String, model: String, + iommu_group: Option, } impl GPUInfo { diff --git a/src/gpu/mod.rs b/src/gpu/mod.rs index 8a0bd5b..61527e8 100644 --- a/src/gpu/mod.rs +++ b/src/gpu/mod.rs @@ -2,4 +2,6 @@ pub mod device; pub mod virtual_gpu; // exports cuz ain't nobody got time for full paths -pub use device::GPUManager; \ No newline at end of file +pub use device::GPUManager; +pub use virtual_gpu::GPUPool; +// pub use device::GPU; \ No newline at end of file diff --git a/src/monitoring/metrics.rs b/src/monitoring/metrics.rs index 9915215..e560d65 100644 --- a/src/monitoring/metrics.rs +++ b/src/monitoring/metrics.rs @@ -138,7 +138,7 @@ impl MetricsCollector { Ok(None) } - fn calculate_cpu_usage(cpu_time: u64) -> f64 { + pub(crate) fn calculate_cpu_usage(cpu_time: u64) -> f64 { // CPU usage calculation based on CPU time delta static mut LAST_CPU_TIME: u64 = 0; static mut LAST_TIMESTAMP: u64 = 0; @@ -183,7 +183,7 @@ impl MetricsCollector { } pub fn stop(&mut self) -> Result<(), Box> { - // Gerçek implementasyon + Ok(()) } @@ -213,6 +213,34 @@ impl MetricsCollector { .map(|metrics| metrics.clone()) .ok_or_else(|| anyhow::anyhow!("No metrics found for container")) } + + pub fn current_load(&self) -> f64 { + let metrics_store = self.container_metrics.lock().unwrap(); + + + let mut total_cpu = 0.0; + let mut total_memory = 0.0; + let mut count = 0; + + for container_metrics in metrics_store.values() { + if let Some(latest) = container_metrics.last() { + total_cpu += latest.cpu_usage_percent; + total_memory += (latest.memory_usage_mb as f64 / latest.memory_total_mb as f64) * 100.0; + count += 1; + } + } + + if count == 0 { + 
return 0.0; + } + + + let avg_cpu = total_cpu / count as f64; + let avg_memory = total_memory / count as f64; + + + (avg_cpu + avg_memory) / 2.0 + } } #[derive(Debug, Serialize)] diff --git a/src/monitoring/prometheus.rs b/src/monitoring/prometheus.rs new file mode 100644 index 0000000..99bc3e6 --- /dev/null +++ b/src/monitoring/prometheus.rs @@ -0,0 +1,4 @@ +pub fn register_custom_metrics() { + let cpu_usage = register_gauge!("cpu_usage_percent", "Current CPU usage"); + let gpu_mem = register_gauge!("gpu_memory_used", "GPU memory usage in MB"); +} \ No newline at end of file diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs new file mode 100644 index 0000000..1e01019 --- /dev/null +++ b/src/scheduler/mod.rs @@ -0,0 +1,11 @@ +pub async fn schedule_workload( + &self, + workload: AIWorkload, + priority: Priority, +) -> Result { + let job = Job::new(workload) + .with_priority(priority) + .with_resource_requirements(ResourceEstimation::from(workload)); + + self.queue.enqueue(job).await +} \ No newline at end of file diff --git a/tests/gpu_allocation.rs b/tests/gpu_allocation.rs new file mode 100644 index 0000000..39a02f7 --- /dev/null +++ b/tests/gpu_allocation.rs @@ -0,0 +1,21 @@ +use std::sync::Arc; +use tokio::sync::Mutex; +use futures::future::join_all; +use gpu_share_vm_manager::gpu::GPUPool; + +#[tokio::test] +async fn test_concurrent_allocations() { + let pool = Arc::new(Mutex::new(GPUPool::new())); + let mut handles = vec![]; + + for i in 0..10 { + let pool = pool.clone(); + handles.push(tokio::spawn(async move { + let mut pool = pool.lock().await; + pool.allocate(&format!("user{}", i), 0) + })); + } + + let results = join_all(handles).await; + assert_eq!(results.iter().filter(|r| r.is_ok()).count(), 1); +} \ No newline at end of file