From d71f3c683c86840f68a9ae94489aef9437bc536d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Magiera?=
Date: Wed, 16 Oct 2024 14:37:18 +0200
Subject: [PATCH] experiment: GPU Overprovisioning (#262)

* experiment: GPU Overprovisioning

* make gen
---
 harmony/resources/getGPU.go | 27 +++++++++++++++++++++++++--
 lib/ffiselect/ffiselect.go  |  9 ++++++---
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/harmony/resources/getGPU.go b/harmony/resources/getGPU.go
index 62d5c091e..a9a207125 100644
--- a/harmony/resources/getGPU.go
+++ b/harmony/resources/getGPU.go
@@ -11,6 +11,29 @@ import (
 	ffi "github.com/filecoin-project/filecoin-ffi"
 )
 
+// GpuOverprovisionFactor is the number of scheduling slots exposed per
+// detected GPU. It defaults to 1 and can be raised with the
+// HARMONY_GPU_OVERPROVISION_FACTOR environment variable.
+var GpuOverprovisionFactor = 1
+
+// init reads HARMONY_GPU_OVERPROVISION_FACTOR. Values that do not parse or
+// are smaller than 1 are logged and ignored: a zero factor would yield no
+// GPU slots at all, and a negative one would make the channel allocation in
+// lib/ffiselect panic.
+func init() {
+	if nstr := os.Getenv("HARMONY_GPU_OVERPROVISION_FACTOR"); nstr != "" {
+		n, err := strconv.Atoi(nstr)
+		switch {
+		case err != nil:
+			logger.Errorf("parsing HARMONY_GPU_OVERPROVISION_FACTOR failed: %+v", err)
+		case n < 1:
+			logger.Errorf("ignoring HARMONY_GPU_OVERPROVISION_FACTOR=%d: must be at least 1", n)
+		default:
+			GpuOverprovisionFactor = n
+		}
+	}
+}
+
 func getGPUDevices() float64 { // GPU boolean
 	if nstr := os.Getenv("HARMONY_OVERRIDE_GPUS"); nstr != "" {
 		n, err := strconv.ParseFloat(nstr, 64)
@@ -22,13 +45,13 @@ func getGPUDevices() float64 { // GPU boolean
 	}
 
 	gpus, err := ffi.GetGPUDevices()
-	logger.Infow("GPUs", "list", gpus)
+	logger.Infow("GPUs", "list", gpus, "overprovision_factor", GpuOverprovisionFactor)
 	if err != nil {
 		logger.Errorf("getting gpu devices failed: %+v", err)
 	}
 	all := strings.ToLower(strings.Join(gpus, ","))
 	if len(gpus) > 1 || strings.Contains(all, "ati") || strings.Contains(all, "nvidia") {
-		return float64(len(gpus))
+		return float64(len(gpus) * GpuOverprovisionFactor)
 	}
 	return 0
 }
diff --git a/lib/ffiselect/ffiselect.go b/lib/ffiselect/ffiselect.go
index 5122c8453..d0267ab73 100644
--- a/lib/ffiselect/ffiselect.go
+++ b/lib/ffiselect/ffiselect.go
@@ -19,6 +19,7 @@ import (
 	"github.com/filecoin-project/go-state-types/proof"
 
 	"github.com/filecoin-project/curio/build"
+	"github.com/filecoin-project/curio/harmony/resources"
 	"github.com/filecoin-project/curio/lib/storiface"
 )
 
@@ -45,9 +46,11 @@ func init() {
 		ch = make(chan string, 1)
 		ch <- "0"
 	} else {
-		ch = make(chan string, len(devices))
-		for i := 0; i < len(devices); i++ {
-			ch <- strconv.Itoa(i)
+		nSlots := len(devices) * resources.GpuOverprovisionFactor
+
+		ch = make(chan string, nSlots)
+		for i := 0; i < nSlots; i++ {
+			ch <- strconv.Itoa(i / resources.GpuOverprovisionFactor)
 		}
 	}
 }