diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4a1db77833..fcd15ae106 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -114,9 +114,6 @@ jobs: - name: Add OpenCL support - Ubuntu if: ${{ matrix.os == 'ubuntu-latest' }} run: sudo apt-get update -q && sudo apt-get install -qy ocl-icd-opencl-dev libpocl2 - - name: Override SDKROOT for macOS - if: ${{ contains(matrix.os, 'macos') && runner.arch == 'arm64' }} - run: echo "SDKROOT=/Library/Developer/CommandLineTools/SDKs/MacOSX12.3.sdk" >> $GITHUB_ENV - name: disable Windows Defender - Windows if: ${{ matrix.os == 'windows-latest' }} run: | @@ -182,9 +179,6 @@ jobs: - name: Add OpenCL support - Ubuntu if: ${{ matrix.os == 'ubuntu-latest' }} run: sudo apt-get update -q && sudo apt-get install -qy ocl-icd-opencl-dev libpocl2 - - name: Override SDKROOT for macOS - if: ${{ contains(matrix.os, 'macos') && runner.arch == 'arm64' }} - run: echo "SDKROOT=/Library/Developer/CommandLineTools/SDKs/MacOSX12.3.sdk" >> $GITHUB_ENV - name: Add OpenCL support - Windows if: ${{ matrix.os == 'windows-latest' }} run: choco install opencl-intel-cpu-runtime diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index cf47dd5945..80b54a2e3c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -36,9 +36,6 @@ jobs: - name: Add OpenCL support - Ubuntu if: ${{ matrix.os == 'ubuntu-latest' }} run: sudo apt-get update -q && sudo apt-get install -qy ocl-icd-opencl-dev libpocl2 - - name: Override SDKROOT for macOS - if: ${{ contains(matrix.os, 'macos') && runner.arch == 'arm64' }} - run: echo "SDKROOT=/Library/Developer/CommandLineTools/SDKs/MacOSX12.3.sdk" >> $GITHUB_ENV - name: disable Windows Defender - Windows if: ${{ matrix.os == 'windows-latest' }} run: | diff --git a/Makefile-libs.Inc b/Makefile-libs.Inc index ec86f5a5be..62902dd876 100644 --- a/Makefile-libs.Inc +++ b/Makefile-libs.Inc @@ -50,7 +50,7 @@ else endif endif -POSTRS_SETUP_REV = 0.5.2 +POSTRS_SETUP_REV = 0.6.1 POSTRS_SETUP_ZIP = libpost-$(platform)-v$(POSTRS_SETUP_REV).zip POSTRS_SETUP_URL_ZIP ?= https://github.com/spacemeshos/post-rs/releases/download/v$(POSTRS_SETUP_REV)/$(POSTRS_SETUP_ZIP) POSTRS_PROFILER_ZIP = profiler-$(platform)-v$(POSTRS_SETUP_REV).zip diff --git a/activation/post_supervisor.go b/activation/post_supervisor.go index 9d671a6140..8c596a90be 100644 --- a/activation/post_supervisor.go +++ b/activation/post_supervisor.go @@ -31,6 +31,7 @@ func DefaultPostServiceConfig() PostSupervisorConfig { return PostSupervisorConfig{ PostServiceCmd: filepath.Join(filepath.Dir(path), DefaultPostServiceName), NodeAddress: "http://127.0.0.1:9093", + MaxRetries: 10, } } @@ -44,12 +45,14 @@ func DefaultTestPostServiceConfig() PostSupervisorConfig { return PostSupervisorConfig{ PostServiceCmd: filepath.Join(filepath.Dir(string(path)), "build", DefaultPostServiceName), NodeAddress: "http://127.0.0.1:9093", + MaxRetries: 10, } } type PostSupervisorConfig struct { PostServiceCmd string NodeAddress string + MaxRetries int CACert string Cert string @@ -167,8 +170,7 @@ func (ps *PostSupervisor) Start(opts PostSetupOpts) error { return err } - ps.eg.Go(func() error { return ps.runCmd(ctx, ps.cmdCfg, ps.postCfg, opts, ps.provingOpts) }) - return nil + return ps.runCmd(ctx, ps.cmdCfg, ps.postCfg, opts, ps.provingOpts) }) return nil } @@ -206,22 +208,13 @@ func (ps *PostSupervisor) Stop(deleteFiles bool) error { // it returns when the pipe is closed. func (ps *PostSupervisor) captureCmdOutput(pipe io.ReadCloser) func() error { return func() error { - buf := bufio.NewReader(pipe) - for { - line, err := buf.ReadString('\n') + scanner := bufio.NewScanner(pipe) + for scanner.Scan() { + line := scanner.Text() line = strings.TrimRight(line, "\r\n") // remove line delimiters at end of input - switch err { - case nil: - ps.logger.Info(line) - case io.EOF: - ps.logger.Info(line) - return nil - default: - ps.logger.Info(line) - ps.logger.Warn("read from post service pipe", zap.Error(err)) - return nil - } + ps.logger.Info(line) } + return nil } } @@ -232,67 +225,72 @@ func (ps *PostSupervisor) runCmd( postOpts PostSetupOpts, provingOpts PostProvingOpts, ) error { - for { - args := []string{ - "--address", cmdCfg.NodeAddress, - - "--k1", strconv.FormatUint(uint64(postCfg.K1), 10), - "--k2", strconv.FormatUint(uint64(postCfg.K2), 10), - "--k3", strconv.FormatUint(uint64(postCfg.K3), 10), - "--pow-difficulty", postCfg.PowDifficulty.String(), - - "--dir", postOpts.DataDir, - "-n", strconv.FormatUint(uint64(postOpts.Scrypt.N), 10), - "-r", strconv.FormatUint(uint64(postOpts.Scrypt.R), 10), - "-p", strconv.FormatUint(uint64(postOpts.Scrypt.P), 10), - - "--threads", strconv.FormatUint(uint64(provingOpts.Threads), 10), - "--nonces", strconv.FormatUint(uint64(provingOpts.Nonces), 10), - "--randomx-mode", provingOpts.RandomXMode.String(), - - "--watch-pid", strconv.Itoa(os.Getpid()), - } - if cmdCfg.CACert != "" { - args = append(args, "--ca-cert", cmdCfg.CACert) - } - if cmdCfg.Cert != "" { - args = append(args, "--cert", cmdCfg.Cert) - } - if cmdCfg.Key != "" { - args = append(args, "--key", cmdCfg.Key) - } + args := []string{ + "--address", cmdCfg.NodeAddress, + + "--min-num-units", strconv.FormatUint(uint64(postCfg.MinNumUnits), 10), + "--max-num-units", strconv.FormatUint(uint64(postCfg.MaxNumUnits), 10), + "--labels-per-unit", strconv.FormatUint(uint64(postCfg.LabelsPerUnit), 10), + "--k1", strconv.FormatUint(uint64(postCfg.K1), 10), + "--k2", strconv.FormatUint(uint64(postCfg.K2), 10), + "--k3", strconv.FormatUint(uint64(postCfg.K3), 10), + "--pow-difficulty", postCfg.PowDifficulty.String(), + + "--dir", postOpts.DataDir, + "-n", strconv.FormatUint(uint64(postOpts.Scrypt.N), 10), + "-r", strconv.FormatUint(uint64(postOpts.Scrypt.R), 10), + "-p", strconv.FormatUint(uint64(postOpts.Scrypt.P), 10), + + "--threads", strconv.FormatUint(uint64(provingOpts.Threads), 10), + "--nonces", strconv.FormatUint(uint64(provingOpts.Nonces), 10), + "--randomx-mode", provingOpts.RandomXMode.String(), + + "--watch-pid", strconv.Itoa(os.Getpid()), + } + if cmdCfg.MaxRetries > 0 { + args = append(args, "--max-retries", strconv.Itoa(cmdCfg.MaxRetries)) + } + if cmdCfg.CACert != "" { + args = append(args, "--ca-cert", cmdCfg.CACert) + } + if cmdCfg.Cert != "" { + args = append(args, "--cert", cmdCfg.Cert) + } + if cmdCfg.Key != "" { + args = append(args, "--key", cmdCfg.Key) + } - cmd := exec.CommandContext( - ctx, - cmdCfg.PostServiceCmd, - args..., - ) - cmd.Dir = filepath.Dir(cmdCfg.PostServiceCmd) - pipe, err := cmd.StderrPipe() - if err != nil { - ps.logger.Error("setup stderr pipe for post service", zap.Error(err)) - return nil - } + cmd := exec.CommandContext( + ctx, + cmdCfg.PostServiceCmd, + args..., + ) + cmd.Dir = filepath.Dir(cmdCfg.PostServiceCmd) + pipe, err := cmd.StderrPipe() + if err != nil { + ps.logger.Error("setup stderr pipe for post service", zap.Error(err)) + return nil + } - var eg errgroup.Group - eg.Go(ps.captureCmdOutput(pipe)) - if err := cmd.Start(); err != nil { - pipe.Close() - ps.logger.Error("start post service", zap.Error(err)) - return nil - } - ps.logger.Info("post service started", zap.Int("pid", cmd.Process.Pid), zap.String("cmd", cmd.String())) - ps.pid.Store(int64(cmd.Process.Pid)) - events.EmitPostServiceStarted() - err = cmd.Wait() - if err := ctx.Err(); err != nil { - events.EmitPostServiceStopped() - if err := eg.Wait(); err != nil { - ps.logger.Warn("output reading goroutine failed", zap.Error(err)) - } - return nil + var eg errgroup.Group + eg.Go(ps.captureCmdOutput(pipe)) + if err := cmd.Start(); err != nil { + pipe.Close() + ps.logger.Error("start post service", zap.Error(err)) + return nil + } + ps.logger.Info("post service started", zap.Int("pid", cmd.Process.Pid), zap.String("cmd", cmd.String())) + ps.pid.Store(int64(cmd.Process.Pid)) + events.EmitPostServiceStarted() + err = cmd.Wait() + if ctx.Err() != nil { + events.EmitPostServiceStopped() + if err := eg.Wait(); err != nil { + ps.logger.Warn("output reading goroutine failed", zap.Error(err)) } - ps.logger.Warn("post service exited", zap.Error(err)) - eg.Wait() + return nil } + eg.Wait() + ps.logger.Fatal("post service exited", zap.Error(err)) + return nil } diff --git a/activation/post_supervisor_test.go b/activation/post_supervisor_test.go index daf4590a10..82269b3259 100644 --- a/activation/post_supervisor_test.go +++ b/activation/post_supervisor_test.go @@ -184,8 +184,8 @@ func Test_PostSupervisor_Restart_Possible(t *testing.T) { require.Eventually(t, func() bool { return (ps.pid.Load() == 0) }, 5*time.Second, 100*time.Millisecond) } -func Test_PostSupervisor_RestartsOnCrash(t *testing.T) { - log := zaptest.NewLogger(t) +func Test_PostSupervisor_LogFatalOnCrash(t *testing.T) { + log := zaptest.NewLogger(t, zaptest.WrapOptions(zap.WithFatalHook(calledFatal(t)))) cmdCfg := DefaultTestPostServiceConfig() postCfg := DefaultPostConfig() @@ -209,21 +209,50 @@ func Test_PostSupervisor_RestartsOnCrash(t *testing.T) { require.Eventually(t, func() bool { return (ps.pid.Load() != 0) }, 5*time.Second, 100*time.Millisecond) - oldPid := int(ps.pid.Load()) - process, err := os.FindProcess(oldPid) + pid := int(ps.pid.Load()) + process, err := os.FindProcess(pid) require.NoError(t, err) require.NotNil(t, process) require.NoError(t, process.Kill()) - require.Eventually(t, func() bool { return (ps.pid.Load() != int64(oldPid)) }, 5*time.Second, 100*time.Millisecond) + // log asserts that zap.Fatal was called + require.NoError(t, ps.eg.Wait()) +} + +func Test_PostSupervisor_LogFatalOnInvalidConfig(t *testing.T) { + log := zaptest.NewLogger(t, zaptest.WrapOptions(zap.WithFatalHook(calledFatal(t)))) + + cmdCfg := DefaultTestPostServiceConfig() + cmdCfg.NodeAddress = "http://127.0.0.1:9099" // wrong port + cmdCfg.MaxRetries = 1 // speedup test, will fail on 2nd retry (~ 5s) + postCfg := DefaultPostConfig() + postOpts := DefaultPostSetupOpts() + provingOpts := DefaultPostProvingOpts() + + ctrl := gomock.NewController(t) + mgr := NewMockpostSetupProvider(ctrl) + mgr.EXPECT().PrepareInitializer(postOpts).Return(nil) + mgr.EXPECT().StartSession(gomock.Any()).Return(nil) + + sync := NewMocksyncer(ctrl) + sync.EXPECT().RegisterForATXSynced().DoAndReturn(closedChan) + + ps, err := NewPostSupervisor(log.Named("supervisor"), cmdCfg, postCfg, provingOpts, mgr, sync) + require.NoError(t, err) + require.NotNil(t, ps) + + require.NoError(t, ps.Start(postOpts)) + t.Cleanup(func() { assert.NoError(t, ps.Stop(false)) }) + + require.Eventually(t, func() bool { return (ps.pid.Load() != 0) }, 5*time.Second, 100*time.Millisecond) pid := int(ps.pid.Load()) - process, err = os.FindProcess(pid) + process, err := os.FindProcess(pid) require.NoError(t, err) require.NotNil(t, process) - require.NotEqual(t, oldPid, pid) - require.NoError(t, ps.Stop(false)) + // log asserts that zap.Fatal was called + require.NoError(t, ps.eg.Wait()) } func Test_PostSupervisor_StopOnError(t *testing.T) { diff --git a/go.mod b/go.mod index 485c34d7f4..0d162df076 100644 --- a/go.mod +++ b/go.mod @@ -39,7 +39,7 @@ require ( github.com/spacemeshos/go-scale v1.1.12 github.com/spacemeshos/merkle-tree v0.2.3 github.com/spacemeshos/poet v0.9.7 - github.com/spacemeshos/post v0.10.1 + github.com/spacemeshos/post v0.10.2 github.com/spf13/afero v1.10.0 github.com/spf13/cobra v1.8.0 github.com/spf13/pflag v1.0.5 diff --git a/go.sum b/go.sum index ddf165b562..2b819139f5 100644 --- a/go.sum +++ b/go.sum @@ -649,8 +649,8 @@ github.com/spacemeshos/merkle-tree v0.2.3 h1:zGEgOR9nxAzJr0EWjD39QFngwFEOxfxMloE github.com/spacemeshos/merkle-tree v0.2.3/go.mod h1:VomOcQ5pCBXz7goiWMP5hReyqOfDXGSKbrH2GB9Htww= github.com/spacemeshos/poet v0.9.7 h1:FmKhgUKj//8Tzn8czWSIrn6+FVUFZbvLh8zqLfB8dfE= github.com/spacemeshos/poet v0.9.7/go.mod h1:wGCdhs2jnfQ52Amcmygv9uEEwYpdHAPjbiPg0Uf6cNQ= -github.com/spacemeshos/post v0.10.1 h1:dFWB1xHa4Z+mDqmZjlQxBVn0e5gBs4QS/j50WXfHoac= -github.com/spacemeshos/post v0.10.1/go.mod h1:FPa130ioHforcqca0vqJrYjgUsYzTBwivy6lyLy7sKA= +github.com/spacemeshos/post v0.10.2 h1:FjRbrceno8xjBZi+53a+Xjoy81XormxDBvN0mN+0wxg= +github.com/spacemeshos/post v0.10.2/go.mod h1:lSZOkvCna1UuXgdGrvc+2SPMM+f+IUi9whZpBH7wUbM= github.com/spacemeshos/sha256-simd v0.1.0 h1:G7Mfu5RYdQiuE+wu4ZyJ7I0TI74uqLhFnKblEnSpjYI= github.com/spacemeshos/sha256-simd v0.1.0/go.mod h1:O8CClVIilId7RtuCMV2+YzMj6qjVn75JsxOxaE8vcfM= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=