Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 37 additions & 4 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@ permissions:

jobs:
build:
runs-on: ubuntu-latest
timeout-minutes: 5
runs-on: ${{ matrix.os }}
timeout-minutes: 15
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
steps:
- uses: actions/checkout@v5

Expand All @@ -25,12 +28,42 @@ jobs:
run: go build -v ./...

- name: Goroutine leak detector
if: matrix.os == 'ubuntu-latest'
continue-on-error: true
run: go test -c -o tests && for test in $(go test -list . | grep -E "^(Test|Example)"); do ./tests -test.run "^$test\$" &>/dev/null && echo -e "$test passed\n" || echo -e "$test failed\n"; done

- name: Test
- name: Test (Full Suite)
if: matrix.os == 'ubuntu-latest'
run: go test -race -v ./...

- name: Test (spooledtempfile only)
if: matrix.os == 'macos-latest'
run: go test -race -v ./pkg/spooledtempfile/...

- name: Benchmarks
if: matrix.os == 'ubuntu-latest'
run: go test -bench=. -benchmem -run=^$ ./...


# Platform-specific test verification
- name: Test Linux-specific memory implementation
if: matrix.os == 'ubuntu-latest'
run: |
echo "Running Linux-specific memory tests..."
cd pkg/spooledtempfile
go test -v -run "TestCgroup|TestHostMeminfo|TestRead"

- name: Test macOS-specific memory implementation
if: matrix.os == 'macos-latest'
run: |
echo "Running macOS-specific memory tests..."
cd pkg/spooledtempfile
go test -v -run "TestGetSystemMemoryUsedFraction|TestSysctlMemoryValues|TestMemoryFractionConsistency"

# Cross-compilation verification
- name: Cross-compile for macOS (from Linux)
if: matrix.os == 'ubuntu-latest'
run: GOOS=darwin GOARCH=amd64 go build ./...

- name: Cross-compile for Linux (from macOS)
if: matrix.os == 'macos-latest'
run: GOOS=linux GOARCH=amd64 go build ./...
89 changes: 76 additions & 13 deletions client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"crypto/x509"
"crypto/x509/pkix"
"errors"
"fmt"
"io"
"math/big"
"net"
Expand All @@ -16,6 +17,7 @@ import (
"os"
"path"
"path/filepath"
"strconv"
"strings"
"sync"
"testing"
Expand Down Expand Up @@ -63,6 +65,46 @@ func defaultBenchmarkRotatorSettings(t *testing.B) *RotatorSettings {
return rotatorSettings
}

// sumRecordContentLengths opens the WARC file at path, reads it record by
// record, and returns the sum of the Content-Length header values of all
// records.
//
// A zero total and a non-nil error are returned when the file cannot be
// opened, the reader cannot be created, a record cannot be read, a
// Content-Length header does not parse as a base-10 int64, or a record's
// content cannot be closed.
func sumRecordContentLengths(path string) (int64, error) {
	file, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer file.Close()

	reader, err := NewReader(file)
	if err != nil {
		return 0, err
	}

	var total int64
	for {
		record, err := reader.ReadRecord()
		// errors.Is also matches an io.EOF that has been wrapped with extra
		// context, which a plain == comparison would treat as a failure.
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			return 0, err
		}

		clStr := record.Header.Get("Content-Length")
		cl, err := strconv.ParseInt(clStr, 10, 64)
		if err != nil {
			// Best-effort cleanup: the parse failure is the error worth
			// reporting, so a secondary close error is deliberately dropped.
			_ = record.Content.Close()
			return 0, fmt.Errorf("parsing Content-Length %q: %w", clStr, err)
		}

		total += cl

		// Release the record's content before moving on to the next record.
		if err := record.Content.Close(); err != nil {
			return 0, err
		}
	}

	return total, nil
}

// Helper function used in many tests
func drainErrChan(t *testing.T, errChan chan *Error) func() {
var wg sync.WaitGroup
Expand Down Expand Up @@ -153,21 +195,27 @@ func TestHTTPClient(t *testing.T) {
t.Fatal(err)
}

var expectedPayloadBytes int64
for _, path := range files {
testFileSingleHashCheck(t, path, "sha1:UIRWL5DFIPQ4MX3D3GFHM2HCVU3TZ6I3", []string{"26872"}, 1, server.URL+"/testdata/image.svg")

totalBytes, err := sumRecordContentLengths(path)
if err != nil {
t.Fatalf("failed to sum record content lengths for %s: %v", path, err)
}
expectedPayloadBytes += totalBytes
}

// verify that the client's total byte count matches the sum of the Content-Length values recorded in the WARC files
dataTotal := httpClient.DataTotal.Load()
if dataTotal < 27130 || dataTotal > 27160 {
t.Fatalf("total bytes downloaded mismatch, expected: 27130-27160 got: %d", dataTotal)
if dataTotal != expectedPayloadBytes {
t.Fatalf("total bytes downloaded mismatch, expected %d got %d", expectedPayloadBytes, dataTotal)
}
}

func TestHTTPClientRequestFailing(t *testing.T) {
var (
rotatorSettings = defaultRotatorSettings(t)
errWg sync.WaitGroup
err error
)

Expand All @@ -180,11 +228,14 @@ func TestHTTPClientRequestFailing(t *testing.T) {
if err != nil {
t.Fatalf("Unable to init WARC writing HTTP client: %s", err)
}
errWg.Add(1)

errCh := make(chan *Error, 1)
var errChWg sync.WaitGroup
errChWg.Add(1)
go func() {
defer errWg.Done()
for _ = range httpClient.ErrChan {
// We expect an error here, so we don't need to log it
defer errChWg.Done()
for err := range httpClient.ErrChan {
errCh <- err
}
}()

Expand All @@ -199,10 +250,21 @@ func TestHTTPClientRequestFailing(t *testing.T) {

_, err = httpClient.Do(req)
if err == nil {
t.Fatal("expected error on Do, got none")
select {
case recv := <-errCh:
if recv == nil {
t.Fatal("expected error via ErrChan but channel closed without value")
}
case <-time.After(2 * time.Second):
t.Fatal("expected error on Do or via ErrChan, got none")
}
} else {
t.Logf("got expected error: %v", err)
}

httpClient.Close()
errChWg.Wait()
close(errCh)
}

func TestHTTPClientConnReadDeadline(t *testing.T) {
Expand Down Expand Up @@ -594,15 +656,15 @@ func TestHTTPClientWithProxy(t *testing.T) {

// init socks5 proxy server
proxyServer := socks5.NewServer()
listener, err := net.Listen("tcp", "127.0.0.1:0")
if err != nil {
t.Fatalf("failed to listen for proxy: %v", err)
}

// Create a channel to signal server stop
stopChan := make(chan struct{})

go func() {
listener, err := net.Listen("tcp", "127.0.0.1:8000")
if err != nil {
panic(err)
}
defer listener.Close()

go func() {
Expand All @@ -615,6 +677,7 @@ func TestHTTPClientWithProxy(t *testing.T) {
}
}()

proxyAddr := listener.Addr().String()
// Defer sending the stop signal
defer close(stopChan)

Expand All @@ -625,7 +688,7 @@ func TestHTTPClientWithProxy(t *testing.T) {
// init the HTTP client responsible for recording HTTP(s) requests / responses
httpClient, err := NewWARCWritingHTTPClient(HTTPClientSettings{
RotatorSettings: rotatorSettings,
Proxy: "socks5://127.0.0.1:8000"})
Proxy: fmt.Sprintf("socks5://%s", proxyAddr)})
if err != nil {
t.Fatalf("Unable to init WARC writing HTTP client: %s", err)
}
Expand Down
37 changes: 20 additions & 17 deletions cmd/warc/mend/mend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,25 @@ import (
"io"
"os"
"path/filepath"
"runtime"
"testing"

"github.com/internetarchive/gowarc/cmd/warc/verify"
"github.com/spf13/cobra"
)

// getTestdataDir returns the path to the testdata/warcs directory, resolved
// relative to the source file that defines this function rather than to the
// process working directory, so tests behave the same no matter where they
// are started from.
//
// This file is at cmd/warc/mend/mend_test.go and the fixtures are at
// testdata/warcs, so the path climbs three directory levels.
func getTestdataDir() string {
	// Caller(0) reports this function's own source file, so the result does
	// not depend on which file the caller lives in. If the lookup fails,
	// filename is empty, filepath.Dir yields "." and the result degrades to a
	// path relative to the working directory.
	_, filename, _, _ := runtime.Caller(0)
	return filepath.Join(filepath.Dir(filename), "..", "..", "..", "testdata", "warcs")
}

// TestAnalyzeWARCFile tests the analysis of different WARC files
func TestAnalyzeWARCFile(t *testing.T) {
testdataDir := "../../testdata/warcs"
testdataDir := getTestdataDir()

tests := []struct {
name string
Expand Down Expand Up @@ -128,7 +138,7 @@ func TestAnalyzeWARCFile(t *testing.T) {

// TestMendResultValidation tests that mendResult structs are properly populated
func TestMendResultValidation(t *testing.T) {
testdataDir := "../../testdata/warcs"
testdataDir := getTestdataDir()

// Test a file that should have all fields populated
filePath := filepath.Join(testdataDir, "corrupted-trailing-bytes.warc.gz.open")
Expand Down Expand Up @@ -183,7 +193,7 @@ func TestMendResultValidation(t *testing.T) {

// TestAnalyzeWARCFileForceMode tests analyzeWARCFile with force=true on good closed WARC files
func TestAnalyzeWARCFileForceMode(t *testing.T) {
testdataDir := "../../testdata/warcs"
testdataDir := getTestdataDir()

tests := []struct {
name string
Expand Down Expand Up @@ -255,7 +265,7 @@ func TestAnalyzeWARCFileForceMode(t *testing.T) {

// TestSkipNonOpenFiles tests that non-.open files are correctly skipped
func TestSkipNonOpenFiles(t *testing.T) {
testdataDir := "../../testdata/warcs"
testdataDir := getTestdataDir()
filePath := filepath.Join(testdataDir, "skip-non-open.warc.gz")

// Check if test file exists
Expand Down Expand Up @@ -305,7 +315,7 @@ var mendExpectedResults = map[string]expectedResult{
recordCount: 1, // Actual count from mend operation
truncateAt: 0, // No truncation needed
description: "good synthetic file with .open suffix",
shouldBeValid: false, // File has WARC header corruption that mend can't fix
shouldBeValid: true, // After removing the .open suffix the WARC remains valid
},
"empty.warc.gz.open": {
outputFile: "empty.warc.gz",
Expand All @@ -321,15 +331,15 @@ var mendExpectedResults = map[string]expectedResult{
recordCount: 1, // Actual count from mend operation
truncateAt: 2362, // Truncates trailing garbage
description: "synthetic file with trailing garbage bytes",
shouldBeValid: false, // File has WARC header corruption that mend can't fix
shouldBeValid: true, // Truncating the trailing garbage yields a valid WARC record
},
"corrupted-mid-record.warc.gz.open": {
outputFile: "corrupted-mid-record.warc.gz",
sha256: "7c7f896ce58404c841a652500efefbba5f4d92ccc6f9161b0b60aa816f542a7c",
recordCount: 1, // Actual count from mend operation
truncateAt: 1219,
description: "synthetic file corrupted mid-record",
shouldBeValid: false, // File has WARC header corruption that mend can't fix
shouldBeValid: true, // Truncating back to the last valid position restores a valid record
},
}

Expand Down Expand Up @@ -359,14 +369,7 @@ func createMockCobraCommand() *cobra.Command {
// TestMendFunctionDirect verifies that the mend function produces
// expected results on synthetic test data by comparing against pre-computed checksums
func TestMendFunctionDirect(t *testing.T) {
// Get current directory and construct paths relative to workspace root
cwd, err := os.Getwd()
if err != nil {
t.Fatalf("failed to get current directory: %v", err)
}
// From cmd/mend, go up to workspace root
workspaceRoot := filepath.Join(cwd, "../..")
testdataDir := filepath.Join(workspaceRoot, "testdata/warcs")
testdataDir := getTestdataDir()
outputDir := filepath.Join(testdataDir, "mend_test_output")

// Ensure output directory exists
Expand Down Expand Up @@ -505,7 +508,7 @@ func copyFile(src, dst string) error {

// TestIsGzipFile tests the gzip file detection function
func TestIsGzipFile(t *testing.T) {
testdataDir := "../../testdata/warcs"
testdataDir := getTestdataDir()

tests := []struct {
name string
Expand Down Expand Up @@ -643,7 +646,7 @@ func TestConfirmAction(t *testing.T) {

// TestMendDryRun tests the mend function in dry-run mode
func TestMendDryRun(t *testing.T) {
testdataDir := "../../testdata/warcs"
testdataDir := getTestdataDir()
tempDir, err := os.MkdirTemp("", "mend_dry_run_test_*")
if err != nil {
t.Fatalf("failed to create temp dir: %v", err)
Expand Down
Loading
Loading