Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions bits.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// This is a port of the bits.go file from the dolthub/swiss repository.
// The original source code is licensed under the Apache License, Version 2.0.
// The original source code can be found at:
// https://github.com/dolthub/swiss

//go:build !amd64 || nosimd

package lua

import (
"math/bits"
"unsafe"
)

const (
// groupSize equals the number of bytes that castUint64 packs into a single
// uint64 word. NOTE(review): presumably the number of slots per metadata
// group; confirm against the metadata type definition.
groupSize = 8
// NOTE(review): presumably the average number of occupied slots allowed
// per group before the table grows; confirm against the code that uses it.
maxAvgGroupLoad = 7

// loBits has the lowest bit (0x01) of each of the eight bytes set.
// Multiplying it by a value in 0..255 replicates that value into every byte.
loBits uint64 = 0x0101010101010101
// hiBits has the highest bit (0x80) of each of the eight bytes set.
// hasZeroByte uses it to keep one flag bit per byte.
hiBits uint64 = 0x8080808080808080
)

// bitset is a 64-bit match mask as produced by hasZeroByte: a match on byte i
// is reported by setting the top bit of byte i (bit 8*i+7). nextMatch pops
// matches from it one at a time.
type bitset uint64

// metaMatchH2 reports which bytes of the metadata word at m are equal to h.
//
// The byte h is replicated into all eight lanes, XORed against the metadata
// word so that equal lanes become zero, and the zero lanes are then located
// with hasZeroByte. See "Determine if a word has a byte equal to n":
// https://graphics.stanford.edu/~seander/bithacks.html#ValueInWord
//
// Because hasZeroByte can flag a lane sitting directly above a real match,
// the result may contain false positives; callers must verify candidates.
//
// NOTE(review): uint64(h) would sign-extend a negative h if h2 is a signed
// type; this presumably relies on h always being non-negative. Confirm.
func metaMatchH2(m *metadata, h h2) bitset {
	broadcast := loBits * uint64(h)
	diff := castUint64(m) ^ broadcast
	return hasZeroByte(diff)
}

// metaMatchEmpty reports which bytes of the metadata word at m hold the value
// 0x80. XORing the word with hiBits turns exactly those bytes into zero, and
// hasZeroByte then flags them.
//
// NOTE(review): 0x80 is presumably the "empty slot" sentinel; confirm against
// the metadata constants defined elsewhere.
func metaMatchEmpty(m *metadata) bitset {
	word := castUint64(m)
	return hasZeroByte(word ^ hiBits)
}

// nextMatch removes the lowest set bit from *b and returns the index of the
// byte that bit belongs to (bit position divided by 8).
//
// When *b is zero there is nothing to pop: the set is left unchanged and the
// return value is 8 (64/8), which is not a valid byte index. Callers are
// expected to check for a non-zero set before calling.
func nextMatch(b *bitset) uint32 {
	pos := uint32(bits.TrailingZeros64(uint64(*b)))
	*b &^= 1 << pos // drop the bit that was just found
	return pos / 8
}

// hasZeroByte returns a mask with the top bit of byte i set for every byte i
// of x that is zero.
//
// Subtracting 1 from each byte sets the top bit of bytes that were zero (via
// borrow); masking with ^x discards bytes whose top bit was already set.
// The borrow out of a zero byte can also flag a 0x01 byte directly above it,
// so the mask may contain false positives, but never misses a zero byte.
func hasZeroByte(x uint64) bitset {
	borrowed := x - loBits
	return bitset(borrowed &^ x & hiBits)
}

// castUint64 reinterprets the memory at m as a uint64 and returns that value,
// i.e. it loads eight bytes starting at m in the host's native byte order.
//
// NOTE(review): assumes metadata occupies at least 8 bytes. The byte indices
// derived from this word (see nextMatch) follow memory order only on
// little-endian hosts; confirm behavior for any big-endian target that the
// "!amd64 || nosimd" build constraint admits.
func castUint64(m *metadata) uint64 {
	word := (*uint64)(unsafe.Pointer(m))
	return *word
}
38 changes: 38 additions & 0 deletions bits_amd64.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions bits_amd64.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Code generated by command: go run asm.go -out match.s -stubs match_amd64.go. DO NOT EDIT.

//go:build amd64

#include "textflag.h"

// func matchMetadata(metadata *[16]int8, hash int8) uint16
// Requires: SSE2, SSSE3
//
// Compares hash against each of the 16 bytes at metadata and returns a 16-bit
// mask in which bit i is set when byte i equals hash.
// Frame: no locals ($0); 18 bytes of arguments/results (pointer at +0, hash
// at +8, uint16 result at +16).
TEXT ·matchMetadata(SB), NOSPLIT, $0-18
// AX = pointer to the 16 metadata bytes.
MOVQ metadata+0(FP), AX
// CX = hash, sign-extended from 8 to 32 bits.
MOVBLSX hash+8(FP), CX
// Place hash in the low bytes of X0.
MOVD CX, X0
// X1 = all zeros, used as the shuffle control below.
PXOR X1, X1
// An all-zero shuffle mask selects byte 0 for every lane, broadcasting the
// hash byte across all 16 bytes of X0.
PSHUFB X1, X0
// Unaligned load of the 16 metadata bytes into X1.
MOVOU (AX), X1
// Byte-wise equality: each lane of X0 becomes 0xFF on match, 0x00 otherwise.
PCMPEQB X1, X0
// Collect the top bit of every lane into the low 16 bits of AX.
PMOVMSKB X0, AX
// Store the mask as the uint16 result.
MOVW AX, ret+16(FP)
RET
Loading