golang 对象(struct) hash原理

Posted 2022-12-05 惜暮

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了golang 对象(struct) hash原理相关的知识，希望对你有一定的参考价值。

golang 对象hash原理

map里面的key的hash是怎么实现的
golang里面的对象的hash原理
Hash 测试

map里面的key的hash是怎么实现的

源码：src/runtime/map.go
golang的map是内置关键字，不管是get还是set都需要通过key的hash找到对应的存储实体。具体的hash过程如下代码：

type maptype struct 
	typ        _type
	key        *_type
	elem       *_type
	bucket     *_type // internal type representing a hash bucket
	keysize    uint8  // size of key slot
	elemsize   uint8  // size of elem slot
	bucketsize uint16 // size of bucket
	flags      uint32


// t is the map's *maptype. Look up the hash algorithm attached to the key
// type and hash the key with it; h.hash0 is passed as the second (seed)
// argument. NOTE(review): h is presumably the map header — confirm in map.go.
alg := t.key.alg
hash := alg.hash(key, uintptr(h.hash0))

这里其实就是拿到key对应的类型，然后获取当前key的类型的hash算法。然后调用hash函数。

hash函数的定义在这里：

// typeAlg bundles the hash and equality functions for one type.
//
// typeAlg is also copied/used in reflect/type.go.
// keep them in sync.
type typeAlg struct {
	// function for hashing objects of this type
	// (ptr to object, seed) -> hash
	hash func(unsafe.Pointer, uintptr) uintptr
	// function for comparing objects of this type
	// (ptr to object A, ptr to object B) -> ==?
	equal func(unsafe.Pointer, unsafe.Pointer) bool
}

可以知道，入参是指向key的指针，第二个参数是hash种子。

golang里面的对象的hash原理

golang里面每种数据类型的hash是与数据类型强相关的，并且是由编译器负责做类型绑定的。在src/runtime/alg.go 里面定义了一个数组 algarray（类型为 [alg_max]typeAlg，以 alg_XXX 常量作为下标），用于表示每类数据类型对应的 typeAlg。

var algarray = [alg_max]typeAlg
	alg_NOEQ:     nil, nil,
	alg_MEM0:     memhash0, memequal0,
	alg_MEM8:     memhash8, memequal8,
	alg_MEM16:    memhash16, memequal16,
	alg_MEM32:    memhash32, memequal32,
	alg_MEM64:    memhash64, memequal64,
	alg_MEM128:   memhash128, memequal128,
	alg_STRING:   strhash, strequal,
	alg_INTER:    interhash, interequal,
	alg_NILINTER: nilinterhash, nilinterequal,
	alg_FLOAT32:  f32hash, f32equal,
	alg_FLOAT64:  f64hash, f64equal,
	alg_CPLX64:   c64hash, c64equal,
	alg_CPLX128:  c128hash, c128equal,

上面的hash函数，最后实际上底层都会调用：func memhash(p unsafe.Pointer, seed, s uintptr) uintptr 函数。以strhash函数为例：

func strhash(a unsafe.Pointer, h uintptr) uintptr 
	x := (*stringStruct)(a)
	return memhash(x.str, h, uintptr(x.len))

下面看一下memhash函数, src/runtime/hash64.go

// p表示需要hash的对象的地址
// seed 是hash 种子，
// s是需要hash的对象的字节数
func memhash(p unsafe.Pointer, seed, s uintptr) uintptr 
	if (GOARCH == "amd64" || GOARCH == "arm64") &&
		GOOS != "nacl" && useAeshash 
		return aeshash(p, seed, s)
	
	h := uint64(seed + s*hashkey[0])
tail:
	switch 
	case s == 0:
	case s < 4:
		h ^= uint64(*(*byte)(p))
		h ^= uint64(*(*byte)(add(p, s>>1))) << 8
		h ^= uint64(*(*byte)(add(p, s-1))) << 16
		h = rotl_31(h*m1) * m2
	case s <= 8:
		h ^= uint64(readUnaligned32(p))
		h ^= uint64(readUnaligned32(add(p, s-4))) << 32
		h = rotl_31(h*m1) * m2
	case s <= 16:
		h ^= readUnaligned64(p)
		h = rotl_31(h*m1) * m2
		h ^= readUnaligned64(add(p, s-8))
		h = rotl_31(h*m1) * m2
	case s <= 32:
		h ^= readUnaligned64(p)
		h = rotl_31(h*m1) * m2
		h ^= readUnaligned64(add(p, 8))
		h = rotl_31(h*m1) * m2
		h ^= readUnaligned64(add(p, s-16))
		h = rotl_31(h*m1) * m2
		h ^= readUnaligned64(add(p, s-8))
		h = rotl_31(h*m1) * m2
	default:
		v1 := h
		v2 := uint64(seed * hashkey[1])
		v3 := uint64(seed * hashkey[2])
		v4 := uint64(seed * hashkey[3])
		for s >= 32 
			v1 ^= readUnaligned64(p)
			v1 = rotl_31(v1*m1) * m2
			p = add(p, 8)
			v2 ^= readUnaligned64(p)
			v2 = rotl_31(v2*m2) * m3
			p = add(p, 8)
			v3 ^= readUnaligned64(p)
			v3 = rotl_31(v3*m3) * m4
			p = add(p, 8)
			v4 ^= readUnaligned64(p)
			v4 = rotl_31(v4*m4) * m1
			p = add(p, 8)
			s -= 32
		
		h = v1 ^ v2 ^ v3 ^ v4
		goto tail
	

	h ^= h >> 29
	h *= m3
	h ^= h >> 32
	return uintptr(h)

这个就是整体的hash实现。从上面的代码可以看到：在 amd64/arm64 平台（GOOS 不是 nacl）并且 useAeshash 为 true 时，会改用汇编实现的aeshash函数计算hash，否则走下面的纯 Go 实现。

具体aeshash实现，有兴趣可以看汇编：src/runtime/asm_amd64.s 里面的 runtime·aeshash 函数。

Hash 测试

下面以string作为key，来测试以下三种算法的性能：

fnv算法
memhash算法
aeshash的汇编算法

fnv

测试背景：10000个uuid string;

// LEN is the number of keys hashed by the benchmark.
const (
	LEN = 10000
)

// keys holds the benchmark inputs; per the original note, each element is
// 36 bytes long.
var (
	keys [LEN][]byte
)

func init()
	for i:=0; i<LEN; i++ 
		k := uuid.New().String()
		keys[i] = []byte(k)
	


// 777000ns
func main()
	start := time.Now().UnixNano()
	for _, k := range keys 
		h := fnv.New64()
		h.Write(k)
		h.Sum64()
	
	end := time.Now().UnixNano()
	fmt.Println("total time:", end-start, "ns")

执行结果：10000次hash大约是727000ns，也就是平均每次hash要花费72ns。

memhash

这个是调用golang runtime里面的内嵌hash代码，这部分是我从runtime里面copy出来的。

代码如下：

package main

// LEN is the number of keys hashed by the benchmark.
const (
	LEN = 10000
)

// keys holds the benchmark inputs; per the original note, each element is
// 36 bytes long.
var (
	keys [LEN]string
)

func init()
	for i:=0; i<LEN; i++ 
		keys[i] = uuid.New().String()
	

// 178000ns
func main()  
	start := time.Now().UnixNano()
	for _, k := range keys 
		memhash(unsafe.Pointer(&k), 1, 36)
	
	end := time.Now().UnixNano()
	fmt.Println("total time:", end-start, "ns")


// hashkey holds four random multipliers that memhash folds into its seed.
// used in hash32,64.go to seed the hash function
var hashkey [4]uintptr

// PtrSize evaluates to 4 when uintptr is 32 bits wide and 8 when it is 64.
const PtrSize = 4 << (^uintptr(0) >> 63)

// init draws the four hashkey values from math/rand and forces each odd.
func init() {
	for i := range hashkey {
		// make sure these numbers are odd
		hashkey[i] = uintptr(rand.Int63()) | 1
	}
}

// add returns the pointer p advanced by x bytes.
func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
	// The uintptr round trip stays inside one expression, as the unsafe
	// package requires for pointer arithmetic.
	return unsafe.Pointer(uintptr(p) + x)
}

// rotl_31 rotates x left by 31 bits.
func rotl_31(x uint64) uint64 {
	return (x << 31) | (x >> (64 - 31))
}


// Multipliers used by memhash's mixing steps: four random odd 64-bit
// numbers. They are untyped constants too large for int64, so they must be
// used in uint64 expressions.
const m1 = 16877499708836156737
const m2 = 2820277070424839065
const m3 = 9497967016996688599
const m4 = 15839092249703872147

// BigEndian selects the byte order used by readUnaligned32/readUnaligned64.
const BigEndian = false

// Remaining architecture constants declared alongside BigEndian.
const (
	DefaultPhysPageSize = 4096
	PCQuantum           = 1
	Int64Align          = 8
	MinFrameSize        = 0
)

func readUnaligned64(p unsafe.Pointer) uint64 
	q := (*[8]byte)(p)
	if BigEndian 
		return uint64(q[7]) | uint64(q[6])<<8 | uint64(q[5])<<16 | uint64(q[4])<<24 |
			uint64(q[3])<<32 | uint64(q[2])<<40 | uint64(q[1])<<48 | uint64(q[0])<<56
	
	return uint64(q[0]) | uint64(q[1])<<8 | uint64(q[2])<<16 | uint64(q[3])<<24 | uint64(q[4])<<32 | uint64(q[5])<<40 | uint64(q[6])<<48 | uint64(q[7])<<56


// Note: These routines perform the read with an native endianness.
func readUnaligned32(p unsafe.Pointer) uint32 
	q := (*[4]byte)(p)
	if BigEndian 
		return uint32(q[3]) | uint32(q[2])<<8 | uint32(q[1])<<16 | uint32(q[0])<<24
	
	return uint32(q[0]) | uint32(q[1])<<8 | uint32(q[2])<<16 | uint32(q[3])<<24


func memhash(p unsafe.Pointer, seed, s uintptr) uintptr 
	h := uint64(seed + s*hashkey[0])
tail:
	switch 
	case s == 0:
	case s < 4:
		h ^= uint64(*(*byte)(p))
		h ^= uint64(*(*byte)(add(p, s>>1))) << 8
		h ^= uint64(*(*byte)(add(p, s-1))) << 16
		h = rotl_31(h*m1) * m2
	case s <= 8:
		h ^= uint64(readUnaligned32(p))
		h ^= uint64(readUnaligned32(add(p, s-4))) << 32
		h = rotl_31(h*m1) * m2
	case s <= 16:
		h ^= readUnaligned64(p)
		h = rotl_31(h*m1) * m2
		h ^= readUnaligned64(add(p, s-8))
		h = rotl_31(h*m1) * m2
	case s <= 32:
		h ^= readUnaligned64(p)
		h = rotl_31(h*m1) * m2
		h ^= readUnaligned64(add(p, 8))
		h = rotl_31(h*m1) * m2
		h ^= readUnaligned64(add(p, s-16))
		h = rotl_31(h*m1) * m2
		h ^= readUnaligned64(add(p, s-8))
		h = rotl_31(h*m1) * m2
	default:
		v1 := h
		v2 := uint64(seed * hashkey[1])
		v3 := uint64(seed * hashkey[2])
		v4 := uint64(seed * hashkey[3])
		for s >= 32 
			v1 ^= readUnaligned64(p)
			v1 = rotl_31(v1*m1) * m2
			p = add(p, 8)
			v2 ^= readUnaligned64(p)
			v2 = rotl_31(v2*m2) * m3
			p = add(p, 8)
			v3 ^= readUnaligned64(p)
			v3 = rotl_31(v3*m3) * m4
			p = add(p, 8)
			v4 ^= readUnaligned64(p)
			v4 = rotl_31(v4*m4) * m1
			p = add(p, 8)
			s -= 32
		
		h = v1 ^ v2 ^ v3 ^ v4
		goto tail
	

	h ^= h >> 29
	h *= m3
	h ^= h >> 32
	return uintptr(h)

执行结果：10000次hash大约是178000ns，也就是平均每次hash要花费17.8ns。

aeshash汇编

这部分代码也是从runtime asm代码copy出来做测试：
注意，需要把src/runtime/go_tls.h、funcdata.h、textflag.h 这三个头文件放在当前目录下。

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// louyuting: string entry point for the AES hash, copied from the runtime
// assembly for testing.
// func aeshashstr2(p unsafe.Pointer, h uintptr) uintptr
// Loads the data pointer and length out of the string header that p points
// to, then jumps to aeshashbody, which stores the result through DX.
TEXT ·aeshashstr2(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to string struct
	MOVQ	8(AX), CX	// length of string
	MOVQ	(AX), AX	// string data
	LEAQ	ret+16(FP), DX	// address of the return value slot
	JMP	aeshashbody<>(SB)

// AX: data
// CX: length
// DX: address to put return value
TEXT aeshashbody<>(SB),NOSPLIT,$0-0
	// Fill an SSE register with our seeds.
	MOVQ	h+8(FP), X0			// 64 bits of per-table hash seed
	PINSRW	$4, CX, X0			// 16 bits of length
	PSHUFHW $0, X0, X0			// repeat length 4 times total
	MOVO	X0, X1				// save unscrambled seed
	PXOR	main·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	CMPQ	CX, $16
	JB	aes0to15
	JE	aes16
	CMPQ	CX, $32
	JBE	aes17to32
	CMPQ	CX, $64
	JBE	aes33to64
	CMPQ	CX, $128
	JBE	aes65to128
	JMP	aes129plus

aes0to15:
	TESTQ	CX, CX
	JE	aes0

	ADDQ	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDQ	CX, CX
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1
final1:
	PXOR	X0, X1	// xor data with seed
	AESENC	X1, X1	// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X1
	ADDQ	CX, CX
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVQ	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	main·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVQ	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	main·aeskeysched+16(SB), X1
	PXOR	main·aeskeysched+32(SB), X2
	PXOR	main·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	PXOR	X0, X4
	PXOR	X1, X5
	PXO…
（原文的汇编代码在此处被截断，完整实现请参考 src/runtime/asm_amd64.s。）

以上是关于golang 对象(struct) hash原理的主要内容，如果未能解决你的问题，请参考以下文章