golang 对象(struct) hash原理
Posted 惜暮
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了golang 对象(struct) hash原理相关的知识,希望对你有一定的参考价值。
golang 对象hash原理
map里面的key的hash是怎么实现的
源码:src/runtime/map.go
golang的map是内置关键字,不管是get还是set都需要通过key的hash找到对应的存储实体。具体的hash过程如下代码:
type maptype struct
typ _type
key *_type
elem *_type
bucket *_type // internal type representing a hash bucket
keysize uint8 // size of key slot
elemsize uint8 // size of elem slot
bucketsize uint16 // size of bucket
flags uint32
//t *maptype
alg := t.key.alg
hash := alg.hash(key, uintptr(h.hash0))
这里其实就是拿到key对应的类型,然后获取当前key的类型的hash算法。然后调用hash函数。
hash函数的定义在这里:
// typeAlg is also copied/used in reflect/type.go.
// keep them in sync.
//
// typeAlg bundles the hash and equality functions used for one kind of type.
type typeAlg struct {
	// function for hashing objects of this type
	// (ptr to object, seed) -> hash
	hash func(unsafe.Pointer, uintptr) uintptr
	// function for comparing objects of this type
	// (ptr to object A, ptr to object B) -> ==?
	equal func(unsafe.Pointer, unsafe.Pointer) bool
}
可以知道,hash函数的第一个入参是指向key的指针,第二个参数是hash种子。
golang里面的对象的hash原理
golang里面每种数据类型的hash是与数据类型强相关的,并且是由编译器负责做类型绑定的。在src/runtime/alg.go
里面定义了一个数组 [alg_max]typeAlg(以 alg_XXX 常量作为下标),用于表示每个数据类型对应的 typeAlg。
var algarray = [alg_max]typeAlg
alg_NOEQ: nil, nil,
alg_MEM0: memhash0, memequal0,
alg_MEM8: memhash8, memequal8,
alg_MEM16: memhash16, memequal16,
alg_MEM32: memhash32, memequal32,
alg_MEM64: memhash64, memequal64,
alg_MEM128: memhash128, memequal128,
alg_STRING: strhash, strequal,
alg_INTER: interhash, interequal,
alg_NILINTER: nilinterhash, nilinterequal,
alg_FLOAT32: f32hash, f32equal,
alg_FLOAT64: f64hash, f64equal,
alg_CPLX64: c64hash, c64equal,
alg_CPLX128: c128hash, c128equal,
上面的hash函数,最后实际上底层都会调用:func memhash(p unsafe.Pointer, seed, s uintptr) uintptr
函数。以strhash
函数为例:
func strhash(a unsafe.Pointer, h uintptr) uintptr
x := (*stringStruct)(a)
return memhash(x.str, h, uintptr(x.len))
下面看一下memhash
函数, src/runtime/hash64.go
// p表示需要hash的对象的地址
// seed 是hash 种子,
// s是需要hash的对象的字节数
func memhash(p unsafe.Pointer, seed, s uintptr) uintptr
if (GOARCH == "amd64" || GOARCH == "arm64") &&
GOOS != "nacl" && useAeshash
return aeshash(p, seed, s)
h := uint64(seed + s*hashkey[0])
tail:
switch
case s == 0:
case s < 4:
h ^= uint64(*(*byte)(p))
h ^= uint64(*(*byte)(add(p, s>>1))) << 8
h ^= uint64(*(*byte)(add(p, s-1))) << 16
h = rotl_31(h*m1) * m2
case s <= 8:
h ^= uint64(readUnaligned32(p))
h ^= uint64(readUnaligned32(add(p, s-4))) << 32
h = rotl_31(h*m1) * m2
case s <= 16:
h ^= readUnaligned64(p)
h = rotl_31(h*m1) * m2
h ^= readUnaligned64(add(p, s-8))
h = rotl_31(h*m1) * m2
case s <= 32:
h ^= readUnaligned64(p)
h = rotl_31(h*m1) * m2
h ^= readUnaligned64(add(p, 8))
h = rotl_31(h*m1) * m2
h ^= readUnaligned64(add(p, s-16))
h = rotl_31(h*m1) * m2
h ^= readUnaligned64(add(p, s-8))
h = rotl_31(h*m1) * m2
default:
v1 := h
v2 := uint64(seed * hashkey[1])
v3 := uint64(seed * hashkey[2])
v4 := uint64(seed * hashkey[3])
for s >= 32
v1 ^= readUnaligned64(p)
v1 = rotl_31(v1*m1) * m2
p = add(p, 8)
v2 ^= readUnaligned64(p)
v2 = rotl_31(v2*m2) * m3
p = add(p, 8)
v3 ^= readUnaligned64(p)
v3 = rotl_31(v3*m3) * m4
p = add(p, 8)
v4 ^= readUnaligned64(p)
v4 = rotl_31(v4*m4) * m1
p = add(p, 8)
s -= 32
h = v1 ^ v2 ^ v3 ^ v4
goto tail
h ^= h >> 29
h *= m3
h ^= h >> 32
return uintptr(h)
这个就是整体的hash实现。对于amd64和arm64(非nacl系统,且useAeshash为true时),会使用汇编实现的aeshash
函数计算hash。
具体aeshash实现,有兴趣可以看汇编:src/runtime/asm_amd64.s
里面的 runtime·aeshash
函数。
Hash 测试
下面以string作为key,来测试以下三种算法的性能:
- fnv算法
- memhash算法
- aeshash的汇编算法
fnv
测试背景:10000个uuid string;
const LEN = 10000
// the length of element is 36
var keys [LEN][]byte
func init()
for i:=0; i<LEN; i++
k := uuid.New().String()
keys[i] = []byte(k)
// 777000ns
func main()
start := time.Now().UnixNano()
for _, k := range keys
h := fnv.New64()
h.Write(k)
h.Sum64()
end := time.Now().UnixNano()
fmt.Println("total time:", end-start, "ns")
执行结果:10000次hash大约是727000ns,也就是平均每次hash要花费72ns。
memhash
这个是调用golang runtime里面的内嵌hash代码,这部分是我从runtime里面copy出来的。
代码如下:
package main
const LEN = 10000
// the length of element is 36
var keys [LEN]string
func init()
for i:=0; i<LEN; i++
keys[i] = uuid.New().String()
// 178000ns
func main()
start := time.Now().UnixNano()
for _, k := range keys
memhash(unsafe.Pointer(&k), 1, 36)
end := time.Now().UnixNano()
fmt.Println("total time:", end-start, "ns")
// used in hash32,64.go to seed the hash function
var hashkey [4]uintptr

const PtrSize = 4 << (^uintptr(0) >> 63)

// init fills hashkey with random values and forces each one to be odd.
func init() {
	for i := range hashkey {
		hashkey[i] = uintptr(rand.Int63()) | 1 // make sure these numbers are odd
	}
}
// add returns the pointer that lies x bytes past p.
func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
	return unsafe.Pointer(uintptr(p) + x)
}
// rotl_31 rotates x left by 31 bits.
func rotl_31(x uint64) uint64 {
	return (x << 31) | (x >> (64 - 31))
}
const (
// Constants for multiplication: four random odd 64-bit numbers.
m1 = 16877499708836156737
m2 = 2820277070424839065
m3 = 9497967016996688599
m4 = 15839092249703872147
)
// Architecture constants copied along with the hash code. In the visible
// listing only BigEndian is referenced (by readUnaligned32/readUnaligned64).
const (
BigEndian = false
DefaultPhysPageSize = 4096
PCQuantum = 1
Int64Align = 8
MinFrameSize = 0
)
func readUnaligned64(p unsafe.Pointer) uint64
q := (*[8]byte)(p)
if BigEndian
return uint64(q[7]) | uint64(q[6])<<8 | uint64(q[5])<<16 | uint64(q[4])<<24 |
uint64(q[3])<<32 | uint64(q[2])<<40 | uint64(q[1])<<48 | uint64(q[0])<<56
return uint64(q[0]) | uint64(q[1])<<8 | uint64(q[2])<<16 | uint64(q[3])<<24 | uint64(q[4])<<32 | uint64(q[5])<<40 | uint64(q[6])<<48 | uint64(q[7])<<56
// Note: These routines perform the read with an native endianness.
func readUnaligned32(p unsafe.Pointer) uint32
q := (*[4]byte)(p)
if BigEndian
return uint32(q[3]) | uint32(q[2])<<8 | uint32(q[1])<<16 | uint32(q[0])<<24
return uint32(q[0]) | uint32(q[1])<<8 | uint32(q[2])<<16 | uint32(q[3])<<24
func memhash(p unsafe.Pointer, seed, s uintptr) uintptr
h := uint64(seed + s*hashkey[0])
tail:
switch
case s == 0:
case s < 4:
h ^= uint64(*(*byte)(p))
h ^= uint64(*(*byte)(add(p, s>>1))) << 8
h ^= uint64(*(*byte)(add(p, s-1))) << 16
h = rotl_31(h*m1) * m2
case s <= 8:
h ^= uint64(readUnaligned32(p))
h ^= uint64(readUnaligned32(add(p, s-4))) << 32
h = rotl_31(h*m1) * m2
case s <= 16:
h ^= readUnaligned64(p)
h = rotl_31(h*m1) * m2
h ^= readUnaligned64(add(p, s-8))
h = rotl_31(h*m1) * m2
case s <= 32:
h ^= readUnaligned64(p)
h = rotl_31(h*m1) * m2
h ^= readUnaligned64(add(p, 8))
h = rotl_31(h*m1) * m2
h ^= readUnaligned64(add(p, s-16))
h = rotl_31(h*m1) * m2
h ^= readUnaligned64(add(p, s-8))
h = rotl_31(h*m1) * m2
default:
v1 := h
v2 := uint64(seed * hashkey[1])
v3 := uint64(seed * hashkey[2])
v4 := uint64(seed * hashkey[3])
for s >= 32
v1 ^= readUnaligned64(p)
v1 = rotl_31(v1*m1) * m2
p = add(p, 8)
v2 ^= readUnaligned64(p)
v2 = rotl_31(v2*m2) * m3
p = add(p, 8)
v3 ^= readUnaligned64(p)
v3 = rotl_31(v3*m3) * m4
p = add(p, 8)
v4 ^= readUnaligned64(p)
v4 = rotl_31(v4*m4) * m1
p = add(p, 8)
s -= 32
h = v1 ^ v2 ^ v3 ^ v4
goto tail
h ^= h >> 29
h *= m3
h ^= h >> 32
return uintptr(h)
执行结果:10000次hash大约是178000ns,也就是平均每次hash要花费17.8ns。
aeshash汇编
这部分代码也是从runtime asm代码copy出来做测试:
注意,需要把src/runtime/go_tls.h、funcdata.h、textflag.h 这三个头文件放在当前目录下。
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
//louyuting
// func aeshashstr2(p unsafe.Pointer, h uintptr) uintptr
// Loads the string's data pointer and length from the string struct at p,
// then jumps to aeshashbody with AX = data, CX = length and
// DX = address of the return slot.
TEXT ·aeshashstr2(SB),NOSPLIT,$0-24
MOVQ p+0(FP), AX // ptr to string struct
MOVQ 8(AX), CX // length of string
MOVQ (AX), AX // string data
LEAQ ret+16(FP), DX // address to put return value
JMP aeshashbody<>(SB) // continue in the shared hashing body
// AX: data
// CX: length
// DX: address to put return value
TEXT aeshashbody<>(SB),NOSPLIT,$0-0
// Fill an SSE register with our seeds.
MOVQ h+8(FP), X0 // 64 bits of per-table hash seed
PINSRW $4, CX, X0 // 16 bits of length
PSHUFHW $0, X0, X0 // repeat length 4 times total
MOVO X0, X1 // save unscrambled seed
PXOR main·aeskeysched(SB), X0 // xor in per-process seed
AESENC X0, X0 // scramble seed
CMPQ CX, $16
JB aes0to15
JE aes16
CMPQ CX, $32
JBE aes17to32
CMPQ CX, $64
JBE aes33to64
CMPQ CX, $128
JBE aes65to128
JMP aes129plus
aes0to15:
TESTQ CX, CX
JE aes0
ADDQ $16, AX
TESTW $0xff0, AX
JE endofpage
// 16 bytes loaded at this address won't cross
// a page boundary, so we can load it directly.
MOVOU -16(AX), X1
ADDQ CX, CX
MOVQ $masks<>(SB), AX
PAND (AX)(CX*8), X1
final1:
PXOR X0, X1 // xor data with seed
AESENC X1, X1 // scramble combo 3 times
AESENC X1, X1
AESENC X1, X1
MOVQ X1, (DX)
RET
endofpage:
// address ends in 1111xxxx. Might be up against
// a page boundary, so load ending at last byte.
// Then shift bytes down using pshufb.
MOVOU -32(AX)(CX*1), X1
ADDQ CX, CX
MOVQ $shifts<>(SB), AX
PSHUFB (AX)(CX*8), X1
JMP final1
aes0:
// Return scrambled input seed
AESENC X0, X0
MOVQ X0, (DX)
RET
aes16:
MOVOU (AX), X1
JMP final1
aes17to32:
// make second starting seed
PXOR main·aeskeysched+16(SB), X1
AESENC X1, X1
// load data to be hashed
MOVOU (AX), X2
MOVOU -16(AX)(CX*1), X3
// xor with seed
PXOR X0, X2
PXOR X1, X3
// scramble 3 times
AESENC X2, X2
AESENC X3, X3
AESENC X2, X2
AESENC X3, X3
AESENC X2, X2
AESENC X3, X3
// combine results
PXOR X3, X2
MOVQ X2, (DX)
RET
aes33to64:
// make 3 more starting seeds
MOVO X1, X2
MOVO X1, X3
PXOR main·aeskeysched+16(SB), X1
PXOR main·aeskeysched+32(SB), X2
PXOR main·aeskeysched+48(SB), X3
AESENC X1, X1
AESENC X2, X2
AESENC X3, X3
MOVOU (AX), X4
MOVOU 16(AX), X5
MOVOU -32(AX)(CX*1), X6
MOVOU -16(AX)(CX*1), X7
PXOR X0, X4
PXOR X1, X5
PXO以上是关于golang 对象(struct) hash原理的主要内容,如果未能解决你的问题,请参考以下文章