Просмотр исходного кода

Merge pull request #768 from rod-hynes/meek-payload-padding

Add meek payload padding
Rod Hynes 2 недели назад
Родитель
Commit
0193b4bc53
44 измененных файлов с 5718 добавлено и 106 удалено
  1. 2 0
      go.mod
  2. 4 0
      go.sum
  3. 19 0
      psiphon/common/parameters/parameters.go
  4. 383 0
      psiphon/common/protocol/meek.go
  5. 423 0
      psiphon/common/protocol/meek_test.go
  6. 13 0
      psiphon/common/protocol/protocol.go
  7. 63 3
      psiphon/config.go
  8. 32 0
      psiphon/dialParameters.go
  9. 230 32
      psiphon/meekConn.go
  10. 3 1
      psiphon/notice.go
  11. 194 27
      psiphon/server/meek.go
  12. 76 3
      psiphon/server/meek_test.go
  13. 13 3
      psiphon/server/pb/psiphond/dial_params.pb.go
  14. 1 0
      psiphon/server/proto/ca.psiphon.psiphond/dial_params.proto
  15. 67 36
      psiphon/server/server_test.go
  16. 4 1
      psiphon/serverApi.go
  17. 24 0
      vendor/github.com/klauspost/cpuid/v2/.gitignore
  18. 74 0
      vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml
  19. 35 0
      vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt
  20. 22 0
      vendor/github.com/klauspost/cpuid/v2/LICENSE
  21. 499 0
      vendor/github.com/klauspost/cpuid/v2/README.md
  22. 1558 0
      vendor/github.com/klauspost/cpuid/v2/cpuid.go
  23. 47 0
      vendor/github.com/klauspost/cpuid/v2/cpuid_386.s
  24. 72 0
      vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s
  25. 36 0
      vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s
  26. 248 0
      vendor/github.com/klauspost/cpuid/v2/detect_arm64.go
  27. 17 0
      vendor/github.com/klauspost/cpuid/v2/detect_ref.go
  28. 41 0
      vendor/github.com/klauspost/cpuid/v2/detect_x86.go
  29. 291 0
      vendor/github.com/klauspost/cpuid/v2/featureid_string.go
  30. 121 0
      vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
  31. 130 0
      vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go
  32. 16 0
      vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go
  33. 8 0
      vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go
  34. 11 0
      vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go
  35. 15 0
      vendor/github.com/klauspost/cpuid/v2/test-architectures.sh
  36. 202 0
      vendor/github.com/minio/crc64nvme/LICENSE
  37. 20 0
      vendor/github.com/minio/crc64nvme/README.md
  38. 185 0
      vendor/github.com/minio/crc64nvme/crc64.go
  39. 17 0
      vendor/github.com/minio/crc64nvme/crc64_amd64.go
  40. 309 0
      vendor/github.com/minio/crc64nvme/crc64_amd64.s
  41. 17 0
      vendor/github.com/minio/crc64nvme/crc64_arm64.go
  42. 157 0
      vendor/github.com/minio/crc64nvme/crc64_arm64.s
  43. 13 0
      vendor/github.com/minio/crc64nvme/crc64_other.go
  44. 6 0
      vendor/modules.txt

+ 2 - 0
go.mod

@@ -128,10 +128,12 @@ require (
 	github.com/kamstrup/intmap v0.5.2 // indirect
 	github.com/kamstrup/intmap v0.5.2 // indirect
 	github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect
 	github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect
 	github.com/klauspost/compress v1.18.0 // indirect
 	github.com/klauspost/compress v1.18.0 // indirect
+	github.com/klauspost/cpuid/v2 v2.2.9 // indirect
 	github.com/libp2p/go-reuseport v0.4.0 // indirect
 	github.com/libp2p/go-reuseport v0.4.0 // indirect
 	github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
 	github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
 	github.com/mdlayher/netlink v1.7.2 // indirect
 	github.com/mdlayher/netlink v1.7.2 // indirect
 	github.com/mdlayher/socket v0.5.0 // indirect
 	github.com/mdlayher/socket v0.5.0 // indirect
+	github.com/minio/crc64nvme v1.1.1 // indirect
 	github.com/mroth/weightedrand v1.0.0 // indirect
 	github.com/mroth/weightedrand v1.0.0 // indirect
 	github.com/onsi/ginkgo/v2 v2.12.0 // indirect
 	github.com/onsi/ginkgo/v2 v2.12.0 // indirect
 	github.com/pelletier/go-toml v1.9.5 // indirect
 	github.com/pelletier/go-toml v1.9.5 // indirect

+ 4 - 0
go.sum

@@ -144,6 +144,8 @@ github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 h1:iQTw/8FWTuc7uia
 github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8=
 github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8=
 github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
 github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
 github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
 github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY=
+github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8=
 github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
 github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
 github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
 github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
 github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
 github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
@@ -167,6 +169,8 @@ github.com/mdlayher/socket v0.5.0 h1:ilICZmJcQz70vrWVes1MFera4jGiWNocSkykwwoy3XI
 github.com/mdlayher/socket v0.5.0/go.mod h1:WkcBFfvyG8QENs5+hfQPl1X6Jpd2yeLIYgrGFmJiJxI=
 github.com/mdlayher/socket v0.5.0/go.mod h1:WkcBFfvyG8QENs5+hfQPl1X6Jpd2yeLIYgrGFmJiJxI=
 github.com/miekg/dns v1.1.56 h1:5imZaSeoRNvpM9SzWNhEcP9QliKiz20/dA2QabIGVnE=
 github.com/miekg/dns v1.1.56 h1:5imZaSeoRNvpM9SzWNhEcP9QliKiz20/dA2QabIGVnE=
 github.com/miekg/dns v1.1.56/go.mod h1:cRm6Oo2C8TY9ZS/TqsSrseAcncm74lfK5G+ikN2SWWY=
 github.com/miekg/dns v1.1.56/go.mod h1:cRm6Oo2C8TY9ZS/TqsSrseAcncm74lfK5G+ikN2SWWY=
+github.com/minio/crc64nvme v1.1.1 h1:8dwx/Pz49suywbO+auHCBpCtlW1OfpcLN7wYgVR6wAI=
+github.com/minio/crc64nvme v1.1.1/go.mod h1:eVfm2fAzLlxMdUGc0EEBGSMmPwmXD5XiNRpnu9J3bvg=
 github.com/mitchellh/panicwrap v0.0.0-20170106182340-fce601fe5557 h1:w1QuuAA2km2Hax+EPamrq5ZRBeaNv2vsjvgB4an0zoU=
 github.com/mitchellh/panicwrap v0.0.0-20170106182340-fce601fe5557 h1:w1QuuAA2km2Hax+EPamrq5ZRBeaNv2vsjvgB4an0zoU=
 github.com/mitchellh/panicwrap v0.0.0-20170106182340-fce601fe5557/go.mod h1:QuAqW7/z+iv6aWFJdrA8kCbsF0OOJVKCICqTcYBexuY=
 github.com/mitchellh/panicwrap v0.0.0-20170106182340-fce601fe5557/go.mod h1:QuAqW7/z+iv6aWFJdrA8kCbsF0OOJVKCICqTcYBexuY=
 github.com/mroth/weightedrand v1.0.0 h1:V8JeHChvl2MP1sAoXq4brElOcza+jxLkRuwvtQu8L3E=
 github.com/mroth/weightedrand v1.0.0 h1:V8JeHChvl2MP1sAoXq4brElOcza+jxLkRuwvtQu8L3E=

+ 19 - 0
psiphon/common/parameters/parameters.go

@@ -251,6 +251,7 @@ const (
 	ReplayTLSFragmentClientHello                       = "ReplayTLSFragmentClientHello"
 	ReplayTLSFragmentClientHello                       = "ReplayTLSFragmentClientHello"
 	ReplayInproxyWebRTC                                = "ReplayInproxyWebRTC"
 	ReplayInproxyWebRTC                                = "ReplayInproxyWebRTC"
 	ReplayInproxySTUN                                  = "ReplayInproxySTUN"
 	ReplayInproxySTUN                                  = "ReplayInproxySTUN"
+	ReplayMeekPayloadPadding                           = "ReplayMeekPayloadPadding"
 	APIRequestUpstreamPaddingMinBytes                  = "APIRequestUpstreamPaddingMinBytes"
 	APIRequestUpstreamPaddingMinBytes                  = "APIRequestUpstreamPaddingMinBytes"
 	APIRequestUpstreamPaddingMaxBytes                  = "APIRequestUpstreamPaddingMaxBytes"
 	APIRequestUpstreamPaddingMaxBytes                  = "APIRequestUpstreamPaddingMaxBytes"
 	APIRequestDownstreamPaddingMinBytes                = "APIRequestDownstreamPaddingMinBytes"
 	APIRequestDownstreamPaddingMinBytes                = "APIRequestDownstreamPaddingMinBytes"
@@ -562,6 +563,14 @@ const (
 	DSLPrioritizeDialPlaceholderTTL                    = "DSLPrioritizeDialPlaceholderTTL"
 	DSLPrioritizeDialPlaceholderTTL                    = "DSLPrioritizeDialPlaceholderTTL"
 	ServerEntryIteratorMaxMoveToFront                  = "ServerEntryIteratorMaxMoveToFront"
 	ServerEntryIteratorMaxMoveToFront                  = "ServerEntryIteratorMaxMoveToFront"
 	ServerEntryIteratorResetProbability                = "ServerEntryIteratorResetProbability"
 	ServerEntryIteratorResetProbability                = "ServerEntryIteratorResetProbability"
+	MeekPayloadPaddingProbability                      = "MeekPayloadPaddingProbability"
+	MeekPayloadPaddingLimitTunnelProtocols             = "MeekPayloadPaddingLimitTunnelProtocols"
+	MeekPayloadPaddingClientOmitProbability            = "MeekPayloadPaddingClientOmitProbability"
+	MeekPayloadPaddingClientMinSize                    = "MeekPayloadPaddingClientMinSize"
+	MeekPayloadPaddingClientMaxSize                    = "MeekPayloadPaddingClientMaxSize"
+	MeekPayloadPaddingServerOmitProbability            = "MeekPayloadPaddingServerOmitProbability"
+	MeekPayloadPaddingServerMinSize                    = "MeekPayloadPaddingServerMinSize"
+	MeekPayloadPaddingServerMaxSize                    = "MeekPayloadPaddingServerMaxSize"
 
 
 	// Retired parameters
 	// Retired parameters
 
 
@@ -838,6 +847,7 @@ var defaultParameters = map[string]struct {
 	ReplayTLSFragmentClientHello:         {value: true},
 	ReplayTLSFragmentClientHello:         {value: true},
 	ReplayInproxyWebRTC:                  {value: true},
 	ReplayInproxyWebRTC:                  {value: true},
 	ReplayInproxySTUN:                    {value: true},
 	ReplayInproxySTUN:                    {value: true},
+	ReplayMeekPayloadPadding:             {value: true},
 
 
 	APIRequestUpstreamPaddingMinBytes:   {value: 0, minimum: 0},
 	APIRequestUpstreamPaddingMinBytes:   {value: 0, minimum: 0},
 	APIRequestUpstreamPaddingMaxBytes:   {value: 1024, minimum: 0},
 	APIRequestUpstreamPaddingMaxBytes:   {value: 1024, minimum: 0},
@@ -1203,6 +1213,15 @@ var defaultParameters = map[string]struct {
 
 
 	ServerEntryIteratorMaxMoveToFront:   {value: -1, minimum: -1},
 	ServerEntryIteratorMaxMoveToFront:   {value: -1, minimum: -1},
 	ServerEntryIteratorResetProbability: {value: 1.0, minimum: 0.0},
 	ServerEntryIteratorResetProbability: {value: 1.0, minimum: 0.0},
+
+	MeekPayloadPaddingProbability:           {value: 0.0, minimum: 0.0},
+	MeekPayloadPaddingLimitTunnelProtocols:  {value: protocol.TunnelProtocols{}},
+	MeekPayloadPaddingClientOmitProbability: {value: 0.0, minimum: 0.0},
+	MeekPayloadPaddingClientMinSize:         {value: 0, minimum: 0},
+	MeekPayloadPaddingClientMaxSize:         {value: 65533, minimum: 0},
+	MeekPayloadPaddingServerOmitProbability: {value: 0.0, minimum: 0.0, flags: serverSideOnly},
+	MeekPayloadPaddingServerMinSize:         {value: 0, minimum: 0, flags: serverSideOnly},
+	MeekPayloadPaddingServerMaxSize:         {value: 65533, minimum: 0, flags: serverSideOnly},
 }
 }
 
 
 // IsServerSideOnly indicates if the parameter specified by name is used
 // IsServerSideOnly indicates if the parameter specified by name is used

+ 383 - 0
psiphon/common/protocol/meek.go

@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2025, Psiphon Inc.
+ * All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+package protocol
+
+import (
+	"bytes"
+	"crypto/aes"
+	"crypto/cipher"
+	"crypto/sha256"
+	"encoding/binary"
+	std_errors "errors"
+	"io"
+
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
+	"golang.org/x/crypto/hkdf"
+)
+
+const (
+	meekPayloadPaddingPrefixNoPadding = 0 // prefix byte: the header is this prefix only
+	meekPayloadPaddingPrefixPadding   = 1 // prefix byte: 2 size bytes and padding follow
+
+	meekPayloadPaddingDirectionRequests  = "meek-payload-padding-requests"  // HKDF info for the request direction key
+	meekPayloadPaddingDirectionResponses = "meek-payload-padding-responses" // HKDF info for the response direction key
+
+	meekPayloadPaddingReceiverConsumeStatePrefix    = 0 // receiver start state: read the 1 byte prefix
+	meekPayloadPaddingReceiverConsumeStateSizeByte1 = 1 // receiver state: read the first size byte
+	meekPayloadPaddingReceiverConsumeStateSizeByte2 = 2 // receiver state: read the second size byte
+	meekPayloadPaddingReceiverConsumeStatePadding   = 3 // receiver state: discard the padding bytes
+
+	MeekPayloadPaddingPrefixSize = 1 // the padding header prefix is 1 byte
+)
+
+// MeekPayloadPaddingState provides support for padding empty meek payloads,
+// to vary request and response traffic shapes.
+//
+// The padding is to be prepended directly to the payloads, the request and
+// response bodies, and is intended to be indistinguishable from the fully
+// encrypted OSSH payload which is observable in HTTP and decrypted HTTPS.
+// The padding header prefix and size are obfuscated with a stream cipher,
+// while the padding itself is plain random bytes.
+//
+// Both the meek client and server will use two MeekPayloadPaddingState
+// instances in payload padding mode: one for request padding and one for
+// response padding. Each client/server pair requires the obfuscation cipher
+// state to be kept in sync; the caller is responsible for handling meek
+// retries in such a way that this synchronization is maintained.
+//
+// MeekPayloadPaddingState also supports omitting padding entirely, with some
+// probability, to further vary traffic shapes.
+//
+// Each MeekPayloadPaddingState instance may be used for one direction only,
+// either sending or receiving.
+type MeekPayloadPaddingState struct {
+	stream cipher.Stream // AES-CTR keystream applied to padding header bytes
+
+	// Sender state
+	omitPaddingProbability    float64
+	minPaddingSize            int
+	maxPaddingSize            int
+	senderPaddingHeaderBuffer bytes.Buffer // reset and reused by each SenderGetNextPadding call
+
+	// Receiver state
+	receiverConsumeState          int      // one of the meekPayloadPaddingReceiverConsumeState* values
+	receiverPaddingBytesRemaining int      // padding bytes still to be discarded
+	receiverPaddingPreamble       [3]byte  // deobfuscated prefix and 2 size bytes
+	receiverConsumeBuffer         [1024]byte // scratch space for discarded padding
+}
+
+// NewMeekRequestPayloadPaddingState initializes a MeekPayloadPaddingState for
+// sending or receiving padded requests; invalid padding sizes yield an error.
+func NewMeekRequestPayloadPaddingState(
+	obfuscatedMeekKey string,
+	obfuscatedMeekCookie string,
+	omitPaddingProbability float64,
+	minPaddingSize int,
+	maxPaddingSize int) (*MeekPayloadPaddingState, error) {
+
+	state, err := newMeekPayloadPaddingState(
+		meekPayloadPaddingDirectionRequests,
+		obfuscatedMeekKey,
+		obfuscatedMeekCookie,
+		omitPaddingProbability,
+		minPaddingSize,
+		maxPaddingSize)
+	if err != nil {
+		return state, errors.Trace(err) // state is nil on every error path of the helper
+	}
+	return state, nil
+}
+
+// NewMeekResponsePayloadPaddingState initializes a MeekPayloadPaddingState for
+// sending or receiving padded responses; invalid padding sizes yield an error.
+func NewMeekResponsePayloadPaddingState(
+	obfuscatedMeekKey string,
+	obfuscatedMeekCookie string,
+	omitPaddingProbability float64,
+	minPaddingSize int,
+	maxPaddingSize int) (*MeekPayloadPaddingState, error) {
+
+	state, err := newMeekPayloadPaddingState(
+		meekPayloadPaddingDirectionResponses,
+		obfuscatedMeekKey,
+		obfuscatedMeekCookie,
+		omitPaddingProbability,
+		minPaddingSize,
+		maxPaddingSize)
+	if err != nil {
+		return state, errors.Trace(err) // state is nil on every error path of the helper
+	}
+	return state, nil
+}
+
+func newMeekPayloadPaddingState(
+	direction string,
+	obfuscatedMeekKey string,
+	obfuscatedMeekCookie string,
+	omitPaddingProbability float64,
+	minPaddingSize int,
+	maxPaddingSize int) (*MeekPayloadPaddingState, error) {
+
+	// Maximum padding length of 65533 is the max meek request size, 65536,
+	// less the 3 byte padding header with prefix and length bytes.
+
+	if minPaddingSize < 0 ||
+		minPaddingSize > maxPaddingSize ||
+		maxPaddingSize > 65533 {
+
+		return nil, errors.TraceNew("invalid padding size")
+	}
+
+	state := &MeekPayloadPaddingState{
+		omitPaddingProbability: omitPaddingProbability,
+		minPaddingSize:         minPaddingSize,
+		maxPaddingSize:         maxPaddingSize,
+	}
+
+	// For the cipher stream applied to the padding header, derive a unique
+	// key using a value unknown to an adversary (obfuscatedMeekKey), a
+	// unique nonce per flow (obfuscatedMeekCookie), and a unique salt or
+	// context for the direction (request/response), all ensuring that the
+	// adversary observing the stream cannot distinguish the encrypted
+	// padding header from random bytes either by directly decrypting it or
+	// xoring various bytes together.
+	//
+	// A stream cipher is used to minimize payload overhead. Given the unique
+	// key per flow and direction, an all zeroes IV suffices, saving payload
+	// bytes. There's no authentication, also to save payload bytes and
+	// maximize shape distribution; the underlying SSH layer provides proper
+	// authentication and full transport security for actual tunneled traffic.
+
+	var key [32]byte
+	var iv [aes.BlockSize]byte
+
+	_, err := io.ReadFull(
+		hkdf.New(
+			sha256.New,
+			[]byte(obfuscatedMeekKey), // HKDF secret
+			[]byte(obfuscatedMeekCookie), // HKDF salt
+			[]byte(direction)), // HKDF info
+		key[:])
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
+
+	block, err := aes.NewCipher(key[:])
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
+
+	state.stream = cipher.NewCTR(block, iv[:])
+
+	return state, nil
+}
+
+// SenderGetNextPadding returns the next obfuscated padding header and padding
+// bytes. When addPadding is false, the returned header contains only the
+// NoPadding prefix. Otherwise, a full padding header and padding bytes are
+// returned. With omitPaddingProbability, in the addPadding true case an
+// empty header may be returned, allowing for zero byte payloads, saving some
+// data and further varying the traffic shape.
+//
+// The returned slice is only valid until the next SenderGetNextPadding call.
+//
+// Not safe for concurrent use. The returned error is currently always nil.
+func (state *MeekPayloadPaddingState) SenderGetNextPadding(
+	addPadding bool) ([]byte, error) {
+
+	// As a future enhancement, consider adding a new state and prefix,
+	// meekPayloadPaddingPrefixEndPadding. After sufficient packets, this
+	// prefix is sent, and no further padding, including prefix, will be
+	// added. The challenge with this is that meek resiliency and
+	// MeekRedialTLSProbability both result in new TCP flows for the same
+	// meek session, flows which would presumably need to start adding padding
+	// again, requiring some mechanism to signal this; with an intermediary
+	// such as a CDN, the server won't be able to infer new TCP flows simply
+	// at the socket level.
+
+	state.senderPaddingHeaderBuffer.Reset()
+
+	if addPadding && prng.FlipWeightedCoin(state.omitPaddingProbability) {
+
+		// With the given probability, select no padding header at all.
+		return state.senderPaddingHeaderBuffer.Bytes(), nil
+	}
+
+	if !addPadding {
+
+		var preamble [1]byte
+		preamble[0] = meekPayloadPaddingPrefixNoPadding
+		state.stream.XORKeyStream(preamble[:], preamble[:])
+		state.senderPaddingHeaderBuffer.Write(preamble[:])
+		return state.senderPaddingHeaderBuffer.Bytes(), nil
+	}
+
+	paddingSize := prng.Range(state.minPaddingSize, state.maxPaddingSize)
+
+	var preamble [3]byte
+	preamble[0] = meekPayloadPaddingPrefixPadding
+	binary.BigEndian.PutUint16(preamble[1:3], uint16(paddingSize))
+	state.stream.XORKeyStream(preamble[:], preamble[:])
+	state.senderPaddingHeaderBuffer.Write(preamble[:])
+	state.senderPaddingHeaderBuffer.Write(prng.Bytes(paddingSize))
+	return state.senderPaddingHeaderBuffer.Bytes(), nil
+}
+
+var ErrMeekPaddingStateImmediateEOF = std_errors.New("immediate EOF") // returned by ReceiverConsumePadding when the prefix read hits io.EOF
+
+// ReceiverConsumePadding reads and consumes payload padding from the input
+// reader.
+//
+// The padding consists of a preamble with a 1 byte prefix and an optional 2 byte
+// size, followed by the specified number of padding bytes, if any. The padding
+// header is deobfuscated using a cipher stream that should be kept in sync
+// with the corresponding sender state.
+//
+// ReceiverConsumePadding supports reading as little as 1 byte at a time from the
+// reader and statefully resuming on subsequent calls. retContinue true and a
+// non-nil retErr indicates a partial read; the caller should call
+// ReceiverConsumePadding again to resume. There is no retContinue true and nil
+// retErr case.
+//
+// A special return value for retErr of ErrMeekPaddingStateImmediateEOF
+// indicates that the reader had 0 bytes, and this may be treated as an "omit
+// padding" case.
+//
+// retBytesRead is the number of bytes read from reader, even in error cases.
+//
+// Not safe for concurrent use.
+func (state *MeekPayloadPaddingState) ReceiverConsumePadding(
+	reader io.Reader) (retBytesRead int64, retContinue bool, retErr error) {
+
+	bytesRead := int64(0)
+	for {
+
+		// Use a state machine and read one byte at a time. This allows
+		// MeekPayloadPaddingState.ReceiverConsumePadding to handle meek payload
+		// partial reads which may return as little as one byte and an error.
+
+		switch state.receiverConsumeState {
+
+		case meekPayloadPaddingReceiverConsumeStatePrefix:
+
+			n, err := io.ReadFull(reader, state.receiverPaddingPreamble[0:1])
+			// Only 1 byte is requested, so there's no partial-read-with-error
+			// case to handle.
+			if err != nil {
+				if err == io.EOF {
+
+					// If the request/response body is empty, ReadFull will
+					// immediately return io.EOF. The caller can use the
+					// special ErrMeekPaddingStateImmediateEOF return value to treat this
+					// case as a success, allowing for actual empty payloads
+					// in addition to padded payloads.
+
+					return 0, false, ErrMeekPaddingStateImmediateEOF
+				}
+				return bytesRead, true, errors.TraceReader(err)
+			}
+
+			bytesRead += int64(n)
+			state.stream.XORKeyStream(
+				state.receiverPaddingPreamble[0:1],
+				state.receiverPaddingPreamble[0:1])
+
+			switch state.receiverPaddingPreamble[0] {
+
+			case meekPayloadPaddingPrefixNoPadding:
+				// With NoPadding, there's only 1 byte to read, so go back to
+				// the start state.
+				state.receiverConsumeState = meekPayloadPaddingReceiverConsumeStatePrefix
+
+			case meekPayloadPaddingPrefixPadding:
+				// Next states: read the 2 padding size bytes.
+				state.receiverConsumeState = meekPayloadPaddingReceiverConsumeStateSizeByte1
+
+			default:
+				return bytesRead, false, errors.TraceNew("unknown padding prefix")
+			}
+
+		case meekPayloadPaddingReceiverConsumeStateSizeByte1:
+
+			n, err := io.ReadFull(reader, state.receiverPaddingPreamble[1:2])
+			if err != nil {
+				return bytesRead, true, errors.TraceReader(err)
+			}
+			bytesRead += int64(n)
+			state.stream.XORKeyStream(
+				state.receiverPaddingPreamble[1:2],
+				state.receiverPaddingPreamble[1:2])
+
+			state.receiverConsumeState = meekPayloadPaddingReceiverConsumeStateSizeByte2
+
+		case meekPayloadPaddingReceiverConsumeStateSizeByte2:
+
+			n, err := io.ReadFull(reader, state.receiverPaddingPreamble[2:3])
+			if err != nil {
+				return bytesRead, true, errors.TraceReader(err)
+			}
+			bytesRead += int64(n)
+			state.stream.XORKeyStream(
+				state.receiverPaddingPreamble[2:3],
+				state.receiverPaddingPreamble[2:3])
+
+			// Since the obfuscation cipher has no authentication, we may
+			// proceed with a corrupt or manipulated padding size; but the 2
+			// bytes can only represent up to the max padding size of ~64K anyway.
+
+			state.receiverPaddingBytesRemaining = int(
+				binary.BigEndian.Uint16(state.receiverPaddingPreamble[1:3]))
+			state.receiverConsumeState = meekPayloadPaddingReceiverConsumeStatePadding
+
+		case meekPayloadPaddingReceiverConsumeStatePadding:
+
+			// The size of receiverConsumeBuffer is chosen as a tradeoff
+			// between memory overhead and I/O calls.
+
+			for state.receiverPaddingBytesRemaining > 0 {
+				m := state.receiverPaddingBytesRemaining
+				if m > len(state.receiverConsumeBuffer) {
+					m = len(state.receiverConsumeBuffer)
+				}
+				n, err := io.ReadFull(reader, state.receiverConsumeBuffer[0:m])
+				bytesRead += int64(n)
+				state.receiverPaddingBytesRemaining -= n
+				if err != nil {
+					return bytesRead, true, errors.TraceReader(err)
+				}
+			}
+			// After all padding bytes are read, go back to the start state.
+			state.receiverConsumeState = meekPayloadPaddingReceiverConsumeStatePrefix
+
+		default:
+			return bytesRead, false, errors.TraceNew("unknown consume padding state")
+
+		}
+
+		if state.receiverConsumeState == meekPayloadPaddingReceiverConsumeStatePrefix {
+			// Done when back to the start state.
+			break
+		}
+		// Else loop and read the next byte(s) for the next state.
+	}
+
+	return bytesRead, false, nil
+}

+ 423 - 0
psiphon/common/protocol/meek_test.go

@@ -0,0 +1,423 @@
+/*
+ * Copyright (c) 2025, Psiphon Inc.
+ * All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+package protocol
+
+import (
+	"bytes"
+	"io"
+	"testing"
+
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
+)
+
+func TestMeekPayloadPadding(t *testing.T) {
+
+	err := runTestMeekPayloadPadding()
+	if err != nil {
+		t.Fatal(err.Error())
+		return // NOTE(review): unreachable, t.Fatal already stops this test goroutine
+	}
+}
+
+func runTestMeekPayloadPadding() error {
+
+	key := prng.HexString(16)
+	cookie := prng.HexString(16)
+
+	// Test: invalid configurations (negative min; min > max; max > 65533)
+
+	_, err := NewMeekRequestPayloadPaddingState(key, cookie, 0.0, -1, 0)
+	if err == nil {
+		return errors.TraceNew("unexpected success")
+	}
+
+	_, err = NewMeekRequestPayloadPaddingState(key, cookie, 0.0, 2, 1)
+	if err == nil {
+		return errors.TraceNew("unexpected success")
+	}
+
+	_, err = NewMeekRequestPayloadPaddingState(key, cookie, 0.0, 0, 65534)
+	if err == nil {
+		return errors.TraceNew("unexpected success")
+	}
+
+	// Test: immediate EOF (the receiver is given an empty reader)
+
+	receiver, err := NewMeekRequestPayloadPaddingState(key, cookie, 0.0, 0, 0)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	bytesRead, morePadding, err := receiver.ReceiverConsumePadding(
+		bytes.NewReader(nil))
+	if bytesRead != 0 || morePadding || err != ErrMeekPaddingStateImmediateEOF {
+		return errors.TraceNew("unexpected consume return values")
+	}
+
+	// Test: unknown prefix (a corrupted NoPadding header must be rejected)
+
+	sender, err := NewMeekRequestPayloadPaddingState(key, cookie, 0.0, 1, 1)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	receiver, err = NewMeekRequestPayloadPaddingState(key, cookie, 0.0, 0, 0)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	paddingHeader, err := sender.SenderGetNextPadding(false)
+	if err != nil {
+		return errors.Trace(err)
+	}
+	if len(paddingHeader) != 1 {
+		return errors.TraceNew("unexpected padding header length")
+	}
+
+	corrupt := append([]byte(nil), paddingHeader...)
+	corrupt[0] ^= 0x02 // flips decrypted prefix from 0 to 2
+
+	bytesRead, morePadding, err = receiver.ReceiverConsumePadding(
+		bytes.NewReader(corrupt))
+	if bytesRead != 1 || morePadding || err == nil {
+		return errors.TraceNew("unexpected consume return values")
+	}
+
+	// Test: incomplete padding size (only the prefix byte is available)
+
+	sender, err = NewMeekRequestPayloadPaddingState(key, cookie, 0.0, 1, 1)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	receiver, err = NewMeekRequestPayloadPaddingState(key, cookie, 0.0, 0, 0)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	paddingHeader, err = sender.SenderGetNextPadding(true)
+	if err != nil {
+		return errors.Trace(err)
+	}
+	if len(paddingHeader) < 3 {
+		return errors.TraceNew("unexpected padding header length")
+	}
+
+	bytesRead, morePadding, err = receiver.ReceiverConsumePadding(
+		bytes.NewReader(paddingHeader[:1]))
+	if bytesRead != 1 || !morePadding || err == nil {
+		return errors.TraceNew("unexpected consume return values")
+	}
+
+	// Test: incomplete padding (3 byte header available, padding byte missing)
+
+	sender, err = NewMeekRequestPayloadPaddingState(key, cookie, 0.0, 1, 1)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	receiver, err = NewMeekRequestPayloadPaddingState(key, cookie, 0.0, 0, 0)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	paddingHeader, err = sender.SenderGetNextPadding(true)
+	if err != nil {
+		return errors.Trace(err)
+	}
+	if len(paddingHeader) < 4 {
+		return errors.TraceNew("unexpected padded header length")
+	}
+
+	bytesRead, morePadding, err = receiver.ReceiverConsumePadding(
+		bytes.NewReader(paddingHeader[:3]))
+	if bytesRead != 3 || !morePadding || err == nil {
+		return errors.TraceNew("unexpected consume return values")
+	}
+
+	// Test: round trips (paired sender/receiver states for each direction)
+
+	const (
+		roundTrips = 1000
+
+		emptyPayloadProbability = 0.5
+
+		requestMinSize = 1
+		requestMaxSize = 131072
+
+		responseMinSize = 1
+		responseMaxSize = 131072
+
+		omitPaddingProbability = 0.5
+
+		minPaddingSize = 1
+		maxPaddingSize = 65533
+	)
+
+	clientRequestPaddingState, err := NewMeekRequestPayloadPaddingState(
+		key, cookie, omitPaddingProbability, minPaddingSize, maxPaddingSize)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	serverRequestPaddingState, err := NewMeekRequestPayloadPaddingState(
+		key, cookie, 0.0, 0, 0)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	serverResponsePaddingState, err := NewMeekResponsePayloadPaddingState(
+		key, cookie, omitPaddingProbability, minPaddingSize, maxPaddingSize)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	clientResponsePaddingState, err := NewMeekResponsePayloadPaddingState(
+		key, cookie, 0.0, 0, 0)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	for i := 0; i < roundTrips; i++ {
+
+		// Client sends potentially padded request to server; padding is
+		// requested only when the simulated payload is empty.
+		requestSize := 0
+		if !prng.FlipWeightedCoin(emptyPayloadProbability) {
+			requestSize = prng.Range(requestMinSize, requestMaxSize)
+		}
+
+		requestPaddingHeader, err := clientRequestPaddingState.SenderGetNextPadding(
+			requestSize == 0)
+		if err != nil {
+			return errors.Trace(err)
+		}
+
+		if requestSize > 0 {
+			if len(requestPaddingHeader) != 1 {
+				return errors.TraceNew("unexpected request no-padding header")
+			}
+		} else {
+			if len(requestPaddingHeader) != 0 && len(requestPaddingHeader) < 4 {
+				return errors.TraceNew("unexpected request padding header")
+			}
+		}
+
+		readRequest := func() error {
+
+			if len(requestPaddingHeader) == 0 {
+				return nil
+			}
+
+			reader := bytes.NewReader(requestPaddingHeader)
+
+			failAfterOneByte := prng.FlipCoin()
+			var r io.Reader
+			r = reader
+			if failAfterOneByte {
+				// Exercise the padding reader state machine by returning at most
+				// one byte per read.
+				r = newOneByteReader(reader)
+			}
+
+			for {
+				bytesRead, morePadding, err := serverRequestPaddingState.ReceiverConsumePadding(r)
+				if err != nil && !morePadding {
+					return errors.Trace(err)
+				}
+				if failAfterOneByte && bytesRead != 1 {
+					return errors.Tracef("unexpected request padding 1 bytes read: %d", bytesRead)
+				}
+				if !failAfterOneByte && bytesRead != int64(len(requestPaddingHeader)) {
+					return errors.Tracef("unexpected request padding all bytes read: %d", bytesRead)
+				}
+				if !morePadding {
+					if reader.Len() > 0 {
+						return errors.TraceNew("unexpected unread request padding")
+					}
+					break
+				}
+			}
+			return nil
+		}
+
+		err = readRequest()
+		if err != nil {
+			return errors.Trace(err)
+		}
+
+		// Server sends potentially padded response to client; padding is
+		// requested only when the simulated payload is empty.
+		responseSize := 0
+		if !prng.FlipWeightedCoin(emptyPayloadProbability) {
+			responseSize = prng.Range(responseMinSize, responseMaxSize)
+		}
+
+		responsePaddingHeader, err := serverResponsePaddingState.SenderGetNextPadding(
+			responseSize == 0)
+		if err != nil {
+			return errors.Trace(err)
+		}
+
+		if responseSize > 0 {
+			if len(responsePaddingHeader) != 1 {
+				return errors.TraceNew("unexpected response no-padding header")
+			}
+		} else {
+			if len(responsePaddingHeader) != 0 && len(responsePaddingHeader) < 4 {
+				return errors.TraceNew("unexpected response padding header")
+			}
+		}
+
+		readResponse := func() error {
+
+			if len(responsePaddingHeader) == 0 {
+				return nil
+			}
+			reader := bytes.NewReader(responsePaddingHeader)
+
+			failAfterOneByte := prng.FlipCoin()
+			var r io.Reader
+			r = reader
+			if failAfterOneByte {
+				// Exercise the padding reader state machine by returning at most
+				// one byte per read.
+				r = newOneByteReader(reader)
+			}
+
+			for {
+				bytesRead, morePadding, err := clientResponsePaddingState.ReceiverConsumePadding(r)
+				if err != nil && !morePadding {
+					return errors.Trace(err)
+				}
+				if failAfterOneByte && bytesRead != 1 {
+					return errors.Tracef("unexpected response padding 1 bytes read: %d", bytesRead)
+				}
+				if !failAfterOneByte && bytesRead != int64(len(responsePaddingHeader)) {
+					return errors.Tracef("unexpected response padding all bytes read: %d", bytesRead)
+				}
+				if !morePadding {
+					if reader.Len() > 0 {
+						return errors.TraceNew("unexpected unread response padding")
+					}
+					break
+				}
+			}
+			return nil
+		}
+
+		err = readResponse()
+		if err != nil {
+			return errors.Trace(err)
+		}
+	}
+
+	return nil
+}
+
+// FuzzMeekPayloadPaddingReceiverConsume is a fuzz test checking that
+// ReceiverConsumePadding handles truncated and mutated padding inputs
+// without panicking.
+//
+// The fuzzed arguments are passed straight through to
+// runFuzzMeekPayloadPaddingReceiverConsume:
+//   - addPadding is forwarded to SenderGetNextPadding when generating input.
+//   - mutate, when non-zero, has its low byte XORed into one input byte.
+//   - truncate selects the length the generated input is cut down to.
+func FuzzMeekPayloadPaddingReceiverConsume(f *testing.F) {
+
+	// Test: ReceiverConsumePadding handles fuzzed inputs without panicking.
+
+	f.Add(true, 0, 0)
+	f.Add(false, 0, 0)
+	f.Add(true, 255, 3)
+	f.Fuzz(func(t *testing.T, addPadding bool, mutate int, truncate int) {
+		err := runFuzzMeekPayloadPaddingReceiverConsume(addPadding, mutate, truncate)
+		if err != nil {
+			// t.Fatal ends this fuzz iteration, so no explicit return is
+			// needed after it.
+			t.Fatal(err.Error())
+		}
+	})
+}
+
+// runFuzzMeekPayloadPaddingReceiverConsume performs one fuzz iteration, first
+// with request padding states and then with response padding states. In each
+// pass a sender produces a padding payload, a copy of that payload is
+// truncated and optionally mutated, and the result is fed to a receiver's
+// ReceiverConsumePadding. The receiver's return values are discarded: the
+// only check is that the call returns without panicking. An error is
+// returned only when state setup or SenderGetNextPadding fails.
+func runFuzzMeekPayloadPaddingReceiverConsume(
+	addPadding bool, mutate int, truncate int) error {
+
+	key := prng.HexString(16)
+	cookie := prng.HexString(16)
+
+	for direction := 0; direction < 2; direction++ {
+
+		var (
+			sender, receiver *MeekPayloadPaddingState
+			err              error
+		)
+
+		// The sender is created with padding sizes 1 and 256; the receiver
+		// is created with zero values for its padding parameters.
+		switch direction {
+		case 0:
+			sender, err = NewMeekRequestPayloadPaddingState(key, cookie, 0.0, 1, 256)
+			if err == nil {
+				receiver, err = NewMeekRequestPayloadPaddingState(key, cookie, 0.0, 0, 0)
+			}
+		default:
+			sender, err = NewMeekResponsePayloadPaddingState(key, cookie, 0.0, 1, 256)
+			if err == nil {
+				receiver, err = NewMeekResponsePayloadPaddingState(key, cookie, 0.0, 0, 0)
+			}
+		}
+		if err != nil {
+			return errors.Trace(err)
+		}
+
+		payload, err := sender.SenderGetNextPadding(addPadding)
+		if err != nil {
+			return errors.Trace(err)
+		}
+
+		// Fuzz a copy so the sender's returned slice is never modified.
+		fuzzed := append([]byte(nil), payload...)
+
+		// Cut the input to a length in [0, len], selected by truncate.
+		if n := len(fuzzed); n > 0 {
+			fuzzed = fuzzed[:uint(truncate)%uint(n+1)]
+		}
+
+		// Flip bits in one randomly chosen byte using the low byte of mutate.
+		if mutate != 0 && len(fuzzed) > 0 {
+			index := prng.Intn(len(fuzzed))
+			fuzzed[index] ^= byte(mutate)
+		}
+
+		// Results are intentionally ignored; only a panic counts as failure.
+		_, _, _ = receiver.ReceiverConsumePadding(bytes.NewReader(fuzzed))
+	}
+
+	return nil
+}
+
+// oneByteReader is a test helper wrapping an io.Reader. Its Read method
+// alternates between reading at most one byte from the wrapped reader and
+// returning io.EOF.
+type oneByteReader struct {
+	// reader is the wrapped source of bytes.
+	reader io.Reader
+	// fail, when set, makes the next Read return (0, io.EOF) and clear the
+	// flag instead of reading from reader.
+	fail   bool
+}
+
+// newOneByteReader returns a oneByteReader wrapping reader. The fail flag
+// starts cleared, so the first Read call is passed to the wrapped reader.
+func newOneByteReader(reader io.Reader) *oneByteReader {
+	r := new(oneByteReader)
+	r.reader = reader
+	return r
+}
+
+// Read implements io.Reader. Calls alternate between two behaviors: one call
+// reads at most one byte from the wrapped reader and sets the fail flag; the
+// following call returns (0, io.EOF) without touching the wrapped reader and
+// clears the flag. This forces callers to cope with short reads and
+// interleaved EOFs.
+//
+// A zero-length p returns (0, nil) and leaves the alternation state
+// unchanged.
+func (r *oneByteReader) Read(p []byte) (int, error) {
+	if len(p) == 0 {
+		// Slicing p[:1] here would panic when cap(p) == 0, or write past
+		// len(p) otherwise. The io.Reader contract permits returning
+		// (0, nil) for an empty buffer.
+		return 0, nil
+	}
+	if r.fail {
+		r.fail = false
+		return 0, io.EOF
+	}
+	n, err := r.reader.Read(p[:1])
+	r.fail = true
+	return n, err
+}

+ 13 - 0
psiphon/common/protocol/protocol.go

@@ -458,6 +458,18 @@ func TunnelProtocolSupportsUpstreamProxy(protocol string) bool {
 }
 }
 
 
 func TunnelProtocolSupportsTactics(protocol string) bool {
 func TunnelProtocolSupportsTactics(protocol string) bool {
+
+	if TunnelProtocolUsesMeekHTTPNormalizer(protocol) {
+
+		// Limitation: the HTTP normalizer code path has special logic to
+		// extract the meek cookie from the normalized data stream. See
+		// common/transforms.HTTPNormalizer.Read. This logic only supports
+		// one meek cookie per TCP flow, while the untunneled tactics
+		// requests may require up to two meek cookies, one for a speed test
+		// request and one for the tactics request.
+		return false
+	}
+
 	return TunnelProtocolUsesMeek(protocol)
 	return TunnelProtocolUsesMeek(protocol)
 }
 }
 
 
@@ -844,6 +856,7 @@ type SSHPasswordPayload struct {
 
 
 type MeekCookieData struct {
 type MeekCookieData struct {
 	MeekProtocolVersion  int    `json:"v,omitempty"`
 	MeekProtocolVersion  int    `json:"v,omitempty"`
+	EnablePayloadPadding bool   `json:"p,omitempty"`
 	ClientTunnelProtocol string `json:"t,omitempty"`
 	ClientTunnelProtocol string `json:"t,omitempty"`
 	EndPoint             string `json:"e,omitempty"`
 	EndPoint             string `json:"e,omitempty"`
 }
 }

+ 63 - 3
psiphon/config.go

@@ -891,6 +891,8 @@ type Config struct {
 	// TransformHostNameProbability is for testing purposes.
 	// TransformHostNameProbability is for testing purposes.
 	TransformHostNameProbability *float64 `json:",omitempty"`
 	TransformHostNameProbability *float64 `json:",omitempty"`
 
 
+	PickUserAgentProbability *float64 `json:",omitempty"`
+
 	// FragmentorProbability and associated Fragmentor fields are for testing
 	// FragmentorProbability and associated Fragmentor fields are for testing
 	// purposes.
 	// purposes.
 	FragmentorProbability          *float64 `json:",omitempty"`
 	FragmentorProbability          *float64 `json:",omitempty"`
@@ -913,6 +915,10 @@ type Config struct {
 	MeekRedialTLSProbability            *float64 `json:",omitempty"`
 	MeekRedialTLSProbability            *float64 `json:",omitempty"`
 	MeekAlternateCookieNameProbability  *float64 `json:",omitempty"`
 	MeekAlternateCookieNameProbability  *float64 `json:",omitempty"`
 	MeekAlternateContentTypeProbability *float64 `json:",omitempty"`
 	MeekAlternateContentTypeProbability *float64 `json:",omitempty"`
+	MeekPayloadPaddingProbability       *float64 `json:",omitempty"`
+	MeekPayloadPaddingMinSize           *int     `json:",omitempty"`
+	MeekPayloadPaddingMaxSize           *int     `json:",omitempty"`
+	MeekPayloadPaddingOmitProbability   *float64 `json:",omitempty"`
 
 
 	// ObfuscatedSSHAlgorithms and associated ObfuscatedSSH fields are for
 	// ObfuscatedSSHAlgorithms and associated ObfuscatedSSH fields are for
 	// testing purposes. If specified, ObfuscatedSSHAlgorithms must have 4 SSH
 	// testing purposes. If specified, ObfuscatedSSHAlgorithms must have 4 SSH
@@ -1190,9 +1196,8 @@ type Config struct {
 	InproxyClientDisableWaitToShareSession                  *bool                                            `json:",omitempty"`
 	InproxyClientDisableWaitToShareSession                  *bool                                            `json:",omitempty"`
 	InproxyTunnelProtocolPreferProbability                  *float64                                         `json:",omitempty"`
 	InproxyTunnelProtocolPreferProbability                  *float64                                         `json:",omitempty"`
 	InproxyTunnelProtocolForceSelectionCount                *int                                             `json:",omitempty"`
 	InproxyTunnelProtocolForceSelectionCount                *int                                             `json:",omitempty"`
-
-	InproxySkipAwaitFullyConnected  bool `json:",omitempty"`
-	InproxyEnableWebRTCDebugLogging bool `json:",omitempty"`
+	InproxySkipAwaitFullyConnected                          bool                                             `json:",omitempty"`
+	InproxyEnableWebRTCDebugLogging                         bool                                             `json:",omitempty"`
 
 
 	NetworkIDCacheTTLMilliseconds *int `json:",omitempty"`
 	NetworkIDCacheTTLMilliseconds *int `json:",omitempty"`
 
 
@@ -2233,6 +2238,10 @@ func (config *Config) makeConfigParameters() map[string]interface{} {
 		applyParameters[parameters.TransformHostNameProbability] = *config.TransformHostNameProbability
 		applyParameters[parameters.TransformHostNameProbability] = *config.TransformHostNameProbability
 	}
 	}
 
 
+	if config.PickUserAgentProbability != nil {
+		applyParameters[parameters.PickUserAgentProbability] = *config.PickUserAgentProbability
+	}
+
 	if config.FragmentorProbability != nil {
 	if config.FragmentorProbability != nil {
 		applyParameters[parameters.FragmentorProbability] = *config.FragmentorProbability
 		applyParameters[parameters.FragmentorProbability] = *config.FragmentorProbability
 	}
 	}
@@ -2301,6 +2310,22 @@ func (config *Config) makeConfigParameters() map[string]interface{} {
 		applyParameters[parameters.MeekAlternateContentTypeProbability] = *config.MeekAlternateContentTypeProbability
 		applyParameters[parameters.MeekAlternateContentTypeProbability] = *config.MeekAlternateContentTypeProbability
 	}
 	}
 
 
+	if config.MeekPayloadPaddingProbability != nil {
+		applyParameters[parameters.MeekPayloadPaddingProbability] = *config.MeekPayloadPaddingProbability
+	}
+
+	if config.MeekPayloadPaddingMinSize != nil {
+		applyParameters[parameters.MeekPayloadPaddingClientMinSize] = *config.MeekPayloadPaddingMinSize
+	}
+
+	if config.MeekPayloadPaddingMaxSize != nil {
+		applyParameters[parameters.MeekPayloadPaddingClientMaxSize] = *config.MeekPayloadPaddingMaxSize
+	}
+
+	if config.MeekPayloadPaddingOmitProbability != nil {
+		applyParameters[parameters.MeekPayloadPaddingClientOmitProbability] = *config.MeekPayloadPaddingOmitProbability
+	}
+
 	if config.ObfuscatedSSHMinPadding != nil {
 	if config.ObfuscatedSSHMinPadding != nil {
 		applyParameters[parameters.ObfuscatedSSHMinPadding] = *config.ObfuscatedSSHMinPadding
 		applyParameters[parameters.ObfuscatedSSHMinPadding] = *config.ObfuscatedSSHMinPadding
 	}
 	}
@@ -3187,6 +3212,11 @@ func (config *Config) setDialParametersHash() {
 		binary.Write(hash, binary.LittleEndian, *config.TransformHostNameProbability)
 		binary.Write(hash, binary.LittleEndian, *config.TransformHostNameProbability)
 	}
 	}
 
 
+	if config.PickUserAgentProbability != nil {
+		hash.Write([]byte("PickUserAgentProbability"))
+		binary.Write(hash, binary.LittleEndian, *config.PickUserAgentProbability)
+	}
+
 	if config.FragmentorProbability != nil {
 	if config.FragmentorProbability != nil {
 		hash.Write([]byte("FragmentorProbability"))
 		hash.Write([]byte("FragmentorProbability"))
 		binary.Write(hash, binary.LittleEndian, *config.FragmentorProbability)
 		binary.Write(hash, binary.LittleEndian, *config.FragmentorProbability)
@@ -3256,6 +3286,36 @@ func (config *Config) setDialParametersHash() {
 		binary.Write(hash, binary.LittleEndian, *config.MeekRedialTLSProbability)
 		binary.Write(hash, binary.LittleEndian, *config.MeekRedialTLSProbability)
 	}
 	}
 
 
+	if config.MeekAlternateCookieNameProbability != nil {
+		hash.Write([]byte("MeekAlternateCookieNameProbability"))
+		binary.Write(hash, binary.LittleEndian, *config.MeekAlternateCookieNameProbability)
+	}
+
+	if config.MeekAlternateContentTypeProbability != nil {
+		hash.Write([]byte("MeekAlternateContentTypeProbability"))
+		binary.Write(hash, binary.LittleEndian, *config.MeekAlternateContentTypeProbability)
+	}
+
+	if config.MeekPayloadPaddingProbability != nil {
+		hash.Write([]byte("MeekPayloadPaddingProbability"))
+		binary.Write(hash, binary.LittleEndian, *config.MeekPayloadPaddingProbability)
+	}
+
+	if config.MeekPayloadPaddingMinSize != nil {
+		hash.Write([]byte("MeekPayloadPaddingMinSize"))
+		binary.Write(hash, binary.LittleEndian, int64(*config.MeekPayloadPaddingMinSize))
+	}
+
+	if config.MeekPayloadPaddingMaxSize != nil {
+		hash.Write([]byte("MeekPayloadPaddingMaxSize"))
+		binary.Write(hash, binary.LittleEndian, int64(*config.MeekPayloadPaddingMaxSize))
+	}
+
+	if config.MeekPayloadPaddingOmitProbability != nil {
+		hash.Write([]byte("MeekPayloadPaddingOmitProbability"))
+		binary.Write(hash, binary.LittleEndian, *config.MeekPayloadPaddingOmitProbability)
+	}
+
 	if config.ObfuscatedSSHMinPadding != nil {
 	if config.ObfuscatedSSHMinPadding != nil {
 		hash.Write([]byte("ObfuscatedSSHMinPadding"))
 		hash.Write([]byte("ObfuscatedSSHMinPadding"))
 		binary.Write(hash, binary.LittleEndian, int64(*config.ObfuscatedSSHMinPadding))
 		binary.Write(hash, binary.LittleEndian, int64(*config.ObfuscatedSSHMinPadding))

+ 32 - 0
psiphon/dialParameters.go

@@ -121,6 +121,11 @@ type DialParameters struct {
 	MeekObfuscatorPaddingSeed *prng.Seed   `json:",omitempty"`
 	MeekObfuscatorPaddingSeed *prng.Seed   `json:",omitempty"`
 	MeekResolvedIPAddress     atomic.Value `json:"-"`
 	MeekResolvedIPAddress     atomic.Value `json:"-"`
 
 
+	MeekEnablePayloadPadding          bool    `json:",omitempty"`
+	MeekPayloadPaddingMinSize         int     `json:",omitempty"`
+	MeekPayloadPaddingMaxSize         int     `json:",omitempty"`
+	MeekPayloadPaddingOmitProbability float64 `json:",omitempty"`
+
 	TLSOSSHTransformedSNIServerName bool       `json:",omitempty"`
 	TLSOSSHTransformedSNIServerName bool       `json:",omitempty"`
 	TLSOSSHSNIServerName            string     `json:",omitempty"`
 	TLSOSSHSNIServerName            string     `json:",omitempty"`
 	TLSOSSHObfuscatorPaddingSeed    *prng.Seed `json:",omitempty"`
 	TLSOSSHObfuscatorPaddingSeed    *prng.Seed `json:",omitempty"`
@@ -271,6 +276,7 @@ func MakeDialParameters(
 	replayShadowsocksPrefix := p.Bool(parameters.ReplayShadowsocksPrefix)
 	replayShadowsocksPrefix := p.Bool(parameters.ReplayShadowsocksPrefix)
 	replayInproxySTUN := p.Bool(parameters.ReplayInproxySTUN)
 	replayInproxySTUN := p.Bool(parameters.ReplayInproxySTUN)
 	replayInproxyWebRTC := p.Bool(parameters.ReplayInproxyWebRTC)
 	replayInproxyWebRTC := p.Bool(parameters.ReplayInproxyWebRTC)
+	replayMeekPayloadPadding := p.Bool(parameters.ReplayMeekPayloadPadding)
 
 
 	// Check for existing dial parameters for this server/network ID.
 	// Check for existing dial parameters for this server/network ID.
 
 
@@ -1067,6 +1073,28 @@ func MakeDialParameters(
 		}
 		}
 	}
 	}
 
 
+	if (!isReplay || !replayMeekPayloadPadding) &&
+		protocol.TunnelProtocolUsesMeek(dialParams.TunnelProtocol) {
+
+		limitTunnelProtocols := p.TunnelProtocols(
+			parameters.MeekPayloadPaddingLimitTunnelProtocols)
+
+		if len(limitTunnelProtocols) == 0 ||
+			common.Contains(limitTunnelProtocols, dialParams.TunnelProtocol) {
+
+			if p.WeightedCoinFlip(parameters.MeekPayloadPaddingProbability) {
+
+				dialParams.MeekEnablePayloadPadding = true
+				dialParams.MeekPayloadPaddingOmitProbability =
+					p.Float(parameters.MeekPayloadPaddingClientOmitProbability)
+				dialParams.MeekPayloadPaddingMinSize =
+					p.Int(parameters.MeekPayloadPaddingClientMinSize)
+				dialParams.MeekPayloadPaddingMaxSize =
+					p.Int(parameters.MeekPayloadPaddingClientMaxSize)
+			}
+		}
+	}
+
 	// Initialize dialParams.ResolveParameters for dials that will resolve
 	// Initialize dialParams.ResolveParameters for dials that will resolve
 	// domain names, which currently includes fronted meek and Conjure API
 	// domain names, which currently includes fronted meek and Conjure API
 	// registration, where the dial address is not an IP address.
 	// registration, where the dial address is not an IP address.
@@ -1793,6 +1821,10 @@ func MakeDialParameters(
 			MeekCookieEncryptionPublicKey: serverEntry.MeekCookieEncryptionPublicKey,
 			MeekCookieEncryptionPublicKey: serverEntry.MeekCookieEncryptionPublicKey,
 			MeekObfuscatedKey:             serverEntry.MeekObfuscatedKey,
 			MeekObfuscatedKey:             serverEntry.MeekObfuscatedKey,
 			MeekObfuscatorPaddingSeed:     dialParams.MeekObfuscatorPaddingSeed,
 			MeekObfuscatorPaddingSeed:     dialParams.MeekObfuscatorPaddingSeed,
+			EnablePayloadPadding:          dialParams.MeekEnablePayloadPadding,
+			PayloadPaddingMinSize:         dialParams.MeekPayloadPaddingMinSize,
+			PayloadPaddingMaxSize:         dialParams.MeekPayloadPaddingMaxSize,
+			PayloadPaddingOmitProbability: dialParams.MeekPayloadPaddingOmitProbability,
 			NetworkLatencyMultiplier:      dialParams.NetworkLatencyMultiplier,
 			NetworkLatencyMultiplier:      dialParams.NetworkLatencyMultiplier,
 			HTTPTransformerParameters:     dialParams.HTTPTransformerParameters,
 			HTTPTransformerParameters:     dialParams.HTTPTransformerParameters,
 			AdditionalHeaders:             config.MeekAdditionalHeaders,
 			AdditionalHeaders:             config.MeekAdditionalHeaders,

+ 230 - 32
psiphon/meekConn.go

@@ -56,7 +56,7 @@ import (
 // CC0 1.0 Universal
 // CC0 1.0 Universal
 
 
 const (
 const (
-	MEEK_PROTOCOL_VERSION           = 3
+	MEEK_PROTOCOL_VERSION           = 4
 	MEEK_MAX_REQUEST_PAYLOAD_LENGTH = 65536
 	MEEK_MAX_REQUEST_PAYLOAD_LENGTH = 65536
 )
 )
 
 
@@ -234,6 +234,13 @@ type MeekConfig struct {
 	// are added to all meek HTTP requests. An additional header is ignored
 	// are added to all meek HTTP requests. An additional header is ignored
 	// when the header name is already present in a meek request.
 	// when the header name is already present in a meek request.
 	AdditionalHeaders http.Header
 	AdditionalHeaders http.Header
+
+	// EnablePayloadPadding and PayloadPadding fields enable and configure
+	// optional padding of empty meek payloads.
+	EnablePayloadPadding          bool
+	PayloadPaddingMinSize         int
+	PayloadPaddingMaxSize         int
+	PayloadPaddingOmitProbability float64
 }
 }
 
 
 // MeekConn is a network connection that tunnels net.Conn flows over HTTP and supports
 // MeekConn is a network connection that tunnels net.Conn flows over HTTP and supports
@@ -287,6 +294,10 @@ type MeekConn struct {
 	emptySendBuffer         chan *bytes.Buffer
 	emptySendBuffer         chan *bytes.Buffer
 	partialSendBuffer       chan *bytes.Buffer
 	partialSendBuffer       chan *bytes.Buffer
 	fullSendBuffer          chan *bytes.Buffer
 	fullSendBuffer          chan *bytes.Buffer
+
+	requestPaddingState  *protocol.MeekPayloadPaddingState
+	responsePaddingState *protocol.MeekPayloadPaddingState
+	requestPaddingBuffer *bytes.Buffer
 }
 }
 
 
 func (conn *MeekConn) getCustomParameters() parameters.ParametersAccessor {
 func (conn *MeekConn) getCustomParameters() parameters.ParametersAccessor {
@@ -388,6 +399,7 @@ func DialMeek(
 				meekConfig.MeekCookieEncryptionPublicKey,
 				meekConfig.MeekCookieEncryptionPublicKey,
 				meekConfig.MeekObfuscatedKey,
 				meekConfig.MeekObfuscatedKey,
 				meekConfig.MeekObfuscatorPaddingSeed,
 				meekConfig.MeekObfuscatorPaddingSeed,
+				meekConfig.EnablePayloadPadding,
 				meekConfig.ClientTunnelProtocol,
 				meekConfig.ClientTunnelProtocol,
 				"")
 				"")
 		if err != nil {
 		if err != nil {
@@ -739,6 +751,32 @@ func DialMeek(
 	// go routine, only when running in relay mode.
 	// go routine, only when running in relay mode.
 	if meek.mode == MeekModeRelay {
 	if meek.mode == MeekModeRelay {
 
 
+		if meekConfig.EnablePayloadPadding {
+
+			// Initialize payload padding mode. The meek server will be
+			// signaled, via the meek cookie, to expect request padding and
+			// perform response padding.
+
+			var err error
+			meek.requestPaddingState, err = protocol.NewMeekRequestPayloadPaddingState(
+				meekConfig.MeekObfuscatedKey,
+				meek.cookie.Value,
+				meekConfig.PayloadPaddingOmitProbability,
+				meekConfig.PayloadPaddingMinSize,
+				meekConfig.PayloadPaddingMaxSize)
+			if err != nil {
+				return nil, errors.Trace(err)
+			}
+			meek.responsePaddingState, err = protocol.NewMeekResponsePayloadPaddingState(
+				meekConfig.MeekObfuscatedKey,
+				meek.cookie.Value,
+				0.0, 0, 0)
+			if err != nil {
+				return nil, errors.Trace(err)
+			}
+			meek.requestPaddingBuffer = new(bytes.Buffer)
+		}
+
 		// The main loop of a MeekConn is run in the relay() goroutine.
 		// The main loop of a MeekConn is run in the relay() goroutine.
 		// A MeekConn implements net.Conn concurrency semantics:
 		// A MeekConn implements net.Conn concurrency semantics:
 		// "Multiple goroutines may invoke methods on a Conn simultaneously."
 		// "Multiple goroutines may invoke methods on a Conn simultaneously."
@@ -772,8 +810,11 @@ func DialMeek(
 		meek.partialSendBuffer = make(chan *bytes.Buffer, 1)
 		meek.partialSendBuffer = make(chan *bytes.Buffer, 1)
 		meek.fullSendBuffer = make(chan *bytes.Buffer, 1)
 		meek.fullSendBuffer = make(chan *bytes.Buffer, 1)
 
 
-		meek.emptyReceiveBuffer <- new(bytes.Buffer)
-		meek.emptySendBuffer <- new(bytes.Buffer)
+		meek.replaceReceiveBuffer(new(bytes.Buffer))
+
+		// In payload padding mode, a padding prefix placeholder is added to
+		// empty send buffers. This is handled by truncateAndReplaceSendBuffer.
+		meek.truncateAndReplaceSendBuffer(new(bytes.Buffer))
 
 
 		meek.relayWaitGroup.Add(1)
 		meek.relayWaitGroup.Add(1)
 		go meek.relay()
 		go meek.relay()
@@ -1054,6 +1095,7 @@ func (meek *MeekConn) ObfuscatedRoundTrip(
 		meek.meekCookieEncryptionPublicKey,
 		meek.meekCookieEncryptionPublicKey,
 		meek.meekObfuscatedKey,
 		meek.meekObfuscatedKey,
 		meek.meekObfuscatorPaddingSeed,
 		meek.meekObfuscatorPaddingSeed,
+		false,
 		meek.clientTunnelProtocol,
 		meek.clientTunnelProtocol,
 		endPoint)
 		endPoint)
 	if err != nil {
 	if err != nil {
@@ -1166,6 +1208,7 @@ func (meek *MeekConn) Write(buffer []byte) (n int, err error) {
 			_, err = sendBuffer.Write(buffer[:writeLen])
 			_, err = sendBuffer.Write(buffer[:writeLen])
 			buffer = buffer[writeLen:]
 			buffer = buffer[writeLen:]
 		}
 		}
+
 		meek.replaceSendBuffer(sendBuffer)
 		meek.replaceSendBuffer(sendBuffer)
 	}
 	}
 	return n, err
 	return n, err
@@ -1218,15 +1261,43 @@ func (meek *MeekConn) replaceSendBuffer(sendBuffer *bytes.Buffer) {
 	}
 	}
 }
 }
 
 
+// truncateAndReplaceSendBuffer empties sendBuffer and returns it to the
+// emptySendBuffer channel. When request padding state is set (payload
+// padding mode), the emptied buffer is first seeded with
+// protocol.MeekPayloadPaddingPrefixSize zero bytes.
+func (meek *MeekConn) truncateAndReplaceSendBuffer(sendBuffer *bytes.Buffer) {
+	sendBuffer.Reset()
+
+	// Every payload request body in payload padding mode must begin with a
+	// padding prefix. Reserving placeholder bytes for it now means the
+	// payload never has to be shifted in memory later to make room.
+
+	if meek.requestPaddingState != nil {
+		for remaining := protocol.MeekPayloadPaddingPrefixSize; remaining > 0; remaining-- {
+			sendBuffer.WriteByte(0)
+		}
+	}
+
+	meek.emptySendBuffer <- sendBuffer
+}
+
 // relay sends and receives tunneled traffic (payload). An HTTP request is
 // relay sends and receives tunneled traffic (payload). An HTTP request is
 // triggered when data is in the write queue or at a polling interval.
 // triggered when data is in the write queue or at a polling interval.
 // There's a geometric increase, up to a maximum, in the polling interval when
 // There's a geometric increase, up to a maximum, in the polling interval when
 // no data is exchanged. Only one HTTP request is in flight at a time.
 // no data is exchanged. Only one HTTP request is in flight at a time.
-func (meek *MeekConn) relay() {
+func (meek *MeekConn) relay() (retErr error) {
 	// Note: meek.Close() calls here in relay() are made asynchronously
 	// Note: meek.Close() calls here in relay() are made asynchronously
 	// (using goroutines) since Close() will wait on this WaitGroup.
 	// (using goroutines) since Close() will wait on this WaitGroup.
 	defer meek.relayWaitGroup.Done()
 	defer meek.relayWaitGroup.Done()
 
 
+	defer func() {
+
+		// Since MeekConn.relay is invoked as a goroutine, log any error
+		// returns in a notice. On error, close the MeekConn
+		// (asynchronously due to the relayWaitGroup synchronization).
+
+		if retErr != nil {
+			NoticeWarning("%v", errors.Trace(retErr))
+			go meek.Close()
+		}
+	}()
+
 	p := meek.getCustomParameters()
 	p := meek.getCustomParameters()
 	interval := prng.JitterDuration(
 	interval := prng.JitterDuration(
 		p.Duration(parameters.MeekMinPollInterval),
 		p.Duration(parameters.MeekMinPollInterval),
@@ -1259,9 +1330,78 @@ func (meek *MeekConn) relay() {
 
 
 		sendPayloadSize := 0
 		sendPayloadSize := 0
 		if sendBuffer != nil {
 		if sendBuffer != nil {
+			// In payload padding mode, sendPayloadSize will include the
+			// placeholder padding prefix.
 			sendPayloadSize = sendBuffer.Len()
 			sendPayloadSize = sendBuffer.Len()
 		}
 		}
 
 
+		// Send buffers are exchanged back and forth between MeekConn.Write
+		// and MeekConn.relay as the request payload is assembled.
+		//
+		// In the polling case, there is no send buffer, and in payload
+		// padding mode, meek.requestPaddingBuffer is instead used as a
+		// temporary buffer to construct a padded payload. Don't replace
+		// meek.requestPaddingBuffer back into the buffer exchange channels.
+
+		replaceSendBuffer := sendBuffer != nil
+
+		if meek.requestPaddingState != nil {
+
+			// In payload padding mode, set a padding prefix and, for empty
+			// payloads, add a full padding header and padding.
+			//
+			// Retries, if any, are performed in relayRoundTrip using the same
+			// padding bytes; the padding cipher stream state is advanced
+			// only once per payload, here.
+
+			addPadding := sendBuffer == nil
+
+			paddingHeader, err := meek.requestPaddingState.SenderGetNextPadding(
+				addPadding)
+			if err != nil {
+				return errors.Trace(err)
+			}
+
+			if addPadding {
+
+				if len(paddingHeader) == 0 {
+
+					// SenderGetNextPadding may indicate no padding, including
+					// prefix, at all, so revert to the no-sendBuffer empty
+					// body polling case.
+					sendBuffer = nil
+
+				} else {
+
+					// Full padding case.
+
+					meek.requestPaddingBuffer.Truncate(0)
+					meek.requestPaddingBuffer.Write(paddingHeader)
+
+					sendBuffer = meek.requestPaddingBuffer
+					replaceSendBuffer = false
+				}
+
+			} else {
+
+				// Update the padding prefix placeholder at the start of the payload.
+
+				var err error
+				if len(paddingHeader) != protocol.MeekPayloadPaddingPrefixSize {
+					err = errors.TraceNew("unexpected meek payload padding header size")
+				}
+				if sendBuffer.Len() < protocol.MeekPayloadPaddingPrefixSize+1 {
+					err = errors.TraceNew("unexpected meek send buffer size")
+				}
+				if err != nil {
+					return errors.Trace(err)
+				}
+				for i := 0; i < protocol.MeekPayloadPaddingPrefixSize; i++ {
+					sendBuffer.Bytes()[i] = paddingHeader[i]
+				}
+			}
+		}
+
 		// relayRoundTrip will replace sendBuffer (by calling replaceSendBuffer). This
 		// relayRoundTrip will replace sendBuffer (by calling replaceSendBuffer). This
 		// is a compromise to conserve memory. Using a second buffer here, we could
 		// is a compromise to conserve memory. Using a second buffer here, we could
 		// copy sendBuffer and immediately replace it, unblocking meekConn.Write() and
 		// copy sendBuffer and immediately replace it, unblocking meekConn.Write() and
@@ -1271,8 +1411,8 @@ func (meek *MeekConn) relay() {
 		// still allows meekConn.Write() to unblock before the round trip response is
 		// still allows meekConn.Write() to unblock before the round trip response is
 		// read.
 		// read.
 
 
-		receivedPayloadSize, err := meek.relayRoundTrip(sendBuffer)
-
+		receivedPayloadSize, paddingOnly, err := meek.relayRoundTrip(
+			sendBuffer, replaceSendBuffer)
 		if err != nil {
 		if err != nil {
 			select {
 			select {
 			case <-meek.runCtx.Done():
 			case <-meek.runCtx.Done():
@@ -1281,9 +1421,7 @@ func (meek *MeekConn) relay() {
 				return
 				return
 			default:
 			default:
 			}
 			}
-			NoticeWarning("%s", errors.Trace(err))
-			go meek.Close()
-			return
+			return errors.Trace(err)
 		}
 		}
 
 
 		// Periodically re-dial the underlying TLS/TCP connection
 		// Periodically re-dial the underlying TLS/TCP connection
@@ -1293,15 +1431,14 @@ func (meek *MeekConn) relay() {
 			meek.transport.CloseIdleConnections()
 			meek.transport.CloseIdleConnections()
 		}
 		}
 
 
-		// Calculate polling interval. When data is received,
-		// immediately request more. Otherwise, schedule next
-		// poll with exponential back off. Jitter and coin
-		// flips are used to avoid trivial, static traffic
-		// timing patterns.
+		// Calculate polling interval. When non-padding data is received,
+		// immediately request more. Otherwise, schedule next poll with
+		// exponential back off. Jitter and coin flips are used to avoid
+		// trivial, static traffic timing patterns.
 
 
 		p := meek.getCustomParameters()
 		p := meek.getCustomParameters()
 
 
-		if receivedPayloadSize > 0 || sendPayloadSize > 0 {
+		if (receivedPayloadSize > 0 && !paddingOnly) || sendPayloadSize > 0 {
 
 
 			interval = 0
 			interval = 0
 
 
@@ -1432,7 +1569,9 @@ func (meek *MeekConn) scheduleQUICCloseIdle(request *http.Request) {
 }
 }
 
 
 // relayRoundTrip configures and makes the actual HTTP POST request
 // relayRoundTrip configures and makes the actual HTTP POST request
-func (meek *MeekConn) relayRoundTrip(sendBuffer *bytes.Buffer) (int64, error) {
+func (meek *MeekConn) relayRoundTrip(
+	sendBuffer *bytes.Buffer,
+	replaceSendBuffer bool) (int64, bool, error) {
 
 
 	// Retries are made when the round trip fails. This adds resiliency
 	// Retries are made when the round trip fails. This adds resiliency
 	// to connection interruption and intermittent failures.
 	// to connection interruption and intermittent failures.
@@ -1469,9 +1608,8 @@ func (meek *MeekConn) relayRoundTrip(sendBuffer *bytes.Buffer) (int64, error) {
 
 
 	defer func() {
 	defer func() {
 		// Ensure sendBuffer is replaced, even in error code paths.
 		// Ensure sendBuffer is replaced, even in error code paths.
-		if sendBuffer != nil {
-			sendBuffer.Truncate(0)
-			meek.replaceSendBuffer(sendBuffer)
+		if sendBuffer != nil && replaceSendBuffer {
+			meek.truncateAndReplaceSendBuffer(sendBuffer)
 		}
 		}
 	}()
 	}()
 
 
@@ -1487,6 +1625,9 @@ func (meek *MeekConn) relayRoundTrip(sendBuffer *bytes.Buffer) (int64, error) {
 	serverAcknowledgedRequestPayload := false
 	serverAcknowledgedRequestPayload := false
 
 
 	receivedPayloadSize := int64(0)
 	receivedPayloadSize := int64(0)
+	totalPaddingSize := int64(0)
+
+	morePadding := meek.responsePaddingState != nil
 
 
 	for try := 0; ; try++ {
 	for try := 0; ; try++ {
 
 
@@ -1524,7 +1665,7 @@ func (meek *MeekConn) relayRoundTrip(sendBuffer *bytes.Buffer) (int64, error) {
 			contentLength)
 			contentLength)
 		if err != nil {
 		if err != nil {
 			// Don't retry when can't initialize a Request
 			// Don't retry when can't initialize a Request
-			return 0, errors.Trace(err)
+			return 0, false, errors.Trace(err)
 		}
 		}
 
 
 		expectedStatusCode := http.StatusOK
 		expectedStatusCode := http.StatusOK
@@ -1550,7 +1691,7 @@ func (meek *MeekConn) relayRoundTrip(sendBuffer *bytes.Buffer) (int64, error) {
 				// done with it. MeekConn.Write will exit on Done and not hang
 				// done with it. MeekConn.Write will exit on Done and not hang
 				// awaiting sendBuffer.
 				// awaiting sendBuffer.
 				sendBuffer = nil
 				sendBuffer = nil
-				return 0, errors.TraceNew("meek connection has closed")
+				return 0, false, errors.TraceNew("meek connection has closed")
 			}
 			}
 		}
 		}
 
 
@@ -1558,7 +1699,7 @@ func (meek *MeekConn) relayRoundTrip(sendBuffer *bytes.Buffer) (int64, error) {
 			select {
 			select {
 			case <-meek.runCtx.Done():
 			case <-meek.runCtx.Done():
 				// Exit without retrying and without logging error.
 				// Exit without retrying and without logging error.
-				return 0, errors.Trace(err)
+				return 0, false, errors.Trace(err)
 			default:
 			default:
 			}
 			}
 			NoticeWarning("meek round trip failed: %s", err)
 			NoticeWarning("meek round trip failed: %s", err)
@@ -1568,12 +1709,14 @@ func (meek *MeekConn) relayRoundTrip(sendBuffer *bytes.Buffer) (int64, error) {
 		if err == nil {
 		if err == nil {
 
 
 			if response.StatusCode != expectedStatusCode &&
 			if response.StatusCode != expectedStatusCode &&
-				// Certain http servers return 200 OK where we expect 206, so accept that.
-				!(expectedStatusCode == http.StatusPartialContent && response.StatusCode == http.StatusOK) {
+				// Certain http servers return 200 OK where we expect 206, so
+				// accept that.
+				!(expectedStatusCode == http.StatusPartialContent &&
+					response.StatusCode == http.StatusOK) {
 
 
 				// Don't retry when the status code is incorrect
 				// Don't retry when the status code is incorrect
 				response.Body.Close()
 				response.Body.Close()
-				return 0, errors.Tracef(
+				return 0, false, errors.Tracef(
 					"unexpected status code: %d instead of %d",
 					"unexpected status code: %d instead of %d",
 					response.StatusCode, expectedStatusCode)
 					response.StatusCode, expectedStatusCode)
 			}
 			}
@@ -1594,14 +1737,55 @@ func (meek *MeekConn) relayRoundTrip(sendBuffer *bytes.Buffer) (int64, error) {
 			// buffer may be replaced; this allows meekConn.Write() to unblock
 			// buffer may be replaced; this allows meekConn.Write() to unblock
 			// and start buffering data for the next round trip while still
 			// and start buffering data for the next round trip while still
 			// reading the current round trip response.
 			// reading the current round trip response.
-			if sendBuffer != nil {
+			if sendBuffer != nil && replaceSendBuffer {
 				// Assumes signaller.AwaitClosed is called above, so
 				// Assumes signaller.AwaitClosed is called above, so
 				// sendBuffer will no longer be accessed by RoundTrip.
 				// sendBuffer will no longer be accessed by RoundTrip.
-				sendBuffer.Truncate(0)
-				meek.replaceSendBuffer(sendBuffer)
+				meek.truncateAndReplaceSendBuffer(sendBuffer)
 				sendBuffer = nil
 				sendBuffer = nil
 			}
 			}
 
 
+			if meek.responsePaddingState != nil && morePadding {
+
+				// With retries, the response payload may be read in
+				// increments. In payload padding mode, the start of the
+				// payload contains at least a padding prefix, and
+				// potentially a full padding header and padding itself. morePadding
+				// remains true as long as ReceiverConsumePadding indicates
+				// that more padding bytes need to be read and consumed.
+				//
+				// ErrMeekPaddingStateImmediateEOF supports the special case
+				// where an empty payload was left empty with no padding
+				// prefix or padding at all.
+
+				readPaddingSize, more, err := meek.responsePaddingState.
+					ReceiverConsumePadding(response.Body)
+
+				if err == protocol.ErrMeekPaddingStateImmediateEOF {
+
+					// A 0 byte payload with no padding.
+
+					response.Body.Close()
+					// Round trip completed successfully
+					break
+				}
+
+				morePadding = more
+
+				// Add padding bytes read, required for the correct Range
+				// header in case of retry.
+				receivedPayloadSize += readPaddingSize
+
+				totalPaddingSize += readPaddingSize
+
+				if err != nil {
+					NoticeWarning("meek read padding failed: %v", err)
+					response.Body.Close()
+					// ...continue to retry
+					continue
+
+				}
+			}
+
 			readPayloadSize, err := meek.readPayload(response.Body)
 			readPayloadSize, err := meek.readPayload(response.Body)
 			response.Body.Close()
 			response.Body.Close()
 
 
@@ -1611,7 +1795,7 @@ func (meek *MeekConn) relayRoundTrip(sendBuffer *bytes.Buffer) (int64, error) {
 			receivedPayloadSize += readPayloadSize
 			receivedPayloadSize += readPayloadSize
 
 
 			if err != nil {
 			if err != nil {
-				NoticeWarning("meek read payload failed: %s", err)
+				NoticeWarning("meek read payload failed: %v", err)
 				// ...continue to retry
 				// ...continue to retry
 			} else {
 			} else {
 				// Round trip completed successfully
 				// Round trip completed successfully
@@ -1631,7 +1815,7 @@ func (meek *MeekConn) relayRoundTrip(sendBuffer *bytes.Buffer) (int64, error) {
 
 
 		if retries >= 1 &&
 		if retries >= 1 &&
 			(now.After(retryDeadline) || retryDeadline.Sub(now) <= retryDelay) {
 			(now.After(retryDeadline) || retryDeadline.Sub(now) <= retryDelay) {
-			return 0, errors.Trace(err)
+			return 0, false, errors.Trace(err)
 		}
 		}
 		retries += 1
 		retries += 1
 
 
@@ -1641,7 +1825,7 @@ func (meek *MeekConn) relayRoundTrip(sendBuffer *bytes.Buffer) (int64, error) {
 		case <-delayTimer.C:
 		case <-delayTimer.C:
 		case <-meek.runCtx.Done():
 		case <-meek.runCtx.Done():
 			delayTimer.Stop()
 			delayTimer.Stop()
-			return 0, errors.Trace(err)
+			return 0, false, errors.Trace(err)
 		}
 		}
 
 
 		// Increase the next delay, to back off and avoid excessive
 		// Increase the next delay, to back off and avoid excessive
@@ -1654,7 +1838,10 @@ func (meek *MeekConn) relayRoundTrip(sendBuffer *bytes.Buffer) (int64, error) {
 		}
 		}
 	}
 	}
 
 
-	return receivedPayloadSize, nil
+	paddingOnly := totalPaddingSize > 0 &&
+		receivedPayloadSize <= totalPaddingSize
+
+	return receivedPayloadSize, paddingOnly, nil
 }
 }
 
 
 // Add additional headers to the HTTP request using the same method we use for adding
 // Add additional headers to the HTTP request using the same method we use for adding
@@ -1731,6 +1918,7 @@ func makeMeekObfuscationValues(
 	meekCookieEncryptionPublicKey string,
 	meekCookieEncryptionPublicKey string,
 	meekObfuscatedKey string,
 	meekObfuscatedKey string,
 	meekObfuscatorPaddingPRNGSeed *prng.Seed,
 	meekObfuscatorPaddingPRNGSeed *prng.Seed,
+	enablePayloadPadding bool,
 	clientTunnelProtocol string,
 	clientTunnelProtocol string,
 	endPoint string,
 	endPoint string,
 
 
@@ -1747,6 +1935,7 @@ func makeMeekObfuscationValues(
 
 
 	cookieData := &protocol.MeekCookieData{
 	cookieData := &protocol.MeekCookieData{
 		MeekProtocolVersion:  MEEK_PROTOCOL_VERSION,
 		MeekProtocolVersion:  MEEK_PROTOCOL_VERSION,
+		EnablePayloadPadding: enablePayloadPadding,
 		ClientTunnelProtocol: clientTunnelProtocol,
 		ClientTunnelProtocol: clientTunnelProtocol,
 		EndPoint:             endPoint,
 		EndPoint:             endPoint,
 	}
 	}
@@ -1853,6 +2042,15 @@ func makeMeekObfuscationValues(
 		limitRequestPayloadLength = limitRequestPayloadLengthPRNG.Range(
 		limitRequestPayloadLength = limitRequestPayloadLengthPRNG.Range(
 			minLength, maxLength)
 			minLength, maxLength)
 
 
+		// In payload padding mode, the maximum request payload size is
+		// adjusted to allow for the padding prefix and at least one real
+		// payload byte.
+		if enablePayloadPadding &&
+			limitRequestPayloadLength == protocol.MeekPayloadPaddingPrefixSize {
+
+			limitRequestPayloadLength += 1
+		}
+
 		minPadding := p.Int(parameters.MeekMinTLSPadding)
 		minPadding := p.Int(parameters.MeekMinTLSPadding)
 		maxPadding := p.Int(parameters.MeekMaxTLSPadding)
 		maxPadding := p.Int(parameters.MeekMaxTLSPadding)
 
 

+ 3 - 1
psiphon/notice.go

@@ -549,9 +549,11 @@ func noticeWithDialParameters(noticeType string, dialParams *DialParameters, pos
 			args = append(args, "meekHostHeader", dialParams.MeekHostHeader)
 			args = append(args, "meekHostHeader", dialParams.MeekHostHeader)
 		}
 		}
 
 
-		// MeekTransformedHostName is meaningful when meek is used, which is when MeekDialAddress != ""
+		// These fields are meaningful when meek is used, which is when
+		// MeekDialAddress != ""
 		if dialParams.MeekDialAddress != "" {
 		if dialParams.MeekDialAddress != "" {
 			args = append(args, "meekTransformedHostName", dialParams.MeekTransformedHostName)
 			args = append(args, "meekTransformedHostName", dialParams.MeekTransformedHostName)
+			args = append(args, "meekPayloadPadding", dialParams.MeekEnablePayloadPadding)
 		}
 		}
 
 
 		if dialParams.TLSOSSHSNIServerName != "" {
 		if dialParams.TLSOSSHSNIServerName != "" {

+ 194 - 27
psiphon/server/meek.go

@@ -29,7 +29,6 @@ import (
 	"encoding/hex"
 	"encoding/hex"
 	"encoding/json"
 	"encoding/json"
 	std_errors "errors"
 	std_errors "errors"
-	"hash/crc64"
 	"io"
 	"io"
 	"io/ioutil"
 	"io/ioutil"
 	"net"
 	"net"
@@ -54,6 +53,7 @@ import (
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/values"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/values"
 	lrucache "github.com/cognusion/go-cache-lru"
 	lrucache "github.com/cognusion/go-cache-lru"
+	"github.com/minio/crc64nvme"
 	"golang.org/x/crypto/nacl/box"
 	"golang.org/x/crypto/nacl/box"
 	"golang.org/x/time/rate"
 	"golang.org/x/time/rate"
 )
 )
@@ -82,6 +82,10 @@ const (
 	// when retrying a request for a partially downloaded response payload.
 	// when retrying a request for a partially downloaded response payload.
 	MEEK_PROTOCOL_VERSION_3 = 3
 	MEEK_PROTOCOL_VERSION_3 = 3
 
 
+	// Protocol version 4 adds support for meek payload padding, which is
+	// enabled via the meek cookie.
+	MEEK_PROTOCOL_VERSION_4 = 4
+
 	MEEK_MAX_REQUEST_PAYLOAD_LENGTH                  = 65536
 	MEEK_MAX_REQUEST_PAYLOAD_LENGTH                  = 65536
 	MEEK_MIN_SESSION_ID_LENGTH                       = 8
 	MEEK_MIN_SESSION_ID_LENGTH                       = 8
 	MEEK_MAX_SESSION_ID_LENGTH                       = 20
 	MEEK_MAX_SESSION_ID_LENGTH                       = 20
@@ -130,7 +134,6 @@ type MeekServer struct {
 	stopBroadcast                   <-chan struct{}
 	stopBroadcast                   <-chan struct{}
 	sessionsLock                    sync.RWMutex
 	sessionsLock                    sync.RWMutex
 	sessions                        map[string]*meekSession
 	sessions                        map[string]*meekSession
-	checksumTable                   *crc64.Table
 	bufferPool                      *CachedResponseBufferPool
 	bufferPool                      *CachedResponseBufferPool
 	rateLimitLock                   sync.Mutex
 	rateLimitLock                   sync.Mutex
 	rateLimitHistory                *lrucache.Cache
 	rateLimitHistory                *lrucache.Cache
@@ -207,8 +210,6 @@ func NewMeekServer(
 		}
 		}
 	}
 	}
 
 
-	checksumTable := crc64.MakeTable(crc64.ECMA)
-
 	bufferLength := MEEK_DEFAULT_POOL_BUFFER_LENGTH
 	bufferLength := MEEK_DEFAULT_POOL_BUFFER_LENGTH
 	if support.Config.MeekCachedResponsePoolBufferSize != 0 {
 	if support.Config.MeekCachedResponsePoolBufferSize != 0 {
 		bufferLength = support.Config.MeekCachedResponsePoolBufferSize
 		bufferLength = support.Config.MeekCachedResponsePoolBufferSize
@@ -258,7 +259,6 @@ func NewMeekServer(
 		openConns:                       common.NewConns[net.Conn](),
 		openConns:                       common.NewConns[net.Conn](),
 		stopBroadcast:                   stopBroadcast,
 		stopBroadcast:                   stopBroadcast,
 		sessions:                        make(map[string]*meekSession),
 		sessions:                        make(map[string]*meekSession),
-		checksumTable:                   checksumTable,
 		bufferPool:                      bufferPool,
 		bufferPool:                      bufferPool,
 		rateLimitHistory:                rateLimitHistory,
 		rateLimitHistory:                rateLimitHistory,
 		rateLimitSignalGC:               make(chan struct{}, 1),
 		rateLimitSignalGC:               make(chan struct{}, 1),
@@ -659,6 +659,7 @@ func (server *MeekServer) ServeHTTP(responseWriter http.ResponseWriter, request
 				common.GeoIPData(*endPointGeoIPData),
 				common.GeoIPData(*endPointGeoIPData),
 				responseWriter,
 				responseWriter,
 				request)
 				request)
+
 			// Currently, TacticsServer.HandleEndPoint handles returning a 404 instead of
 			// Currently, TacticsServer.HandleEndPoint handles returning a 404 instead of
 			// leaving that up to server.handleError.
 			// leaving that up to server.handleError.
 			//
 			//
@@ -735,13 +736,19 @@ func (server *MeekServer) ServeHTTP(responseWriter http.ResponseWriter, request
 
 
 	// If a newer request has arrived while waiting, discard this one.
 	// If a newer request has arrived while waiting, discard this one.
 	// Do not delay processing the newest request.
 	// Do not delay processing the newest request.
-	//
+	if session.requestCount.Load() > requestNumber {
+
+		// Do not return 404 in this error case. Keep session open to allow
+		// client to retry.
+		return
+	}
+
 	// If the session expired and was deleted while this request was waiting,
 	// If the session expired and was deleted while this request was waiting,
 	// discard this request. The session is no longer valid, and the final call
 	// discard this request. The session is no longer valid, and the final call
 	// to session.cachedResponse.Reset may have already occurred, so any further
 	// to session.cachedResponse.Reset may have already occurred, so any further
 	// session.cachedResponse access may deplete resources (fail to refill the pool).
 	// session.cachedResponse access may deplete resources (fail to refill the pool).
-	if session.requestCount.Load() > requestNumber || session.deleted {
-		common.TerminateHTTPConnection(responseWriter, request)
+	if session.deleted {
+		server.handleError(responseWriter, request)
 		return
 		return
 	}
 	}
 
 
@@ -754,6 +761,8 @@ func (server *MeekServer) ServeHTTP(responseWriter http.ResponseWriter, request
 	// clients to resend request payloads, when retrying due to connection
 	// clients to resend request payloads, when retrying due to connection
 	// interruption, without knowing whether the server has received or
 	// interruption, without knowing whether the server has received or
 	// relayed the data.
 	// relayed the data.
+	//
+	// pumpReads also handles discarding meek request payload padding.
 
 
 	requestSize, err := session.clientConn.pumpReads(request.Body)
 	requestSize, err := session.clientConn.pumpReads(request.Body)
 	if err != nil {
 	if err != nil {
@@ -762,10 +771,9 @@ func (server *MeekServer) ServeHTTP(responseWriter http.ResponseWriter, request
 			// also, golang network error messages may contain client IP.
 			// also, golang network error messages may contain client IP.
 			log.WithTraceFields(LogFields{"error": err}).Debug("read request failed")
 			log.WithTraceFields(LogFields{"error": err}).Debug("read request failed")
 		}
 		}
-		common.TerminateHTTPConnection(responseWriter, request)
-
-		// Note: keep session open to allow client to retry
 
 
+		// Do not return 404 in this error case. Keep session open to allow
+		// client to retry.
 		return
 		return
 	}
 	}
 
 
@@ -888,10 +896,9 @@ func (server *MeekServer) ServeHTTP(responseWriter http.ResponseWriter, request
 			// also, golang network error messages may contain client IP.
 			// also, golang network error messages may contain client IP.
 			log.WithTraceFields(LogFields{"error": responseError}).Debug("write response failed")
 			log.WithTraceFields(LogFields{"error": responseError}).Debug("write response failed")
 		}
 		}
-		server.handleError(responseWriter, request)
-
-		// Note: keep session open to allow client to retry
 
 
+		// Do not return 404 in this error case. Keep session open to allow
+		// client to retry.
 		return
 		return
 	}
 	}
 }
 }
@@ -1159,13 +1166,12 @@ func (server *MeekServer) getSessionOrEndpoint(
 	// See the comment in server.LoadConfig regarding fronting provider ID
 	// See the comment in server.LoadConfig regarding fronting provider ID
 	// limitations.
 	// limitations.
 
 
-	if protocol.TunnelProtocolUsesFrontedMeek(server.listenerTunnelProtocol) &&
-		server.support.ServerTacticsParametersCache != nil {
+	p, err := server.support.ServerTacticsParametersCache.Get(geoIPData)
+	if err != nil {
+		return "", nil, nil, "", "", nil, errors.Trace(err)
+	}
 
 
-		p, err := server.support.ServerTacticsParametersCache.Get(geoIPData)
-		if err != nil {
-			return "", nil, nil, "", "", nil, errors.Trace(err)
-		}
+	if protocol.TunnelProtocolUsesFrontedMeek(server.listenerTunnelProtocol) {
 
 
 		if !p.IsNil() &&
 		if !p.IsNil() &&
 			common.Contains(
 			common.Contains(
@@ -1223,6 +1229,44 @@ func (server *MeekServer) getSessionOrEndpoint(
 
 
 	session.touch()
 	session.touch()
 
 
+	if clientSessionData.EnablePayloadPadding {
+
+		// Initialize meek payload padding when the client signals
+		// use of payload padding via the meek cookie.
+
+		if p.IsNil() {
+			return "", nil, nil, "", "", nil,
+				errors.TraceNew("unsupported payload padding")
+		}
+
+		limitTunnelProtocols := p.TunnelProtocols(
+			parameters.MeekPayloadPaddingLimitTunnelProtocols)
+		if len(limitTunnelProtocols) > 0 &&
+			!common.Contains(limitTunnelProtocols,
+				clientSessionData.ClientTunnelProtocol) {
+
+			return "", nil, nil, "", "", nil,
+				errors.TraceNew("unexpected payload padding")
+		}
+
+		session.requestPaddingState, err = protocol.NewMeekRequestPayloadPaddingState(
+			server.support.Config.MeekObfuscatedKey,
+			meekCookie.Value,
+			0.0, 0, 0)
+		if err != nil {
+			return "", nil, nil, "", "", nil, errors.Trace(err)
+		}
+		session.responsePaddingState, err = protocol.NewMeekResponsePayloadPaddingState(
+			server.support.Config.MeekObfuscatedKey,
+			meekCookie.Value,
+			p.Float(parameters.MeekPayloadPaddingServerOmitProbability),
+			p.Int(parameters.MeekPayloadPaddingServerMinSize),
+			p.Int(parameters.MeekPayloadPaddingServerMaxSize))
+		if err != nil {
+			return "", nil, nil, "", "", nil, errors.Trace(err)
+		}
+	}
+
 	// Create a new meek conn that will relay the payload
 	// Create a new meek conn that will relay the payload
 	// between meek request/responses and the tunnel server client
 	// between meek request/responses and the tunnel server client
 	// handler. The client IP is also used to initialize the
 	// handler. The client IP is also used to initialize the
@@ -2213,6 +2257,8 @@ type meekSession struct {
 	cookieName                       string
 	cookieName                       string
 	contentType                      string
 	contentType                      string
 	httpVersion                      string
 	httpVersion                      string
+	requestPaddingState              *protocol.MeekPayloadPaddingState
+	responsePaddingState             *protocol.MeekPayloadPaddingState
 }
 }
 
 
 func (session *meekSession) touch() {
 func (session *meekSession) touch() {
@@ -2284,6 +2330,8 @@ func (session *meekSession) GetMetrics() common.LogFields {
 	logFields["meek_cookie_name"] = session.cookieName
 	logFields["meek_cookie_name"] = session.cookieName
 	logFields["meek_content_type"] = session.contentType
 	logFields["meek_content_type"] = session.contentType
 	logFields["meek_server_http_version"] = session.httpVersion
 	logFields["meek_server_http_version"] = session.httpVersion
+	logFields["meek_payload_padding"] =
+		session.requestPaddingState != nil || session.responsePaddingState != nil
 	return logFields
 	return logFields
 }
 }
 
 
@@ -2460,7 +2508,10 @@ func (conn *meekConn) StopFragmenting() {
 // without a Read() immediately consuming the bytes, but there's still
 // without a Read() immediately consuming the bytes, but there's still
 // a possibility of a stall if no Read() calls are made after this
 // a possibility of a stall if no Read() calls are made after this
 // read buffer is full.
 // read buffer is full.
-// Returns the number of request bytes read.
+//
+// Returns the number of request bytes read, excluding any payload padding
+// bytes.
+//
 // Note: assumes only one concurrent call to pumpReads
 // Note: assumes only one concurrent call to pumpReads
 func (conn *meekConn) pumpReads(reader io.Reader) (int64, error) {
 func (conn *meekConn) pumpReads(reader io.Reader) (int64, error) {
 
 
@@ -2484,9 +2535,12 @@ func (conn *meekConn) pumpReads(reader io.Reader) (int64, error) {
 
 
 	// +1 allows for an explicit check for request payloads that
 	// +1 allows for an explicit check for request payloads that
 	// exceed the maximum permitted length.
 	// exceed the maximum permitted length.
-	limitReader := io.LimitReader(reader, MEEK_MAX_REQUEST_PAYLOAD_LENGTH+1)
-	n, err := readBuffer.ReadFrom(limitReader)
+	reader = io.LimitReader(reader, MEEK_MAX_REQUEST_PAYLOAD_LENGTH+1)
 
 
+	checksumWriter := crc64nvme.New()
+	reader = io.TeeReader(reader, checksumWriter)
+
+	n, err := readBuffer.ReadFrom(reader)
 	if err == nil && n == MEEK_MAX_REQUEST_PAYLOAD_LENGTH+1 {
 	if err == nil && n == MEEK_MAX_REQUEST_PAYLOAD_LENGTH+1 {
 		err = std_errors.New("invalid request payload length")
 		err = std_errors.New("invalid request payload length")
 	}
 	}
@@ -2494,7 +2548,14 @@ func (conn *meekConn) pumpReads(reader io.Reader) (int64, error) {
 	// If the request read fails, don't relay the new data. This allows
 	// If the request read fails, don't relay the new data. This allows
 	// the client to retry and resend its request payload without
 	// the client to retry and resend its request payload without
 	// interrupting/duplicating the payload flow.
 	// interrupting/duplicating the payload flow.
-	if err != nil {
+	//
+	// Also return early here, and don't update the retry checksum, when an
+	// empty payload is read. In some retry cases, the client will skip
+	// resending the payload when it knows the server received it. In payload
+	// padding mode, this handles the case when padding is omitted for an
+	// empty payload.
+
+	if err != nil || n == 0 {
 		readBuffer.Truncate(newDataOffset)
 		readBuffer.Truncate(newDataOffset)
 		conn.replaceReadBuffer(readBuffer)
 		conn.replaceReadBuffer(readBuffer)
 		return 0, errors.Trace(err)
 		return 0, errors.Trace(err)
@@ -2507,18 +2568,82 @@ func (conn *meekConn) pumpReads(reader io.Reader) (int64, error) {
 	// will not repeat. In the highly unlikely case that it does,
 	// will not repeat. In the highly unlikely case that it does,
 	// the underlying SSH connection will fail and the client
 	// the underlying SSH connection will fail and the client
 	// must reconnect.
 	// must reconnect.
+	//
+	// In payload padding mode, any padding -- prefix, header, padding
+	// itself -- is treated as part of the payload checksum; client retries
+	// will resend the same padding.
 
 
-	checksum := crc64.Checksum(
-		readBuffer.Bytes()[newDataOffset:], conn.meekServer.checksumTable)
+	checksum := checksumWriter.Sum64()
 
 
 	if conn.lastReadChecksum == nil {
 	if conn.lastReadChecksum == nil {
 		conn.lastReadChecksum = new(uint64)
 		conn.lastReadChecksum = new(uint64)
 	} else if *conn.lastReadChecksum == checksum {
 	} else if *conn.lastReadChecksum == checksum {
 		readBuffer.Truncate(newDataOffset)
 		readBuffer.Truncate(newDataOffset)
+		conn.replaceReadBuffer(readBuffer)
+		return 0, errors.Trace(err)
 	}
 	}
 
 
 	*conn.lastReadChecksum = checksum
 	*conn.lastReadChecksum = checksum
 
 
+	paddingBytesRead := int64(0)
+
+	if conn.meekSession.requestPaddingState != nil {
+
+		// In payload padding mode, any non empty request body is expected to
+		// have a padding prefix and possibly a full padding header with
+		// padding itself.
+		//
+		// At this point, the request body has been fully read without error,
+		// and any client retry repeats of the same request body have been
+		// skipped. The ReceiverConsumePadding call will unconditionally
+		// advance the padding cipher stream state, and no short reads
+		// (ErrMeekPaddingStateImmediateEOF) are expected.
+
+		var paddingReader io.Reader
+		if newDataOffset == 0 {
+
+			// Fast path: ReceiverConsumePadding consumes from the start of
+			// readBuffer.
+
+			paddingReader = readBuffer
+		} else {
+
+			// Slower path: the new payload has been appended to a non-empty
+			// readBuffer, so ReceiverConsumePadding will consume from the
+			// middle of the readBuffer and the post-padding bytes will be
+			// shifted forward. This approach doesn't require any additional
+			// buffer allocations.
+
+			paddingReader = bytes.NewReader(readBuffer.Bytes()[newDataOffset:])
+		}
+
+		paddingBytesRead, _, err = conn.meekSession.requestPaddingState.
+			ReceiverConsumePadding(paddingReader)
+		if paddingBytesRead > n {
+			err = errors.TraceNew("unexpected padding bytes read")
+		}
+		if err != nil {
+			readBuffer.Truncate(newDataOffset)
+			conn.replaceReadBuffer(readBuffer)
+			return 0, errors.Trace(err)
+		}
+
+		// Return only the actual payload size read, which is important for
+		// caller's skipExtendedTurnAround heuristic.
+		n -= paddingBytesRead
+
+		if newDataOffset > 0 {
+			// TODO: shift in the other direction, pre-newDataOffset forward,
+			// if that's fewer bytes?
+			buf := readBuffer.Bytes()
+			bufLen := readBuffer.Len()
+			paddingSize := bufLen - newDataOffset - paddingReader.(*bytes.Reader).Len()
+			copy(buf[newDataOffset:],
+				buf[newDataOffset+paddingSize:])
+			readBuffer.Truncate(bufLen - paddingSize)
+		}
+	}
+
 	conn.replaceReadBuffer(readBuffer)
 	conn.replaceReadBuffer(readBuffer)
 
 
 	return n, nil
 	return n, nil
@@ -2577,13 +2702,35 @@ func (conn *meekConn) pumpWrites(
 	for {
 	for {
 		select {
 		select {
 		case buffer := <-conn.nextWriteBuffer:
 		case buffer := <-conn.nextWriteBuffer:
+
+			if conn.meekSession.responsePaddingState != nil && n == 0 {
+
+				// When in payload padding mode, every payload has an initial padding
+				// prefix. In this case, receiving nextWriteBuffer implies
+				// there are payload bytes, so the prefix indicates no padding.
+
+				paddingHeader, err := conn.meekSession.responsePaddingState.
+					SenderGetNextPadding(false)
+				if err == nil {
+					var written int
+					written, err = writer.Write(paddingHeader)
+					n += written
+				}
+				if err != nil {
+					err = errors.Trace(err)
+					// See "always send" comment below.
+					conn.writeResult <- err
+					return n, err
+				}
+			}
+
 			written, err := writer.Write(buffer)
 			written, err := writer.Write(buffer)
 			n += written
 			n += written
 			// Assumes that writeResult won't block.
 			// Assumes that writeResult won't block.
 			// Note: always send the err to writeResult,
 			// Note: always send the err to writeResult,
 			// as the Write() caller is blocking on this.
 			// as the Write() caller is blocking on this.
+			err = errors.Trace(err)
 			conn.writeResult <- err
 			conn.writeResult <- err
-
 			if err != nil {
 			if err != nil {
 				return n, err
 				return n, err
 			}
 			}
@@ -2607,6 +2754,26 @@ func (conn *meekConn) pumpWrites(
 			timeout.Reset(conn.meekServer.turnAroundTimeout)
 			timeout.Reset(conn.meekServer.turnAroundTimeout)
 
 
 		case <-timeout.C:
 		case <-timeout.C:
+
+			if conn.meekSession.responsePaddingState != nil && n == 0 {
+
+				// When in payload padding mode, and there's no payload, add padding.
+
+				paddingHeader, err := conn.meekSession.responsePaddingState.
+					SenderGetNextPadding(true)
+				if err != nil {
+					return n, errors.Trace(err)
+				}
+
+				if len(paddingHeader) > 0 {
+					written, err := writer.Write(paddingHeader)
+					n += written
+					if err != nil {
+						return n, errors.Trace(err)
+					}
+				}
+			}
+
 			return n, nil
 			return n, nil
 
 
 		case <-conn.closeBroadcast:
 		case <-conn.closeBroadcast:

+ 76 - 3
psiphon/server/meek_test.go

@@ -183,10 +183,24 @@ func TestCachedResponse(t *testing.T) {
 }
 }
 
 
 func TestMeekResiliency(t *testing.T) {
 func TestMeekResiliency(t *testing.T) {
-	testMeekResiliency(t, nil, false)
+	testMeekResiliency(t, nil, false, false)
+}
+
+func TestMeekResiliencyWithPayloadPadding(t *testing.T) {
+	testMeekResiliency(t, nil, false, true)
 }
 }
 
 
 func TestMeekHTTPNormalizerResiliency(t *testing.T) {
 func TestMeekHTTPNormalizerResiliency(t *testing.T) {
+	testMeekHTTPNormalizerResiliency(t, false)
+}
+
+func TestMeekHTTPNormalizerResiliencyWithPayloadPadding(t *testing.T) {
+	testMeekHTTPNormalizerResiliency(t, true)
+}
+
+func testMeekHTTPNormalizerResiliency(
+	t *testing.T,
+	enablePayloadPadding bool) {
 
 
 	seed, err := prng.NewSeed()
 	seed, err := prng.NewSeed()
 	if err != nil {
 	if err != nil {
@@ -199,10 +213,14 @@ func TestMeekHTTPNormalizerResiliency(t *testing.T) {
 		ProtocolTransformSeed: seed,
 		ProtocolTransformSeed: seed,
 	}
 	}
 
 
-	testMeekResiliency(t, spec, true)
+	testMeekResiliency(t, spec, true, enablePayloadPadding)
 }
 }
 
 
-func testMeekResiliency(t *testing.T, spec *transforms.HTTPTransformerParameters, useHTTPNormalizer bool) {
+func testMeekResiliency(
+	t *testing.T,
+	spec *transforms.HTTPTransformerParameters,
+	useHTTPNormalizer bool,
+	enablePayloadPadding bool) {
 
 
 	upstreamData := make([]byte, 5*MB)
 	upstreamData := make([]byte, 5*MB)
 	_, _ = rand.Read(upstreamData)
 	_, _ = rand.Read(upstreamData)
@@ -285,6 +303,45 @@ func testMeekResiliency(t *testing.T, spec *transforms.HTTPTransformerParameters
 	}
 	}
 	mockSupport.GeoIPService, _ = NewGeoIPService([]string{})
 	mockSupport.GeoIPService, _ = NewGeoIPService([]string{})
 
 
+	// MeekServer requires a wired-up ServerTacticsParametersCache for
+	// meek payload padding. The default parameter values are used.
+
+	tacticsConfigFilename := filepath.Join(testDataDirName, "tactics_config.json")
+
+	tacticsConfigJSON := `
+    {
+      "DefaultTactics" : {
+        "TTL" : "60s",
+        "Probability" : 1.0,
+        "Parameters" : {
+        }
+      }
+    }
+    `
+	err = ioutil.WriteFile(tacticsConfigFilename, []byte(tacticsConfigJSON), 0600)
+	if err != nil {
+		t.Fatalf("error paving tactics config file: %s", err)
+	}
+
+	tacticsRequestPublicKey, tacticsRequestPrivateKey, tacticsRequestObfuscatedKey, err :=
+		tactics.GenerateKeys()
+	if err != nil {
+		t.Fatalf("error generating tactics keys: %s", err)
+	}
+
+	tacticsServer, err := tactics.NewServer(
+		nil, nil, nil,
+		tacticsConfigFilename,
+		tacticsRequestPublicKey,
+		tacticsRequestPrivateKey,
+		tacticsRequestObfuscatedKey)
+	if err != nil {
+		t.Fatalf("tactics.NewServer failed: %s", err)
+	}
+
+	mockSupport.TacticsServer = tacticsServer
+	mockSupport.ServerTacticsParametersCache = NewServerTacticsParametersCache(mockSupport)
+
 	listener, err := net.Listen("tcp", "127.0.0.1:0")
 	listener, err := net.Listen("tcp", "127.0.0.1:0")
 	if err != nil {
 	if err != nil {
 		t.Fatalf("net.Listen failed: %s", err)
 		t.Fatalf("net.Listen failed: %s", err)
@@ -380,6 +437,22 @@ func testMeekResiliency(t *testing.T, spec *transforms.HTTPTransformerParameters
 		HTTPTransformerParameters:     spec,
 		HTTPTransformerParameters:     spec,
 	}
 	}
 
 
+	if enablePayloadPadding {
+
+		p, err := mockSupport.ServerTacticsParametersCache.Get(GeoIPData{})
+		if err != nil {
+			t.Fatalf("ServerTacticsParametersCache.Get failed: %s", err)
+		}
+
+		meekConfig.EnablePayloadPadding = true
+		meekConfig.PayloadPaddingOmitProbability =
+			p.Float(parameters.MeekPayloadPaddingClientOmitProbability)
+		meekConfig.PayloadPaddingMinSize =
+			p.Int(parameters.MeekPayloadPaddingClientMinSize)
+		meekConfig.PayloadPaddingMaxSize =
+			p.Int(parameters.MeekPayloadPaddingClientMaxSize)
+	}
+
 	ctx, cancelFunc := context.WithTimeout(
 	ctx, cancelFunc := context.WithTimeout(
 		context.Background(), time.Second*5)
 		context.Background(), time.Second*5)
 	defer cancelFunc()
 	defer cancelFunc()

+ 13 - 3
psiphon/server/pb/psiphond/dial_params.pb.go

@@ -112,6 +112,7 @@ type DialParams struct {
 	UniqueCandidateEstimate           *int64                 `protobuf:"varint,86,opt,name=unique_candidate_estimate,json=uniqueCandidateEstimate,proto3,oneof" json:"unique_candidate_estimate,omitempty"`
 	UniqueCandidateEstimate           *int64                 `protobuf:"varint,86,opt,name=unique_candidate_estimate,json=uniqueCandidateEstimate,proto3,oneof" json:"unique_candidate_estimate,omitempty"`
 	CandidatesMovedToFront            *int64                 `protobuf:"varint,87,opt,name=candidates_moved_to_front,json=candidatesMovedToFront,proto3,oneof" json:"candidates_moved_to_front,omitempty"`
 	CandidatesMovedToFront            *int64                 `protobuf:"varint,87,opt,name=candidates_moved_to_front,json=candidatesMovedToFront,proto3,oneof" json:"candidates_moved_to_front,omitempty"`
 	FirstFrontedMeekCandidate         *int64                 `protobuf:"varint,88,opt,name=first_fronted_meek_candidate,json=firstFrontedMeekCandidate,proto3,oneof" json:"first_fronted_meek_candidate,omitempty"`
 	FirstFrontedMeekCandidate         *int64                 `protobuf:"varint,88,opt,name=first_fronted_meek_candidate,json=firstFrontedMeekCandidate,proto3,oneof" json:"first_fronted_meek_candidate,omitempty"`
+	MeekPayloadPadding                *bool                  `protobuf:"varint,89,opt,name=meek_payload_padding,json=meekPayloadPadding,proto3,oneof" json:"meek_payload_padding,omitempty"`
 	unknownFields                     protoimpl.UnknownFields
 	unknownFields                     protoimpl.UnknownFields
 	sizeCache                         protoimpl.SizeCache
 	sizeCache                         protoimpl.SizeCache
 }
 }
@@ -762,11 +763,18 @@ func (x *DialParams) GetFirstFrontedMeekCandidate() int64 {
 	return 0
 	return 0
 }
 }
 
 
+func (x *DialParams) GetMeekPayloadPadding() bool {
+	if x != nil && x.MeekPayloadPadding != nil {
+		return *x.MeekPayloadPadding
+	}
+	return false
+}
+
 var File_ca_psiphon_psiphond_dial_params_proto protoreflect.FileDescriptor
 var File_ca_psiphon_psiphond_dial_params_proto protoreflect.FileDescriptor
 
 
 const file_ca_psiphon_psiphond_dial_params_proto_rawDesc = "" +
 const file_ca_psiphon_psiphond_dial_params_proto_rawDesc = "" +
 	"\n" +
 	"\n" +
-	"%ca.psiphon.psiphond/dial_params.proto\x12\x13ca.psiphon.psiphond\x1a\x1fgoogle/protobuf/timestamp.proto\"\xb54\n" +
+	"%ca.psiphon.psiphond/dial_params.proto\x12\x13ca.psiphon.psiphond\x1a\x1fgoogle/protobuf/timestamp.proto\"\x855\n" +
 	"\n" +
 	"\n" +
 	"DialParams\x12*\n" +
 	"DialParams\x12*\n" +
 	"\x0econjure_cached\x18\x01 \x01(\bH\x00R\rconjureCached\x88\x01\x01\x12(\n" +
 	"\x0econjure_cached\x18\x01 \x01(\bH\x00R\rconjureCached\x88\x01\x01\x12(\n" +
@@ -866,7 +874,8 @@ const file_ca_psiphon_psiphond_dial_params_proto_rawDesc = "" +
 	"\x0fdsl_prioritized\x18U \x01(\bHSR\x0edslPrioritized\x88\x01\x01\x12?\n" +
 	"\x0fdsl_prioritized\x18U \x01(\bHSR\x0edslPrioritized\x88\x01\x01\x12?\n" +
 	"\x19unique_candidate_estimate\x18V \x01(\x03HTR\x17uniqueCandidateEstimate\x88\x01\x01\x12>\n" +
 	"\x19unique_candidate_estimate\x18V \x01(\x03HTR\x17uniqueCandidateEstimate\x88\x01\x01\x12>\n" +
 	"\x19candidates_moved_to_front\x18W \x01(\x03HUR\x16candidatesMovedToFront\x88\x01\x01\x12D\n" +
 	"\x19candidates_moved_to_front\x18W \x01(\x03HUR\x16candidatesMovedToFront\x88\x01\x01\x12D\n" +
-	"\x1cfirst_fronted_meek_candidate\x18X \x01(\x03HVR\x19firstFrontedMeekCandidate\x88\x01\x01B\x11\n" +
+	"\x1cfirst_fronted_meek_candidate\x18X \x01(\x03HVR\x19firstFrontedMeekCandidate\x88\x01\x01\x125\n" +
+	"\x14meek_payload_padding\x18Y \x01(\bHWR\x12meekPayloadPadding\x88\x01\x01B\x11\n" +
 	"\x0f_conjure_cachedB\x10\n" +
 	"\x0f_conjure_cachedB\x10\n" +
 	"\x0e_conjure_delayB\x17\n" +
 	"\x0e_conjure_delayB\x17\n" +
 	"\x15_conjure_empty_packetB\x12\n" +
 	"\x15_conjure_empty_packetB\x12\n" +
@@ -955,7 +964,8 @@ const file_ca_psiphon_psiphond_dial_params_proto_rawDesc = "" +
 	"\x10_dsl_prioritizedB\x1c\n" +
 	"\x10_dsl_prioritizedB\x1c\n" +
 	"\x1a_unique_candidate_estimateB\x1c\n" +
 	"\x1a_unique_candidate_estimateB\x1c\n" +
 	"\x1a_candidates_moved_to_frontB\x1f\n" +
 	"\x1a_candidates_moved_to_frontB\x1f\n" +
-	"\x1d_first_fronted_meek_candidateBHZFgithub.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/server/pb/psiphondb\x06proto3"
+	"\x1d_first_fronted_meek_candidateB\x17\n" +
+	"\x15_meek_payload_paddingBHZFgithub.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/server/pb/psiphondb\x06proto3"
 
 
 var (
 var (
 	file_ca_psiphon_psiphond_dial_params_proto_rawDescOnce sync.Once
 	file_ca_psiphon_psiphond_dial_params_proto_rawDescOnce sync.Once

+ 1 - 0
psiphon/server/proto/ca.psiphon.psiphond/dial_params.proto

@@ -95,4 +95,5 @@ message DialParams {
     optional int64 unique_candidate_estimate = 86;
     optional int64 unique_candidate_estimate = 86;
     optional int64 candidates_moved_to_front = 87;
     optional int64 candidates_moved_to_front = 87;
     optional int64 first_fronted_meek_candidate = 88;
     optional int64 first_fronted_meek_candidate = 88;
+    optional bool meek_payload_padding = 89;
 }
 }

+ 67 - 36
psiphon/server/server_test.go

@@ -545,7 +545,7 @@ func TestHotReload(t *testing.T) {
 func TestHotReloadWithTactics(t *testing.T) {
 func TestHotReloadWithTactics(t *testing.T) {
 	runServer(t,
 	runServer(t,
 		&runServerConfig{
 		&runServerConfig{
-			tunnelProtocol:       "UNFRONTED-MEEK-OSSH",
+			tunnelProtocol:       "UNFRONTED-MEEK-HTTPS-OSSH",
 			doHotReload:          true,
 			doHotReload:          true,
 			requireAuthorization: true,
 			requireAuthorization: true,
 			doTunneledWebRequest: true,
 			doTunneledWebRequest: true,
@@ -846,6 +846,11 @@ func runServer(t *testing.T, runConfig *runServerConfig) {
 	psiphonServerIPAddress := "127.0.0.1"
 	psiphonServerIPAddress := "127.0.0.1"
 	psiphonServerPort := 4000
 	psiphonServerPort := 4000
 
 
+	clientTunnelProtocol := runConfig.tunnelProtocol
+	if runConfig.clientTunnelProtocol != "" {
+		clientTunnelProtocol = runConfig.clientTunnelProtocol
+	}
+
 	// initialize server entry signing
 	// initialize server entry signing
 
 
 	serverEntrySignaturePublicKey,
 	serverEntrySignaturePublicKey,
@@ -941,13 +946,19 @@ func runServer(t *testing.T, runConfig *runServerConfig) {
 
 
 	authorizationIDStr := base64.StdEncoding.EncodeToString(authorizationID)
 	authorizationIDStr := base64.StdEncoding.EncodeToString(authorizationID)
 
 
+	// Flip a coin to exercise meek payload padding. With many Test*Meek
+	// cases, both padding enabled and not enabled will get exercised.
+
+	doMeekPayloadPadding := protocol.TunnelProtocolUsesMeek(clientTunnelProtocol) &&
+		prng.FlipCoin()
+
 	// Enable tactics when the test protocol is meek or uses inproxy. Both the
 	// Enable tactics when the test protocol is meek or uses inproxy. Both the
 	// client and the server will be configured to support tactics. The
 	// client and the server will be configured to support tactics. The
 	// client config will be set with a nonfunctional config so that the
 	// client config will be set with a nonfunctional config so that the
 	// tactics request must succeed, overriding the nonfunctional values, for
 	// tactics request must succeed, overriding the nonfunctional values, for
 	// the tunnel to establish.
 	// the tunnel to establish.
 
 
-	doClientTactics := protocol.TunnelProtocolUsesMeek(runConfig.tunnelProtocol) ||
+	doClientTactics := protocol.TunnelProtocolSupportsTactics(clientTunnelProtocol) ||
 		doInproxy
 		doInproxy
 
 
 	doServerTactics := doClientTactics ||
 	doServerTactics := doClientTactics ||
@@ -955,7 +966,8 @@ func runServer(t *testing.T, runConfig *runServerConfig) {
 		runConfig.forceFragmenting ||
 		runConfig.forceFragmenting ||
 		runConfig.doBurstMonitor ||
 		runConfig.doBurstMonitor ||
 		runConfig.doASNDestBytes ||
 		runConfig.doASNDestBytes ||
-		runConfig.doTunneledDomainRequest
+		runConfig.doTunneledDomainRequest ||
+		doMeekPayloadPadding
 
 
 	// All servers require a tactics config with valid keys.
 	// All servers require a tactics config with valid keys.
 	tacticsRequestPublicKey, tacticsRequestPrivateKey, tacticsRequestObfuscatedKey, err :=
 	tacticsRequestPublicKey, tacticsRequestPrivateKey, tacticsRequestObfuscatedKey, err :=
@@ -1079,7 +1091,6 @@ func runServer(t *testing.T, runConfig *runServerConfig) {
 		livenessTestSize)
 		livenessTestSize)
 
 
 	var tacticsConfigFilename string
 	var tacticsConfigFilename string
-	var tacticsTunnelProtocol string
 	var inproxyTacticsParametersJSON string
 	var inproxyTacticsParametersJSON string
 
 
 	// Only pave the tactics config when tactics are required. This exercises the
 	// Only pave the tactics config when tactics are required. This exercises the
@@ -1087,12 +1098,6 @@ func runServer(t *testing.T, runConfig *runServerConfig) {
 	if doServerTactics {
 	if doServerTactics {
 		tacticsConfigFilename = filepath.Join(testDataDirName, "tactics_config.json")
 		tacticsConfigFilename = filepath.Join(testDataDirName, "tactics_config.json")
 
 
-		if runConfig.clientTunnelProtocol != "" {
-			tacticsTunnelProtocol = runConfig.clientTunnelProtocol
-		} else {
-			tacticsTunnelProtocol = runConfig.tunnelProtocol
-		}
-
 		if doInproxy {
 		if doInproxy {
 			inproxyTacticsParametersJSON = inproxyTestConfig.tacticsParametersJSON
 			inproxyTacticsParametersJSON = inproxyTestConfig.tacticsParametersJSON
 		}
 		}
@@ -1103,7 +1108,7 @@ func runServer(t *testing.T, runConfig *runServerConfig) {
 			tacticsRequestPublicKey,
 			tacticsRequestPublicKey,
 			tacticsRequestPrivateKey,
 			tacticsRequestPrivateKey,
 			tacticsRequestObfuscatedKey,
 			tacticsRequestObfuscatedKey,
-			tacticsTunnelProtocol,
+			clientTunnelProtocol,
 			propagationChannelID,
 			propagationChannelID,
 			livenessTestSize,
 			livenessTestSize,
 			runConfig.doBurstMonitor,
 			runConfig.doBurstMonitor,
@@ -1535,7 +1540,7 @@ func runServer(t *testing.T, runConfig *runServerConfig) {
 				tacticsRequestPublicKey,
 				tacticsRequestPublicKey,
 				tacticsRequestPrivateKey,
 				tacticsRequestPrivateKey,
 				tacticsRequestObfuscatedKey,
 				tacticsRequestObfuscatedKey,
-				tacticsTunnelProtocol,
+				clientTunnelProtocol,
 				propagationChannelID,
 				propagationChannelID,
 				livenessTestSize,
 				livenessTestSize,
 				runConfig.doBurstMonitor,
 				runConfig.doBurstMonitor,
@@ -1600,11 +1605,6 @@ func runServer(t *testing.T, runConfig *runServerConfig) {
 
 
 	testClientFeaturesJSON, _ := json.Marshal(testClientFeatures)
 	testClientFeaturesJSON, _ := json.Marshal(testClientFeatures)
 
 
-	clientTunnelProtocol := runConfig.tunnelProtocol
-	if runConfig.clientTunnelProtocol != "" {
-		clientTunnelProtocol = runConfig.clientTunnelProtocol
-	}
-
 	clientConfigJSON := fmt.Sprintf(`
 	clientConfigJSON := fmt.Sprintf(`
     {
     {
         "ClientVersion" : "%d",
         "ClientVersion" : "%d",
@@ -1682,13 +1682,23 @@ func runServer(t *testing.T, runConfig *runServerConfig) {
 	// When using TLS-OSSH the test expects the server to log the fields
 	// When using TLS-OSSH the test expects the server to log the fields
 	// tls_ossh_sni_server_name and tls_ossh_transformed_host_name, which are
 	// tls_ossh_sni_server_name and tls_ossh_transformed_host_name, which are
 	// only shipped by the client when the host name is transformed.
 	// only shipped by the client when the host name is transformed.
-	if protocol.TunnelProtocolUsesTLSOSSH(clientTunnelProtocol) {
+	//
+	// Also a workaround for UNFRONTED-MEEK-HTTP not supporting tactics requests.
+	// See limitation documented in protocol.TunnelProtocolSupportsTactics.
+	if protocol.TunnelProtocolUsesTLSOSSH(clientTunnelProtocol) ||
+		protocol.TunnelProtocolUsesMeekHTTPNormalizer(clientTunnelProtocol) {
+
 		transformHostNameProbability := 1.0
 		transformHostNameProbability := 1.0
 		clientConfig.TransformHostNameProbability = &transformHostNameProbability
 		clientConfig.TransformHostNameProbability = &transformHostNameProbability
 		clientConfig.CustomHostNameRegexes = []string{testCustomHostNameRegex}
 		clientConfig.CustomHostNameRegexes = []string{testCustomHostNameRegex}
 		customHostNameProbability := 1.0
 		customHostNameProbability := 1.0
 		clientConfig.CustomHostNameProbability = &customHostNameProbability
 		clientConfig.CustomHostNameProbability = &customHostNameProbability
 		clientConfig.CustomHostNameLimitProtocols = []string{clientTunnelProtocol}
 		clientConfig.CustomHostNameLimitProtocols = []string{clientTunnelProtocol}
+
+		if protocol.TunnelProtocolUsesMeekHTTPNormalizer(clientTunnelProtocol) {
+			pickUserAgentProbability := 1.0
+			clientConfig.PickUserAgentProbability = &pickUserAgentProbability
+		}
 	}
 	}
 
 
 	if runConfig.doSteeringIP {
 	if runConfig.doSteeringIP {
@@ -1767,6 +1777,11 @@ func runServer(t *testing.T, runConfig *runServerConfig) {
 		clientConfig.CompressTactics = &compressTactics
 		clientConfig.CompressTactics = &compressTactics
 	}
 	}
 
 
+	if doMeekPayloadPadding {
+		payloadPaddingProbability := 1.0
+		clientConfig.MeekPayloadPaddingProbability = &payloadPaddingProbability
+	}
+
 	err = clientConfig.Commit(false)
 	err = clientConfig.Commit(false)
 	if err != nil {
 	if err != nil {
 		t.Fatalf("error committing configuration file: %s", err)
 		t.Fatalf("error committing configuration file: %s", err)
@@ -2130,7 +2145,7 @@ func runServer(t *testing.T, runConfig *runServerConfig) {
 			tacticsRequestPublicKey,
 			tacticsRequestPublicKey,
 			tacticsRequestPrivateKey,
 			tacticsRequestPrivateKey,
 			tacticsRequestObfuscatedKey,
 			tacticsRequestObfuscatedKey,
-			tacticsTunnelProtocol,
+			clientTunnelProtocol,
 			propagationChannelID,
 			propagationChannelID,
 			livenessTestSize,
 			livenessTestSize,
 			runConfig.doBurstMonitor,
 			runConfig.doBurstMonitor,
@@ -2331,6 +2346,7 @@ func runServer(t *testing.T, runConfig *runServerConfig) {
 		expectServerEntryCount = 2 * protocol.ServerEntryCountRoundingIncrement
 		expectServerEntryCount = 2 * protocol.ServerEntryCountRoundingIncrement
 	}
 	}
 	expectDSLPrioritized := doDSL
 	expectDSLPrioritized := doDSL
+	expectMeekPayloadPadding := doMeekPayloadPadding
 
 
 	// The client still reports domain_bytes up when no port forwards are
 	// The client still reports domain_bytes up when no port forwards are
 	// allowed (expectTrafficFailure).
 	// allowed (expectTrafficFailure).
@@ -2366,6 +2382,7 @@ func runServer(t *testing.T, runConfig *runServerConfig) {
 			expectCheckServerEntryPruneCount,
 			expectCheckServerEntryPruneCount,
 			expectServerEntryCount,
 			expectServerEntryCount,
 			expectDSLPrioritized,
 			expectDSLPrioritized,
+			expectMeekPayloadPadding,
 			inproxyTestConfig,
 			inproxyTestConfig,
 			logFields)
 			logFields)
 		if err != nil {
 		if err != nil {
@@ -2861,6 +2878,7 @@ func checkExpectedServerTunnelLogFields(
 	expectCheckServerEntryPruneCount int,
 	expectCheckServerEntryPruneCount int,
 	expectServerEntryCount int,
 	expectServerEntryCount int,
 	expectDSLPrioritized bool,
 	expectDSLPrioritized bool,
+	expectMeekPayloadPadding bool,
 	inproxyTestConfig *inproxyTestConfig,
 	inproxyTestConfig *inproxyTestConfig,
 	fields map[string]interface{}) error {
 	fields map[string]interface{}) error {
 
 
@@ -2952,15 +2970,15 @@ func checkExpectedServerTunnelLogFields(
 		}
 		}
 	}
 	}
 
 
-	tunnelProtocol := runConfig.tunnelProtocol
+	clientTunnelProtocol := runConfig.tunnelProtocol
 	if runConfig.clientTunnelProtocol != "" {
 	if runConfig.clientTunnelProtocol != "" {
 		// In cases such as UNFRONTED-HTTPS-OSSH/TLS-OSSH demux,
 		// In cases such as UNFRONTED-HTTPS-OSSH/TLS-OSSH demux,
 		// runConfig.tunnelProtocol is the server listening protocol and
 		// runConfig.tunnelProtocol is the server listening protocol and
 		// runConfig.clientTunnelProtocol is the actual tunnel protocol.
 		// runConfig.clientTunnelProtocol is the actual tunnel protocol.
-		tunnelProtocol = runConfig.clientTunnelProtocol
+		clientTunnelProtocol = runConfig.clientTunnelProtocol
 	}
 	}
 
 
-	if fields["relay_protocol"].(string) != tunnelProtocol {
+	if fields["relay_protocol"].(string) != clientTunnelProtocol {
 		return fmt.Errorf("unexpected relay_protocol '%s'", fields["relay_protocol"])
 		return fmt.Errorf("unexpected relay_protocol '%s'", fields["relay_protocol"])
 	}
 	}
 
 
@@ -3082,7 +3100,7 @@ func checkExpectedServerTunnelLogFields(
 		}
 		}
 	}
 	}
 
 
-	if protocol.TunnelProtocolUsesObfuscatedSSH(tunnelProtocol) {
+	if protocol.TunnelProtocolUsesObfuscatedSSH(clientTunnelProtocol) {
 
 
 		for _, name := range []string{
 		for _, name := range []string{
 			"padding",
 			"padding",
@@ -3094,7 +3112,7 @@ func checkExpectedServerTunnelLogFields(
 		}
 		}
 	}
 	}
 
 
-	if protocol.TunnelProtocolUsesMeek(tunnelProtocol) {
+	if protocol.TunnelProtocolUsesMeek(clientTunnelProtocol) {
 
 
 		for _, name := range []string{
 		for _, name := range []string{
 			"user_agent",
 			"user_agent",
@@ -3118,7 +3136,7 @@ func checkExpectedServerTunnelLogFields(
 		}
 		}
 	}
 	}
 
 
-	if protocol.TunnelProtocolUsesMeekHTTP(tunnelProtocol) {
+	if protocol.TunnelProtocolUsesMeekHTTP(clientTunnelProtocol) {
 
 
 		for _, name := range []string{
 		for _, name := range []string{
 			"meek_host_header",
 			"meek_host_header",
@@ -3137,7 +3155,7 @@ func checkExpectedServerTunnelLogFields(
 			return fmt.Errorf("unexpected meek_host_header '%s'", fields["meek_host_header"])
 			return fmt.Errorf("unexpected meek_host_header '%s'", fields["meek_host_header"])
 		}
 		}
 
 
-		if !protocol.TunnelProtocolUsesFrontedMeek(tunnelProtocol) {
+		if !protocol.TunnelProtocolUsesFrontedMeek(clientTunnelProtocol) {
 			for _, name := range []string{
 			for _, name := range []string{
 				"meek_dial_ip_address",
 				"meek_dial_ip_address",
 				"meek_resolved_ip_address",
 				"meek_resolved_ip_address",
@@ -3149,7 +3167,7 @@ func checkExpectedServerTunnelLogFields(
 		}
 		}
 	}
 	}
 
 
-	if protocol.TunnelProtocolUsesMeekHTTPS(tunnelProtocol) {
+	if protocol.TunnelProtocolUsesMeekHTTPS(clientTunnelProtocol) {
 
 
 		for _, name := range []string{
 		for _, name := range []string{
 			"meek_tls_padding",
 			"meek_tls_padding",
@@ -3165,7 +3183,7 @@ func checkExpectedServerTunnelLogFields(
 			return fmt.Errorf("unexpected meek_sni_server_name '%s'", fields["meek_sni_server_name"])
 			return fmt.Errorf("unexpected meek_sni_server_name '%s'", fields["meek_sni_server_name"])
 		}
 		}
 
 
-		if !protocol.TunnelProtocolUsesFrontedMeek(tunnelProtocol) {
+		if !protocol.TunnelProtocolUsesFrontedMeek(clientTunnelProtocol) {
 			for _, name := range []string{
 			for _, name := range []string{
 				"meek_dial_ip_address",
 				"meek_dial_ip_address",
 				"meek_resolved_ip_address",
 				"meek_resolved_ip_address",
@@ -3179,7 +3197,7 @@ func checkExpectedServerTunnelLogFields(
 	}
 	}
 
 
 	name := "first_fronted_meek_candidate"
 	name := "first_fronted_meek_candidate"
-	if protocol.TunnelProtocolUsesFrontedMeek(tunnelProtocol) {
+	if protocol.TunnelProtocolUsesFrontedMeek(clientTunnelProtocol) {
 		if fields[name] == nil {
 		if fields[name] == nil {
 			return fmt.Errorf("missing expected %s", name)
 			return fmt.Errorf("missing expected %s", name)
 		}
 		}
@@ -3189,8 +3207,8 @@ func checkExpectedServerTunnelLogFields(
 		}
 		}
 	}
 	}
 
 
-	if protocol.TunnelProtocolUsesMeekHTTPS(tunnelProtocol) ||
-		protocol.TunnelProtocolUsesTLSOSSH(tunnelProtocol) {
+	if protocol.TunnelProtocolUsesMeekHTTPS(clientTunnelProtocol) ||
+		protocol.TunnelProtocolUsesTLSOSSH(clientTunnelProtocol) {
 
 
 		for _, name := range []string{
 		for _, name := range []string{
 			"tls_profile",
 			"tls_profile",
@@ -3214,7 +3232,7 @@ func checkExpectedServerTunnelLogFields(
 		}
 		}
 	}
 	}
 
 
-	if protocol.TunnelProtocolUsesTLSOSSH(tunnelProtocol) {
+	if protocol.TunnelProtocolUsesTLSOSSH(clientTunnelProtocol) {
 		for _, name := range []string{
 		for _, name := range []string{
 			"tls_padding",
 			"tls_padding",
 			"tls_ossh_sni_server_name",
 			"tls_ossh_sni_server_name",
@@ -3231,7 +3249,7 @@ func checkExpectedServerTunnelLogFields(
 		}
 		}
 	}
 	}
 
 
-	if protocol.TunnelProtocolUsesQUIC(tunnelProtocol) {
+	if protocol.TunnelProtocolUsesQUIC(clientTunnelProtocol) {
 
 
 		for _, name := range []string{
 		for _, name := range []string{
 			"quic_version",
 			"quic_version",
@@ -3254,7 +3272,7 @@ func checkExpectedServerTunnelLogFields(
 		}
 		}
 	}
 	}
 
 
-	if protocol.TunnelProtocolUsesInproxy(tunnelProtocol) {
+	if protocol.TunnelProtocolUsesInproxy(clientTunnelProtocol) {
 
 
 		for _, name := range []string{
 		for _, name := range []string{
 
 
@@ -3385,11 +3403,11 @@ func checkExpectedServerTunnelLogFields(
 
 
 	if runConfig.applyPrefix {
 	if runConfig.applyPrefix {
 
 
-		if protocol.TunnelProtocolIsObfuscatedSSH(runConfig.tunnelProtocol) {
+		if protocol.TunnelProtocolIsObfuscatedSSH(clientTunnelProtocol) {
 			if fields["ossh_prefix"] == nil || fmt.Sprintf("%s", fields["ossh_prefix"]) == "" {
 			if fields["ossh_prefix"] == nil || fmt.Sprintf("%s", fields["ossh_prefix"]) == "" {
 				return fmt.Errorf("missing expected field 'ossh_prefix'")
 				return fmt.Errorf("missing expected field 'ossh_prefix'")
 			}
 			}
-		} else if protocol.TunnelProtocolUsesShadowsocks(runConfig.tunnelProtocol) {
+		} else if protocol.TunnelProtocolUsesShadowsocks(clientTunnelProtocol) {
 			if fields["shadowsocks_prefix"] == nil || fmt.Sprintf("%s", fields["shadowsocks_prefix"]) == "" {
 			if fields["shadowsocks_prefix"] == nil || fmt.Sprintf("%s", fields["shadowsocks_prefix"]) == "" {
 				return fmt.Errorf("missing expected field 'shadowsocks_prefix'")
 				return fmt.Errorf("missing expected field 'shadowsocks_prefix'")
 			}
 			}
@@ -3588,6 +3606,17 @@ func checkExpectedServerTunnelLogFields(
 		return fmt.Errorf("unexpected dsl_prioritized %v", fields["dsl_prioritized"])
 		return fmt.Errorf("unexpected dsl_prioritized %v", fields["dsl_prioritized"])
 	}
 	}
 
 
+	if protocol.TunnelProtocolUsesMeek(clientTunnelProtocol) {
+		if fields["meek_payload_padding"] != expectMeekPayloadPadding {
+			return fmt.Errorf("unexpected meek_payload_padding %v", fields["meek_payload_padding"])
+		}
+	} else {
+		name := "meek_payload_padding"
+		if fields[name] != nil {
+			return fmt.Errorf("unexpected field '%s'", name)
+		}
+	}
+
 	return nil
 	return nil
 }
 }
 
 
@@ -4375,6 +4404,8 @@ func paveTacticsConfigFile(
           "EnableDSLFetcher": %s,
           "EnableDSLFetcher": %s,
           "DSLPrioritizeDialNewServerEntryProbability" : 1.0,
           "DSLPrioritizeDialNewServerEntryProbability" : 1.0,
           "DSLPrioritizeDialExistingServerEntryProbability" : 1.0,
           "DSLPrioritizeDialExistingServerEntryProbability" : 1.0,
+          "MeekPayloadPaddingClientOmitProbability" : 0.5,
+          "MeekPayloadPaddingServerOmitProbability" : 0.5,
           "EstablishTunnelWorkTime" : "1s"
           "EstablishTunnelWorkTime" : "1s"
         }
         }
       },
       },

+ 4 - 1
psiphon/serverApi.go

@@ -1212,14 +1212,17 @@ func getBaseAPIParameters(
 			params["meek_host_header"] = dialParams.MeekHostHeader
 			params["meek_host_header"] = dialParams.MeekHostHeader
 		}
 		}
 
 
-		// MeekTransformedHostName is meaningful when meek is used, which is when
+		// These fields are meaningful when meek is used, which is when
 		// MeekDialAddress != ""
 		// MeekDialAddress != ""
 		if dialParams.MeekDialAddress != "" {
 		if dialParams.MeekDialAddress != "" {
+
 			transformedHostName := "0"
 			transformedHostName := "0"
 			if dialParams.MeekTransformedHostName {
 			if dialParams.MeekTransformedHostName {
 				transformedHostName = "1"
 				transformedHostName = "1"
 			}
 			}
 			params["meek_transformed_host_name"] = transformedHostName
 			params["meek_transformed_host_name"] = transformedHostName
+
+			// meek_payload_padding is logged by the server
 		}
 		}
 
 
 		if dialParams.TLSOSSHSNIServerName != "" {
 		if dialParams.TLSOSSHSNIServerName != "" {

+ 24 - 0
vendor/github.com/klauspost/cpuid/v2/.gitignore

@@ -0,0 +1,24 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof

+ 74 - 0
vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml

@@ -0,0 +1,74 @@
+# This is an example goreleaser.yaml file with some sane defaults.
+# Make sure to check the documentation at http://goreleaser.com
+
+builds:
+  -
+    id: "cpuid"
+    binary: cpuid
+    main: ./cmd/cpuid/main.go
+    env:
+      - CGO_ENABLED=0
+    flags:
+      - -ldflags=-s -w
+    goos:
+      - aix
+      - linux
+      - freebsd
+      - netbsd
+      - windows
+      - darwin
+    goarch:
+      - 386
+      - amd64
+      - arm64
+    goarm:
+      - 7
+
+archives:
+  -
+    id: cpuid
+    name_template: "cpuid-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
+    replacements:
+      aix: AIX
+      darwin: OSX
+      linux: Linux
+      windows: Windows
+      386: i386
+      amd64: x86_64
+      freebsd: FreeBSD
+      netbsd: NetBSD
+    format_overrides:
+      - goos: windows
+        format: zip
+    files:
+      - LICENSE
+checksum:
+  name_template: 'checksums.txt'
+snapshot:
+  name_template: "{{ .Tag }}-next"
+changelog:
+  sort: asc
+  filters:
+    exclude:
+    - '^doc:'
+    - '^docs:'
+    - '^test:'
+    - '^tests:'
+    - '^Update\sREADME.md'
+
+nfpms:
+  -
+    file_name_template: "cpuid_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
+    vendor: Klaus Post
+    homepage: https://github.com/klauspost/cpuid
+    maintainer: Klaus Post <klauspost@gmail.com>
+    description: CPUID Tool
+    license: BSD 3-Clause
+    formats:
+      - deb
+      - rpm
+    replacements:
+      darwin: Darwin
+      linux: Linux
+      freebsd: FreeBSD
+      amd64: x86_64

+ 35 - 0
vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt

@@ -0,0 +1,35 @@
+Developer Certificate of Origin
+Version 1.1
+
+Copyright (C) 2015- Klaus Post & Contributors.
+Email: klauspost@gmail.com
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+
+Developer's Certificate of Origin 1.1
+
+By making a contribution to this project, I certify that:
+
+(a) The contribution was created in whole or in part by me and I
+    have the right to submit it under the open source license
+    indicated in the file; or
+
+(b) The contribution is based upon previous work that, to the best
+    of my knowledge, is covered under an appropriate open source
+    license and I have the right under that license to submit that
+    work with modifications, whether created in whole or in part
+    by me, under the same open source license (unless I am
+    permitted to submit under a different license), as indicated
+    in the file; or
+
+(c) The contribution was provided directly to me by some other
+    person who certified (a), (b) or (c) and I have not modified
+    it.
+
+(d) I understand and agree that this project and the contribution
+    are public and that a record of the contribution (including all
+    personal information I submit with it, including my sign-off) is
+    maintained indefinitely and may be redistributed consistent with
+    this project or the open source license(s) involved.

+ 22 - 0
vendor/github.com/klauspost/cpuid/v2/LICENSE

@@ -0,0 +1,22 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Klaus Post
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+

+ 499 - 0
vendor/github.com/klauspost/cpuid/v2/README.md

@@ -0,0 +1,499 @@
+# cpuid
+Package cpuid provides information about the CPU running the current program.
+
+CPU features are detected on startup, and kept for fast access through the life of the application.
+Currently x86 / x64 (AMD64/i386) and ARM (ARM64) are supported, and no external C (cgo) code is used, which should make the library very easy to use.
+
+You can access the CPU information by accessing the shared CPU variable of the cpuid library.
+
+Package home: https://github.com/klauspost/cpuid
+
+[![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2)
+[![Go](https://github.com/klauspost/cpuid/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/cpuid/actions/workflows/go.yml)
+
+## installing
+
+`go get -u github.com/klauspost/cpuid/v2` using modules.
+Drop `v2` for others.
+
+Installing binary:
+
+`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest`
+
+Or download binaries from release page: https://github.com/klauspost/cpuid/releases
+
+### Homebrew
+
+For macOS/Linux users, you can install via [brew](https://brew.sh/)
+
+```sh
+$ brew install cpuid
+```
+
+## example
+
+```Go
+package main
+
+import (
+	"fmt"
+	"strings"
+
+	. "github.com/klauspost/cpuid/v2"
+)
+
+func main() {
+	// Print basic CPU information:
+	fmt.Println("Name:", CPU.BrandName)
+	fmt.Println("PhysicalCores:", CPU.PhysicalCores)
+	fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore)
+	fmt.Println("LogicalCores:", CPU.LogicalCores)
+	fmt.Println("Family", CPU.Family, "Model:", CPU.Model, "Vendor ID:", CPU.VendorID)
+	fmt.Println("Features:", strings.Join(CPU.FeatureSet(), ","))
+	fmt.Println("Cacheline bytes:", CPU.CacheLine)
+	fmt.Println("L1 Data Cache:", CPU.Cache.L1D, "bytes")
+	fmt.Println("L1 Instruction Cache:", CPU.Cache.L1I, "bytes")
+	fmt.Println("L2 Cache:", CPU.Cache.L2, "bytes")
+	fmt.Println("L3 Cache:", CPU.Cache.L3, "bytes")
+	fmt.Println("Frequency", CPU.Hz, "hz")
+
+	// Test if we have these specific features:
+	if CPU.Supports(SSE, SSE2) {
+		fmt.Println("We have Streaming SIMD 2 Extensions")
+	}
+}
+```
+
+Sample output:
+```
+>go run main.go
+Name: AMD Ryzen 9 3950X 16-Core Processor
+PhysicalCores: 16
+ThreadsPerCore: 2
+LogicalCores: 32
+Family 23 Model: 113 Vendor ID: AMD
+Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CMOV,CX16,F16C,FMA3,HTT,HYPERVISOR,LZCNT,MMX,MMXEXT,NX,POPCNT,RDRAND,RDSEED,RDTSCP,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3
+Cacheline bytes: 64
+L1 Data Cache: 32768 bytes
+L1 Instruction Cache: 32768 bytes
+L2 Cache: 524288 bytes
+L3 Cache: 16777216 bytes
+Frequency 0 hz
+We have Streaming SIMD 2 Extensions
+```
+
+# usage
+
+The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features.
+A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler.  
+
+To test a larger number of features, they can be combined using `f := CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)`, etc.
+This can be used with `cpuid.CPU.HasAll(f)` to quickly test if all features are supported.
+
+Note that for some cpu/os combinations some features will not be detected.
+`amd64` has rather good support and should work reliably on all platforms.
+
+Note that hypervisors may not pass all CPU features through to the guest OS,
+so even if your host supports a feature it may not be visible on guests.
+
+## arm64 feature detection
+
+Not all operating systems provide ARM features directly 
+and there is no safe way to do so for the rest.
+
+Currently `arm64/linux` and `arm64/freebsd` should be quite reliable. 
+`arm64/darwin` adds features expected from the M1 processor, but a lot remains undetected.
+
+A `DetectARM()` can be used if you are able to control your deployment,
+it will detect CPU features, but may crash if the OS doesn't intercept the calls.
+A `-cpu.arm` flag for detecting unsafe ARM features can be added. See below.
+ 
+Note that currently only features are detected on ARM, 
+no additional information is currently available. 
+
+## flags
+
+It is possible to add flags that affect CPU detection.
+
+For this the `Flags()` command is provided.
+
+This must be called *before* `flag.Parse()`, and `Detect()` must be called *after* the flags have been parsed.
+
+This means that any detection used in `init()` functions will not contain these flags.
+
+Example:
+
+```Go
+package main
+
+import (
+	"flag"
+	"fmt"
+	"strings"
+
+	"github.com/klauspost/cpuid/v2"
+)
+
+func main() {
+	cpuid.Flags()
+	flag.Parse()
+	cpuid.Detect()
+
+	// Test if we have these specific features:
+	if cpuid.CPU.Supports(cpuid.SSE, cpuid.SSE2) {
+		fmt.Println("We have Streaming SIMD 2 Extensions")
+	}
+}
+```
+
+## commandline
+
+Download as binary from: https://github.com/klauspost/cpuid/releases
+
+Install from source:
+
+`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest`
+
+### Example
+
+```
+λ cpuid
+Name: AMD Ryzen 9 3950X 16-Core Processor
+Vendor String: AuthenticAMD
+Vendor ID: AMD
+PhysicalCores: 16
+Threads Per Core: 2
+Logical Cores: 32
+CPU Family 23 Model: 113
+Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CLZERO,CMOV,CMPXCHG8,CPBOOST,CX16,F16C,FMA3,FXSR,FXSROPT,HTT,HYPERVISOR,LAHF,LZCNT,MCAOVERFLOW,MMX,MMXEXT,MOVBE,NX,OSXSAVE,POPCNT,RDRAND,RDSEED,RDTSCP,SCE,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3,SUCCOR,X87,XSAVE
+Microarchitecture level: 3
+Cacheline bytes: 64
+L1 Instruction Cache: 32768 bytes
+L1 Data Cache: 32768 bytes
+L2 Cache: 524288 bytes
+L3 Cache: 16777216 bytes
+
+```
+### JSON Output:
+
+```
+λ cpuid --json
+{
+  "BrandName": "AMD Ryzen 9 3950X 16-Core Processor",
+  "VendorID": 2,
+  "VendorString": "AuthenticAMD",
+  "PhysicalCores": 16,
+  "ThreadsPerCore": 2,
+  "LogicalCores": 32,
+  "Family": 23,
+  "Model": 113,
+  "CacheLine": 64,
+  "Hz": 0,
+  "BoostFreq": 0,
+  "Cache": {
+    "L1I": 32768,
+    "L1D": 32768,
+    "L2": 524288,
+    "L3": 16777216
+  },
+  "SGX": {
+    "Available": false,
+    "LaunchControl": false,
+    "SGX1Supported": false,
+    "SGX2Supported": false,
+    "MaxEnclaveSizeNot64": 0,
+    "MaxEnclaveSize64": 0,
+    "EPCSections": null
+  },
+  "Features": [
+    "ADX",
+    "AESNI",
+    "AVX",
+    "AVX2",
+    "BMI1",
+    "BMI2",
+    "CLMUL",
+    "CLZERO",
+    "CMOV",
+    "CMPXCHG8",
+    "CPBOOST",
+    "CX16",
+    "F16C",
+    "FMA3",
+    "FXSR",
+    "FXSROPT",
+    "HTT",
+    "HYPERVISOR",
+    "LAHF",
+    "LZCNT",
+    "MCAOVERFLOW",
+    "MMX",
+    "MMXEXT",
+    "MOVBE",
+    "NX",
+    "OSXSAVE",
+    "POPCNT",
+    "RDRAND",
+    "RDSEED",
+    "RDTSCP",
+    "SCE",
+    "SHA",
+    "SSE",
+    "SSE2",
+    "SSE3",
+    "SSE4",
+    "SSE42",
+    "SSE4A",
+    "SSSE3",
+    "SUCCOR",
+    "X87",
+    "XSAVE"
+  ],
+  "X64Level": 3
+}
+```
+
+### Check CPU microarch level
+
+```
+λ cpuid --check-level=3
+2022/03/18 17:04:40 AMD Ryzen 9 3950X 16-Core Processor
+2022/03/18 17:04:40 Microarchitecture level 3 is supported. Max level is 3.
+Exit Code 0
+
+λ cpuid --check-level=4
+2022/03/18 17:06:18 AMD Ryzen 9 3950X 16-Core Processor
+2022/03/18 17:06:18 Microarchitecture level 4 not supported. Max level is 3.
+Exit Code 1
+```
+
+
+## Available flags
+
+### x86 & amd64 
+
+| Feature Flag       | Description                                                                                                                                                                        |
+|--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| ADX                | Intel ADX (Multi-Precision Add-Carry Instruction Extensions)                                                                                                                       |
+| AESNI              | Advanced Encryption Standard New Instructions                                                                                                                                      |
+| AMD3DNOW           | AMD 3DNOW                                                                                                                                                                          |
+| AMD3DNOWEXT        | AMD 3DNowExt                                                                                                                                                                       |
+| AMXBF16            | Tile computational operations on BFLOAT16 numbers                                                                                                                                  |
+| AMXINT8            | Tile computational operations on 8-bit integers                                                                                                                                    |
+| AMXFP16            | Tile computational operations on FP16 numbers                                                                                                                                      |
+| AMXFP8             | Tile computational operations on FP8 numbers                                                                                                                                      |
+| AMXTILE            | Tile architecture                                                                                                                                                                  |
+| APX_F              | Intel APX                                                                                                                                                                          |
+| AVX                | AVX functions                                                                                                                                                                      |
+| AVX10              | If set the Intel AVX10 Converged Vector ISA is supported                                                                                                                           |
+| AVX10_128          | If set indicates that AVX10 128-bit vector support is present                                                                                                                      |
+| AVX10_256          | If set indicates that AVX10 256-bit vector support is present                                                                                                                      |
+| AVX10_512          | If set indicates that AVX10 512-bit vector support is present                                                                                                                      |
+| AVX2               | AVX2 functions                                                                                                                                                                     |
+| AVX512BF16         | AVX-512 BFLOAT16 Instructions                                                                                                                                                      |
+| AVX512BITALG       | AVX-512 Bit Algorithms                                                                                                                                                             |
+| AVX512BW           | AVX-512 Byte and Word Instructions                                                                                                                                                 |
+| AVX512CD           | AVX-512 Conflict Detection Instructions                                                                                                                                            |
+| AVX512DQ           | AVX-512 Doubleword and Quadword Instructions                                                                                                                                       |
+| AVX512ER           | AVX-512 Exponential and Reciprocal Instructions                                                                                                                                    |
+| AVX512F            | AVX-512 Foundation                                                                                                                                                                 |
+| AVX512FP16         | AVX-512 FP16 Instructions                                                                                                                                                          |
+| AVX512IFMA         | AVX-512 Integer Fused Multiply-Add Instructions                                                                                                                                    |
+| AVX512PF           | AVX-512 Prefetch Instructions                                                                                                                                                      |
+| AVX512VBMI         | AVX-512 Vector Bit Manipulation Instructions                                                                                                                                       |
+| AVX512VBMI2        | AVX-512 Vector Bit Manipulation Instructions, Version 2                                                                                                                            |
+| AVX512VL           | AVX-512 Vector Length Extensions                                                                                                                                                   |
+| AVX512VNNI         | AVX-512 Vector Neural Network Instructions                                                                                                                                         |
+| AVX512VP2INTERSECT | AVX-512 Intersect for D/Q                                                                                                                                                          |
+| AVX512VPOPCNTDQ    | AVX-512 Vector Population Count Doubleword and Quadword                                                                                                                            |
+| AVXIFMA            | AVX-IFMA instructions                                                                                                                                                              |
+| AVXNECONVERT       | AVX-NE-CONVERT instructions                                                                                                                                                        |
+| AVXSLOW            | Indicates the CPU performs 2 128 bit operations instead of one                                                                                                                     |
+| AVXVNNI            | AVX (VEX encoded) VNNI neural network instructions                                                                                                                                 |
+| AVXVNNIINT8        | AVX-VNNI-INT8 instructions                                                                                                                                                         |
+| AVXVNNIINT16       | AVX-VNNI-INT16 instructions                                                                                                                                                        |
+| BHI_CTRL           | Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598                                                                    |
+| BMI1               | Bit Manipulation Instruction Set 1                                                                                                                                                 |
+| BMI2               | Bit Manipulation Instruction Set 2                                                                                                                                                 |
+| CETIBT             | Intel CET Indirect Branch Tracking                                                                                                                                                 |
+| CETSS              | Intel CET Shadow Stack                                                                                                                                                             |
+| CLDEMOTE           | Cache Line Demote                                                                                                                                                                  |
+| CLMUL              | Carry-less Multiplication                                                                                                                                                          |
+| CLZERO             | CLZERO instruction supported                                                                                                                                                       |
+| CMOV               | i686 CMOV                                                                                                                                                                          |
+| CMPCCXADD          | CMPCCXADD instructions                                                                                                                                                             |
+| CMPSB_SCADBS_SHORT | Fast short CMPSB and SCASB                                                                                                                                                         |
+| CMPXCHG8           | CMPXCHG8 instruction                                                                                                                                                               |
+| CPBOOST            | Core Performance Boost                                                                                                                                                             |
+| CPPC               | AMD: Collaborative Processor Performance Control                                                                                                                                   |
+| CX16               | CMPXCHG16B Instruction                                                                                                                                                             |
+| EFER_LMSLE_UNS     | AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ                                                                                                                        |
+| ENQCMD             | Enqueue Command                                                                                                                                                                    |
+| ERMS               | Enhanced REP MOVSB/STOSB                                                                                                                                                           |
+| F16C               | Half-precision floating-point conversion                                                                                                                                           |
+| FLUSH_L1D          | Flush L1D cache                                                                                                                                                                    |
+| FMA3               | Intel FMA 3. Does not imply AVX.                                                                                                                                                   |
+| FMA4               | Bulldozer FMA4 functions                                                                                                                                                           |
+| FP128              | AMD: When set, the internal FP/SIMD execution datapath is 128-bits wide                                                                                                            |
+| FP256              | AMD: When set, the internal FP/SIMD execution datapath is 256-bits wide                                                                                                            |
+| FSRM               | Fast Short Rep Mov                                                                                                                                                                 |
+| FXSR               | FXSAVE, FXRESTOR instructions, CR4 bit 9                                                                                                                                           |
+| FXSROPT            | FXSAVE/FXRSTOR optimizations                                                                                                                                                       |
+| GFNI               | Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.                                                                                  |
+| HLE                | Hardware Lock Elision                                                                                                                                                              |
+| HRESET             | If set CPU supports history reset and the IA32_HRESET_ENABLE MSR                                                                                                                   |
+| HTT                | Hyperthreading (enabled)                                                                                                                                                           |
+| HWA                | Hardware assert supported. Indicates support for MSRC001_10                                                                                                                        |
+| HYBRID_CPU         | This part has CPUs of more than one type.                                                                                                                                          |
+| HYPERVISOR         | This bit has been reserved by Intel & AMD for use by hypervisors                                                                                                                   |
+| IA32_ARCH_CAP      | IA32_ARCH_CAPABILITIES MSR (Intel)                                                                                                                                                 |
+| IA32_CORE_CAP      | IA32_CORE_CAPABILITIES MSR                                                                                                                                                         |
+| IBPB               | Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)                                                                                         |
+| IBRS               | AMD: Indirect Branch Restricted Speculation                                                                                                                                        |
+| IBRS_PREFERRED     | AMD: IBRS is preferred over software solution                                                                                                                                      |
+| IBRS_PROVIDES_SMP  | AMD: IBRS provides Same Mode Protection                                                                                                                                            |
+| IBS                | Instruction Based Sampling (AMD)                                                                                                                                                   |
+| IBSBRNTRGT         | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
+| IBSFETCHSAM        | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
+| IBSFFV             | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
+| IBSOPCNT           | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
+| IBSOPCNTEXT        | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
+| IBSOPSAM           | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
+| IBSRDWROPCNT       | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
+| IBSRIPINVALIDCHK   | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
+| IBS_FETCH_CTLX     | AMD: IBS fetch control extended MSR supported                                                                                                                                      |
+| IBS_OPDATA4        | AMD: IBS op data 4 MSR supported                                                                                                                                                   |
+| IBS_OPFUSE         | AMD: Indicates support for IbsOpFuse                                                                                                                                               |
+| IBS_PREVENTHOST    | Disallowing IBS use by the host supported                                                                                                                                          |
+| IBS_ZEN4           | Fetch and Op IBS support IBS extensions added with Zen4                                                                                                                            |
+| IDPRED_CTRL        | IPRED_DIS                                                                                                                                                                          |
+| INT_WBINVD         | WBINVD/WBNOINVD are interruptible.                                                                                                                                                 |
+| INVLPGB            | INVLPGB and TLBSYNC instructions supported                                                                                                                                         |
+| KEYLOCKER          | Key locker                                                                                                                                                                         |
+| KEYLOCKERW         | Key locker wide                                                                                                                                                                    |
+| LAHF               | LAHF/SAHF in long mode                                                                                                                                                             |
+| LAM                | If set, CPU supports Linear Address Masking                                                                                                                                        |
+| LBRVIRT            | LBR virtualization                                                                                                                                                                 |
+| LZCNT              | LZCNT instruction                                                                                                                                                                  |
+| MCAOVERFLOW        | MCA overflow recovery support.                                                                                                                                                     |
+| MCDT_NO            | Processor does not exhibit MXCSR Configuration Dependent Timing behavior and does not need to mitigate it.                                                                         |
+| MCOMMIT            | MCOMMIT instruction supported                                                                                                                                                      |
+| MD_CLEAR           | VERW clears CPU buffers                                                                                                                                                            |
+| MMX                | standard MMX                                                                                                                                                                       |
+| MMXEXT             | SSE integer functions or AMD MMX ext                                                                                                                                               |
+| MOVBE              | MOVBE instruction (big-endian)                                                                                                                                                     |
+| MOVDIR64B          | Move 64 Bytes as Direct Store                                                                                                                                                      |
+| MOVDIRI            | Move Doubleword as Direct Store                                                                                                                                                    |
+| MOVSB_ZL           | Fast Zero-Length MOVSB                                                                                                                                                             |
+| MPX                | Intel MPX (Memory Protection Extensions)                                                                                                                                           |
+| MOVU               | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD        |
+| MSRIRC             | Instruction Retired Counter MSR available                                                                                                                                          |
+| MSRLIST            | Read/Write List of Model Specific Registers                                                                                                                                        |
+| MSR_PAGEFLUSH      | Page Flush MSR available                                                                                                                                                           |
+| NRIPS              | Indicates support for NRIP save on VMEXIT                                                                                                                                          |
+| NX                 | NX (No-Execute) bit                                                                                                                                                                |
+| OSXSAVE            | XSAVE enabled by OS                                                                                                                                                                |
+| PCONFIG            | PCONFIG for Intel Multi-Key Total Memory Encryption                                                                                                                                |
+| POPCNT             | POPCNT instruction                                                                                                                                                                 |
+| PPIN               | AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled                                            |
+| PREFETCHI          | PREFETCHIT0/1 instructions                                                                                                                                                         |
+| PSFD               | Predictive Store Forward Disable                                                                                                                                                   |
+| RDPRU              | RDPRU instruction supported                                                                                                                                                        |
+| RDRAND             | RDRAND instruction is available                                                                                                                                                    |
+| RDSEED             | RDSEED instruction is available                                                                                                                                                    |
+| RDTSCP             | RDTSCP Instruction                                                                                                                                                                 |
+| RRSBA_CTRL         | Restricted RSB Alternate                                                                                                                                                           |
+| RTM                | Restricted Transactional Memory                                                                                                                                                    |
+| RTM_ALWAYS_ABORT   | Indicates that the loaded microcode is forcing RTM abort.                                                                                                                          |
+| SERIALIZE          | Serialize Instruction Execution                                                                                                                                                    |
+| SEV                | AMD Secure Encrypted Virtualization supported                                                                                                                                      |
+| SEV_64BIT          | AMD SEV guest execution only allowed from a 64-bit host                                                                                                                            |
+| SEV_ALTERNATIVE    | AMD SEV Alternate Injection supported                                                                                                                                              |
+| SEV_DEBUGSWAP      | Full debug state swap supported for SEV-ES guests                                                                                                                                  |
+| SEV_ES             | AMD SEV Encrypted State supported                                                                                                                                                  |
+| SEV_RESTRICTED     | AMD SEV Restricted Injection supported                                                                                                                                             |
+| SEV_SNP            | AMD SEV Secure Nested Paging supported                                                                                                                                             |
+| SGX                | Software Guard Extensions                                                                                                                                                          |
+| SGXLC              | Software Guard Extensions Launch Control                                                                                                                                           |
+| SHA                | Intel SHA Extensions                                                                                                                                                               |
+| SME                | AMD Secure Memory Encryption supported                                                                                                                                             |
+| SME_COHERENT       | AMD Hardware cache coherency across encryption domains enforced                                                                                                                    |
+| SPEC_CTRL_SSBD     | Speculative Store Bypass Disable                                                                                                                                                   |
+| SRBDS_CTRL         | SRBDS mitigation MSR available                                                                                                                                                     |
+| SSE                | SSE functions                                                                                                                                                                      |
+| SSE2               | P4 SSE functions                                                                                                                                                                   |
+| SSE3               | Prescott SSE3 functions                                                                                                                                                            |
+| SSE4               | Penryn SSE4.1 functions                                                                                                                                                            |
+| SSE42              | Nehalem SSE4.2 functions                                                                                                                                                           |
+| SSE4A              | AMD Barcelona microarchitecture SSE4a instructions                                                                                                                                 |
+| SSSE3              | Conroe SSSE3 functions                                                                                                                                                             |
+| STIBP              | Single Thread Indirect Branch Predictors                                                                                                                                           |
+| STIBP_ALWAYSON     | AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On                                                                              |
+| STOSB_SHORT        | Fast short STOSB                                                                                                                                                                   |
+| SUCCOR             | Software uncorrectable error containment and recovery capability.                                                                                                                  |
+| SVM                | AMD Secure Virtual Machine                                                                                                                                                         |
+| SVMDA              | Indicates support for the SVM decode assists.                                                                                                                                      |
+| SVMFBASID          | SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control |
+| SVML               | AMD SVM lock. Indicates support for SVM-Lock.                                                                                                                                      |
+| SVMNP              | AMD SVM nested paging                                                                                                                                                              |
+| SVMPF              | SVM pause intercept filter. Indicates support for the pause intercept filter                                                                                                       |
+| SVMPFT             | SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold                                                                                           |
+| SYSCALL            | System-Call Extension (SCE): SYSCALL and SYSRET instructions.                                                                                                                      |
+| SYSEE              | SYSENTER and SYSEXIT instructions                                                                                                                                                  |
+| TBM                | AMD Trailing Bit Manipulation                                                                                                                                                      |
+| TDX_GUEST          | Intel Trust Domain Extensions Guest                                                                                                                                                |
+| TLB_FLUSH_NESTED   | AMD: Flushing includes all the nested translations for guest translations                                                                                                          |
+| TME                | Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.                         |
+| TOPEXT             | TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.                                                         |
+| TSCRATEMSR         | MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104                                                                                                       |
+| TSXLDTRK           | Intel TSX Suspend Load Address Tracking                                                                                                                                            |
+| VAES               | Vector AES. AVX(512) versions requires additional checks.                                                                                                                          |
+| VMCBCLEAN          | VMCB clean bits. Indicates support for VMCB clean bits.                                                                                                                            |
+| VMPL               | AMD VM Permission Levels supported                                                                                                                                                 |
+| VMSA_REGPROT       | AMD VMSA Register Protection supported                                                                                                                                             |
+| VMX                | Virtual Machine Extensions                                                                                                                                                         |
+| VPCLMULQDQ         | Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.                                                                                                          |
+| VTE                | AMD Virtual Transparent Encryption supported                                                                                                                                       |
+| WAITPKG            | TPAUSE, UMONITOR, UMWAIT                                                                                                                                                           |
+| WBNOINVD           | Write Back and Do Not Invalidate Cache                                                                                                                                             |
+| WRMSRNS            | Non-Serializing Write to Model Specific Register                                                                                                                                   |
+| X87                | FPU                                                                                                                                                                                |
+| XGETBV1            | Supports XGETBV with ECX = 1                                                                                                                                                       |
+| XOP                | Bulldozer XOP functions                                                                                                                                                            |
+| XSAVE              | XSAVE, XRESTOR, XSETBV, XGETBV                                                                                                                                                     |
+| XSAVEC             | Supports XSAVEC and the compacted form of XRSTOR.                                                                                                                                  |
+| XSAVEOPT           | XSAVEOPT available                                                                                                                                                                 |
+| XSAVES             | Supports XSAVES/XRSTORS and IA32_XSS                                                                                                                                               |
+
+# ARM features:
+
+| Feature Flag | Description                                                      |
+|--------------|------------------------------------------------------------------|
+| AESARM       | AES instructions                                                 |
+| ARMCPUID     | Some CPU ID registers readable at user-level                     |
+| ASIMD        | Advanced SIMD                                                    |
+| ASIMDDP      | SIMD Dot Product                                                 |
+| ASIMDHP      | Advanced SIMD half-precision floating point                      |
+| ASIMDRDM     | Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) |
+| ATOMICS      | Large System Extensions (LSE)                                    |
+| CRC32        | CRC32/CRC32C instructions                                        |
+| DCPOP        | Data cache clean to Point of Persistence (DC CVAP)               |
+| EVTSTRM      | Generic timer                                                    |
+| FCMA         | Floating-point complex number addition and multiplication        |
+| FP           | Single-precision and double-precision floating point             |
+| FPHP         | Half-precision floating point                                    |
+| GPA          | Generic Pointer Authentication                                   |
+| JSCVT        | Javascript-style double->int convert (FJCVTZS)                   |
+| LRCPC        | Weaker release consistency (LDAPR, etc)                          |
+| PMULL        | Polynomial Multiply instructions (PMULL/PMULL2)                  |
+| SHA1         | SHA-1 instructions (SHA1C, etc)                                  |
+| SHA2         | SHA-2 instructions (SHA256H, etc)                                |
+| SHA3         | SHA-3 instructions (EOR3, RAXI, XAR, BCAX)                       |
+| SHA512       | SHA512 instructions                                              |
+| SM3          | SM3 instructions                                                 |
+| SM4          | SM4 instructions                                                 |
+| SVE          | Scalable Vector Extension                                        |
+
+# license
+
+This code is published under an MIT license. See LICENSE file for more information.

+ 1558 - 0
vendor/github.com/klauspost/cpuid/v2/cpuid.go

@@ -0,0 +1,1558 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+// Package cpuid provides information about the CPU running the current program.
+//
+// CPU features are detected on startup, and kept for fast access through the life of the application.
+// Currently x86 / x64 (AMD64) as well as arm64 is supported.
+//
+// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
+//
+// Package home: https://github.com/klauspost/cpuid
+package cpuid
+
+import (
+	"flag"
+	"fmt"
+	"math"
+	"math/bits"
+	"os"
+	"runtime"
+	"strings"
+)
+
+// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
+// and Processor Programming Reference (PPR)
+
+// Vendor is a representation of a CPU vendor.
+type Vendor int
+
+const (
+	VendorUnknown Vendor = iota // zero value of Vendor; the remaining entries count up from here via iota
+	Intel
+	AMD
+	VIA
+	Transmeta
+	NSC
+	KVM  // Kernel-based Virtual Machine
+	MSVM // Microsoft Hyper-V or Windows Virtual PC
+	VMware
+	XenHVM
+	Bhyve
+	Hygon
+	SiS
+	RDC
+
+	Ampere
+	ARM
+	Broadcom
+	Cavium
+	DEC
+	Fujitsu
+	Infineon
+	Motorola
+	NVIDIA
+	AMCC
+	Qualcomm
+	Marvell
+
+	QEMU
+	QNX
+	ACRN
+	SRE
+	Apple
+
+	lastVendor // unexported final entry; NOTE(review): presumably an end-of-list marker, confirm against its users
+)
+
+//go:generate stringer -type=FeatureID,Vendor
+
+// FeatureID is the ID of a specific cpu feature.
+type FeatureID int
+
+const (
+	// Keep index -1 as unknown
+	UNKNOWN = -1
+
+	// x86 features
+	ADX                 FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+	AESNI                                // Advanced Encryption Standard New Instructions
+	AMD3DNOW                             // AMD 3DNOW
+	AMD3DNOWEXT                          // AMD 3DNowExt
+	AMXBF16                              // Tile computational operations on BFLOAT16 numbers
+	AMXFP16                              // Tile computational operations on FP16 numbers
+	AMXINT8                              // Tile computational operations on 8-bit integers
+	AMXFP8                               // Tile computational operations on FP8 numbers
+	AMXTILE                              // Tile architecture
+	APX_F                                // Intel APX
+	AVX                                  // AVX functions
+	AVX10                                // If set the Intel AVX10 Converged Vector ISA is supported
+	AVX10_128                            // If set indicates that AVX10 128-bit vector support is present
+	AVX10_256                            // If set indicates that AVX10 256-bit vector support is present
+	AVX10_512                            // If set indicates that AVX10 512-bit vector support is present
+	AVX2                                 // AVX2 functions
+	AVX512BF16                           // AVX-512 BFLOAT16 Instructions
+	AVX512BITALG                         // AVX-512 Bit Algorithms
+	AVX512BW                             // AVX-512 Byte and Word Instructions
+	AVX512CD                             // AVX-512 Conflict Detection Instructions
+	AVX512DQ                             // AVX-512 Doubleword and Quadword Instructions
+	AVX512ER                             // AVX-512 Exponential and Reciprocal Instructions
+	AVX512F                              // AVX-512 Foundation
+	AVX512FP16                           // AVX-512 FP16 Instructions
+	AVX512IFMA                           // AVX-512 Integer Fused Multiply-Add Instructions
+	AVX512PF                             // AVX-512 Prefetch Instructions
+	AVX512VBMI                           // AVX-512 Vector Bit Manipulation Instructions
+	AVX512VBMI2                          // AVX-512 Vector Bit Manipulation Instructions, Version 2
+	AVX512VL                             // AVX-512 Vector Length Extensions
+	AVX512VNNI                           // AVX-512 Vector Neural Network Instructions
+	AVX512VP2INTERSECT                   // AVX-512 Intersect for D/Q
+	AVX512VPOPCNTDQ                      // AVX-512 Vector Population Count Doubleword and Quadword
+	AVXIFMA                              // AVX-IFMA instructions
+	AVXNECONVERT                         // AVX-NE-CONVERT instructions
+	AVXSLOW                              // Indicates the CPU performs 2 128 bit operations instead of one
+	AVXVNNI                              // AVX (VEX encoded) VNNI neural network instructions
+	AVXVNNIINT8                          // AVX-VNNI-INT8 instructions
+	AVXVNNIINT16                         // AVX-VNNI-INT16 instructions
+	BHI_CTRL                             // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
+	BMI1                                 // Bit Manipulation Instruction Set 1
+	BMI2                                 // Bit Manipulation Instruction Set 2
+	CETIBT                               // Intel CET Indirect Branch Tracking
+	CETSS                                // Intel CET Shadow Stack
+	CLDEMOTE                             // Cache Line Demote
+	CLMUL                                // Carry-less Multiplication
+	CLZERO                               // CLZERO instruction supported
+	CMOV                                 // i686 CMOV
+	CMPCCXADD                            // CMPCCXADD instructions
+	CMPSB_SCADBS_SHORT                   // Fast short CMPSB and SCASB
+	CMPXCHG8                             // CMPXCHG8 instruction
+	CPBOOST                              // Core Performance Boost
+	CPPC                                 // AMD: Collaborative Processor Performance Control
+	CX16                                 // CMPXCHG16B Instruction
+	EFER_LMSLE_UNS                       // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
+	ENQCMD                               // Enqueue Command
+	ERMS                                 // Enhanced REP MOVSB/STOSB
+	F16C                                 // Half-precision floating-point conversion
+	FLUSH_L1D                            // Flush L1D cache
+	FMA3                                 // Intel FMA 3. Does not imply AVX.
+	FMA4                                 // Bulldozer FMA4 functions
+	FP128                                // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
+	FP256                                // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
+	FSRM                                 // Fast Short Rep Mov
+	FXSR                                 // FXSAVE, FXRESTOR instructions, CR4 bit 9
+	FXSROPT                              // FXSAVE/FXRSTOR optimizations
+	GFNI                                 // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
+	HLE                                  // Hardware Lock Elision
+	HRESET                               // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
+	HTT                                  // Hyperthreading (enabled)
+	HWA                                  // Hardware assert supported. Indicates support for MSRC001_10
+	HYBRID_CPU                           // This part has CPUs of more than one type.
+	HYPERVISOR                           // This bit has been reserved by Intel & AMD for use by hypervisors
+	IA32_ARCH_CAP                        // IA32_ARCH_CAPABILITIES MSR (Intel)
+	IA32_CORE_CAP                        // IA32_CORE_CAPABILITIES MSR
+	IBPB                                 // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
+	IBPB_BRTYPE                          // Indicates that MSR 49h (PRED_CMD) bit 0 (IBPB) flushes	all branch type predictions from the CPU branch predictor
+	IBRS                                 // AMD: Indirect Branch Restricted Speculation
+	IBRS_PREFERRED                       // AMD: IBRS is preferred over software solution
+	IBRS_PROVIDES_SMP                    // AMD: IBRS provides Same Mode Protection
+	IBS                                  // Instruction Based Sampling (AMD)
+	IBSBRNTRGT                           // Instruction Based Sampling Feature (AMD)
+	IBSFETCHSAM                          // Instruction Based Sampling Feature (AMD)
+	IBSFFV                               // Instruction Based Sampling Feature (AMD)
+	IBSOPCNT                             // Instruction Based Sampling Feature (AMD)
+	IBSOPCNTEXT                          // Instruction Based Sampling Feature (AMD)
+	IBSOPSAM                             // Instruction Based Sampling Feature (AMD)
+	IBSRDWROPCNT                         // Instruction Based Sampling Feature (AMD)
+	IBSRIPINVALIDCHK                     // Instruction Based Sampling Feature (AMD)
+	IBS_FETCH_CTLX                       // AMD: IBS fetch control extended MSR supported
+	IBS_OPDATA4                          // AMD: IBS op data 4 MSR supported
+	IBS_OPFUSE                           // AMD: Indicates support for IbsOpFuse
+	IBS_PREVENTHOST                      // Disallowing IBS use by the host supported
+	IBS_ZEN4                             // AMD: Fetch and Op IBS support IBS extensions added with Zen4
+	IDPRED_CTRL                          // IPRED_DIS
+	INT_WBINVD                           // WBINVD/WBNOINVD are interruptible.
+	INVLPGB                              // NVLPGB and TLBSYNC instruction supported
+	KEYLOCKER                            // Key locker
+	KEYLOCKERW                           // Key locker wide
+	LAHF                                 // LAHF/SAHF in long mode
+	LAM                                  // If set, CPU supports Linear Address Masking
+	LBRVIRT                              // LBR virtualization
+	LZCNT                                // LZCNT instruction
+	MCAOVERFLOW                          // MCA overflow recovery support.
+	MCDT_NO                              // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it.
+	MCOMMIT                              // MCOMMIT instruction supported
+	MD_CLEAR                             // VERW clears CPU buffers
+	MMX                                  // standard MMX
+	MMXEXT                               // SSE integer functions or AMD MMX ext
+	MOVBE                                // MOVBE instruction (big-endian)
+	MOVDIR64B                            // Move 64 Bytes as Direct Store
+	MOVDIRI                              // Move Doubleword as Direct Store
+	MOVSB_ZL                             // Fast Zero-Length MOVSB
+	MOVU                                 // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE	MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
+	MPX                                  // Intel MPX (Memory Protection Extensions)
+	MSRIRC                               // Instruction Retired Counter MSR available
+	MSRLIST                              // Read/Write List of Model Specific Registers
+	MSR_PAGEFLUSH                        // Page Flush MSR available
+	NRIPS                                // Indicates support for NRIP save on VMEXIT
+	NX                                   // NX (No-Execute) bit
+	OSXSAVE                              // XSAVE enabled by OS
+	PCONFIG                              // PCONFIG for Intel Multi-Key Total Memory Encryption
+	POPCNT                               // POPCNT instruction
+	PPIN                                 // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
+	PREFETCHI                            // PREFETCHIT0/1 instructions
+	PSFD                                 // Predictive Store Forward Disable
+	RDPRU                                // RDPRU instruction supported
+	RDRAND                               // RDRAND instruction is available
+	RDSEED                               // RDSEED instruction is available
+	RDTSCP                               // RDTSCP Instruction
+	RRSBA_CTRL                           // Restricted RSB Alternate
+	RTM                                  // Restricted Transactional Memory
+	RTM_ALWAYS_ABORT                     // Indicates that the loaded microcode is forcing RTM abort.
+	SBPB                                 // Indicates support for the Selective Branch Predictor Barrier
+	SERIALIZE                            // Serialize Instruction Execution
+	SEV                                  // AMD Secure Encrypted Virtualization supported
+	SEV_64BIT                            // AMD SEV guest execution only allowed from a 64-bit host
+	SEV_ALTERNATIVE                      // AMD SEV Alternate Injection supported
+	SEV_DEBUGSWAP                        // Full debug state swap supported for SEV-ES guests
+	SEV_ES                               // AMD SEV Encrypted State supported
+	SEV_RESTRICTED                       // AMD SEV Restricted Injection supported
+	SEV_SNP                              // AMD SEV Secure Nested Paging supported
+	SGX                                  // Software Guard Extensions
+	SGXLC                                // Software Guard Extensions Launch Control
+	SHA                                  // Intel SHA Extensions
+	SME                                  // AMD Secure Memory Encryption supported
+	SME_COHERENT                         // AMD Hardware cache coherency across encryption domains enforced
+	SPEC_CTRL_SSBD                       // Speculative Store Bypass Disable
+	SRBDS_CTRL                           // SRBDS mitigation MSR available
+	SRSO_MSR_FIX                         // Indicates that software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO.
+	SRSO_NO                              // Indicates the CPU is not subject to the SRSO vulnerability
+	SRSO_USER_KERNEL_NO                  // Indicates the CPU is not subject to the SRSO vulnerability across user/kernel boundaries
+	SSE                                  // SSE functions
+	SSE2                                 // P4 SSE functions
+	SSE3                                 // Prescott SSE3 functions
+	SSE4                                 // Penryn SSE4.1 functions
+	SSE42                                // Nehalem SSE4.2 functions
+	SSE4A                                // AMD Barcelona microarchitecture SSE4a instructions
+	SSSE3                                // Conroe SSSE3 functions
+	STIBP                                // Single Thread Indirect Branch Predictors
+	STIBP_ALWAYSON                       // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
+	STOSB_SHORT                          // Fast short STOSB
+	SUCCOR                               // Software uncorrectable error containment and recovery capability.
+	SVM                                  // AMD Secure Virtual Machine
+	SVMDA                                // Indicates support for the SVM decode assists.
+	SVMFBASID                            // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
+	SVML                                 // AMD SVM lock. Indicates support for SVM-Lock.
+	SVMNP                                // AMD SVM nested paging
+	SVMPF                                // SVM pause intercept filter. Indicates support for the pause intercept filter
+	SVMPFT                               // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
+	SYSCALL                              // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
+	SYSEE                                // SYSENTER and SYSEXIT instructions
+	TBM                                  // AMD Trailing Bit Manipulation
+	TDX_GUEST                            // Intel Trust Domain Extensions Guest
+	TLB_FLUSH_NESTED                     // AMD: Flushing includes all the nested translations for guest translations
+	TME                                  // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
+	TOPEXT                               // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
+	TSCRATEMSR                           // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
+	TSXLDTRK                             // Intel TSX Suspend Load Address Tracking
+	VAES                                 // Vector AES. AVX(512) versions requires additional checks.
+	VMCBCLEAN                            // VMCB clean bits. Indicates support for VMCB clean bits.
+	VMPL                                 // AMD VM Permission Levels supported
+	VMSA_REGPROT                         // AMD VMSA Register Protection supported
+	VMX                                  // Virtual Machine Extensions
+	VPCLMULQDQ                           // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
+	VTE                                  // AMD Virtual Transparent Encryption supported
+	WAITPKG                              // TPAUSE, UMONITOR, UMWAIT
+	WBNOINVD                             // Write Back and Do Not Invalidate Cache
+	WRMSRNS                              // Non-Serializing Write to Model Specific Register
+	X87                                  // FPU
+	XGETBV1                              // Supports XGETBV with ECX = 1
+	XOP                                  // Bulldozer XOP functions
+	XSAVE                                // XSAVE, XRESTOR, XSETBV, XGETBV
+	XSAVEC                               // Supports XSAVEC and the compacted form of XRSTOR.
+	XSAVEOPT                             // XSAVEOPT available
+	XSAVES                               // Supports XSAVES/XRSTORS and IA32_XSS
+
+	// ARM features:
+	AESARM   // AES instructions
+	ARMCPUID // Some CPU ID registers readable at user-level
+	ASIMD    // Advanced SIMD
+	ASIMDDP  // SIMD Dot Product
+	ASIMDHP  // Advanced SIMD half-precision floating point
+	ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
+	ATOMICS  // Large System Extensions (LSE)
+	CRC32    // CRC32/CRC32C instructions
+	DCPOP    // Data cache clean to Point of Persistence (DC CVAP)
+	EVTSTRM  // Generic timer
+	FCMA     // Floatin point complex number addition and multiplication
+	FP       // Single-precision and double-precision floating point
+	FPHP     // Half-precision floating point
+	GPA      // Generic Pointer Authentication
+	JSCVT    // Javascript-style double->int convert (FJCVTZS)
+	LRCPC    // Weaker release consistency (LDAPR, etc)
+	PMULL    // Polynomial Multiply instructions (PMULL/PMULL2)
+	SHA1     // SHA-1 instructions (SHA1C, etc)
+	SHA2     // SHA-2 instructions (SHA256H, etc)
+	SHA3     // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
+	SHA512   // SHA512 instructions
+	SM3      // SM3 instructions
+	SM4      // SM4 instructions
+	SVE      // Scalable Vector Extension
+	// Keep it last. It automatically defines the size of []flagSet
+	lastID
+
+	firstID FeatureID = UNKNOWN + 1
+)
+
+// CPUInfo contains information about the detected system CPU.
+type CPUInfo struct {
+	BrandName              string  // Brand name reported by the CPU
+	VendorID               Vendor  // Comparable CPU vendor ID
+	VendorString           string  // Raw vendor string.
+	HypervisorVendorID     Vendor  // Hypervisor vendor
+	HypervisorVendorString string  // Raw hypervisor vendor string
+	featureSet             flagSet // Features of the CPU
+	PhysicalCores          int     // Number of physical processor cores in your CPU. Will be 0 if undetectable.
+	ThreadsPerCore         int     // Number of threads per physical core. Will be 1 if undetectable.
+	LogicalCores           int     // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
+	Family                 int     // CPU family number
+	Model                  int     // CPU model number
+	Stepping               int     // CPU stepping info
+	CacheLine              int     // Cache line size in bytes. Will be 0 if undetectable.
+	Hz                     int64   // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
+	BoostFreq              int64   // Max clock speed, if known, 0 otherwise
+	Cache                  struct {
+		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
+		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
+		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
+		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
+	}
+	SGX              SGXSupport              // Intel SGX details; see SGXSupport
+	AMDMemEncryption AMDMemEncryptionSupport // AMD memory encryption details; see AMDMemEncryptionSupport
+	AVX10Level       uint8                   // NOTE(review): presumably the reported AVX10 version; it is populated outside this section - confirm
+
+	maxFunc   uint32 // NOTE(review): presumably the highest standard CPUID leaf; LogicalCPU returns -1 when it is < 1
+	maxExFunc uint32 // NOTE(review): presumably the highest extended CPUID leaf; it is populated outside this section - confirm
+}
+
+var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)        // queries CPUID leaf op; NOTE(review): assigned outside this section, presumably by initCPU
+var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) // queries CPUID leaf op with sub-leaf op2
+var xgetbv func(index uint32) (eax, edx uint32)              // reads the extended control register selected by index (XGETBV)
+var rdtscpAsm func() (eax, ebx, ecx, edx uint32)             // executes RDTSCP; RTCounter combines eax/edx, Ia32TscAux returns ecx
+var darwinHasAVX512 = func() bool { return false }           // default reports no AVX-512; NOTE(review): presumably replaced on darwin builds
+
+// CPU contains information about the CPU as detected on startup,
+// or when Detect last was called.
+//
+// Use this as the primary entry point to your data.
+var CPU CPUInfo
+
+func init() {
+	initCPU() // defined outside this section; NOTE(review): presumably installs the platform-specific cpuid hooks - confirm
+	Detect()  // fill the exported CPU variable once at package load
+}
+
+// Detect re-runs CPU detection and overwrites the exported CPU variable.
+//
+// Detection already runs once when the package is initialized, so this only
+// needs to be called after Flags has been registered and the command line
+// parsed, or if the CPU may change while the program is running.
+// The caller must guarantee that no other goroutine is accessing CPU while
+// Detect executes.
+func Detect() {
+	// Defaults reported when a value cannot be detected.
+	CPU.ThreadsPerCore = 1
+	CPU.Cache.L1I, CPU.Cache.L1D, CPU.Cache.L2, CPU.Cache.L3 = -1, -1, -1, -1
+
+	// Detection is "safe" unless the cpu.arm flag was registered and set.
+	safe := detectArmFlag == nil || !*detectArmFlag
+	addInfo(&CPU, safe)
+
+	if displayFeats != nil && *displayFeats {
+		fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ","))
+		// Exit with non-zero so tests will print value.
+		os.Exit(1)
+	}
+
+	if disableFlag == nil {
+		return
+	}
+	// Names that do not parse to a known feature are silently ignored.
+	for _, name := range strings.Split(*disableFlag, ",") {
+		if id := ParseFeature(strings.TrimSpace(name)); id != UNKNOWN {
+			CPU.featureSet.unset(id)
+		}
+	}
+}
+
+// DetectARM runs ARM64 feature detection on the exported CPU variable.
+// This is NOT done automatically since it can potentially crash
+// if the OS does not handle the command.
+// If in the future this can be done safely this function may not
+// do anything.
+func DetectARM() {
+	addInfo(&CPU, false) // safe=false; Detect passes true here unless the cpu.arm flag is set
+}
+
+var detectArmFlag *bool // value of the cpu.arm flag; nil until Flags is called
+var displayFeats *bool  // value of the cpu.features flag; nil until Flags is called
+var disableFlag *string // value of the cpu.disable flag; nil until Flags is called
+
+// Flags registers the cpu.disable, cpu.features and cpu.arm command line flags.
+// This must be called *before* flag.Parse AND
+// Detect must be called after the flags have been parsed.
+// Note that this means that any detection used in init() functions
+// will not contain these flags.
+func Flags() {
+	disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
+	displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
+	detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
+}
+
+// Supports reports whether every one of the requested features is present.
+// It returns true when called without arguments.
+func (c CPUInfo) Supports(ids ...FeatureID) bool {
+	for i := range ids {
+		if c.featureSet.inSet(ids[i]) {
+			continue
+		}
+		// One missing feature is enough to fail the whole request.
+		return false
+	}
+	return true
+}
+
+// Has reports whether the single feature id is present.
+// It is kept minimal so that the compiler can inline it.
+func (c *CPUInfo) Has(id FeatureID) bool {
+	features := &c.featureSet
+	return features.inSet(id)
+}
+
+// AnyOf reports whether at least one of the requested features is present.
+// It returns false when called without arguments.
+func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
+	for i := range ids {
+		if !c.featureSet.inSet(ids[i]) {
+			continue
+		}
+		return true
+	}
+	return false
+}
+
+// Features contains several features combined for a fast check using
+// CPUInfo.HasAll. Values are created with CombineFeatures.
+type Features *flagSet
+
+// CombineFeatures allows to combine several features for a close to constant time lookup.
+func CombineFeatures(ids ...FeatureID) Features {
+	var v flagSet
+	for _, id := range ids {
+		v.set(id)
+	}
+	return &v
+}
+
+// HasAll reports whether every feature contained in f is present.
+// f is a value previously built with CombineFeatures.
+func (c *CPUInfo) HasAll(f Features) bool {
+	present := &c.featureSet
+	return present.hasSetP(f)
+}
+
+// Feature sets used by X64Level; see https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
+var oneOfLevel = CombineFeatures(SYSEE, SYSCALL) // X64Level returns 0 unless at least one of these is present
+var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2) // required for level 1
+var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) // required for level 2
+var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) // required for level 3
+var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) // required for level 4
+
+// X64Level returns the x86-64 microarchitecture level (1-4) of the CPU.
+// It returns 0 when neither SYSEE nor SYSCALL is present or when the
+// level 1 feature set is incomplete.
+// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
+func (c CPUInfo) X64Level() int {
+	if !c.featureSet.hasOneOf(oneOfLevel) {
+		return 0
+	}
+	// Probe from the most demanding level downwards; the first full match wins.
+	switch {
+	case c.featureSet.hasSetP(level4Features):
+		return 4
+	case c.featureSet.hasSetP(level3Features):
+		return 3
+	case c.featureSet.hasSetP(level2Features):
+		return 2
+	case c.featureSet.hasSetP(level1Features):
+		return 1
+	default:
+		return 0
+	}
+}
+
+// Disable clears one or several features from c.
+// It always returns true.
+func (c *CPUInfo) Disable(ids ...FeatureID) bool {
+	for i := range ids {
+		c.featureSet.unset(ids[i])
+	}
+	return true
+}
+
+// Enable marks one or several features as present, even if they were not
+// detected. This is of course not recommended for obvious reasons.
+// It always returns true.
+func (c *CPUInfo) Enable(ids ...FeatureID) bool {
+	for i := range ids {
+		c.featureSet.set(ids[i])
+	}
+	return true
+}
+
+// IsVendor reports whether the detected CPU vendor equals v.
+func (c CPUInfo) IsVendor(v Vendor) bool {
+	detected := c.VendorID
+	return detected == v
+}
+
+// FeatureSet returns the names of all features present on c.
+func (c CPUInfo) FeatureSet() []string {
+	// Size the result up front from the number of enabled flags.
+	names := make([]string, 0, c.featureSet.nEnabled())
+	return append(names, c.featureSet.Strings()...)
+}
+
+// RTCounter returns the 64-bit time-stamp counter read with the RDTSCP
+// instruction. It returns 0 if the CPU does not support the instruction.
+func (c CPUInfo) RTCounter() uint64 {
+	if c.Has(RDTSCP) {
+		// eax carries the low half and edx the high half of the counter.
+		lo, _, _, hi := rdtscpAsm()
+		return uint64(hi)<<32 | uint64(lo)
+	}
+	return 0
+}
+
+// Ia32TscAux returns the IA32_TSC_AUX value delivered by RDTSCP.
+// The value is OS dependent, but on Linux it contains information
+// about the current cpu/core the code is running on.
+// It returns 0 if the CPU does not support the RDTSCP instruction.
+func (c CPUInfo) Ia32TscAux() uint32 {
+	if c.Has(RDTSCP) {
+		_, _, aux, _ := rdtscpAsm()
+		return aux
+	}
+	return 0
+}
+
+// SveLengths returns the ARM SVE vector length vl and predicate length pl.
+// It returns 0, 0 if SVE is not enabled or the lengths cannot be detected.
+func (c CPUInfo) SveLengths() (vl, pl uint64) {
+	if c.Has(SVE) {
+		return getVectorLength()
+	}
+	return 0, 0
+}
+
+// LogicalCPU returns the logical CPU the code is currently executing on,
+// taken from the top byte of CPUID leaf 1 EBX.
+// The value is likely to change when the OS re-schedules the running
+// thread to another CPU.
+// It returns -1 if the current core cannot be detected.
+func (c CPUInfo) LogicalCPU() int {
+	if c.maxFunc >= 1 {
+		_, ebx, _, _ := cpuid(1)
+		return int(ebx >> 24)
+	}
+	return -1
+}
+
+// frequencies tries to compute the clock speed of the CPU and stores it in c.Hz and c.BoostFreq.
+// If leaf 0x15 or 0x16 yields a value it is used, otherwise the brand string is parsed. Yes, really.
+func (c *CPUInfo) frequencies() {
+	c.Hz, c.BoostFreq = 0, 0
+	mfi := maxFunctionID()
+	if mfi >= 0x15 {
+		eax, ebx, ecx, _ := cpuid(0x15)
+		if eax != 0 && ebx != 0 && ecx != 0 {
+			c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
+		}
+	}
+	if mfi >= 0x16 {
+		a, b, _, _ := cpuid(0x16)
+		// Base frequency in MHz; when non-zero it replaces any value from leaf 0x15.
+		if a&0xffff > 0 {
+			c.Hz = int64(a&0xffff) * 1_000_000
+		}
+		// Boost (maximum) frequency in MHz.
+		if b&0xffff > 0 {
+			c.BoostFreq = int64(b&0xffff) * 1_000_000
+		}
+	}
+	if c.Hz > 0 {
+		return
+	}
+
+	// The remainder determines the official rated speed of a CPU from its
+	// brand string. This insanity is *actually the official documented way to
+	// do this according to Intel*, prior to leaf 0x15 existing. The official
+	// documentation only shows this working for exactly `x.xx` or `xxxx`
+	// cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
+	// sizes.
+	model := c.BrandName
+	hz := strings.LastIndex(model, "Hz")
+	if hz < 3 { // not enough room before "Hz" for a space, a digit and a unit prefix
+		return
+	}
+	var multiplier int64
+	switch model[hz-1] { // unit prefix directly in front of "Hz"
+	case 'M':
+		multiplier = 1000 * 1000
+	case 'G':
+		multiplier = 1000 * 1000 * 1000
+	case 'T':
+		multiplier = 1000 * 1000 * 1000 * 1000
+	}
+	if multiplier == 0 { // unknown unit prefix
+		return
+	}
+	freq := int64(0)
+	divisor := int64(0)
+	decimalShift := int64(1)
+	var i int
+	for i = hz - 2; i >= 0 && model[i] != ' '; i-- { // read the number right to left, up to the preceding space
+		if model[i] >= '0' && model[i] <= '9' {
+			freq += int64(model[i]-'0') * decimalShift
+			decimalShift *= 10
+		} else if model[i] == '.' {
+			if divisor != 0 { // a second decimal point is malformed
+				return
+			}
+			divisor = decimalShift // power of ten matching the number of fractional digits seen so far
+		} else {
+			return
+		}
+	}
+	// we didn't find a space in front of the number
+	if i < 0 {
+		return
+	}
+	if divisor != 0 {
+		c.Hz = (freq * multiplier) / divisor
+		return
+	}
+	c.Hz = freq * multiplier
+}
+
+// VM reports whether the HYPERVISOR feature is set for c, which indicates
+// that the code is running inside a virtual machine.
+func (c CPUInfo) VM() bool {
+	// Consult the receiver instead of the package-level CPU variable so that
+	// a CPUInfo copy (for example one changed through Enable or Disable)
+	// reports its own state. For the global CPU the result is unchanged.
+	return c.featureSet.inSet(HYPERVISOR)
+}
+
+// flags is one 64-bit word of detected cpu features and characteristics
+type flags uint64
+
+// log2(bits_in_uint64); a FeatureID shifted right by this gives its word index in a flagSet
+const flagBitsLog2 = 6
+const flagBits = 1 << flagBitsLog2 // number of feature bits held by one flags word
+const flagMask = flagBits - 1      // mask selecting a feature's bit position inside its word
+
+// flagSet contains detected cpu features and characteristics in an array of flags, one bit per FeatureID below lastID
+type flagSet [(lastID + flagMask) / flagBits]flags
+
+// inSet reports whether the bit for feat is set in s.
+func (s *flagSet) inSet(feat FeatureID) bool {
+	word, bit := feat>>flagBitsLog2, feat&flagMask
+	return s[word]&(1<<bit) != 0
+}
+
+// set turns on the bit for feat in s.
+func (s *flagSet) set(feat FeatureID) {
+	word := feat >> flagBitsLog2
+	s[word] |= 1 << (feat & flagMask)
+}
+
+// setIf turns on every listed feature when cond is true and does nothing
+// otherwise.
+func (s *flagSet) setIf(cond bool, features ...FeatureID) {
+	if !cond {
+		return
+	}
+	for i := range features {
+		id := features[i]
+		s[id>>flagBitsLog2] |= 1 << (id & flagMask)
+	}
+}
+
+// unset clears the bit for offset in s.
+func (s *flagSet) unset(offset FeatureID) {
+	word := offset >> flagBitsLog2
+	s[word] &^= flags(1 << (offset & flagMask))
+}
+
+// or merges every feature set in other into s.
+func (s *flagSet) or(other flagSet) {
+	for i := range other {
+		s[i] |= other[i]
+	}
+}
+
+// hasSet reports whether every feature in other is also present in s.
+func (s *flagSet) hasSet(other flagSet) bool {
+	for i := 0; i < len(other); i++ {
+		if want := other[i]; s[i]&want != want {
+			return false
+		}
+	}
+	return true
+}
+
+// hasSetP reports whether every feature in other is also present in s.
+// It is the pointer-argument variant of hasSet.
+func (s *flagSet) hasSetP(other *flagSet) bool {
+	for i, want := range other[:] {
+		if missing := want &^ s[i]; missing != 0 {
+			return false
+		}
+	}
+	return true
+}
+
+// hasOneOf reports whether s shares at least one feature with other.
+func (s *flagSet) hasOneOf(other *flagSet) bool {
+	for i, candidates := range other[:] {
+		if s[i]&candidates == 0 {
+			continue
+		}
+		return true
+	}
+	return false
+}
+
+// nEnabled returns the number of features set in s.
+func (s *flagSet) nEnabled() (n int) {
+	for _, word := range s[:] {
+		count := bits.OnesCount64(uint64(word))
+		n += count
+	}
+	return n
+}
+
+// flagSetWith returns a flagSet in which exactly the given features are set.
+func flagSetWith(feat ...FeatureID) flagSet {
+	var built flagSet
+	for i := range feat {
+		built.set(feat[i])
+	}
+	return built
+}
+
+// ParseFeature returns the FeatureID whose name equals s, compared after
+// upper-casing s. It returns UNKNOWN if no feature matches.
+func ParseFeature(s string) FeatureID {
+	want := strings.ToUpper(s)
+	id := firstID
+	for id < lastID {
+		if id.String() == want {
+			return id
+		}
+		id++
+	}
+	return UNKNOWN
+}
+
+// Strings returns the names of all features set in s, in FeatureID order.
+func (s flagSet) Strings() []string {
+	if len(s) == 0 {
+		return []string{""}
+	}
+	names := []string{}
+	for id := firstID; id < lastID; id++ {
+		if !s.inSet(id) {
+			continue
+		}
+		names = append(names, id.String())
+	}
+	return names
+}
+
+// maxExtendedFunction returns EAX of CPUID leaf 0x80000000, which callers
+// compare against the extended leaf they want to query.
+func maxExtendedFunction() uint32 {
+	highest, _, _, _ := cpuid(0x80000000)
+	return highest
+}
+
+// maxFunctionID returns EAX of CPUID leaf 0, which callers compare against
+// the standard leaf they want to query.
+func maxFunctionID() uint32 {
+	highest, _, _, _ := cpuid(0)
+	return highest
+}
+
+func brandName() string {
+	if maxExtendedFunction() >= 0x80000004 {
+		v := make([]uint32, 0, 48)
+		for i := uint32(0); i < 3; i++ {
+			a, b, c, d := cpuid(0x80000002 + i)
+			v = append(v, a, b, c, d)
+		}
+		return strings.Trim(string(valAsString(v...)), " ")
+	}
+	return "unknown"
+}
+
+func threadsPerCore() int {
+	mfi := maxFunctionID() // highest standard CPUID leaf; selects the detection method below
+	vend, _ := vendorID()
+
+	if mfi < 0x4 || (vend != Intel && vend != AMD) { // only Intel and AMD are handled; everything else reports 1
+		return 1
+	}
+
+	if mfi < 0xb { // leaf 0xb unavailable: derive the value from leaves 1 and 4 (Intel only)
+		if vend != Intel {
+			return 1
+		}
+		_, b, _, d := cpuid(1)
+		if (d & (1 << 28)) != 0 {
+			// v will contain logical core count
+			v := (b >> 16) & 255
+			if v > 1 {
+				a4, _, _, _ := cpuid(4)
+				// physical cores
+				v2 := (a4 >> 26) + 1
+				if v2 > 0 {
+					return int(v) / int(v2) // integer division: yields 0 when v < v2
+				}
+			}
+		}
+		return 1
+	}
+	_, b, _, _ := cpuidex(0xb, 0)
+	if b&0xffff == 0 { // leaf 0xb reported no count; try the AMD-specific leaf
+		if vend == AMD {
+			// if >= Zen 2 0x8000001e EBX 15-8 bits means threads per core.
+			// The number of threads per core is ThreadsPerCore+1
+			// See PPR for AMD Family 17h Models 00h-0Fh (page 82)
+			fam, _, _ := familyModel()
+			_, _, _, d := cpuid(1)
+			if (d&(1<<28)) != 0 && fam >= 23 {
+				if maxExtendedFunction() >= 0x8000001e {
+					_, b, _, _ := cpuid(0x8000001e)
+					return int((b>>8)&0xff) + 1
+				}
+				return 2
+			}
+		}
+		return 1
+	}
+	return int(b & 0xffff)
+}
+
+// logicalCores returns the logical processor count reported by CPUID, or 0
+// for vendors other than Intel, AMD and Hygon.
+func logicalCores() int {
+	mfi := maxFunctionID()
+	vend, _ := vendorID()
+
+	if vend == AMD || vend == Hygon {
+		_, ebx, _, _ := cpuid(1)
+		return int((ebx >> 16) & 0xff)
+	}
+	if vend != Intel {
+		return 0
+	}
+
+	if mfi >= 0xb {
+		_, ebx, _, _ := cpuidex(0xb, 1)
+		return int(ebx & 0xffff)
+	}
+	// Use this on old Intel processors
+	if mfi < 1 {
+		return 0
+	}
+	// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
+	// that can be assigned to logical processors in a physical package.
+	// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
+	_, ebx, _, _ := cpuid(1)
+	return int((ebx >> 16) & 0xff)
+}
+
+// familyModel decodes the family, model and stepping from EAX of CPUID
+// leaf 1. It returns zeros when leaf 1 is not available.
+func familyModel() (family, model, stepping int) {
+	if maxFunctionID() < 0x1 {
+		return 0, 0, 0
+	}
+	eax, _, _, _ := cpuid(1)
+
+	baseFamily := int((eax >> 8) & 0xf)
+	family = baseFamily
+	// If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
+	if baseFamily == 0xf {
+		family += int((eax >> 20) & 0xff)
+	}
+
+	model = int((eax >> 4) & 0xf)
+	// The extended model bits are added for base family 0xf and for 0x6 (Intel).
+	if baseFamily == 0x6 || baseFamily == 0xf {
+		model += int((eax >> 12) & 0xf0)
+	}
+
+	stepping = int(eax & 0xf)
+	return family, model, stepping
+}
+
+// physicalCores returns the number of physical cores, computed as logical
+// cores divided by threads per core. It returns 0 when the count cannot be
+// determined or the vendor is not Intel, AMD or Hygon.
+func physicalCores() int {
+	v, _ := vendorID()
+	switch v {
+	case Intel:
+		lc := logicalCores()
+		tpc := threadsPerCore()
+		// threadsPerCore can yield 0 through its integer division on old
+		// Intel parts; report "undetectable" instead of dividing by zero.
+		if tpc <= 0 {
+			return 0
+		}
+		return lc / tpc
+	case AMD, Hygon:
+		lc := logicalCores()
+		tpc := threadsPerCore()
+		if lc > 0 && tpc > 0 {
+			return lc / tpc
+		}
+
+		// The following is inaccurate on AMD EPYC 7742 64-Core Processor
+		if maxExtendedFunction() >= 0x80000008 {
+			_, _, c, _ := cpuid(0x80000008)
+			if c&0xff > 0 {
+				return int(c&0xff) + 1
+			}
+		}
+	}
+	return 0
+}
+
+// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
+var vendorMapping = map[string]Vendor{
+	"AMDisbetter!": AMD,
+	"AuthenticAMD": AMD,
+	"CentaurHauls": VIA,
+	"GenuineIntel": Intel,
+	"TransmetaCPU": Transmeta,
+	"GenuineTMx86": Transmeta,
+	"Geode by NSC": NSC,
+	"VIA VIA VIA ": VIA,
+	"KVMKVMKVM":    KVM,
+	"Linux KVM Hv": KVM,
+	"TCGTCGTCGTCG": QEMU,
+	"Microsoft Hv": MSVM,
+	"VMwareVMware": VMware,
+	"XenVMMXenVMM": XenHVM,
+	"bhyve bhyve ": Bhyve,
+	"HygonGenuine": Hygon,
+	"Vortex86 SoC": SiS,
+	"SiS SiS SiS ": SiS,
+	"RiseRiseRise": SiS,
+	"Genuine  RDC": RDC,
+	"QNXQVMBSQG":   QNX,
+	"ACRNACRNACRN": ACRN,
+	"SRESRESRESRE": SRE,
+	"Apple VZ":     Apple,
+}
+
+// vendorID reads the vendor string from CPUID leaf 0 (registers EBX, EDX,
+// ECX in that order) and returns the matching Vendor together with the raw
+// string. Unrecognized strings map to VendorUnknown.
+func vendorID() (Vendor, string) {
+	_, ebx, ecx, edx := cpuid(0)
+	raw := string(valAsString(ebx, edx, ecx))
+	if known, found := vendorMapping[raw]; found {
+		return known, raw
+	}
+	return VendorUnknown, raw
+}
+
+// hypervisorVendorID reads the hypervisor vendor string from CPUID leaf
+// 0x40000000 (registers EBX, ECX, EDX in that order) and returns the matching
+// Vendor together with the raw string. Unrecognized strings map to
+// VendorUnknown.
+func hypervisorVendorID() (Vendor, string) {
+	// https://lwn.net/Articles/301888/
+	_, ebx, ecx, edx := cpuid(0x40000000)
+	raw := string(valAsString(ebx, ecx, edx))
+	if known, found := vendorMapping[raw]; found {
+		return known, raw
+	}
+	return VendorUnknown, raw
+}
+
+// cacheLine returns the cache line size in bytes, or 0 when it cannot be
+// determined.
+func cacheLine() int {
+	if maxFunctionID() < 0x1 {
+		return 0
+	}
+
+	_, ebx, _, _ := cpuid(1)
+	// clflush size: EBX[15:8] scaled by 8, i.e. (ebx&0xff00)>>8<<3.
+	if size := (ebx & 0xff00) >> 5; size != 0 {
+		return int(size)
+	}
+	// Fall back to the cacheline size in the low byte of ECX, leaf 0x80000006.
+	if maxExtendedFunction() >= 0x80000006 {
+		_, _, ecx, _ := cpuid(0x80000006)
+		return int(ecx & 0xff)
+	}
+	// TODO: Read from Cache and TLB Information
+	return 0
+}
+
+// cacheSize fills c.Cache with the L1I, L1D, L2 and L3 cache sizes.
+//
+// All four values are first reset to -1 (undetected). On Intel the sizes are
+// read from CPUID leaf 4. On AMD and Hygon they come from extended leaves
+// 0x80000005 and 0x80000006 and are refined from leaf 0x8000001D when the
+// TOPEXT feature is present. Other vendors keep -1.
+func (c *CPUInfo) cacheSize() {
+	c.Cache.L1D = -1
+	c.Cache.L1I = -1
+	c.Cache.L2 = -1
+	c.Cache.L3 = -1
+	vendor, _ := vendorID()
+	switch vendor {
+	case Intel:
+		if maxFunctionID() < 4 {
+			return
+		}
+		// Leaf 4 is available: levels that are not enumerated report 0.
+		c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0
+		for i := uint32(0); ; i++ {
+			eax, ebx, ecx, _ := cpuidex(4, i)
+			cacheType := eax & 15
+			if cacheType == 0 {
+				// Type 0 marks the end of the cache list.
+				break
+			}
+			cacheLevel := (eax >> 5) & 7
+			coherency := int(ebx&0xfff) + 1
+			partitions := int((ebx>>12)&0x3ff) + 1
+			associativity := int((ebx>>22)&0x3ff) + 1
+			sets := int(ecx) + 1
+			size := associativity * partitions * coherency * sets
+			switch cacheLevel {
+			case 1:
+				switch cacheType {
+				case 1:
+					// 1 = Data Cache
+					c.Cache.L1D = size
+				case 2:
+					// 2 = Instruction Cache
+					c.Cache.L1I = size
+				default:
+					// Other (e.g. unified) L1: fill whichever side is still
+					// undetected. Both fields are 0 at this point, so these
+					// branches are currently not taken.
+					if c.Cache.L1D < 0 {
+						c.Cache.L1D = size
+					}
+					if c.Cache.L1I < 0 {
+						c.Cache.L1I = size
+					}
+				}
+			case 2:
+				c.Cache.L2 = size
+			case 3:
+				c.Cache.L3 = size
+			}
+		}
+	case AMD, Hygon:
+		// Untested.
+		if maxExtendedFunction() < 0x80000005 {
+			return
+		}
+		_, _, ecx, edx := cpuid(0x80000005)
+		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
+		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
+
+		if maxExtendedFunction() < 0x80000006 {
+			return
+		}
+		_, _, ecx, _ = cpuid(0x80000006)
+		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
+
+		// CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
+		if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) {
+			return
+		}
+
+		// Xen Hypervisor is buggy and returns the same entry no matter ECX value.
+		// Hack: When we encounter the same entry 100 times we break.
+		nSame := 0
+		var last uint32
+		for i := uint32(0); i < math.MaxUint32; i++ {
+			eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
+
+			level := (eax >> 5) & 7
+			cacheNumSets := ecx + 1
+			cacheLineSize := 1 + (ebx & 2047)
+			cachePhysPartitions := 1 + ((ebx >> 12) & 511)
+			cacheNumWays := 1 + ((ebx >> 22) & 511)
+
+			typ := eax & 15
+			size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
+			if typ == 0 {
+				// Type 0 marks the end of the cache list.
+				return
+			}
+
+			// Check for the same value repeated.
+			comb := eax ^ ebx ^ ecx
+			if comb == last {
+				nSame++
+				if nSame == 100 {
+					return
+				}
+			}
+			last = comb
+
+			switch level {
+			case 1:
+				switch typ {
+				case 1:
+					// Data cache
+					c.Cache.L1D = size
+				case 2:
+					// Inst cache
+					c.Cache.L1I = size
+				default:
+					// Other (e.g. unified) L1: fill whichever side is still
+					// undetected. Both fields were set from leaf 0x80000005
+					// above, so these branches are currently not taken.
+					if c.Cache.L1D < 0 {
+						c.Cache.L1D = size
+					}
+					if c.Cache.L1I < 0 {
+						c.Cache.L1I = size
+					}
+				}
+			case 2:
+				c.Cache.L2 = size
+			case 3:
+				c.Cache.L3 = size
+			}
+		}
+	}
+}
+
+type SGXEPCSection struct {
+	BaseAddress uint64 // base address of the EPC section, assembled by hasSGX from EAX/EBX of a CPUID leaf 0x12 sub-leaf
+	EPCSize     uint64 // size of the EPC section, assembled by hasSGX from ECX/EDX of the same sub-leaf
+}
+
+type SGXSupport struct {
+	Available           bool            // whether SGX was reported available; the remaining fields are only filled when true
+	LaunchControl       bool            // launch control flag as passed to hasSGX
+	SGX1Supported       bool            // bit 0 of EAX, CPUID leaf 0x12 sub-leaf 0
+	SGX2Supported       bool            // bit 1 of EAX, CPUID leaf 0x12 sub-leaf 0
+	MaxEnclaveSizeNot64 int64           // 2^n with n taken from EDX[7:0] of leaf 0x12 sub-leaf 0
+	MaxEnclaveSize64    int64           // 2^n with n taken from EDX[15:8] of leaf 0x12 sub-leaf 0
+	EPCSections         []SGXEPCSection // EPC sections enumerated from leaf 0x12 sub-leaves 2 and up
+}
+
+func hasSGX(available, lc bool) (rval SGXSupport) {
+	rval.Available = available
+
+	if !available {
+		return
+	}
+
+	rval.LaunchControl = lc
+
+	a, _, _, d := cpuidex(0x12, 0)
+	rval.SGX1Supported = a&0x01 != 0
+	rval.SGX2Supported = a&0x02 != 0
+	rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF)     // pow 2
+	rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
+	rval.EPCSections = make([]SGXEPCSection, 0)
+
+	for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
+		eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
+		leafType := eax & 0xf
+
+		if leafType == 0 {
+			// Invalid subleaf, stop iterating
+			break
+		} else if leafType == 1 {
+			// EPC Section subleaf
+			baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
+			size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
+
+			section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
+			rval.EPCSections = append(rval.EPCSections, section)
+		}
+	}
+
+	return
+}
+
+type AMDMemEncryptionSupport struct {
+	Available          bool   // whether memory encryption was reported available; the remaining fields are only filled when true
+	CBitPossition      uint32 // EBX[5:0] of CPUID leaf 0x8000001f (the field name spelling is part of the exported API)
+	NumVMPL            uint32 // EBX[15:12] of CPUID leaf 0x8000001f
+	PhysAddrReduction  uint32 // EBX[11:6] of CPUID leaf 0x8000001f
+	NumEntryptedGuests uint32 // ECX of CPUID leaf 0x8000001f (the field name spelling is part of the exported API)
+	MinSevNoEsAsid     uint32 // EDX of CPUID leaf 0x8000001f
+}
+
+// hasAMDMemEncryption builds the AMDMemEncryptionSupport description from
+// CPUID leaf 0x8000001f. When available is false no CPUID query is made and
+// all other fields stay zero.
+func hasAMDMemEncryption(available bool) (rval AMDMemEncryptionSupport) {
+	if !available {
+		return AMDMemEncryptionSupport{Available: false}
+	}
+
+	_, ebx, ecx, edx := cpuidex(0x8000001f, 0)
+	return AMDMemEncryptionSupport{
+		Available:          true,
+		CBitPossition:      ebx & 0x3f,
+		NumVMPL:            (ebx >> 12) & 0xf,
+		PhysAddrReduction:  (ebx >> 6) & 0x3F,
+		NumEntryptedGuests: ecx,
+		MinSevNoEsAsid:     edx,
+	}
+}
+
+func support() flagSet {
+	var fs flagSet
+	mfi := maxFunctionID()
+	vend, _ := vendorID()
+	if mfi < 0x1 {
+		return fs
+	}
+	family, model, _ := familyModel()
+
+	_, _, c, d := cpuid(1)
+	fs.setIf((d&(1<<0)) != 0, X87)
+	fs.setIf((d&(1<<8)) != 0, CMPXCHG8)
+	fs.setIf((d&(1<<11)) != 0, SYSEE)
+	fs.setIf((d&(1<<15)) != 0, CMOV)
+	fs.setIf((d&(1<<23)) != 0, MMX)
+	fs.setIf((d&(1<<24)) != 0, FXSR)
+	fs.setIf((d&(1<<25)) != 0, FXSROPT)
+	fs.setIf((d&(1<<25)) != 0, SSE)
+	fs.setIf((d&(1<<26)) != 0, SSE2)
+	fs.setIf((c&1) != 0, SSE3)
+	fs.setIf((c&(1<<5)) != 0, VMX)
+	fs.setIf((c&(1<<9)) != 0, SSSE3)
+	fs.setIf((c&(1<<19)) != 0, SSE4)
+	fs.setIf((c&(1<<20)) != 0, SSE42)
+	fs.setIf((c&(1<<25)) != 0, AESNI)
+	fs.setIf((c&(1<<1)) != 0, CLMUL)
+	fs.setIf(c&(1<<22) != 0, MOVBE)
+	fs.setIf(c&(1<<23) != 0, POPCNT)
+	fs.setIf(c&(1<<30) != 0, RDRAND)
+
+	// This bit has been reserved by Intel & AMD for use by hypervisors,
+	// and indicates the presence of a hypervisor.
+	fs.setIf(c&(1<<31) != 0, HYPERVISOR)
+	fs.setIf(c&(1<<29) != 0, F16C)
+	fs.setIf(c&(1<<13) != 0, CX16)
+
+	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
+		fs.setIf(threadsPerCore() > 1, HTT)
+	}
+	if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
+		fs.setIf(threadsPerCore() > 1, HTT)
+	}
+	fs.setIf(c&1<<26 != 0, XSAVE)
+	fs.setIf(c&1<<27 != 0, OSXSAVE)
+	// Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits
+	const avxCheck = 1<<26 | 1<<27 | 1<<28
+	if c&avxCheck == avxCheck {
+		// Check for OS support
+		eax, _ := xgetbv(0)
+		if (eax & 0x6) == 0x6 {
+			fs.set(AVX)
+			switch vend {
+			case Intel:
+				// Older than Haswell.
+				fs.setIf(family == 6 && model < 60, AVXSLOW)
+			case AMD:
+				// Older than Zen 2
+				fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW)
+			}
+		}
+	}
+	// FMA3 can be used with SSE registers, so no OS support is strictly needed.
+	// fma3 and OSXSAVE needed.
+	const fma3Check = 1<<12 | 1<<27
+	fs.setIf(c&fma3Check == fma3Check, FMA3)
+
+	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
+	if mfi >= 7 {
+		_, ebx, ecx, edx := cpuidex(7, 0)
+		if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
+			fs.set(AVX2)
+		}
+		// CPUID.(EAX=7, ECX=0).EBX
+		if (ebx & 0x00000008) != 0 {
+			fs.set(BMI1)
+			fs.setIf((ebx&0x00000100) != 0, BMI2)
+		}
+		fs.setIf(ebx&(1<<2) != 0, SGX)
+		fs.setIf(ebx&(1<<4) != 0, HLE)
+		fs.setIf(ebx&(1<<9) != 0, ERMS)
+		fs.setIf(ebx&(1<<11) != 0, RTM)
+		fs.setIf(ebx&(1<<14) != 0, MPX)
+		fs.setIf(ebx&(1<<18) != 0, RDSEED)
+		fs.setIf(ebx&(1<<19) != 0, ADX)
+		fs.setIf(ebx&(1<<29) != 0, SHA)
+
+		// CPUID.(EAX=7, ECX=0).ECX
+		fs.setIf(ecx&(1<<5) != 0, WAITPKG)
+		fs.setIf(ecx&(1<<7) != 0, CETSS)
+		fs.setIf(ecx&(1<<8) != 0, GFNI)
+		fs.setIf(ecx&(1<<9) != 0, VAES)
+		fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
+		fs.setIf(ecx&(1<<13) != 0, TME)
+		fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
+		fs.setIf(ecx&(1<<23) != 0, KEYLOCKER)
+		fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
+		fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
+		fs.setIf(ecx&(1<<29) != 0, ENQCMD)
+		fs.setIf(ecx&(1<<30) != 0, SGXLC)
+
+		// CPUID.(EAX=7, ECX=0).EDX
+		fs.setIf(edx&(1<<4) != 0, FSRM)
+		fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL)
+		fs.setIf(edx&(1<<10) != 0, MD_CLEAR)
+		fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
+		fs.setIf(edx&(1<<14) != 0, SERIALIZE)
+		fs.setIf(edx&(1<<15) != 0, HYBRID_CPU)
+		fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
+		fs.setIf(edx&(1<<18) != 0, PCONFIG)
+		fs.setIf(edx&(1<<20) != 0, CETIBT)
+		fs.setIf(edx&(1<<26) != 0, IBPB)
+		fs.setIf(edx&(1<<27) != 0, STIBP)
+		fs.setIf(edx&(1<<28) != 0, FLUSH_L1D)
+		fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP)
+		fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP)
+		fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD)
+
+		// CPUID.(EAX=7, ECX=1).EAX
+		eax1, _, _, edx1 := cpuidex(7, 1)
+		fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
+		fs.setIf(eax1&(1<<7) != 0, CMPCCXADD)
+		fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
+		fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
+		fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
+		fs.setIf(eax1&(1<<22) != 0, HRESET)
+		fs.setIf(eax1&(1<<23) != 0, AVXIFMA)
+		fs.setIf(eax1&(1<<26) != 0, LAM)
+
+		// CPUID.(EAX=7, ECX=1).EDX
+		fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8)
+		fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT)
+		fs.setIf(edx1&(1<<10) != 0, AVXVNNIINT16)
+		fs.setIf(edx1&(1<<14) != 0, PREFETCHI)
+		fs.setIf(edx1&(1<<19) != 0, AVX10)
+		fs.setIf(edx1&(1<<21) != 0, APX_F)
+
+		// Only detect AVX-512 features if XGETBV is supported
+		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
+			// Check for OS support
+			eax, _ := xgetbv(0)
+
+			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
+			// ZMM16-ZMM31 state are enabled by OS)
+			/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
+			hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
+			if runtime.GOOS == "darwin" {
+				hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
+			}
+			if hasAVX512 {
+				fs.setIf(ebx&(1<<16) != 0, AVX512F)
+				fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
+				fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
+				fs.setIf(ebx&(1<<26) != 0, AVX512PF)
+				fs.setIf(ebx&(1<<27) != 0, AVX512ER)
+				fs.setIf(ebx&(1<<28) != 0, AVX512CD)
+				fs.setIf(ebx&(1<<30) != 0, AVX512BW)
+				fs.setIf(ebx&(1<<31) != 0, AVX512VL)
+				// ecx
+				fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
+				fs.setIf(ecx&(1<<3) != 0, AMXFP8)
+				fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
+				fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
+				fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
+				fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
+				// edx
+				fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
+				fs.setIf(edx&(1<<22) != 0, AMXBF16)
+				fs.setIf(edx&(1<<23) != 0, AVX512FP16)
+				fs.setIf(edx&(1<<24) != 0, AMXTILE)
+				fs.setIf(edx&(1<<25) != 0, AMXINT8)
+				// eax1 = CPUID.(EAX=7, ECX=1).EAX
+				fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
+				fs.setIf(eax1&(1<<19) != 0, WRMSRNS)
+				fs.setIf(eax1&(1<<21) != 0, AMXFP16)
+				fs.setIf(eax1&(1<<27) != 0, MSRLIST)
+			}
+		}
+
+		// CPUID.(EAX=7, ECX=2)
+		_, _, _, edx = cpuidex(7, 2)
+		fs.setIf(edx&(1<<0) != 0, PSFD)
+		fs.setIf(edx&(1<<1) != 0, IDPRED_CTRL)
+		fs.setIf(edx&(1<<2) != 0, RRSBA_CTRL)
+		fs.setIf(edx&(1<<4) != 0, BHI_CTRL)
+		fs.setIf(edx&(1<<5) != 0, MCDT_NO)
+
+		// Add keylocker features.
+		if fs.inSet(KEYLOCKER) && mfi >= 0x19 {
+			_, ebx, _, _ := cpuidex(0x19, 0)
+			fs.setIf(ebx&5 == 5, KEYLOCKERW) // Bit 0 and 2 (1+4)
+		}
+
+		// Add AVX10 features.
+		if fs.inSet(AVX10) && mfi >= 0x24 {
+			_, ebx, _, _ := cpuidex(0x24, 0)
+			fs.setIf(ebx&(1<<16) != 0, AVX10_128)
+			fs.setIf(ebx&(1<<17) != 0, AVX10_256)
+			fs.setIf(ebx&(1<<18) != 0, AVX10_512)
+		}
+	}
+
+	// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
+	// EAX
+	// Bit 00: XSAVEOPT is available.
+	// Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set.
+	// Bit 02: Supports XGETBV with ECX = 1 if set.
+	// Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set.
+	// Bits 31 - 04: Reserved.
+	// EBX
+	// Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCR0 | IA32_XSS.
+	// ECX
+	// Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1.
+	// EDX?
+	// Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved.
+	if mfi >= 0xd {
+		if fs.inSet(XSAVE) {
+			eax, _, _, _ := cpuidex(0xd, 1)
+			fs.setIf(eax&(1<<0) != 0, XSAVEOPT)
+			fs.setIf(eax&(1<<1) != 0, XSAVEC)
+			fs.setIf(eax&(1<<2) != 0, XGETBV1)
+			fs.setIf(eax&(1<<3) != 0, XSAVES)
+		}
+	}
+	if maxExtendedFunction() >= 0x80000001 {
+		_, _, c, d := cpuid(0x80000001)
+		if (c & (1 << 5)) != 0 {
+			fs.set(LZCNT)
+			fs.set(POPCNT)
+		}
+		// ECX
+		fs.setIf((c&(1<<0)) != 0, LAHF)
+		fs.setIf((c&(1<<2)) != 0, SVM)
+		fs.setIf((c&(1<<6)) != 0, SSE4A)
+		fs.setIf((c&(1<<10)) != 0, IBS)
+		fs.setIf((c&(1<<22)) != 0, TOPEXT)
+
+		// EDX
+		fs.setIf(d&(1<<11) != 0, SYSCALL)
+		fs.setIf(d&(1<<20) != 0, NX)
+		fs.setIf(d&(1<<22) != 0, MMXEXT)
+		fs.setIf(d&(1<<23) != 0, MMX)
+		fs.setIf(d&(1<<24) != 0, FXSR)
+		fs.setIf(d&(1<<25) != 0, FXSROPT)
+		fs.setIf(d&(1<<27) != 0, RDTSCP)
+		fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT)
+		fs.setIf(d&(1<<31) != 0, AMD3DNOW)
+
+		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
+		 * used unless the OS has AVX support. */
+		if fs.inSet(AVX) {
+			fs.setIf((c&(1<<11)) != 0, XOP)
+			fs.setIf((c&(1<<16)) != 0, FMA4)
+		}
+
+	}
+	if maxExtendedFunction() >= 0x80000007 {
+		_, b, _, d := cpuid(0x80000007)
+		fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW)
+		fs.setIf((b&(1<<1)) != 0, SUCCOR)
+		fs.setIf((b&(1<<2)) != 0, HWA)
+		fs.setIf((d&(1<<9)) != 0, CPBOOST)
+	}
+
+	if maxExtendedFunction() >= 0x80000008 {
+		_, b, _, _ := cpuid(0x80000008)
+		fs.setIf(b&(1<<28) != 0, PSFD)
+		fs.setIf(b&(1<<27) != 0, CPPC)
+		fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD)
+		fs.setIf(b&(1<<23) != 0, PPIN)
+		fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED)
+		fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS)
+		fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP)
+		fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED)
+		fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON)
+		fs.setIf(b&(1<<15) != 0, STIBP)
+		fs.setIf(b&(1<<14) != 0, IBRS)
+		fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
+		fs.setIf(b&(1<<12) != 0, IBPB)
+		fs.setIf((b&(1<<9)) != 0, WBNOINVD)
+		fs.setIf((b&(1<<8)) != 0, MCOMMIT)
+		fs.setIf((b&(1<<4)) != 0, RDPRU)
+		fs.setIf((b&(1<<3)) != 0, INVLPGB)
+		fs.setIf((b&(1<<1)) != 0, MSRIRC)
+		fs.setIf((b&(1<<0)) != 0, CLZERO)
+	}
+
+	if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A {
+		_, _, _, edx := cpuid(0x8000000A)
+		fs.setIf((edx>>0)&1 == 1, SVMNP)
+		fs.setIf((edx>>1)&1 == 1, LBRVIRT)
+		fs.setIf((edx>>2)&1 == 1, SVML)
+		fs.setIf((edx>>3)&1 == 1, NRIPS)
+		fs.setIf((edx>>4)&1 == 1, TSCRATEMSR)
+		fs.setIf((edx>>5)&1 == 1, VMCBCLEAN)
+		fs.setIf((edx>>6)&1 == 1, SVMFBASID)
+		fs.setIf((edx>>7)&1 == 1, SVMDA)
+		fs.setIf((edx>>10)&1 == 1, SVMPF)
+		fs.setIf((edx>>12)&1 == 1, SVMPFT)
+	}
+
+	if maxExtendedFunction() >= 0x8000001a {
+		eax, _, _, _ := cpuid(0x8000001a)
+		fs.setIf((eax>>0)&1 == 1, FP128)
+		fs.setIf((eax>>1)&1 == 1, MOVU)
+		fs.setIf((eax>>2)&1 == 1, FP256)
+	}
+
+	if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
+		eax, _, _, _ := cpuid(0x8000001b)
+		fs.setIf((eax>>0)&1 == 1, IBSFFV)
+		fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM)
+		fs.setIf((eax>>2)&1 == 1, IBSOPSAM)
+		fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT)
+		fs.setIf((eax>>4)&1 == 1, IBSOPCNT)
+		fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
+		fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
+		fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
+		fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE)
+		fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX)
+		fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1.
+		fs.setIf((eax>>11)&1 == 1, IBS_ZEN4)
+	}
+
+	if maxExtendedFunction() >= 0x8000001f && vend == AMD {
+		a, _, _, _ := cpuid(0x8000001f)
+		fs.setIf((a>>0)&1 == 1, SME)
+		fs.setIf((a>>1)&1 == 1, SEV)
+		fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH)
+		fs.setIf((a>>3)&1 == 1, SEV_ES)
+		fs.setIf((a>>4)&1 == 1, SEV_SNP)
+		fs.setIf((a>>5)&1 == 1, VMPL)
+		fs.setIf((a>>10)&1 == 1, SME_COHERENT)
+		fs.setIf((a>>11)&1 == 1, SEV_64BIT)
+		fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED)
+		fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE)
+		fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP)
+		fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST)
+		fs.setIf((a>>16)&1 == 1, VTE)
+		fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
+	}
+
+	if maxExtendedFunction() >= 0x80000021 && vend == AMD {
+		a, _, _, _ := cpuid(0x80000021)
+		fs.setIf((a>>31)&1 == 1, SRSO_MSR_FIX)
+		fs.setIf((a>>30)&1 == 1, SRSO_USER_KERNEL_NO)
+		fs.setIf((a>>29)&1 == 1, SRSO_NO)
+		fs.setIf((a>>28)&1 == 1, IBPB_BRTYPE)
+		fs.setIf((a>>27)&1 == 1, SBPB)
+	}
+
+	if mfi >= 0x20 {
+		// Microsoft has decided to purposefully hide the information
+		// of the guest TEE when VMs are being created using Hyper-V.
+		//
+		// This leads us to check for the Hyper-V cpuid features
+		// (0x4000000C), and then for the `ebx` value set.
+		//
+		// For Intel TDX, `ebx` is set as `0xbe3`, being 3 the part
+		// we're mostly interested in, according to:
+		// https://github.com/torvalds/linux/blob/d2f51b3516dade79269ff45eae2a7668ae711b25/arch/x86/include/asm/hyperv-tlfs.h#L169-L174
+		_, ebx, _, _ := cpuid(0x4000000C)
+		fs.setIf(ebx == 0xbe3, TDX_GUEST)
+	}
+
+	if mfi >= 0x21 {
+		// Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21).
+		_, ebx, ecx, edx := cpuid(0x21)
+		identity := string(valAsString(ebx, edx, ecx))
+		fs.setIf(identity == "IntelTDX    ", TDX_GUEST)
+	}
+
+	return fs
+}
+
+func (c *CPUInfo) supportAVX10() uint8 {
+	if c.maxFunc >= 0x24 && c.featureSet.inSet(AVX10) {
+		_, ebx, _, _ := cpuidex(0x24, 0)
+		return uint8(ebx)
+	}
+	return 0
+}
+
// valAsString unpacks each 32-bit value into four bytes, least significant
// byte first, and concatenates them in argument order. Output stops at the
// first zero byte, which is not included, so the result behaves like a
// NUL-terminated string packed into registers. With no terminator the result
// holds 4*len(values) bytes.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 0, 4*len(values))
	for _, word := range values {
		for shift := uint(0); shift < 32; shift += 8 {
			b := byte(word >> shift)
			if b == 0 {
				// Terminator found: everything gathered so far is the string.
				return out
			}
			out = append(out, b)
		}
	}
	return out
}

+ 47 - 0
vendor/github.com/klauspost/cpuid/v2/cpuid_386.s

@@ -0,0 +1,47 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build 386,!gccgo,!noasm,!appengine
+
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
//
// asmCpuid executes CPUID with EAX = op and ECX cleared to zero, then stores
// the four resulting registers into the result slots.
TEXT ·asmCpuid(SB), 7, $0
	XORL CX, CX // ECX = 0
	MOVL op+0(FP), AX
	CPUID
	MOVL AX, eax+4(FP)
	MOVL BX, ebx+8(FP)
	MOVL CX, ecx+12(FP)
	MOVL DX, edx+16(FP)
	RET
+
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
//
// asmCpuidex executes CPUID with EAX = op and ECX = op2, then stores the four
// resulting registers into the result slots.
TEXT ·asmCpuidex(SB), 7, $0
	MOVL op+0(FP), AX
	MOVL op2+4(FP), CX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET
+
// func asmXgetbv(index uint32) (eax, edx uint32)
//
// asmXgetbv loads index into ECX, executes XGETBV (emitted as raw opcode
// bytes) and returns EAX and EDX.
TEXT ·asmXgetbv(SB), 7, $0
	MOVL index+0(FP), CX
	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
	MOVL AX, eax+4(FP)
	MOVL DX, edx+8(FP)
	RET
+
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
//
// asmRdtscpAsm executes RDTSCP (emitted as raw opcode bytes) and stores AX, BX,
// CX and DX into the result slots.
// NOTE(review): nothing in this routine writes BX before it is stored, so ebx
// is presumably whatever the register held on entry; confirm callers ignore it.
TEXT ·asmRdtscpAsm(SB), 7, $0
	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
	MOVL AX, eax+0(FP)
	MOVL BX, ebx+4(FP)
	MOVL CX, ecx+8(FP)
	MOVL DX, edx+12(FP)
	RET
+
// func asmDarwinHasAVX512() bool
//
// On 386 this unconditionally stores 0 (false) into the result slot.
// NOTE(review): the store is a 32-bit MOVL through the name "eax" although the
// declared result is a bool; confirm this is acceptable to `go vet` asmdecl.
TEXT ·asmDarwinHasAVX512(SB), 7, $0
	MOVL $0, eax+0(FP)
	RET

+ 72 - 0
vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s

@@ -0,0 +1,72 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build amd64,!gccgo,!noasm,!appengine
+
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
//
// asmCpuid executes CPUID with EAX = op and CX cleared to zero, then stores
// the four resulting registers into the result slots.
TEXT ·asmCpuid(SB), 7, $0
	XORQ CX, CX // sub-leaf register = 0
	MOVL op+0(FP), AX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET
+
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
//
// asmCpuidex executes CPUID with EAX = op and ECX = op2, then stores the four
// resulting registers into the result slots.
TEXT ·asmCpuidex(SB), 7, $0
	MOVL op+0(FP), AX
	MOVL op2+4(FP), CX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET
+
// func asmXgetbv(index uint32) (eax, edx uint32)
//
// asmXgetbv loads index into ECX, executes XGETBV (emitted as raw opcode
// bytes) and returns EAX and EDX.
TEXT ·asmXgetbv(SB), 7, $0
	MOVL index+0(FP), CX
	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
	MOVL AX, eax+8(FP)
	MOVL DX, edx+12(FP)
	RET
+
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
//
// asmRdtscpAsm executes RDTSCP (emitted as raw opcode bytes) and stores AX, BX,
// CX and DX into the result slots.
// NOTE(review): nothing in this routine writes BX before it is stored, so ebx
// is presumably whatever the register held on entry; confirm callers ignore it.
TEXT ·asmRdtscpAsm(SB), 7, $0
	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
	MOVL AX, eax+0(FP)
	MOVL BX, ebx+4(FP)
	MOVL CX, ecx+8(FP)
	MOVL DX, edx+12(FP)
	RET
+
// From https://go-review.googlesource.com/c/sys/+/285572/
// func asmDarwinHasAVX512() bool
//
// asmDarwinHasAVX512 returns false unless built for darwin/amd64. In that
// build it reads two fields at fixed commpage addresses: the 16-bit version
// and the 64-bit capability word. It returns true only when the version is at
// least 13 and the hasAVX512F capability bit is set.
TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
	MOVB $0, ret+0(FP) // default to false

#ifdef GOOS_darwin // return if not darwin
#ifdef GOARCH_amd64 // return if not amd64
// These values from:
// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
#define commpage64_base_address         0x00007fffffe00000
#define commpage64_cpu_capabilities64   (commpage64_base_address+0x010)
#define commpage64_version              (commpage64_base_address+0x01E)
#define hasAVX512F                      0x0000004000000000
	// Load the commpage version and bail out when it is below 13.
	MOVQ $commpage64_version, BX
	MOVW (BX), AX
	CMPW AX, $13                            // versions < 13 do not support AVX512
	JL   no_avx512
	// Test the AVX512F bit in the 64-bit capability word.
	MOVQ $commpage64_cpu_capabilities64, BX
	MOVQ (BX), AX
	MOVQ $hasAVX512F, CX
	ANDQ CX, AX
	JZ   no_avx512
	MOVB $1, ret+0(FP)

no_avx512:
#endif
#endif
	RET
+

+ 36 - 0
vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s

@@ -0,0 +1,36 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build arm64,!gccgo,!noasm,!appengine
+
+// See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt
+
// func getMidr() (midr uint64)
//
// getMidr reads MIDR_EL1 (the MRS instruction is emitted as a raw word) and
// returns its value.
TEXT ·getMidr(SB), 7, $0
	WORD $0xd5380000    // mrs x0, midr_el1         /* Main ID Register */
	MOVD R0, midr+0(FP)
	RET
+
// func getProcFeatures() (procFeatures uint64)
//
// getProcFeatures reads ID_AA64PFR0_EL1 (the MRS instruction is emitted as a
// raw word) and returns its value.
TEXT ·getProcFeatures(SB), 7, $0
	WORD $0xd5380400            // mrs x0, id_aa64pfr0_el1  /* Processor Feature Register 0 */
	MOVD R0, procFeatures+0(FP)
	RET
+
// func getInstAttributes() (instAttrReg0, instAttrReg1 uint64)
//
// getInstAttributes reads ID_AA64ISAR0_EL1 and ID_AA64ISAR1_EL1 (the MRS
// instructions are emitted as raw words) and returns them in that order.
TEXT ·getInstAttributes(SB), 7, $0
	WORD $0xd5380600            // mrs x0, id_aa64isar0_el1 /* Instruction Set Attribute Register 0 */
	WORD $0xd5380621            // mrs x1, id_aa64isar1_el1 /* Instruction Set Attribute Register 1 */
	MOVD R0, instAttrReg0+0(FP)
	MOVD R1, instAttrReg1+8(FP)
	RET
+
// func getVectorLength() (vl, pl uint64)
//
// getVectorLength starts from zero, applies ADDVL #1 and ADDPL #1 (emitted as
// raw words), shifts each result left by 3 and returns them as vl and pl.
// NOTE(review): the shift by 3 presumably converts byte counts to bits, and the
// SVE instructions presumably fault on CPUs without SVE; confirm that callers
// check for SVE first.
TEXT ·getVectorLength(SB), 7, $0
	WORD $0xd2800002  // mov   x2, #0
	WORD $0x04225022  // addvl x2, x2, #1
	WORD $0xd37df042  // lsl   x2, x2, #3
	WORD $0xd2800003  // mov   x3, #0
	WORD $0x04635023  // addpl x3, x3, #1
	WORD $0xd37df063  // lsl   x3, x3, #3
	MOVD R2, vl+0(FP)
	MOVD R3, pl+8(FP)
	RET

+ 248 - 0
vendor/github.com/klauspost/cpuid/v2/detect_arm64.go

@@ -0,0 +1,248 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build arm64 && !gccgo && !noasm && !appengine
+// +build arm64,!gccgo,!noasm,!appengine
+
+package cpuid
+
+import "runtime"
+
// The four declarations below have no Go body; their TEXT symbols are defined
// in cpuid_arm64.s.

// getMidr returns the raw value of MIDR_EL1 (Main ID Register).
func getMidr() (midr uint64)

// getProcFeatures returns the raw value of ID_AA64PFR0_EL1.
func getProcFeatures() (procFeatures uint64)

// getInstAttributes returns the raw values of ID_AA64ISAR0_EL1 and
// ID_AA64ISAR1_EL1, in that order.
func getInstAttributes() (instAttrReg0, instAttrReg1 uint64)

// getVectorLength returns the results of ADDVL #1 and ADDPL #1, each shifted
// left by 3.
func getVectorLength() (vl, pl uint64)
+
+func initCPU() {
+	cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
+	cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
+	xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
+	rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
+}
+
+func addInfo(c *CPUInfo, safe bool) {
+	// Seems to be safe to assume on ARM64
+	c.CacheLine = 64
+	detectOS(c)
+
+	// ARM64 disabled since it may crash if interrupt is not intercepted by OS.
+	if safe && !c.Has(ARMCPUID) && runtime.GOOS != "freebsd" {
+		return
+	}
+	midr := getMidr()
+
+	// MIDR_EL1 - Main ID Register
+	// https://developer.arm.com/docs/ddi0595/h/aarch64-system-registers/midr_el1
+	//  x--------------------------------------------------x
+	//  | Name                         |  bits   | visible |
+	//  |--------------------------------------------------|
+	//  | Implementer                  | [31-24] |    y    |
+	//  |--------------------------------------------------|
+	//  | Variant                      | [23-20] |    y    |
+	//  |--------------------------------------------------|
+	//  | Architecture                 | [19-16] |    y    |
+	//  |--------------------------------------------------|
+	//  | PartNum                      | [15-4]  |    y    |
+	//  |--------------------------------------------------|
+	//  | Revision                     | [3-0]   |    y    |
+	//  x--------------------------------------------------x
+
+	switch (midr >> 24) & 0xff {
+	case 0xC0:
+		c.VendorString = "Ampere Computing"
+		c.VendorID = Ampere
+	case 0x41:
+		c.VendorString = "Arm Limited"
+		c.VendorID = ARM
+	case 0x42:
+		c.VendorString = "Broadcom Corporation"
+		c.VendorID = Broadcom
+	case 0x43:
+		c.VendorString = "Cavium Inc"
+		c.VendorID = Cavium
+	case 0x44:
+		c.VendorString = "Digital Equipment Corporation"
+		c.VendorID = DEC
+	case 0x46:
+		c.VendorString = "Fujitsu Ltd"
+		c.VendorID = Fujitsu
+	case 0x49:
+		c.VendorString = "Infineon Technologies AG"
+		c.VendorID = Infineon
+	case 0x4D:
+		c.VendorString = "Motorola or Freescale Semiconductor Inc"
+		c.VendorID = Motorola
+	case 0x4E:
+		c.VendorString = "NVIDIA Corporation"
+		c.VendorID = NVIDIA
+	case 0x50:
+		c.VendorString = "Applied Micro Circuits Corporation"
+		c.VendorID = AMCC
+	case 0x51:
+		c.VendorString = "Qualcomm Inc"
+		c.VendorID = Qualcomm
+	case 0x56:
+		c.VendorString = "Marvell International Ltd"
+		c.VendorID = Marvell
+	case 0x69:
+		c.VendorString = "Intel Corporation"
+		c.VendorID = Intel
+	}
+
+	// Lower 4 bits: Architecture
+	// Architecture	Meaning
+	// 0b0001		Armv4.
+	// 0b0010		Armv4T.
+	// 0b0011		Armv5 (obsolete).
+	// 0b0100		Armv5T.
+	// 0b0101		Armv5TE.
+	// 0b0110		Armv5TEJ.
+	// 0b0111		Armv6.
+	// 0b1111		Architectural features are individually identified in the ID_* registers, see 'ID registers'.
+	// Upper 4 bit: Variant
+	// An IMPLEMENTATION DEFINED variant number.
+	// Typically, this field is used to distinguish between different product variants, or major revisions of a product.
+	c.Family = int(midr>>16) & 0xff
+
+	// PartNum, bits [15:4]
+	// An IMPLEMENTATION DEFINED primary part number for the device.
+	// On processors implemented by Arm, if the top four bits of the primary
+	// part number are 0x0 or 0x7, the variant and architecture are encoded differently.
+	// Revision, bits [3:0]
+	// An IMPLEMENTATION DEFINED revision number for the device.
+	c.Model = int(midr) & 0xffff
+
+	procFeatures := getProcFeatures()
+
+	// ID_AA64PFR0_EL1 - Processor Feature Register 0
+	// x--------------------------------------------------x
+	// | Name                         |  bits   | visible |
+	// |--------------------------------------------------|
+	// | DIT                          | [51-48] |    y    |
+	// |--------------------------------------------------|
+	// | SVE                          | [35-32] |    y    |
+	// |--------------------------------------------------|
+	// | GIC                          | [27-24] |    n    |
+	// |--------------------------------------------------|
+	// | AdvSIMD                      | [23-20] |    y    |
+	// |--------------------------------------------------|
+	// | FP                           | [19-16] |    y    |
+	// |--------------------------------------------------|
+	// | EL3                          | [15-12] |    n    |
+	// |--------------------------------------------------|
+	// | EL2                          | [11-8]  |    n    |
+	// |--------------------------------------------------|
+	// | EL1                          | [7-4]   |    n    |
+	// |--------------------------------------------------|
+	// | EL0                          | [3-0]   |    n    |
+	// x--------------------------------------------------x
+
+	var f flagSet
+	// if procFeatures&(0xf<<48) != 0 {
+	// 	fmt.Println("DIT")
+	// }
+	f.setIf(procFeatures&(0xf<<32) != 0, SVE)
+	if procFeatures&(0xf<<20) != 15<<20 {
+		f.set(ASIMD)
+		// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1
+		// 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic.
+		f.setIf(procFeatures&(0xf<<20) == 1<<20, FPHP, ASIMDHP)
+	}
+	f.setIf(procFeatures&(0xf<<16) != 0, FP)
+
+	instAttrReg0, instAttrReg1 := getInstAttributes()
+
+	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
+	//
+	// ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0
+	// x--------------------------------------------------x
+	// | Name                         |  bits   | visible |
+	// |--------------------------------------------------|
+	// | TS                           | [55-52] |    y    |
+	// |--------------------------------------------------|
+	// | FHM                          | [51-48] |    y    |
+	// |--------------------------------------------------|
+	// | DP                           | [47-44] |    y    |
+	// |--------------------------------------------------|
+	// | SM4                          | [43-40] |    y    |
+	// |--------------------------------------------------|
+	// | SM3                          | [39-36] |    y    |
+	// |--------------------------------------------------|
+	// | SHA3                         | [35-32] |    y    |
+	// |--------------------------------------------------|
+	// | RDM                          | [31-28] |    y    |
+	// |--------------------------------------------------|
+	// | ATOMICS                      | [23-20] |    y    |
+	// |--------------------------------------------------|
+	// | CRC32                        | [19-16] |    y    |
+	// |--------------------------------------------------|
+	// | SHA2                         | [15-12] |    y    |
+	// |--------------------------------------------------|
+	// | SHA1                         | [11-8]  |    y    |
+	// |--------------------------------------------------|
+	// | AES                          | [7-4]   |    y    |
+	// x--------------------------------------------------x
+
+	// if instAttrReg0&(0xf<<52) != 0 {
+	// 	fmt.Println("TS")
+	// }
+	// if instAttrReg0&(0xf<<48) != 0 {
+	// 	fmt.Println("FHM")
+	// }
+	f.setIf(instAttrReg0&(0xf<<44) != 0, ASIMDDP)
+	f.setIf(instAttrReg0&(0xf<<40) != 0, SM4)
+	f.setIf(instAttrReg0&(0xf<<36) != 0, SM3)
+	f.setIf(instAttrReg0&(0xf<<32) != 0, SHA3)
+	f.setIf(instAttrReg0&(0xf<<28) != 0, ASIMDRDM)
+	f.setIf(instAttrReg0&(0xf<<20) != 0, ATOMICS)
+	f.setIf(instAttrReg0&(0xf<<16) != 0, CRC32)
+	f.setIf(instAttrReg0&(0xf<<12) != 0, SHA2)
+	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
+	// 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented.
+	f.setIf(instAttrReg0&(0xf<<12) == 2<<12, SHA512)
+	f.setIf(instAttrReg0&(0xf<<8) != 0, SHA1)
+	f.setIf(instAttrReg0&(0xf<<4) != 0, AESARM)
+	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
+	// 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities.
+	f.setIf(instAttrReg0&(0xf<<4) == 2<<4, PMULL)
+
+	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1
+	//
+	// ID_AA64ISAR1_EL1 - Instruction set attribute register 1
+	// x--------------------------------------------------x
+	// | Name                         |  bits   | visible |
+	// |--------------------------------------------------|
+	// | GPI                          | [31-28] |    y    |
+	// |--------------------------------------------------|
+	// | GPA                          | [27-24] |    y    |
+	// |--------------------------------------------------|
+	// | LRCPC                        | [23-20] |    y    |
+	// |--------------------------------------------------|
+	// | FCMA                         | [19-16] |    y    |
+	// |--------------------------------------------------|
+	// | JSCVT                        | [15-12] |    y    |
+	// |--------------------------------------------------|
+	// | API                          | [11-8]  |    y    |
+	// |--------------------------------------------------|
+	// | APA                          | [7-4]   |    y    |
+	// |--------------------------------------------------|
+	// | DPB                          | [3-0]   |    y    |
+	// x--------------------------------------------------x
+
+	// if instAttrReg1&(0xf<<28) != 0 {
+	// 	fmt.Println("GPI")
+	// }
+	f.setIf(instAttrReg1&(0xf<<28) != 24, GPA)
+	f.setIf(instAttrReg1&(0xf<<20) != 0, LRCPC)
+	f.setIf(instAttrReg1&(0xf<<16) != 0, FCMA)
+	f.setIf(instAttrReg1&(0xf<<12) != 0, JSCVT)
+	// if instAttrReg1&(0xf<<8) != 0 {
+	// 	fmt.Println("API")
+	// }
+	// if instAttrReg1&(0xf<<4) != 0 {
+	// 	fmt.Println("APA")
+	// }
+	f.setIf(instAttrReg1&(0xf<<0) != 0, DCPOP)
+
+	// Store
+	c.featureSet.or(f)
+}

+ 17 - 0
vendor/github.com/klauspost/cpuid/v2/detect_ref.go

@@ -0,0 +1,17 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build (!amd64 && !386 && !arm64) || gccgo || noasm || appengine
+// +build !amd64,!386,!arm64 gccgo noasm appengine
+
+package cpuid
+
+func initCPU() {
+	cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
+	cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
+	xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
+	rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
+
+}
+
// addInfo is a no-op in this build configuration: info is left unchanged and
// safe is ignored.
func addInfo(info *CPUInfo, safe bool) {}
// getVectorLength always reports (0, 0) in this build configuration.
func getVectorLength() (vl, pl uint64) {
	return 0, 0
}

+ 41 - 0
vendor/github.com/klauspost/cpuid/v2/detect_x86.go

@@ -0,0 +1,41 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build (386 && !gccgo && !noasm && !appengine) || (amd64 && !gccgo && !noasm && !appengine)
+// +build 386,!gccgo,!noasm,!appengine amd64,!gccgo,!noasm,!appengine
+
+package cpuid
+
// The five declarations below have no Go body; their TEXT symbols are defined
// in cpuid_386.s and cpuid_amd64.s.

// asmCpuid executes CPUID with EAX = op and ECX = 0.
func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)

// asmCpuidex executes CPUID with EAX = op and ECX = op2.
func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)

// asmXgetbv executes XGETBV with ECX = index.
func asmXgetbv(index uint32) (eax, edx uint32)

// asmRdtscpAsm executes RDTSCP and returns the contents of AX, BX, CX and DX.
func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)

// asmDarwinHasAVX512 always returns false on 386 and outside darwin/amd64; on
// darwin/amd64 it reads the commpage capability bits.
func asmDarwinHasAVX512() bool
+
+func initCPU() {
+	cpuid = asmCpuid
+	cpuidex = asmCpuidex
+	xgetbv = asmXgetbv
+	rdtscpAsm = asmRdtscpAsm
+	darwinHasAVX512 = asmDarwinHasAVX512
+}
+
// addInfo fills c with the values reported by the x86 detection helpers.
// The safe argument is not used by this implementation.
//
// Statement order matters: later assignments read fields set by earlier ones.
func addInfo(c *CPUInfo, safe bool) {
	c.maxFunc = maxFunctionID()
	c.maxExFunc = maxExtendedFunction()
	c.BrandName = brandName()
	c.CacheLine = cacheLine()
	c.Family, c.Model, c.Stepping = familyModel()
	c.featureSet = support()
	// The next two lines consult c.featureSet, so they must follow support().
	c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
	c.AMDMemEncryption = hasAMDMemEncryption(c.featureSet.inSet(SME) || c.featureSet.inSet(SEV))
	c.ThreadsPerCore = threadsPerCore()
	c.LogicalCores = logicalCores()
	c.PhysicalCores = physicalCores()
	c.VendorID, c.VendorString = vendorID()
	c.HypervisorVendorID, c.HypervisorVendorString = hypervisorVendorID()
	// supportAVX10 reads c.maxFunc and c.featureSet, both assigned above.
	c.AVX10Level = c.supportAVX10()
	c.cacheSize()
	c.frequencies()
}
+
// getVectorLength always reports (0, 0) on x86.
func getVectorLength() (vl, pl uint64) {
	return 0, 0
}

+ 291 - 0
vendor/github.com/klauspost/cpuid/v2/featureid_string.go

@@ -0,0 +1,291 @@
+// Code generated by "stringer -type=FeatureID,Vendor"; DO NOT EDIT.
+
+package cpuid
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[ADX-1]
+	_ = x[AESNI-2]
+	_ = x[AMD3DNOW-3]
+	_ = x[AMD3DNOWEXT-4]
+	_ = x[AMXBF16-5]
+	_ = x[AMXFP16-6]
+	_ = x[AMXINT8-7]
+	_ = x[AMXFP8-8]
+	_ = x[AMXTILE-9]
+	_ = x[APX_F-10]
+	_ = x[AVX-11]
+	_ = x[AVX10-12]
+	_ = x[AVX10_128-13]
+	_ = x[AVX10_256-14]
+	_ = x[AVX10_512-15]
+	_ = x[AVX2-16]
+	_ = x[AVX512BF16-17]
+	_ = x[AVX512BITALG-18]
+	_ = x[AVX512BW-19]
+	_ = x[AVX512CD-20]
+	_ = x[AVX512DQ-21]
+	_ = x[AVX512ER-22]
+	_ = x[AVX512F-23]
+	_ = x[AVX512FP16-24]
+	_ = x[AVX512IFMA-25]
+	_ = x[AVX512PF-26]
+	_ = x[AVX512VBMI-27]
+	_ = x[AVX512VBMI2-28]
+	_ = x[AVX512VL-29]
+	_ = x[AVX512VNNI-30]
+	_ = x[AVX512VP2INTERSECT-31]
+	_ = x[AVX512VPOPCNTDQ-32]
+	_ = x[AVXIFMA-33]
+	_ = x[AVXNECONVERT-34]
+	_ = x[AVXSLOW-35]
+	_ = x[AVXVNNI-36]
+	_ = x[AVXVNNIINT8-37]
+	_ = x[AVXVNNIINT16-38]
+	_ = x[BHI_CTRL-39]
+	_ = x[BMI1-40]
+	_ = x[BMI2-41]
+	_ = x[CETIBT-42]
+	_ = x[CETSS-43]
+	_ = x[CLDEMOTE-44]
+	_ = x[CLMUL-45]
+	_ = x[CLZERO-46]
+	_ = x[CMOV-47]
+	_ = x[CMPCCXADD-48]
+	_ = x[CMPSB_SCADBS_SHORT-49]
+	_ = x[CMPXCHG8-50]
+	_ = x[CPBOOST-51]
+	_ = x[CPPC-52]
+	_ = x[CX16-53]
+	_ = x[EFER_LMSLE_UNS-54]
+	_ = x[ENQCMD-55]
+	_ = x[ERMS-56]
+	_ = x[F16C-57]
+	_ = x[FLUSH_L1D-58]
+	_ = x[FMA3-59]
+	_ = x[FMA4-60]
+	_ = x[FP128-61]
+	_ = x[FP256-62]
+	_ = x[FSRM-63]
+	_ = x[FXSR-64]
+	_ = x[FXSROPT-65]
+	_ = x[GFNI-66]
+	_ = x[HLE-67]
+	_ = x[HRESET-68]
+	_ = x[HTT-69]
+	_ = x[HWA-70]
+	_ = x[HYBRID_CPU-71]
+	_ = x[HYPERVISOR-72]
+	_ = x[IA32_ARCH_CAP-73]
+	_ = x[IA32_CORE_CAP-74]
+	_ = x[IBPB-75]
+	_ = x[IBPB_BRTYPE-76]
+	_ = x[IBRS-77]
+	_ = x[IBRS_PREFERRED-78]
+	_ = x[IBRS_PROVIDES_SMP-79]
+	_ = x[IBS-80]
+	_ = x[IBSBRNTRGT-81]
+	_ = x[IBSFETCHSAM-82]
+	_ = x[IBSFFV-83]
+	_ = x[IBSOPCNT-84]
+	_ = x[IBSOPCNTEXT-85]
+	_ = x[IBSOPSAM-86]
+	_ = x[IBSRDWROPCNT-87]
+	_ = x[IBSRIPINVALIDCHK-88]
+	_ = x[IBS_FETCH_CTLX-89]
+	_ = x[IBS_OPDATA4-90]
+	_ = x[IBS_OPFUSE-91]
+	_ = x[IBS_PREVENTHOST-92]
+	_ = x[IBS_ZEN4-93]
+	_ = x[IDPRED_CTRL-94]
+	_ = x[INT_WBINVD-95]
+	_ = x[INVLPGB-96]
+	_ = x[KEYLOCKER-97]
+	_ = x[KEYLOCKERW-98]
+	_ = x[LAHF-99]
+	_ = x[LAM-100]
+	_ = x[LBRVIRT-101]
+	_ = x[LZCNT-102]
+	_ = x[MCAOVERFLOW-103]
+	_ = x[MCDT_NO-104]
+	_ = x[MCOMMIT-105]
+	_ = x[MD_CLEAR-106]
+	_ = x[MMX-107]
+	_ = x[MMXEXT-108]
+	_ = x[MOVBE-109]
+	_ = x[MOVDIR64B-110]
+	_ = x[MOVDIRI-111]
+	_ = x[MOVSB_ZL-112]
+	_ = x[MOVU-113]
+	_ = x[MPX-114]
+	_ = x[MSRIRC-115]
+	_ = x[MSRLIST-116]
+	_ = x[MSR_PAGEFLUSH-117]
+	_ = x[NRIPS-118]
+	_ = x[NX-119]
+	_ = x[OSXSAVE-120]
+	_ = x[PCONFIG-121]
+	_ = x[POPCNT-122]
+	_ = x[PPIN-123]
+	_ = x[PREFETCHI-124]
+	_ = x[PSFD-125]
+	_ = x[RDPRU-126]
+	_ = x[RDRAND-127]
+	_ = x[RDSEED-128]
+	_ = x[RDTSCP-129]
+	_ = x[RRSBA_CTRL-130]
+	_ = x[RTM-131]
+	_ = x[RTM_ALWAYS_ABORT-132]
+	_ = x[SBPB-133]
+	_ = x[SERIALIZE-134]
+	_ = x[SEV-135]
+	_ = x[SEV_64BIT-136]
+	_ = x[SEV_ALTERNATIVE-137]
+	_ = x[SEV_DEBUGSWAP-138]
+	_ = x[SEV_ES-139]
+	_ = x[SEV_RESTRICTED-140]
+	_ = x[SEV_SNP-141]
+	_ = x[SGX-142]
+	_ = x[SGXLC-143]
+	_ = x[SHA-144]
+	_ = x[SME-145]
+	_ = x[SME_COHERENT-146]
+	_ = x[SPEC_CTRL_SSBD-147]
+	_ = x[SRBDS_CTRL-148]
+	_ = x[SRSO_MSR_FIX-149]
+	_ = x[SRSO_NO-150]
+	_ = x[SRSO_USER_KERNEL_NO-151]
+	_ = x[SSE-152]
+	_ = x[SSE2-153]
+	_ = x[SSE3-154]
+	_ = x[SSE4-155]
+	_ = x[SSE42-156]
+	_ = x[SSE4A-157]
+	_ = x[SSSE3-158]
+	_ = x[STIBP-159]
+	_ = x[STIBP_ALWAYSON-160]
+	_ = x[STOSB_SHORT-161]
+	_ = x[SUCCOR-162]
+	_ = x[SVM-163]
+	_ = x[SVMDA-164]
+	_ = x[SVMFBASID-165]
+	_ = x[SVML-166]
+	_ = x[SVMNP-167]
+	_ = x[SVMPF-168]
+	_ = x[SVMPFT-169]
+	_ = x[SYSCALL-170]
+	_ = x[SYSEE-171]
+	_ = x[TBM-172]
+	_ = x[TDX_GUEST-173]
+	_ = x[TLB_FLUSH_NESTED-174]
+	_ = x[TME-175]
+	_ = x[TOPEXT-176]
+	_ = x[TSCRATEMSR-177]
+	_ = x[TSXLDTRK-178]
+	_ = x[VAES-179]
+	_ = x[VMCBCLEAN-180]
+	_ = x[VMPL-181]
+	_ = x[VMSA_REGPROT-182]
+	_ = x[VMX-183]
+	_ = x[VPCLMULQDQ-184]
+	_ = x[VTE-185]
+	_ = x[WAITPKG-186]
+	_ = x[WBNOINVD-187]
+	_ = x[WRMSRNS-188]
+	_ = x[X87-189]
+	_ = x[XGETBV1-190]
+	_ = x[XOP-191]
+	_ = x[XSAVE-192]
+	_ = x[XSAVEC-193]
+	_ = x[XSAVEOPT-194]
+	_ = x[XSAVES-195]
+	_ = x[AESARM-196]
+	_ = x[ARMCPUID-197]
+	_ = x[ASIMD-198]
+	_ = x[ASIMDDP-199]
+	_ = x[ASIMDHP-200]
+	_ = x[ASIMDRDM-201]
+	_ = x[ATOMICS-202]
+	_ = x[CRC32-203]
+	_ = x[DCPOP-204]
+	_ = x[EVTSTRM-205]
+	_ = x[FCMA-206]
+	_ = x[FP-207]
+	_ = x[FPHP-208]
+	_ = x[GPA-209]
+	_ = x[JSCVT-210]
+	_ = x[LRCPC-211]
+	_ = x[PMULL-212]
+	_ = x[SHA1-213]
+	_ = x[SHA2-214]
+	_ = x[SHA3-215]
+	_ = x[SHA512-216]
+	_ = x[SM3-217]
+	_ = x[SM4-218]
+	_ = x[SVE-219]
+	_ = x[lastID-220]
+	_ = x[firstID-0]
+}
+
+const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXFP8AMXTILEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8AVXVNNIINT16BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBPB_BRTYPEIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSBPBSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSRSO_MSR_FIXSRSO_NOSRSO_USER_KERNEL_NOSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
+
+var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 61, 68, 73, 76, 81, 90, 99, 108, 112, 122, 134, 142, 150, 158, 166, 173, 183, 193, 201, 211, 222, 230, 240, 258, 273, 280, 292, 299, 306, 317, 329, 337, 341, 345, 351, 356, 364, 369, 375, 379, 388, 406, 414, 421, 425, 429, 443, 449, 453, 457, 466, 470, 474, 479, 484, 488, 492, 499, 503, 506, 512, 515, 518, 528, 538, 551, 564, 568, 579, 583, 597, 614, 617, 627, 638, 644, 652, 663, 671, 683, 699, 713, 724, 734, 749, 757, 768, 778, 785, 794, 804, 808, 811, 818, 823, 834, 841, 848, 856, 859, 865, 870, 879, 886, 894, 898, 901, 907, 914, 927, 932, 934, 941, 948, 954, 958, 967, 971, 976, 982, 988, 994, 1004, 1007, 1023, 1027, 1036, 1039, 1048, 1063, 1076, 1082, 1096, 1103, 1106, 1111, 1114, 1117, 1129, 1143, 1153, 1165, 1172, 1191, 1194, 1198, 1202, 1206, 1211, 1216, 1221, 1226, 1240, 1251, 1257, 1260, 1265, 1274, 1278, 1283, 1288, 1294, 1301, 1306, 1309, 1318, 1334, 1337, 1343, 1353, 1361, 1365, 1374, 1378, 1390, 1393, 1403, 1406, 1413, 1421, 1428, 1431, 1438, 1441, 1446, 1452, 1460, 1466, 1472, 1480, 1485, 1492, 1499, 1507, 1514, 1519, 1524, 1531, 1535, 1537, 1541, 1544, 1549, 1554, 1559, 1563, 1567, 1571, 1577, 1580, 1583, 1586, 1592}
+
+func (i FeatureID) String() string {
+	if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
+		return "FeatureID(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _FeatureID_name[_FeatureID_index[i]:_FeatureID_index[i+1]]
+}
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[VendorUnknown-0]
+	_ = x[Intel-1]
+	_ = x[AMD-2]
+	_ = x[VIA-3]
+	_ = x[Transmeta-4]
+	_ = x[NSC-5]
+	_ = x[KVM-6]
+	_ = x[MSVM-7]
+	_ = x[VMware-8]
+	_ = x[XenHVM-9]
+	_ = x[Bhyve-10]
+	_ = x[Hygon-11]
+	_ = x[SiS-12]
+	_ = x[RDC-13]
+	_ = x[Ampere-14]
+	_ = x[ARM-15]
+	_ = x[Broadcom-16]
+	_ = x[Cavium-17]
+	_ = x[DEC-18]
+	_ = x[Fujitsu-19]
+	_ = x[Infineon-20]
+	_ = x[Motorola-21]
+	_ = x[NVIDIA-22]
+	_ = x[AMCC-23]
+	_ = x[Qualcomm-24]
+	_ = x[Marvell-25]
+	_ = x[QEMU-26]
+	_ = x[QNX-27]
+	_ = x[ACRN-28]
+	_ = x[SRE-29]
+	_ = x[Apple-30]
+	_ = x[lastVendor-31]
+}
+
+const _Vendor_name = "VendorUnknownIntelAMDVIATransmetaNSCKVMMSVMVMwareXenHVMBhyveHygonSiSRDCAmpereARMBroadcomCaviumDECFujitsuInfineonMotorolaNVIDIAAMCCQualcommMarvellQEMUQNXACRNSREApplelastVendor"
+
+var _Vendor_index = [...]uint8{0, 13, 18, 21, 24, 33, 36, 39, 43, 49, 55, 60, 65, 68, 71, 77, 80, 88, 94, 97, 104, 112, 120, 126, 130, 138, 145, 149, 152, 156, 159, 164, 174}
+
+func (i Vendor) String() string {
+	if i < 0 || i >= Vendor(len(_Vendor_index)-1) {
+		return "Vendor(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _Vendor_name[_Vendor_index[i]:_Vendor_index[i+1]]
+}

+ 121 - 0
vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go

@@ -0,0 +1,121 @@
+// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
+
+package cpuid
+
+import (
+	"runtime"
+	"strings"
+
+	"golang.org/x/sys/unix"
+)
+
+// detectOS fills c with CPU information obtained from the operating system.
+// Unless runtime.GOOS is "ios" it queries sysctl through
+// tryToFillCPUInfoFomSysctl and then passes AESARM, PMULL, SHA1 and SHA2 to
+// featureSet.setIf with a true condition. It always returns true.
+func detectOS(c *CPUInfo) bool {
+	if runtime.GOOS != "ios" {
+		tryToFillCPUInfoFomSysctl(c)
+	}
+	// There are no hw.optional sysctl values for the below features on Mac OS 11.0
+	// to detect their supported state dynamically. Assume the CPU features that
+	// Apple Silicon M1 supports to be available as a minimal set of features
+	// to all Go programs running on darwin/arm64.
+	// TODO: Add more if we know them.
+	c.featureSet.setIf(runtime.GOOS != "ios", AESARM, PMULL, SHA1, SHA2)
+
+	return true
+}
+
+// sysctlGetBool reads the sysctl called name as a uint32 and reports whether
+// it is non-zero. A failed lookup is reported as false.
+func sysctlGetBool(name string) bool {
+	value, err := unix.SysctlUint32(name)
+	if err != nil {
+		return false
+	}
+	return value != 0
+}
+
+// sysctlGetString returns the string value of the sysctl called name, or ""
+// if the lookup fails.
+func sysctlGetString(name string) string {
+	value, err := unix.Sysctl(name)
+	if err != nil {
+		return ""
+	}
+	return value
+}
+
+// sysctlGetInt tries each sysctl in names, in order, as a uint32 and returns
+// the first value that reads successfully and is non-zero. It returns unknown
+// when every lookup fails or yields zero.
+func sysctlGetInt(unknown int, names ...string) int {
+	for _, name := range names {
+		value, err := unix.SysctlUint32(name)
+		if err != nil {
+			continue
+		}
+		if value != 0 {
+			return int(value)
+		}
+	}
+	return unknown
+}
+
+// sysctlGetInt64 tries each sysctl in names, in order, as a uint64 and returns
+// the first value that reads successfully and, once converted to int, differs
+// from unknown. It returns unknown when no name qualifies.
+func sysctlGetInt64(unknown int, names ...string) int {
+	for _, name := range names {
+		value64, err := unix.SysctlUint64(name)
+		if err != nil {
+			continue
+		}
+		if int(value64) != unknown {
+			return int(value64)
+		}
+	}
+	return unknown
+}
+
+// setFeature passes feature to c.featureSet.setIf, using the boolean sysctl
+// called name as the condition (a failed lookup counts as false).
+func setFeature(c *CPUInfo, name string, feature FeatureID) {
+	c.featureSet.setIf(sysctlGetBool(name), feature)
+}
+// tryToFillCPUInfoFomSysctl populates c from sysctl values: brand and vendor
+// strings, core and thread counts, family, model, cache sizes, and the
+// hw.optional.* feature flags.
+func tryToFillCPUInfoFomSysctl(c *CPUInfo) {
+	c.BrandName = sysctlGetString("machdep.cpu.brand_string")
+
+	// The vendor is the first whitespace-separated word of the brand string.
+	// strings.Fields yields an empty slice for an empty or all-whitespace
+	// string, so guard on its result (not on len(c.BrandName)) to avoid an
+	// index-out-of-range panic.
+	if fields := strings.Fields(c.BrandName); len(fields) != 0 {
+		c.VendorString = fields[0]
+	}
+
+	c.PhysicalCores = sysctlGetInt(runtime.NumCPU(), "hw.physicalcpu")
+	// The divisor cannot be zero: sysctlGetInt skips zero readings and falls
+	// back to the supplied default of 1.
+	c.ThreadsPerCore = sysctlGetInt(1, "machdep.cpu.thread_count", "kern.num_threads") /
+		sysctlGetInt(1, "hw.physicalcpu")
+	c.LogicalCores = sysctlGetInt(runtime.NumCPU(), "machdep.cpu.core_count")
+	c.Family = sysctlGetInt(0, "machdep.cpu.family", "hw.cpufamily")
+	c.Model = sysctlGetInt(0, "machdep.cpu.model")
+	c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize")
+	c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize")
+	c.Cache.L1D = sysctlGetInt64(-1, "hw.l1dcachesize")
+	c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize")
+	c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize")
+
+	// from https://developer.arm.com/downloads/-/exploration-tools/feature-names-for-a-profile
+	setFeature(c, "hw.optional.arm.FEAT_AES", AESARM)
+	setFeature(c, "hw.optional.AdvSIMD", ASIMD)
+	setFeature(c, "hw.optional.arm.FEAT_DotProd", ASIMDDP)
+	setFeature(c, "hw.optional.arm.FEAT_RDM", ASIMDRDM)
+	setFeature(c, "hw.optional.FEAT_CRC32", CRC32)
+	setFeature(c, "hw.optional.arm.FEAT_DPB", DCPOP)
+	// setFeature(c, "", EVTSTRM)
+	setFeature(c, "hw.optional.arm.FEAT_FCMA", FCMA)
+	setFeature(c, "hw.optional.arm.FEAT_FP", FP)
+	setFeature(c, "hw.optional.arm.FEAT_FP16", FPHP)
+	setFeature(c, "hw.optional.arm.FEAT_PAuth", GPA)
+	setFeature(c, "hw.optional.arm.FEAT_JSCVT", JSCVT)
+	setFeature(c, "hw.optional.arm.FEAT_LRCPC", LRCPC)
+	setFeature(c, "hw.optional.arm.FEAT_PMULL", PMULL)
+	setFeature(c, "hw.optional.arm.FEAT_SHA1", SHA1)
+	setFeature(c, "hw.optional.arm.FEAT_SHA256", SHA2)
+	setFeature(c, "hw.optional.arm.FEAT_SHA3", SHA3)
+	setFeature(c, "hw.optional.arm.FEAT_SHA512", SHA512)
+	// setFeature(c, "", SM3)
+	// setFeature(c, "", SM4)
+	setFeature(c, "hw.optional.arm.FEAT_SVE", SVE)
+
+	// from empirical observation
+	// Several features below are also probed above under a different sysctl
+	// name; each is simply checked under both names.
+	setFeature(c, "hw.optional.AdvSIMD_HPFPCvt", ASIMDHP)
+	setFeature(c, "hw.optional.armv8_1_atomics", ATOMICS)
+	setFeature(c, "hw.optional.floatingpoint", FP)
+	setFeature(c, "hw.optional.armv8_2_sha3", SHA3)
+	setFeature(c, "hw.optional.armv8_2_sha512", SHA512)
+	setFeature(c, "hw.optional.armv8_3_compnum", FCMA)
+	setFeature(c, "hw.optional.armv8_crc32", CRC32)
+}

+ 130 - 0
vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go

@@ -0,0 +1,130 @@
+// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file located
+// here https://github.com/golang/sys/blob/master/LICENSE
+
+package cpuid
+
+import (
+	"encoding/binary"
+	"io/ioutil"
+	"runtime"
+)
+
+// HWCAP bits.
+const (
+	hwcap_FP       = 1 << 0
+	hwcap_ASIMD    = 1 << 1
+	hwcap_EVTSTRM  = 1 << 2
+	hwcap_AES      = 1 << 3
+	hwcap_PMULL    = 1 << 4
+	hwcap_SHA1     = 1 << 5
+	hwcap_SHA2     = 1 << 6
+	hwcap_CRC32    = 1 << 7
+	hwcap_ATOMICS  = 1 << 8
+	hwcap_FPHP     = 1 << 9
+	hwcap_ASIMDHP  = 1 << 10
+	hwcap_CPUID    = 1 << 11
+	hwcap_ASIMDRDM = 1 << 12
+	hwcap_JSCVT    = 1 << 13
+	hwcap_FCMA     = 1 << 14
+	hwcap_LRCPC    = 1 << 15
+	hwcap_DCPOP    = 1 << 16
+	hwcap_SHA3     = 1 << 17
+	hwcap_SM3      = 1 << 18
+	hwcap_SM4      = 1 << 19
+	hwcap_ASIMDDP  = 1 << 20
+	hwcap_SHA512   = 1 << 21
+	hwcap_SVE      = 1 << 22
+	hwcap_ASIMDFHM = 1 << 23
+)
+
+// detectOS fills in the core counts (one thread per core is assumed) and
+// derives the arm64 feature flags from the HWCAP bitmask held in the
+// package-level hwcap. When hwcap is zero it tries to read AT_HWCAP from
+// /proc/self/auxv. It returns false if that file cannot be read or yields no
+// HWCAP value, and true once the feature flags have been evaluated.
+func detectOS(c *CPUInfo) bool {
+	// For now assuming no hyperthreading is reasonable.
+	c.LogicalCores = runtime.NumCPU()
+	c.PhysicalCores = c.LogicalCores
+	c.ThreadsPerCore = 1
+	if hwcap == 0 {
+		// We did not get values from the runtime.
+		// Try reading /proc/self/auxv
+
+		// From https://github.com/golang/sys
+		const (
+			_AT_HWCAP  = 16
+			_AT_HWCAP2 = 26
+
+			uintSize = int(32 << (^uint(0) >> 63))
+		)
+
+		buf, err := ioutil.ReadFile("/proc/self/auxv")
+		if err != nil {
+			// e.g. on android /proc/self/auxv is not accessible, so silently
+			// ignore the error and leave Initialized = false. On some
+			// architectures (e.g. arm64) doinit() implements a fallback
+			// readout and will set Initialized = true again.
+			// NOTE(review): Initialized and doinit() are not referenced in this
+			// function; the wording appears inherited from golang.org/x/sys.
+			// Here the failure is simply reported by returning false.
+			return false
+		}
+		bo := binary.LittleEndian
+		// The buffer is consumed as consecutive (tag, value) pairs of
+		// native-width little-endian unsigned integers.
+		for len(buf) >= 2*(uintSize/8) {
+			var tag, val uint
+			switch uintSize {
+			case 32:
+				tag = uint(bo.Uint32(buf[0:]))
+				val = uint(bo.Uint32(buf[4:]))
+				buf = buf[8:]
+			case 64:
+				tag = uint(bo.Uint64(buf[0:]))
+				val = uint(bo.Uint64(buf[8:]))
+				buf = buf[16:]
+			}
+			switch tag {
+			case _AT_HWCAP:
+				hwcap = val
+			case _AT_HWCAP2:
+				// Not used
+			}
+		}
+		if hwcap == 0 {
+			return false
+		}
+	}
+
+	// hwcap is non-zero here: it was either already set before this call or
+	// read from /proc/self/auxv above.
+	// Use HWCap information since reading aarch64 system registers
+	// is not supported in user space on older linux kernels.
+	c.featureSet.setIf(isSet(hwcap, hwcap_AES), AESARM)
+	c.featureSet.setIf(isSet(hwcap, hwcap_ASIMD), ASIMD)
+	c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDDP), ASIMDDP)
+	c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDHP), ASIMDHP)
+	c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDRDM), ASIMDRDM)
+	c.featureSet.setIf(isSet(hwcap, hwcap_CPUID), ARMCPUID)
+	c.featureSet.setIf(isSet(hwcap, hwcap_CRC32), CRC32)
+	c.featureSet.setIf(isSet(hwcap, hwcap_DCPOP), DCPOP)
+	c.featureSet.setIf(isSet(hwcap, hwcap_EVTSTRM), EVTSTRM)
+	c.featureSet.setIf(isSet(hwcap, hwcap_FCMA), FCMA)
+	c.featureSet.setIf(isSet(hwcap, hwcap_FP), FP)
+	c.featureSet.setIf(isSet(hwcap, hwcap_FPHP), FPHP)
+	c.featureSet.setIf(isSet(hwcap, hwcap_JSCVT), JSCVT)
+	c.featureSet.setIf(isSet(hwcap, hwcap_LRCPC), LRCPC)
+	c.featureSet.setIf(isSet(hwcap, hwcap_PMULL), PMULL)
+	c.featureSet.setIf(isSet(hwcap, hwcap_SHA1), SHA1)
+	c.featureSet.setIf(isSet(hwcap, hwcap_SHA2), SHA2)
+	c.featureSet.setIf(isSet(hwcap, hwcap_SHA3), SHA3)
+	c.featureSet.setIf(isSet(hwcap, hwcap_SHA512), SHA512)
+	c.featureSet.setIf(isSet(hwcap, hwcap_SM3), SM3)
+	c.featureSet.setIf(isSet(hwcap, hwcap_SM4), SM4)
+	c.featureSet.setIf(isSet(hwcap, hwcap_SVE), SVE)
+
+	// The Samsung S9+ kernel reports support for atomics, but not all cores
+	// actually support them, resulting in SIGILL. See issue #28431.
+	// TODO(elias.naur): Only disable the optimization on bad chipsets on android.
+	c.featureSet.setIf(isSet(hwcap, hwcap_ATOMICS) && runtime.GOOS != "android", ATOMICS)
+
+	return true
+}
+
+// isSet reports whether hwc and value share at least one set bit.
+func isSet(hwc uint, value uint) bool {
+	return hwc&value != 0
+}

+ 16 - 0
vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go

@@ -0,0 +1,16 @@
+// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build arm64 && !linux && !darwin
+// +build arm64,!linux,!darwin
+
+package cpuid
+
+import "runtime"
+
+// detectOS is the variant built for arm64 targets other than linux and darwin
+// (see the build constraint above). It sets the core counts from
+// runtime.NumCPU, assumes one thread per core, touches no feature flags, and
+// always returns false.
+func detectOS(c *CPUInfo) bool {
+	c.PhysicalCores = runtime.NumCPU()
+	// For now assuming 1 thread per core...
+	c.ThreadsPerCore = 1
+	c.LogicalCores = c.PhysicalCores
+	return false
+}

+ 8 - 0
vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go

@@ -0,0 +1,8 @@
+// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build nounsafe
+// +build nounsafe
+
+package cpuid
+
+var hwcap uint

+ 11 - 0
vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go

@@ -0,0 +1,11 @@
+// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build !nounsafe
+// +build !nounsafe
+
+package cpuid
+
+import _ "unsafe" // needed for go:linkname
+
+//go:linkname hwcap internal/cpu.HWCap
+var hwcap uint

+ 15 - 0
vendor/github.com/klauspost/cpuid/v2/test-architectures.sh

@@ -0,0 +1,15 @@
+#!/bin/sh
+
+set -e
+
+go tool dist list | while IFS=/ read os arch; do
+    echo "Checking $os/$arch..."
+    echo " normal"
+    GOARCH=$arch GOOS=$os go build -o /dev/null .
+    echo " noasm"
+    GOARCH=$arch GOOS=$os go build -tags noasm -o /dev/null .
+    echo " appengine"
+    GOARCH=$arch GOOS=$os go build -tags appengine -o /dev/null .
+    echo " noasm,appengine"
+    GOARCH=$arch GOOS=$os go build -tags 'appengine noasm' -o /dev/null .
+done

+ 202 - 0
vendor/github.com/minio/crc64nvme/LICENSE

@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

+ 20 - 0
vendor/github.com/minio/crc64nvme/README.md

@@ -0,0 +1,20 @@
+
+## crc64nvme
+
+This Golang package calculates CRC64 checksums using carryless-multiplication accelerated with SIMD instructions for both ARM and x86. It is based on the NVME polynomial as specified in the [NVM Express® NVM Command Set Specification](https://nvmexpress.org/wp-content/uploads/NVM-Express-NVM-Command-Set-Specification-1.0d-2023.12.28-Ratified.pdf).
+
+The code is based on the [crc64fast-nvme](https://github.com/awesomized/crc64fast-nvme.git) package in Rust and is released under the Apache 2.0 license.
+
+For more background on the exact technique used, see this [Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction](https://web.archive.org/web/20131224125630/https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf) paper.
+
+### Performance
+
+To follow.
+
+### Requirements
+
+All Go versions >= 1.22 are supported.
+
+### Contributing
+
+Contributions are welcome, please send PRs for any enhancements.

+ 185 - 0
vendor/github.com/minio/crc64nvme/crc64.go

@@ -0,0 +1,185 @@
+// Copyright (c) 2025 Minio Inc. All rights reserved.
+// Use of this source code is governed by a license that can be
+// found in the LICENSE file.
+
+// Package crc64nvme implements the 64-bit cyclic redundancy check with NVME polynomial.
+package crc64nvme
+
+import (
+	"encoding/binary"
+	"errors"
+	"hash"
+	"sync"
+	"unsafe"
+)
+
+const (
+	// The size of a CRC-64 checksum in bytes.
+	Size = 8
+
+	// The NVME polynomial (reversed, as used by Go)
+	NVME = 0x9a6c9329ac4bc9b5
+)
+
+var (
+	// precalculated table.
+	nvmeTable = makeTable(NVME)
+)
+
+// table is a 256-word table representing the polynomial for efficient processing.
+type table [256]uint64
+
+var (
+	slicing8TablesBuildOnce sync.Once
+	slicing8TableNVME       *[8]table
+)
+
+// buildSlicing8TablesOnce runs buildSlicing8Tables through
+// slicing8TablesBuildOnce, so the tables are built at most once.
+func buildSlicing8TablesOnce() {
+	slicing8TablesBuildOnce.Do(buildSlicing8Tables)
+}
+
+// buildSlicing8Tables computes the slicing-by-8 tables for the NVME polynomial
+// and stores them in slicing8TableNVME.
+func buildSlicing8Tables() {
+	slicing8TableNVME = makeSlicingBy8Table(makeTable(NVME))
+}
+
+// makeTable builds the 256-entry lookup table for poly. Entry i is the value
+// obtained from i after eight steps, where each step shifts right by one bit
+// and XORs in poly whenever the bit shifted out was set.
+func makeTable(poly uint64) *table {
+	t := new(table)
+	for i := 0; i < 256; i++ {
+		crc := uint64(i)
+		for j := 0; j < 8; j++ {
+			if crc&1 == 1 {
+				crc = (crc >> 1) ^ poly
+			} else {
+				crc >>= 1
+			}
+		}
+		t[i] = crc
+	}
+	return t
+}
+
+// makeSlicingBy8Table derives the eight tables used by the slicing-by-8 loop
+// from the byte-wise table t. Table 0 is a copy of t; for j in 1..7, entry i
+// of table j is entry i of table j-1 advanced once more through t
+// (t[crc&0xff] ^ (crc >> 8)).
+func makeSlicingBy8Table(t *table) *[8]table {
+	var helperTable [8]table
+	helperTable[0] = *t
+	for i := 0; i < 256; i++ {
+		crc := t[i]
+		for j := 1; j < 8; j++ {
+			crc = t[crc&0xff] ^ (crc >> 8)
+			helperTable[j][i] = crc
+		}
+	}
+	return &helperTable
+}
+
+// digest represents the partial evaluation of a checksum.
+type digest struct {
+	crc uint64
+}
+
+// New creates a new hash.Hash64 computing the CRC-64 checksum using the
+// NVME polynomial. Its Sum method will lay the
+// value out in big-endian byte order. The returned Hash64 also
+// implements [encoding.BinaryMarshaler] and [encoding.BinaryUnmarshaler] to
+// marshal and unmarshal the internal state of the hash.
+func New() hash.Hash64 { return &digest{0} }
+
+// Size returns the checksum length in bytes (the Size constant).
+func (d *digest) Size() int { return Size }
+
+// BlockSize always returns 1.
+func (d *digest) BlockSize() int { return 1 }
+
+// Reset sets the running checksum back to 0, the value New starts from.
+func (d *digest) Reset() { d.crc = 0 }
+
+const (
+	magic         = "crc\x02"
+	marshaledSize = len(magic) + 8 + 8
+)
+
+// MarshalBinary encodes the digest state as the magic prefix, followed by
+// tableSum and then the running checksum, each as a big-endian uint64. The
+// returned error is always nil.
+func (d *digest) MarshalBinary() ([]byte, error) {
+	b := make([]byte, 0, marshaledSize)
+	b = append(b, magic...)
+	b = binary.BigEndian.AppendUint64(b, tableSum)
+	b = binary.BigEndian.AppendUint64(b, d.crc)
+	return b, nil
+}
+
+// UnmarshalBinary restores the running checksum from b, which must have the
+// layout written by MarshalBinary. It returns an error, leaving the digest
+// unchanged, when the magic prefix is missing, the length is not
+// marshaledSize, or the stored table sum differs from tableSum.
+//
+// NOTE(review): the error strings are prefixed "hash/crc64" although this
+// package is crc64nvme; presumably inherited from the standard library.
+// Confirm before relying on the prefix.
+func (d *digest) UnmarshalBinary(b []byte) error {
+	if len(b) < len(magic) || string(b[:len(magic)]) != magic {
+		return errors.New("hash/crc64: invalid hash state identifier")
+	}
+	if len(b) != marshaledSize {
+		return errors.New("hash/crc64: invalid hash state size")
+	}
+	// Bytes 4..11 hold the table sum and bytes 12..19 the checksum; the length
+	// check above guarantees both reads are in range.
+	if tableSum != binary.BigEndian.Uint64(b[4:]) {
+		return errors.New("hash/crc64: tables do not match")
+	}
+	d.crc = binary.BigEndian.Uint64(b[12:])
+	return nil
+}
+
+// update returns crc extended with the bytes of p.
+//
+// When hasAsm is set and p holds at least 128 bytes, the input is split into
+// a short unaligned head, a run of whole 128-byte blocks handed to updateAsm
+// or updateAsm512, and a tail; head and tail are processed by recursive calls
+// that take the table path below. Otherwise the checksum is computed in Go,
+// using the slicing-by-8 tables for inputs of 64 bytes or more and the
+// byte-wise nvmeTable for whatever remains.
+func update(crc uint64, p []byte) uint64 {
+	if hasAsm && len(p) > 127 {
+		ptr := unsafe.Pointer(&p[0])
+		// align is the number of bytes (0..15) up to the next 16-byte boundary.
+		if align := (uintptr(ptr)+15)&^0xf - uintptr(ptr); align > 0 {
+			// Align to 16-byte boundary.
+			crc = update(crc, p[:align])
+			p = p[align:]
+		}
+		runs := len(p) / 128
+		if hasAsm512 && runs >= 8 {
+			// Use 512-bit wide instructions for >= 1KB.
+			crc = updateAsm512(crc, p[:128*runs])
+		} else if runs > 0 {
+			crc = updateAsm(crc, p[:128*runs])
+		}
+		// Fewer than 128 bytes remain, so this call uses the table path.
+		return update(crc, p[128*runs:])
+	}
+
+	buildSlicing8TablesOnce()
+	crc = ^crc
+	// table comparison is somewhat expensive, so avoid it for small sizes
+	// NOTE(review): no table comparison happens in this function; the wording
+	// above appears inherited from hash/crc64. The 64-byte threshold itself is
+	// what the code applies.
+	if len(p) >= 64 {
+		var helperTable = slicing8TableNVME
+		// Update using slicing-by-8
+		// The loop stops once 8 or fewer bytes remain, so a final full 8-byte
+		// word is left to the byte-wise loop below.
+		for len(p) > 8 {
+			crc ^= binary.LittleEndian.Uint64(p)
+			crc = helperTable[7][crc&0xff] ^
+				helperTable[6][(crc>>8)&0xff] ^
+				helperTable[5][(crc>>16)&0xff] ^
+				helperTable[4][(crc>>24)&0xff] ^
+				helperTable[3][(crc>>32)&0xff] ^
+				helperTable[2][(crc>>40)&0xff] ^
+				helperTable[1][(crc>>48)&0xff] ^
+				helperTable[0][crc>>56]
+			p = p[8:]
+		}
+	}
+	// For remainders or small sizes
+	for _, v := range p {
+		crc = nvmeTable[byte(crc)^v] ^ (crc >> 8)
+	}
+	return ^crc
+}
+
+// Update returns the result of adding the bytes in p to the crc.
+func Update(crc uint64, p []byte) uint64 {
+	return update(crc, p)
+}
+
+// Write folds p into the running checksum. It always reports len(p) bytes
+// written and a nil error.
+func (d *digest) Write(p []byte) (n int, err error) {
+	d.crc = update(d.crc, p)
+	return len(p), nil
+}
+
+// Sum64 returns the current checksum without modifying the digest.
+func (d *digest) Sum64() uint64 { return d.crc }
+
+// Sum appends the current checksum to in as 8 bytes, most significant byte
+// first, and returns the resulting slice. The digest state is not changed.
+func (d *digest) Sum(in []byte) []byte {
+	s := d.Sum64()
+	return append(in, byte(s>>56), byte(s>>48), byte(s>>40), byte(s>>32), byte(s>>24), byte(s>>16), byte(s>>8), byte(s))
+}
+
+// Checksum returns the CRC-64 checksum of data
+// using the NVME polynomial.
+func Checksum(data []byte) uint64 { return update(0, data) }
+
+// ISO tablesum of NVME poly
+const tableSum = 0x8ddd9ee4402c7163

+ 17 - 0
vendor/github.com/minio/crc64nvme/crc64_amd64.go

@@ -0,0 +1,17 @@
+// Copyright (c) 2025 Minio Inc. All rights reserved.
+// Use of this source code is governed by a license that can be
+// found in the LICENSE file.
+
+//go:build !noasm && !appengine && !gccgo
+
+package crc64nvme
+
+import (
+	"github.com/klauspost/cpuid/v2"
+)
+
+// hasAsm is true when cpuid reports SSE2, CLMUL and SSE4 support.
+var hasAsm = cpuid.CPU.Supports(cpuid.SSE2, cpuid.CLMUL, cpuid.SSE4)
+// hasAsm512 is true when cpuid reports AVX512F, VPCLMULQDQ, AVX512VL and
+// CLMUL support.
+var hasAsm512 = cpuid.CPU.Supports(cpuid.AVX512F, cpuid.VPCLMULQDQ, cpuid.AVX512VL, cpuid.CLMUL)
+
+// updateAsm and updateAsm512 are declared without Go bodies; their
+// implementations are the assembly routines in crc64_amd64.s.
+func updateAsm(crc uint64, p []byte) (checksum uint64)
+func updateAsm512(crc uint64, p []byte) (checksum uint64)

+ 309 - 0
vendor/github.com/minio/crc64nvme/crc64_amd64.s

@@ -0,0 +1,309 @@
+// Copyright (c) 2025 Minio Inc. All rights reserved.
+// Use of this source code is governed by a license that can be
+// found in the LICENSE file.
+
+//go:build !noasm && !appengine && !gccgo
+
+#include "textflag.h"
+
+TEXT ·updateAsm(SB), $0-40
+	MOVQ crc+0(FP), AX    // checksum
+	MOVQ p_base+8(FP), SI // start pointer
+	MOVQ p_len+16(FP), CX // length of buffer
+	NOTQ AX
+	SHRQ $7, CX
+	CMPQ CX, $1
+	JLT  skip128
+
+	MOVOA 0x00(SI), X0
+	MOVOA 0x10(SI), X1
+	MOVOA 0x20(SI), X2
+	MOVOA 0x30(SI), X3
+	MOVOA 0x40(SI), X4
+	MOVOA 0x50(SI), X5
+	MOVOA 0x60(SI), X6
+	MOVOA 0x70(SI), X7
+	MOVQ  AX, X8
+	PXOR  X8, X0
+	CMPQ  CX, $1
+	JE    tail128
+
+	MOVQ   $0xa1ca681e733f9c40, AX
+	MOVQ   AX, X8
+	MOVQ   $0x5f852fb61e8d92dc, AX
+	PINSRQ $0x1, AX, X9
+
+loop128:
+	ADDQ      $128, SI
+	SUBQ      $1, CX
+	MOVOA     X0, X10
+	PCLMULQDQ $0x00, X8, X10
+	PCLMULQDQ $0x11, X9, X0
+	PXOR      X10, X0
+	PXOR      0(SI), X0
+	MOVOA     X1, X10
+	PCLMULQDQ $0x00, X8, X10
+	PCLMULQDQ $0x11, X9, X1
+	PXOR      X10, X1
+	PXOR      0x10(SI), X1
+	MOVOA     X2, X10
+	PCLMULQDQ $0x00, X8, X10
+	PCLMULQDQ $0x11, X9, X2
+	PXOR      X10, X2
+	PXOR      0x20(SI), X2
+	MOVOA     X3, X10
+	PCLMULQDQ $0x00, X8, X10
+	PCLMULQDQ $0x11, X9, X3
+	PXOR      X10, X3
+	PXOR      0x30(SI), X3
+	MOVOA     X4, X10
+	PCLMULQDQ $0x00, X8, X10
+	PCLMULQDQ $0x11, X9, X4
+	PXOR      X10, X4
+	PXOR      0x40(SI), X4
+	MOVOA     X5, X10
+	PCLMULQDQ $0x00, X8, X10
+	PCLMULQDQ $0x11, X9, X5
+	PXOR      X10, X5
+	PXOR      0x50(SI), X5
+	MOVOA     X6, X10
+	PCLMULQDQ $0x00, X8, X10
+	PCLMULQDQ $0x11, X9, X6
+	PXOR      X10, X6
+	PXOR      0x60(SI), X6
+	MOVOA     X7, X10
+	PCLMULQDQ $0x00, X8, X10
+	PCLMULQDQ $0x11, X9, X7
+	PXOR      X10, X7
+	PXOR      0x70(SI), X7
+	CMPQ      CX, $1
+	JGT       loop128
+
+tail128:
+	MOVQ      $0xd083dd594d96319d, AX
+	MOVQ      AX, X11
+	PCLMULQDQ $0x00, X0, X11
+	MOVQ      $0x946588403d4adcbc, AX
+	PINSRQ    $0x1, AX, X12
+	PCLMULQDQ $0x11, X12, X0
+	PXOR      X11, X7
+	PXOR      X0, X7
+	MOVQ      $0x3c255f5ebc414423, AX
+	MOVQ      AX, X11
+	PCLMULQDQ $0x00, X1, X11
+	MOVQ      $0x34f5a24e22d66e90, AX
+	PINSRQ    $0x1, AX, X12
+	PCLMULQDQ $0x11, X12, X1
+	PXOR      X11, X1
+	PXOR      X7, X1
+	MOVQ      $0x7b0ab10dd0f809fe, AX
+	MOVQ      AX, X11
+	PCLMULQDQ $0x00, X2, X11
+	MOVQ      $0x03363823e6e791e5, AX
+	PINSRQ    $0x1, AX, X12
+	PCLMULQDQ $0x11, X12, X2
+	PXOR      X11, X2
+	PXOR      X1, X2
+	MOVQ      $0x0c32cdb31e18a84a, AX
+	MOVQ      AX, X11
+	PCLMULQDQ $0x00, X3, X11
+	MOVQ      $0x62242240ace5045a, AX
+	PINSRQ    $0x1, AX, X12
+	PCLMULQDQ $0x11, X12, X3
+	PXOR      X11, X3
+	PXOR      X2, X3
+	MOVQ      $0xbdd7ac0ee1a4a0f0, AX
+	MOVQ      AX, X11
+	PCLMULQDQ $0x00, X4, X11
+	MOVQ      $0xa3ffdc1fe8e82a8b, AX
+	PINSRQ    $0x1, AX, X12
+	PCLMULQDQ $0x11, X12, X4
+	PXOR      X11, X4
+	PXOR      X3, X4
+	MOVQ      $0xb0bc2e589204f500, AX
+	MOVQ      AX, X11
+	PCLMULQDQ $0x00, X5, X11
+	MOVQ      $0xe1e0bb9d45d7a44c, AX
+	PINSRQ    $0x1, AX, X12
+	PCLMULQDQ $0x11, X12, X5
+	PXOR      X11, X5
+	PXOR      X4, X5
+	MOVQ      $0xeadc41fd2ba3d420, AX
+	MOVQ      AX, X11
+	PCLMULQDQ $0x00, X6, X11
+	MOVQ      $0x21e9761e252621ac, AX
+	PINSRQ    $0x1, AX, X12
+	PCLMULQDQ $0x11, X12, X6
+	PXOR      X11, X6
+	PXOR      X5, X6
+	MOVQ      AX, X5
+	PCLMULQDQ $0x00, X6, X5
+	PSHUFD    $0xee, X6, X6
+	PXOR      X5, X6
+	MOVQ      $0x27ecfa329aef9f77, AX
+	MOVQ      AX, X4
+	PCLMULQDQ $0x00, X4, X6
+	PEXTRQ    $0, X6, BX
+	MOVQ      $0x34d926535897936b, AX
+	MOVQ      AX, X4
+	PCLMULQDQ $0x00, X4, X6
+	PXOR      X5, X6
+	PEXTRQ    $1, X6, AX
+	XORQ      BX, AX
+
+skip128:
+	NOTQ AX
+	MOVQ AX, checksum+32(FP)
+	RET
+
+// Constants, pre-splatted.
+DATA ·asmConstantsPoly<>+0x00(SB)/8, $0xa1ca681e733f9c40
+DATA ·asmConstantsPoly<>+0x08(SB)/8, $0
+DATA ·asmConstantsPoly<>+0x10(SB)/8, $0xa1ca681e733f9c40
+DATA ·asmConstantsPoly<>+0x18(SB)/8, $0
+DATA ·asmConstantsPoly<>+0x20(SB)/8, $0xa1ca681e733f9c40
+DATA ·asmConstantsPoly<>+0x28(SB)/8, $0
+DATA ·asmConstantsPoly<>+0x30(SB)/8, $0xa1ca681e733f9c40
+DATA ·asmConstantsPoly<>+0x38(SB)/8, $0
+// Upper
+DATA ·asmConstantsPoly<>+0x40(SB)/8, $0
+DATA ·asmConstantsPoly<>+0x48(SB)/8, $0x5f852fb61e8d92dc
+DATA ·asmConstantsPoly<>+0x50(SB)/8, $0
+DATA ·asmConstantsPoly<>+0x58(SB)/8, $0x5f852fb61e8d92dc
+DATA ·asmConstantsPoly<>+0x60(SB)/8, $0
+DATA ·asmConstantsPoly<>+0x68(SB)/8, $0x5f852fb61e8d92dc
+DATA ·asmConstantsPoly<>+0x70(SB)/8, $0
+DATA ·asmConstantsPoly<>+0x78(SB)/8, $0x5f852fb61e8d92dc
+GLOBL ·asmConstantsPoly<>(SB), (NOPTR+RODATA), $128
+
+TEXT ·updateAsm512(SB), $0-40
+	MOVQ   crc+0(FP), AX    // checksum
+	MOVQ   p_base+8(FP), SI // start pointer
+	MOVQ   p_len+16(FP), CX // length of buffer
+	NOTQ   AX
+	SHRQ   $7, CX
+	CMPQ   CX, $1
+	VPXORQ Z8, Z8, Z8       // Initialize ZMM8 to zero
+	JLT    skip128
+
+	VMOVDQU64 0x00(SI), Z0
+	VMOVDQU64 0x40(SI), Z4
+	MOVQ      $·asmConstantsPoly<>(SB), BX
+	VMOVQ     AX, X8
+
+	// XOR initialization value into lower 64 bits of ZMM0
+	VPXORQ Z8, Z0, Z0
+	CMPQ   CX, $1
+	JE     tail128
+
+	VMOVDQU64 0(BX), Z8
+	VMOVDQU64 64(BX), Z9
+
+	PCALIGN $16
+
+loop128:
+	PREFETCHT0 512(SI)
+	VMOVDQU64  0x80(SI), Z1
+	VMOVDQU64  0xc0(SI), Z5
+	ADDQ       $128, SI
+
+	SUBQ       $1, CX
+	VPCLMULQDQ $0x00, Z8, Z0, Z10
+	VPCLMULQDQ $0x11, Z9, Z0, Z0
+	VPTERNLOGD $0x96, Z1, Z10, Z0 // Combine results with xor into Z0
+
+	PREFETCHT0 512-64(SI)
+	VPCLMULQDQ $0x00, Z8, Z4, Z10
+	VPCLMULQDQ $0x11, Z9, Z4, Z4
+	VPTERNLOGD $0x96, Z5, Z10, Z4 // Combine results with xor into Z4
+
+	CMPQ CX, $1
+	JGT  loop128
+
+tail128:
+	// Extract X0 to X3 from ZMM0
+	VEXTRACTF32X4 $1, Z0, X1 // X1: Second 128-bit lane
+	VEXTRACTF32X4 $2, Z0, X2 // X2: Third 128-bit lane
+	VEXTRACTF32X4 $3, Z0, X3 // X3: Fourth 128-bit lane
+
+	// Extract X4 to X7 from ZMM4
+	VEXTRACTF32X4 $1, Z4, X5 // X5: Second 128-bit lane
+	VEXTRACTF32X4 $2, Z4, X6 // X6: Third 128-bit lane
+	VEXTRACTF32X4 $3, Z4, X7 // X7: Fourth 128-bit lane
+
+	MOVQ      $0xd083dd594d96319d, AX
+	MOVQ      AX, X11
+	PCLMULQDQ $0x00, X0, X11
+	MOVQ      $0x946588403d4adcbc, AX
+	PINSRQ    $0x1, AX, X12
+	PCLMULQDQ $0x11, X12, X0
+	PXOR      X11, X7
+	PXOR      X0, X7
+	MOVQ      $0x3c255f5ebc414423, AX
+	MOVQ      AX, X11
+	PCLMULQDQ $0x00, X1, X11
+	MOVQ      $0x34f5a24e22d66e90, AX
+	PINSRQ    $0x1, AX, X12
+	PCLMULQDQ $0x11, X12, X1
+	PXOR      X11, X1
+	PXOR      X7, X1
+	MOVQ      $0x7b0ab10dd0f809fe, AX
+	MOVQ      AX, X11
+	PCLMULQDQ $0x00, X2, X11
+	MOVQ      $0x03363823e6e791e5, AX
+	PINSRQ    $0x1, AX, X12
+	PCLMULQDQ $0x11, X12, X2
+	PXOR      X11, X2
+	PXOR      X1, X2
+	MOVQ      $0x0c32cdb31e18a84a, AX
+	MOVQ      AX, X11
+	PCLMULQDQ $0x00, X3, X11
+	MOVQ      $0x62242240ace5045a, AX
+	PINSRQ    $0x1, AX, X12
+	PCLMULQDQ $0x11, X12, X3
+	PXOR      X11, X3
+	PXOR      X2, X3
+	MOVQ      $0xbdd7ac0ee1a4a0f0, AX
+	MOVQ      AX, X11
+	PCLMULQDQ $0x00, X4, X11
+	MOVQ      $0xa3ffdc1fe8e82a8b, AX
+	PINSRQ    $0x1, AX, X12
+	PCLMULQDQ $0x11, X12, X4
+	PXOR      X11, X4
+	PXOR      X3, X4
+	MOVQ      $0xb0bc2e589204f500, AX
+	MOVQ      AX, X11
+	PCLMULQDQ $0x00, X5, X11
+	MOVQ      $0xe1e0bb9d45d7a44c, AX
+	PINSRQ    $0x1, AX, X12
+	PCLMULQDQ $0x11, X12, X5
+	PXOR      X11, X5
+	PXOR      X4, X5
+	MOVQ      $0xeadc41fd2ba3d420, AX
+	MOVQ      AX, X11
+	PCLMULQDQ $0x00, X6, X11
+	MOVQ      $0x21e9761e252621ac, AX
+	PINSRQ    $0x1, AX, X12
+	PCLMULQDQ $0x11, X12, X6
+	PXOR      X11, X6
+	PXOR      X5, X6
+	MOVQ      AX, X5
+	PCLMULQDQ $0x00, X6, X5
+	PSHUFD    $0xee, X6, X6
+	PXOR      X5, X6
+	MOVQ      $0x27ecfa329aef9f77, AX
+	MOVQ      AX, X4
+	PCLMULQDQ $0x00, X4, X6
+	PEXTRQ    $0, X6, BX
+	MOVQ      $0x34d926535897936b, AX
+	MOVQ      AX, X4
+	PCLMULQDQ $0x00, X4, X6
+	PXOR      X5, X6
+	PEXTRQ    $1, X6, AX
+	XORQ      BX, AX
+
+skip128:
+	NOTQ AX
+	MOVQ AX, checksum+32(FP)
+	VZEROUPPER
+	RET

+ 17 - 0
vendor/github.com/minio/crc64nvme/crc64_arm64.go

@@ -0,0 +1,17 @@
+// Copyright (c) 2025 Minio Inc. All rights reserved.
+// Use of this source code is governed by a license that can be
+// found in the LICENSE file.
+
+//go:build !noasm && !appengine && !gccgo
+
+package crc64nvme
+
+import (
+	"github.com/klauspost/cpuid/v2"
+)
+
+var hasAsm = cpuid.CPU.Supports(cpuid.ASIMD, cpuid.PMULL, cpuid.SHA3) // true only if the CPU reports all three features; presumably PMULL/SHA3 back the VPMULL/VEOR3 instructions in crc64_arm64.s - confirm
+var hasAsm512 = false // always false in this arm64 build
+
+func updateAsm(crc uint64, p []byte) (checksum uint64) // body-less declaration; implemented by TEXT ·updateAsm in crc64_arm64.s
+func updateAsm512(crc uint64, p []byte) (checksum uint64) { panic("should not be reached") } // stub that panics if called; hasAsm512 is false here

+ 157 - 0
vendor/github.com/minio/crc64nvme/crc64_arm64.s

@@ -0,0 +1,157 @@
+// Copyright (c) 2025 Minio Inc. All rights reserved.
+// Use of this source code is governed by a license that can be
+// found in the LICENSE file.
+
+//go:build !noasm && !appengine && !gccgo
+
+#include "textflag.h"
+
+TEXT ·updateAsm(SB), $0-40 // updateAsm(crc uint64, p []byte) (checksum uint64): no local frame, 40 bytes of arguments and result
+	MOVD crc+0(FP), R0    // checksum
+	MOVD p_base+8(FP), R1 // start pointer
+	MOVD p_len+16(FP), R2 // length of buffer
+	MOVD  $·const(SB), R3 // constants
+	MVN  R0, R0 // invert the incoming crc; it is inverted again at skip128
+	LSR  $7, R2, R2 // R2 = len / 128 = number of whole 128-byte blocks; trailing bytes are never read here
+	CMP  $1, R2
+	BLT  skip128 // no whole block: the double inversion returns the input crc unchanged
+
+	FLDPQ (R1), (F0, F1) // load the first 128 bytes as eight 16-byte values (registers 0..7)
+	FLDPQ 32(R1), (F2, F3)
+	FLDPQ 64(R1), (F4, F5)
+	FLDPQ 96(R1), (F6, F7)
+	FMOVD R0, F8 // place the inverted crc in the low 64 bits of V8
+	VMOVI $0, V9.B16
+	VMOV  V9.D[0], V8.D[1] // clear the high 64 bits of V8
+	VEOR  V8.B16, V0.B16, V0.B16 // XOR the crc into the first 8 bytes of the data
+	CMP   $1, R2
+	BEQ   tail128 // exactly one block: skip the folding loop
+
+	MOVD  112(R3), R4 // table offset 0x70 (labelled K_1087)
+	MOVD  120(R3), R5 // table offset 0x78 (labelled K_1023)
+	FMOVD R4, F8 // V8 low lane = K_1087, used with the low halves
+	VDUP  R5, V9.D2 // V9 both lanes = K_1023, used with the high halves
+
+loop128:
+	ADD     $128, R1, R1 // advance to the next 128-byte block
+	SUB     $1, R2, R2 // one fewer block remaining
+	VPMULL  V0.D1, V8.D1, V10.Q1 // V10 = low half of V0 times V8 (polynomial multiply)
+	VPMULL2 V0.D2, V9.D2, V0.Q1 // V0 = high half of V0 times V9
+	FLDPQ   (R1), (F11, F12) // next 32 bytes of input into V11, V12
+	VEOR3   V0.B16, V11.B16, V10.B16, V0.B16 // V0 = both products XOR the new 16 input bytes
+	VPMULL  V1.D1, V8.D1, V10.Q1 // same fold for V1..V7 below, each with its own 16 input bytes
+	VPMULL2 V1.D2, V9.D2, V1.Q1
+	VEOR3   V1.B16, V12.B16, V10.B16, V1.B16
+	VPMULL  V2.D1, V8.D1, V10.Q1
+	VPMULL2 V2.D2, V9.D2, V2.Q1
+	FLDPQ   32(R1), (F11, F12)
+	VEOR3   V2.B16, V11.B16, V10.B16, V2.B16
+	VPMULL  V3.D1, V8.D1, V10.Q1
+	VPMULL2 V3.D2, V9.D2, V3.Q1
+	VEOR3   V3.B16, V12.B16, V10.B16, V3.B16
+	VPMULL  V4.D1, V8.D1, V10.Q1
+	VPMULL2 V4.D2, V9.D2, V4.Q1
+	FLDPQ   64(R1), (F11, F12)
+	VEOR3   V4.B16, V11.B16, V10.B16, V4.B16
+	VPMULL  V5.D1, V8.D1, V10.Q1
+	VPMULL2 V5.D2, V9.D2, V5.Q1
+	VEOR3   V5.B16, V12.B16, V10.B16, V5.B16
+	VPMULL  V6.D1, V8.D1, V10.Q1
+	VPMULL2 V6.D2, V9.D2, V6.Q1
+	FLDPQ   96(R1), (F11, F12)
+	VEOR3   V6.B16, V11.B16, V10.B16, V6.B16
+	VPMULL  V7.D1, V8.D1, V10.Q1
+	VPMULL2 V7.D2, V9.D2, V7.Q1
+	VEOR3   V7.B16, V12.B16, V10.B16, V7.B16
+	CMP     $1, R2
+	BHI     loop128 // keep folding while more than one block is left to account for
+
+tail128:
+	MOVD    (R3), R4 // table offset 0x00 (labelled K_959)
+	FMOVD   R4, F11
+	VPMULL  V0.D1, V11.D1, V11.Q1 // V11 = low half of V0 times K_959
+	MOVD    8(R3), R4 // table offset 0x08 (labelled K_895)
+	VDUP    R4, V12.D2
+	VPMULL2 V0.D2, V12.D2, V0.Q1 // V0 = high half of V0 times K_895
+	VEOR3   V0.B16, V7.B16, V11.B16, V7.B16 // fold V0 into V7
+	MOVD    16(R3), R4 // each following group uses the next pair of table constants
+	FMOVD   R4, F11
+	VPMULL  V1.D1, V11.D1, V11.Q1
+	MOVD    24(R3), R4
+	VDUP    R4, V12.D2
+	VPMULL2 V1.D2, V12.D2, V1.Q1
+	VEOR3   V1.B16, V11.B16, V7.B16, V1.B16 // V1 = folded V1 XOR running value in V7
+	MOVD    32(R3), R4
+	FMOVD   R4, F11
+	VPMULL  V2.D1, V11.D1, V11.Q1
+	MOVD    40(R3), R4
+	VDUP    R4, V12.D2
+	VPMULL2 V2.D2, V12.D2, V2.Q1
+	VEOR3   V2.B16, V11.B16, V1.B16, V2.B16 // running value moves V1 -> V2
+	MOVD    48(R3), R4
+	FMOVD   R4, F11
+	VPMULL  V3.D1, V11.D1, V11.Q1
+	MOVD    56(R3), R4
+	VDUP    R4, V12.D2
+	VPMULL2 V3.D2, V12.D2, V3.Q1
+	VEOR3   V3.B16, V11.B16, V2.B16, V3.B16 // V2 -> V3
+	MOVD    64(R3), R4
+	FMOVD   R4, F11
+	VPMULL  V4.D1, V11.D1, V11.Q1
+	MOVD    72(R3), R4
+	VDUP    R4, V12.D2
+	VPMULL2 V4.D2, V12.D2, V4.Q1
+	VEOR3   V4.B16, V11.B16, V3.B16, V4.B16 // V3 -> V4
+	MOVD    80(R3), R4
+	FMOVD   R4, F11
+	VPMULL  V5.D1, V11.D1, V11.Q1
+	MOVD    88(R3), R4
+	VDUP    R4, V12.D2
+	VPMULL2 V5.D2, V12.D2, V5.Q1
+	VEOR3   V5.B16, V11.B16, V4.B16, V5.B16 // V4 -> V5
+	MOVD    96(R3), R4
+	FMOVD   R4, F11
+	VPMULL  V6.D1, V11.D1, V11.Q1
+	MOVD    104(R3), R4 // table offset 0x68 (labelled K_127); R4 is reused below
+	VDUP    R4, V12.D2
+	VPMULL2 V6.D2, V12.D2, V6.Q1
+	VEOR3   V6.B16, V11.B16, V5.B16, V6.B16 // V6 now holds the single combined 128-bit value
+	FMOVD   R4, F5 // R4 still holds K_127 from the load at 104(R3)
+	VPMULL  V6.D1, V5.D1, V5.Q1 // V5 = low half of V6 times K_127
+	VDUP    V6.D[1], V6.D2 // copy the high half of V6 into both lanes
+	VEOR    V5.B8, V6.B8, V6.B8 // low 8 bytes: product low half XOR old high half
+	MOVD    128(R3), R4 // table offset 0x80 (labelled MU)
+	FMOVD   R4, F4
+	VPMULL  V4.D1, V6.D1, V6.Q1 // V6 = MU times low half of V6
+	FMOVD   F6, R4 // keep the low 64 bits of that product
+	MOVD    136(R3), R5 // table offset 0x88 (labelled POLY)
+	FMOVD   R5, F4
+	VPMULL  V4.D1, V6.D1, V6.Q1 // V6 = POLY times low half of V6
+	VEOR    V6.B16, V5.B16, V6.B16
+	VMOV    V6.D[1], R5 // take the high 64 bits
+	EOR     R4, R5, R0 // R0 = crc value before the final inversion
+
+skip128:
+	MVN  R0, R0 // final inversion, matching the one at entry
+	MOVD R0, checksum+32(FP)
+	RET
+
+DATA ·const+0x000(SB)/8, $0xd083dd594d96319d // K_959
+DATA ·const+0x008(SB)/8, $0x946588403d4adcbc // K_895
+DATA ·const+0x010(SB)/8, $0x3c255f5ebc414423 // K_831
+DATA ·const+0x018(SB)/8, $0x34f5a24e22d66e90 // K_767
+DATA ·const+0x020(SB)/8, $0x7b0ab10dd0f809fe // K_703
+DATA ·const+0x028(SB)/8, $0x03363823e6e791e5 // K_639
+DATA ·const+0x030(SB)/8, $0x0c32cdb31e18a84a // K_575
+DATA ·const+0x038(SB)/8, $0x62242240ace5045a // K_511
+DATA ·const+0x040(SB)/8, $0xbdd7ac0ee1a4a0f0 // K_447
+DATA ·const+0x048(SB)/8, $0xa3ffdc1fe8e82a8b // K_383
+DATA ·const+0x050(SB)/8, $0xb0bc2e589204f500 // K_319
+DATA ·const+0x058(SB)/8, $0xe1e0bb9d45d7a44c // K_255
+DATA ·const+0x060(SB)/8, $0xeadc41fd2ba3d420 // K_191
+DATA ·const+0x068(SB)/8, $0x21e9761e252621ac // K_127
+DATA ·const+0x070(SB)/8, $0xa1ca681e733f9c40 // K_1087
+DATA ·const+0x078(SB)/8, $0x5f852fb61e8d92dc // K_1023
+DATA ·const+0x080(SB)/8, $0x27ecfa329aef9f77 // MU
+DATA ·const+0x088(SB)/8, $0x34d926535897936b // POLY
+GLOBL ·const(SB), (NOPTR+RODATA), $144 // 18 entries x 8 bytes = 144 bytes; read-only, pointer-free; updateAsm reads entries by fixed byte offset

+ 13 - 0
vendor/github.com/minio/crc64nvme/crc64_other.go

@@ -0,0 +1,13 @@
+// Copyright (c) 2025 Minio Inc. All rights reserved.
+// Use of this source code is governed by a license that can be
+// found in the LICENSE file.
+
+//go:build (!amd64 || noasm || appengine || gccgo) && (!arm64 || noasm || appengine || gccgo)
+
+package crc64nvme
+
+var hasAsm = false // no assembly implementation under this build constraint
+var hasAsm512 = false // no 512-bit assembly implementation under this build constraint
+
+func updateAsm(crc uint64, p []byte) (checksum uint64)    { panic("should not be reached") } // stub that panics if called; hasAsm is false here
+func updateAsm512(crc uint64, p []byte) (checksum uint64) { panic("should not be reached") } // stub that panics if called; hasAsm512 is false here

+ 6 - 0
vendor/modules.txt

@@ -249,6 +249,9 @@ github.com/klauspost/compress/internal/le
 github.com/klauspost/compress/internal/snapref
 github.com/klauspost/compress/internal/snapref
 github.com/klauspost/compress/zstd
 github.com/klauspost/compress/zstd
 github.com/klauspost/compress/zstd/internal/xxhash
 github.com/klauspost/compress/zstd/internal/xxhash
+# github.com/klauspost/cpuid/v2 v2.2.9
+## explicit; go 1.20
+github.com/klauspost/cpuid/v2
 # github.com/libp2p/go-reuseport v0.4.0
 # github.com/libp2p/go-reuseport v0.4.0
 ## explicit; go 1.20
 ## explicit; go 1.20
 github.com/libp2p/go-reuseport
 github.com/libp2p/go-reuseport
@@ -269,6 +272,9 @@ github.com/mdlayher/socket
 # github.com/miekg/dns v1.1.56
 # github.com/miekg/dns v1.1.56
 ## explicit; go 1.19
 ## explicit; go 1.19
 github.com/miekg/dns
 github.com/miekg/dns
+# github.com/minio/crc64nvme v1.1.1
+## explicit; go 1.22
+github.com/minio/crc64nvme
 # github.com/mitchellh/panicwrap v0.0.0-20170106182340-fce601fe5557
 # github.com/mitchellh/panicwrap v0.0.0-20170106182340-fce601fe5557
 ## explicit
 ## explicit
 github.com/mitchellh/panicwrap
 github.com/mitchellh/panicwrap