浏览代码

Merge branch 'master' into inproxy

Rod Hynes 2 年之前
父节点
当前提交
b5607e3597
共有 36 个文件被更改,包括 3170 次插入294 次删除
  1. 2 0
      .github/workflows/tests.yml
  2. 2 5
      go.mod
  3. 4 17
      go.sum
  4. 47 5
      psiphon/common/obfuscator/passthrough.go
  5. 21 2
      psiphon/common/obfuscator/passthrough_test.go
  6. 2 2
      psiphon/common/parameters/frontingSpec.go
  7. 6 1
      psiphon/common/protocol/protocol.go
  8. 1 1
      psiphon/common/quic/quic.go
  9. 0 0
      psiphon/common/regen/LICENSE.txt
  10. 55 7
      psiphon/common/regen/char_class.go
  11. 0 0
      psiphon/common/regen/generator_error.go
  12. 45 0
      psiphon/common/regen/generator_error_test.go
  13. 76 24
      psiphon/common/regen/internal_generator.go
  14. 70 13
      psiphon/common/regen/regen.go
  15. 85 0
      psiphon/common/regen/regen_benchmarks_test.go
  16. 935 0
      psiphon/common/regen/regen_test.go
  17. 34 34
      psiphon/common/regen/regexp_format.go
  18. 0 0
      psiphon/common/regen/rng.go
  19. 47 0
      psiphon/common/regen/rng_test.go
  20. 693 0
      psiphon/common/transforms/httpNormalizer.go
  21. 594 0
      psiphon/common/transforms/httpNormalizer_test.go
  22. 68 53
      psiphon/common/transforms/httpTransformer.go
  23. 97 15
      psiphon/common/transforms/httpTransformer_test.go
  24. 10 7
      psiphon/common/transforms/transforms.go
  25. 13 5
      psiphon/common/values/values.go
  26. 25 18
      psiphon/dialParameters.go
  27. 6 3
      psiphon/server/config.go
  28. 140 9
      psiphon/server/meek.go
  29. 31 2
      psiphon/server/meek_test.go
  30. 1 0
      psiphon/server/tunnelServer.go
  31. 29 10
      vendor/github.com/Psiphon-Labs/qtls-go1-18/handshake_messages.go
  32. 29 10
      vendor/github.com/Psiphon-Labs/qtls-go1-19/handshake_messages.go
  33. 0 28
      vendor/github.com/zach-klippenstein/goregen/.gitignore
  34. 0 7
      vendor/github.com/zach-klippenstein/goregen/.travis.yml
  35. 0 7
      vendor/github.com/zach-klippenstein/goregen/README.md
  36. 2 9
      vendor/modules.txt

+ 2 - 0
.github/workflows/tests.yml

@@ -81,6 +81,7 @@ jobs:
           go test -v -race ./psiphon/common/crypto/ssh
           go test -v -race ./psiphon/common/fragmentor
           go test -v -race ./psiphon/common/inproxy
+          go test -v -race ./psiphon/common/regen
           go test -v -race ./psiphon/common/monotime
           go test -v -race ./psiphon/common/obfuscator
           go test -v -race ./psiphon/common/osl
@@ -113,6 +114,7 @@ jobs:
           go test -v -covermode=count -coverprofile=ssh.coverprofile ./psiphon/common/crypto/ssh
           go test -v -covermode=count -coverprofile=fragmentor.coverprofile ./psiphon/common/fragmentor
           go test -v -covermode=count -coverprofile=inproxy.coverprofile ./psiphon/common/inproxy
+          go test -v -covermode=count -coverprofile=regen.coverprofile ./psiphon/common/regen
           go test -v -covermode=count -coverprofile=monotime.coverprofile ./psiphon/common/monotime
           go test -v -covermode=count -coverprofile=obfuscator.coverprofile ./psiphon/common/obfuscator
           go test -v -covermode=count -coverprofile=osl.coverprofile ./psiphon/common/osl

+ 2 - 5
go.mod

@@ -58,7 +58,6 @@ require (
 	github.com/stretchr/testify v1.8.2
 	github.com/syndtr/gocapability v0.0.0-20170704070218-db04d3cc01c8
 	github.com/wader/filtertransport v0.0.0-20200316221534-bdd9e61eee78
-	github.com/zach-klippenstein/goregen v0.0.0-20160303162051-795b5e3961ea
 	golang.org/x/crypto v0.6.0
 	golang.org/x/net v0.8.0
 	golang.org/x/sync v0.1.0
@@ -71,8 +70,8 @@ require (
 require (
 	git.torproject.org/pluggable-transports/goptlib.git v1.2.0 // indirect
 	github.com/AndreasBriese/bbloom v0.0.0-20170702084017-28f7e881ca57 // indirect
-	github.com/Psiphon-Labs/qtls-go1-18 v0.0.0-20221014170512-3bdc7291c091 // indirect
-	github.com/Psiphon-Labs/qtls-go1-19 v0.0.0-20221014165721-ed28749db082 // indirect
+	github.com/Psiphon-Labs/qtls-go1-18 v0.0.0-20230515185031-ae6632ab97ac // indirect
+	github.com/Psiphon-Labs/qtls-go1-19 v0.0.0-20230515185100-099bac32c181 // indirect
 	github.com/agl/ed25519 v0.0.0-20170116200512-5312a6153412 // indirect
 	github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74 // indirect
 	github.com/andybalholm/brotli v1.0.5-0.20220518190645-786ec621f618 // indirect
@@ -84,7 +83,6 @@ require (
 	github.com/golang/mock v1.6.0 // indirect
 	github.com/golang/protobuf v1.5.3-0.20210916003710-5d5e8c018a13 // indirect
 	github.com/google/go-cmp v0.5.9 // indirect
-	github.com/google/gxui v0.0.0-20151028112939-f85e0a97b3a4 // indirect
 	github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
 	github.com/google/uuid v1.3.0 // indirect
 	github.com/josharian/native v1.1.1-0.20230202152459-5c7d0dd6ab86 // indirect
@@ -111,7 +109,6 @@ require (
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/quic-go/qpack v0.4.0 // indirect
 	github.com/sergeyfrolov/bsbuffer v0.0.0-20180903213811-94e85abb8507 // indirect
-	github.com/smartystreets/goconvey v1.7.2 // indirect
 	github.com/tailscale/goupnp v1.0.1-0.20210804011211-c64d0f06ea05 // indirect
 	github.com/x448/float16 v0.8.4 // indirect
 	gitlab.com/yawning/obfs4.git v0.0.0-20190120164510-816cff15f425 // indirect

+ 4 - 17
go.sum

@@ -10,10 +10,10 @@ github.com/Psiphon-Labs/bolt v0.0.0-20200624191537-23cedaef7ad7 h1:Hx/NCZTnvoKZu
 github.com/Psiphon-Labs/bolt v0.0.0-20200624191537-23cedaef7ad7/go.mod h1:alTtZBo3j4AWFvUrAH6F5ZaHcTj4G5Y01nHz8dkU6vU=
 github.com/Psiphon-Labs/goptlib v0.0.0-20200406165125-c0e32a7a3464 h1:VmnMMMheFXwLV0noxYhbJbLmkV4iaVW3xNnj6xcCNHo=
 github.com/Psiphon-Labs/goptlib v0.0.0-20200406165125-c0e32a7a3464/go.mod h1:Pe5BqN2DdIdChorAXl6bDaQd/wghpCleJfid2NoSli0=
-github.com/Psiphon-Labs/qtls-go1-18 v0.0.0-20221014170512-3bdc7291c091 h1:Kv0LQQ3joUp8s2z36aigpNgNyiLiExT/OS9KOC/L/gI=
-github.com/Psiphon-Labs/qtls-go1-18 v0.0.0-20221014170512-3bdc7291c091/go.mod h1:0IvfcPDkLvBkir+WGq3E0shsx+TLasdcl8ojVWWTflE=
-github.com/Psiphon-Labs/qtls-go1-19 v0.0.0-20221014165721-ed28749db082 h1:arVlc3JYvckFXGyB8N30ul8AmA+rDuLolPRYMDHzgTU=
-github.com/Psiphon-Labs/qtls-go1-19 v0.0.0-20221014165721-ed28749db082/go.mod h1:mHM/QFYc02W9MKJ/Ux5XGOKP4OImosPeQUO7XAaXs0E=
+github.com/Psiphon-Labs/qtls-go1-18 v0.0.0-20230515185031-ae6632ab97ac h1:2/n1zJIAEmpAg/IapXRdcuY29L6tud4WyKrXj8kpWSY=
+github.com/Psiphon-Labs/qtls-go1-18 v0.0.0-20230515185031-ae6632ab97ac/go.mod h1:0IvfcPDkLvBkir+WGq3E0shsx+TLasdcl8ojVWWTflE=
+github.com/Psiphon-Labs/qtls-go1-19 v0.0.0-20230515185100-099bac32c181 h1:+rhvNaRVcVr6OXDPJx3lOaSccBhCxgcKlG/OVU/uvGc=
+github.com/Psiphon-Labs/qtls-go1-19 v0.0.0-20230515185100-099bac32c181/go.mod h1:mHM/QFYc02W9MKJ/Ux5XGOKP4OImosPeQUO7XAaXs0E=
 github.com/Psiphon-Labs/quic-go v0.0.0-20230215230806-9b1ddbf778cc h1:FUmGSvMiMbf1tFXWbK0+N7+5zBhOol8CHQdpB4ZQlDg=
 github.com/Psiphon-Labs/quic-go v0.0.0-20230215230806-9b1ddbf778cc/go.mod h1:cu4yhfHkyt+uQ9FFFjTpjCjcQYf52ntEAyoV4Zg0+fg=
 github.com/Psiphon-Labs/tls-tris v0.0.0-20210713133851-676a693d51ad h1:m6HS84+b5xDPLj7D/ya1CeixyaHOCZoMbBilJ48y+Ts=
@@ -104,14 +104,10 @@ github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/gopacket v1.1.19-0.20200831200443-df1bbd09a561 h1:VB5cLlMqQWruyqG6OW/EHDLUawT/hel1I3ElBE4iHg0=
 github.com/google/gopacket v1.1.19-0.20200831200443-df1bbd09a561/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo=
-github.com/google/gxui v0.0.0-20151028112939-f85e0a97b3a4 h1:OL2d27ueTKnlQJoqLW2fc9pWYulFnJYLWzomGV7HqZo=
-github.com/google/gxui v0.0.0-20151028112939-f85e0a97b3a4/go.mod h1:Pw1H1OjSNHiqeuxAduB1BKYXIwFtsyrY47nEqSgEiCM=
 github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
 github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
 github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
 github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8=
-github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
 github.com/grafov/m3u8 v0.0.0-20171211212457-6ab8f28ed427 h1:xh96CCAZTX8LJPFoOVRgTwZbn2DvJl8fyCyivohhSIg=
 github.com/grafov/m3u8 v0.0.0-20171211212457-6ab8f28ed427/go.mod h1:PdjzaU/pJUo4jTIn2rcgMFs+HqBGl/sPJLr8BI0Xq/I=
 github.com/h2non/gock v1.0.9/go.mod h1:CZMcB0Lg5IWnr9bF79pPMg9WeV6WumxQiUJ1UvdO1iE=
@@ -127,8 +123,6 @@ github.com/jsimonetti/rtnetlink v0.0.0-20190606172950-9527aa82566a/go.mod h1:Oz+
 github.com/jsimonetti/rtnetlink v0.0.0-20200117123717-f846d4f6c1f4/go.mod h1:WGuG/smIU4J/54PblvSbh+xvCZmpJnFgr3ds6Z55XMQ=
 github.com/jsimonetti/rtnetlink v1.1.2-0.20220408201609-d380b505068b h1:Yws7RV6kZr2O7PPdT+RkbSmmOponA8i/1DuGHe8BRsM=
 github.com/jsimonetti/rtnetlink v1.1.2-0.20220408201609-d380b505068b/go.mod h1:TzDCVOZKUa79z6iXbbXqhtAflVgUKaFkZ21M5tK5tzY=
-github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
-github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
 github.com/juju/ratelimit v1.0.2 h1:sRxmtRiajbvrcLQT7S+JbqU0ntsb9W2yhSdNN8tWfaI=
 github.com/juju/ratelimit v1.0.2/go.mod h1:qapgC/Gy+xNh9UxzV13HGGl/6UXNN+ct+vwSgWNm/qk=
 github.com/kardianos/osext v0.0.0-20170510131534-ae77be60afb1 h1:PJPDf8OUfOK1bb/NeTKd4f1QXZItOX389VN3B6qC8ro=
@@ -248,10 +242,6 @@ github.com/sergeyfrolov/bsbuffer v0.0.0-20180903213811-94e85abb8507/go.mod h1:Db
 github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
 github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0=
 github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
-github.com/smartystreets/assertions v1.2.0 h1:42S6lae5dvLc7BrLu/0ugRtcFVjoJNMC/N3yZFZkDFs=
-github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
-github.com/smartystreets/goconvey v1.7.2 h1:9RBaZCeXEQ3UselpuwUQHltGVXvdwm6cv1hgR6gDIPg=
-github.com/smartystreets/goconvey v1.7.2/go.mod h1:Vw0tHAZW6lzCRk3xgdin6fKYcG+G3Pg9vgXWeJpQFMM=
 github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ=
 github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
@@ -277,8 +267,6 @@ github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcY
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
 github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
-github.com/zach-klippenstein/goregen v0.0.0-20160303162051-795b5e3961ea h1:CyhwejzVGvZ3Q2PSbQ4NRRYn+ZWv5eS1vlaEusT+bAI=
-github.com/zach-klippenstein/goregen v0.0.0-20160303162051-795b5e3961ea/go.mod h1:eNr558nEUjP8acGw8FFjTeWvSgU1stO7FAO6eknhHe4=
 go4.org/mem v0.0.0-20210711025021-927187094b94 h1:OAAkygi2Js191AJP1Ds42MhJRgeofeKGjuoUqNp1QC4=
 go4.org/mem v0.0.0-20210711025021-927187094b94/go.mod h1:reUoABIJ9ikfM5sgtSF3Wushcza7+WeD01VB9Lirh3g=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
@@ -385,7 +373,6 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68=
 golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
-golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
 golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=

+ 47 - 5
psiphon/common/obfuscator/passthrough.go

@@ -36,6 +36,7 @@ const (
 	TLS_PASSTHROUGH_NONCE_SIZE   = 16
 	TLS_PASSTHROUGH_KEY_SIZE     = 32
 	TLS_PASSTHROUGH_TIME_PERIOD  = 20 * time.Minute
+	TLS_PASSTHROUGH_HISTORY_TTL  = TLS_PASSTHROUGH_TIME_PERIOD * 3
 	TLS_PASSTHROUGH_MESSAGE_SIZE = 32
 )
 
@@ -51,7 +52,7 @@ const (
 func MakeTLSPassthroughMessage(
 	useTimeFactor bool, obfuscatedKey string) ([]byte, error) {
 
-	passthroughKey, err := derivePassthroughKey(useTimeFactor, obfuscatedKey)
+	passthroughKey, err := derivePassthroughKey(useTimeFactor, 0, obfuscatedKey)
 	if err != nil {
 		return nil, errors.Trace(err)
 	}
@@ -86,9 +87,48 @@ func VerifyTLSPassthroughMessage(
 		message = stub[:]
 	}
 
-	passthroughKey, err := derivePassthroughKey(useTimeFactor, obfuscatedKey)
+	if useTimeFactor {
+
+		// Check three rounded time periods: the current one, the previous
+		// one, and the future one. Even if the client clock is ahead of the
+		// server clock by only a short amount, it can use the future time
+		// period, from the server's perspective, when the server's clock is
+		// close to the end of its current time period. And even if the
+		// client and server clocks are perfectly synchronized, the client
+		// may use the previous time period and then time advances to the
+		// next time period by the time the server receives the message.
+		//
+		// All three time periods are always checked, to avoid leaking via
+		// timing differences.
+
+		match := false
+
+		for _, timePeriodShift := range []int64{-1, 0, 1} {
+
+			passthroughKey, err := derivePassthroughKey(
+				useTimeFactor, timePeriodShift, obfuscatedKey)
+			if err != nil {
+				// derivePassthroughKey is not expected to fail.
+				// TODO: log error
+				return false
+			}
+
+			h := hmac.New(sha256.New, passthroughKey)
+			h.Write(message[0:TLS_PASSTHROUGH_NONCE_SIZE])
+
+			if 1 == subtle.ConstantTimeCompare(
+				message[TLS_PASSTHROUGH_NONCE_SIZE:],
+				h.Sum(nil)[0:TLS_PASSTHROUGH_MESSAGE_SIZE-TLS_PASSTHROUGH_NONCE_SIZE]) {
+
+				match = true
+			}
+		}
+
+		return match
+	}
+
+	passthroughKey, err := derivePassthroughKey(false, 0, obfuscatedKey)
 	if err != nil {
-		// TODO: log error
 		return false
 	}
 
@@ -106,7 +146,7 @@ func VerifyTLSPassthroughMessage(
 var timePeriodSeconds = int64(TLS_PASSTHROUGH_TIME_PERIOD / time.Second)
 
 func derivePassthroughKey(
-	useTimeFactor bool, obfuscatedKey string) ([]byte, error) {
+	useTimeFactor bool, timePeriodShift int64, obfuscatedKey string) ([]byte, error) {
 
 	secret := []byte(obfuscatedKey)
 
@@ -130,7 +170,9 @@ func derivePassthroughKey(
 		// differences at time boundaries. We assume that the server always or never
 		// sets useTimeFactor.
 
-		roundedTimePeriod := (time.Now().Unix() + (timePeriodSeconds / 2)) / timePeriodSeconds
+		roundedTimePeriod := (time.Now().Unix() +
+			(timePeriodSeconds / 2) +
+			timePeriodSeconds*timePeriodShift) / timePeriodSeconds
 
 		var timeFactor [8]byte
 		binary.LittleEndian.PutUint64(timeFactor[:], uint64(roundedTimePeriod))

+ 21 - 2
psiphon/common/obfuscator/passthrough_test.go

@@ -74,7 +74,7 @@ func TestTLSPassthrough(t *testing.T) {
 
 			// test: valid passthrough message now invalid after time factor period
 
-			time.Sleep(time.Duration(timePeriodSeconds)*time.Second + time.Millisecond)
+			time.Sleep(time.Duration(timePeriodSeconds*2)*time.Second + time.Millisecond)
 
 			verified := VerifyTLSPassthroughMessage(useTimeFactor, correctMasterKey, validMessage)
 
@@ -123,9 +123,28 @@ func TestTLSPassthrough(t *testing.T) {
 				timeDiff = -timeDiff
 			}
 
-			if timeDiff.Microseconds() > 100 {
+			if timeDiff.Microseconds() > 500 {
 				t.Fatalf("unexpected elapsed time difference")
 			}
+
+			// test: cross rounded time period boundaries
+
+			if useTimeFactor {
+
+				for i := 0; i < 2000; i++ {
+
+					validMessage, err := MakeTLSPassthroughMessage(useTimeFactor, correctMasterKey)
+					if err != nil {
+						t.Fatalf("MakeTLSPassthroughMessage failed: %s", err)
+					}
+
+					time.Sleep(10 * time.Millisecond)
+
+					if !VerifyTLSPassthroughMessage(useTimeFactor, correctMasterKey, validMessage) {
+						t.Fatalf("unexpected invalid passthrough message")
+					}
+				}
+			}
 		})
 	}
 }

+ 2 - 2
psiphon/common/parameters/frontingSpec.go

@@ -24,7 +24,7 @@ import (
 
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
-	regen "github.com/zach-klippenstein/goregen"
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/regen"
 )
 
 // FrontingSpecs is a list of domain fronting specs.
@@ -73,7 +73,7 @@ func (specs FrontingSpecs) SelectParameters() (
 		return "", "", "", "", nil, "", errors.TraceNew("missing fronting address")
 	}
 
-	frontingDialAddr, err := regen.Generate(
+	frontingDialAddr, err := regen.GenerateString(
 		spec.Addresses[prng.Intn(len(spec.Addresses))])
 	if err != nil {
 		return "", "", "", "", nil, "", errors.Trace(err)

+ 6 - 1
psiphon/common/protocol/protocol.go

@@ -196,6 +196,10 @@ func TunnelProtocolUsesMeekHTTP(protocol string) bool {
 		protocol == TUNNEL_PROTOCOL_FRONTED_MEEK_HTTP
 }
 
+func TunnelProtocolUsesMeekHTTPNormalizer(protocol string) bool {
+	return protocol == TUNNEL_PROTOCOL_UNFRONTED_MEEK
+}
+
 func TunnelProtocolUsesMeekHTTPS(protocol string) bool {
 	return protocol == TUNNEL_PROTOCOL_FRONTED_MEEK ||
 		protocol == TUNNEL_PROTOCOL_UNFRONTED_MEEK_HTTPS ||
@@ -251,7 +255,8 @@ func TunnelProtocolRequiresTLS12SessionTickets(protocol string) bool {
 
 func TunnelProtocolSupportsPassthrough(protocol string) bool {
 	return protocol == TUNNEL_PROTOCOL_UNFRONTED_MEEK_HTTPS ||
-		protocol == TUNNEL_PROTOCOL_UNFRONTED_MEEK_SESSION_TICKET
+		protocol == TUNNEL_PROTOCOL_UNFRONTED_MEEK_SESSION_TICKET ||
+		protocol == TUNNEL_PROTOCOL_UNFRONTED_MEEK
 }
 
 func TunnelProtocolSupportsUpstreamProxy(protocol string) bool {

+ 1 - 1
psiphon/common/quic/quic.go

@@ -190,7 +190,7 @@ func Listen(
 	// Irregular events are logged for invalid client activity.
 
 	clientRandomHistory := obfuscator.NewSeedHistory(
-		&obfuscator.SeedHistoryConfig{SeedTTL: obfuscator.TLS_PASSTHROUGH_TIME_PERIOD})
+		&obfuscator.SeedHistoryConfig{SeedTTL: obfuscator.TLS_PASSTHROUGH_HISTORY_TTL})
 
 	verifyClientHelloRandom := func(remoteAddr net.Addr, clientHelloRandom []byte) bool {
 

+ 0 - 0
vendor/github.com/zach-klippenstein/goregen/LICENSE.txt → psiphon/common/regen/LICENSE.txt


+ 55 - 7
vendor/github.com/zach-klippenstein/goregen/char_class.go → psiphon/common/regen/char_class.go

@@ -14,10 +14,30 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
+/*
+ * Copyright (c) 2023, Psiphon Inc.
+ * All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
 package regen
 
 import (
 	"fmt"
+	"math"
 )
 
 // CharClass represents a regular expression character class as a list of ranges.
@@ -57,7 +77,7 @@ e.g.
 func parseCharClass(runes []rune) *tCharClass {
 	var totalSize int32
 	numRanges := len(runes) / 2
-	ranges := make([]tCharClassRange, numRanges, numRanges)
+	ranges := make([]tCharClassRange, numRanges)
 
 	for i := 0; i < numRanges; i++ {
 		start := runes[i*2]
@@ -79,6 +99,38 @@ func parseCharClass(runes []rune) *tCharClass {
 	return &tCharClass{ranges, totalSize}
 }
 
+// parseByteClass parses character classes only for byte values (0-255).
+// Returns nil if runes does not contain any byte values.
+//
+// Note:
+// If an end range is greater than 255, it is truncated to 255.
+func parseByteClass(runes []rune) *tCharClass {
+	var totalSize int32
+
+	var ranges []tCharClassRange
+	for i := 0; i < len(runes)-1; i += 2 {
+		start := runes[i]
+		end := runes[i+1]
+
+		var r tCharClassRange
+
+		if start <= math.MaxUint8 {
+			if end > math.MaxUint8 {
+				end = math.MaxUint8
+			}
+			r = newCharClassRange(start, end)
+			ranges = append(ranges, r)
+			totalSize += r.Size
+		}
+	}
+
+	if len(ranges) == 0 {
+		return nil
+	}
+
+	return &tCharClass{ranges, totalSize}
+}
+
 // GetRuneAt gets a rune from CharClass as a contiguous array of runes.
 func (class *tCharClass) GetRuneAt(i int32) rune {
 	for _, r := range class.Ranges {
@@ -95,10 +147,6 @@ func (class *tCharClass) String() string {
 }
 
 func newCharClassRange(start rune, end rune) tCharClassRange {
-	if start < 1 {
-		panic("char class range cannot contain runes less than 1")
-	}
-
 	size := end - start + 1
 
 	if size < 1 {
@@ -113,8 +161,8 @@ func newCharClassRange(start rune, end rune) tCharClassRange {
 
 func (r tCharClassRange) String() string {
 	if r.Size == 1 {
-		return fmt.Sprintf("%s:1", runesToString(r.Start))
+		return fmt.Sprintf("%s:1", runesToUTF8(r.Start))
 	}
-	return fmt.Sprintf("%s-%s:%d", runesToString(r.Start), runesToString(r.Start+rune(r.Size-1)), r.Size)
+	return fmt.Sprintf("%s-%s:%d", runesToUTF8(r.Start), runesToUTF8(r.Start+rune(r.Size-1)), r.Size)
 
 }

+ 0 - 0
vendor/github.com/zach-klippenstein/goregen/generator_error.go → psiphon/common/regen/generator_error.go


+ 45 - 0
psiphon/common/regen/generator_error_test.go

@@ -0,0 +1,45 @@
+/*
+Copyright 2014 Zachary Klippenstein
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package regen
+
+import (
+	"errors"
+	"testing"
+)
+
+func TestGeneratorError(t *testing.T) {
+
+	t.Run("Handles nil cause", func(t *testing.T) {
+		err := generatorError(nil, "msg")
+		if err == nil {
+			t.Fatal("Expected error, got nil")
+		}
+		if err.Error() != "msg" {
+			t.Fatalf("Expected error message 'msg', got '%s'", err.Error())
+		}
+	})
+
+	t.Run("Formats", func(t *testing.T) {
+		err := generatorError(errors.New("cause"), "msg %s", "arg")
+		if err == nil {
+			t.Fatal("Expected error, got nil")
+		}
+		if err.Error() != "msg arg\ncaused by cause" {
+			t.Fatalf("Expected error message 'msg arg\ncaused by cause', got '%s'", err.Error())
+		}
+	})
+}

+ 76 - 24
vendor/github.com/zach-klippenstein/goregen/internal_generator.go → psiphon/common/regen/internal_generator.go

@@ -14,6 +14,25 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
+/*
+ * Copyright (c) 2023, Psiphon Inc.
+ * All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
 package regen
 
 import (
@@ -56,10 +75,10 @@ func init() {
 
 type internalGenerator struct {
 	Name         string
-	GenerateFunc func() string
+	GenerateFunc func() ([]byte, error)
 }
 
-func (gen *internalGenerator) Generate() string {
+func (gen *internalGenerator) Generate() ([]byte, error) {
 	return gen.GenerateFunc()
 }
 
@@ -69,7 +88,7 @@ func (gen *internalGenerator) String() string {
 
 // Create a new generator for each expression in regexps.
 func newGenerators(regexps []*syntax.Regexp, args *GeneratorArgs) ([]*internalGenerator, error) {
-	generators := make([]*internalGenerator, len(regexps), len(regexps))
+	generators := make([]*internalGenerator, len(regexps))
 	var err error
 
 	// create a generator for each alternate pattern
@@ -98,35 +117,48 @@ func newGenerator(regexp *syntax.Regexp, args *GeneratorArgs) (generator *intern
 
 // Generator that does nothing.
 func noop(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) {
-	return &internalGenerator{regexp.String(), func() string {
-		return ""
+	return &internalGenerator{regexp.String(), func() ([]byte, error) {
+		return []byte{}, nil
 	}}, nil
 }
 
 func opEmptyMatch(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) {
 	enforceOp(regexp, syntax.OpEmptyMatch)
-	return &internalGenerator{regexp.String(), func() string {
-		return ""
+	return &internalGenerator{regexp.String(), func() ([]byte, error) {
+		return []byte{}, nil
 	}}, nil
 }
 
 func opLiteral(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) {
 	enforceOp(regexp, syntax.OpLiteral)
-	return &internalGenerator{regexp.String(), func() string {
-		return runesToString(regexp.Rune...)
+	return &internalGenerator{regexp.String(), func() ([]byte, error) {
+		if args.ByteMode {
+			return runesToBytes(regexp.Rune...)
+		} else {
+			return runesToUTF8(regexp.Rune...), nil
+		}
 	}}, nil
 }
 
 func opAnyChar(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) {
 	enforceOp(regexp, syntax.OpAnyChar)
-	return &internalGenerator{regexp.String(), func() string {
-		return runesToString(rune(args.rng.Int31()))
+	return &internalGenerator{regexp.String(), func() ([]byte, error) {
+		if args.ByteMode {
+			return runesToBytes(rune(args.rng.Intn(math.MaxUint8 + 1)))
+		} else {
+			return runesToUTF8(rune(args.rng.Int31())), nil
+		}
 	}}, nil
 }
 
 func opAnyCharNotNl(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) {
 	enforceOp(regexp, syntax.OpAnyCharNotNL)
-	charClass := newCharClass(1, rune(math.MaxInt32))
+	var charClass *tCharClass
+	if args.ByteMode {
+		charClass = newCharClass(0, rune(math.MaxUint8))
+	} else {
+		charClass = newCharClass(1, rune(math.MaxInt32))
+	}
 	return createCharClassGenerator(regexp.String(), charClass, args)
 }
 
@@ -154,7 +186,15 @@ func opRepeat(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, e
 // classes that respect it.
 func opCharClass(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) {
 	enforceOp(regexp, syntax.OpCharClass)
-	charClass := parseCharClass(regexp.Rune)
+	var charClass *tCharClass
+	if args.ByteMode {
+		charClass = parseByteClass(regexp.Rune)
+		if charClass == nil {
+			return nil, fmt.Errorf("invalid byte class: /%s/", regexp)
+		}
+	} else {
+		charClass = parseCharClass(regexp.Rune)
+	}
 	return createCharClassGenerator(regexp.String(), charClass, args)
 }
 
@@ -166,12 +206,16 @@ func opConcat(regexp *syntax.Regexp, genArgs *GeneratorArgs) (*internalGenerator
 		return nil, generatorError(err, "error creating generators for concat pattern /%s/", regexp)
 	}
 
-	return &internalGenerator{regexp.String(), func() string {
+	return &internalGenerator{regexp.String(), func() ([]byte, error) {
 		var result bytes.Buffer
 		for _, generator := range generators {
-			result.WriteString(generator.Generate())
+			gen, err := generator.Generate()
+			if err != nil {
+				return nil, err
+			}
+			result.Write(gen)
 		}
-		return result.String()
+		return result.Bytes(), nil
 	}}, nil
 }
 
@@ -185,7 +229,7 @@ func opAlternate(regexp *syntax.Regexp, genArgs *GeneratorArgs) (*internalGenera
 
 	numGens := len(generators)
 
-	return &internalGenerator{regexp.String(), func() string {
+	return &internalGenerator{regexp.String(), func() ([]byte, error) {
 		i := genArgs.rng.Intn(numGens)
 		generator := generators[i]
 		return generator.Generate()
@@ -208,12 +252,12 @@ func opCapture(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator,
 	// Group indices are 0-based, but index 0 is the whole expression.
 	index := regexp.Cap - 1
 
-	return &internalGenerator{regexp.String(), func() string {
+	return &internalGenerator{regexp.String(), func() ([]byte, error) {
 		return args.CaptureGroupHandler(index, regexp.Name, groupRegexp, generator, args)
 	}}, nil
 }
 
-func defaultCaptureGroupHandler(index int, name string, group *syntax.Regexp, generator Generator, args *GeneratorArgs) string {
+func defaultCaptureGroupHandler(index int, name string, group *syntax.Regexp, generator Generator, args *GeneratorArgs) ([]byte, error) {
 	return generator.Generate()
 }
 
@@ -234,10 +278,14 @@ func enforceSingleSub(regexp *syntax.Regexp) error {
 }
 
 func createCharClassGenerator(name string, charClass *tCharClass, args *GeneratorArgs) (*internalGenerator, error) {
-	return &internalGenerator{name, func() string {
+	return &internalGenerator{name, func() ([]byte, error) {
 		i := args.rng.Int31n(charClass.TotalSize)
 		r := charClass.GetRuneAt(i)
-		return runesToString(r)
+		if args.ByteMode {
+			return runesToBytes(r)
+		} else {
+			return runesToUTF8(r), nil
+		}
 	}}, nil
 }
 
@@ -259,13 +307,17 @@ func createRepeatingGenerator(regexp *syntax.Regexp, genArgs *GeneratorArgs, min
 		max = int(genArgs.MaxUnboundedRepeatCount)
 	}
 
-	return &internalGenerator{regexp.String(), func() string {
+	return &internalGenerator{regexp.String(), func() ([]byte, error) {
 		n := min + genArgs.rng.Intn(max-min+1)
 
 		var result bytes.Buffer
 		for i := 0; i < n; i++ {
-			result.WriteString(generator.Generate())
+			value, err := generator.Generate()
+			if err != nil {
+				return nil, err
+			}
+			result.Write(value)
 		}
-		return result.String()
+		return result.Bytes(), nil
 	}}, nil
 }

+ 70 - 13
vendor/github.com/zach-klippenstein/goregen/regen.go → psiphon/common/regen/regen.go

@@ -14,19 +14,40 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
+/*
+ * Copyright (c) 2023, Psiphon Inc.
+ * All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
 /*
 Package regen is a library for generating random strings from regular expressions.
 The generated strings will match the expressions they were generated from. Similar
 to Ruby's randexp library.
 
 E.g.
-	regen.Generate("[a-z0-9]{1,64}")
+
+	regen.GenerateString("[a-z0-9]{1,64}")
+
 will return a lowercase alphanumeric string
 between 1 and 64 characters long.
 
 Expressions are parsed using the Go standard library's parser: http://golang.org/pkg/regexp/syntax/.
 
-Constraints
+# Constraints
 
 "." will generate any character, not necessarily a printable one.
 
@@ -34,7 +55,7 @@ Constraints
 If you care about the maximum number, specify it explicitly in the expression,
 e.g. "x{0,256}".
 
-Flags
+# Flags
 
 Flags can be passed to the parser by setting them in the GeneratorArgs struct.
 Newline flags are respected, and newlines won't be generated unless the appropriate flags for
@@ -48,7 +69,7 @@ The Perl character class flag is supported, and required if the pattern contains
 
 Unicode groups are not supported at this time. Support may be added in the future.
 
-Concurrent Use
+# Concurrent Use
 
 A generator can safely be used from multiple goroutines without locking.
 
@@ -63,7 +84,7 @@ the same source may get the same output. While obviously not cryptographically s
 benefit outweighs the risk of collisions. If you really care about preventing this, the solution is simple: don't
 call a single Generator from multiple goroutines.
 
-Benchmarks
+# Benchmarks
 
 Benchmarks are included for creating and running generators for limited-length,
 complex regexes, and simple, highly-repetitive regexes.
@@ -71,6 +92,7 @@ complex regexes, and simple, highly-repetitive regexes.
 	go test -bench .
 
 The complex benchmarks generate fake HTTP messages with the following regex:
+
 	POST (/[-a-zA-Z0-9_.]{3,12}){3,6}
 	Content-Length: [0-9]{2,3}
 	X-Auth-Token: [a-zA-Z0-9+/]{64}
@@ -79,12 +101,14 @@ The complex benchmarks generate fake HTTP messages with the following regex:
 	){3,15}[A-Za-z0-9+/]{60}([A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)
 
 The repetitive benchmarks use the regex
+
 	a{999}
 
 See regen_benchmarks_test.go for more information.
 
 On my mid-2014 MacBook Pro (2.6GHz Intel Core i5, 8GB 1600MHz DDR3),
 the results of running the benchmarks with minimal load are:
+
 	BenchmarkComplexCreation-4                       200	   8322160 ns/op
 	BenchmarkComplexGeneration-4                   10000	    153625 ns/op
 	BenchmarkLargeRepeatCreateSerial-4  	        3000	    411772 ns/op
@@ -96,6 +120,7 @@ import (
 	"fmt"
 	"math/rand"
 	"regexp/syntax"
+	"strings"
 )
 
 // DefaultMaxUnboundedRepeatCount is default value for MaxUnboundedRepeatCount.
@@ -107,7 +132,7 @@ const DefaultMaxUnboundedRepeatCount = 4096
 // group is the regular expression within the group (e.g. for `(\w+)`, group would be `\w+`).
 // generator is the generator for group.
 // args is the args used to create the generator calling this function.
-type CaptureGroupHandler func(index int, name string, group *syntax.Regexp, generator Generator, args *GeneratorArgs) string
+type CaptureGroupHandler func(index int, name string, group *syntax.Regexp, generator Generator, args *GeneratorArgs) ([]byte, error)
 
 // GeneratorArgs are arguments passed to NewGenerator that control how generators
 // are created.
@@ -131,6 +156,12 @@ type GeneratorArgs struct {
 	// from the expressions in the group.
 	CaptureGroupHandler CaptureGroupHandler
 
+	// ByteMode generates arbitrary bytes instead of valid UTF-8 strings.
+	// Defaults to false. When enabled, "." generates a byte in the range 0-255.
+	//
+	// ByteMode is not compatible with negated character classes (e.g. "[^a]").
+	ByteMode bool
+
 	// Used by generators.
 	rng *rand.Rand
 }
@@ -175,29 +206,36 @@ func (a *GeneratorArgs) Rng() *rand.Rand {
 	return a.rng
 }
 
-// Generator generates random strings.
+// Generator generates random bytes or strings.
 type Generator interface {
-	Generate() string
+	Generate() ([]byte, error)
 	String() string
 }
 
 /*
-Generate a random string that matches the regular expression pattern.
+GenerateString generates a random string that matches the regular expression pattern.
 If args is nil, default values are used.
 
 This function does not seed the default RNG, so you must call rand.Seed() if you want
 non-deterministic strings.
 */
-func Generate(pattern string) (string, error) {
+func GenerateString(pattern string) (string, error) {
 	generator, err := NewGenerator(pattern, nil)
 	if err != nil {
 		return "", err
 	}
-	return generator.Generate(), nil
+	b, err := generator.Generate()
+	return string(b), err
 }
 
-// NewGenerator creates a generator that returns random strings that match the regular expression in pattern.
-// If args is nil, default values are used.
+// NewGenerator creates a generator that returns random strings that match the
+// regular expression in pattern. If args is nil, default values are used.
+//
+// If ByteMode is true, pattern should not contain negated character
+// classes (e.g. "[^a]"). This limitation is due to how syntax.Parse handles
+// negated character classes, which is by replacing them with a positive
+// character range. This makes it impossible to infer the original negated
+// character class.
 func NewGenerator(pattern string, inputArgs *GeneratorArgs) (generator Generator, err error) {
 	args := GeneratorArgs{}
 
@@ -209,6 +247,25 @@ func NewGenerator(pattern string, inputArgs *GeneratorArgs) (generator Generator
 		return nil, err
 	}
 
+	if args.ByteMode {
+		negatedClasses := []string{
+			"[^",
+			"[[:^",
+			`\P`,
+			`\D`,
+			`\S`,
+			`\W`,
+		}
+		for _, negatedCls := range negatedClasses {
+			if strings.Contains(pattern, negatedCls) {
+				return nil, generatorError(nil, "negated character classes are not supported")
+			}
+		}
+		if strings.Contains(pattern, `\x{`) {
+			return nil, generatorError(nil, "only two digit hex codes are supported in byte mode")
+		}
+	}
+
 	var regexp *syntax.Regexp
 	regexp, err = syntax.Parse(pattern, args.Flags)
 	if err != nil {

+ 85 - 0
psiphon/common/regen/regen_benchmarks_test.go

@@ -0,0 +1,85 @@
+/*
+Copyright 2014 Zachary Klippenstein
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package regen
+
+import (
+	"math/rand"
+	"testing"
+)
+
+// BigFancyRegexp is the multi-line pattern used by the "complex" benchmarks.
+// It is a raw string, so the line breaks below are part of the pattern.
+const BigFancyRegexp = `
+POST (/[-a-zA-Z0-9_.]{3,12}){3,6}
+Content-Length: [0-9]{2,3}
+X-Auth-Token: [a-zA-Z0-9+/]{64}
+
+([A-Za-z0-9+/]{64}
+){3,15}[A-Za-z0-9+/]{60}([A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)
+`
+
+// rngSource is the fixed-seed source used by the complex benchmarks.
+var rngSource = rand.NewSource(42)
+
+// BenchmarkComplexCreation measures the code that creates generators, using
+// BigFancyRegexp as the pattern. It doesn't actually run the generators.
+func BenchmarkComplexCreation(b *testing.B) {
+	// Create the args here to save allocations in the loop.
+	args := &GeneratorArgs{
+		RngSource: rngSource,
+		Flags:     0,
+	}
+
+	for i := 0; i < b.N; i++ {
+		// Fail loudly rather than silently timing the error path.
+		if _, err := NewGenerator(BigFancyRegexp, args); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+// BenchmarkLargeRepeatCreateSerial measures generator creation for the
+// repetitive pattern a{999}. The args and the RNG source are allocated inside
+// the loop, so they are part of what is measured.
+func BenchmarkLargeRepeatCreateSerial(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		_, err := NewGenerator(`a{999}`, &GeneratorArgs{
+			RngSource: rand.NewSource(0),
+		})
+		// Fail loudly rather than silently timing the error path.
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+// BenchmarkComplexGeneration measures Generate on a generator built once from
+// BigFancyRegexp. Generator creation is excluded from the timing.
+func BenchmarkComplexGeneration(b *testing.B) {
+	args := &GeneratorArgs{
+		RngSource: rngSource,
+	}
+	generator, err := NewGenerator(BigFancyRegexp, args)
+	if err != nil {
+		// Report through the benchmark, matching
+		// BenchmarkLargeRepeatGenerateSerial, instead of panicking.
+		b.Fatal(err)
+	}
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		if _, err := generator.Generate(); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+// BenchmarkLargeRepeatGenerateSerial measures Generate on a generator built
+// once from the repetitive pattern a{999}. Generator creation is excluded from
+// the timing.
+func BenchmarkLargeRepeatGenerateSerial(b *testing.B) {
+	generator, err := NewGenerator(`a{999}`, &GeneratorArgs{
+		RngSource: rand.NewSource(0),
+	})
+	if err != nil {
+		b.Fatal(err)
+	}
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		// A generation error would invalidate the measurement.
+		if _, err := generator.Generate(); err != nil {
+			b.Fatal(err)
+		}
+	}
+}

+ 935 - 0
psiphon/common/regen/regen_test.go

@@ -0,0 +1,935 @@
+/*
+Copyright 2014 Zachary Klippenstein
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+/*
+ * Copyright (c) 2023, Psiphon Inc.
+ * All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+package regen
+
+import (
+	"encoding/hex"
+	"fmt"
+	"math"
+	"math/rand"
+	"os"
+	"regexp"
+	"regexp/syntax"
+	"strings"
+	"testing"
+
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
+)
+
+const (
+	// Each expression is generated and validated this many times.
+	SampleSize = 999
+
+	// Arbitrary limit in the standard package.
+	// See https://golang.org/src/regexp/syntax/parse.go?s=18885:18935#L796
+	MaxSupportedRepeatCount = 1000
+)
+
+// ExampleGenerateString generates a string with the one-shot helper and
+// confirms that it matches the pattern it was generated from.
+//
+// The example is named after GenerateString so that it attaches to an
+// identifier that exists in the package.
+func ExampleGenerateString() {
+	pattern := "[ab]{5}"
+	str, err := GenerateString(pattern)
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+
+	if matched, _ := regexp.MatchString(pattern, str); matched {
+		fmt.Println("Matches!")
+	}
+
+	// Output:
+	// Matches!
+}
+
+// ExampleNewGenerator builds a generator with a fixed seed, generates one
+// value and confirms that it matches the source pattern.
+func ExampleNewGenerator() {
+	pattern := "[ab]{5}"
+
+	// A constant seed makes the generated string identical on every run.
+	// Use a more random seed for real use (e.g. time-based).
+	seeded := &GeneratorArgs{
+		RngSource: rand.NewSource(0),
+	}
+	generator, _ := NewGenerator(pattern, seeded)
+
+	out, err := generator.Generate()
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+
+	matched, _ := regexp.MatchString(pattern, string(out))
+	if matched {
+		fmt.Println("Matches!")
+	}
+
+	// Output:
+	// Matches!
+}
+
+// ExampleNewGenerator_byteMode prints 100 hex-encoded values generated in
+// byte mode. It has no "Output:" comment because the values are random.
+//
+// The name attaches the example to NewGenerator; the suffix after the
+// underscore starts with a lowercase letter, as the example naming
+// convention requires.
+func ExampleNewGenerator_byteMode() {
+	// Build the generator once; only generation needs to repeat.
+	gen, err := NewGenerator(`([\x00-\x6a])\x00\x01\x02[\x00-\xff]{5}`, &GeneratorArgs{
+		ByteMode: true,
+	})
+	if err != nil {
+		panic(err)
+	}
+
+	for i := 0; i < 100; i++ {
+		x, err := gen.Generate()
+		if err != nil {
+			panic(err)
+		}
+		fmt.Println(hex.EncodeToString(x))
+	}
+}
+
+// ExampleNewGenerator_perl generates from a Perl character class (\d), which
+// needs the syntax.Perl flag, and checks the result against the equivalent
+// POSIX class.
+func ExampleNewGenerator_perl() {
+	perlArgs := &GeneratorArgs{
+		Flags: syntax.Perl,
+	}
+	generator, _ := NewGenerator(`\d{5}`, perlArgs)
+
+	out, err := generator.Generate()
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+
+	matched, _ := regexp.MatchString("[[:digit:]]{5}", string(out))
+	if matched {
+		fmt.Println("Matches!")
+	}
+	// Output:
+	// Matches!
+}
+
+// ExampleCaptureGroupHandler replaces the output of each named capture group
+// with a labelled version of the value the group's own generator produced.
+func ExampleCaptureGroupHandler() {
+	pattern := `Hello, (?P<firstname>[A-Z][a-z]{2,10}) (?P<lastname>[A-Z][a-z]{2,10})`
+
+	generator, _ := NewGenerator(pattern, &GeneratorArgs{
+		Flags: syntax.Perl,
+		CaptureGroupHandler: func(index int, name string, group *syntax.Regexp, generator Generator, args *GeneratorArgs) ([]byte, error) {
+			value, err := generator.Generate()
+			if err != nil {
+				return nil, err
+			}
+			if name == "firstname" {
+				return []byte(fmt.Sprintf("FirstName (e.g. %s)", string(value))), nil
+			}
+			return []byte(fmt.Sprintf("LastName (e.g. %s)", string(value))), nil
+		},
+	})
+
+	// Print to stderr since we're generating random output and can't assert equality.
+	value, err := generator.Generate()
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+	// Convert to string first: printing a []byte directly renders it as a
+	// list of decimal numbers rather than as text.
+	fmt.Fprintln(os.Stderr, string(value))
+
+	// Needed for "go test" to run this example. (Must be a blank line before.)
+
+	// Output:
+}
+
+// TestGeneratorArgs covers GeneratorArgs.initialize and GeneratorArgs.Rng:
+// the zero value initializes cleanly, the UnicodeGroups flag is rejected with
+// an error, inverted unbounded-repeat bounds panic, equal bounds are accepted,
+// and Rng panics before initialize but returns non-nil afterwards.
+func TestGeneratorArgs(t *testing.T) {
+	t.Parallel()
+
+	t.Run("Handle empty struct", func(t *testing.T) {
+		shouldNotPanic(t, func() {
+			args := GeneratorArgs{}
+
+			err := args.initialize()
+			if err != nil {
+				t.Fatal(err)
+			}
+		})
+	})
+
+	t.Run("Unicode groups not supported", func(t *testing.T) {
+		args := &GeneratorArgs{
+			Flags: syntax.UnicodeGroups,
+		}
+
+		err := args.initialize()
+		if err == nil {
+			t.Fatal("expected error")
+		}
+		if err.Error() != "UnicodeGroups not supported" {
+			t.Fatalf("unexpected error: %v", err)
+		}
+	})
+
+	t.Run("Panics if repeat bounds are invalid", func(t *testing.T) {
+		args := &GeneratorArgs{
+			MinUnboundedRepeatCount: 2,
+			MaxUnboundedRepeatCount: 1,
+		}
+
+		// The expected value is the exact panic message.
+		shouldPanicWith(t, func() {
+			_ = args.initialize()
+		}, "MinUnboundedRepeatCount(2) > MaxUnboundedRepeatCount(1)")
+	})
+
+	t.Run("Allow equal repeat bounds", func(t *testing.T) {
+		args := &GeneratorArgs{
+			MinUnboundedRepeatCount: 1,
+			MaxUnboundedRepeatCount: 1,
+		}
+
+		shouldNotPanic(t, func() {
+			err := args.initialize()
+			if err != nil {
+				t.Fatal(err)
+			}
+		})
+	})
+
+	t.Run("Rng", func(t *testing.T) {
+
+		t.Run("Panics if called before initialize", func(t *testing.T) {
+			args := &GeneratorArgs{}
+
+			shouldPanic(t, func() {
+				_ = args.Rng()
+			})
+		})
+
+		t.Run("Non-nil after initialize", func(t *testing.T) {
+			args := GeneratorArgs{}
+			err := args.initialize()
+			if err != nil {
+				t.Fatal(err)
+			}
+			rng := args.Rng()
+			if rng == nil {
+				t.Fatal("expected non-nil")
+			}
+		})
+
+	})
+}
+
+// TestNewGenerator checks that NewGenerator accepts nil and empty args and
+// that it surfaces errors raised while initializing the args.
+func TestNewGenerator(t *testing.T) {
+	t.Parallel()
+
+	t.Run("Handles nil GeneratorArgs", func(t *testing.T) {
+		generator, err := NewGenerator("", nil)
+		// Check the error first so a failure reports the real cause
+		// instead of a bare "expected non-nil".
+		if err != nil {
+			t.Fatal(err)
+		}
+		if generator == nil {
+			t.Fatal("expected non-nil")
+		}
+	})
+
+	t.Run("Handles empty GeneratorArgs", func(t *testing.T) {
+		generator, err := NewGenerator("", &GeneratorArgs{})
+		if err != nil {
+			t.Fatal(err)
+		}
+		if generator == nil {
+			t.Fatal("expected non-nil")
+		}
+	})
+
+	t.Run("Forwards errors from args initialization", func(t *testing.T) {
+		args := &GeneratorArgs{
+			Flags: syntax.UnicodeGroups,
+		}
+
+		_, err := NewGenerator("", args)
+		if err == nil {
+			t.Fatal("expected error")
+		}
+	})
+}
+
+// TestGenEmpty checks that the empty pattern only generates the empty string.
+func TestGenEmpty(t *testing.T) {
+	t.Parallel()
+
+	seeded := &GeneratorArgs{RngSource: rand.NewSource(0)}
+	testGeneratesStringMatching(t, seeded, "", "^$")
+}
+
+// TestGenLiterals checks that literal patterns generate strings matching
+// themselves.
+func TestGenLiterals(t *testing.T) {
+	t.Parallel()
+
+	literals := []string{"a", "abc"}
+	testGeneratesStringMatchingItself(t, nil, literals...)
+}
+
+// TestGenDotNotNl checks that "." generates matching output and that, with
+// default flags, none of the sampled values contains a newline.
+func TestGenDotNotNl(t *testing.T) {
+	t.Parallel()
+
+	t.Run("DotNotNl", func(t *testing.T) {
+		testGeneratesStringMatchingItself(t, nil, ".")
+	})
+
+	t.Run("No newlines are generated", func(t *testing.T) {
+		dotGen, _ := NewGenerator(".", nil)
+
+		// Sampling is a weak assertion, but exploring the entire generation
+		// space (2^32) takes far too long for a unit test.
+		for sample := 0; sample < SampleSize; sample++ {
+			out, genErr := dotGen.Generate()
+			if genErr != nil {
+				t.Fatal(genErr)
+			}
+			if strings.ContainsRune(string(out), '\n') {
+				t.Fatalf("unexpected newline in %q", out)
+			}
+		}
+	})
+}
+
+// TestGenStringStartEnd checks that ^ and $ contribute nothing to the output,
+// wherever they appear in the pattern.
+func TestGenStringStartEnd(t *testing.T) {
+	t.Parallel()
+
+	args := &GeneratorArgs{
+		RngSource: rand.NewSource(0),
+		Flags:     0,
+	}
+
+	// Every pattern must generate exactly "abc".
+	for _, pattern := range []string{"^abc$", "$abc^", "a^b$c"} {
+		testGeneratesStringMatching(t, args, pattern, "^abc$")
+	}
+}
+
+// TestGenQuestionMark checks the "?" operator on a literal, a group, a
+// character class and ".".
+func TestGenQuestionMark(t *testing.T) {
+	t.Parallel()
+
+	optionals := []string{
+		"a?",
+		"(abc)?",
+		"[ab]?",
+		".?",
+	}
+	testGeneratesStringMatchingItself(t, nil, optionals...)
+}
+
+// TestGenPlus checks that output generated from "a+" matches "a+".
+func TestGenPlus(t *testing.T) {
+	t.Parallel()
+
+	testGeneratesStringMatchingItself(t, nil, "a+")
+}
+
+// TestGenStar builds a histogram of output lengths for "a*" and checks that
+// the default and custom minimum and maximum repeat counts are actually hit.
+func TestGenStar(t *testing.T) {
+	t.Parallel()
+
+	t.Run("HitsDefaultMin", func(t *testing.T) {
+		regexp := "a*"
+		args := &GeneratorArgs{
+			RngSource: rand.NewSource(0),
+		}
+		counts := generateLenHistogram(regexp, DefaultMaxUnboundedRepeatCount, args)
+
+		if counts[0] == 0 {
+			t.Fatalf("count should be > 0")
+		}
+	})
+
+	t.Run("HitsCustomMin", func(t *testing.T) {
+		regexp := "a*"
+		args := &GeneratorArgs{
+			RngSource:               rand.NewSource(0),
+			MinUnboundedRepeatCount: 200,
+		}
+		counts := generateLenHistogram(regexp, DefaultMaxUnboundedRepeatCount, args)
+
+		if counts[200] == 0 {
+			t.Fatalf("count should be > 0")
+		}
+		// Nothing shorter than the custom minimum may be generated.
+		for i := 0; i < 200; i++ {
+			if counts[i] != 0 {
+				t.Fatalf("count should be 0")
+			}
+		}
+	})
+
+	t.Run("HitsDefaultMax", func(t *testing.T) {
+		regexp := "a*"
+		args := &GeneratorArgs{
+			RngSource: rand.NewSource(0),
+		}
+		counts := generateLenHistogram(regexp, DefaultMaxUnboundedRepeatCount, args)
+
+		// The histogram has one bucket per length from 0 to the longest
+		// output, so its size also bounds the longest output.
+		if len(counts) != DefaultMaxUnboundedRepeatCount+1 {
+			t.Fatalf("count should be %d", DefaultMaxUnboundedRepeatCount+1)
+		}
+		if counts[DefaultMaxUnboundedRepeatCount] == 0 {
+			t.Fatalf("count should be > 0")
+		}
+	})
+
+	t.Run("HitsCustomMax", func(t *testing.T) {
+		regexp := "a*"
+		args := &GeneratorArgs{
+			RngSource:               rand.NewSource(0),
+			MaxUnboundedRepeatCount: 200,
+		}
+		counts := generateLenHistogram(regexp, 200, args)
+
+		if len(counts) != 201 {
+			t.Fatalf("count should be 201")
+		}
+		if counts[200] == 0 {
+			t.Fatalf("count should be > 0")
+		}
+	})
+}
+
+// TestGenCharClassNotNl checks that simple character classes generate
+// matching output and that, with default flags, none of the values sampled
+// from a negated class contains a newline.
+func TestGenCharClassNotNl(t *testing.T) {
+	t.Parallel()
+
+	testGeneratesStringMatchingItself(t, nil,
+		"[a]",
+		"[abc]",
+		"[a-d]",
+		"[ac]",
+		"[0-9]",
+		"[a-z0-9]",
+	)
+
+	t.Run("No newlines are generated", func(t *testing.T) {
+
+		generator, _ := NewGenerator("[^a-zA-Z0-9]", nil)
+		for i := 0; i < SampleSize; i++ {
+			value, err := generator.Generate()
+			if err != nil {
+				t.Fatal(err)
+			}
+			if strings.Contains(string(value), "\n") {
+				t.Fatalf("unexpected newline in %q", value)
+			}
+		}
+
+	})
+
+}
+
+// TestGenNegativeCharClass checks that output generated from a negated
+// character class matches that class.
+func TestGenNegativeCharClass(t *testing.T) {
+	t.Parallel()
+
+	testGeneratesStringMatchingItself(t, nil, "[^a-zA-Z0-9]")
+}
+
+// TestGenAlternative checks that output generated from alternations matches
+// the alternation it came from.
+func TestGenAlternative(t *testing.T) {
+	t.Parallel()
+
+	testGeneratesStringMatchingItself(t, nil,
+		"a|b",
+		"abc|def|ghi",
+		"[ab]|[cd]",
+		"foo|bar|baz", // rewrites to foo|ba[rz]
+	)
+}
+
+// TestGenCapture checks that capture groups generate only the content of the
+// group.
+func TestGenCapture(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		pattern  string
+		expected string
+	}{
+		{"(abc)", "^abc$"},
+		{"(a)(b)(c)", "^abc$"},
+		{"()", "^$"},
+	}
+	for _, c := range cases {
+		testGeneratesStringMatching(t, nil, c.pattern, c.expected)
+	}
+}
+
+// TestGenConcat checks that output generated from two concatenated character
+// classes matches the pattern.
+func TestGenConcat(t *testing.T) {
+	t.Parallel()
+
+	testGeneratesStringMatchingItself(t, nil, "[ab][cd]")
+}
+
+// TestGenRepeat checks the {min,max} repeat operator. Unbounded repeats must
+// reach the default or custom maximum, and bounded repeats must reach both
+// ends of their range and never leave it.
+func TestGenRepeat(t *testing.T) {
+	t.Parallel()
+
+	t.Run("Unbounded", func(t *testing.T) {
+		testGeneratesStringMatchingItself(t, nil, `a{1,}`)
+
+		t.Run("HitsDefaultMax", func(t *testing.T) {
+			regexp := "a{0,}"
+			args := &GeneratorArgs{
+				RngSource: rand.NewSource(0),
+			}
+			counts := generateLenHistogram(regexp, DefaultMaxUnboundedRepeatCount, args)
+
+			if len(counts) != DefaultMaxUnboundedRepeatCount+1 {
+				t.Fatalf("count should be %d", DefaultMaxUnboundedRepeatCount+1)
+			}
+			if counts[DefaultMaxUnboundedRepeatCount] == 0 {
+				t.Fatalf("count should be > 0")
+			}
+		})
+
+		t.Run("HitsCustomMax", func(t *testing.T) {
+			regexp := "a{0,}"
+			args := &GeneratorArgs{
+				RngSource:               rand.NewSource(0),
+				MaxUnboundedRepeatCount: 200,
+			}
+			counts := generateLenHistogram(regexp, 200, args)
+
+			if len(counts) != 201 {
+				t.Fatalf("count should be 201")
+			}
+			if counts[200] == 0 {
+				t.Fatalf("count should be > 0")
+			}
+		})
+	})
+
+	t.Run("HitsMin", func(t *testing.T) {
+		regexp := "a{0,3}"
+		args := &GeneratorArgs{
+			RngSource: rand.NewSource(0),
+		}
+		counts := generateLenHistogram(regexp, 3, args)
+
+		if len(counts) != 4 {
+			t.Fatalf("count should be 4")
+		}
+		if counts[0] == 0 {
+			t.Fatalf("count should be > 0")
+		}
+	})
+
+	t.Run("HitsMax", func(t *testing.T) {
+		regexp := "a{0,3}"
+		args := &GeneratorArgs{
+			RngSource: rand.NewSource(0),
+		}
+		counts := generateLenHistogram(regexp, 3, args)
+
+		if len(counts) != 4 {
+			t.Fatalf("count should be 4")
+		}
+		if counts[3] == 0 {
+			t.Fatalf("count should be > 0")
+		}
+	})
+
+	t.Run("IsWithinBounds", func(t *testing.T) {
+		regexp := "a{5,10}"
+		args := &GeneratorArgs{
+			RngSource: rand.NewSource(0),
+		}
+		counts := generateLenHistogram(regexp, 10, args)
+
+		if len(counts) != 11 {
+			t.Fatalf("count should be 11")
+		}
+
+		// Lengths 0-4 must never occur; lengths 5-10 must all occur.
+		// len(counts) is known to be 11 here, so ranging over it visits
+		// exactly the indices 0-10.
+		for i, count := range counts {
+			if i < 5 {
+				if count != 0 {
+					t.Fatalf("count should be 0")
+				}
+			} else if count == 0 {
+				t.Fatalf("count should be > 0")
+			}
+		}
+	})
+
+}
+
+// TestGenCharClasses checks that named ASCII classes (plain and negated) and
+// Perl classes generate output matching the class. The Perl classes are
+// parsed with the syntax.Perl flag.
+func TestGenCharClasses(t *testing.T) {
+	t.Parallel()
+
+	t.Run("Ascii", func(t *testing.T) {
+		testGeneratesStringMatchingItself(t, nil,
+			"[[:alnum:]]",
+			"[[:alpha:]]",
+			"[[:ascii:]]",
+			"[[:blank:]]",
+			"[[:cntrl:]]",
+			"[[:digit:]]",
+			"[[:graph:]]",
+			"[[:lower:]]",
+			"[[:print:]]",
+			"[[:punct:]]",
+			"[[:space:]]",
+			"[[:upper:]]",
+			"[[:word:]]",
+			"[[:xdigit:]]",
+			"[[:^alnum:]]",
+			"[[:^alpha:]]",
+			"[[:^ascii:]]",
+			"[[:^blank:]]",
+			"[[:^cntrl:]]",
+			"[[:^digit:]]",
+			"[[:^graph:]]",
+			"[[:^lower:]]",
+			"[[:^print:]]",
+			"[[:^punct:]]",
+			"[[:^space:]]",
+			"[[:^upper:]]",
+			"[[:^word:]]",
+			"[[:^xdigit:]]",
+		)
+	})
+
+	t.Run("Perl", func(t *testing.T) {
+		args := &GeneratorArgs{
+			Flags: syntax.Perl,
+		}
+
+		testGeneratesStringMatchingItself(t, args,
+			`\d`,
+			`\s`,
+			`\w`,
+			`\D`,
+			`\S`,
+			`\W`,
+		)
+	})
+}
+
+// TestCaptureGroupHandler checks that the handler is called once per capture
+// group (and not for the non-capturing group), that it receives the expected
+// index, name, group and generator, and that its return value replaces the
+// group's output.
+func TestCaptureGroupHandler(t *testing.T) {
+	t.Parallel()
+
+	callCount := 0
+
+	gen, err := NewGenerator(`(?:foo) (bar) (?P<name>baz)`, &GeneratorArgs{
+		Flags: syntax.PerlX,
+		CaptureGroupHandler: func(index int, name string, group *syntax.Regexp, generator Generator, args *GeneratorArgs) ([]byte, error) {
+			callCount++
+
+			if index >= 2 {
+				t.Fatalf("index should be < 2")
+			}
+
+			if index == 0 {
+				if name != "" {
+					t.Fatalf("name should be empty")
+				}
+				if group.String() != "bar" {
+					t.Fatalf("group should be 'bar'")
+				}
+				value, err := generator.Generate()
+				if err != nil {
+					t.Fatalf("err should be nil")
+				}
+				if string(value) != "bar" {
+					t.Fatalf("value should be 'bar'")
+				}
+				return []byte("one"), nil
+			}
+
+			// Index 1
+			if name != "name" {
+				t.Fatalf("name should be 'name'")
+			}
+			if group.String() != "baz" {
+				t.Fatalf("group should be 'baz'")
+			}
+			value, err := generator.Generate()
+			if err != nil {
+				t.Fatalf("err should be nil")
+			}
+			if string(value) != "baz" {
+				t.Fatalf("value should be 'baz'")
+			}
+			return []byte("two"), nil
+		},
+	})
+	if err != nil {
+		t.Fatalf("err should be nil")
+	}
+
+	// Check the error so a generation failure is reported as such instead
+	// of showing up as a confusing value mismatch below.
+	value, err := gen.Generate()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if string(value) != "foo one two" {
+		t.Fatalf("value should be 'foo one two'")
+	}
+	if callCount != 2 {
+		t.Fatalf("callCount should be 2")
+	}
+}
+
+// Byte mode tests
+
+// TestByteModeUniform generates 200,000 values per pattern in byte mode,
+// checks that each has the expected length, and checks with isUniform that
+// the generated byte values are evenly spread over the expected range (the
+// whole 0-255 range unless the case sets uniformRange).
+func TestByteModeUniform(t *testing.T) {
+	t.Parallel()
+
+	type test struct {
+		name         string
+		pattern      string
+		length       int   // length of generated bytes
+		uniformRange []int // [min, max] byte values expected to be uniformly generated
+		flags        syntax.Flags
+	}
+
+	tests := []test{
+		{name: "any byte not NL", pattern: ".", length: 1},
+		{name: "any byte", pattern: ".", length: 1, flags: syntax.MatchNL},
+		{name: "class range", pattern: `[\x00-\xff]`, length: 1},
+		{name: "class multi range", pattern: `[\x00-\x7f\x80-\xff]`, length: 1},
+		{name: "grouping", pattern: `([\x00-\xff])`, length: 1},
+		{name: "empty strings", pattern: `^[\x00-\xff]$`, length: 1},
+		{name: "exactly 1", pattern: `[\x00-\xff]{1}`, length: 1},
+		{name: "exactly 10", pattern: `[\x00-\xff]{10}`, length: 10},
+		{name: "repetition 1", pattern: `[\x00-\xff]{1,1}`, length: 1},
+		{name: "alteration", pattern: `([[:ascii:]]|[\x80-\xff])`, length: 1},
+		{
+			name:         "printable ascii",
+			pattern:      `[[:print:]]`,
+			length:       1,
+			uniformRange: []int{' ', '~'},
+		},
+		{
+			name:         "digits",
+			pattern:      `[0-9]{5}`,
+			length:       5,
+			uniformRange: []int{'0', '9'},
+		},
+		{
+			name:         "digits ascii char class",
+			pattern:      `[[:digit:]]`,
+			length:       1,
+			uniformRange: []int{'0', '9'},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+
+			// One bucket per possible byte value.
+			buckets := make([]int, 256)
+			iters := 200_000
+
+			rng, err := prng.NewPRNG()
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			gen, err := NewGenerator(tt.pattern, &GeneratorArgs{
+				RngSource: rng,
+				Flags:     tt.flags,
+				ByteMode:  true,
+			})
+
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			for i := 0; i < iters; i++ {
+				value, err := gen.Generate()
+				if err != nil {
+					t.Fatal(err)
+				}
+				if len(value) != tt.length {
+					t.Fatalf("expected length %d, got %d", tt.length, len(value))
+				}
+				for _, b := range value {
+					buckets[int(b)]++
+				}
+			}
+
+			// Default to the full byte range when the case does not narrow it.
+			uniformRange := []int{0, 255}
+			if tt.uniformRange != nil {
+				if len(tt.uniformRange) != 2 {
+					t.Fatal("expected uniformRange to be a slice of length 2")
+				}
+				uniformRange = tt.uniformRange
+			}
+
+			// Checks if generated bytes are uniformly distributed across
+			// the buckets in the range uniformRange[0] to uniformRange[1].
+			expectedCount := iters * tt.length / (uniformRange[1] - uniformRange[0] + 1)
+			if !isUniform(buckets[uniformRange[0]:uniformRange[1]+1], expectedCount) {
+				t.Fatalf("expected uniform distribution: %v", buckets[uniformRange[0]:uniformRange[1]+1])
+			}
+		})
+	}
+
+}
+
+// TestByteModeNegatedClasses checks that NewGenerator rejects every form of
+// negated character class when ByteMode is enabled, returning a nil generator
+// and the expected error message.
+func TestByteModeNegatedClasses(t *testing.T) {
+	t.Parallel()
+
+	patterns := []string{
+		"[^0-9]",
+		"\\P",
+		"\\D",
+		"\\S",
+		"\\W",
+		"[^[:ascii:]]",
+		"[[:^ascii:]]",
+	}
+
+	errStr := "negated character classes are not supported"
+
+	for _, pattern := range patterns {
+		gen, err := NewGenerator(pattern, &GeneratorArgs{
+			ByteMode: true,
+		})
+		// Check err before calling err.Error(), so a missing error is a
+		// test failure rather than a nil dereference.
+		if err == nil {
+			t.Fatalf("expected error for %s", pattern)
+		}
+		if gen != nil {
+			t.Fatalf("expected nil generator for %s", pattern)
+		}
+		if err.Error() != errStr {
+			t.Fatalf("expected error %q, got %q", errStr, err.Error())
+		}
+	}
+}
+
+// testGeneratesStringMatchingItself runs one subtest per pattern, asserting
+// that output generated from the pattern also matches that same pattern.
+func testGeneratesStringMatchingItself(t *testing.T, args *GeneratorArgs, patterns ...string) {
+	t.Helper()
+	for _, p := range patterns {
+		name := fmt.Sprintf("String generated from /%s/ matches itself", p)
+		t.Run(name, func(t *testing.T) {
+			if err := shouldGenerateStringMatching(p, p, args); err != nil {
+				t.Fatal(err)
+			}
+		})
+	}
+}
+
+// testGeneratesStringMatching runs a subtest asserting that output generated
+// from pattern matches expectedPattern.
+func testGeneratesStringMatching(t *testing.T, args *GeneratorArgs, pattern, expectedPattern string) {
+	t.Helper()
+	name := fmt.Sprintf("String generated from /%s/ matches /%s/", pattern, expectedPattern)
+	t.Run(name, func(t *testing.T) {
+		if err := shouldGenerateStringMatching(pattern, expectedPattern, args); err != nil {
+			t.Fatal(err)
+		}
+	})
+}
+
+// shouldGenerateStringMatching runs shouldGenerateStringMatchingTimes with
+// SampleSize iterations.
+func shouldGenerateStringMatching(pattern, expectedPattern string, args *GeneratorArgs) error {
+	return shouldGenerateStringMatchingTimes(pattern, expectedPattern, args, SampleSize)
+}
+
+// shouldGenerateStringMatchingTimes generates `times` values from pattern and
+// returns an error describing the first value that does not match
+// expectedPattern. It returns nil if every value matches.
+//
+// Failures to build the generator, compile expectedPattern or generate a
+// value are returned as errors rather than panics, so callers can report them
+// through the test.
+func shouldGenerateStringMatchingTimes(pattern, expectedPattern string, args *GeneratorArgs, times int) error {
+	generator, err := NewGenerator(pattern, args)
+	if err != nil {
+		return fmt.Errorf("creating generator for /%s/: %w", pattern, err)
+	}
+
+	// Compile the expected pattern once instead of on every iteration.
+	expected, err := regexp.Compile(expectedPattern)
+	if err != nil {
+		return fmt.Errorf("compiling expected pattern /%s/: %w", expectedPattern, err)
+	}
+
+	for i := 0; i < times; i++ {
+		result, err := generator.Generate()
+		if err != nil {
+			return fmt.Errorf("generating from /%s/: %w", pattern, err)
+		}
+		if !expected.Match(result) {
+			return fmt.Errorf("string “%s” generated from /%s/ did not match /%s/",
+				result, pattern, expectedPattern)
+		}
+	}
+
+	return nil
+}
+
+// generateLenHistogram generates max(maxLen*4, SampleSize) values from regexp
+// and returns a histogram of their lengths: counts[n] is the number of values
+// of length n, and len(counts) is the longest length seen plus one.
+// It panics if the generator cannot be created or a generation fails.
+func generateLenHistogram(regexp string, maxLen int, args *GeneratorArgs) (counts []int) {
+	gen, err := NewGenerator(regexp, args)
+	if err != nil {
+		panic(err)
+	}
+
+	for remaining := max(maxLen*4, SampleSize); remaining > 0; remaining-- {
+		out, genErr := gen.Generate()
+		if genErr != nil {
+			panic(genErr)
+		}
+
+		n := len(out)
+		// Extend with zeroed buckets until index n exists.
+		for len(counts) <= n {
+			counts = append(counts, 0)
+		}
+		counts[n]++
+	}
+
+	return counts
+}
+
+// isUnifrom performs a chi-squared test with 0.025 significance.
+// Each bucket in xs is compared against the expected_value.
+func isUniform(xs []int, expected_value int) bool {
+	critical_squared := float64(25.24) // = 5.024 ^ 2 at 0.025
+	for _, x := range xs {
+		chi_squared := math.Pow(float64(x-expected_value), 2) / float64(expected_value)
+		if chi_squared > critical_squared {
+			return false
+		}
+	}
+	return true
+}
+
+// max returns the largest of values. It panics if called with no arguments.
+func max(values ...int) int {
+	largest := values[0]
+	for _, candidate := range values[1:] {
+		if largest < candidate {
+			largest = candidate
+		}
+	}
+	return largest
+}
+
+// shouldPanic reports a test error unless f panics.
+func shouldPanic(t *testing.T, f func()) {
+	t.Helper()
+	// Swallow the panic; when f panics, the Errorf below is never reached.
+	defer func() { _ = recover() }()
+	f()
+	t.Errorf("should have panicked")
+}
+
+// shouldPanicWith reports a test error unless f panics with a value equal to
+// the string expected. Exactly one error is reported per failure: either
+// "should have panicked" when f returns normally, or a mismatch message when
+// it panics with a different value.
+func shouldPanicWith(t *testing.T, f func(), expected string) {
+	t.Helper()
+	defer func() {
+		r := recover()
+		if r == nil {
+			// f returned normally, so there is no panic value to compare.
+			t.Errorf("should have panicked")
+			return
+		}
+		if r != expected {
+			t.Errorf("expected panic %q, got %q", expected, r)
+		}
+	}()
+	f()
+}
+
+// shouldNotPanic runs f and reports a test error if it panics. The panic is
+// recovered, so the calling test keeps running.
+func shouldNotPanic(t *testing.T, f func()) {
+	t.Helper()
+	defer func() {
+		if r := recover(); r != nil {
+			t.Error("should not have panicked")
+		}
+	}()
+	f()
+}

+ 34 - 34
vendor/github.com/zach-klippenstein/goregen/regexp_format.go → psiphon/common/regen/regexp_format.go

@@ -14,6 +14,25 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
+/*
+ * Copyright (c) 2023, Psiphon Inc.
+ * All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
 package regen
 
 import (
@@ -30,20 +49,6 @@ func inspectRegexpToString(r *syntax.Regexp) string {
 	return buffer.String()
 }
 
-// inspectPatternsToString returns a string describing one or more regular expressions.
-func inspectPatternsToString(simplify bool, patterns ...string) string {
-	var buffer bytes.Buffer
-	for _, pattern := range patterns {
-		inspectPatternsToWriter(simplify, &buffer, pattern)
-	}
-	return buffer.String()
-}
-func inspectPatternsToWriter(simplify bool, w io.Writer, patterns ...string) {
-	for _, pattern := range patterns {
-		inspectRegexpToWriter(w, parseOrPanic(simplify, pattern))
-	}
-}
-
 func inspectRegexpToWriter(w io.Writer, r ...*syntax.Regexp) {
 	for _, regexp := range r {
 		inspectWithIndent(regexp, "", w)
@@ -63,37 +68,32 @@ func inspectWithIndent(r *syntax.Regexp, indent string, w io.Writer) {
 	} else {
 		fmt.Fprintf(w, "%s  Sub: []\n", indent)
 	}
-	fmt.Fprintf(w, "%s  Rune: %s (%s)\n", indent, runesToString(r.Rune...), runesToDecimalString(r.Rune))
+	fmt.Fprintf(w, "%s  Rune: %s (%s)\n", indent, runesToUTF8(r.Rune...), runesToDecimalString(r.Rune))
 	fmt.Fprintf(w, "%s  [Min, Max]: [%d, %d]\n", indent, r.Min, r.Max)
 	fmt.Fprintf(w, "%s  Cap: %d\n", indent, r.Cap)
 	fmt.Fprintf(w, "%s  Name: %s\n", indent, r.Name)
 }
 
-// ParseOrPanic parses a regular expression into an AST.
-// Panics on error.
-func parseOrPanic(simplify bool, pattern string) *syntax.Regexp {
-	regexp, err := syntax.Parse(pattern, 0)
-	if err != nil {
-		panic(err)
-	}
-	if simplify {
-		regexp = regexp.Simplify()
+// runesToUTF8 converts a slice of runes to the UTF-8 encoded bytes of the string they represent.
+func runesToUTF8(runes ...rune) []byte {
+	var buffer bytes.Buffer
+	for _, r := range runes {
+		buffer.WriteRune(r)
 	}
-	return regexp
+	return buffer.Bytes()
 }
 
-// runesToString converts a slice of runes to the string they represent.
-func runesToString(runes ...rune) string {
-	defer func() {
-		if err := recover(); err != nil {
-			panic(fmt.Errorf("RunesToString panicked"))
-		}
-	}()
+// runesToBytes converts a slice of runes to a slice of bytes.
+// Returns an error if any rune is outside the range [0, 255].
+func runesToBytes(runes ...rune) ([]byte, error) {
 	var buffer bytes.Buffer
 	for _, r := range runes {
-		buffer.WriteRune(r)
+		if r < 0 || r > 255 {
+			return nil, fmt.Errorf("RunesToBytes: rune out of range")
+		}
+		buffer.WriteByte(byte(r))
 	}
-	return buffer.String()
+	return buffer.Bytes(), nil
 }
 
 // RunesToDecimalString converts a slice of runes to their comma-separated decimal values.

+ 0 - 0
vendor/github.com/zach-klippenstein/goregen/rng.go → psiphon/common/regen/rng.go


+ 47 - 0
psiphon/common/regen/rng_test.go

@@ -0,0 +1,47 @@
+/*
+Copyright 2014 Zachary Klippenstein
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package regen
+
+import "testing"
+
+// TestXorShift64 checks basic properties of xorShift64Source over SampleSize
+// draws: Int63 never yields a negative value, and a zero seed does not
+// produce an all-zero sequence.
+func TestXorShift64(t *testing.T) {
+	t.Run("Int63 should never return negative numbers", func(t *testing.T) {
+		source := xorShift64Source(1)
+		for i := 0; i < SampleSize; i++ {
+			val := source.Int63()
+
+			if val < 0 {
+				t.Fatal("Int63 returned a negative number")
+			}
+		}
+	})
+
+	t.Run("Should not only return zeros", func(t *testing.T) {
+		source := xorShift64Source(0)
+		nonZeroCount := 0
+
+		for i := 0; i < SampleSize; i++ {
+			if source.Int63() != 0 {
+				nonZeroCount++
+			}
+		}
+
+		// nonZeroCount is never negative, so the only failing case is that
+		// every sample was zero; report exactly that.
+		if nonZeroCount == 0 {
+			t.Fatal("Int63 returned only zeros")
+		}
+	})
+}

+ 693 - 0
psiphon/common/transforms/httpNormalizer.go

@@ -0,0 +1,693 @@
+/*
+ * Copyright (c) 2023, Psiphon Inc.
+ * All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+package transforms
+
+import (
+	"bytes"
+	stderrors "errors"
+	"io"
+	"net"
+	"net/textproto"
+	"strconv"
+	"sync/atomic"
+	"time"
+
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
+)
+
+const (
+	// httpNormalizerReadReqLineAndHeader HTTPNormalizer is waiting to finish
+	// reading the Request-Line, and headers, of the next request from the
+	// underlying net.Conn.
+	httpNormalizerReadReqLineAndHeader = 0
+	// httpNormalizerReadBody HTTPNormalizer is waiting to finish reading the
+	// current request body from the underlying net.Conn.
+	httpNormalizerReadBody = 1
+
+	// httpNormalizerRequestLine is a valid Request-Line used by the normalizer.
+	httpNormalizerRequestLine = "POST / HTTP/1.1"
+	hostHeader                = "Host"
+	contentLengthHeader       = "Content-Length"
+	cookieHeader              = "Cookie"
+	rangeHeader               = "Range"
+)
+
+var ErrPassthroughActive = stderrors.New("passthrough")
+
+// HTTPNormalizer wraps a net.Conn, intercepting Read calls, and normalizes any
+// HTTP requests that are read. The HTTP request components preceding the body
+// are normalized; i.e. the Request-Line and headers.
+//
+// Each HTTP request read from the underlying net.Conn is normalized and then
+// returned over subsequent Read calls.
+//
+// HTTPNormalizer is not safe for concurrent use.
+type HTTPNormalizer struct {
+	// state is the HTTPNormalizer state. Possible values are
+	// httpNormalizerReadReqLineAndHeader and httpNormalizerReadBody.
+	state int64
+	// b is used to buffer the accumulated bytes of the current request
+	// until the Request-Line, and headers, are read from the underlying
+	// net.Conn, normalized, and returned in one, or more, Read calls. May
+	// contain bytes of the current request body and subsequent requests until
+	// they are processed.
+	b bytes.Buffer
+	// maxReqLineAndHeadersSize is the maximum number of bytes the normalizer
+	// will read before establishing a passthrough session, or rejecting the
+	// connection, if the request body of the current request has not been
+	// reached.
+	// No limit is applied if the value is 0.
+	maxReqLineAndHeadersSize int
+	// scanIndex is the index that the bytes in b have been processed up to.
+	// Bytes before this index in b will not contain the RequestLine, or
+	// headers, of the current request after a Read call. Applies when state is
+	// httpNormalizerReadReqLineAndHeader.
+	scanIndex int
+	// readRequestLine is set to true when the Request-Line of the current
+	// request has been read. Applies when state is httpNormalizerReadReqLineAndHeader.
+	readRequestLine bool
+	// reqLineAndHeadersBuf is the buffer used to stage the next normalized
+	// Request-Line, and headers, before outputting them in Read.
+	reqLineAndHeadersBuf bytes.Buffer
+	// headers is the staging area for preserved headers and is reset after the
+	// Request-Line, and headers, of the current request are processed.
+	headers map[string][]byte
+	// contentLength of the current request. Reset after the Request-Line, and
+	// headers, of the current request are processed
+	contentLength *uint64
+	// preserveHeaders are the headers to preserve during request normalization.
+	preserveHeaders []string
+	// prohibitedHeaders is a list of HTTP headers to check for in the
+	// request. If one of these headers is found, then a passthrough is
+	// performed. This is used to defend against abuse.
+	// Limitation: prohibited headers are only logged when passthrough is
+	// configured and passthroughLogPassthrough is set.
+	prohibitedHeaders []string
+	// headerWriteOrder is the order in which headers are written if set. Used
+	// for testing.
+	headerWriteOrder []string
+	// readRemain is the number of remaining request body bytes of the current
+	// request to read from the underlying net.Conn.
+	readRemain uint64
+	// copyRemain is the number of remaining bytes of the current request to
+	// return over one, or more, Read calls.
+	copyRemain uint64
+	// validateMeekCookie is called with the cookie header value of the current
+	// request when it is received. If it returns an error, a passthrough
+	// session is established when passthrough is configured; otherwise Read
+	// returns the error.
+	// Note: if there are multiple cookie headers, even though prohibited by
+	// rfc6265, then validateMeekCookie will only be invoked once with the
+	// first one received.
+	validateMeekCookie func(rawCookies []byte) ([]byte, error)
+	// ValidateMeekCookieResult stores the result from calling
+	// validateMeekCookie.
+	ValidateMeekCookieResult []byte
+	// passthrough is set to 1 if the normalizer has established a passthrough
+	// session; otherwise 0.
+	// Note: may be accessed concurrently so must be get and set atomically.
+	// E.g. the net.Conn interface methods implemented by HTTPNormalizer may be
+	// called concurrent to each other.
+	passthrough int32
+	// passthroughDialer is used to establish any passthrough sessions.
+	passthroughDialer func(network, address string) (net.Conn, error)
+	// passthroughAddress is the passthrough address that will be used for any
+	// passthrough sessions.
+	passthroughAddress string
+	// passthroughLogPassthrough is called when a passthrough session is
+	// initiated.
+	passthroughLogPassthrough func(clientIP string, tunnelError error, logFields map[string]interface{})
+
+	net.Conn
+}
+
+func NewHTTPNormalizer(conn net.Conn) *HTTPNormalizer {
+	t := HTTPNormalizer{
+		Conn: conn,
+	}
+
+	// TODO/perf: could pre-alloc t.b, and t.reqLineAndHeadersBuf,
+	// with (*bytes.Buffer).Grow().
+
+	t.reqLineAndHeadersBuf.WriteString(httpNormalizerRequestLine)
+
+	t.preserveHeaders = []string{
+		hostHeader,
+		contentLengthHeader,
+		cookieHeader,
+		rangeHeader,
+	}
+
+	return &t
+}
+
+// Read implements the net.Conn interface.
+//
+// Note: it is assumed that the underlying transport, net.Conn, is a reliable
+// stream transport, i.e. TCP, therefore it is required that the caller stop
+// calling Read() on an instance of HTTPNormalizer after an error is returned
+// because, following this assumption, the connection will have failed when a
+// Read() call to the underlying net.Conn fails; a new connection must be
+// established, net.Conn, and wrapped with a new HTTPNormalizer.
+//
+// Warning: Does not handle chunked encoding. Must be called synchronously.
+func (t *HTTPNormalizer) Read(buffer []byte) (int, error) {
+
+	if atomic.LoadInt32(&t.passthrough) == 1 {
+		return 0, io.EOF
+	}
+
+	// TODO/perf: allocate on-demand
+	if t.headers == nil {
+		t.headers = make(map[string][]byte)
+	}
+
+	if t.state == httpNormalizerReadReqLineAndHeader {
+
+		// perf: read into caller's buffer instead of allocating a new one.
+		// perf: theoretically it could be more performant to read directly
+		// into t.b, but there is no mechanism to do so with bytes.Buffer.
+		n, err := t.Conn.Read(buffer)
+
+		if n > 0 {
+			// Do not need to check return value. Applies to all subsequent
+			// calls to t.b.Write() and this comment will not be repeated for
+			// each. See https://github.com/golang/go/blob/1e9ff255a130200fcc4ec5e911d28181fce947d5/src/bytes/buffer.go#L164.
+			t.b.Write(buffer[:n])
+		}
+
+		crlf := []byte("\r\n")
+		doublecrlf := []byte("\r\n\r\n")
+
+		// Check if the maximum number of bytes to read before the request body
+		// has been exceeded first.
+		// Note: could check if max header size will be exceeded before Read
+		// call or ensure the buffer passed into Read is no larger than
+		// t.maxReqLineAndHeadersSize-t.b.Len().
+		if t.maxReqLineAndHeadersSize > 0 && t.b.Len() > t.maxReqLineAndHeadersSize && !bytes.Contains(t.b.Bytes()[:t.maxReqLineAndHeadersSize], doublecrlf) {
+
+			if t.passthroughConfigured() {
+
+				t.startPassthrough(errors.TraceNew("maxReqLineAndHeadersSize exceeded before request body received"), nil)
+
+				return 0, nil
+			}
+
+			return 0, errors.Tracef("%d exceeds maxReqLineAndHeadersSize %d", t.b.Len(), t.maxReqLineAndHeadersSize)
+		}
+
+		if err != nil {
+			// Do not wrap any I/O err returned by Conn
+			return 0, err
+		}
+
+		// preserve headers
+		//
+		// TODO/perf: instead of storing headers in a map they could be
+		// processed and written as they are parsed, but benchmarking this
+		// change shows no measurable change in performance.
+		//
+		// TODO/perf: skip Request-Line, e.g. "GET /foo HTTP/1.1"
+
+		reachedBody := false
+
+		for {
+
+			// NOTE: could add guard here for t.scanIndex < t.b.Len(),
+			// but should never happen.
+			i := bytes.Index(t.b.Bytes()[t.scanIndex:], crlf)
+
+			var header []byte
+			if i == -1 {
+				break // no more CRLF separated headers in t.b
+			} else {
+				header = t.b.Bytes()[t.scanIndex : t.scanIndex+i]
+			}
+
+			if len(header) == 0 && t.readRequestLine {
+				// Zero-length header line means the end of the request headers
+				// has been reached.
+				reachedBody = true
+				break
+			}
+
+			if !t.readRequestLine {
+				t.readRequestLine = true
+			}
+
+			if len(t.headers) >= len(t.preserveHeaders) {
+				t.scanIndex += i + len(crlf)
+				continue // found all headers, continue until final CRLF
+			}
+
+			colon := bytes.Index(header, []byte(":"))
+			if colon == -1 {
+				t.scanIndex += i + len(crlf)
+				continue // not a header, skip
+			}
+
+			// Allow for space before header and trim whitespace around
+			// value.
+			k := textproto.TrimBytes(header[:colon])
+			v := textproto.TrimBytes(header[colon+1:]) // skip over ":"
+
+			err = nil
+			var logFields map[string]interface{}
+
+			if t.validateMeekCookie != nil && t.ValidateMeekCookieResult == nil && bytes.Equal(k, []byte(cookieHeader)) {
+				t.ValidateMeekCookieResult, err = t.validateMeekCookie(v)
+				if err != nil {
+					err = errors.TraceMsg(err, "invalid meek cookie")
+				}
+			}
+
+			if err == nil {
+				if bytes.Equal(k, []byte(contentLengthHeader)) {
+					var cl uint64
+					cl, err = strconv.ParseUint(string(v), 10, 63)
+					if err != nil {
+						err = errors.TraceMsg(err, "invalid Content-Length")
+					} else {
+						t.contentLength = &cl
+					}
+				}
+			}
+
+			if err == nil {
+				// Do passthrough if a prohibited header is found
+				for _, h := range t.prohibitedHeaders {
+
+					// TODO/perf: consider using map, but array may be faster
+					// and use less mem.
+					if bytes.Equal(k, []byte(h)) {
+
+						err = errors.TraceNew("prohibited header")
+						logFields = map[string]interface{}{
+							"header": h,
+							"value":  v,
+						}
+
+						break
+					}
+				}
+			}
+
+			if err != nil {
+				if t.passthroughConfigured() {
+					t.startPassthrough(err, logFields)
+					return 0, nil
+				} else {
+					return 0, errors.Trace(err)
+				}
+			}
+
+			for _, h := range t.preserveHeaders {
+				// TODO/perf: consider using map, but array may be faster and
+				// use less mem.
+				if bytes.Equal(k, []byte(h)) {
+					// TODO: if there are multiple preserved headers with the
+					// same key, then the last header parsed will be the
+					// preserved value. Consider if this is the desired
+					// functionality.
+					t.headers[h] = v
+					break
+				}
+			}
+
+			t.scanIndex += i + len(crlf)
+		}
+
+		if !reachedBody {
+			return 0, nil
+		} // else: Request-Line and all headers have been read.
+
+		bodyOffset := t.scanIndex + len(crlf)
+
+		// reset for next request
+		defer func() {
+			t.scanIndex = 0
+			t.readRequestLine = false
+			t.headers = nil
+			t.contentLength = nil
+		}()
+
+		err = nil
+
+		if t.contentLength == nil {
+			// Irrecoverable error because either Content-Length header
+			// is missing, or Content-Length header value is empty, e.g.
+			// "Content-Length: ", and request body length cannot be
+			// determined.
+			err = errors.TraceNew("Content-Length missing")
+		}
+
+		if err == nil {
+			if t.validateMeekCookie != nil {
+				// NOTE: could check t.ValidateMeekCookieResult == nil instead
+				// if it is guaranteed to return a non-nil result if no error is
+				// returned.
+				if _, ok := t.headers[cookieHeader]; !ok {
+					err = errors.TraceNew("cookie missing")
+				}
+			}
+		}
+
+		if err != nil {
+			if t.passthroughConfigured() {
+				t.startPassthrough(err, nil)
+				return 0, nil
+			} else {
+				return 0, errors.Trace(err)
+			}
+		}
+
+		// No passthrough will be performed. Discard buffered bytes because
+		// they are no longer needed to perform a passthrough.
+		t.b.Next(bodyOffset)
+
+		// TODO: technically at this point we could start copying bytes into the
+		// caller's buffer which would remove the need to copy len(buffer) bytes
+		// twice; first into the internal buffer and second into the caller's
+		// buffer.
+		t.reqLineAndHeadersBuf.Truncate(len(httpNormalizerRequestLine))
+
+		if _, ok := t.headers[hostHeader]; !ok {
+			// net/http expects the host header
+			t.reqLineAndHeadersBuf.WriteString("\r\nHost: example.com")
+		}
+
+		// Write headers
+
+		if t.headerWriteOrder != nil {
+			// Re-add headers in specified order (for testing)
+			for _, k := range t.headerWriteOrder {
+				if v, ok := t.headers[k]; ok {
+					t.reqLineAndHeadersBuf.WriteString("\r\n" + k + ": ")
+					t.reqLineAndHeadersBuf.Write(v)
+				}
+			}
+		} else {
+			for k, v := range t.headers {
+				t.reqLineAndHeadersBuf.WriteString("\r\n" + k + ": ")
+				t.reqLineAndHeadersBuf.Write(v)
+			}
+		}
+		t.reqLineAndHeadersBuf.Write(doublecrlf)
+
+		// TODO/perf: could eliminate copy of header by copying it directly into
+		// the caller's buffer instead of copying the bytes over to t.b first.
+		header := t.reqLineAndHeadersBuf.Bytes()
+
+		// Copy any request body bytes received before resetting the
+		// buffer.
+		var reqBody []byte
+		reqBodyLen := t.b.Len() // number of request body bytes received
+		if reqBodyLen > 0 {
+			reqBody = make([]byte, reqBodyLen)
+			copy(reqBody, t.b.Bytes())
+		}
+
+		t.b.Reset()
+		t.b.Write(header)
+		if len(reqBody) > 0 {
+			t.b.Write(reqBody)
+		}
+
+		// Calculate number of bytes remaining to:
+		// - read from the underlying net.Conn
+		// - return to the caller
+
+		t.state = httpNormalizerReadBody
+
+		totalReqBytes := len(header) + int(*t.contentLength)
+		t.copyRemain = uint64(totalReqBytes)
+
+		bytesOfBodyRead := t.b.Len() - len(header)
+
+		if bytesOfBodyRead > totalReqBytes-len(header) {
+			t.readRemain = 0
+		} else {
+			t.readRemain = *t.contentLength - uint64(bytesOfBodyRead)
+		}
+
+		return t.copy(buffer), nil
+	}
+
+	// Request-Line, and headers, have been normalized. Return any remaining
+	// bytes of these and then read, and return, the bytes of the request body
+	// from the underlying net.Conn.
+
+	var n int
+	var err error
+
+	// Read more bytes from the underlying net.Conn once all the remaining
+	// bytes in t.b have been copied into the caller's buffer in previous Read
+	// calls.
+	if t.b.Len() == 0 {
+
+		// perf: read bytes directly into the caller's buffer.
+
+		bufferLen := len(buffer)
+		if uint64(bufferLen) > t.readRemain {
+			bufferLen = int(t.readRemain)
+		}
+
+		// TODO: could attempt to read more bytes and only copy bufferLen bytes
+		// into buffer but this adds an extra copy.
+		n, err = t.Conn.Read(buffer[:bufferLen])
+
+		if uint64(n) >= t.readRemain {
+			t.readRemain = 0
+			// Do not reset t.b because it may contain bytes of subsequent
+			// requests.
+			t.state = httpNormalizerReadReqLineAndHeader
+		} else {
+			t.readRemain -= uint64(n)
+		}
+
+		// Do not wrap any I/O err returned by Conn
+		return n, err
+	}
+
+	// Copy remaining bytes in t.b into the caller's buffer.
+	return t.copy(buffer), nil
+}
+
+func (t *HTTPNormalizer) copy(buffer []byte) int {
+	// Do not return any bytes from subsequent requests which have been
+	// buffered internally because they need to be normalized first.
+	bytesToCopy := t.copyRemain
+	if uint64(t.b.Len()) < t.copyRemain {
+		bytesToCopy = uint64(t.b.Len())
+	}
+
+	// Copy bytes to caller's buffer
+	n := copy(buffer, t.b.Bytes()[:bytesToCopy])
+
+	// Remove returned bytes from internal buffer and update number of bytes
+	// remaining to return to the caller.
+	t.b.Next(n) // perf: advance read cursor instead of copying bytes to front of buffer
+	t.copyRemain -= uint64(n)
+
+	if t.copyRemain == 0 && t.readRemain == 0 {
+
+		// Shift buffer back to 0 copying any remaining bytes to the start of
+		// the buffer.
+		// TODO/perf: technically bytes.Buffer takes a similar, and more
+		// efficient, approach internally so this should not be necessary.
+		nextBytes := t.b.Bytes()
+		t.b.Reset()
+		if len(nextBytes) > 0 {
+			t.b.Write(nextBytes)
+		}
+
+		// All bytes of the current request have been read and returned to the
+		// caller. Start normalizing the header of the next request.
+		// NOTE: if t.b contains CRLF separated lines, of the next request and
+		// there is remaining space in the buffer supplied by the caller, then
+		// technically we could start processing the next request instead of
+		// returning here.
+
+		// Do not reset t.b because it may contain bytes of subsequent requests.
+		t.state = httpNormalizerReadReqLineAndHeader
+	}
+
+	return n
+}
+
+func (t *HTTPNormalizer) passthroughConfigured() bool {
+	return t.passthroughDialer != nil && t.passthroughAddress != ""
+}
+
+func (t *HTTPNormalizer) startPassthrough(tunnelError error, logFields map[string]interface{}) {
+
+	if t.passthroughLogPassthrough != nil {
+
+		clientAddr := t.Conn.RemoteAddr().String()
+		clientIP, _, _ := net.SplitHostPort(clientAddr)
+
+		t.passthroughLogPassthrough(clientIP, errors.TraceMsg(tunnelError, "passthrough"), logFields)
+	}
+
+	go passthrough(t.Conn, t.passthroughAddress, t.passthroughDialer, t.b.Bytes())
+
+	atomic.StoreInt32(&t.passthrough, 1)
+}
+
+// passthrough relays conn to the passthrough target at address. It dials the
+// target with dialer over "tcp", first writes buf (the bytes already read
+// from conn) to the target, and then copies data in both directions until the
+// relay ends. conn is always closed when passthrough returns.
+func passthrough(conn net.Conn, address string, dialer func(network, address string) (net.Conn, error), buf []byte) {
+
+	// Perform the passthrough relay.
+	//
+	// Limitations:
+	//
+	// - The local TCP stack may differ from passthrough target in a
+	//   detectable way.
+	//
+	// - There may be detectable timing characteristics due to the network hop
+	//   to the passthrough target.
+	//
+	// - Application-level socket operations may produce detectable
+	//   differences (e.g., CloseWrite/FIN).
+	//
+	// - The dial to the passthrough, or other upstream network operations,
+	//   may fail. These errors are not logged.
+	//
+	// - There's no timeout on the passthrough dial and no time limit on the
+	//   passthrough relay so that the invalid client can't detect a timeout
+	//   shorter than the passthrough target; this may cause additional load.
+
+	defer conn.Close()
+
+	passthroughConn, err := dialer("tcp", address)
+	if err != nil {
+		return
+	}
+	_, err = passthroughConn.Write(buf)
+	if err != nil {
+		// The relay goroutine below, which is what closes passthroughConn,
+		// is never started on this path; close it here to avoid leaking it.
+		passthroughConn.Close()
+		return
+	}
+
+	// Client -> target. When this copy ends the target conn is closed, which
+	// in turn unblocks the target -> client copy below.
+	go func() {
+		_, _ = io.Copy(passthroughConn, conn)
+		passthroughConn.Close()
+	}()
+
+	// Target -> client. Errors are intentionally ignored; see the limitations
+	// listed above.
+	_, _ = io.Copy(conn, passthroughConn)
+}
+
+func (t *HTTPNormalizer) Write(b []byte) (n int, err error) {
+	if atomic.LoadInt32(&t.passthrough) == 1 {
+		return 0, ErrPassthroughActive
+	}
+	return t.Conn.Write(b)
+}
+
+func (t *HTTPNormalizer) Close() error {
+	if atomic.LoadInt32(&t.passthrough) == 1 {
+		return nil
+	}
+	return t.Conn.Close()
+}
+
+func (t *HTTPNormalizer) SetDeadline(tt time.Time) error {
+	if atomic.LoadInt32(&t.passthrough) == 1 {
+		return nil
+	}
+	return t.Conn.SetDeadline(tt)
+}
+
+func (t *HTTPNormalizer) SetReadDeadline(tt time.Time) error {
+	if atomic.LoadInt32(&t.passthrough) == 1 {
+		return nil
+	}
+	return t.Conn.SetReadDeadline(tt)
+}
+
+func (t *HTTPNormalizer) SetWriteDeadline(tt time.Time) error {
+	if atomic.LoadInt32(&t.passthrough) == 1 {
+		return nil
+	}
+	return t.Conn.SetWriteDeadline(tt)
+}
+
+func (t *HTTPNormalizer) GetMetrics() common.LogFields {
+	// Relay any metrics from the underlying conn.
+	m, ok := t.Conn.(common.MetricsSource)
+	if ok {
+		return m.GetMetrics()
+	}
+	return nil
+}
+
+// Note: all config fields must be set before calling Accept.
+type HTTPNormalizerListener struct {
+	HeaderWriteOrder          []string
+	MaxReqLineAndHeadersSize  int
+	ProhibitedHeaders         []string
+	PassthroughAddress        string
+	PassthroughDialer         func(network, address string) (net.Conn, error)
+	PassthroughLogPassthrough func(clientIP string, tunnelError error, logFields map[string]interface{})
+	ValidateMeekCookie        func(clientIP string, rawCookies []byte) ([]byte, error)
+
+	net.Listener
+}
+
+// Accept accepts the next connection from the wrapped Listener and returns it
+// wrapped in an HTTPNormalizer configured from the listener's fields.
+//
+// Errors from the underlying Listener are returned unwrapped. If
+// ValidateMeekCookie is set and the client IP cannot be parsed from the
+// connection's remote address, the accepted connection is closed and an
+// error is returned.
+func (t *HTTPNormalizerListener) Accept() (net.Conn, error) {
+	conn, err := t.Listener.Accept()
+	if err != nil {
+		// Do not wrap any err returned by Listener
+		return nil, err
+	}
+
+	normalizer := NewHTTPNormalizer(conn)
+
+	normalizer.headerWriteOrder = t.HeaderWriteOrder // for testing
+	normalizer.maxReqLineAndHeadersSize = t.MaxReqLineAndHeadersSize
+	normalizer.prohibitedHeaders = t.ProhibitedHeaders
+	normalizer.passthroughAddress = t.PassthroughAddress
+	normalizer.passthroughDialer = t.PassthroughDialer
+	normalizer.passthroughLogPassthrough = t.PassthroughLogPassthrough
+
+	if t.ValidateMeekCookie != nil {
+
+		clientIP, _, err := net.SplitHostPort(conn.RemoteAddr().String())
+		if err != nil {
+			// The accepted conn is not handed to the caller on this path,
+			// so close it here; otherwise it would be leaked.
+			conn.Close()
+			return nil, errors.Trace(err)
+		}
+
+		// Bind the client IP so the normalizer can invoke the validator with
+		// only the raw cookie value.
+		normalizer.validateMeekCookie = func(cookie []byte) ([]byte, error) {
+
+			b, err := t.ValidateMeekCookie(clientIP, cookie)
+			if err != nil {
+				return nil, errors.Trace(err)
+			}
+
+			return b, nil
+		}
+	}
+
+	return normalizer, nil
+}
+
+func WrapListenerWithHTTPNormalizer(listener net.Listener) *HTTPNormalizerListener {
+	return &HTTPNormalizerListener{
+		Listener: listener,
+	}
+}

+ 594 - 0
psiphon/common/transforms/httpNormalizer_test.go

@@ -0,0 +1,594 @@
+/*
+ * Copyright (c) 2023, Psiphon Inc.
+ * All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+package transforms
+
+import (
+	"bytes"
+	stderrors "errors"
+	"io"
+	"net"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
+)
+
+type httpNormalizerTest struct {
+	name               string
+	input              string
+	maxHeaderSize      int
+	prohibitedHeaders  []string
+	headerOrder        []string
+	chunkSize          int
+	connReadErrs       []error
+	validateMeekCookie func([]byte) ([]byte, error)
+	wantOutput         string
+	wantError          error
+}
+
+func runHTTPNormalizerTest(tt *httpNormalizerTest, useNormalizer bool) error {
+
+	conn := testConn{
+		readErrs:   tt.connReadErrs,
+		readBuffer: []byte(tt.input),
+	}
+
+	passthroughMessage := "passthrough"
+
+	passthroughConn := testConn{
+		readBuffer: []byte(passthroughMessage),
+	}
+
+	var normalizer net.Conn
+	if useNormalizer {
+		n := NewHTTPNormalizer(&conn)
+		n.maxReqLineAndHeadersSize = tt.maxHeaderSize
+		n.headerWriteOrder = tt.headerOrder
+		n.prohibitedHeaders = tt.prohibitedHeaders
+		n.validateMeekCookie = tt.validateMeekCookie
+
+		if n.validateMeekCookie != nil {
+
+			n.passthroughAddress = "127.0.0.1:0"
+			n.passthroughDialer = func(network, address string) (net.Conn, error) {
+
+				if network != "tcp" {
+					return nil, errors.Tracef("expected network tcp but got \"%s\"", network)
+				}
+
+				if address != n.passthroughAddress {
+					return nil, errors.Tracef("expected address \"%s\" but got \"%s\"", n.passthroughAddress, address)
+				}
+
+				return &passthroughConn, nil // return underlying conn
+			}
+		}
+		normalizer = n
+	} else {
+		normalizer = &conn
+	}
+	defer normalizer.Close()
+
+	remain := len(tt.wantOutput)
+	var acc []byte
+	var err error
+
+	// Write input bytes to normalizer in chunks and then check
+	// output.
+	for {
+		if remain <= 0 {
+			break
+		}
+
+		b := make([]byte, tt.chunkSize)
+
+		expectedErr := len(conn.readErrs) > 0
+
+		var n int
+		n, err = normalizer.Read(b)
+		if err != nil && !expectedErr {
+			// err checked outside loop
+			break
+		}
+
+		if n > 0 {
+			remain -= n
+			acc = append(acc, b[:n]...)
+		}
+	}
+
+	// Calling Read on an instance of HTTPNormalizer will return io.EOF once a
+	// passthrough has been activated.
+	if tt.validateMeekCookie != nil && err == io.EOF {
+
+		// wait for passthrough to complete
+
+		timeout := time.After(time.Second)
+
+		for len(passthroughConn.ReadBuffer()) != 0 || len(conn.ReadBuffer()) != 0 {
+
+			select {
+			case <-timeout:
+				return errors.TraceNew("timed out waiting for passthrough to complete")
+			case <-time.After(10 * time.Millisecond):
+			}
+		}
+
+		// Subsequent reads should return EOF
+
+		b := make([]byte, 1)
+		_, err := normalizer.Read(b)
+		if err != io.EOF {
+			return errors.TraceNew("expected EOF")
+		}
+
+		// Subsequent writes should not impact conn or passthroughConn
+
+		_, err = normalizer.Write([]byte("ignored"))
+		if !stderrors.Is(err, ErrPassthroughActive) {
+			return errors.Tracef("expected error io.EOF but got %v", err)
+		}
+
+		if string(acc) != "" {
+			return errors.TraceNew("expected to read no bytes")
+		}
+
+		if string(passthroughConn.ReadBuffer()) != "" {
+			return errors.TraceNew("expected read buffer to be emptied")
+		}
+
+		if string(passthroughConn.WriteBuffer()) != tt.wantOutput {
+			return errors.Tracef("expected \"%s\" of len %d but got \"%s\" of len %d", escapeNewlines(tt.wantOutput), len(tt.wantOutput), escapeNewlines(string(passthroughConn.WriteBuffer())), len(passthroughConn.WriteBuffer()))
+		}
+
+		if string(conn.ReadBuffer()) != "" {
+			return errors.TraceNew("expected read buffer to be emptied")
+		}
+
+		if string(conn.WriteBuffer()) != passthroughMessage {
+			return errors.Tracef("expected \"%s\" of len %d but got \"%s\" of len %d", escapeNewlines(passthroughMessage), len(passthroughMessage), escapeNewlines(string(conn.WriteBuffer())), len(conn.WriteBuffer()))
+		}
+	}
+
+	if tt.wantError == nil {
+		if err != nil {
+			return errors.TraceMsg(err, "unexpected error")
+		}
+	} else {
+		// tt.wantError != nil
+		if err == nil {
+			return errors.Tracef("expected error %v", tt.wantError)
+		} else if !strings.Contains(err.Error(), tt.wantError.Error()) {
+			return errors.Tracef("expected error %v got %v", tt.wantError, err)
+		}
+	}
+	if tt.wantError == nil && string(acc) != tt.wantOutput {
+		return errors.Tracef("expected \"%s\" of len %d but got \"%s\" of len %d", escapeNewlines(tt.wantOutput), len(tt.wantOutput), escapeNewlines(string(acc)), len(acc))
+	}
+
+	return nil
+}
+
+// TestHTTPNormalizerHTTPRequest runs every case in the table below through
+// runHTTPNormalizerTest, one subtest per case, and fails the subtest when
+// runHTTPNormalizerTest returns an error.
+func TestHTTPNormalizerHTTPRequest(t *testing.T) {
+
+	tests := []httpNormalizerTest{
+		{
+			name:       "no cookie in chunks",
+			input:      "POST / HTTP/1.1\r\nContent-Length: 4\r\n\r\nabcd",
+			wantOutput: "POST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 4\r\n\r\nabcd",
+			chunkSize:  1,
+		},
+		{
+			name:        "no cookie in single read",
+			input:       "POST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 4\r\n\r\nabcd",
+			headerOrder: []string{"Host", "Content-Length"},
+			wantOutput:  "POST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 4\r\n\r\nabcd",
+			chunkSize:   999,
+		},
+		{
+			name:        "no cookie, first read lands in body",
+			input:       "POST / HTTP/1.1\r\nContent-Length: 4\r\n\r\nabcd",
+			headerOrder: []string{"Host", "Content-Length"},
+			wantOutput:  "POST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 4\r\n\r\nabcd",
+			chunkSize:   40, // first read goes up to and including "b"
+		},
+		{
+			name:        "no cookie with spaces",
+			input:       "POST / HTTP/1.1\r\n      Content-Length:   4  \r\n\r\nabcd",
+			headerOrder: []string{"Host", "Content-Length"},
+			wantOutput:  "POST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 4\r\n\r\nabcd",
+			chunkSize:   1,
+		},
+		{
+			name:        "cookie and range",
+			input:       "POST / HTTP/1.1\r\nContent-Length: 4\r\n    Cookie: X\r\nRange: 1234 \r\n\r\nabcd",
+			headerOrder: []string{"Host", "Content-Length", "Cookie", "Range"},
+			wantOutput:  "POST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 4\r\nCookie: X\r\nRange: 1234\r\n\r\nabcd",
+			chunkSize:   1,
+		},
+		{
+			name:         "partial write and errors",
+			input:        "POST / HTTP/1.1\r\nContent-Length: 4\r\n\r\nabcd",
+			headerOrder:  []string{"Host", "Content-Length"},
+			wantOutput:   "POST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 4\r\n\r\nabcd",
+			chunkSize:    1,
+			connReadErrs: []error{stderrors.New("err1"), stderrors.New("err2")},
+		},
+		{
+			name:       "Content-Length missing",
+			input:      "POST / HTTP/1.1\r\n\r\nabcd",
+			wantOutput: "POST / HTTP/1.1\r\n\r\nabcd", // set to ensure all bytes are read
+			wantError:  stderrors.New("Content-Length missing"),
+			chunkSize:  1,
+		},
+		{
+			name:       "invalid Content-Length header value",
+			input:      "POST / HTTP/1.1\r\nContent-Length: X\r\n\r\nabcd",
+			wantOutput: "POST / HTTP/1.1\r\nContent-Length: X\r\nHost: example.com\r\n\r\nabcd", // set to ensure all bytes are read
+			wantError:  stderrors.New("strconv.ParseUint: parsing \"X\": invalid syntax"),
+			chunkSize:  1,
+		},
+		{
+			name:       "incorrect Content-Length header value",
+			input:      "POST / HTTP/1.1\r\nContent-Length: 3\r\n\r\nabcd",
+			wantOutput: "POST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 3\r\n\r\nabc",
+			chunkSize:  1,
+		},
+		{
+			name:        "single HTTP request written in a single write",
+			input:       "POST / HTTP/1.1\r\nRemoved: removed\r\nContent-Length: 4\r\n\r\nabcd",
+			headerOrder: []string{"Host", "Content-Length"},
+			wantOutput:  "POST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 4\r\n\r\nabcd",
+			chunkSize:   999,
+		},
+		{
+			name:        "multiple HTTP requests written in a single write",
+			input:       "POST / HTTP/1.1\r\nRemoved: removed\r\nContent-Length: 4\r\n\r\nabcdPOST / HTTP/1.1\r\nContent-Length: 2\r\n\r\n12",
+			headerOrder: []string{"Host", "Content-Length"},
+			wantOutput:  "POST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 4\r\n\r\nabcdPOST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 2\r\n\r\n12",
+			chunkSize:   999,
+		},
+		{
+			name:        "multiple HTTP requests written in chunks",
+			input:       "POST / HTTP/1.1\r\nRemoved: removed\r\nContent-Length: 4\r\n\r\nabcdPOST / HTTP/1.1\r\nContent-Length: 2\r\n\r\n12",
+			headerOrder: []string{"Host", "Content-Length"},
+			wantOutput:  "POST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 4\r\n\r\nabcdPOST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 2\r\n\r\n12",
+			chunkSize:   3,
+		},
+		{
+			name:        "multiple HTTP requests first read lands in middle of last request",
+			input:       "POST / HTTP/1.1\r\nRemoved: removed\r\nContent-Length: 4\r\n\r\nabcdPOST / HTTP/1.1\r\nContent-Length: 2\r\n\r\n12POST / HTTP/1.1\r\nContent-Length: 2\r\n\r\nxyx",
+			headerOrder: []string{"Host", "Content-Length"},
+			wantOutput:  "POST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 4\r\n\r\nabcdPOST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 2\r\n\r\n12",
+			chunkSize:   109,
+		},
+		{
+			name:        "longer",
+			input:       "POST / HTTP/1.1\r\nContent-Length: 4\r\n\r\nabcd",
+			headerOrder: []string{"Host", "Content-Length"},
+			wantOutput:  "POST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 4\r\n\r\nabcd",
+			chunkSize:   1,
+		},
+		{
+			name:        "shorter",
+			input:       "POST / HTTP/1.1111111111111111111\r\nContent-Length: 4\r\n\r\nabcd",
+			headerOrder: []string{"Host", "Content-Length"},
+			wantOutput:  "POST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 4\r\n\r\nabcd",
+			chunkSize:   1,
+		},
+		{
+			name:  "missing cookie",
+			input: "POST / HTTP/1.1\r\nContent-Length: 4\r\n\r\nabcd",
+			validateMeekCookie: func([]byte) ([]byte, error) {
+				return nil, errors.TraceNew("invalid cookie")
+			},
+			wantOutput: "POST / HTTP/1.1\r\nContent-Length: 4\r\n\r\nabcd",
+			chunkSize:  1,
+			wantError:  io.EOF,
+		},
+		{
+			name:  "invalid cookie",
+			input: "POST / HTTP/1.1\r\nCookie: invalid\r\nContent-Length: 4\r\n\r\nabcd",
+			validateMeekCookie: func([]byte) ([]byte, error) {
+				return nil, errors.TraceNew("invalid cookie")
+			},
+			wantOutput: "POST / HTTP/1.1\r\nCookie: invalid\r\nContent-Length: 4\r\n\r\nabcd",
+			chunkSize:  1,
+			wantError:  io.EOF,
+		},
+		{
+			name:        "valid cookie",
+			input:       "POST / HTTP/1.1\r\nHost: example.com\r\nCookie: valid\r\nContent-Length: 4\r\nRange: unused\r\nSkipped: skipped\r\n\r\nabcd",
+			headerOrder: []string{"Host", "Cookie", "Content-Length", "Range"},
+			validateMeekCookie: func([]byte) ([]byte, error) {
+				return nil, nil
+			},
+			wantOutput: "POST / HTTP/1.1\r\nHost: example.com\r\nCookie: valid\r\nContent-Length: 4\r\nRange: unused\r\n\r\nabcd",
+			chunkSize:  1,
+		},
+		{
+			name:          "exceeds max Request-Line, and headers, size",
+			input:         "POST / HTTP/1.1\r\nContent-Length: 4\r\nCookie: X\r\nRange: 1234 \r\n\r\nabcd",
+			maxHeaderSize: 47, // up to end of Cookie header
+			wantOutput:    "POST / HTTP/1.1\r\nContent-Length: 4\r\nCookie: X\r\nRange: 1234 \r\n\r\nabcd",
+			chunkSize:     1,
+			wantError:     stderrors.New("exceeds maxReqLineAndHeadersSize"),
+		},
+	}
+
+	for _, tt := range tests {
+		// Copy the range variable: its address is taken in the subtest below,
+		// and before Go 1.22 every iteration shares a single variable.
+		tt := tt
+		t.Run(tt.name, func(t *testing.T) {
+
+			err := runHTTPNormalizerTest(&tt, true)
+			if err != nil {
+				t.Fatalf("runHTTPNormalizerTest failed: %v", err)
+			}
+		})
+	}
+}
+
+// TestHTTPNormalizerHTTPServer writes each request over TCP to a listener
+// wrapped with WrapListenerWithHTTPNormalizer and checks which of the wrapped
+// listener and the passthrough listener answers, and which bytes it received.
+//
+// Caveats:
+//   - Does not test or handle multiple requests in a single connection
+//   - Does not test the scenario where the first request in a connection
+//     passes validation and then a subsequent request fails which triggers
+//     a passthrough - in this scenario both the listener and passthrough
+//     listener will receive bytes.
+func TestHTTPNormalizerHTTPServer(t *testing.T) {
+
+	type test struct {
+		name              string
+		request           string
+		maxHeaderSize     int
+		prohibitedHeaders []string
+		wantPassthrough   bool
+		wantRecv          string
+	}
+
+	tests := []test{
+		{
+			name:     "valid cookie",
+			request:  "POST / HTTP/1.1\r\nCookie: valid\r\nContent-Length: 4\r\n\r\nabcd",
+			wantRecv: "POST / HTTP/1.1\r\nHost: example.com\r\nCookie: valid\r\nContent-Length: 4\r\n\r\nabcd",
+		},
+		{
+			name:            "missing cookie",
+			request:         "POST HTTP/1.1\r\nContent-Length: 4\r\n\r\nabcd",
+			wantPassthrough: true,
+			wantRecv:        "POST HTTP/1.1\r\nContent-Length: 4\r\n\r\nabcd",
+		},
+		{
+			name:            "invalid cookie",
+			request:         "POST HTTP/1.1\r\nCookie: invalid\r\nContent-Length: 4\r\n\r\nabcd",
+			wantPassthrough: true,
+			wantRecv:        "POST HTTP/1.1\r\nCookie: invalid\r\nContent-Length: 4\r\n\r\nabcd",
+		},
+		{
+			name:              "valid cookie with prohibited headers",
+			request:           "POST / HTTP/1.1\r\nCookie: valid\r\nProhibited: prohibited\r\nContent-Length: 4\r\n\r\nabcd",
+			prohibitedHeaders: []string{"Prohibited"},
+			wantPassthrough:   true,
+			wantRecv:          "POST / HTTP/1.1\r\nCookie: valid\r\nProhibited: prohibited\r\nContent-Length: 4\r\n\r\nabcd",
+		},
+		{
+			name:            "valid cookie but exceeds max header size",
+			request:         "POST / HTTP/1.1\r\nCookie: valid\r\nContent-Length: 4\r\n\r\nabcd",
+			wantPassthrough: true,
+			maxHeaderSize:   32, // up to end of Cookie header
+			wantRecv:        "POST / HTTP/1.1\r\nCookie: valid\r\nContent-Length: 4\r\n\r\nabcd",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+
+			listener, err := net.Listen("tcp", "127.0.0.1:0")
+			if err != nil {
+				t.Fatalf("net.Listen failed %v", err)
+			}
+			defer listener.Close()
+
+			passthrough, err := net.Listen("tcp", "127.0.0.1:0")
+			if err != nil {
+				t.Fatalf("net.Listen failed %v", err)
+			}
+			defer passthrough.Close()
+
+			listener = WrapListenerWithHTTPNormalizer(listener)
+			normalizer := listener.(*HTTPNormalizerListener)
+			normalizer.PassthroughAddress = passthrough.Addr().String()
+			normalizer.PassthroughDialer = net.Dial
+			normalizer.MaxReqLineAndHeadersSize = tt.maxHeaderSize
+			normalizer.ProhibitedHeaders = tt.prohibitedHeaders
+			normalizer.PassthroughLogPassthrough = func(clientIP string, tunnelError error, logFields map[string]interface{}) {}
+
+			validateMeekCookieResult := "payload"
+			normalizer.ValidateMeekCookie = func(clientIP string, cookie []byte) ([]byte, error) {
+				if string(cookie) == "valid" {
+					return []byte(validateMeekCookieResult), nil
+				}
+				return nil, stderrors.New("invalid cookie")
+			}
+			normalizer.HeaderWriteOrder = []string{"Host", "Cookie", "Content-Length"}
+
+			type listenerState struct {
+				lType                    string // listener type, "listener" or "passthrough"
+				err                      error
+				recv                     []byte
+				validateMeekCookieResult []byte // set if listener is "passthrough"
+			}
+
+			runListener := func(listener net.Listener, listenerType string, recv chan *listenerState) {
+
+				conn, err := listener.Accept()
+				if err != nil {
+					recv <- &listenerState{
+						lType: listenerType,
+						err:   errors.TraceMsg(err, "listener.Accept failed"),
+					}
+					return
+				}
+
+				defer conn.Close()
+
+				b := make([]byte, len(tt.wantRecv))
+
+				// A single Read should be sufficient because multiple requests
+				// in a single connection are not supported by this test.
+				n, err := conn.Read(b)
+				if err != nil {
+					recv <- &listenerState{
+						lType: listenerType,
+						err:   errors.TraceMsg(err, "conn.Read failed"),
+					}
+					return
+				}
+				b = b[:n]
+
+				var validateMeekCookieResult []byte
+				if n, ok := conn.(*HTTPNormalizer); ok {
+					validateMeekCookieResult = n.ValidateMeekCookieResult
+				}
+
+				_, err = conn.Write([]byte(listenerType))
+				if err != nil {
+					if stderrors.Is(err, ErrPassthroughActive) {
+						return
+					}
+					recv <- &listenerState{
+						lType:                    listenerType,
+						err:                      errors.TraceMsg(err, "conn.Write failed"),
+						validateMeekCookieResult: validateMeekCookieResult,
+					}
+					return
+				}
+
+				recv <- &listenerState{
+					lType:                    listenerType,
+					recv:                     b,
+					err:                      nil,
+					validateMeekCookieResult: validateMeekCookieResult,
+				}
+			}
+
+			// Buffered with one slot per runListener goroutine so that a send
+			// never blocks. Otherwise a goroutine whose Accept only fails once
+			// the deferred Close calls run would block on send forever,
+			// because this subtest has stopped receiving by then.
+			recv := make(chan *listenerState, 2)
+
+			listenerType := "listener"
+			passthroughType := "passthrough"
+
+			go runListener(listener, listenerType, recv)
+			go runListener(passthrough, passthroughType, recv)
+
+			conn, err := net.Dial("tcp", listener.Addr().String())
+			if err != nil {
+				t.Fatalf("net.Dial failed %v", err)
+			}
+			defer conn.Close()
+
+			n, err := conn.Write([]byte(tt.request))
+			if err != nil {
+				t.Fatalf("conn.Write failed %v", err)
+			}
+			if n != len(tt.request) {
+				t.Fatalf("expected to write %d bytes but wrote %d", len(tt.request), n)
+			}
+
+			// read response
+
+			b := make([]byte, 512)
+			n, err = conn.Read(b)
+			if err != nil {
+				t.Fatalf("conn.Read failed %v", err)
+			}
+			b = b[:n]
+
+			if tt.wantPassthrough && string(b) != passthroughType {
+				t.Fatalf("expected passthrough but got response from listener")
+			} else if !tt.wantPassthrough && string(b) != listenerType {
+				t.Fatalf("expected no passthrough but got response from passthrough")
+			}
+
+			r := <-recv
+
+			if r.err != nil {
+				t.Fatalf("listener failed %v", r.err)
+			}
+
+			if !bytes.Equal(r.recv, []byte(tt.wantRecv)) {
+				t.Fatalf("expected \"%s\" of len %d but got \"%s\" of len %d", escapeNewlines(string(tt.wantRecv)), len(tt.wantRecv), escapeNewlines(string(r.recv)), len(r.recv))
+			}
+
+			if r.lType != "passthrough" && string(r.validateMeekCookieResult) != validateMeekCookieResult {
+
+				t.Fatalf("expected validateMeekCookieResult value \"%s\" but got \"%s\"", validateMeekCookieResult, string(r.validateMeekCookieResult))
+			}
+
+			// Check that other listener did not get a connection
+
+			n, err = conn.Read(b)
+			if err != nil && err != io.EOF {
+				t.Fatalf("conn.Read failed %v", err)
+			}
+			if n != 0 {
+				t.Fatalf("expected to read 0 bytes")
+			}
+
+			select {
+			case r := <-recv:
+				t.Fatalf("unexpected response from %s: %v \"%s\"", r.lType, r.err, string(r.recv))
+			case <-time.After(10 * time.Millisecond):
+			}
+		})
+	}
+}
+
+// BenchmarkHTTPNormalizer times runHTTPNormalizerTest on an input of 100
+// concatenated requests, each declaring and carrying a 400 byte body, with
+// chunkSize set to 1.
+func BenchmarkHTTPNormalizer(b *testing.B) {
+
+	// The body length must match the Content-Length value in both requests.
+	body := strings.Repeat("x", 400)
+	inReq := "POST / HTTP/1.1\r\nContent-Length: 400\r\n\r\n" + body
+	outReq := "POST / HTTP/1.1\r\nHost: example.com\r\nContent-Length: 400\r\n\r\n" + body
+
+	// Concatenate many requests to simulate a single connection running over
+	// the normalizer.
+	input := strings.Repeat(inReq, 100)
+	output := strings.Repeat(outReq, 100)
+
+	// TODO: test different chunk sizes
+	test := &httpNormalizerTest{
+		name:       "no cookie in chunks",
+		input:      input,
+		wantOutput: output,
+		chunkSize:  1,
+	}
+
+	// Keep the fixture construction above out of the measurement.
+	b.ResetTimer()
+
+	for n := 0; n < b.N; n++ {
+
+		// TODO: does test setup and teardown in runHTTPNormalizerTest skew
+		// the benchmark
+		err := runHTTPNormalizerTest(test, true)
+		if err != nil {
+			b.Fatalf("runHTTPNormalizerTest failed: %v", err)
+		}
+	}
+}

+ 68 - 53
psiphon/common/transforms/httpTransformer.go

@@ -51,11 +51,12 @@ type HTTPTransformerParameters struct {
 }
 
 const (
-	// httpTransformerReadWriteHeader HTTPTransformer is waiting to finish
-	// reading and writing the next HTTP request header.
-	httpTransformerReadWriteHeader = 0
-	// httpTransformerReadWriteBody HTTPTransformer is waiting to finish reading
-	// and writing the current HTTP request body.
+	// httpTransformerReadWriteReqLineAndHeaders HTTPTransformer is waiting to
+	// finish reading and writing the Request-Line, and headers, of the next
+	// request.
+	httpTransformerReadWriteReqLineAndHeaders = 0
+	// httpTransformerReadWriteBody HTTPTransformer is waiting to finish
+	// reading, and writing, the current request body.
 	httpTransformerReadWriteBody = 1
 )
 
@@ -72,15 +73,16 @@ type HTTPTransformer struct {
 	seed      *prng.Seed
 
 	// state is the HTTPTransformer state. Possible values are
-	// httpTransformerReadWriteHeader and httpTransformerReadWriteBody.
+	// httpTransformerReadWriteReqLineAndHeaders and
+	// httpTransformerReadWriteBody.
 	state int64
-	// b is used to buffer the accumulated bytes of the current HTTP request
-	// header until the entire header is received and written.
+	// b is used to buffer the accumulated bytes of the current request until
+	// the Request-Line and all headers are received and written.
 	b bytes.Buffer
-	// remain is the number of remaining HTTP request bytes to write to the
-	// underlying net.Conn. Set to the value of Content-Length (HTTP request
-	// body bytes) plus the length of the transformed HTTP header once the
-	// current request header is received.
+	// remain is the number of remaining request bytes to write to the
+	// underlying net.Conn. Set to the value of Content-Length (request body
+	// bytes) plus the length of the transformed Request-Line, and headers,
+	// once the Request-Line, and headers, of the current request are received.
 	remain uint64
 
 	net.Conn
@@ -102,25 +104,28 @@ type HTTPTransformer struct {
 // in a single Write(). Must be called synchronously.
 func (t *HTTPTransformer) Write(b []byte) (int, error) {
 
-	if t.state == httpTransformerReadWriteHeader {
+	if t.state == httpTransformerReadWriteReqLineAndHeaders {
 
-		// Do not need to check return value https://github.com/golang/go/blob/1e9ff255a130200fcc4ec5e911d28181fce947d5/src/bytes/buffer.go#L164
+		// Do not need to check return value. Applies to all subsequent
+		// calls to t.b.Write() and this comment will not be repeated for
+		// each. See https://github.com/golang/go/blob/1e9ff255a130200fcc4ec5e911d28181fce947d5/src/bytes/buffer.go#L164.
 		t.b.Write(b)
 
-		// Wait until the entire HTTP request header has been read. Must check
-		// all accumulated bytes incase the "\r\n\r\n" separator is written over
-		// multiple Write() calls; from reading the go1.19.5 net/http code the
-		// entire HTTP request is written in a single Write() call.
+		// Wait until the Request-Line, and all headers, have been read. Must
+		// check all accumulated bytes in case the "\r\n\r\n" separator is
+		// written over multiple Write() calls; from reading the go1.19.5
+		// net/http code the entire HTTP request is written in a single Write()
+		// call.
 
 		sep := []byte("\r\n\r\n")
 
-		headerBodyLines := bytes.SplitN(t.b.Bytes(), sep, 2) // split header and body
+		headerBodyLines := bytes.SplitN(t.b.Bytes(), sep, 2) // split Request-Line, and headers, from body
 
 		if len(headerBodyLines) <= 1 {
-			// b buffered in t.b and the entire HTTP request header has not been
-			// recieved so another Write() call is expected.
+			// b buffered in t.b and the Request-Line, and all headers, have not
+			// been received so another Write() call is expected.
 			return len(b), nil
-		} // else: HTTP request header has been read
+		} // else: Request-Line, and all headers, have been read
 
 		// read Content-Length before applying transform
 
@@ -128,7 +133,7 @@ func (t *HTTPTransformer) Write(b []byte) (int, error) {
 
 		lines := bytes.Split(headerBodyLines[0], []byte("\r\n"))
 		if len(lines) > 1 {
-			// skip request line, e.g. "GET /foo HTTP/1.1"
+			// skip Request-Line, e.g. "GET /foo HTTP/1.1"
 			headerLines = lines[1:]
 		}
 
@@ -145,7 +150,7 @@ func (t *HTTPTransformer) Write(b []byte) (int, error) {
 		}
 		if len(cl) == 0 {
 			// Irrecoverable error because either Content-Length header
-			// missing, or Content-Length header value is empty, e.g.
+			// is missing, or Content-Length header value is empty, e.g.
 			// "Content-Length: ", and request body length cannot be
 			// determined.
 			return len(b), errors.TraceNew("Content-Length missing")
@@ -160,13 +165,13 @@ func (t *HTTPTransformer) Write(b []byte) (int, error) {
 
 		t.remain = contentLength
 
-		// transform and write header
+		// transform, and write, Request-Line and headers.
 
-		headerLen := len(headerBodyLines[0]) + len(sep)
-		header := t.b.Bytes()[:headerLen]
+		reqLineAndHeadersLen := len(headerBodyLines[0]) + len(sep)
+		reqLineAndHeaders := t.b.Bytes()[:reqLineAndHeadersLen]
 
 		if t.transform != nil {
-			newHeader, err := t.transform.Apply(t.seed, header)
+			newReqLineAndHeaders, err := t.transform.Apply(t.seed, reqLineAndHeaders)
 			if err != nil {
 				// TODO: consider logging an error and skiping transform
 				// instead of returning an error, if the transform is broken
@@ -174,42 +179,42 @@ func (t *HTTPTransformer) Write(b []byte) (int, error) {
 				return len(b), errors.Trace(err)
 			}
 
-			// only allocate new slice if header length changed
-			if len(newHeader) == len(header) {
+			// perf: only allocate new slice if length changed, otherwise the
+			// transformed data can be copied directly over the original in t.b.
+			if len(newReqLineAndHeaders) == len(reqLineAndHeaders) {
 				// Do not need to check return value. It is guaranteed that
-				// n == len(newHeader) because t.b.Len() >= n if the header
-				// size has not changed.
-				copy(t.b.Bytes()[:headerLen], newHeader)
+				// n == len(newReqLineAndHeaders) because t.b.Len() >= n if the
+				// transformed data is the same size as the original data.
+				copy(t.b.Bytes()[:reqLineAndHeadersLen], newReqLineAndHeaders)
 			} else {
 
 				// Copy any request body bytes received before resetting the
 				// buffer.
 				var reqBody []byte
-				reqBodyLen := t.b.Len() - headerLen // number of request body bytes received
+				reqBodyLen := t.b.Len() - reqLineAndHeadersLen // number of request body bytes received
 				if reqBodyLen > 0 {
 					reqBody = make([]byte, reqBodyLen)
-					copy(reqBody, t.b.Bytes()[headerLen:])
+					copy(reqBody, t.b.Bytes()[reqLineAndHeadersLen:])
 				}
 
-				// Reset the buffer and write transformed header and any
-				// request body bytes received into it.
+				// Reset the buffer and write transformed Request-Line, and
+				// headers, and any request body bytes received into it.
 				t.b.Reset()
-				// Do not need to check return value of bytes.Buffer.Write() https://github.com/golang/go/blob/1e9ff255a130200fcc4ec5e911d28181fce947d5/src/bytes/buffer.go#L164
-				t.b.Write(newHeader)
+				t.b.Write(newReqLineAndHeaders)
 				if len(reqBody) > 0 {
 					t.b.Write(reqBody)
 				}
 			}
 
-			header = newHeader
+			reqLineAndHeaders = newReqLineAndHeaders
 		}
 
-		if math.MaxUint64-t.remain < uint64(len(header)) {
+		if math.MaxUint64-t.remain < uint64(len(reqLineAndHeaders)) {
 			// Irrecoverable error because request is malformed:
-			// Content-Length + len(header) > math.MaxUint64.
+			// Content-Length + len(reqLineAndHeaders) > math.MaxUint64.
 			return len(b), errors.TraceNew("t.remain + uint64(len(header)) overflows")
 		}
-		t.remain += uint64(len(header))
+		t.remain += uint64(len(reqLineAndHeaders))
 
 		if uint64(t.b.Len()) > t.remain {
 			// Should never happen, multiple requests written in a single
@@ -228,21 +233,22 @@ func (t *HTTPTransformer) Write(b []byte) (int, error) {
 		return len(b), err
 	}
 
-	// HTTP request header has been transformed. Write any remaining bytes of
-	// HTTP request header and then write HTTP request body.
+	// Request-Line, and headers, have been transformed. Write any remaining
+	// bytes of these and then write request body.
 
 	// Must write buffered bytes first, in-order, to write bytes to underlying
 	// net.Conn in the same order they were received in.
 	//
 	// Already checked that t.b does not contain bytes of a subsequent HTTP
-	// request when the header is parsed, i.e. at this point it is guaranteed
-	// that t.b.Len() <= t.remain.
+	// request when the Request-Line, and headers, are parsed, i.e. at this
+	// point it is guaranteed that t.b.Len() <= t.remain.
 	//
 	// In practise the buffer will be empty by this point because its entire
 	// contents will have been written in the first call to t.b.WriteTo(t.Conn)
-	// when the header is received, parsed, and transformed; otherwise the
-	// underlying transport will have failed and the caller will not invoke
-	// Write() again on this instance. See HTTPTransformer.Write() comment.
+	// when the Request-Line, and headers, are received, parsed, and
+	// transformed; otherwise the underlying transport will have failed and the
+	// caller will not invoke Write() again on this instance. See
+	// HTTPTransformer.Write() comment.
 	wrote, err := t.b.WriteTo(t.Conn)
 	t.remain -= uint64(wrote)
 	if err != nil {
@@ -260,9 +266,9 @@ func (t *HTTPTransformer) Write(b []byte) (int, error) {
 	t.remain -= uint64(n)
 
 	if t.remain <= 0 {
-		// Entire request, header and body, has been written. Return to
-		// waiting for next HTTP request header to arrive.
-		t.state = httpTransformerReadWriteHeader
+		// Entire request has been written. Return to waiting for next HTTP
+		// request to arrive.
+		t.state = httpTransformerReadWriteReqLineAndHeaders
 		t.remain = 0
 	}
 
@@ -270,6 +276,15 @@ func (t *HTTPTransformer) Write(b []byte) (int, error) {
 	return n, err
 }
 
+// GetMetrics relays the metrics of the underlying conn when that conn
+// implements common.MetricsSource, and returns nil otherwise.
+func (t *HTTPTransformer) GetMetrics() common.LogFields {
+	if source, ok := t.Conn.(common.MetricsSource); ok {
+		return source.GetMetrics()
+	}
+	// The underlying conn does not report metrics.
+	return nil
+}
+
 func WrapDialerWithHTTPTransformer(dialer common.Dialer, params *HTTPTransformerParameters) common.Dialer {
 	return func(ctx context.Context, network, addr string) (net.Conn, error) {
 		conn, err := dialer(ctx, network, addr)

+ 97 - 15
psiphon/common/transforms/httpTransformer_test.go

@@ -29,6 +29,7 @@ import (
 	"net"
 	"net/http"
 	"strings"
+	"sync"
 	"testing"
 	"time"
 
@@ -271,8 +272,8 @@ func TestHTTPTransformerHTTPRequest(t *testing.T) {
 				if err != nil {
 					t.Fatalf("unexpected error %v", err)
 				}
-				if string(conn.b) != tt.wantOutput {
-					t.Fatalf("expected \"%s\" of len %d but got \"%s\" of len %d", escapeNewlines(tt.wantOutput), len(tt.wantOutput), escapeNewlines(string(conn.b)), len(conn.b))
+				if string(conn.WriteBuffer()) != tt.wantOutput {
+					t.Fatalf("expected \"%s\" of len %d but got \"%s\" of len %d", escapeNewlines(tt.wantOutput), len(tt.wantOutput), escapeNewlines(string(conn.WriteBuffer())), len(conn.WriteBuffer()))
 				}
 			} else {
 				// tt.wantError != nil
@@ -461,8 +462,16 @@ func escapeNewlines(s string) string {
 }
 
 type testConn struct {
-	// b is the accumulated bytes from Write() calls.
-	b []byte
+	readLock sync.Mutex
+	// readBuffer are the bytes to return from Read() calls.
+	readBuffer []byte
+	// readErrs are returned from Read() calls in order. If empty, then a nil
+	// error is returned.
+	readErrs []error
+
+	writeLock sync.Mutex
+	// writeBuffer are the accumulated bytes from Write() calls.
+	writeBuffer []byte
 	// writeLimit is the max number of bytes that will be written in a Write()
 	// call.
 	writeLimit int
@@ -477,12 +486,66 @@ type testConn struct {
 	net.Conn
 }
 
+// ReadBuffer returns a snapshot of readBuffer, taken while holding readLock,
+// so the caller may keep or modify the result without affecting the conn.
+// When Conn is not set, the snapshot's length equals the number of bytes
+// still available to Read.
+func (c *testConn) ReadBuffer() []byte {
+	c.readLock.Lock()
+	defer c.readLock.Unlock()
+
+	snapshot := make([]byte, len(c.readBuffer))
+	copy(snapshot, c.readBuffer)
+	return snapshot
+}
+
 func (c *testConn) Read(b []byte) (n int, err error) {
-	return c.Conn.Read(b)
+
+	c.readLock.Lock()
+	defer c.readLock.Unlock()
+
+	if len(c.readErrs) > 0 {
+		err = c.readErrs[0]
+		c.readErrs = c.readErrs[1:]
+	}
+
+	// If Conn set, then read from it directly and do not use readBuffer.
+	if c.Conn != nil {
+		return c.Conn.Read(b)
+	}
+
+	if len(c.readBuffer) == 0 {
+		n = 0
+		return
+	}
+
+	n = copy(b, c.readBuffer)
+	if n == len(c.readBuffer) {
+		c.readBuffer = nil
+	} else {
+		c.readBuffer = c.readBuffer[n:]
+	}
+
+	return
+}
+
+// WriteBuffer returns a copy of the underlying writeBuffer, which is the
+// accumulation of all bytes written with Write.
+func (c *testConn) WriteBuffer() []byte {
+	// writeBuffer is appended to by Write while it holds writeLock, so the
+	// copy must be taken under writeLock, not readLock, to avoid racing with
+	// a concurrent Write.
+	c.writeLock.Lock()
+	defer c.writeLock.Unlock()
+
+	writeBufferCopy := make([]byte, len(c.writeBuffer))
+	copy(writeBufferCopy, c.writeBuffer)
+
+	return writeBufferCopy
+}
 
 func (c *testConn) Write(b []byte) (n int, err error) {
 
+	c.writeLock.Lock()
+	defer c.writeLock.Unlock()
+
 	if len(c.writeErrs) > 0 {
 		err = c.writeErrs[0]
 		c.writeErrs = c.writeErrs[1:]
@@ -492,16 +555,16 @@ func (c *testConn) Write(b []byte) (n int, err error) {
 		n = c.writeLens[0]
 		c.writeLens = c.writeLens[1:]
 		if len(b) <= n {
-			c.b = append(c.b, b...)
+			c.writeBuffer = append(c.writeBuffer, b...)
 			n = len(b)
 		} else {
-			c.b = append(c.b, b[:n]...)
+			c.writeBuffer = append(c.writeBuffer, b[:n]...)
 		}
 	} else if c.writeLimit != 0 && c.writeLimit < len(b) {
-		c.b = append(c.b, b[:c.writeLimit]...)
+		c.writeBuffer = append(c.writeBuffer, b[:c.writeLimit]...)
 		n = c.writeLimit
 	} else {
-		c.b = append(c.b, b...)
+		c.writeBuffer = append(c.writeBuffer, b...)
 		n = len(b)
 	}
 
@@ -514,25 +577,44 @@ func (c *testConn) Write(b []byte) (n int, err error) {
 }
 
 func (c *testConn) Close() error {
-	return c.Conn.Close()
+	if c.Conn != nil {
+		return c.Conn.Close()
+	}
+
+	return nil
 }
 
 func (c *testConn) LocalAddr() net.Addr {
-	return c.Conn.LocalAddr()
+	if c.Conn != nil {
+		return c.Conn.LocalAddr()
+	}
+	return &net.TCPAddr{}
 }
 
 func (c *testConn) RemoteAddr() net.Addr {
-	return c.Conn.RemoteAddr()
+	if c.Conn != nil {
+		return c.Conn.RemoteAddr()
+	}
+	return &net.TCPAddr{}
 }
 
 func (c *testConn) SetDeadline(t time.Time) error {
-	return c.Conn.SetDeadline(t)
+	if c.Conn != nil {
+		return c.Conn.SetDeadline(t)
+	}
+	return nil
 }
 
 func (c *testConn) SetReadDeadline(t time.Time) error {
-	return c.Conn.SetReadDeadline(t)
+	if c.Conn != nil {
+		return c.Conn.SetReadDeadline(t)
+	}
+	return nil
 }
 
 func (c *testConn) SetWriteDeadline(t time.Time) error {
-	return c.Conn.SetWriteDeadline(t)
+	if c.Conn != nil {
+		return c.Conn.SetWriteDeadline(t)
+	}
+	return nil
 }

+ 10 - 7
psiphon/common/transforms/transforms.go

@@ -29,7 +29,7 @@ import (
 
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
-	regen "github.com/zach-klippenstein/goregen"
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/regen"
 )
 
 const (
@@ -155,7 +155,7 @@ func (spec Spec) ApplyString(seed *prng.Seed, input string) (string, error) {
 		if err != nil {
 			return "", errors.Trace(err)
 		}
-		value = re.ReplaceAllString(value, replacement)
+		value = re.ReplaceAllString(value, string(replacement))
 	}
 	return value, nil
 }
@@ -173,7 +173,7 @@ func (spec Spec) Apply(seed *prng.Seed, input []byte) ([]byte, error) {
 		if err != nil {
 			return nil, errors.Trace(err)
 		}
-		value = re.ReplaceAll(value, []byte(replacement))
+		value = re.ReplaceAll(value, replacement)
 	}
 	return value, nil
 }
@@ -181,7 +181,7 @@ func (spec Spec) Apply(seed *prng.Seed, input []byte) ([]byte, error) {
 // makeRegexAndRepl generates the regex and replacement for a given seed and
 // transform. The same seed can be supplied to produce the same output, for
 // replay.
-func makeRegexAndRepl(seed *prng.Seed, transform [2]string) (*regexp.Regexp, string, error) {
+func makeRegexAndRepl(seed *prng.Seed, transform [2]string) (*regexp.Regexp, []byte, error) {
 
 	// TODO: the compiled regexp and regen could be cached, but the seed is an
 	// issue with caching the regen.
@@ -192,14 +192,17 @@ func makeRegexAndRepl(seed *prng.Seed, transform [2]string) (*regexp.Regexp, str
 	}
 	rg, err := regen.NewGenerator(transform[1], args)
 	if err != nil {
-		return nil, "", errors.Trace(err)
+		return nil, nil, errors.Trace(err)
 	}
 
-	replacement := rg.Generate()
+	replacement, err := rg.Generate()
+	if err != nil {
+		return nil, nil, errors.Trace(err)
+	}
 
 	re, err := regexp.Compile(transform[0])
 	if err != nil {
-		return nil, "", errors.Trace(err)
+		return nil, nil, errors.Trace(err)
 	}
 
 	return re, replacement, nil

+ 13 - 5
psiphon/common/values/values.go

@@ -18,10 +18,8 @@
  */
 
 /*
-
 Package values provides a mechanism for specifying and selecting dynamic
 values employed by the Psiphon client and server.
-
 */
 package values
 
@@ -36,7 +34,7 @@ import (
 
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
-	regen "github.com/zach-klippenstein/goregen"
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/regen"
 	"golang.org/x/crypto/nacl/secretbox"
 )
 
@@ -265,6 +263,8 @@ func GetContentType(PRNG *prng.PRNG) string {
 	return spec.GetValue(PRNG)
 }
 
+// generate returns a string generated from the given regexp pattern.
+// generate is intended to be used with hardcoded inputs, and panics on error.
 func generate(PRNG *prng.PRNG, pattern string) string {
 
 	args := &regen.GeneratorArgs{
@@ -275,7 +275,11 @@ func generate(PRNG *prng.PRNG, pattern string) string {
 	if err != nil {
 		panic(err.Error())
 	}
-	return rg.Generate()
+	value, err := rg.Generate()
+	if err != nil {
+		panic(err.Error())
+	}
+	return string(value)
 }
 
 var (
@@ -337,7 +341,11 @@ func generateUserAgent() string {
 
 	g := userAgentGenerators[prng.Range(0, len(userAgentGenerators)-1)]
 
-	value := g.generator.Generate()
+	bytes, err := g.generator.Generate()
+	if err != nil {
+		panic(err.Error())
+	}
+	value := string(bytes)
 	value = strings.ReplaceAll(value, "__VER__", g.version())
 	return value
 }

+ 25 - 18
psiphon/dialParameters.go

@@ -37,11 +37,11 @@ import (
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/parameters"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/protocol"
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/regen"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/resolver"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/values"
 	utls "github.com/refraction-networking/utls"
-	regen "github.com/zach-klippenstein/goregen"
 	"golang.org/x/net/bpf"
 )
 
@@ -675,13 +675,35 @@ func MakeDialParameters(
 
 	if !isReplay || !replayHostname {
 
+		// Any MeekHostHeader selections made here will be overridden below,
+		// as required, for fronting cases.
+
 		if protocol.TunnelProtocolUsesMeekHTTPS(dialParams.TunnelProtocol) ||
 			protocol.TunnelProtocolUsesFrontedMeekQUIC(dialParams.TunnelProtocol) {
 
 			dialParams.MeekSNIServerName = ""
+			hostname := ""
 			if p.WeightedCoinFlip(parameters.TransformHostNameProbability) {
 				dialParams.MeekSNIServerName = selectHostName(dialParams.TunnelProtocol, p)
+				hostname = dialParams.MeekSNIServerName
 				dialParams.MeekTransformedHostName = true
+			} else {
+
+				// Always select a hostname for the Host header in this case.
+				// Unlike HTTP, the Host header isn't plaintext on the wire,
+				// and so there's no anti-fingerprint benefit from presenting
+				// the server IP address in the Host header. Omitting the
+				// server IP here can prevent exposing it in certain
+				// scenarios where the traffic is rerouted and arrives at a
+				// different HTTPS server.
+
+				hostname = selectHostName(dialParams.TunnelProtocol, p)
+			}
+			if serverEntry.MeekServerPort == 443 {
+				dialParams.MeekHostHeader = hostname
+			} else {
+				dialParams.MeekHostHeader = net.JoinHostPort(
+					hostname, strconv.Itoa(serverEntry.MeekServerPort))
 			}
 
 		} else if protocol.TunnelProtocolUsesMeekHTTP(dialParams.TunnelProtocol) {
@@ -765,7 +787,6 @@ func MakeDialParameters(
 				dialParams.ObfuscatedQUICNonceTransformerParameters = nil
 			}
 		}
-
 	}
 
 	if !isReplay || !replayLivenessTest {
@@ -850,7 +871,6 @@ func MakeDialParameters(
 				dialParams.OSSHObfuscatorSeedTransformerParameters = nil
 			}
 		}
-
 	}
 
 	if protocol.TunnelProtocolUsesMeekHTTP(dialParams.TunnelProtocol) {
@@ -919,13 +939,6 @@ func MakeDialParameters(
 	case protocol.TUNNEL_PROTOCOL_UNFRONTED_MEEK:
 
 		dialParams.MeekDialAddress = net.JoinHostPort(serverEntry.IpAddress, dialParams.DialPortNumber)
-		if !dialParams.MeekTransformedHostName {
-			if dialPortNumber == 80 {
-				dialParams.MeekHostHeader = serverEntry.IpAddress
-			} else {
-				dialParams.MeekHostHeader = dialParams.MeekDialAddress
-			}
-		}
 
 	case protocol.TUNNEL_PROTOCOL_UNFRONTED_MEEK_HTTPS,
 		protocol.TUNNEL_PROTOCOL_UNFRONTED_MEEK_SESSION_TICKET:
@@ -935,16 +948,10 @@ func MakeDialParameters(
 			// Note: IP address in SNI field will be omitted.
 			dialParams.MeekSNIServerName = serverEntry.IpAddress
 		}
-		if dialPortNumber == 443 {
-			dialParams.MeekHostHeader = serverEntry.IpAddress
-		} else {
-			dialParams.MeekHostHeader = dialParams.MeekDialAddress
-		}
 
 	default:
 		return nil, errors.Tracef(
 			"unknown tunnel protocol: %s", dialParams.TunnelProtocol)
-
 	}
 
 	if protocol.TunnelProtocolUsesMeek(dialParams.TunnelProtocol) {
@@ -1326,7 +1333,7 @@ func selectFrontingParameters(
 		// Generate a front address based on the regex.
 
 		var err error
-		frontingDialHost, err = regen.Generate(serverEntry.MeekFrontingAddressesRegex)
+		frontingDialHost, err = regen.GenerateString(serverEntry.MeekFrontingAddressesRegex)
 		if err != nil {
 			return "", "", errors.Trace(err)
 		}
@@ -1489,7 +1496,7 @@ func selectHostName(
 	}
 
 	choice := prng.Intn(len(regexStrings))
-	hostName, err := regen.Generate(regexStrings[choice])
+	hostName, err := regen.GenerateString(regexStrings[choice])
 	if err != nil {
 		NoticeWarning("selectHostName: regen.Generate failed: %v", errors.Trace(err))
 		return values.GetHostName()

+ 6 - 3
psiphon/server/config.go

@@ -163,12 +163,13 @@ type Config struct {
 	TunnelProtocolPorts map[string]int
 
 	// TunnelProtocolPassthroughAddresses specifies passthrough addresses to be
-	// used for tunnel protocols configured in  TunnelProtocolPorts. Passthrough
+	// used for tunnel protocols configured in TunnelProtocolPorts. Passthrough
 	// is a probing defense which relays all network traffic between a client and
 	// the passthrough target when the client fails anti-probing tests.
 	//
 	// TunnelProtocolPassthroughAddresses is supported for:
-	// "UNFRONTED-MEEK-HTTPS-OSSH", "UNFRONTED-MEEK-SESSION-TICKET-OSSH".
+	// "UNFRONTED-MEEK-HTTPS-OSSH", "UNFRONTED-MEEK-SESSION-TICKET-OSSH",
+	// "UNFRONTED-MEEK-OSSH".
 	TunnelProtocolPassthroughAddresses map[string]string
 
 	// LegacyPassthrough indicates whether to expect legacy passthrough messages
@@ -1046,7 +1047,9 @@ func GenerateConfig(params *GenerateConfigParams) ([]byte, []byte, []byte, []byt
 
 		capability := protocol.GetCapability(tunnelProtocol)
 
-		if params.Passthrough && protocol.TunnelProtocolSupportsPassthrough(tunnelProtocol) {
+		// Note: do not add passthrough annotation if HTTP unfronted meek
+		// because it would result in an invalid capability.
+		if params.Passthrough && protocol.TunnelProtocolSupportsPassthrough(tunnelProtocol) && tunnelProtocol != protocol.TUNNEL_PROTOCOL_UNFRONTED_MEEK {
 			if !params.LegacyPassthrough {
 				capability += "-PASSTHROUGH-v2"
 			} else {

+ 140 - 9
psiphon/server/meek.go

@@ -46,6 +46,7 @@ import (
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/parameters"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/protocol"
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/values"
 	tris "github.com/Psiphon-Labs/tls-tris"
 	lrucache "github.com/cognusion/go-cache-lru"
@@ -127,6 +128,7 @@ type MeekServer struct {
 	rateLimitHistory                *lrucache.Cache
 	rateLimitCount                  int
 	rateLimitSignalGC               chan struct{}
+	normalizer                      *transforms.HTTPNormalizerListener
 }
 
 // NewMeekServer initializes a new meek server.
@@ -135,7 +137,7 @@ func NewMeekServer(
 	listener net.Listener,
 	listenerTunnelProtocol string,
 	listenerPort int,
-	useTLS, isFronted, useObfuscatedSessionTickets bool,
+	useTLS, isFronted, useObfuscatedSessionTickets, useHTTPNormalizer bool,
 	clientHandler func(clientTunnelProtocol string, clientConn net.Conn),
 	stopBroadcast <-chan struct{}) (*MeekServer, error) {
 
@@ -238,6 +240,13 @@ func NewMeekServer(
 		meekServer.tlsConfig = tlsConfig
 	}
 
+	if useHTTPNormalizer && protocol.TunnelProtocolUsesMeekHTTPNormalizer(listenerTunnelProtocol) {
+
+		normalizer := meekServer.makeMeekHTTPNormalizerListener()
+		meekServer.normalizer = normalizer
+		meekServer.listener = normalizer
+	}
+
 	return meekServer, nil
 }
 
@@ -749,9 +758,22 @@ func (server *MeekServer) getSessionOrEndpoint(
 	// bytes -- assuming that MEEK_MAX_SESSION_ID_LENGTH is too short to be a
 	// valid meek cookie.
 
-	payloadJSON, err := server.getMeekCookiePayload(clientIP, meekCookie.Value)
-	if err != nil {
-		return "", nil, nil, "", nil, errors.Trace(err)
+	var payloadJSON []byte
+
+	if server.normalizer != nil {
+
+		// NOTE: operates on the assumption that the normalizer is not wrapped
+		// with a further conn.
+		underlyingConn := request.Context().Value(meekNetConnContextKey).(net.Conn)
+		normalizedConn := underlyingConn.(*transforms.HTTPNormalizer)
+		payloadJSON = normalizedConn.ValidateMeekCookieResult
+
+	} else {
+
+		payloadJSON, err = server.getMeekCookiePayload(clientIP, meekCookie.Value)
+		if err != nil {
+			return "", nil, nil, "", nil, errors.Trace(err)
+		}
 	}
 
 	// Note: this meek server ignores legacy values PsiphonClientSessionId
@@ -1220,7 +1242,7 @@ func (server *MeekServer) makeMeekTLSConfig(
 	if useObfuscatedSessionTickets {
 
 		// See obfuscated session ticket overview
-		// in NewObfuscatedClientSessionCache.
+		// in NewObfuscatedClientSessionState.
 
 		config.UseObfuscatedSessionTickets = true
 
@@ -1291,7 +1313,7 @@ func (server *MeekServer) makeMeekTLSConfig(
 
 			// Use a custom, shorter TTL based on the validity period of the
 			// passthrough message.
-			TTL := obfuscator.TLS_PASSTHROUGH_TIME_PERIOD
+			TTL := obfuscator.TLS_PASSTHROUGH_HISTORY_TTL
 			if server.support.Config.LegacyPassthrough {
 				TTL = obfuscator.HISTORY_SEED_TTL
 			}
@@ -1323,6 +1345,95 @@ func (server *MeekServer) makeMeekTLSConfig(
 	return config, nil
 }
 
+// makeMeekHTTPNormalizerListener returns the meek server listener wrapped in
+// an HTTP normalizer.
+func (server *MeekServer) makeMeekHTTPNormalizerListener() *transforms.HTTPNormalizerListener {
+
+	normalizer := transforms.WrapListenerWithHTTPNormalizer(server.listener)
+
+	normalizer.ProhibitedHeaders = server.support.Config.MeekProhibitedHeaders
+
+	normalizer.MaxReqLineAndHeadersSize = 8192 // max number of header bytes common web servers will read before returning an error
+
+	if server.passthroughAddress != "" {
+		normalizer.PassthroughAddress = server.passthroughAddress
+		normalizer.PassthroughDialer = net.Dial
+	}
+	normalizer.PassthroughLogPassthrough = func(
+		clientIP string, tunnelError error, logFields map[string]interface{}) {
+
+		logIrregularTunnel(
+			server.support,
+			server.listenerTunnelProtocol,
+			server.listenerPort,
+			clientIP,
+			errors.Trace(tunnelError),
+			logFields)
+	}
+
+	// ValidateMeekCookie is invoked by the normalizer with the value of the
+	// cookie header (if present), before ServeHTTP gets the request and calls
+	// getSessionOrEndpoint; and then any valid meek cookie payload, or meek
+	// session ID, extracted in this callback is stored to be fetched by
+	// getSessionOrEndpoint.
+	// Note: if there are multiple cookie headers, even though prohibited by
+	// rfc6265, then ValidateMeekCookie will only be invoked once with the
+	// first one received.
+	normalizer.ValidateMeekCookie = func(clientIP string, rawCookies []byte) ([]byte, error) {
+
+		// Parse cookie.
+
+		if len(rawCookies) == 0 {
+			return nil, errors.TraceNew("no cookies")
+		}
+
+		// TODO/perf: readCookies in net/http is not exported, use a local
+		// implementation which does not require allocating an http.header
+		// each time.
+		request := http.Request{
+			Header: http.Header{
+				"Cookie": []string{string(rawCookies)},
+			},
+		}
+		cookies := request.Cookies()
+		if len(cookies) == 0 {
+			return nil, errors.Tracef("invalid cookies: %s", string(rawCookies))
+		}
+
+		// Use value of the first cookie; cookies is non-empty, as checked above.
+		meekCookieValue := cookies[0].Value
+
+		// Check for an existing session.
+
+		server.sessionsLock.RLock()
+		existingSessionID := meekCookieValue
+		_, ok := server.sessions[existingSessionID]
+		server.sessionsLock.RUnlock()
+		if ok {
+			// The cookie is a session ID for an active (not expired) session.
+			// Return it and then it will be stored and later fetched by
+			// getSessionOrEndpoint where it will be mapped to the existing
+			// session.
+			// Note: it's possible for the session to expire between this check
+			// and when getSessionOrEndpoint looks up the session.
+			return rawCookies, nil
+		}
+
+		// The session is new (or expired). Treat the cookie value as a new
+		// meek cookie, extract the payload, and return it; and then it will be
+		// stored and later fetched by getSessionOrEndpoint.
+
+		payloadJSON, err := server.getMeekCookiePayload(clientIP, meekCookieValue)
+		if err != nil {
+			return nil, errors.Trace(err)
+		}
+
+		return payloadJSON, nil
+	}
+
+	return normalizer
+}
+
 type meekSession struct {
 	// Note: 64-bit ints used with atomic operations are placed
 	// at the start of struct to ensure 64-bit alignment.
@@ -1533,9 +1644,19 @@ func (conn *meekConn) GetUnderlyingTCPAddrs() (*net.TCPAddr, *net.TCPAddr, bool)
 
 // SetReplay implements the common.FragmentorReplayAccessor interface, applying
 // the inputs to the _first_ underlying TCP connection in the meek tunnel. If
-// the underlying connection is closed, the SetSeed call will have no effect.
+// the underlying connection is closed, then the SetReplay call will have no effect.
 func (conn *meekConn) SetReplay(PRNG *prng.PRNG) {
-	fragmentor, ok := conn.firstUnderlyingConn.(common.FragmentorReplayAccessor)
+	underlyingConn := conn.firstUnderlyingConn
+
+	if conn.meekServer.normalizer != nil {
+		// The underlying conn is wrapped with a normalizer.
+		normalizer, ok := underlyingConn.(*transforms.HTTPNormalizer)
+		if ok {
+			underlyingConn = normalizer.Conn
+		}
+	}
+
+	fragmentor, ok := underlyingConn.(common.FragmentorReplayAccessor)
 	if ok {
 		fragmentor.SetReplay(PRNG)
 	}
@@ -1549,7 +1670,17 @@ func (conn *meekConn) SetReplay(PRNG *prng.PRNG) {
 // packet manipulation, any selected packet manipulation spec would have been
 // successful.
 func (conn *meekConn) GetReplay() (*prng.Seed, bool) {
-	fragmentor, ok := conn.firstUnderlyingConn.(common.FragmentorReplayAccessor)
+	underlyingConn := conn.firstUnderlyingConn
+
+	if conn.meekServer.normalizer != nil {
+		// The underlying conn is wrapped with a normalizer.
+		normalizer, ok := underlyingConn.(*transforms.HTTPNormalizer)
+		if ok {
+			underlyingConn = normalizer.Conn
+		}
+	}
+
+	fragmentor, ok := underlyingConn.(common.FragmentorReplayAccessor)
 	if ok {
 		return fragmentor.GetReplay()
 	}

+ 31 - 2
psiphon/server/meek_test.go

@@ -42,6 +42,7 @@ import (
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/protocol"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/tactics"
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms"
 	"golang.org/x/crypto/nacl/box"
 )
 
@@ -173,6 +174,26 @@ func TestCachedResponse(t *testing.T) {
 }
 
 func TestMeekResiliency(t *testing.T) {
+	testMeekResiliency(t, nil, false)
+}
+
+func TestMeekHTTPNormalizerResiliency(t *testing.T) {
+
+	seed, err := prng.NewSeed()
+	if err != nil {
+		t.Fatalf("prng.NewSeed failed %v", err)
+	}
+
+	spec := &transforms.HTTPTransformerParameters{
+		ProtocolTransformName: "spec1",
+		ProtocolTransformSpec: transforms.Spec{{"Host: example.com\r\n", ""}},
+		ProtocolTransformSeed: seed,
+	}
+
+	testMeekResiliency(t, spec, true)
+}
+
+func testMeekResiliency(t *testing.T, spec *transforms.HTTPTransformerParameters, useHTTPNormalizer bool) {
 
 	upstreamData := make([]byte, 5*MB)
 	_, _ = rand.Read(upstreamData)
@@ -246,6 +267,10 @@ func TestMeekResiliency(t *testing.T) {
 		Config: &Config{
 			MeekObfuscatedKey:              meekObfuscatedKey,
 			MeekCookieEncryptionPrivateKey: meekCookieEncryptionPrivateKey,
+			TunnelProtocolPorts: map[string]int{
+				protocol.TUNNEL_PROTOCOL_UNFRONTED_MEEK: 0,
+			},
+			runningProtocols: []string{protocol.TUNNEL_PROTOCOL_UNFRONTED_MEEK},
 		},
 		TrafficRulesSet: &TrafficRulesSet{},
 	}
@@ -287,11 +312,12 @@ func TestMeekResiliency(t *testing.T) {
 	server, err := NewMeekServer(
 		mockSupport,
 		listener,
-		"",
+		protocol.TUNNEL_PROTOCOL_UNFRONTED_MEEK,
 		0,
 		useTLS,
 		isFronted,
 		useObfuscatedSessionTickets,
+		useHTTPNormalizer,
 		clientHandler,
 		stopBroadcast)
 	if err != nil {
@@ -332,7 +358,6 @@ func TestMeekResiliency(t *testing.T) {
 	if err != nil {
 		t.Fatalf("prng.NewSeed failed: %s", err)
 	}
-
 	meekConfig := &psiphon.MeekConfig{
 		Parameters:                    params,
 		DialAddress:                   serverAddress,
@@ -342,6 +367,8 @@ func TestMeekResiliency(t *testing.T) {
 		MeekCookieEncryptionPublicKey: meekCookieEncryptionPublicKey,
 		MeekObfuscatedKey:             meekObfuscatedKey,
 		MeekObfuscatorPaddingSeed:     meekObfuscatorPaddingSeed,
+		ClientTunnelProtocol:          protocol.TUNNEL_PROTOCOL_UNFRONTED_MEEK,
+		HTTPTransformerParameters:     spec,
 	}
 
 	ctx, cancelFunc := context.WithTimeout(
@@ -545,6 +572,7 @@ func runTestMeekAccessControl(t *testing.T, rateLimit, restrictProvider, missing
 	useTLS := false
 	isFronted := false
 	useObfuscatedSessionTickets := false
+	useHTTPNormalizer := false
 
 	server, err := NewMeekServer(
 		mockSupport,
@@ -554,6 +582,7 @@ func runTestMeekAccessControl(t *testing.T, rateLimit, restrictProvider, missing
 		useTLS,
 		isFronted,
 		useObfuscatedSessionTickets,
+		useHTTPNormalizer,
 		func(_ string, conn net.Conn) {
 			go func() {
 				for {

+ 1 - 0
psiphon/server/tunnelServer.go

@@ -531,6 +531,7 @@ func (sshServer *sshServer) runListener(sshListener *sshListener, listenerError
 			protocol.TunnelProtocolUsesMeekHTTPS(sshListener.tunnelProtocol),
 			protocol.TunnelProtocolUsesFrontedMeek(sshListener.tunnelProtocol),
 			protocol.TunnelProtocolUsesObfuscatedSessionTickets(sshListener.tunnelProtocol),
+			true,
 			handleClient,
 			sshServer.shutdownBroadcast)
 

+ 29 - 10
vendor/github.com/Psiphon-Labs/qtls-go1-18/handshake_messages.go

@@ -336,24 +336,43 @@ func (m *clientHelloMsg) marshalRandomized() []byte {
 	// all slices before truncating.
 
 	cipherSuites := make([]uint16, len(m.cipherSuites))
-	perm := m.PRNG.Perm(len(m.cipherSuites))
-	for i, j := range perm {
-		cipherSuites[j] = m.cipherSuites[i]
-	}
-	cut := len(cipherSuites)
-	for ; cut > 1; cut-- {
-		if !m.PRNG.FlipCoin() {
+	for {
+		perm := m.PRNG.Perm(len(m.cipherSuites))
+		for i, j := range perm {
+			cipherSuites[j] = m.cipherSuites[i]
+		}
+		cut := len(cipherSuites)
+		for ; cut > 1; cut-- {
+			if !m.PRNG.FlipCoin() {
+				break
+			}
+		}
+
+		// Must contain at least one of defaultCipherSuitesTLS13.
+		containsDefault := false
+		for _, suite := range cipherSuites[:cut] {
+			for _, defaultSuite := range defaultCipherSuitesTLS13 {
+				if suite == defaultSuite {
+					containsDefault = true
+					break
+				}
+			}
+			if containsDefault {
+				break
+			}
+		}
+		if containsDefault {
+			cipherSuites = cipherSuites[:cut]
 			break
 		}
 	}
-	cipherSuites = cipherSuites[:cut]
 
 	compressionMethods := make([]uint8, len(m.compressionMethods))
-	perm = m.PRNG.Perm(len(m.compressionMethods))
+	perm := m.PRNG.Perm(len(m.compressionMethods))
 	for i, j := range perm {
 		compressionMethods[j] = m.compressionMethods[i]
 	}
-	cut = len(compressionMethods)
+	cut := len(compressionMethods)
 	for ; cut > 1; cut-- {
 		if !m.PRNG.FlipCoin() {
 			break

+ 29 - 10
vendor/github.com/Psiphon-Labs/qtls-go1-19/handshake_messages.go

@@ -336,24 +336,43 @@ func (m *clientHelloMsg) marshalRandomized() []byte {
 	// all slices before truncating.
 
 	cipherSuites := make([]uint16, len(m.cipherSuites))
-	perm := m.PRNG.Perm(len(m.cipherSuites))
-	for i, j := range perm {
-		cipherSuites[j] = m.cipherSuites[i]
-	}
-	cut := len(cipherSuites)
-	for ; cut > 1; cut-- {
-		if !m.PRNG.FlipCoin() {
+	for {
+		perm := m.PRNG.Perm(len(m.cipherSuites))
+		for i, j := range perm {
+			cipherSuites[j] = m.cipherSuites[i]
+		}
+		cut := len(cipherSuites)
+		for ; cut > 1; cut-- {
+			if !m.PRNG.FlipCoin() {
+				break
+			}
+		}
+
+		// Must contain at least one of defaultCipherSuitesTLS13.
+		containsDefault := false
+		for _, suite := range cipherSuites[:cut] {
+			for _, defaultSuite := range defaultCipherSuitesTLS13 {
+				if suite == defaultSuite {
+					containsDefault = true
+					break
+				}
+			}
+			if containsDefault {
+				break
+			}
+		}
+		if containsDefault {
+			cipherSuites = cipherSuites[:cut]
 			break
 		}
 	}
-	cipherSuites = cipherSuites[:cut]
 
 	compressionMethods := make([]uint8, len(m.compressionMethods))
-	perm = m.PRNG.Perm(len(m.compressionMethods))
+	perm := m.PRNG.Perm(len(m.compressionMethods))
 	for i, j := range perm {
 		compressionMethods[j] = m.compressionMethods[i]
 	}
-	cut = len(compressionMethods)
+	cut := len(compressionMethods)
 	for ; cut > 1; cut-- {
 		if !m.PRNG.FlipCoin() {
 			break

+ 0 - 28
vendor/github.com/zach-klippenstein/goregen/.gitignore

@@ -1,28 +0,0 @@
-# Compiled Object files, Static and Dynamic libs (Shared Objects)
-*.o
-*.a
-*.so
-
-# Folders
-_obj
-_test
-
-# Architecture specific extensions/prefixes
-*.[568vq]
-[568vq].out
-
-*.cgo1.go
-*.cgo2.c
-_cgo_defun.c
-_cgo_gotypes.go
-_cgo_export.*
-
-_testmain.go
-
-*.exe
-*.test
-*.prof
-
-# IntelliJ
-*.iml
-.idea/

+ 0 - 7
vendor/github.com/zach-klippenstein/goregen/.travis.yml

@@ -1,7 +0,0 @@
-language: go
-
-go:
-  - 1.5.1
-  - tip
-
-sudo: false

+ 0 - 7
vendor/github.com/zach-klippenstein/goregen/README.md

@@ -1,7 +0,0 @@
-#goregen [![GoDoc](https://godoc.org/github.com/zach-klippenstein/goregen?status.svg)](https://godoc.org/github.com/zach-klippenstein/goregen) [![Build Status](https://travis-ci.org/zach-klippenstein/goregen.svg?branch=master)](https://travis-ci.org/zach-klippenstein/goregen)
-
-A Golang library for generating random strings from regular expressions.
-
-Checkout https://goregen-demo.herokuapp.com for a live demo.
-
-See the [godoc](https://godoc.org/github.com/zach-klippenstein/goregen) for examples.

+ 2 - 9
vendor/modules.txt

@@ -13,10 +13,10 @@ github.com/Psiphon-Labs/bolt
 # github.com/Psiphon-Labs/goptlib v0.0.0-20200406165125-c0e32a7a3464
 ## explicit
 github.com/Psiphon-Labs/goptlib
-# github.com/Psiphon-Labs/qtls-go1-18 v0.0.0-20221014170512-3bdc7291c091
+# github.com/Psiphon-Labs/qtls-go1-18 v0.0.0-20230515185031-ae6632ab97ac
 ## explicit; go 1.18
 github.com/Psiphon-Labs/qtls-go1-18
-# github.com/Psiphon-Labs/qtls-go1-19 v0.0.0-20221014165721-ed28749db082
+# github.com/Psiphon-Labs/qtls-go1-19 v0.0.0-20230515185100-099bac32c181
 ## explicit; go 1.18
 github.com/Psiphon-Labs/qtls-go1-19
 # github.com/Psiphon-Labs/quic-go v0.0.0-20230215230806-9b1ddbf778cc
@@ -145,8 +145,6 @@ github.com/google/go-cmp/cmp/internal/value
 ## explicit; go 1.12
 github.com/google/gopacket
 github.com/google/gopacket/layers
-# github.com/google/gxui v0.0.0-20151028112939-f85e0a97b3a4
-## explicit
 # github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38
 ## explicit; go 1.14
 github.com/google/pprof/profile
@@ -354,8 +352,6 @@ github.com/sergeyfrolov/bsbuffer
 # github.com/sirupsen/logrus v1.9.0
 ## explicit; go 1.13
 github.com/sirupsen/logrus
-# github.com/smartystreets/goconvey v1.7.2
-## explicit; go 1.16
 # github.com/stretchr/testify v1.8.2
 ## explicit; go 1.13
 github.com/stretchr/testify/assert
@@ -378,9 +374,6 @@ github.com/wader/filtertransport
 # github.com/x448/float16 v0.8.4
 ## explicit; go 1.11
 github.com/x448/float16
-# github.com/zach-klippenstein/goregen v0.0.0-20160303162051-795b5e3961ea
-## explicit
-github.com/zach-klippenstein/goregen
 # gitlab.com/yawning/obfs4.git v0.0.0-20190120164510-816cff15f425 => ./replace/obfs4.git
 ## explicit; go 1.19
 gitlab.com/yawning/obfs4.git/common/csrand