Browse Source

Workaround quic-go server ReadFrom issues

Rod Hynes 7 years ago
parent
commit
297430fdde

+ 26 - 6
psiphon/common/quic/obfuscator.go

@@ -146,6 +146,26 @@ func (conn *ObfuscatedPacketConn) Close() error {
 	return conn.PacketConn.Close()
 }
 
+type temporaryNetError struct {
+	err error
+}
+
+func newTemporaryNetError(err error) *temporaryNetError {
+	return &temporaryNetError{err: err}
+}
+
+func (e *temporaryNetError) Timeout() bool {
+	return false
+}
+
+func (e *temporaryNetError) Temporary() bool {
+	return true
+}
+
+func (e *temporaryNetError) Error() string {
+	return e.err.Error()
+}
+
 func (conn *ObfuscatedPacketConn) ReadFrom(p []byte) (int, net.Addr, error) {
 
 	n, addr, err := conn.PacketConn.ReadFrom(p)
@@ -200,8 +220,8 @@ func (conn *ObfuscatedPacketConn) ReadFrom(p []byte) (int, net.Addr, error) {
 			// avoids allocting a buffer.
 
 			if n < (NONCE_SIZE + 1) {
-				return n, addr, common.ContextError(
-					fmt.Errorf("unexpected obfuscated QUIC packet length: %d", n))
+				return n, addr, newTemporaryNetError(common.ContextError(
+					fmt.Errorf("unexpected obfuscated QUIC packet length: %d", n)))
 			}
 
 			cipher, err := chacha20.NewCipher(conn.obfuscationKey[:], p[0:NONCE_SIZE])
@@ -212,8 +232,8 @@ func (conn *ObfuscatedPacketConn) ReadFrom(p []byte) (int, net.Addr, error) {
 
 			paddingLen := int(p[NONCE_SIZE])
 			if paddingLen > MAX_PADDING_SIZE || paddingLen > n-(NONCE_SIZE+1) {
-				return n, addr, common.ContextError(
-					fmt.Errorf("unexpected padding length: %d, %d", paddingLen, n))
+				return n, addr, newTemporaryNetError(common.ContextError(
+					fmt.Errorf("unexpected padding length: %d, %d", paddingLen, n)))
 			}
 
 			n -= (NONCE_SIZE + 1) + paddingLen
@@ -262,8 +282,8 @@ func (conn *ObfuscatedPacketConn) WriteTo(p []byte, addr net.Addr) (int, error)
 		maxQUICPacketSize, maxObfuscatedPacketSize := getMaxPacketSizes(addr)
 
 		if n > maxQUICPacketSize {
-			return 0, common.ContextError(
-				fmt.Errorf("unexpected QUIC packet length: %d", n))
+			return 0, newTemporaryNetError(common.ContextError(
+				fmt.Errorf("unexpected QUIC packet length: %d", n)))
 		}
 
 		// Note: escape analysis showed a local array escaping to the heap,

+ 77 - 1
psiphon/common/quic/quic.go

@@ -68,10 +68,12 @@ var serverIdleTimeout = SERVER_IDLE_TIMEOUT
 // Listener is a net.Listener.
 type Listener struct {
 	quic_go.Listener
+	logger common.Logger
 }
 
 // Listen creates a new Listener.
 func Listen(
+	logger common.Logger,
 	address string,
 	obfuscationKey string) (*Listener, error) {
 
@@ -109,12 +111,17 @@ func Listen(
 		return nil, common.ContextError(err)
 	}
 
-	packetConn, err := NewObfuscatedPacketConnPacketConn(
+	var packetConn net.PacketConn
+	packetConn, err = NewObfuscatedPacketConnPacketConn(
 		udpConn, true, obfuscationKey)
 	if err != nil {
 		return nil, common.ContextError(err)
 	}
 
+	// This wrapping must be outermost to ensure that all
+	// ReadFrom errors are intercepted and logged.
+	packetConn = newLoggingPacketConn(logger, packetConn)
+
 	quicListener, err := quic_go.Listen(
 		packetConn, tlsConfig, quicConfig)
 	if err != nil {
@@ -412,3 +419,72 @@ func isErrorIndicatingClosed(err error) bool {
 	}
 	return false
 }
+
+// loggingPacketConn is a workaround for issues in the quic-go server (as of
+// revision ffdfa1).
+//
+// 1. quic-go will shutdown the QUIC server on any error returned from
+//    ReadFrom, even net.Error.Temporary() errors.
+//
+// 2. The server shutdown hangs due to a mutex deadlock:
+//
+//    sync.(*RWMutex).Lock+0x2c                                          /usr/local/go/src/sync/rwmutex.go:93
+//    [...]/lucas-clemente/quic-go.(*packetHandlerMap).CloseServer+0x41  [...]/lucas-clemente/quic-go/packet_handler_map.go:77
+//    [...]/lucas-clemente/quic-go.(*server).closeWithMutex+0x37         [...]/lucas-clemente/quic-go/server.go:314
+//    [...]/lucas-clemente/quic-go.(*server).closeWithError+0xa2         [...]/lucas-clemente/quic-go/server.go:336
+//    [...]/lucas-clemente/quic-go.(*packetHandlerMap).close+0x1da       [...]/lucas-clemente/quic-go/packet_handler_map.go:115
+//    [...]/lucas-clemente/quic-go.(*packetHandlerMap).listen+0x230      [...]/lucas-clemente/quic-go/packet_handler_map.go:130
+//
+//    packetHandlerMap.CloseServer is attempting to lock the same mutex that
+//    is already locked in packetHandlerMap.close, which deadlocks. As
+//    packetHandlerMap and its mutex are used by all client sessions, this
+//    effectively hangs the entire server.
+//
+// loggingPacketConn PacketConn ReadFrom errors and returns any usable values
+// or loops and calls ReadFrom again. In practise, due to the nature of UDP
+// sockets, ReadFrom errors are exceptional as they will mosyt likely not
+// occur due to network transmission failures. ObfuscatedPacketConn returns
+// errors that could be due to network transmission failures that corrupt
+// packets; these are marked as net.Error.Temporary() and loggingPacketConn
+// logs these at debug level.
+//
+// loggingPacketConn assumes quic-go revision ffdfa1 behavior and will break
+// other behavior, such as setting deadlines and expecting net.Error.Timeout()
+// errors from ReadFrom.
+type loggingPacketConn struct {
+	net.PacketConn
+	logger common.Logger
+}
+
+func newLoggingPacketConn(
+	logger common.Logger,
+	packetConn net.PacketConn) *loggingPacketConn {
+
+	return &loggingPacketConn{
+		PacketConn: packetConn,
+		logger:     logger,
+	}
+}
+
+func (conn *loggingPacketConn) ReadFrom(p []byte) (int, net.Addr, error) {
+
+	for {
+		n, addr, err := conn.PacketConn.ReadFrom(p)
+
+		if err != nil && conn.logger != nil {
+			message := "ReadFrom failed"
+			if e, ok := err.(net.Error); ok && e.Temporary() {
+				conn.logger.WithContextFields(
+					common.LogFields{"error": err}).Debug(message)
+			} else {
+				conn.logger.WithContextFields(
+					common.LogFields{"error": err}).Warning(message)
+			}
+		}
+		err = nil
+
+		if n > 0 || addr != nil {
+			return n, addr, nil
+		}
+	}
+}

+ 1 - 1
psiphon/common/quic/quic_test.go

@@ -62,7 +62,7 @@ func runQUIC(t *testing.T, negotiateQUICVersion string) {
 		t.Fatalf("MakeSecureRandomStringHex failed: %s", err)
 	}
 
-	listener, err := Listen("127.0.0.1:0", obfuscationKey)
+	listener, err := Listen(nil, "127.0.0.1:0", obfuscationKey)
 	if err != nil {
 		t.Fatalf("Listen failed: %s", err)
 	}

+ 1 - 0
psiphon/server/tunnelServer.go

@@ -145,6 +145,7 @@ func (server *TunnelServer) Run() error {
 		if protocol.TunnelProtocolUsesQUIC(tunnelProtocol) {
 
 			listener, err = quic.Listen(
+				CommonLogger(log),
 				localAddress,
 				support.Config.ObfuscatedSSHKey)