Просмотр исходного кода

Merge pull request #153 from rod-hynes/master

Fix: Controller shutdown could delay/hang
Rod Hynes 10 лет назад
Родитель
Сommit
a417b59dbb
6 измененных файлов с 36 добавлено и 16 удалено
  1. 1 1
      psiphon/TCPConn.go
  2. 20 10
      psiphon/controller.go
  3. BIN
      psiphon/controller_test.config.enc
  4. 1 0
      psiphon/net.go
  5. 9 1
      psiphon/serverApi.go
  6. 5 4
      psiphon/tunnel.go

+ 1 - 1
psiphon/TCPConn.go

@@ -94,7 +94,7 @@ func interruptibleTCPDial(addr string, config *DialConfig) (*TCPConn, error) {
 	conn := &TCPConn{dialResult: make(chan error, 1)}
 	conn := &TCPConn{dialResult: make(chan error, 1)}
 
 
 	// Enable interruption
 	// Enable interruption
-	if !config.PendingConns.Add(conn) {
+	if config.PendingConns != nil && !config.PendingConns.Add(conn) {
 		return nil, ContextError(errors.New("pending connections already closed"))
 		return nil, ContextError(errors.New("pending connections already closed"))
 	}
 	}
 
 

+ 20 - 10
psiphon/controller.go

@@ -88,6 +88,8 @@ func NewController(config *Config) (controller *Controller, err error) {
 	// untunneledPendingConns may be used to interrupt the fetch remote server list
 	// untunneledPendingConns may be used to interrupt the fetch remote server list
 	// request and other untunneled connection establishments. BindToDevice may be
 	// request and other untunneled connection establishments. BindToDevice may be
 	// used to exclude these requests and connection from VPN routing.
 	// used to exclude these requests and connection from VPN routing.
+	// TODO: fetch remote server list and untunneled upgrade download should remove
+	// their completed conns from untunneledPendingConns.
 	untunneledPendingConns := new(Conns)
 	untunneledPendingConns := new(Conns)
 	untunneledDialConfig := &DialConfig{
 	untunneledDialConfig := &DialConfig{
 		UpstreamProxyUrl:              config.UpstreamProxyUrl,
 		UpstreamProxyUrl:              config.UpstreamProxyUrl,
@@ -201,20 +203,28 @@ func (controller *Controller) Run(shutdownBroadcast <-chan struct{}) {
 	}
 	}
 
 
 	close(controller.shutdownBroadcast)
 	close(controller.shutdownBroadcast)
+
+	// Interrupts and stops establish workers blocking on
+	// tunnel establishment network operations.
 	controller.establishPendingConns.CloseAll()
 	controller.establishPendingConns.CloseAll()
-	controller.runWaitGroup.Wait()
 
 
-	// Stops untunneled connections, including fetch remote server list,
-	// split tunnel port forwards and also untunneled final stats requests.
-	// Note: there's a circular dependency with runWaitGroup.Wait() and
-	// untunneledPendingConns.CloseAll(): runWaitGroup depends on tunnels
-	// stopping which depends, in orderly shutdown, on final status requests
-	// completing. So this pending conns cancel comes too late to interrupt
-	// final status requests in the orderly shutdown case -- which is desired
-	// since we give those a short timeout and would prefer to not interrupt
-	// them.
+	// Interrupts and stops workers blocking on untunneled
+	// network operations. This includes fetch remote server
+	// list and untunneled uprade download.
+	// Note: this doesn't interrupt the final, untunneled status
+	// requests started in operateTunnel after shutdownBroadcast.
+	// This is by design -- we want to give these requests a short
+	// timer period to succeed and deliver stats. These particular
+	// requests opt out of untunneledPendingConns and use the
+	// PSIPHON_API_SHUTDOWN_SERVER_TIMEOUT timeout (see
+	// doUntunneledStatusRequest).
 	controller.untunneledPendingConns.CloseAll()
 	controller.untunneledPendingConns.CloseAll()
 
 
+	// Now with all workers signaled to stop and with all
+	// blocking network operations interrupted, wait for
+	// all workers to terminate.
+	controller.runWaitGroup.Wait()
+
 	controller.splitTunnelClassifier.Shutdown()
 	controller.splitTunnelClassifier.Shutdown()
 
 
 	NoticeInfo("exiting controller")
 	NoticeInfo("exiting controller")

BIN
psiphon/controller_test.config.enc


+ 1 - 0
psiphon/net.go

@@ -59,6 +59,7 @@ type DialConfig struct {
 	// Dials may be interrupted using PendingConns.CloseAll(). Once instantiated,
 	// Dials may be interrupted using PendingConns.CloseAll(). Once instantiated,
 	// a conn is added to pendingConns before the network connect begins and
 	// a conn is added to pendingConns before the network connect begins and
 	// removed from pendingConns once the connect succeeds or fails.
 	// removed from pendingConns once the connect succeeds or fails.
+	// May be nil.
 	PendingConns *Conns
 	PendingConns *Conns
 
 
 	// BindToDevice parameters are used to exclude connections and
 	// BindToDevice parameters are used to exclude connections and

+ 9 - 1
psiphon/serverApi.go

@@ -422,13 +422,21 @@ func doUntunneledStatusRequest(
 		return ContextError(err)
 		return ContextError(err)
 	}
 	}
 
 
+	dialConfig := tunnel.untunneledDialConfig
+
 	timeout := PSIPHON_API_SERVER_TIMEOUT
 	timeout := PSIPHON_API_SERVER_TIMEOUT
 	if isShutdown {
 	if isShutdown {
 		timeout = PSIPHON_API_SHUTDOWN_SERVER_TIMEOUT
 		timeout = PSIPHON_API_SHUTDOWN_SERVER_TIMEOUT
+
+		// Use a copy of DialConfig without pendingConns. This ensures
+		// this request isn't interrupted/canceled. This measure should
+		// be used only with the very short PSIPHON_API_SHUTDOWN_SERVER_TIMEOUT.
+		dialConfig = new(DialConfig)
+		*dialConfig = *tunnel.untunneledDialConfig
 	}
 	}
 
 
 	httpClient, requestUrl, err := MakeUntunneledHttpsClient(
 	httpClient, requestUrl, err := MakeUntunneledHttpsClient(
-		tunnel.untunneledDialConfig,
+		dialConfig,
 		certificate,
 		certificate,
 		url,
 		url,
 		timeout)
 		timeout)

+ 5 - 4
psiphon/tunnel.go

@@ -869,10 +869,11 @@ func (tunnel *Tunnel) operateTunnel(tunnelOwner TunnelOwner) {
 	// everything must be wrapped up quickly. Also, we still have a working
 	// everything must be wrapped up quickly. Also, we still have a working
 	// tunnel. So we first attempt a tunneled status request (with a short
 	// tunnel. So we first attempt a tunneled status request (with a short
 	// timeout) and then attempt, synchronously -- otherwise the Contoller's
 	// timeout) and then attempt, synchronously -- otherwise the Contoller's
-	// untunneledPendingConns.CloseAll() will immediately interrupt untunneled
-	// requests -- untunneled requests (also with short timeouts).
-	// Note that this depends on the order of untunneledPendingConns.CloseAll()
-	// coming after tunnel.Close(): see note in Controller.Run().
+	// runWaitGroup.Wait() will return while a request is still in progress
+	// -- untunneled requests (also with short timeouts). Note that in this
+	// case the untunneled request will opt out of untunneledPendingConns so
+	// that it's not inadvertently canceled by the Controller shutdown
+	// sequence (see doUntunneledStatusRequest).
 	//
 	//
 	// If the tunnel has failed, the Controller may continue working. We want
 	// If the tunnel has failed, the Controller may continue working. We want
 	// to re-establish as soon as possible (so don't want to block on status
 	// to re-establish as soon as possible (so don't want to block on status