tunnelServer.go 71 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186
  1. /*
  2. * Copyright (c) 2016, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. package server
import (
	"context"
	"crypto/subtle"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net"
	"os"
	"strconv"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/Psiphon-Inc/crypto/ssh"
	cache "github.com/Psiphon-Inc/go-cache"
	"github.com/Psiphon-Inc/goarista/monotime"
	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/osl"
	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/protocol"
)
// Tunnel server tuning parameters.
//
// NOTE(review): only OSL_SESSION_CACHE_TTL is referenced in the portion of
// the file reviewed here; the remaining descriptions are inferred from the
// constant names and should be confirmed against their use sites.
const (
	// Presumably the minimum interval between SSH authentication log
	// entries -- TODO confirm.
	SSH_AUTH_LOG_PERIOD = 30 * time.Minute
	// Presumably the time allowed for an SSH handshake to complete --
	// TODO confirm.
	SSH_HANDSHAKE_TIMEOUT = 30 * time.Second
	// Presumably the read deadline applied to client connections --
	// TODO confirm.
	SSH_CONNECTION_READ_DEADLINE = 5 * time.Minute
	// Presumably the buffer size, in bytes, used when relaying TCP port
	// forward traffic -- TODO confirm.
	SSH_TCP_PORT_FORWARD_COPY_BUFFER_SIZE = 8192
	// Presumably the capacity of the queue of pending TCP port forward
	// requests -- TODO confirm.
	SSH_TCP_PORT_FORWARD_QUEUE_SIZE = 1024
	// Presumably the initial delay before retrying a failed OSL send, and
	// the factor by which that delay grows -- TODO confirm.
	SSH_SEND_OSL_INITIAL_RETRY_DELAY = 30 * time.Second
	SSH_SEND_OSL_RETRY_FACTOR = 2
	// OSL_SESSION_CACHE_TTL is the default expiration used for entries in
	// the OSL session cache created by newSSHServer.
	OSL_SESSION_CACHE_TTL = 5 * time.Minute
)
// TunnelServer is the main server that accepts Psiphon client
// connections, via various obfuscation protocols, and provides
// port forwarding (TCP and UDP) services to the Psiphon client.
// At its core, TunnelServer is an SSH server. SSH is the base
// protocol that provides port forward multiplexing, and transport
// security. Layered on top of SSH, optionally, are the Obfuscated SSH
// and meek protocols, which provide further circumvention
// capabilities.
type TunnelServer struct {
	// runWaitGroup tracks the listener goroutines started by Run.
	runWaitGroup *sync.WaitGroup
	// listenerError carries an unrecoverable listener error back to Run.
	listenerError chan error
	// shutdownBroadcast, when signaled, causes Run to halt.
	shutdownBroadcast <-chan struct{}
	// sshServer is the underlying SSH server to which the exported
	// TunnelServer methods delegate.
	sshServer *sshServer
}
  64. // NewTunnelServer initializes a new tunnel server.
  65. func NewTunnelServer(
  66. support *SupportServices,
  67. shutdownBroadcast <-chan struct{}) (*TunnelServer, error) {
  68. sshServer, err := newSSHServer(support, shutdownBroadcast)
  69. if err != nil {
  70. return nil, common.ContextError(err)
  71. }
  72. return &TunnelServer{
  73. runWaitGroup: new(sync.WaitGroup),
  74. listenerError: make(chan error),
  75. shutdownBroadcast: shutdownBroadcast,
  76. sshServer: sshServer,
  77. }, nil
  78. }
  79. // Run runs the tunnel server; this function blocks while running a selection of
  80. // listeners that handle connection using various obfuscation protocols.
  81. //
  82. // Run listens on each designated tunnel port and spawns new goroutines to handle
  83. // each client connection. It halts when shutdownBroadcast is signaled. A list of active
  84. // clients is maintained, and when halting all clients are cleanly shutdown.
  85. //
  86. // Each client goroutine handles its own obfuscation (optional), SSH handshake, SSH
  87. // authentication, and then looping on client new channel requests. "direct-tcpip"
  88. // channels, dynamic port fowards, are supported. When the UDPInterceptUdpgwServerAddress
  89. // config parameter is configured, UDP port forwards over a TCP stream, following
  90. // the udpgw protocol, are handled.
  91. //
  92. // A new goroutine is spawned to handle each port forward for each client. Each port
  93. // forward tracks its bytes transferred. Overall per-client stats for connection duration,
  94. // GeoIP, number of port forwards, and bytes transferred are tracked and logged when the
  95. // client shuts down.
  96. //
  97. // Note: client handler goroutines may still be shutting down after Run() returns. See
  98. // comment in sshClient.stop(). TODO: fully synchronized shutdown.
  99. func (server *TunnelServer) Run() error {
  100. type sshListener struct {
  101. net.Listener
  102. localAddress string
  103. tunnelProtocol string
  104. }
  105. // TODO: should TunnelServer hold its own support pointer?
  106. support := server.sshServer.support
  107. // First bind all listeners; once all are successful,
  108. // start accepting connections on each.
  109. var listeners []*sshListener
  110. for tunnelProtocol, listenPort := range support.Config.TunnelProtocolPorts {
  111. localAddress := fmt.Sprintf(
  112. "%s:%d", support.Config.ServerIPAddress, listenPort)
  113. listener, err := net.Listen("tcp", localAddress)
  114. if err != nil {
  115. for _, existingListener := range listeners {
  116. existingListener.Listener.Close()
  117. }
  118. return common.ContextError(err)
  119. }
  120. log.WithContextFields(
  121. LogFields{
  122. "localAddress": localAddress,
  123. "tunnelProtocol": tunnelProtocol,
  124. }).Info("listening")
  125. listeners = append(
  126. listeners,
  127. &sshListener{
  128. Listener: listener,
  129. localAddress: localAddress,
  130. tunnelProtocol: tunnelProtocol,
  131. })
  132. }
  133. for _, listener := range listeners {
  134. server.runWaitGroup.Add(1)
  135. go func(listener *sshListener) {
  136. defer server.runWaitGroup.Done()
  137. log.WithContextFields(
  138. LogFields{
  139. "localAddress": listener.localAddress,
  140. "tunnelProtocol": listener.tunnelProtocol,
  141. }).Info("running")
  142. server.sshServer.runListener(
  143. listener.Listener,
  144. server.listenerError,
  145. listener.tunnelProtocol)
  146. log.WithContextFields(
  147. LogFields{
  148. "localAddress": listener.localAddress,
  149. "tunnelProtocol": listener.tunnelProtocol,
  150. }).Info("stopped")
  151. }(listener)
  152. }
  153. var err error
  154. select {
  155. case <-server.shutdownBroadcast:
  156. case err = <-server.listenerError:
  157. }
  158. for _, listener := range listeners {
  159. listener.Close()
  160. }
  161. server.sshServer.stopClients()
  162. server.runWaitGroup.Wait()
  163. log.WithContext().Info("stopped")
  164. return err
  165. }
  166. // GetLoadStats returns load stats for the tunnel server. The stats are
  167. // broken down by protocol ("SSH", "OSSH", etc.) and type. Types of stats
  168. // include current connected client count, total number of current port
  169. // forwards.
  170. func (server *TunnelServer) GetLoadStats() (ProtocolStats, RegionStats) {
  171. return server.sshServer.getLoadStats()
  172. }
// ResetAllClientTrafficRules resets all established client traffic rules
// to use the latest config and client properties. Any existing traffic
// rule state is lost, including throttling state.
func (server *TunnelServer) ResetAllClientTrafficRules() {
	// Delegates to the SSH server, which re-applies traffic rules to each
	// currently registered client.
	server.sshServer.resetAllClientTrafficRules()
}
// ResetAllClientOSLConfigs resets all established client OSL state to use
// the latest OSL config. Any existing OSL state is lost, including partial
// progress towards SLOKs.
func (server *TunnelServer) ResetAllClientOSLConfigs() {
	// Delegates to the SSH server, which flushes the OSL session cache and
	// resets the OSL config of each currently registered client.
	server.sshServer.resetAllClientOSLConfigs()
}
  185. // SetClientHandshakeState sets the handshake state -- that it completed and
  186. // what paramaters were passed -- in sshClient. This state is used for allowing
  187. // port forwards and for future traffic rule selection. SetClientHandshakeState
  188. // also triggers an immediate traffic rule re-selection, as the rules selected
  189. // upon tunnel establishment may no longer apply now that handshake values are
  190. // set.
  191. func (server *TunnelServer) SetClientHandshakeState(
  192. sessionID string, state handshakeState) error {
  193. return server.sshServer.setClientHandshakeState(sessionID, state)
  194. }
  195. // GetClientHandshaked indicates whether the client has completed a handshake
  196. // and whether its traffic rules are immediately exhausted.
  197. func (server *TunnelServer) GetClientHandshaked(
  198. sessionID string) (bool, bool, error) {
  199. return server.sshServer.getClientHandshaked(sessionID)
  200. }
// SetEstablishTunnels sets whether new tunnels may be established or not.
// When not establishing, incoming connections are immediately closed.
// Calling it with the setting already in effect is a no-op.
func (server *TunnelServer) SetEstablishTunnels(establish bool) {
	server.sshServer.setEstablishTunnels(establish)
}
  206. // GetEstablishTunnels returns whether new tunnels may be established or not.
  207. func (server *TunnelServer) GetEstablishTunnels() bool {
  208. return server.sshServer.getEstablishTunnels()
  209. }
// sshServer holds the state shared by all listeners and client handlers:
// the host key, the establish-tunnels flag, the registry of accepted and
// established clients, and the OSL session cache.
type sshServer struct {
	// Note: 64-bit ints used with atomic operations are placed
	// at the start of struct to ensure 64-bit alignment.
	// (https://golang.org/pkg/sync/atomic/#pkg-note-BUG)
	// NOTE(review): presumably these track when auth failures were last
	// logged and how many occurred since -- confirm against use sites.
	lastAuthLog     int64
	authFailedCount int64
	// support provides config and shared services (e.g., GeoIP lookup).
	support *SupportServices
	// establishTunnels is accessed atomically; 1 means new tunnels may be
	// established.
	establishTunnels int32
	// shutdownBroadcast, when signaled, stops the accept loops.
	shutdownBroadcast <-chan struct{}
	sshHostKey        ssh.Signer
	// clientsMutex guards stoppingClients, acceptedClientCounts and clients.
	clientsMutex sync.Mutex
	// stoppingClients is set by stopClients; once set, no further clients
	// are registered as established.
	stoppingClients bool
	// acceptedClientCounts is indexed by [tunnel protocol][region].
	acceptedClientCounts map[string]map[string]int64
	// clients maps session ID to the established client.
	clients map[string]*sshClient
	// oslSessionCacheMutex is held while flushing oslSessionCache.
	oslSessionCacheMutex sync.Mutex
	// oslSessionCache temporarily retains OSL seed state for disconnected
	// clients; see newSSHServer.
	oslSessionCache *cache.Cache
}
  227. func newSSHServer(
  228. support *SupportServices,
  229. shutdownBroadcast <-chan struct{}) (*sshServer, error) {
  230. privateKey, err := ssh.ParseRawPrivateKey([]byte(support.Config.SSHPrivateKey))
  231. if err != nil {
  232. return nil, common.ContextError(err)
  233. }
  234. // TODO: use cert (ssh.NewCertSigner) for anti-fingerprint?
  235. signer, err := ssh.NewSignerFromKey(privateKey)
  236. if err != nil {
  237. return nil, common.ContextError(err)
  238. }
  239. // The OSL session cache temporarily retains OSL seed state
  240. // progress for disconnected clients. This enables clients
  241. // that disconnect and immediately reconnect to the same
  242. // server to resume their OSL progress. Cached progress
  243. // is referenced by session ID and is retained for
  244. // OSL_SESSION_CACHE_TTL after disconnect.
  245. //
  246. // Note: session IDs are assumed to be unpredictable. If a
  247. // rogue client could guess the session ID of another client,
  248. // it could resume its OSL progress and, if the OSL config
  249. // were known, infer some activity.
  250. oslSessionCache := cache.New(OSL_SESSION_CACHE_TTL, 1*time.Minute)
  251. return &sshServer{
  252. support: support,
  253. establishTunnels: 1,
  254. shutdownBroadcast: shutdownBroadcast,
  255. sshHostKey: signer,
  256. acceptedClientCounts: make(map[string]map[string]int64),
  257. clients: make(map[string]*sshClient),
  258. oslSessionCache: oslSessionCache,
  259. }, nil
  260. }
  261. func (sshServer *sshServer) setEstablishTunnels(establish bool) {
  262. // Do nothing when the setting is already correct. This avoids
  263. // spurious log messages when setEstablishTunnels is called
  264. // periodically with the same setting.
  265. if establish == sshServer.getEstablishTunnels() {
  266. return
  267. }
  268. establishFlag := int32(1)
  269. if !establish {
  270. establishFlag = 0
  271. }
  272. atomic.StoreInt32(&sshServer.establishTunnels, establishFlag)
  273. log.WithContextFields(
  274. LogFields{"establish": establish}).Info("establishing tunnels")
  275. }
  276. func (sshServer *sshServer) getEstablishTunnels() bool {
  277. return atomic.LoadInt32(&sshServer.establishTunnels) == 1
  278. }
  279. // runListener is intended to run an a goroutine; it blocks
  280. // running a particular listener. If an unrecoverable error
  281. // occurs, it will send the error to the listenerError channel.
  282. func (sshServer *sshServer) runListener(
  283. listener net.Listener,
  284. listenerError chan<- error,
  285. listenerTunnelProtocol string) {
  286. runningProtocols := make([]string, 0)
  287. for tunnelProtocol, _ := range sshServer.support.Config.TunnelProtocolPorts {
  288. runningProtocols = append(runningProtocols, tunnelProtocol)
  289. }
  290. handleClient := func(clientTunnelProtocol string, clientConn net.Conn) {
  291. // Note: establish tunnel limiter cannot simply stop TCP
  292. // listeners in all cases (e.g., meek) since SSH tunnel can
  293. // span multiple TCP connections.
  294. if !sshServer.getEstablishTunnels() {
  295. log.WithContext().Debug("not establishing tunnels")
  296. clientConn.Close()
  297. return
  298. }
  299. // The tunnelProtocol passed to handleClient is used for stats,
  300. // throttling, etc. When the tunnel protocol can be determined
  301. // unambiguously from the listening port, use that protocol and
  302. // don't use any client-declared value. Only use the client's
  303. // value, if present, in special cases where the listenting port
  304. // cannot distinguish the protocol.
  305. tunnelProtocol := listenerTunnelProtocol
  306. if clientTunnelProtocol != "" &&
  307. protocol.UseClientTunnelProtocol(
  308. clientTunnelProtocol, runningProtocols) {
  309. tunnelProtocol = clientTunnelProtocol
  310. }
  311. // process each client connection concurrently
  312. go sshServer.handleClient(tunnelProtocol, clientConn)
  313. }
  314. // Note: when exiting due to a unrecoverable error, be sure
  315. // to try to send the error to listenerError so that the outer
  316. // TunnelServer.Run will properly shut down instead of remaining
  317. // running.
  318. if protocol.TunnelProtocolUsesMeekHTTP(listenerTunnelProtocol) ||
  319. protocol.TunnelProtocolUsesMeekHTTPS(listenerTunnelProtocol) {
  320. meekServer, err := NewMeekServer(
  321. sshServer.support,
  322. listener,
  323. protocol.TunnelProtocolUsesMeekHTTPS(listenerTunnelProtocol),
  324. protocol.TunnelProtocolUsesObfuscatedSessionTickets(listenerTunnelProtocol),
  325. handleClient,
  326. sshServer.shutdownBroadcast)
  327. if err != nil {
  328. select {
  329. case listenerError <- common.ContextError(err):
  330. default:
  331. }
  332. return
  333. }
  334. meekServer.Run()
  335. } else {
  336. for {
  337. conn, err := listener.Accept()
  338. select {
  339. case <-sshServer.shutdownBroadcast:
  340. if err == nil {
  341. conn.Close()
  342. }
  343. return
  344. default:
  345. }
  346. if err != nil {
  347. if e, ok := err.(net.Error); ok && e.Temporary() {
  348. log.WithContextFields(LogFields{"error": err}).Error("accept failed")
  349. // Temporary error, keep running
  350. continue
  351. }
  352. select {
  353. case listenerError <- common.ContextError(err):
  354. default:
  355. }
  356. return
  357. }
  358. handleClient("", conn)
  359. }
  360. }
  361. }
  362. // An accepted client has completed a direct TCP or meek connection and has a net.Conn. Registration
  363. // is for tracking the number of connections.
  364. func (sshServer *sshServer) registerAcceptedClient(tunnelProtocol, region string) {
  365. sshServer.clientsMutex.Lock()
  366. defer sshServer.clientsMutex.Unlock()
  367. if sshServer.acceptedClientCounts[tunnelProtocol] == nil {
  368. sshServer.acceptedClientCounts[tunnelProtocol] = make(map[string]int64)
  369. }
  370. sshServer.acceptedClientCounts[tunnelProtocol][region] += 1
  371. }
  372. func (sshServer *sshServer) unregisterAcceptedClient(tunnelProtocol, region string) {
  373. sshServer.clientsMutex.Lock()
  374. defer sshServer.clientsMutex.Unlock()
  375. sshServer.acceptedClientCounts[tunnelProtocol][region] -= 1
  376. }
  377. // An established client has completed its SSH handshake and has a ssh.Conn. Registration is
  378. // for tracking the number of fully established clients and for maintaining a list of running
  379. // clients (for stopping at shutdown time).
  380. func (sshServer *sshServer) registerEstablishedClient(client *sshClient) bool {
  381. sshServer.clientsMutex.Lock()
  382. if sshServer.stoppingClients {
  383. sshServer.clientsMutex.Unlock()
  384. return false
  385. }
  386. // In the case of a duplicate client sessionID, the previous client is closed.
  387. // - Well-behaved clients generate pick a random sessionID that should be
  388. // unique (won't accidentally conflict) and hard to guess (can't be targetted
  389. // by a malicious client).
  390. // - Clients reuse the same sessionID when a tunnel is unexpectedly disconnected
  391. // and resestablished. In this case, when the same server is selected, this logic
  392. // will be hit; closing the old, dangling client is desirable.
  393. // - Multi-tunnel clients should not normally use one server for multiple tunnels.
  394. existingClient := sshServer.clients[client.sessionID]
  395. sshServer.clients[client.sessionID] = client
  396. sshServer.clientsMutex.Unlock()
  397. // Call stop() outside the mutex to avoid deadlock.
  398. if existingClient != nil {
  399. existingClient.stop()
  400. log.WithContext().Info(
  401. "stopped existing client with duplicate session ID")
  402. }
  403. return true
  404. }
  405. func (sshServer *sshServer) unregisterEstablishedClient(client *sshClient) {
  406. sshServer.clientsMutex.Lock()
  407. registeredClient := sshServer.clients[client.sessionID]
  408. // registeredClient will differ from client when client
  409. // is the existingClient terminated in registerEstablishedClient.
  410. // In that case, registeredClient remains connected, and
  411. // the sshServer.clients entry should be retained.
  412. if registeredClient == client {
  413. delete(sshServer.clients, client.sessionID)
  414. }
  415. sshServer.clientsMutex.Unlock()
  416. // Call stop() outside the mutex to avoid deadlock.
  417. client.stop()
  418. }
// ProtocolStats holds load stat counts indexed by
// [<protocol or ALL>][<stat name>].
type ProtocolStats map[string]map[string]int64

// RegionStats holds load stat counts indexed by
// [<region>][<protocol or ALL>][<stat name>].
type RegionStats map[string]map[string]map[string]int64
  421. func (sshServer *sshServer) getLoadStats() (ProtocolStats, RegionStats) {
  422. sshServer.clientsMutex.Lock()
  423. defer sshServer.clientsMutex.Unlock()
  424. // Explicitly populate with zeros to ensure 0 counts in log messages
  425. zeroStats := func() map[string]int64 {
  426. stats := make(map[string]int64)
  427. stats["accepted_clients"] = 0
  428. stats["established_clients"] = 0
  429. stats["dialing_tcp_port_forwards"] = 0
  430. stats["tcp_port_forwards"] = 0
  431. stats["total_tcp_port_forwards"] = 0
  432. stats["udp_port_forwards"] = 0
  433. stats["total_udp_port_forwards"] = 0
  434. stats["tcp_port_forward_dialed_count"] = 0
  435. stats["tcp_port_forward_dialed_duration"] = 0
  436. stats["tcp_port_forward_failed_count"] = 0
  437. stats["tcp_port_forward_failed_duration"] = 0
  438. stats["tcp_port_forward_rejected_dialing_limit_count"] = 0
  439. return stats
  440. }
  441. zeroProtocolStats := func() map[string]map[string]int64 {
  442. stats := make(map[string]map[string]int64)
  443. stats["ALL"] = zeroStats()
  444. for tunnelProtocol, _ := range sshServer.support.Config.TunnelProtocolPorts {
  445. stats[tunnelProtocol] = zeroStats()
  446. }
  447. return stats
  448. }
  449. // [<protocol or ALL>][<stat name>] -> count
  450. protocolStats := zeroProtocolStats()
  451. // [<region][<protocol or ALL>][<stat name>] -> count
  452. regionStats := make(RegionStats)
  453. // Note: as currently tracked/counted, each established client is also an accepted client
  454. for tunnelProtocol, regionAcceptedClientCounts := range sshServer.acceptedClientCounts {
  455. for region, acceptedClientCount := range regionAcceptedClientCounts {
  456. if acceptedClientCount > 0 {
  457. if regionStats[region] == nil {
  458. regionStats[region] = zeroProtocolStats()
  459. }
  460. protocolStats["ALL"]["accepted_clients"] += acceptedClientCount
  461. protocolStats[tunnelProtocol]["accepted_clients"] += acceptedClientCount
  462. regionStats[region]["ALL"]["accepted_clients"] += acceptedClientCount
  463. regionStats[region][tunnelProtocol]["accepted_clients"] += acceptedClientCount
  464. }
  465. }
  466. }
  467. for _, client := range sshServer.clients {
  468. client.Lock()
  469. tunnelProtocol := client.tunnelProtocol
  470. region := client.geoIPData.Country
  471. if regionStats[region] == nil {
  472. regionStats[region] = zeroProtocolStats()
  473. }
  474. stats := []map[string]int64{
  475. protocolStats["ALL"],
  476. protocolStats[tunnelProtocol],
  477. regionStats[region]["ALL"],
  478. regionStats[region][tunnelProtocol]}
  479. for _, stat := range stats {
  480. stat["established_clients"] += 1
  481. // Note: can't sum trafficState.peakConcurrentPortForwardCount to get a global peak
  482. stat["dialing_tcp_port_forwards"] += client.tcpTrafficState.concurrentDialingPortForwardCount
  483. stat["tcp_port_forwards"] += client.tcpTrafficState.concurrentPortForwardCount
  484. stat["total_tcp_port_forwards"] += client.tcpTrafficState.totalPortForwardCount
  485. // client.udpTrafficState.concurrentDialingPortForwardCount isn't meaningful
  486. stat["udp_port_forwards"] += client.udpTrafficState.concurrentPortForwardCount
  487. stat["total_udp_port_forwards"] += client.udpTrafficState.totalPortForwardCount
  488. stat["tcp_port_forward_dialed_count"] += client.qualityMetrics.tcpPortForwardDialedCount
  489. stat["tcp_port_forward_dialed_duration"] +=
  490. int64(client.qualityMetrics.tcpPortForwardDialedDuration / time.Millisecond)
  491. stat["tcp_port_forward_failed_count"] += client.qualityMetrics.tcpPortForwardFailedCount
  492. stat["tcp_port_forward_failed_duration"] +=
  493. int64(client.qualityMetrics.tcpPortForwardFailedDuration / time.Millisecond)
  494. stat["tcp_port_forward_rejected_dialing_limit_count"] +=
  495. client.qualityMetrics.tcpPortForwardRejectedDialingLimitCount
  496. }
  497. client.qualityMetrics.tcpPortForwardDialedCount = 0
  498. client.qualityMetrics.tcpPortForwardDialedDuration = 0
  499. client.qualityMetrics.tcpPortForwardFailedCount = 0
  500. client.qualityMetrics.tcpPortForwardFailedDuration = 0
  501. client.qualityMetrics.tcpPortForwardRejectedDialingLimitCount = 0
  502. client.Unlock()
  503. }
  504. return protocolStats, regionStats
  505. }
  506. func (sshServer *sshServer) resetAllClientTrafficRules() {
  507. sshServer.clientsMutex.Lock()
  508. clients := make(map[string]*sshClient)
  509. for sessionID, client := range sshServer.clients {
  510. clients[sessionID] = client
  511. }
  512. sshServer.clientsMutex.Unlock()
  513. for _, client := range clients {
  514. client.setTrafficRules()
  515. }
  516. }
  517. func (sshServer *sshServer) resetAllClientOSLConfigs() {
  518. // Flush cached seed state. This has the same effect
  519. // and same limitations as calling setOSLConfig for
  520. // currently connected clients -- all progress is lost.
  521. sshServer.oslSessionCacheMutex.Lock()
  522. sshServer.oslSessionCache.Flush()
  523. sshServer.oslSessionCacheMutex.Unlock()
  524. sshServer.clientsMutex.Lock()
  525. clients := make(map[string]*sshClient)
  526. for sessionID, client := range sshServer.clients {
  527. clients[sessionID] = client
  528. }
  529. sshServer.clientsMutex.Unlock()
  530. for _, client := range clients {
  531. client.setOSLConfig()
  532. }
  533. }
  534. func (sshServer *sshServer) setClientHandshakeState(
  535. sessionID string, state handshakeState) error {
  536. sshServer.clientsMutex.Lock()
  537. client := sshServer.clients[sessionID]
  538. sshServer.clientsMutex.Unlock()
  539. if client == nil {
  540. return common.ContextError(errors.New("unknown session ID"))
  541. }
  542. err := client.setHandshakeState(state)
  543. if err != nil {
  544. return common.ContextError(err)
  545. }
  546. return nil
  547. }
  548. func (sshServer *sshServer) getClientHandshaked(
  549. sessionID string) (bool, bool, error) {
  550. sshServer.clientsMutex.Lock()
  551. client := sshServer.clients[sessionID]
  552. sshServer.clientsMutex.Unlock()
  553. if client == nil {
  554. return false, false, common.ContextError(errors.New("unknown session ID"))
  555. }
  556. completed, exhausted := client.getHandshaked()
  557. return completed, exhausted, nil
  558. }
  559. func (sshServer *sshServer) stopClients() {
  560. sshServer.clientsMutex.Lock()
  561. sshServer.stoppingClients = true
  562. clients := sshServer.clients
  563. sshServer.clients = make(map[string]*sshClient)
  564. sshServer.clientsMutex.Unlock()
  565. for _, client := range clients {
  566. client.stop()
  567. }
  568. }
  569. func (sshServer *sshServer) handleClient(tunnelProtocol string, clientConn net.Conn) {
  570. geoIPData := sshServer.support.GeoIPService.Lookup(
  571. common.IPAddressFromAddr(clientConn.RemoteAddr()))
  572. sshServer.registerAcceptedClient(tunnelProtocol, geoIPData.Country)
  573. defer sshServer.unregisterAcceptedClient(tunnelProtocol, geoIPData.Country)
  574. sshClient := newSshClient(sshServer, tunnelProtocol, geoIPData)
  575. sshClient.run(clientConn)
  576. }
  577. func (sshServer *sshServer) monitorPortForwardDialError(err error) {
  578. // "err" is the error returned from a failed TCP or UDP port
  579. // forward dial. Certain system error codes indicate low resource
  580. // conditions: insufficient file descriptors, ephemeral ports, or
  581. // memory. For these cases, log an alert.
  582. // TODO: also temporarily suspend new clients
  583. // Note: don't log net.OpError.Error() as the full error string
  584. // may contain client destination addresses.
  585. opErr, ok := err.(*net.OpError)
  586. if ok {
  587. if opErr.Err == syscall.EADDRNOTAVAIL ||
  588. opErr.Err == syscall.EAGAIN ||
  589. opErr.Err == syscall.ENOMEM ||
  590. opErr.Err == syscall.EMFILE ||
  591. opErr.Err == syscall.ENFILE {
  592. log.WithContextFields(
  593. LogFields{"error": opErr.Err}).Error(
  594. "port forward dial failed due to unavailable resource")
  595. }
  596. }
  597. }
// sshClient holds the state for a single client connection. The embedded
// Mutex is locked around access to the mutable fields (e.g., sshConn,
// sessionID, handshakeState, oslClientSeedState) in the methods of this type.
type sshClient struct {
	sync.Mutex
	// sshServer is the owning server; set in newSshClient.
	sshServer *sshServer
	// tunnelProtocol is set in newSshClient; run() uses it to decide whether
	// to wrap the connection in an obfuscated SSH conn.
	tunnelProtocol string
	// sshConn is set in run() once the SSH handshake succeeds; stop() closes it.
	sshConn ssh.Conn
	// activityConn is set in run(); logTunnel reads its start time and
	// active duration.
	activityConn *common.ActivityMonitoredConn
	// throttledConn is the rate limiting wrapper; set in run().
	throttledConn *common.ThrottledConn
	// geoIPData is set in newSshClient.
	geoIPData GeoIPData
	// sessionID is set in passwordCallback from the client's password payload.
	sessionID string
	// supportsServerRequests is set in passwordCallback from the client's
	// declared capabilities; runTunnel starts the OSL sender only when true.
	supportsServerRequests bool
	// handshakeState is recorded by setHandshakeState.
	handshakeState handshakeState
	udpChannel ssh.Channel
	// trafficRules is read by getHandshaked; presumably assigned by
	// setTrafficRules, which is not visible here -- TODO confirm.
	trafficRules TrafficRules
	// tcpTrafficState and udpTrafficState hold the per-protocol counters
	// reported by logTunnel.
	tcpTrafficState trafficState
	udpTrafficState trafficState
	// qualityMetrics is reported and reset in sshServer.getLoadStats().
	qualityMetrics qualityMetrics
	// tcpPortForwardLRU is created in newSshClient with common.NewLRUConns().
	tcpPortForwardLRU *common.LRUConns
	// oslClientSeedState, when non-nil, is hibernated and moved to the
	// server's OSL session cache at the end of run().
	oslClientSeedState *osl.ClientSeedState
	// signalIssueSLOKs is a buffered (size 1) channel that runOSLSender
	// waits on for a signal that there are SLOKs to send.
	signalIssueSLOKs chan struct{}
	// runContext and stopRunning are created together in newSshClient;
	// runTunnel calls stopRunning to stop its worker goroutines.
	runContext context.Context
	stopRunning context.CancelFunc
	// tcpPortForwardDialingAvailableSignal is presumably what
	// setTCPPortForwardDialingAvailableSignal stores (see runTunnel); the
	// setter is not visible here -- TODO confirm.
	tcpPortForwardDialingAvailableSignal context.CancelFunc
}
// trafficState holds traffic and port forward counters. An sshClient has one
// trafficState for TCP and one for UDP; logTunnel reports the byte counts,
// peak counts and total count of each.
type trafficState struct {
	// bytesUp and bytesDown are reported as bytes_up_* and bytes_down_*.
	bytesUp int64
	bytesDown int64
	// concurrentDialingPortForwardCount is the number of port forwards
	// currently dialing; peakConcurrentDialingPortForwardCount is reported by
	// logTunnel for TCP only.
	concurrentDialingPortForwardCount int64
	peakConcurrentDialingPortForwardCount int64
	// concurrentPortForwardCount is the number of established port forwards;
	// its peak is reported by logTunnel.
	concurrentPortForwardCount int64
	peakConcurrentPortForwardCount int64
	// totalPortForwardCount is reported as total_port_forward_count_*.
	totalPortForwardCount int64
	// availablePortForwardCond is initialized in newSshClient with its own
	// new sync.Mutex.
	availablePortForwardCond *sync.Cond
}
// qualityMetrics records upstream TCP dial attempts and
// elapsed time. Elapsed time includes the full TCP handshake
// and, in aggregate, is a measure of the quality of the
// upstream link. These stats are recorded by each sshClient
// and then reported and reset in sshServer.getLoadStats().
type qualityMetrics struct {
	// Count and cumulative duration of dials that succeeded
	// (going by the field names -- TODO confirm against the recording code).
	tcpPortForwardDialedCount int64
	tcpPortForwardDialedDuration time.Duration
	// Count and cumulative duration of dials that failed
	// (going by the field names -- TODO confirm against the recording code).
	tcpPortForwardFailedCount int64
	tcpPortForwardFailedDuration time.Duration
	// Presumably incremented by updateQualityMetricsWithRejectedDialingLimit,
	// which runTunnel calls whenever it rejects a TCP port forward before
	// dialing -- TODO confirm; that method is not visible here.
	tcpPortForwardRejectedDialingLimitCount int64
}
// handshakeState is the per-client record of the handshake API request. It
// is stored on the sshClient by setHandshakeState.
type handshakeState struct {
	// completed is checked by setHandshakeState to reject a second handshake
	// and is logged by logTunnel as "handshake_completed".
	completed bool
	apiProtocol string
	// apiParams is passed to getRequestLogFields by logTunnel.
	apiParams requestJSONObject
}
  648. func newSshClient(
  649. sshServer *sshServer, tunnelProtocol string, geoIPData GeoIPData) *sshClient {
  650. runContext, stopRunning := context.WithCancel(context.Background())
  651. client := &sshClient{
  652. sshServer: sshServer,
  653. tunnelProtocol: tunnelProtocol,
  654. geoIPData: geoIPData,
  655. tcpPortForwardLRU: common.NewLRUConns(),
  656. signalIssueSLOKs: make(chan struct{}, 1),
  657. runContext: runContext,
  658. stopRunning: stopRunning,
  659. }
  660. client.tcpTrafficState.availablePortForwardCond = sync.NewCond(new(sync.Mutex))
  661. client.udpTrafficState.availablePortForwardCond = sync.NewCond(new(sync.Mutex))
  662. return client
  663. }
  664. func (sshClient *sshClient) run(clientConn net.Conn) {
  665. // Some conns report additional metrics
  666. metricsSource, isMetricsSource := clientConn.(MetricsSource)
  667. // Set initial traffic rules, pre-handshake, based on currently known info.
  668. sshClient.setTrafficRules()
  669. // Wrap the base client connection with an ActivityMonitoredConn which will
  670. // terminate the connection if no data is received before the deadline. This
  671. // timeout is in effect for the entire duration of the SSH connection. Clients
  672. // must actively use the connection or send SSH keep alive requests to keep
  673. // the connection active. Writes are not considered reliable activity indicators
  674. // due to buffering.
  675. activityConn, err := common.NewActivityMonitoredConn(
  676. clientConn,
  677. SSH_CONNECTION_READ_DEADLINE,
  678. false,
  679. nil,
  680. nil)
  681. if err != nil {
  682. clientConn.Close()
  683. log.WithContextFields(LogFields{"error": err}).Error("NewActivityMonitoredConn failed")
  684. return
  685. }
  686. clientConn = activityConn
  687. // Further wrap the connection in a rate limiting ThrottledConn.
  688. throttledConn := common.NewThrottledConn(clientConn, sshClient.rateLimits())
  689. clientConn = throttledConn
  690. // Run the initial [obfuscated] SSH handshake in a goroutine so we can both
  691. // respect shutdownBroadcast and implement a specific handshake timeout.
  692. // The timeout is to reclaim network resources in case the handshake takes
  693. // too long.
  694. type sshNewServerConnResult struct {
  695. conn net.Conn
  696. sshConn *ssh.ServerConn
  697. channels <-chan ssh.NewChannel
  698. requests <-chan *ssh.Request
  699. err error
  700. }
  701. resultChannel := make(chan *sshNewServerConnResult, 2)
  702. if SSH_HANDSHAKE_TIMEOUT > 0 {
  703. time.AfterFunc(time.Duration(SSH_HANDSHAKE_TIMEOUT), func() {
  704. resultChannel <- &sshNewServerConnResult{err: errors.New("ssh handshake timeout")}
  705. })
  706. }
  707. go func(conn net.Conn) {
  708. sshServerConfig := &ssh.ServerConfig{
  709. PasswordCallback: sshClient.passwordCallback,
  710. AuthLogCallback: sshClient.authLogCallback,
  711. ServerVersion: sshClient.sshServer.support.Config.SSHServerVersion,
  712. }
  713. sshServerConfig.AddHostKey(sshClient.sshServer.sshHostKey)
  714. result := &sshNewServerConnResult{}
  715. // Wrap the connection in an SSH deobfuscator when required.
  716. if protocol.TunnelProtocolUsesObfuscatedSSH(sshClient.tunnelProtocol) {
  717. // Note: NewObfuscatedSshConn blocks on network I/O
  718. // TODO: ensure this won't block shutdown
  719. conn, result.err = common.NewObfuscatedSshConn(
  720. common.OBFUSCATION_CONN_MODE_SERVER,
  721. conn,
  722. sshClient.sshServer.support.Config.ObfuscatedSSHKey)
  723. if result.err != nil {
  724. result.err = common.ContextError(result.err)
  725. }
  726. }
  727. if result.err == nil {
  728. result.sshConn, result.channels, result.requests, result.err =
  729. ssh.NewServerConn(conn, sshServerConfig)
  730. }
  731. resultChannel <- result
  732. }(clientConn)
  733. var result *sshNewServerConnResult
  734. select {
  735. case result = <-resultChannel:
  736. case <-sshClient.sshServer.shutdownBroadcast:
  737. // Close() will interrupt an ongoing handshake
  738. // TODO: wait for goroutine to exit before returning?
  739. clientConn.Close()
  740. return
  741. }
  742. if result.err != nil {
  743. clientConn.Close()
  744. // This is a Debug log due to noise. The handshake often fails due to I/O
  745. // errors as clients frequently interrupt connections in progress when
  746. // client-side load balancing completes a connection to a different server.
  747. log.WithContextFields(LogFields{"error": result.err}).Debug("handshake failed")
  748. return
  749. }
  750. sshClient.Lock()
  751. sshClient.sshConn = result.sshConn
  752. sshClient.activityConn = activityConn
  753. sshClient.throttledConn = throttledConn
  754. sshClient.Unlock()
  755. if !sshClient.sshServer.registerEstablishedClient(sshClient) {
  756. clientConn.Close()
  757. log.WithContext().Warning("register failed")
  758. return
  759. }
  760. sshClient.runTunnel(result.channels, result.requests)
  761. // Note: sshServer.unregisterEstablishedClient calls sshClient.stop(),
  762. // which also closes underlying transport Conn.
  763. sshClient.sshServer.unregisterEstablishedClient(sshClient)
  764. var additionalMetrics LogFields
  765. if isMetricsSource {
  766. additionalMetrics = metricsSource.GetMetrics()
  767. }
  768. sshClient.logTunnel(additionalMetrics)
  769. // Transfer OSL seed state -- the OSL progress -- from the closing
  770. // client to the session cache so the client can resume its progress
  771. // if it reconnects to this same server.
  772. // Note: following setOSLConfig order of locking.
  773. sshClient.Lock()
  774. if sshClient.oslClientSeedState != nil {
  775. sshClient.sshServer.oslSessionCacheMutex.Lock()
  776. sshClient.oslClientSeedState.Hibernate()
  777. sshClient.sshServer.oslSessionCache.Set(
  778. sshClient.sessionID, sshClient.oslClientSeedState, cache.DefaultExpiration)
  779. sshClient.sshServer.oslSessionCacheMutex.Unlock()
  780. sshClient.oslClientSeedState = nil
  781. }
  782. sshClient.Unlock()
  783. // Initiate cleanup of the GeoIP session cache. To allow for post-tunnel
  784. // final status requests, the lifetime of cached GeoIP records exceeds the
  785. // lifetime of the sshClient.
  786. sshClient.sshServer.support.GeoIPService.MarkSessionCacheToExpire(sshClient.sessionID)
  787. }
  788. func (sshClient *sshClient) passwordCallback(conn ssh.ConnMetadata, password []byte) (*ssh.Permissions, error) {
  789. expectedSessionIDLength := 2 * protocol.PSIPHON_API_CLIENT_SESSION_ID_LENGTH
  790. expectedSSHPasswordLength := 2 * SSH_PASSWORD_BYTE_LENGTH
  791. var sshPasswordPayload protocol.SSHPasswordPayload
  792. err := json.Unmarshal(password, &sshPasswordPayload)
  793. if err != nil {
  794. // Backwards compatibility case: instead of a JSON payload, older clients
  795. // send the hex encoded session ID prepended to the SSH password.
  796. // Note: there's an even older case where clients don't send any session ID,
  797. // but that's no longer supported.
  798. if len(password) == expectedSessionIDLength+expectedSSHPasswordLength {
  799. sshPasswordPayload.SessionId = string(password[0:expectedSessionIDLength])
  800. sshPasswordPayload.SshPassword = string(password[expectedSSHPasswordLength:len(password)])
  801. } else {
  802. return nil, common.ContextError(fmt.Errorf("invalid password payload for %q", conn.User()))
  803. }
  804. }
  805. if !isHexDigits(sshClient.sshServer.support, sshPasswordPayload.SessionId) ||
  806. len(sshPasswordPayload.SessionId) != expectedSessionIDLength {
  807. return nil, common.ContextError(fmt.Errorf("invalid session ID for %q", conn.User()))
  808. }
  809. userOk := (subtle.ConstantTimeCompare(
  810. []byte(conn.User()), []byte(sshClient.sshServer.support.Config.SSHUserName)) == 1)
  811. passwordOk := (subtle.ConstantTimeCompare(
  812. []byte(sshPasswordPayload.SshPassword), []byte(sshClient.sshServer.support.Config.SSHPassword)) == 1)
  813. if !userOk || !passwordOk {
  814. return nil, common.ContextError(fmt.Errorf("invalid password for %q", conn.User()))
  815. }
  816. sessionID := sshPasswordPayload.SessionId
  817. supportsServerRequests := common.Contains(
  818. sshPasswordPayload.ClientCapabilities, protocol.CLIENT_CAPABILITY_SERVER_REQUESTS)
  819. sshClient.Lock()
  820. sshClient.sessionID = sessionID
  821. sshClient.supportsServerRequests = supportsServerRequests
  822. geoIPData := sshClient.geoIPData
  823. sshClient.Unlock()
  824. // Store the GeoIP data associated with the session ID. This makes
  825. // the GeoIP data available to the web server for web API requests.
  826. // A cache that's distinct from the sshClient record is used to allow
  827. // for or post-tunnel final status requests.
  828. // If the client is reconnecting with the same session ID, this call
  829. // will undo the expiry set by MarkSessionCacheToExpire.
  830. sshClient.sshServer.support.GeoIPService.SetSessionCache(sessionID, geoIPData)
  831. return nil, nil
  832. }
  833. func (sshClient *sshClient) authLogCallback(conn ssh.ConnMetadata, method string, err error) {
  834. if err != nil {
  835. if method == "none" && err.Error() == "no auth passed yet" {
  836. // In this case, the callback invocation is noise from auth negotiation
  837. return
  838. }
  839. // Note: here we previously logged messages for fail2ban to act on. This is no longer
  840. // done as the complexity outweighs the benefits.
  841. //
  842. // - The SSH credential is not secret -- it's in the server entry. Attackers targetting
  843. // the server likely already have the credential. On the other hand, random scanning and
  844. // brute forcing is mitigated with high entropy random passwords, rate limiting
  845. // (implemented on the host via iptables), and limited capabilities (the SSH session can
  846. // only port forward).
  847. //
  848. // - fail2ban coverage was inconsistent; in the case of an unfronted meek protocol through
  849. // an upstream proxy, the remote address is the upstream proxy, which should not be blocked.
  850. // The X-Forwarded-For header cant be used instead as it may be forged and used to get IPs
  851. // deliberately blocked; and in any case fail2ban adds iptables rules which can only block
  852. // by direct remote IP, not by original client IP. Fronted meek has the same iptables issue.
  853. //
  854. // Random scanning and brute forcing of port 22 will result in log noise. To mitigate this,
  855. // not every authentication failure is logged. A summary log is emitted periodically to
  856. // retain some record of this activity in case this is relevent to, e.g., a performance
  857. // investigation.
  858. atomic.AddInt64(&sshClient.sshServer.authFailedCount, 1)
  859. lastAuthLog := monotime.Time(atomic.LoadInt64(&sshClient.sshServer.lastAuthLog))
  860. if monotime.Since(lastAuthLog) > SSH_AUTH_LOG_PERIOD {
  861. now := int64(monotime.Now())
  862. if atomic.CompareAndSwapInt64(&sshClient.sshServer.lastAuthLog, int64(lastAuthLog), now) {
  863. count := atomic.SwapInt64(&sshClient.sshServer.authFailedCount, 0)
  864. log.WithContextFields(
  865. LogFields{"lastError": err, "failedCount": count}).Warning("authentication failures")
  866. }
  867. }
  868. log.WithContextFields(LogFields{"error": err, "method": method}).Debug("authentication failed")
  869. } else {
  870. log.WithContextFields(LogFields{"error": err, "method": method}).Debug("authentication success")
  871. }
  872. }
  873. // stop signals the ssh connection to shutdown. After sshConn() returns,
  874. // the connection has terminated but sshClient.run() may still be
  875. // running and in the process of exiting.
  876. func (sshClient *sshClient) stop() {
  877. sshClient.sshConn.Close()
  878. sshClient.sshConn.Wait()
  879. }
  880. // runTunnel handles/dispatches new channels and new requests from the client.
  881. // When the SSH client connection closes, both the channels and requests channels
  882. // will close and runTunnel will exit.
  883. func (sshClient *sshClient) runTunnel(
  884. channels <-chan ssh.NewChannel, requests <-chan *ssh.Request) {
  885. waitGroup := new(sync.WaitGroup)
  886. // Start client SSH API request handler
  887. waitGroup.Add(1)
  888. go func() {
  889. defer waitGroup.Done()
  890. for request := range requests {
  891. // Requests are processed serially; API responses must be sent in request order.
  892. var responsePayload []byte
  893. var err error
  894. if request.Type == "[email protected]" {
  895. // Keepalive requests have an empty response.
  896. } else {
  897. // All other requests are assumed to be API requests.
  898. responsePayload, err = sshAPIRequestHandler(
  899. sshClient.sshServer.support,
  900. sshClient.geoIPData,
  901. request.Type,
  902. request.Payload)
  903. }
  904. if err == nil {
  905. err = request.Reply(true, responsePayload)
  906. } else {
  907. log.WithContextFields(LogFields{"error": err}).Warning("request failed")
  908. err = request.Reply(false, nil)
  909. }
  910. if err != nil {
  911. log.WithContextFields(LogFields{"error": err}).Warning("response failed")
  912. }
  913. }
  914. }()
  915. // Start OSL sender
  916. if sshClient.supportsServerRequests {
  917. waitGroup.Add(1)
  918. go func() {
  919. defer waitGroup.Done()
  920. sshClient.runOSLSender()
  921. }()
  922. }
  923. // Lifecycle of a TCP port forward:
  924. //
  925. // 1. A "direct-tcpip" SSH request is received from the client.
  926. //
  927. // A new TCP port forward request is enqueued. The queue delivers TCP port
  928. // forward requests to the TCP port forward manager, which enforces the TCP
  929. // port forward dial limit.
  930. //
  931. // Enqueuing new requests allows for reading further SSH requests from the
  932. // client without blocking when the dial limit is hit; this is to permit new
  933. // UDP/udpgw port forwards to be restablished without delay. The maximum size
  934. // of the queue enforces a hard cap on resources consumed by a client in the
  935. // pre-dial phase. When the queue is full, new TCP port forwards are
  936. // immediately rejected.
  937. //
  938. // 2. The TCP port forward manager dequeues the request.
  939. //
  940. // The manager calls dialingTCPPortForward(), which increments
  941. // concurrentDialingPortForwardCount, and calls
  942. // isTCPDialingPortForwardLimitExceeded() to check the concurrent dialing
  943. // count.
  944. //
  945. // The manager enforces the concurrent TCP dial limit: when at the limit, the
  946. // manager blocks waiting for the number of dials to drop below the limit before
  947. // dispatching the request to handleTCPPortForward(), which will run in its own
  948. // goroutine and will dial and relay the port forward.
  949. //
  950. // The block delays the current request and also halts dequeuing of subsequent
  951. // requests and could ultimately cause requests to be immediately rejected if
  952. // the queue fills. These actions are intended to apply back pressure when
  953. // upstream network resources are impaired.
  954. //
  955. // The time spent in the queue is deducted from the port forward's dial timeout.
  956. // The time spent blocking while at the dial limit is similarly deducted from
  957. // the dial timeout. If the dial timeout has expired before the dial begins, the
  958. // port forward is rejected and a stat is recorded.
  959. //
  960. // 3. handleTCPPortForward() performs the port forward dial and relaying.
  961. //
  962. // a. Dial the target, using the dial timeout remaining after queue and blocking
  963. // time is deducted.
  964. //
  965. // b. If the dial fails, call abortedTCPPortForward() to decrement
  966. // concurrentDialingPortForwardCount, freeing up a dial slot.
  967. //
  968. // c. If the dial succeeds, call establishedPortForward(), which decrements
  969. // concurrentDialingPortForwardCount and increments concurrentPortForwardCount,
  970. // the "established" port forward count.
  971. //
  972. // d. Check isPortForwardLimitExceeded(), which enforces the configurable limit on
  973. // concurrentPortForwardCount, the number of _established_ TCP port forwards.
  974. // If the limit is exceeded, the LRU established TCP port forward is closed and
  975. // the newly established TCP port forward proceeds. This LRU logic allows some
  976. // dangling resource consumption (e.g., TIME_WAIT) while providing a better
  977. // experience for clients.
  978. //
  979. // e. Relay data.
  980. //
  981. // f. Call closedPortForward() which decrements concurrentPortForwardCount and
  982. // records bytes transferred.
  983. // Start the TCP port forward manager
  984. type newTCPPortForward struct {
  985. enqueueTime monotime.Time
  986. hostToConnect string
  987. portToConnect int
  988. newChannel ssh.NewChannel
  989. }
  990. // The queue size is set to the traffic rules (MaxTCPPortForwardCount +
  991. // MaxTCPDialingPortForwardCount), which is a reasonable indication of resource
  992. // limits per client; when that value is not set, a default is used.
  993. // A limitation: this queue size is set once and doesn't change, for this client,
  994. // when traffic rules are reloaded.
  995. queueSize := sshClient.getTCPPortForwardQueueSize()
  996. if queueSize == 0 {
  997. queueSize = SSH_TCP_PORT_FORWARD_QUEUE_SIZE
  998. }
  999. newTCPPortForwards := make(chan *newTCPPortForward, queueSize)
  1000. waitGroup.Add(1)
  1001. go func() {
  1002. defer waitGroup.Done()
  1003. for newPortForward := range newTCPPortForwards {
  1004. remainingDialTimeout :=
  1005. time.Duration(sshClient.getDialTCPPortForwardTimeoutMilliseconds())*time.Millisecond -
  1006. monotime.Since(newPortForward.enqueueTime)
  1007. if remainingDialTimeout <= 0 {
  1008. sshClient.updateQualityMetricsWithRejectedDialingLimit()
  1009. sshClient.rejectNewChannel(
  1010. newPortForward.newChannel, ssh.Prohibited, "TCP port forward timed out in queue")
  1011. continue
  1012. }
  1013. // Reserve a TCP dialing slot.
  1014. //
  1015. // TOCTOU note: important to increment counts _before_ checking limits; otherwise,
  1016. // the client could potentially consume excess resources by initiating many port
  1017. // forwards concurrently.
  1018. sshClient.dialingTCPPortForward()
  1019. // When max dials are in progress, wait up to remainingDialTimeout for dialing
  1020. // to become available. This blocks all dequeing.
  1021. if sshClient.isTCPDialingPortForwardLimitExceeded() {
  1022. blockStartTime := monotime.Now()
  1023. ctx, cancelCtx := context.WithTimeout(sshClient.runContext, remainingDialTimeout)
  1024. sshClient.setTCPPortForwardDialingAvailableSignal(cancelCtx)
  1025. <-ctx.Done()
  1026. sshClient.setTCPPortForwardDialingAvailableSignal(nil)
  1027. cancelCtx() // "must be called or the new context will remain live until its parent context is cancelled"
  1028. remainingDialTimeout -= monotime.Since(blockStartTime)
  1029. }
  1030. if remainingDialTimeout <= 0 {
  1031. // Release the dialing slot here since handleTCPChannel() won't be called.
  1032. sshClient.abortedTCPPortForward()
  1033. sshClient.updateQualityMetricsWithRejectedDialingLimit()
  1034. sshClient.rejectNewChannel(
  1035. newPortForward.newChannel, ssh.Prohibited, "TCP port forward timed out before dialing")
  1036. continue
  1037. }
  1038. // Dial and relay the TCP port forward. handleTCPChannel is run in its own worker goroutine.
  1039. // handleTCPChannel will release the dialing slot reserved by dialingTCPPortForward(); and
  1040. // will deal with remainingDialTimeout <= 0.
  1041. waitGroup.Add(1)
  1042. go func(remainingDialTimeout time.Duration, newPortForward *newTCPPortForward) {
  1043. defer waitGroup.Done()
  1044. sshClient.handleTCPChannel(
  1045. remainingDialTimeout,
  1046. newPortForward.hostToConnect,
  1047. newPortForward.portToConnect,
  1048. newPortForward.newChannel)
  1049. }(remainingDialTimeout, newPortForward)
  1050. }
  1051. }()
  1052. // Handle new channel (port forward) requests from the client.
  1053. //
  1054. // udpgw client connections are dispatched immediately (clients use this for
  1055. // DNS, so it's essential to not block; and only one udpgw connection is
  1056. // retained at a time).
  1057. //
  1058. // All other TCP port forwards are dispatched via the TCP port forward
  1059. // manager queue.
  1060. for newChannel := range channels {
  1061. if newChannel.ChannelType() != "direct-tcpip" {
  1062. sshClient.rejectNewChannel(newChannel, ssh.Prohibited, "unknown or unsupported channel type")
  1063. continue
  1064. }
  1065. // http://tools.ietf.org/html/rfc4254#section-7.2
  1066. var directTcpipExtraData struct {
  1067. HostToConnect string
  1068. PortToConnect uint32
  1069. OriginatorIPAddress string
  1070. OriginatorPort uint32
  1071. }
  1072. err := ssh.Unmarshal(newChannel.ExtraData(), &directTcpipExtraData)
  1073. if err != nil {
  1074. sshClient.rejectNewChannel(newChannel, ssh.Prohibited, "invalid extra data")
  1075. continue
  1076. }
  1077. // Intercept TCP port forwards to a specified udpgw server and handle directly.
  1078. // TODO: also support UDP explicitly, e.g. with a custom "direct-udp" channel type?
  1079. isUDPChannel := sshClient.sshServer.support.Config.UDPInterceptUdpgwServerAddress != "" &&
  1080. sshClient.sshServer.support.Config.UDPInterceptUdpgwServerAddress ==
  1081. net.JoinHostPort(directTcpipExtraData.HostToConnect, strconv.Itoa(int(directTcpipExtraData.PortToConnect)))
  1082. if isUDPChannel {
  1083. // Dispatch immediately. handleUDPChannel runs the udpgw protocol in its
  1084. // own worker goroutine.
  1085. waitGroup.Add(1)
  1086. go func(channel ssh.NewChannel) {
  1087. defer waitGroup.Done()
  1088. sshClient.handleUDPChannel(channel)
  1089. }(newChannel)
  1090. } else {
  1091. // Dispatch via TCP port forward manager. When the queue is full, the channel
  1092. // is immediately rejected.
  1093. tcpPortForward := &newTCPPortForward{
  1094. enqueueTime: monotime.Now(),
  1095. hostToConnect: directTcpipExtraData.HostToConnect,
  1096. portToConnect: int(directTcpipExtraData.PortToConnect),
  1097. newChannel: newChannel,
  1098. }
  1099. select {
  1100. case newTCPPortForwards <- tcpPortForward:
  1101. default:
  1102. sshClient.updateQualityMetricsWithRejectedDialingLimit()
  1103. sshClient.rejectNewChannel(newChannel, ssh.Prohibited, "TCP port forward dial queue full")
  1104. }
  1105. }
  1106. }
  1107. // The channel loop is interrupted by a client
  1108. // disconnect or by calling sshClient.stop().
  1109. // Stop the TCP port forward manager
  1110. close(newTCPPortForwards)
  1111. // Stop all other worker goroutines
  1112. sshClient.stopRunning()
  1113. waitGroup.Wait()
  1114. }
  1115. func (sshClient *sshClient) logTunnel(additionalMetrics LogFields) {
  1116. // Note: reporting duration based on last confirmed data transfer, which
  1117. // is reads for sshClient.activityConn.GetActiveDuration(), and not
  1118. // connection closing is important for protocols such as meek. For
  1119. // meek, the connection remains open until the HTTP session expires,
  1120. // which may be some time after the tunnel has closed. (The meek
  1121. // protocol has no allowance for signalling payload EOF, and even if
  1122. // it did the client may not have the opportunity to send a final
  1123. // request with an EOF flag set.)
  1124. sshClient.Lock()
  1125. logFields := getRequestLogFields(
  1126. sshClient.sshServer.support,
  1127. "server_tunnel",
  1128. sshClient.geoIPData,
  1129. sshClient.handshakeState.apiParams,
  1130. baseRequestParams)
  1131. logFields["handshake_completed"] = sshClient.handshakeState.completed
  1132. logFields["start_time"] = sshClient.activityConn.GetStartTime()
  1133. logFields["duration"] = sshClient.activityConn.GetActiveDuration() / time.Millisecond
  1134. logFields["bytes_up_tcp"] = sshClient.tcpTrafficState.bytesUp
  1135. logFields["bytes_down_tcp"] = sshClient.tcpTrafficState.bytesDown
  1136. logFields["peak_concurrent_dialing_port_forward_count_tcp"] = sshClient.tcpTrafficState.peakConcurrentDialingPortForwardCount
  1137. logFields["peak_concurrent_port_forward_count_tcp"] = sshClient.tcpTrafficState.peakConcurrentPortForwardCount
  1138. logFields["total_port_forward_count_tcp"] = sshClient.tcpTrafficState.totalPortForwardCount
  1139. logFields["bytes_up_udp"] = sshClient.udpTrafficState.bytesUp
  1140. logFields["bytes_down_udp"] = sshClient.udpTrafficState.bytesDown
  1141. // sshClient.udpTrafficState.peakConcurrentDialingPortForwardCount isn't meaningful
  1142. logFields["peak_concurrent_port_forward_count_udp"] = sshClient.udpTrafficState.peakConcurrentPortForwardCount
  1143. logFields["total_port_forward_count_udp"] = sshClient.udpTrafficState.totalPortForwardCount
  1144. // Merge in additional metrics from the optional metrics source
  1145. if additionalMetrics != nil {
  1146. for name, value := range additionalMetrics {
  1147. // Don't overwrite any basic fields
  1148. if logFields[name] == nil {
  1149. logFields[name] = value
  1150. }
  1151. }
  1152. }
  1153. sshClient.Unlock()
  1154. log.LogRawFieldsWithTimestamp(logFields)
  1155. }
  1156. func (sshClient *sshClient) runOSLSender() {
  1157. for {
  1158. // Await a signal that there are SLOKs to send
  1159. // TODO: use reflect.SelectCase, and optionally await timer here?
  1160. select {
  1161. case <-sshClient.signalIssueSLOKs:
  1162. case <-sshClient.runContext.Done():
  1163. return
  1164. }
  1165. retryDelay := SSH_SEND_OSL_INITIAL_RETRY_DELAY
  1166. for {
  1167. err := sshClient.sendOSLRequest()
  1168. if err == nil {
  1169. break
  1170. }
  1171. log.WithContextFields(LogFields{"error": err}).Warning("sendOSLRequest failed")
  1172. // If the request failed, retry after a delay (with exponential backoff)
  1173. // or when signaled that there are additional SLOKs to send
  1174. retryTimer := time.NewTimer(retryDelay)
  1175. select {
  1176. case <-retryTimer.C:
  1177. case <-sshClient.signalIssueSLOKs:
  1178. case <-sshClient.runContext.Done():
  1179. retryTimer.Stop()
  1180. return
  1181. }
  1182. retryTimer.Stop()
  1183. retryDelay *= SSH_SEND_OSL_RETRY_FACTOR
  1184. }
  1185. }
  1186. }
  1187. // sendOSLRequest will invoke osl.GetSeedPayload to issue SLOKs and
  1188. // generate a payload, and send an OSL request to the client when
  1189. // there are new SLOKs in the payload.
  1190. func (sshClient *sshClient) sendOSLRequest() error {
  1191. seedPayload := sshClient.getOSLSeedPayload()
  1192. // Don't send when no SLOKs. This will happen when signalIssueSLOKs
  1193. // is received but no new SLOKs are issued.
  1194. if len(seedPayload.SLOKs) == 0 {
  1195. return nil
  1196. }
  1197. oslRequest := protocol.OSLRequest{
  1198. SeedPayload: seedPayload,
  1199. }
  1200. requestPayload, err := json.Marshal(oslRequest)
  1201. if err != nil {
  1202. return common.ContextError(err)
  1203. }
  1204. ok, _, err := sshClient.sshConn.SendRequest(
  1205. protocol.PSIPHON_API_OSL_REQUEST_NAME,
  1206. true,
  1207. requestPayload)
  1208. if err != nil {
  1209. return common.ContextError(err)
  1210. }
  1211. if !ok {
  1212. return common.ContextError(errors.New("client rejected request"))
  1213. }
  1214. sshClient.clearOSLSeedPayload()
  1215. return nil
  1216. }
  1217. func (sshClient *sshClient) rejectNewChannel(newChannel ssh.NewChannel, reason ssh.RejectionReason, logMessage string) {
  1218. // Note: Debug level, as logMessage may contain user traffic destination address information
  1219. log.WithContextFields(
  1220. LogFields{
  1221. "channelType": newChannel.ChannelType(),
  1222. "logMessage": logMessage,
  1223. "rejectReason": reason.String(),
  1224. }).Debug("reject new channel")
  1225. // Note: logMessage is internal, for logging only; just the RejectionReason is sent to the client
  1226. newChannel.Reject(reason, reason.String())
  1227. }
  1228. // setHandshakeState records that a client has completed a handshake API request.
  1229. // Some parameters from the handshake request may be used in future traffic rule
  1230. // selection. Port forwards are disallowed until a handshake is complete. The
  1231. // handshake parameters are included in the session summary log recorded in
  1232. // sshClient.stop().
  1233. func (sshClient *sshClient) setHandshakeState(state handshakeState) error {
  1234. sshClient.Lock()
  1235. completed := sshClient.handshakeState.completed
  1236. if !completed {
  1237. sshClient.handshakeState = state
  1238. }
  1239. sshClient.Unlock()
  1240. // Client must only perform one handshake
  1241. if completed {
  1242. return common.ContextError(errors.New("handshake already completed"))
  1243. }
  1244. sshClient.setTrafficRules()
  1245. sshClient.setOSLConfig()
  1246. return nil
  1247. }
  1248. // getHandshaked returns whether the client has completed a handshake API
  1249. // request and whether the traffic rules that were selected after the
  1250. // handshake immediately exhaust the client.
  1251. //
  1252. // When the client is immediately exhausted it will be closed; but this
  1253. // takes effect asynchronously. The "exhausted" return value is used to
  1254. // prevent API requests by clients that will close.
  1255. func (sshClient *sshClient) getHandshaked() (bool, bool) {
  1256. sshClient.Lock()
  1257. defer sshClient.Unlock()
  1258. completed := sshClient.handshakeState.completed
  1259. exhausted := false
  1260. // Notes:
  1261. // - "Immediately exhausted" is when CloseAfterExhausted is set and
  1262. // either ReadUnthrottledBytes or WriteUnthrottledBytes starts from
  1263. // 0, so no bytes would be read or written. This check does not
  1264. // examine whether 0 bytes _remain_ in the ThrottledConn.
  1265. // - This check is made against the current traffic rules, which
  1266. // could have changed in a hot reload since the handshake.
  1267. if completed &&
  1268. *sshClient.trafficRules.RateLimits.CloseAfterExhausted == true &&
  1269. (*sshClient.trafficRules.RateLimits.ReadUnthrottledBytes == 0 ||
  1270. *sshClient.trafficRules.RateLimits.WriteUnthrottledBytes == 0) {
  1271. exhausted = true
  1272. }
  1273. return completed, exhausted
  1274. }
  1275. // setTrafficRules resets the client's traffic rules based on the latest server config
  1276. // and client properties. As sshClient.trafficRules may be reset by a concurrent
  1277. // goroutine, trafficRules must only be accessed within the sshClient mutex.
  1278. func (sshClient *sshClient) setTrafficRules() {
  1279. sshClient.Lock()
  1280. defer sshClient.Unlock()
  1281. sshClient.trafficRules = sshClient.sshServer.support.TrafficRulesSet.GetTrafficRules(
  1282. sshClient.tunnelProtocol, sshClient.geoIPData, sshClient.handshakeState)
  1283. if sshClient.throttledConn != nil {
  1284. // Any existing throttling state is reset.
  1285. sshClient.throttledConn.SetLimits(
  1286. sshClient.trafficRules.RateLimits.CommonRateLimits())
  1287. }
  1288. }
  1289. // setOSLConfig resets the client's OSL seed state based on the latest OSL config
  1290. // As sshClient.oslClientSeedState may be reset by a concurrent goroutine,
  1291. // oslClientSeedState must only be accessed within the sshClient mutex.
  1292. func (sshClient *sshClient) setOSLConfig() {
  1293. sshClient.Lock()
  1294. defer sshClient.Unlock()
  1295. propagationChannelID, err := getStringRequestParam(
  1296. sshClient.handshakeState.apiParams, "propagation_channel_id")
  1297. if err != nil {
  1298. // This should not fail as long as client has sent valid handshake
  1299. return
  1300. }
  1301. // Use a cached seed state if one is found for the client's
  1302. // session ID. This enables resuming progress made in a previous
  1303. // tunnel.
  1304. // Note: go-cache is already concurency safe; the additional mutex
  1305. // is necessary to guarantee that Get/Delete is atomic; although in
  1306. // practice no two concurrent clients should ever supply the same
  1307. // session ID.
  1308. sshClient.sshServer.oslSessionCacheMutex.Lock()
  1309. oslClientSeedState, found := sshClient.sshServer.oslSessionCache.Get(sshClient.sessionID)
  1310. if found {
  1311. sshClient.sshServer.oslSessionCache.Delete(sshClient.sessionID)
  1312. sshClient.sshServer.oslSessionCacheMutex.Unlock()
  1313. sshClient.oslClientSeedState = oslClientSeedState.(*osl.ClientSeedState)
  1314. sshClient.oslClientSeedState.Resume(sshClient.signalIssueSLOKs)
  1315. return
  1316. }
  1317. sshClient.sshServer.oslSessionCacheMutex.Unlock()
  1318. // Two limitations when setOSLConfig() is invoked due to an
  1319. // OSL config hot reload:
  1320. //
  1321. // 1. any partial progress towards SLOKs is lost.
  1322. //
  1323. // 2. all existing osl.ClientSeedPortForwards for existing
  1324. // port forwards will not send progress to the new client
  1325. // seed state.
  1326. sshClient.oslClientSeedState = sshClient.sshServer.support.OSLConfig.NewClientSeedState(
  1327. sshClient.geoIPData.Country,
  1328. propagationChannelID,
  1329. sshClient.signalIssueSLOKs)
  1330. }
  1331. // newClientSeedPortForward will return nil when no seeding is
  1332. // associated with the specified ipAddress.
  1333. func (sshClient *sshClient) newClientSeedPortForward(ipAddress net.IP) *osl.ClientSeedPortForward {
  1334. sshClient.Lock()
  1335. defer sshClient.Unlock()
  1336. // Will not be initialized before handshake.
  1337. if sshClient.oslClientSeedState == nil {
  1338. return nil
  1339. }
  1340. return sshClient.oslClientSeedState.NewClientSeedPortForward(ipAddress)
  1341. }
  1342. // getOSLSeedPayload returns a payload containing all seeded SLOKs for
  1343. // this client's session.
  1344. func (sshClient *sshClient) getOSLSeedPayload() *osl.SeedPayload {
  1345. sshClient.Lock()
  1346. defer sshClient.Unlock()
  1347. // Will not be initialized before handshake.
  1348. if sshClient.oslClientSeedState == nil {
  1349. return &osl.SeedPayload{SLOKs: make([]*osl.SLOK, 0)}
  1350. }
  1351. return sshClient.oslClientSeedState.GetSeedPayload()
  1352. }
  1353. func (sshClient *sshClient) clearOSLSeedPayload() {
  1354. sshClient.Lock()
  1355. defer sshClient.Unlock()
  1356. sshClient.oslClientSeedState.ClearSeedPayload()
  1357. }
  1358. func (sshClient *sshClient) rateLimits() common.RateLimits {
  1359. sshClient.Lock()
  1360. defer sshClient.Unlock()
  1361. return sshClient.trafficRules.RateLimits.CommonRateLimits()
  1362. }
  1363. func (sshClient *sshClient) idleTCPPortForwardTimeout() time.Duration {
  1364. sshClient.Lock()
  1365. defer sshClient.Unlock()
  1366. return time.Duration(*sshClient.trafficRules.IdleTCPPortForwardTimeoutMilliseconds) * time.Millisecond
  1367. }
  1368. func (sshClient *sshClient) idleUDPPortForwardTimeout() time.Duration {
  1369. sshClient.Lock()
  1370. defer sshClient.Unlock()
  1371. return time.Duration(*sshClient.trafficRules.IdleUDPPortForwardTimeoutMilliseconds) * time.Millisecond
  1372. }
// setTCPPortForwardDialingAvailableSignal stores the function that
// allocatePortForward invokes when a TCP port forward leaves the dialing
// state and the number of dialing port forwards is below
// MaxTCPDialingPortForwardCount (or that limit is not positive).
// The field is written within the sshClient mutex.
func (sshClient *sshClient) setTCPPortForwardDialingAvailableSignal(signal context.CancelFunc) {
	sshClient.Lock()
	defer sshClient.Unlock()
	sshClient.tcpPortForwardDialingAvailableSignal = signal
}
// Port forward types, passed as the portForwardType argument to the
// sshClient port forward accounting and permission methods.
//
// In those methods portForwardTypeTCP selects the TCP traffic rules and
// TCP traffic state; any other value takes the UDP branch.
const (
	portForwardTypeTCP = iota
	portForwardTypeUDP
	portForwardTypeTransparentDNS
)
  1383. func (sshClient *sshClient) isPortForwardPermitted(
  1384. portForwardType int,
  1385. isTransparentDNSForwarding bool,
  1386. remoteIP net.IP,
  1387. port int) bool {
  1388. sshClient.Lock()
  1389. defer sshClient.Unlock()
  1390. if !sshClient.handshakeState.completed {
  1391. return false
  1392. }
  1393. // Disallow connection to loopback. This is a failsafe. The server
  1394. // should be run on a host with correctly configured firewall rules.
  1395. // And exception is made in the case of tranparent DNS forwarding,
  1396. // where the remoteIP has been rewritten.
  1397. if !isTransparentDNSForwarding && remoteIP.IsLoopback() {
  1398. return false
  1399. }
  1400. var allowPorts []int
  1401. if portForwardType == portForwardTypeTCP {
  1402. allowPorts = sshClient.trafficRules.AllowTCPPorts
  1403. } else {
  1404. allowPorts = sshClient.trafficRules.AllowUDPPorts
  1405. }
  1406. if len(allowPorts) == 0 {
  1407. return true
  1408. }
  1409. // TODO: faster lookup?
  1410. if len(allowPorts) > 0 {
  1411. for _, allowPort := range allowPorts {
  1412. if port == allowPort {
  1413. return true
  1414. }
  1415. }
  1416. }
  1417. for _, subnet := range sshClient.trafficRules.AllowSubnets {
  1418. // Note: ignoring error as config has been validated
  1419. _, network, _ := net.ParseCIDR(subnet)
  1420. if network.Contains(remoteIP) {
  1421. return true
  1422. }
  1423. }
  1424. return false
  1425. }
  1426. func (sshClient *sshClient) isTCPDialingPortForwardLimitExceeded() bool {
  1427. sshClient.Lock()
  1428. defer sshClient.Unlock()
  1429. state := &sshClient.tcpTrafficState
  1430. max := *sshClient.trafficRules.MaxTCPDialingPortForwardCount
  1431. if max > 0 && state.concurrentDialingPortForwardCount >= int64(max) {
  1432. return true
  1433. }
  1434. return false
  1435. }
  1436. func (sshClient *sshClient) getTCPPortForwardQueueSize() int {
  1437. sshClient.Lock()
  1438. defer sshClient.Unlock()
  1439. return *sshClient.trafficRules.MaxTCPPortForwardCount +
  1440. *sshClient.trafficRules.MaxTCPDialingPortForwardCount
  1441. }
  1442. func (sshClient *sshClient) getDialTCPPortForwardTimeoutMilliseconds() int {
  1443. sshClient.Lock()
  1444. defer sshClient.Unlock()
  1445. return *sshClient.trafficRules.DialTCPPortForwardTimeoutMilliseconds
  1446. }
  1447. func (sshClient *sshClient) dialingTCPPortForward() {
  1448. sshClient.Lock()
  1449. defer sshClient.Unlock()
  1450. state := &sshClient.tcpTrafficState
  1451. state.concurrentDialingPortForwardCount += 1
  1452. if state.concurrentDialingPortForwardCount > state.peakConcurrentDialingPortForwardCount {
  1453. state.peakConcurrentDialingPortForwardCount = state.concurrentDialingPortForwardCount
  1454. }
  1455. }
  1456. func (sshClient *sshClient) abortedTCPPortForward() {
  1457. sshClient.Lock()
  1458. defer sshClient.Unlock()
  1459. sshClient.tcpTrafficState.concurrentDialingPortForwardCount -= 1
  1460. }
  1461. func (sshClient *sshClient) allocatePortForward(portForwardType int) bool {
  1462. sshClient.Lock()
  1463. defer sshClient.Unlock()
  1464. // Check if at port forward limit. The subsequent counter
  1465. // changes must be atomic with the limit check to ensure
  1466. // the counter never exceeds the limit in the case of
  1467. // concurrent allocations.
  1468. var max int
  1469. var state *trafficState
  1470. if portForwardType == portForwardTypeTCP {
  1471. max = *sshClient.trafficRules.MaxTCPPortForwardCount
  1472. state = &sshClient.tcpTrafficState
  1473. } else {
  1474. max = *sshClient.trafficRules.MaxUDPPortForwardCount
  1475. state = &sshClient.udpTrafficState
  1476. }
  1477. if max > 0 && state.concurrentPortForwardCount >= int64(max) {
  1478. return false
  1479. }
  1480. // Update port forward counters.
  1481. if portForwardType == portForwardTypeTCP {
  1482. // Assumes TCP port forwards called dialingTCPPortForward
  1483. state.concurrentDialingPortForwardCount -= 1
  1484. if sshClient.tcpPortForwardDialingAvailableSignal != nil {
  1485. max := *sshClient.trafficRules.MaxTCPDialingPortForwardCount
  1486. if max <= 0 || state.concurrentDialingPortForwardCount < int64(max) {
  1487. sshClient.tcpPortForwardDialingAvailableSignal()
  1488. }
  1489. }
  1490. }
  1491. state.concurrentPortForwardCount += 1
  1492. if state.concurrentPortForwardCount > state.peakConcurrentPortForwardCount {
  1493. state.peakConcurrentPortForwardCount = state.concurrentPortForwardCount
  1494. }
  1495. state.totalPortForwardCount += 1
  1496. return true
  1497. }
  1498. // establishedPortForward increments the concurrent port
  1499. // forward counter. closedPortForward decrements it, so it
  1500. // must always be called for each establishedPortForward
  1501. // call.
  1502. //
  1503. // When at the limit of established port forwards, the LRU
  1504. // existing port forward is closed to make way for the newly
  1505. // established one. There can be a minor delay as, in addition
  1506. // to calling Close() on the port forward net.Conn,
  1507. // establishedPortForward waits for the LRU's closedPortForward()
  1508. // call which will decrement the concurrent counter. This
  1509. // ensures all resources associated with the LRU (socket,
  1510. // goroutine) are released or will very soon be released before
  1511. // proceeding.
  1512. func (sshClient *sshClient) establishedPortForward(
  1513. portForwardType int, portForwardLRU *common.LRUConns) {
  1514. // Do not lock sshClient here.
  1515. var state *trafficState
  1516. if portForwardType == portForwardTypeTCP {
  1517. state = &sshClient.tcpTrafficState
  1518. } else {
  1519. state = &sshClient.udpTrafficState
  1520. }
  1521. // When the maximum number of port forwards is already
  1522. // established, close the LRU. CloseOldest will call
  1523. // Close on the port forward net.Conn. Both TCP and
  1524. // UDP port forwards have handler goroutines that may
  1525. // be blocked calling Read on the net.Conn. Close will
  1526. // eventually interrupt the Read and cause the handlers
  1527. // to exit, but not immediately. So the following logic
  1528. // waits for a LRU handler to be interrupted and signal
  1529. // availability.
  1530. //
  1531. // Notes:
  1532. //
  1533. // - the port forward limit can change via a traffic
  1534. // rules hot reload; the condition variable handles
  1535. // this case whereas a channel-based semaphore would
  1536. // not.
  1537. //
  1538. // - if a number of goroutines exceeding the total limit
  1539. // arrive here all concurrently, some CloseOldest() calls
  1540. // will have no effect as there can be less existing port
  1541. // forwards than new ones. In this case, the new port
  1542. // forward will be delayed. This is highly unlikely in
  1543. // practise since UDP calls to establishedPortForward are
  1544. // serialized and TCP calls are limited by the dial
  1545. // queue/count.
  1546. if !sshClient.allocatePortForward(portForwardType) {
  1547. portForwardLRU.CloseOldest()
  1548. log.WithContext().Debug("closed LRU port forward")
  1549. state.availablePortForwardCond.L.Lock()
  1550. for !sshClient.allocatePortForward(portForwardType) {
  1551. state.availablePortForwardCond.Wait()
  1552. }
  1553. state.availablePortForwardCond.L.Unlock()
  1554. }
  1555. }
  1556. func (sshClient *sshClient) closedPortForward(
  1557. portForwardType int, bytesUp, bytesDown int64) {
  1558. sshClient.Lock()
  1559. var state *trafficState
  1560. if portForwardType == portForwardTypeTCP {
  1561. state = &sshClient.tcpTrafficState
  1562. } else {
  1563. state = &sshClient.udpTrafficState
  1564. }
  1565. state.concurrentPortForwardCount -= 1
  1566. state.bytesUp += bytesUp
  1567. state.bytesDown += bytesDown
  1568. sshClient.Unlock()
  1569. // Signal any goroutine waiting in establishedPortForward
  1570. // that an established port forward slot is available.
  1571. state.availablePortForwardCond.Signal()
  1572. }
  1573. func (sshClient *sshClient) updateQualityMetricsWithDialResult(
  1574. tcpPortForwardDialSuccess bool, dialDuration time.Duration) {
  1575. sshClient.Lock()
  1576. defer sshClient.Unlock()
  1577. if tcpPortForwardDialSuccess {
  1578. sshClient.qualityMetrics.tcpPortForwardDialedCount += 1
  1579. sshClient.qualityMetrics.tcpPortForwardDialedDuration += dialDuration
  1580. } else {
  1581. sshClient.qualityMetrics.tcpPortForwardFailedCount += 1
  1582. sshClient.qualityMetrics.tcpPortForwardFailedDuration += dialDuration
  1583. }
  1584. }
  1585. func (sshClient *sshClient) updateQualityMetricsWithRejectedDialingLimit() {
  1586. sshClient.Lock()
  1587. defer sshClient.Unlock()
  1588. sshClient.qualityMetrics.tcpPortForwardRejectedDialingLimitCount += 1
  1589. }
  1590. func (sshClient *sshClient) handleTCPChannel(
  1591. remainingDialTimeout time.Duration,
  1592. hostToConnect string,
  1593. portToConnect int,
  1594. newChannel ssh.NewChannel) {
  1595. // Assumptions:
  1596. // - sshClient.dialingTCPPortForward() has been called
  1597. // - remainingDialTimeout > 0
  1598. established := false
  1599. defer func() {
  1600. if !established {
  1601. sshClient.abortedTCPPortForward()
  1602. }
  1603. }()
  1604. // Transparently redirect web API request connections.
  1605. isWebServerPortForward := false
  1606. config := sshClient.sshServer.support.Config
  1607. if config.WebServerPortForwardAddress != "" {
  1608. destination := net.JoinHostPort(hostToConnect, strconv.Itoa(portToConnect))
  1609. if destination == config.WebServerPortForwardAddress {
  1610. isWebServerPortForward = true
  1611. if config.WebServerPortForwardRedirectAddress != "" {
  1612. // Note: redirect format is validated when config is loaded
  1613. host, portStr, _ := net.SplitHostPort(config.WebServerPortForwardRedirectAddress)
  1614. port, _ := strconv.Atoi(portStr)
  1615. hostToConnect = host
  1616. portToConnect = port
  1617. }
  1618. }
  1619. }
  1620. // Dial the remote address.
  1621. //
  1622. // Hostname resolution is performed explicitly, as a seperate step, as the target IP
  1623. // address is used for traffic rules (AllowSubnets) and OSL seed progress.
  1624. //
  1625. // Contexts are used for cancellation (via sshClient.runContext, which is cancelled
  1626. // when the client is stopping) and timeouts.
  1627. dialStartTime := monotime.Now()
  1628. log.WithContextFields(LogFields{"hostToConnect": hostToConnect}).Debug("resolving")
  1629. ctx, cancelCtx := context.WithTimeout(sshClient.runContext, remainingDialTimeout)
  1630. IPs, err := (&net.Resolver{}).LookupIPAddr(ctx, hostToConnect)
  1631. cancelCtx() // "must be called or the new context will remain live until its parent context is cancelled"
  1632. // TODO: shuffle list to try other IPs?
  1633. // TODO: IPv6 support
  1634. var IP net.IP
  1635. for _, ip := range IPs {
  1636. if ip.IP.To4() != nil {
  1637. IP = ip.IP
  1638. break
  1639. }
  1640. }
  1641. if err == nil && IP == nil {
  1642. err = errors.New("no IP address")
  1643. }
  1644. resolveElapsedTime := monotime.Since(dialStartTime)
  1645. if err != nil {
  1646. // Record a port forward failure
  1647. sshClient.updateQualityMetricsWithDialResult(true, resolveElapsedTime)
  1648. sshClient.rejectNewChannel(
  1649. newChannel, ssh.ConnectionFailed, fmt.Sprintf("LookupIP failed: %s", err))
  1650. return
  1651. }
  1652. remainingDialTimeout -= resolveElapsedTime
  1653. if remainingDialTimeout <= 0 {
  1654. sshClient.rejectNewChannel(
  1655. newChannel, ssh.Prohibited, "TCP port forward timed out resolving")
  1656. return
  1657. }
  1658. // Enforce traffic rules, using the resolved IP address.
  1659. if !isWebServerPortForward &&
  1660. !sshClient.isPortForwardPermitted(
  1661. portForwardTypeTCP,
  1662. false,
  1663. IP,
  1664. portToConnect) {
  1665. // Note: not recording a port forward failure in this case
  1666. sshClient.rejectNewChannel(
  1667. newChannel, ssh.Prohibited, "port forward not permitted")
  1668. return
  1669. }
  1670. // TCP dial.
  1671. remoteAddr := net.JoinHostPort(IP.String(), strconv.Itoa(portToConnect))
  1672. log.WithContextFields(LogFields{"remoteAddr": remoteAddr}).Debug("dialing")
  1673. ctx, cancelCtx = context.WithTimeout(sshClient.runContext, remainingDialTimeout)
  1674. fwdConn, err := (&net.Dialer{}).DialContext(ctx, "tcp", remoteAddr)
  1675. cancelCtx() // "must be called or the new context will remain live until its parent context is cancelled"
  1676. // Record port forward success or failure
  1677. sshClient.updateQualityMetricsWithDialResult(err == nil, monotime.Since(dialStartTime))
  1678. if err != nil {
  1679. // Monitor for low resource error conditions
  1680. sshClient.sshServer.monitorPortForwardDialError(err)
  1681. sshClient.rejectNewChannel(
  1682. newChannel, ssh.ConnectionFailed, fmt.Sprintf("DialTimeout failed: %s", err))
  1683. return
  1684. }
  1685. // The upstream TCP port forward connection has been established. Schedule
  1686. // some cleanup and notify the SSH client that the channel is accepted.
  1687. defer fwdConn.Close()
  1688. fwdChannel, requests, err := newChannel.Accept()
  1689. if err != nil {
  1690. log.WithContextFields(LogFields{"error": err}).Warning("accept new channel failed")
  1691. return
  1692. }
  1693. go ssh.DiscardRequests(requests)
  1694. defer fwdChannel.Close()
  1695. // Release the dialing slot and acquire an established slot.
  1696. //
  1697. // establishedPortForward increments the concurrent TCP port
  1698. // forward counter and closes the LRU existing TCP port forward
  1699. // when already at the limit.
  1700. //
  1701. // Known limitations:
  1702. //
  1703. // - Closed LRU TCP sockets will enter the TIME_WAIT state,
  1704. // continuing to consume some resources.
  1705. sshClient.establishedPortForward(portForwardTypeTCP, sshClient.tcpPortForwardLRU)
  1706. // "established = true" cancels the deferred abortedTCPPortForward()
  1707. established = true
  1708. // TODO: 64-bit alignment? https://golang.org/pkg/sync/atomic/#pkg-note-BUG
  1709. var bytesUp, bytesDown int64
  1710. defer func() {
  1711. sshClient.closedPortForward(
  1712. portForwardTypeTCP, atomic.LoadInt64(&bytesUp), atomic.LoadInt64(&bytesDown))
  1713. }()
  1714. lruEntry := sshClient.tcpPortForwardLRU.Add(fwdConn)
  1715. defer lruEntry.Remove()
  1716. // ActivityMonitoredConn monitors the TCP port forward I/O and updates
  1717. // its LRU status. ActivityMonitoredConn also times out I/O on the port
  1718. // forward if both reads and writes have been idle for the specified
  1719. // duration.
  1720. // Ensure nil interface if newClientSeedPortForward returns nil
  1721. var updater common.ActivityUpdater
  1722. seedUpdater := sshClient.newClientSeedPortForward(IP)
  1723. if seedUpdater != nil {
  1724. updater = seedUpdater
  1725. }
  1726. fwdConn, err = common.NewActivityMonitoredConn(
  1727. fwdConn,
  1728. sshClient.idleTCPPortForwardTimeout(),
  1729. true,
  1730. updater,
  1731. lruEntry)
  1732. if err != nil {
  1733. log.WithContextFields(LogFields{"error": err}).Error("NewActivityMonitoredConn failed")
  1734. return
  1735. }
  1736. // Relay channel to forwarded connection.
  1737. log.WithContextFields(LogFields{"remoteAddr": remoteAddr}).Debug("relaying")
  1738. // TODO: relay errors to fwdChannel.Stderr()?
  1739. relayWaitGroup := new(sync.WaitGroup)
  1740. relayWaitGroup.Add(1)
  1741. go func() {
  1742. defer relayWaitGroup.Done()
  1743. // io.Copy allocates a 32K temporary buffer, and each port forward relay uses
  1744. // two of these buffers; using io.CopyBuffer with a smaller buffer reduces the
  1745. // overall memory footprint.
  1746. bytes, err := io.CopyBuffer(
  1747. fwdChannel, fwdConn, make([]byte, SSH_TCP_PORT_FORWARD_COPY_BUFFER_SIZE))
  1748. atomic.AddInt64(&bytesDown, bytes)
  1749. if err != nil && err != io.EOF {
  1750. // Debug since errors such as "connection reset by peer" occur during normal operation
  1751. log.WithContextFields(LogFields{"error": err}).Debug("downstream TCP relay failed")
  1752. }
  1753. // Interrupt upstream io.Copy when downstream is shutting down.
  1754. // TODO: this is done to quickly cleanup the port forward when
  1755. // fwdConn has a read timeout, but is it clean -- upstream may still
  1756. // be flowing?
  1757. fwdChannel.Close()
  1758. }()
  1759. bytes, err := io.CopyBuffer(
  1760. fwdConn, fwdChannel, make([]byte, SSH_TCP_PORT_FORWARD_COPY_BUFFER_SIZE))
  1761. atomic.AddInt64(&bytesUp, bytes)
  1762. if err != nil && err != io.EOF {
  1763. log.WithContextFields(LogFields{"error": err}).Debug("upstream TCP relay failed")
  1764. }
  1765. // Shutdown special case: fwdChannel will be closed and return EOF when
  1766. // the SSH connection is closed, but we need to explicitly close fwdConn
  1767. // to interrupt the downstream io.Copy, which may be blocked on a
  1768. // fwdConn.Read().
  1769. fwdConn.Close()
  1770. relayWaitGroup.Wait()
  1771. log.WithContextFields(
  1772. LogFields{
  1773. "remoteAddr": remoteAddr,
  1774. "bytesUp": atomic.LoadInt64(&bytesUp),
  1775. "bytesDown": atomic.LoadInt64(&bytesDown)}).Debug("exiting")
  1776. }