proxy.go 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471
  1. /*
  2. * Copyright (c) 2023, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. package inproxy
  20. import (
  21. "context"
  22. "io"
  23. "sync"
  24. "sync/atomic"
  25. "time"
  26. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  27. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  28. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
  29. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/protocol"
  30. )
  31. const (
  32. proxyAnnounceDelay = 1 * time.Second
  33. proxyAnnounceDelayJitter = 0.5
  34. proxyAnnounceMaxBackoffDelay = 1 * time.Minute
  35. proxyAnnounceLogSampleSize = 2
  36. proxyAnnounceLogSamplePeriod = 30 * time.Minute
  37. proxyWebRTCAnswerTimeout = 20 * time.Second
  38. proxyDestinationDialTimeout = 20 * time.Second
  39. proxyRelayInactivityTimeout = 5 * time.Minute
  40. )
  41. // Proxy is the in-proxy proxying component, which relays traffic from a
  42. // client to a Psiphon server.
  43. type Proxy struct {
  44. bytesUp atomic.Int64
  45. bytesDown atomic.Int64
  46. peakBytesUp atomic.Int64
  47. peakBytesDown atomic.Int64
  48. announcing atomic.Int32
  49. connectingClients atomic.Int32
  50. connectedClients atomic.Int32
  51. config *ProxyConfig
  52. activityUpdateWrapper *activityUpdateWrapper
  53. lastAnnouncing int32
  54. lastConnectingClients int32
  55. lastConnectedClients int32
  56. networkDiscoveryMutex sync.Mutex
  57. networkDiscoveryRunOnce bool
  58. networkDiscoveryNetworkID string
  59. nextAnnounceMutex sync.Mutex
  60. nextAnnounceBrokerClient *BrokerClient
  61. nextAnnounceNotBefore time.Time
  62. useReducedSettings bool
  63. reducedStartMinute int
  64. reducedEndMinute int
  65. personalStatsMutex sync.Mutex
  66. personalRegionActivity map[string]*RegionActivity
  67. commonStatsMutex sync.Mutex
  68. commonRegionActivity map[string]*RegionActivity
  69. }
  70. // RegionActivity holds metrics per-region for more detailed metric collection.
  71. type RegionActivity struct {
  72. bytesUp atomic.Int64
  73. bytesDown atomic.Int64
  74. connectingClients atomic.Int32
  75. connectedClients atomic.Int32
  76. }
  77. // TODO: add PublicNetworkAddress/ListenNetworkAddress to facilitate manually
  78. // configured, permanent port mappings.
  79. // ProxyConfig specifies the configuration for a Proxy run.
  80. type ProxyConfig struct {
  81. // Logger is used to log events.
  82. Logger common.Logger
  83. // EnableWebRTCDebugLogging indicates whether to emit WebRTC debug logs.
  84. EnableWebRTCDebugLogging bool
  85. // WaitForNetworkConnectivity is a callback that should block until there
  86. // is network connectivity or shutdown. The return value is true when
  87. // there is network connectivity, and false for shutdown.
  88. WaitForNetworkConnectivity func() bool
  89. // GetCurrentNetworkContext is a callback that returns a context tied to
  90. // the lifetime of the host's current active network interface. If the
  91. // active network changes, the previous context returned by
  92. // GetCurrentNetworkContext should cancel. This context is used to
  93. // immediately cancel/close individual connections when the active
  94. // network changes.
  95. GetCurrentNetworkContext func() context.Context
  96. // GetBrokerClient provides a BrokerClient which the proxy will use for
  97. // making broker requests. If GetBrokerClient returns a shared
  98. // BrokerClient instance, the BrokerClient must support multiple,
  99. // concurrent round trips, as the proxy will use it to concurrently
  100. // announce many proxy instances. The BrokerClient should be implemented
  101. // using multiplexing over a shared network connection -- for example,
  102. // HTTP/2 -- and a shared broker session for optimal performance.
  103. GetBrokerClient func() (*BrokerClient, error)
  104. // GetBaseAPIParameters returns Psiphon API parameters to be sent to and
  105. // logged by the broker. Expected parameters include client/proxy
  106. // application and build version information. GetBaseAPIParameters also
  107. // returns the network ID, corresponding to the parameters, to be used in
  108. // tactics logic; the network ID is not sent to the broker.
  109. GetBaseAPIParameters func(includeTacticsParameters bool) (
  110. common.APIParameters, string, error)
  111. // MakeWebRTCDialCoordinator provides a WebRTCDialCoordinator which
  112. // specifies WebRTC-related dial parameters, including selected STUN
  113. // server addresses; network topology information for the current netork;
  114. // NAT logic settings; and other settings.
  115. //
  116. // MakeWebRTCDialCoordinator is invoked for each proxy/client connection,
  117. // and the provider can select new parameters per connection as reqired.
  118. MakeWebRTCDialCoordinator func() (WebRTCDialCoordinator, error)
  119. // ExcludeInterfaceName specifies the network interface to omit from
  120. // proxy WebRTC ICE interface enumeration.
  121. ExcludeInterfaceName string
  122. // HandleTacticsPayload is a callback that receives any tactics payload,
  123. // provided by the broker in proxy announcement request responses.
  124. // HandleTacticsPayload must return true when the tacticsPayload includes
  125. // new tactics, indicating that the proxy should reinitialize components
  126. // controlled by tactics parameters.
  127. HandleTacticsPayload func(
  128. networkID string, compressTactics bool, tacticsPayload []byte) bool
  129. // MustUpgrade is a callback that is invoked when a MustUpgrade flag is
  130. // received from the broker. When MustUpgrade is received, the proxy
  131. // should be stopped and the user should be prompted to upgrade before
  132. // restarting the proxy.
  133. MustUpgrade func()
  134. // MaxCommonClients (formerly MaxClients) is the maximum number of common
  135. // clients that are allowed to connect to the proxy. Must be > 0.
  136. MaxCommonClients int
  137. // MaxPersonalClients is the maximum number of personal clients that are
  138. // allowed to connect to the proxy. Must be > 0.
  139. MaxPersonalClients int
  140. // LimitUpstreamBytesPerSecond limits the upstream data transfer rate for
  141. // a single client. When 0, there is no limit.
  142. LimitUpstreamBytesPerSecond int
  143. // LimitDownstreamBytesPerSecond limits the downstream data transfer rate
  144. // for a single client. When 0, there is no limit.
  145. LimitDownstreamBytesPerSecond int
  146. // ReducedStartTime specifies the local time of day (HH:MM, 24-hour, UTC)
  147. // at which reduced client settings begin.
  148. ReducedStartTime string
  149. // ReducedEndTime specifies the local time of day (HH:MM, 24-hour, UTC) at
  150. // which reduced client settings end.
  151. ReducedEndTime string
  152. // ReducedMaxCommonClients specifies the maximum number of common clients
  153. // that are allowed to connect to the proxy during the reduced time range.
  154. //
  155. // Limitation: We currently do not support ReducedMaxPersonalClients.
  156. // We assume that due to the importance of personal clients, users
  157. // always prefer to have them connected.
  158. //
  159. // Clients connected when the reduced settings begin will not be
  160. // disconnected.
  161. ReducedMaxCommonClients int
  162. // ReducedLimitUpstreamBytesPerSecond limits the upstream data transfer
  163. // rate for a single client during the reduced time range. When 0,
  164. // LimitUpstreamBytesPerSecond is the limit.
  165. //
  166. // Rates for clients already connected when the reduced settings begin or
  167. // end will not change.
  168. ReducedLimitUpstreamBytesPerSecond int
  169. // ReducedLimitDownstreamBytesPerSecond limits the downstream data
  170. // transfer rate for a single client during the reduced time range. When
  171. // 0, LimitDownstreamBytesPerSecond is the limit.
  172. //
  173. // Rates for clients already connected when the reduced settings begin or
  174. // end will not change.
  175. ReducedLimitDownstreamBytesPerSecond int
  176. // ActivityUpdater specifies an ActivityUpdater for activity associated
  177. // with this proxy.
  178. ActivityUpdater ActivityUpdater
  179. }
  180. // RegionActivitySnapshot holds a point-in-time copy of per-region metrics.
  181. // This is used for the ActivityUpdater callback and notice serialization.
  182. type RegionActivitySnapshot struct {
  183. BytesUp int64 `json:"bytesUp"`
  184. BytesDown int64 `json:"bytesDown"`
  185. ConnectingClients int32 `json:"connectingClients"`
  186. ConnectedClients int32 `json:"connectedClients"`
  187. }
  188. // ActivityUpdater is a callback that is invoked when the proxy announces
  189. // availability, when clients connect and disconnect, and periodically with
  190. // data transfer updates (unless idle). This callback may be used to update
  191. // an activity UI. This callback should post this data to another thread or
  192. // handler and return immediately and not block on UI updates.
  193. //
  194. // The personalRegionActivity and commonRegionActivity parameters contain per-region
  195. // metrics (bytes transferred, connecting/connected counts) segmented by client
  196. // region.
  197. type ActivityUpdater func(
  198. announcing int32,
  199. connectingClients int32,
  200. connectedClients int32,
  201. bytesUp int64,
  202. bytesDown int64,
  203. bytesDuration time.Duration,
  204. personalRegionActivitySnapshot map[string]RegionActivitySnapshot,
  205. commonRegionActivitySnapshot map[string]RegionActivitySnapshot)
  206. // NewProxy initializes a new Proxy with the specified configuration.
  207. func NewProxy(config *ProxyConfig) (*Proxy, error) {
  208. // Check if there are no clients who can connect
  209. if config.MaxCommonClients+config.MaxPersonalClients <= 0 {
  210. return nil, errors.TraceNew("invalid MaxCommonClients")
  211. }
  212. p := &Proxy{
  213. config: config,
  214. personalRegionActivity: make(map[string]*RegionActivity),
  215. commonRegionActivity: make(map[string]*RegionActivity),
  216. }
  217. if config.ReducedStartTime != "" ||
  218. config.ReducedEndTime != "" ||
  219. config.ReducedMaxCommonClients > 0 {
  220. startMinute, err := common.ParseTimeOfDayMinutes(config.ReducedStartTime)
  221. if err != nil {
  222. return nil, errors.Tracef("invalid ReducedStartTime: %v", err)
  223. }
  224. endMinute, err := common.ParseTimeOfDayMinutes(config.ReducedEndTime)
  225. if err != nil {
  226. return nil, errors.Tracef("invalid ReducedEndTime: %v", err)
  227. }
  228. if startMinute == endMinute {
  229. return nil, errors.TraceNew("invalid ReducedStartTime/ReducedEndTime")
  230. }
  231. if config.ReducedMaxCommonClients <= 0 ||
  232. config.ReducedMaxCommonClients > config.MaxCommonClients {
  233. return nil, errors.TraceNew("invalid ReducedMaxCommonClients")
  234. }
  235. p.useReducedSettings = true
  236. p.reducedStartMinute = startMinute
  237. p.reducedEndMinute = endMinute
  238. }
  239. p.activityUpdateWrapper = &activityUpdateWrapper{p: p}
  240. return p, nil
  241. }
  242. // activityUpdateWrapper implements the psiphon/common.ActivityUpdater
  243. // interface and is used to receive bytes transferred updates from the
  244. // ActivityConns wrapping proxied traffic. A wrapper is used so that
  245. // UpdateProgress is not exported from Proxy.
  246. type activityUpdateWrapper struct {
  247. p *Proxy
  248. }
  249. func (w *activityUpdateWrapper) UpdateProgress(bytesRead, bytesWritten int64, _ int64) {
  250. w.p.bytesUp.Add(bytesWritten)
  251. w.p.bytesDown.Add(bytesRead)
  252. }
  253. // connectionActivityWrapper implements common.ActivityUpdater for a single
  254. // connection. It caches the RegionActivity pointer to enable atomic updates
  255. // with no mutex locking.
  256. type connectionActivityWrapper struct {
  257. p *Proxy
  258. regionActivity *RegionActivity
  259. }
  260. func (w *connectionActivityWrapper) UpdateProgress(bytesRead, bytesWritten int64, _ int64) {
  261. w.p.bytesUp.Add(bytesWritten)
  262. w.p.bytesDown.Add(bytesRead)
  263. if w.regionActivity != nil {
  264. w.regionActivity.bytesUp.Add(bytesWritten)
  265. w.regionActivity.bytesDown.Add(bytesRead)
  266. }
  267. }
  268. // Run runs the proxy. The proxy sends requests to the Broker announcing its
  269. // availability; the Broker matches the proxy with clients, and facilitates
  270. // an exchange of WebRTC connection information; the proxy and each client
  271. // attempt to establish a connection; and the client's traffic is relayed to
  272. // Psiphon server.
  273. //
  274. // Run ends when ctx is Done. A proxy run may continue across underlying
  275. // network changes assuming that the ProxyConfig GetBrokerClient and
  276. // MakeWebRTCDialCoordinator callbacks react to network changes and provide
  277. // instances that are reflect network changes.
  278. func (p *Proxy) Run(ctx context.Context) {
  279. // Run MaxClient proxying workers. Each worker handles one client at a time.
  280. proxyWaitGroup := new(sync.WaitGroup)
  281. // Capture activity updates every second, which is the required frequency
  282. // for PeakUp/DownstreamBytesPerSecond. This is also a reasonable
  283. // frequency for invoking the ActivityUpdater and updating UI widgets.
  284. proxyWaitGroup.Add(1)
  285. go func() {
  286. defer proxyWaitGroup.Done()
  287. p.lastAnnouncing = 0
  288. p.lastConnectingClients = 0
  289. p.lastConnectedClients = 0
  290. activityUpdatePeriod := 1 * time.Second
  291. ticker := time.NewTicker(activityUpdatePeriod)
  292. defer ticker.Stop()
  293. loop:
  294. for {
  295. select {
  296. case <-ticker.C:
  297. p.activityUpdate(activityUpdatePeriod)
  298. case <-ctx.Done():
  299. break loop
  300. }
  301. }
  302. }()
  303. // Launch the first proxy worker, passing a signal to be triggered once
  304. // the very first announcement round trip is complete. The first round
  305. // trip is awaited so that:
  306. //
  307. // - The first announce response will arrive with any new tactics,
  308. // which may be applied before launching additional workers.
  309. //
  310. // - The first worker gets no announcement delay and is also guaranteed to
  311. // be the shared session establisher. Since the announcement delays are
  312. // applied _after_ waitToShareSession, it would otherwise be possible,
  313. // with a race of MaxClient initial, concurrent announces, for the
  314. // session establisher to be a different worker than the no-delay worker.
  315. //
  316. // The first worker is the only proxy worker which sets
  317. // ProxyAnnounceRequest.CheckTactics/PreCheckTactics. PreCheckTactics is
  318. // used on the first announcement so the request returns immediately
  319. // without awaiting a match. This allows all workers to be launched
  320. // quickly.
  321. commonProxiesToCreate, personalProxiesToCreate :=
  322. p.config.MaxCommonClients, p.config.MaxPersonalClients
  323. // Doing this outside of the go routine to avoid race conditions
  324. firstWorkerIsPersonal := p.config.MaxCommonClients <= 0
  325. if firstWorkerIsPersonal {
  326. personalProxiesToCreate -= 1
  327. } else {
  328. commonProxiesToCreate -= 1
  329. }
  330. signalFirstAnnounceCtx, signalFirstAnnounceDone :=
  331. context.WithCancel(context.Background())
  332. proxyWaitGroup.Add(1)
  333. go func() {
  334. defer proxyWaitGroup.Done()
  335. p.proxyClients(ctx, signalFirstAnnounceDone, false, firstWorkerIsPersonal)
  336. }()
  337. select {
  338. case <-signalFirstAnnounceCtx.Done():
  339. case <-ctx.Done():
  340. return
  341. }
  342. // Launch the remaining workers.
  343. for i := 0; i < commonProxiesToCreate; i++ {
  344. isPersonal := false
  345. // When reduced settings are in effect, a subset of workers will pause
  346. // during the reduced time period. Since ReducedMaxCommonClients > 0 the
  347. // first proxy worker is never paused.
  348. workerNum := i + 1
  349. reducedPause := p.useReducedSettings &&
  350. workerNum >= p.config.ReducedMaxCommonClients
  351. proxyWaitGroup.Add(1)
  352. go func(reducedPause bool) {
  353. defer proxyWaitGroup.Done()
  354. p.proxyClients(ctx, nil, reducedPause, isPersonal)
  355. }(reducedPause)
  356. }
  357. for i := 0; i < personalProxiesToCreate; i++ {
  358. // Limitation: There are no reduced settings for personal proxies
  359. isPersonal := true
  360. proxyWaitGroup.Add(1)
  361. go func() {
  362. defer proxyWaitGroup.Done()
  363. p.proxyClients(ctx, nil, false, isPersonal)
  364. }()
  365. }
  366. proxyWaitGroup.Wait()
  367. }
  368. func (p *Proxy) activityUpdate(period time.Duration) {
  369. // Concurrency: activityUpdate is called by only the single goroutine
  370. // created in Run.
  371. announcing := p.announcing.Load()
  372. connectingClients := p.connectingClients.Load()
  373. connectedClients := p.connectedClients.Load()
  374. bytesUp := p.bytesUp.Swap(0)
  375. bytesDown := p.bytesDown.Swap(0)
  376. greaterThanSwapInt64(&p.peakBytesUp, bytesUp)
  377. greaterThanSwapInt64(&p.peakBytesDown, bytesDown)
  378. personalRegionActivity := p.snapshotAndResetRegionActivity(
  379. &p.personalStatsMutex, p.personalRegionActivity)
  380. commonRegionActivity := p.snapshotAndResetRegionActivity(
  381. &p.commonStatsMutex, p.commonRegionActivity)
  382. stateChanged := announcing != p.lastAnnouncing ||
  383. connectingClients != p.lastConnectingClients ||
  384. connectedClients != p.lastConnectedClients
  385. p.lastAnnouncing = announcing
  386. p.lastConnectingClients = connectingClients
  387. p.lastConnectedClients = connectedClients
  388. if !stateChanged &&
  389. bytesUp == 0 &&
  390. bytesDown == 0 {
  391. // Skip the activity callback on idle bytes or no change in worker state.
  392. return
  393. }
  394. p.config.ActivityUpdater(
  395. announcing,
  396. connectingClients,
  397. connectedClients,
  398. bytesUp,
  399. bytesDown,
  400. period,
  401. personalRegionActivity,
  402. commonRegionActivity)
  403. }
  404. // getOrCreateRegionActivity returns the RegionActivity for a region, creating it
  405. // if needed. This should be called once at connection start to avoid multiple
  406. // lock usage.
  407. func (p *Proxy) getOrCreateRegionActivity(region string, isPersonal bool) *RegionActivity {
  408. var mutex *sync.Mutex
  409. var statsMap map[string]*RegionActivity
  410. if isPersonal {
  411. mutex = &p.personalStatsMutex
  412. statsMap = p.personalRegionActivity
  413. } else {
  414. mutex = &p.commonStatsMutex
  415. statsMap = p.commonRegionActivity
  416. }
  417. mutex.Lock()
  418. defer mutex.Unlock()
  419. stats, exists := statsMap[region]
  420. if !exists {
  421. stats = &RegionActivity{}
  422. statsMap[region] = stats
  423. }
  424. return stats
  425. }
  426. // snapshotAndResetRegionActivity creates a copy of region stats with bytes reset
  427. // to zero, and prunes any entries that have no active connections and zero
  428. // bytes. The snapshot mechanism allows us to avoid holding locks during the
  429. // callback invocation.
  430. func (p *Proxy) snapshotAndResetRegionActivity(
  431. mutex *sync.Mutex,
  432. statsMap map[string]*RegionActivity,
  433. ) map[string]RegionActivitySnapshot {
  434. mutex.Lock()
  435. defer mutex.Unlock()
  436. result := make(map[string]RegionActivitySnapshot, len(statsMap))
  437. regionsToDelete := []string{}
  438. for region, stats := range statsMap {
  439. snapshot := RegionActivitySnapshot{
  440. BytesUp: stats.bytesUp.Swap(0),
  441. BytesDown: stats.bytesDown.Swap(0),
  442. ConnectingClients: stats.connectingClients.Load(),
  443. ConnectedClients: stats.connectedClients.Load(),
  444. }
  445. if snapshot.BytesUp > 0 || snapshot.BytesDown > 0 ||
  446. snapshot.ConnectingClients > 0 || snapshot.ConnectedClients > 0 {
  447. result[region] = snapshot
  448. } else {
  449. regionsToDelete = append(regionsToDelete, region)
  450. }
  451. }
  452. for _, region := range regionsToDelete {
  453. delete(statsMap, region)
  454. }
  455. return result
  456. }
  457. func greaterThanSwapInt64(addr *atomic.Int64, new int64) bool {
  458. // Limitation: if there are two concurrent calls, the greater value could
  459. // get overwritten.
  460. old := addr.Load()
  461. if new > old {
  462. return addr.CompareAndSwap(old, new)
  463. }
  464. return false
  465. }
  466. func (p *Proxy) isReducedUntil() (int, time.Time) {
  467. if !p.useReducedSettings {
  468. return p.config.MaxCommonClients, time.Time{}
  469. }
  470. now := time.Now().UTC()
  471. minute := now.Hour()*60 + now.Minute()
  472. isReduced := false
  473. if p.reducedStartMinute < p.reducedEndMinute {
  474. isReduced = minute >= p.reducedStartMinute && minute < p.reducedEndMinute
  475. } else {
  476. isReduced = minute >= p.reducedStartMinute || minute < p.reducedEndMinute
  477. }
  478. if !isReduced {
  479. return p.config.MaxCommonClients, time.Time{}
  480. }
  481. endHour := p.reducedEndMinute / 60
  482. endMinute := p.reducedEndMinute % 60
  483. endTime := time.Date(
  484. now.Year(),
  485. now.Month(),
  486. now.Day(),
  487. endHour,
  488. endMinute,
  489. 0,
  490. 0,
  491. now.Location(),
  492. )
  493. if !endTime.After(now) {
  494. endTime = endTime.AddDate(0, 0, 1)
  495. }
  496. return p.config.ReducedMaxCommonClients, endTime
  497. }
  498. func (p *Proxy) getLimits() (int, int, common.RateLimits) {
  499. rateLimits := common.RateLimits{
  500. ReadBytesPerSecond: int64(p.config.LimitUpstreamBytesPerSecond),
  501. WriteBytesPerSecond: int64(p.config.LimitDownstreamBytesPerSecond),
  502. }
  503. maxCommonClients, reducedUntil := p.isReducedUntil()
  504. if !reducedUntil.IsZero() {
  505. upstream := p.config.ReducedLimitUpstreamBytesPerSecond
  506. if upstream == 0 {
  507. upstream = p.config.LimitUpstreamBytesPerSecond
  508. }
  509. downstream := p.config.ReducedLimitDownstreamBytesPerSecond
  510. if downstream == 0 {
  511. downstream = p.config.LimitDownstreamBytesPerSecond
  512. }
  513. rateLimits = common.RateLimits{
  514. ReadBytesPerSecond: int64(upstream),
  515. WriteBytesPerSecond: int64(downstream),
  516. }
  517. }
  518. maxPersonalClients := p.config.MaxPersonalClients
  519. return maxCommonClients, maxPersonalClients, rateLimits
  520. }
  521. // getAnnounceDelayParameters is a helper that fetches the proxy announcement
  522. // delay parameters from the current broker client.
  523. //
  524. // getAnnounceDelayParameters is used to configure a delay when
  525. // proxyOneClient fails. As having no broker clients is a possible
  526. // proxyOneClient failure case, GetBrokerClient errors are ignored here and
  527. // defaults used in that case.
  528. func (p *Proxy) getAnnounceDelayParameters() (time.Duration, time.Duration, float64) {
  529. brokerClient, err := p.config.GetBrokerClient()
  530. if err != nil {
  531. return proxyAnnounceDelay, proxyAnnounceMaxBackoffDelay, proxyAnnounceDelayJitter
  532. }
  533. brokerCoordinator := brokerClient.GetBrokerDialCoordinator()
  534. return common.ValueOrDefault(brokerCoordinator.AnnounceDelay(), proxyAnnounceDelay),
  535. common.ValueOrDefault(brokerCoordinator.AnnounceMaxBackoffDelay(), proxyAnnounceMaxBackoffDelay),
  536. common.ValueOrDefault(brokerCoordinator.AnnounceDelayJitter(), proxyAnnounceDelayJitter)
  537. }
  538. func (p *Proxy) proxyClients(
  539. ctx context.Context, signalAnnounceDone func(), reducedPause bool, isPersonal bool) {
  540. // Proxy one client, repeating until ctx is done.
  541. //
  542. // This worker starts with posting a long-polling announcement request.
  543. // The broker response with a matched client, and the proxy and client
  544. // attempt to establish a WebRTC connection for relaying traffic.
  545. //
  546. // Limitation: this design may not maximize the utility of the proxy,
  547. // since some proxy/client connections will fail at the WebRTC stage due
  548. // to NAT traversal failure, and at most MaxClient concurrent
  549. // establishments are attempted. Another scenario comes from the Psiphon
  550. // client horse race, which may start in-proxy dials but then abort them
  551. // when some other tunnel protocol succeeds.
  552. //
  553. // As a future enhancement, consider using M announcement goroutines and N
  554. // WebRTC dial goroutines. When an announcement gets a response,
  555. // immediately announce again unless there are already MaxClient active
  556. // connections established. This approach may require the proxy to
  557. // backpedal and reject connections when establishment is too successful.
  558. //
  559. // Another enhancement could be a signal from the client, to the broker,
  560. // relayed to the proxy, when a dial is aborted.
  561. failureDelayFactor := time.Duration(1)
  562. // To reduce diagnostic log noise, only log an initial sample of
  563. // announcement request timings (delays/elapsed time) and a periodic
  564. // sample of repeating errors such as "no match".
  565. logAnnounceCount := proxyAnnounceLogSampleSize
  566. logErrorsCount := proxyAnnounceLogSampleSize
  567. lastErrMsg := ""
  568. startLogSampleTime := time.Now()
  569. logAnnounce := func() bool {
  570. if logAnnounceCount > 0 {
  571. logAnnounceCount -= 1
  572. return true
  573. }
  574. return false
  575. }
  576. preCheckTacticsDone := false
  577. for ctx.Err() == nil {
  578. if !p.config.WaitForNetworkConnectivity() {
  579. break
  580. }
  581. // Pause designated workers during the reduced time range. In-flight
  582. // announces are not interrupted and connected clients are not
  583. // disconnected, so there is a gradual transition into reduced mode.
  584. if reducedPause {
  585. _, reducedUntil := p.isReducedUntil()
  586. if !reducedUntil.IsZero() {
  587. pauseDuration := time.Until(reducedUntil)
  588. p.config.Logger.WithTraceFields(common.LogFields{
  589. "duration": pauseDuration.String(),
  590. }).Info("pause worker")
  591. timer := time.NewTimer(pauseDuration)
  592. select {
  593. case <-timer.C:
  594. case <-ctx.Done():
  595. }
  596. timer.Stop()
  597. if ctx.Err() != nil {
  598. break
  599. }
  600. }
  601. }
  602. if time.Since(startLogSampleTime) >= proxyAnnounceLogSamplePeriod {
  603. logAnnounceCount = proxyAnnounceLogSampleSize
  604. logErrorsCount = proxyAnnounceLogSampleSize
  605. lastErrMsg = ""
  606. startLogSampleTime = time.Now()
  607. }
  608. backOff, err := p.proxyOneClient(
  609. ctx, logAnnounce, &preCheckTacticsDone, signalAnnounceDone, isPersonal)
  610. if !backOff || err == nil {
  611. failureDelayFactor = 1
  612. }
  613. if err != nil && ctx.Err() == nil {
  614. // Apply a simple exponential backoff based on whether
  615. // proxyOneClient either relayed client traffic or got no match,
  616. // or encountered a failure.
  617. //
  618. // The proxyOneClient failure could range from local
  619. // configuration (no broker clients) to network issues(failure to
  620. // completely establish WebRTC connection) and this backoff
  621. // prevents both excess local logging and churning in the former
  622. // case and excessive bad service to clients or unintentionally
  623. // overloading the broker in the latter case.
  624. delay, maxBackoffDelay, jitter := p.getAnnounceDelayParameters()
  625. delay = delay * failureDelayFactor
  626. if delay > maxBackoffDelay {
  627. delay = maxBackoffDelay
  628. }
  629. if failureDelayFactor < 1<<20 {
  630. failureDelayFactor *= 2
  631. }
  632. // Sample error log.
  633. //
  634. // Limitation: the lastErrMsg string comparison isn't compatible
  635. // with errors with minor variations, such as "unexpected
  636. // response status code %d after %v" from
  637. // InproxyBrokerRoundTripper.RoundTrip, with a time duration in
  638. // the second parameter.
  639. errMsg := err.Error()
  640. if lastErrMsg != errMsg {
  641. logErrorsCount = proxyAnnounceLogSampleSize
  642. lastErrMsg = errMsg
  643. }
  644. if logErrorsCount > 0 {
  645. p.config.Logger.WithTraceFields(
  646. common.LogFields{
  647. "error": errMsg,
  648. "delay": delay.String(),
  649. "jitter": jitter,
  650. }).Error("proxy client failed")
  651. logErrorsCount -= 1
  652. }
  653. common.SleepWithJitter(ctx, delay, jitter)
  654. }
  655. }
  656. }
  657. // resetNetworkDiscovery resets the network discovery state, which will force
  658. // another network discovery when doNetworkDiscovery is invoked.
  659. // resetNetworkDiscovery is called when new tactics have been received from
  660. // the broker, as new tactics may change parameters that control network
  661. // discovery.
  662. func (p *Proxy) resetNetworkDiscovery() {
  663. p.networkDiscoveryMutex.Lock()
  664. defer p.networkDiscoveryMutex.Unlock()
  665. p.networkDiscoveryRunOnce = false
  666. p.networkDiscoveryNetworkID = ""
  667. }
  668. func (p *Proxy) doNetworkDiscovery(
  669. ctx context.Context,
  670. webRTCCoordinator WebRTCDialCoordinator) {
  671. // Allow only one concurrent network discovery. In practise, this may
  672. // block all other proxyOneClient goroutines while one single goroutine
  673. // runs doNetworkDiscovery. Subsequently, all other goroutines will find
  674. // networkDiscoveryRunOnce is true and use the cached results.
  675. p.networkDiscoveryMutex.Lock()
  676. defer p.networkDiscoveryMutex.Unlock()
  677. networkID := webRTCCoordinator.NetworkID()
  678. if p.networkDiscoveryRunOnce &&
  679. p.networkDiscoveryNetworkID == networkID {
  680. // Already ran discovery for this network.
  681. //
  682. // TODO: periodically re-probe for port mapping services?
  683. return
  684. }
  685. // Reset and configure port mapper component, as required. See
  686. // initPortMapper comment.
  687. initPortMapper(webRTCCoordinator)
  688. // Gather local network NAT/port mapping metrics and configuration before
  689. // sending any announce requests. NAT topology metrics are used by the
  690. // Broker to optimize client and in-proxy matching. Unlike the client, we
  691. // always perform this synchronous step here, since waiting doesn't
  692. // necessarily block a client tunnel dial.
  693. waitGroup := new(sync.WaitGroup)
  694. waitGroup.Add(1)
  695. go func() {
  696. defer waitGroup.Done()
  697. // NATDiscover may use cached NAT type/port mapping values from
  698. // DialParameters, based on the network ID. If discovery is not
  699. // successful, the proxy still proceeds to announce.
  700. NATDiscover(
  701. ctx,
  702. &NATDiscoverConfig{
  703. Logger: p.config.Logger,
  704. WebRTCDialCoordinator: webRTCCoordinator,
  705. })
  706. }()
  707. waitGroup.Wait()
  708. p.networkDiscoveryRunOnce = true
  709. p.networkDiscoveryNetworkID = networkID
  710. }
// proxyOneClient runs one complete announce/answer/relay cycle for a single
// client. It performs network discovery, sends a proxy announcement to the
// broker and, when the broker returns a match, creates the WebRTC answer,
// sends the answer to the broker, awaits the WebRTC connection, dials the
// destination, and relays traffic between the client and the destination
// until one relay direction ends or ctx is done.
//
// Parameters:
//   - ctx: bounds the whole cycle. When GetCurrentNetworkContext is
//     configured, ctx is merged with the current network context.
//   - logAnnounce: called once after the announce request returns; when it
//     returns true, the request delay and elapsed time are logged.
//   - preCheckTacticsDone: read to select between CheckTactics and
//     PreCheckTactics, and set to true after an announce round trip that
//     used PreCheckTactics succeeds. Only consulted when signalAnnounceDone
//     is non-nil.
//   - signalAnnounceDone: when non-nil, invoked after a successful announce
//     round trip, once any tactics payload has been handled.
//   - isPersonal: when true, the broker coordinator's personal compartment
//     IDs are sent in the announcement.
//
// Returns a backOff flag and an error. backOff is true only for outcomes
// that should delay the next announcement: must-upgrade, limited,
// unsupported protocol version, and failures after a match in which no bytes
// were relayed. A no-match response is returned as a "no match" error with
// backOff false. A nil error is returned when the client was no longer
// awaiting the answer, and may be returned when the relay ends.
func (p *Proxy) proxyOneClient(
	ctx context.Context,
	logAnnounce func() bool,
	preCheckTacticsDone *bool,
	signalAnnounceDone func(),
	isPersonal bool) (bool, error) {

	// Cancel/close this connection immediately if the network changes.
	if p.config.GetCurrentNetworkContext != nil {
		var cancelFunc context.CancelFunc
		ctx, cancelFunc = common.MergeContextCancel(
			ctx, p.config.GetCurrentNetworkContext())
		defer cancelFunc()
	}

	// Do not trigger back-off unless the proxy successfully announces and
	// only then performs poorly.
	//
	// A no-match response should not trigger back-off, nor should broker
	// request transport errors which may include non-200 responses due to
	// CDN timeout mismatches or TLS errors due to CDN TLS fingerprint
	// incompatibility.
	backOff := false

	// Get a new WebRTCDialCoordinator, which should be configured with the
	// latest network tactics.
	webRTCCoordinator, err := p.config.MakeWebRTCDialCoordinator()
	if err != nil {
		return backOff, errors.Trace(err)
	}

	// Perform network discovery, to determine NAT type and other network
	// topology information that is reported to the broker in the proxy
	// announcement and used to optimize proxy/client matching. Unlike
	// clients, which can't easily delay dials in the tunnel establishment
	// horse race, proxies will always perform network discovery.
	// doNetworkDiscovery allows only one concurrent discovery and caches
	// results for the current network (as determined by
	// WebRTCDialCoordinator.NetworkID), so when multiple proxyOneClient
	// goroutines call doNetworkDiscovery, at most one discovery is performed
	// per network.
	p.doNetworkDiscovery(ctx, webRTCCoordinator)

	// Send the announce request.

	// At this point, no NAT traversal operations have been performed by the
	// proxy, since its announcement may sit idle for the long-polling period
	// and NAT hole punches or port mappings could expire before the
	// long-polling period.
	//
	// As a future enhancement, the proxy could begin gathering WebRTC ICE
	// candidates while awaiting a client match, reducing the turn around
	// time after a match. This would make sense if there's high demand for
	// proxies, and so hole punches unlikely to expire while awaiting a client match.
	//
	// Another possibility may be to prepare and send a full offer SDP in the
	// announcement; and have the broker modify either the proxy or client
	// offer SDP to produce an answer SDP. In this case, the entire
	// ProxyAnswerRequest could be skipped as the WebRTC dial can begin after
	// the ProxyAnnounceRequest response (and ClientOfferRequest response).
	//
	// Furthermore, if a port mapping can be established, instead of using
	// WebRTC the proxy could run a Psiphon tunnel protocol listener at the
	// mapped port and send the dial information -- including some secret to
	// authenticate the client -- in its announcement. The client would then
	// receive this direct dial information from the broker and connect. The
	// proxy should be able to send keep alives to extend the port mapping
	// lifetime.
	brokerClient, err := p.config.GetBrokerClient()
	if err != nil {
		return backOff, errors.Trace(err)
	}

	brokerCoordinator := brokerClient.GetBrokerDialCoordinator()

	// Only the first worker, which has signalAnnounceDone configured, checks
	// for tactics.
	checkTactics := signalAnnounceDone != nil && *preCheckTacticsDone
	preCheckTactics := signalAnnounceDone != nil && !*preCheckTacticsDone

	maxCommonClients, maxPersonalClients, rateLimits := p.getLimits()

	// Get the base Psiphon API parameters and additional proxy metrics,
	// including performance information, which is sent to the broker in the
	// proxy announcement.
	//
	// tacticsNetworkID is the exact network ID that corresponds to the
	// tactics tag sent in the base parameters; this is passed to
	// HandleTacticsPayload in order to double check that any tactics
	// returned in the proxy announcement response are associated and stored
	// with the original network ID.
	metrics, tacticsNetworkID, compressTactics, err := p.getMetrics(
		checkTactics || preCheckTactics,
		brokerCoordinator,
		webRTCCoordinator,
		maxCommonClients,
		maxPersonalClients,
		rateLimits)
	if err != nil {
		return backOff, errors.Trace(err)
	}

	// Set a delay before announcing, to stagger the announce request times.
	// The delay helps to avoid triggering rate limits or similar errors from
	// any intermediate CDN between the proxy and the broker; and provides a
	// nudge towards better load balancing across multiple large
	// MaxCommonClients proxies, as the broker primarily matches enqueued
	// announces in FIFO order, since older announces expire earlier.
	//
	// The delay is intended to be applied after doNetworkDiscovery, which has
	// no reason to be delayed; and also after any waitToShareSession delay,
	// as delaying before waitToShareSession can result in the announce
	// request times collapsing back together. Delaying after
	// waitToShareSession is handled by brokerClient.ProxyAnnounce, which
	// will also extend the base request timeout, as required, to account for
	// any deliberate delay.
	requestDelay := time.Duration(0)
	announceDelay, _, announceDelayJitter := p.getAnnounceDelayParameters()
	p.nextAnnounceMutex.Lock()
	nextDelay := prng.JitterDuration(announceDelay, announceDelayJitter)
	if p.nextAnnounceBrokerClient != brokerClient {
		// Reset the delay when the broker client changes.
		p.nextAnnounceNotBefore = time.Time{}
		p.nextAnnounceBrokerClient = brokerClient
	}
	if p.nextAnnounceNotBefore.IsZero() {
		p.nextAnnounceNotBefore = time.Now().Add(nextDelay)
		// No delay for the very first announce request, so leave
		// requestDelay set to 0.
	} else {
		requestDelay = time.Until(p.nextAnnounceNotBefore)
		if requestDelay < 0 {
			// This announce did not arrive until after the next delay already
			// passed, so proceed with no delay.
			p.nextAnnounceNotBefore = time.Now().Add(nextDelay)
			requestDelay = 0
		} else {
			p.nextAnnounceNotBefore = p.nextAnnounceNotBefore.Add(nextDelay)
		}
	}
	p.nextAnnounceMutex.Unlock()

	// A proxy ID is implicitly sent with requests; it's the proxy's session
	// public key.
	//
	// ProxyAnnounce applies an additional request timeout to facilitate
	// long-polling.
	p.announcing.Add(1)
	announceStartTime := time.Now()

	// Ignore the personalCompartmentIDs if this proxy is not personal
	var personalCompartmentIDs []ID
	if isPersonal {
		personalCompartmentIDs = brokerCoordinator.PersonalCompartmentIDs()
	}

	announceResponse, err := brokerClient.ProxyAnnounce(
		ctx,
		requestDelay,
		&ProxyAnnounceRequest{
			PersonalCompartmentIDs: personalCompartmentIDs,
			Metrics:                metrics,
			CheckTactics:           checkTactics,
			PreCheckTactics:        preCheckTactics,
		})
	// The timing sample is logged whether or not the request succeeded; err
	// is checked only after the announcing counter is decremented.
	if logAnnounce() {
		p.config.Logger.WithTraceFields(common.LogFields{
			"delay":       requestDelay.String(),
			"elapsedTime": time.Since(announceStartTime).String(),
		}).Info("announcement request")
	}
	p.announcing.Add(-1)
	if err != nil {
		return backOff, errors.Trace(err)
	}

	if len(announceResponse.TacticsPayload) > 0 {

		// The TacticsPayload may include new tactics, or may simply signal,
		// to the Psiphon client, that its tactics tag remains up-to-date and
		// to extend cached tactics TTL. HandleTacticsPayload returns true
		// when tactics have changed; in this case we clear cached network
		// discovery but proceed with handling the proxy announcement
		// response as there may still be a match.
		if p.config.HandleTacticsPayload(
			tacticsNetworkID,
			compressTactics,
			announceResponse.TacticsPayload) {
			p.resetNetworkDiscovery()
		}
	}

	// Signal that the announce round trip is complete, allowing other workers
	// to launch. At this point, the broker Noise session should be established
	// and any fresh tactics applied. Also toggle preCheckTacticsDone since
	// there's no need to retry PreCheckTactics once a round trip succeeds.
	if signalAnnounceDone != nil {
		signalAnnounceDone()
	}
	if preCheckTactics {
		*preCheckTacticsDone = true
	}

	// MustUpgrade has precedence over other cases, to ensure the callback is
	// invoked. Trigger back-off when rate/entry limited or must upgrade; no
	// back-off for no-match.
	if announceResponse.MustUpgrade {
		if p.config.MustUpgrade != nil {
			p.config.MustUpgrade()
		}
		backOff = true
		return backOff, errors.TraceNew("must upgrade")
	} else if announceResponse.Limited {
		backOff = true
		return backOff, errors.TraceNew("limited")
	} else if announceResponse.NoMatch {
		// No backoff for no-match.
		//
		// This is also the expected response for CheckTactics with a tactics
		// payload and PreCheckTactics with or without a tactics payload,
		// distinct cases which should not back off.
		return backOff, errors.TraceNew("no match")
	}

	// The NoMatch case has already returned above, so at this point the
	// second operand is always true and the check reduces to preCheckTactics.
	if preCheckTactics && !announceResponse.NoMatch {
		// Sanity check: the broker should always respond with no-match for
		// PreCheckTactics.
		return backOff, errors.TraceNew("unexpected PreCheckTactics response")
	}

	if announceResponse.SelectedProtocolVersion < ProtocolVersion1 ||
		(announceResponse.UseMediaStreams &&
			announceResponse.SelectedProtocolVersion < ProtocolVersion2) ||
		announceResponse.SelectedProtocolVersion > LatestProtocolVersion {
		backOff = true
		return backOff, errors.Tracef(
			"unsupported protocol version: %d",
			announceResponse.SelectedProtocolVersion)
	}

	// regionActivity remains nil when the broker did not report a client
	// region; every use below is nil-guarded.
	clientRegion := announceResponse.ClientRegion
	var regionActivity *RegionActivity
	if clientRegion != "" {
		regionActivity = p.getOrCreateRegionActivity(clientRegion, isPersonal)
	}

	// Create per-connection activity wrapper with cached regionActivity pointer
	connActivityWrapper := &connectionActivityWrapper{
		p:              p,
		regionActivity: regionActivity,
	}

	// Trigger back-off if the following WebRTC operations fail to establish a
	// connection.
	backOff = true

	// For activity updates, indicate that a client connection is now underway.
	p.connectingClients.Add(1)
	if regionActivity != nil {
		regionActivity.connectingClients.Add(1)
	}
	connected := false
	// Undo the connecting count on any exit path taken before the connection
	// is marked established; once connected is true, the connecting count has
	// already been decremented further below.
	defer func() {
		if !connected {
			p.connectingClients.Add(-1)
			if regionActivity != nil {
				regionActivity.connectingClients.Add(-1)
			}
		}
	}()

	// Initialize WebRTC using the client's offer SDP
	webRTCAnswerCtx, webRTCAnswerCancelFunc := context.WithTimeout(
		ctx, common.ValueOrDefault(webRTCCoordinator.WebRTCAnswerTimeout(), proxyWebRTCAnswerTimeout))
	defer webRTCAnswerCancelFunc()

	// In personal pairing mode, RFC 1918/4193 private IP addresses are
	// included in SDPs.
	hasPersonalCompartmentIDs := len(personalCompartmentIDs) > 0

	webRTCConn, SDP, sdpMetrics, webRTCErr := newWebRTCConnForAnswer(
		webRTCAnswerCtx,
		&webRTCConfig{
			Logger:                      p.config.Logger,
			EnableDebugLogging:          p.config.EnableWebRTCDebugLogging,
			ExcludeInterfaceName:        p.config.ExcludeInterfaceName,
			WebRTCDialCoordinator:       webRTCCoordinator,
			ClientRootObfuscationSecret: announceResponse.ClientRootObfuscationSecret,
			DoDTLSRandomization:         announceResponse.DoDTLSRandomization,
			UseMediaStreams:             announceResponse.UseMediaStreams,
			TrafficShapingParameters:    announceResponse.TrafficShapingParameters,

			// In media stream mode, this flag indicates to the proxy that it
			// should add the QUIC-based reliability layer wrapping to media
			// streams. In data channel mode, this flag is ignored, since the
			// client configures the data channel using
			// webrtc.DataChannelInit.Ordered, and this configuration is sent
			// to the proxy in the client's SDP.
			ReliableTransport: announceResponse.NetworkProtocol == NetworkProtocolTCP,
		},
		announceResponse.ClientOfferSDP,
		hasPersonalCompartmentIDs)
	var webRTCRequestErr string
	if webRTCErr != nil {
		webRTCErr = errors.Trace(webRTCErr)
		webRTCRequestErr = webRTCErr.Error()
		// Substitute empty values so the answer request below can be built
		// without dereferencing a nil sdpMetrics.
		SDP = WebRTCSessionDescription{}
		sdpMetrics = &webRTCSDPMetrics{}
		// Continue to report the error to the broker. The broker will respond
		// with failure to the client's offer request.
	} else {
		defer webRTCConn.Close()
	}

	// Send answer request with SDP or error.
	answerResponse, err := brokerClient.ProxyAnswer(
		ctx,
		&ProxyAnswerRequest{
			ConnectionID:      announceResponse.ConnectionID,
			ProxyAnswerSDP:    SDP,
			ICECandidateTypes: sdpMetrics.iceCandidateTypes,
			AnswerError:       webRTCRequestErr,
		})
	if err != nil {
		if webRTCErr != nil {
			// Prioritize returning any WebRTC error for logging.
			return backOff, webRTCErr
		}
		// Don't backoff if the answer request fails due to possible transient
		// request transport errors.
		backOff = false
		return backOff, errors.Trace(err)
	}

	// Now that an answer is sent, stop if WebRTC initialization failed.
	if webRTCErr != nil {
		return backOff, webRTCErr
	}

	// Exit if the client was no longer awaiting the answer. There is no
	// backoff in this case, and there's no error, as the proxy did not fail
	// as it's not an unexpected outcome.
	//
	// Limitation: it's possible that the announce request responds quickly
	// and the matched client offer is already close to timing out. The
	// answer request will also respond quickly. There's an increased chance
	// of hitting rate limits in this fast turn around scenario. This outcome
	// is mitigated by InproxyBrokerMatcherOfferMinimumDeadline.
	if answerResponse.NoAwaitingClient {
		backOff = false
		return backOff, nil
	}

	// Await the WebRTC connection.

	// We could concurrently dial the destination, to have that network
	// connection available immediately once the WebRTC channel is
	// established. This would work only for TCP, not UDP, network protocols
	// and could only include the TCP connection, as client traffic is
	// required for all higher layers such as TLS, SSH, etc. This could also
	// create wasted load on destination Psiphon servers, particularly when
	// WebRTC connections fail.
	awaitReadyToProxyCtx, awaitReadyToProxyCancelFunc := context.WithTimeout(
		ctx,
		common.ValueOrDefault(
			webRTCCoordinator.WebRTCAwaitReadyToProxyTimeout(), readyToProxyAwaitTimeout))
	defer awaitReadyToProxyCancelFunc()

	err = webRTCConn.AwaitReadyToProxy(awaitReadyToProxyCtx, announceResponse.ConnectionID)
	if err != nil {
		return backOff, errors.Trace(err)
	}

	// Dial the destination, a Psiphon server. The broker validates that the
	// dial destination is a Psiphon server.
	destinationDialContext, destinationDialCancelFunc := context.WithTimeout(
		ctx,
		common.ValueOrDefault(
			webRTCCoordinator.ProxyDestinationDialTimeout(), proxyDestinationDialTimeout))
	defer destinationDialCancelFunc()

	// Use the custom resolver when resolving destination hostnames, such as
	// those used in domain fronted protocols.
	//
	// - Resolving at the in-proxy should yield a more optimal CDN edge, vs.
	// resolving at the client.
	//
	// - Sending unresolved hostnames to in-proxies can expose some domain
	// fronting configuration. This can be mitigated by enabling domain
	// fronting on this 2nd hop only when the in-proxy is located in a
	// region that may be censored or blocked; this is to be enforced by
	// the broker.
	//
	// - Any DNSResolverPreresolved tactics applied will be relative to the
	// in-proxy location.
	//
	// NOTE(review): the resolve is passed ctx rather than
	// destinationDialContext, so it is not bounded by the destination dial
	// timeout created above; confirm this is intended.
	destinationAddress, err := webRTCCoordinator.ResolveAddress(
		ctx, "ip", announceResponse.DestinationAddress)
	if err != nil {
		return backOff, errors.Trace(err)
	}

	destinationConn, err := webRTCCoordinator.ProxyUpstreamDial(
		destinationDialContext,
		announceResponse.NetworkProtocol.String(),
		destinationAddress)
	if err != nil {
		return backOff, errors.Trace(err)
	}
	defer destinationConn.Close()

	// For activity updates, indicate that a client connection is established.
	connected = true
	p.connectingClients.Add(-1)
	p.connectedClients.Add(1)
	if regionActivity != nil {
		regionActivity.connectingClients.Add(-1)
		regionActivity.connectedClients.Add(1)
	}
	defer func() {
		p.connectedClients.Add(-1)
		if regionActivity != nil {
			regionActivity.connectedClients.Add(-1)
		}
	}()

	// Throttle the relay connection.
	//
	// Here, each client gets LimitUp/DownstreamBytesPerSecond. Proxy
	// operators may want to limit their bandwidth usage with a single
	// up/down value, an overall limit. The ProxyConfig can simply be
	// generated by dividing the limit by MaxCommonClients + MaxPersonalClients.
	// This approach favors performance stability: each client gets the
	// same throttling limits regardless of how many other clients are connected.
	//
	// Rate limits are applied only when a client connection is established;
	// connected clients retain their initial limits even when reduced time
	// starts or ends.
	destinationConn = common.NewThrottledConn(
		destinationConn,
		announceResponse.NetworkProtocol.IsStream(),
		rateLimits)

	// Hook up bytes transferred counting for activity updates.

	// The ActivityMonitoredConn inactivity timeout is configured. For
	// upstream TCP connections, the destinationConn will close when the TCP
	// connection to the Psiphon server closes. But for upstream UDP flows,
	// the relay does not know when the upstream "connection" has closed.
	// Well-behaved clients will close the WebRTC half of the relay when
	// those clients know the UDP-based tunnel protocol connection is closed;
	// the inactivity timeout handles the remaining cases.
	inactivityTimeout :=
		common.ValueOrDefault(
			webRTCCoordinator.ProxyRelayInactivityTimeout(),
			proxyRelayInactivityTimeout)

	destinationConn, err = common.NewActivityMonitoredConn(
		destinationConn, inactivityTimeout, false, nil, connActivityWrapper)
	if err != nil {
		return backOff, errors.Trace(err)
	}

	// Relay the client traffic to the destination. The client traffic is a
	// standard Psiphon tunnel protocol destined to a Psiphon server. Any
	// blocking/censorship at the 2nd hop will be mitigated by the use of
	// Psiphon circumvention protocols and techniques.

	// Limitation: clients may apply fragmentation to traffic relayed over the
	// data channel, and there's no guarantee that the fragmentation write
	// sizes or delays will carry over to the egress side.

	// The proxy operator's ISP may be able to observe that the operator's
	// host has nearly matching ingress and egress traffic. The traffic
	// content won't be the same: the ingress traffic is wrapped in a WebRTC
	// data channel, and the egress traffic is a Psiphon tunnel protocol.
	// With padding and decoy packets, the ingress and egress traffic shape
	// will differ beyond the basic WebRTC overhead. Even with this
	// measure, over time the number of bytes in and out of the proxy may
	// still indicate proxying.
	waitGroup := new(sync.WaitGroup)
	// Buffered for both relay goroutines: each sends exactly once, so neither
	// blocks on its send after the select below has returned.
	relayErrors := make(chan error, 2)
	var relayedUp, relayedDown int32

	waitGroup.Add(1)
	go func() {
		defer waitGroup.Done()

		// WebRTC data channels are based on SCTP, which is actually
		// message-based, not a stream. The (default) max message size for
		// pion/sctp is 65536:
		// https://github.com/pion/sctp/blob/44ed465396c880e379aae9c1bf81809a9e06b580/association.go#L52.
		//
		// As io.Copy uses a buffer size of 32K, each relayed message will be
		// less than the maximum. Calls to ClientConn.Write are also expected
		// to use io.Copy, keeping messages at most 32K in size.

		// io.Copy doesn't return an error on EOF, but we still want to signal
		// that relaying is done, so in this case a nil error is sent to the
		// channel.
		//
		// Limitation: if one io.Copy goroutine sends nil and the other
		// io.Copy goroutine sends a non-nil error concurrently, the non-nil
		// error isn't prioritized.
		n, err := io.Copy(webRTCConn, destinationConn)
		if n > 0 {
			atomic.StoreInt32(&relayedDown, 1)
		}
		relayErrors <- errors.Trace(err)
	}()

	waitGroup.Add(1)
	go func() {
		defer waitGroup.Done()
		n, err := io.Copy(destinationConn, webRTCConn)
		if n > 0 {
			atomic.StoreInt32(&relayedUp, 1)
		}
		relayErrors <- errors.Trace(err)
	}()

	// Wait for the first relay direction to finish or for ctx to be done. In
	// the ctx case err keeps its current value, which is nil at this point.
	select {
	case err = <-relayErrors:
	case <-ctx.Done():
	}

	// Interrupt the relay goroutines by closing the connections.
	webRTCConn.Close()
	destinationConn.Close()
	waitGroup.Wait()

	p.config.Logger.WithTraceFields(common.LogFields{
		"connectionID": announceResponse.ConnectionID,
	}).Info("connection closed")

	// Don't apply a back-off delay to the next announcement since this
	// iteration successfully relayed bytes.
	if atomic.LoadInt32(&relayedUp) == 1 || atomic.LoadInt32(&relayedDown) == 1 {
		backOff = false
	}

	return backOff, err
}
  1199. func (p *Proxy) getMetrics(
  1200. includeTacticsParameters bool,
  1201. brokerCoordinator BrokerDialCoordinator,
  1202. webRTCCoordinator WebRTCDialCoordinator,
  1203. maxCommonClients int,
  1204. maxPersonalClients int,
  1205. rateLimits common.RateLimits) (
  1206. *ProxyMetrics, string, bool, error) {
  1207. // tacticsNetworkID records the exact network ID that corresponds to the
  1208. // tactics tag sent in the base parameters, and is used when applying any
  1209. // new tactics returned by the broker.
  1210. baseParams, tacticsNetworkID, err := p.config.GetBaseAPIParameters(
  1211. includeTacticsParameters)
  1212. if err != nil {
  1213. return nil, "", false, errors.Trace(err)
  1214. }
  1215. apiParams := common.APIParameters{}
  1216. apiParams.Add(baseParams)
  1217. apiParams.Add(common.APIParameters(brokerCoordinator.MetricsForBrokerRequests()))
  1218. compressTactics := protocol.GetCompressTactics(apiParams)
  1219. packedParams, err := protocol.EncodePackedAPIParameters(apiParams)
  1220. if err != nil {
  1221. return nil, "", false, errors.Trace(err)
  1222. }
  1223. return &ProxyMetrics{
  1224. BaseAPIParameters: packedParams,
  1225. ProtocolVersion: LatestProtocolVersion,
  1226. NATType: webRTCCoordinator.NATType(),
  1227. PortMappingTypes: webRTCCoordinator.PortMappingTypes(),
  1228. MaxCommonClients: int32(maxCommonClients),
  1229. MaxPersonalClients: int32(maxPersonalClients),
  1230. ConnectingClients: p.connectingClients.Load(),
  1231. ConnectedClients: p.connectedClients.Load(),
  1232. LimitUpstreamBytesPerSecond: rateLimits.ReadBytesPerSecond,
  1233. LimitDownstreamBytesPerSecond: rateLimits.WriteBytesPerSecond,
  1234. PeakUpstreamBytesPerSecond: p.peakBytesUp.Load(),
  1235. PeakDownstreamBytesPerSecond: p.peakBytesDown.Load(),
  1236. }, tacticsNetworkID, compressTactics, nil
  1237. }