refraction.go 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917
  1. //go:build PSIPHON_ENABLE_REFRACTION_NETWORKING
  2. // +build PSIPHON_ENABLE_REFRACTION_NETWORKING
  3. /*
  4. * Copyright (c) 2018, Psiphon Inc.
  5. * All rights reserved.
  6. *
  7. * This program is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. *
  20. */
  21. /*
  22. Package refraction wraps github.com/refraction-networking/gotapdance with
  23. net.Listener and net.Conn types that provide drop-in integration with Psiphon.
  24. */
  25. package refraction
  26. import (
  27. "context"
  28. "crypto/sha256"
  29. "fmt"
  30. "io/ioutil"
  31. "net"
  32. "os"
  33. "path/filepath"
  34. "sync"
  35. "sync/atomic"
  36. "time"
  37. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  38. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  39. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/protocol"
  40. "github.com/armon/go-proxyproto"
  41. lrucache "github.com/cognusion/go-cache-lru"
  42. refraction_networking_proto "github.com/refraction-networking/gotapdance/protobuf"
  43. refraction_networking_client "github.com/refraction-networking/gotapdance/tapdance"
  44. )
  45. const (
  46. READ_PROXY_PROTOCOL_HEADER_TIMEOUT = 5 * time.Second
  47. REGISTRATION_CACHE_MAX_ENTRIES = 256
  48. )
  49. // Enabled indicates if Refraction Networking functionality is enabled.
  50. func Enabled() bool {
  51. return true
  52. }
  53. // Listener is a net.Listener.
  54. type Listener struct {
  55. net.Listener
  56. }
  57. // Listen creates a new Refraction Networking listener.
  58. //
  59. // The Refraction Networking station (TapDance or Conjure) will send the
  60. // original client address via the HAProxy proxy protocol v1,
  61. // https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt. The original
  62. // client address is read and returned by accepted conns' RemoteAddr.
  63. // RemoteAddr _must_ be called non-concurrently before calling Read on
  64. // accepted conns as the HAProxy proxy protocol header reading logic sets
  65. // SetReadDeadline and performs a Read.
  66. //
  67. // Psiphon server hosts should be configured to accept tunnel connections only
  68. // from Refraction Networking stations.
  69. func Listen(address string) (net.Listener, error) {
  70. tcpListener, err := net.Listen("tcp", address)
  71. if err != nil {
  72. return nil, errors.Trace(err)
  73. }
  74. // Setting a timeout ensures that reading the proxy protocol
  75. // header completes or times out and RemoteAddr will not block. See:
  76. // https://godoc.org/github.com/armon/go-proxyproto#Conn.RemoteAddr
  77. proxyListener := &proxyproto.Listener{
  78. Listener: tcpListener,
  79. ProxyHeaderTimeout: READ_PROXY_PROTOCOL_HEADER_TIMEOUT}
  80. stationListener := &stationListener{
  81. proxyListener: proxyListener,
  82. }
  83. return &Listener{Listener: stationListener}, nil
  84. }
  85. // stationListener uses the proxyproto.Listener SourceCheck callback to
  86. // capture and record the direct remote address, the station address, and
  87. // wraps accepted conns to provide station address metrics via GetMetrics.
  88. // These metrics enable identifying which station fronted a connection, which
  89. // is useful for network operations and troubleshooting.
  90. //
  91. // go-proxyproto.Conn.RemoteAddr reports the originating client IP address,
  92. // which is geolocated and recorded for metrics. The underlying conn's remote
  93. // address, the station address, is not accessible via the go-proxyproto API.
  94. //
  95. // stationListener is not safe for concurrent access.
  96. type stationListener struct {
  97. proxyListener *proxyproto.Listener
  98. }
  99. func (l *stationListener) Accept() (net.Conn, error) {
  100. var stationRemoteAddr net.Addr
  101. l.proxyListener.SourceCheck = func(addr net.Addr) (bool, error) {
  102. stationRemoteAddr = addr
  103. return true, nil
  104. }
  105. conn, err := l.proxyListener.Accept()
  106. if err != nil {
  107. return nil, err
  108. }
  109. if stationRemoteAddr == nil {
  110. return nil, errors.TraceNew("missing station address")
  111. }
  112. return &stationConn{
  113. Conn: conn,
  114. stationIPAddress: common.IPAddressFromAddr(stationRemoteAddr),
  115. }, nil
  116. }
  117. func (l *stationListener) Close() error {
  118. return l.proxyListener.Close()
  119. }
  120. func (l *stationListener) Addr() net.Addr {
  121. return l.proxyListener.Addr()
  122. }
  123. type stationConn struct {
  124. net.Conn
  125. stationIPAddress string
  126. }
  127. // IrregularTunnelError implements the common.IrregularIndicator interface.
  128. func (c *stationConn) IrregularTunnelError() error {
  129. // We expect a PROXY protocol header, but go-proxyproto does not produce an
  130. // error if the "PROXY " prefix is absent; instead the connection will
  131. // proceed. To detect this case, check if the go-proxyproto RemoteAddr IP
  132. // address matches the underlying connection IP address. When these values
  133. // match, there was no PROXY protocol header.
  134. //
  135. // Limitation: the values will match if there is a PROXY protocol header
  136. // containing the same IP address as the underlying connection. This is not
  137. // an expected case.
  138. if common.IPAddressFromAddr(c.RemoteAddr()) == c.stationIPAddress {
  139. return errors.TraceNew("unexpected station IP address")
  140. }
  141. return nil
  142. }
  143. // GetMetrics implements the common.MetricsSource interface.
  144. func (c *stationConn) GetMetrics() common.LogFields {
  145. logFields := make(common.LogFields)
  146. // Ensure we don't log a potential non-station IP address.
  147. if c.IrregularTunnelError() == nil {
  148. logFields["station_ip_address"] = c.stationIPAddress
  149. }
  150. return logFields
  151. }
  152. // DialTapDance establishes a new TapDance connection to a TapDance station
  153. // specified in the config assets and forwarding through to the Psiphon server
  154. // specified by address.
  155. //
  156. // The TapDance station config assets (which are also the Conjure station
  157. // assets) are read from dataDirectory/"refraction-networking". When no config
  158. // is found, default assets are paved.
  159. //
  160. // dialer specifies the custom dialer for underlying TCP dials.
  161. //
  162. // The input ctx is expected to have a timeout for the dial.
  163. //
  164. // Limitation: the parameters emitLogs and dataDirectory are used for one-time
  165. // initialization and are ignored after the first DialTapDance/Conjure call.
  166. func DialTapDance(
  167. ctx context.Context,
  168. emitLogs bool,
  169. dataDirectory string,
  170. dialer common.NetDialer,
  171. address string) (net.Conn, error) {
  172. return dial(
  173. ctx,
  174. emitLogs,
  175. dataDirectory,
  176. dialer,
  177. address,
  178. nil)
  179. }
  180. // DialConjure establishes a new Conjure connection to a Conjure station.
  181. //
  182. // dialer specifies the custom dialer to use for phantom dials. Additional
  183. // Conjure-specific parameters are specified in conjureConfig.
  184. //
  185. // See DialTapdance comment.
  186. func DialConjure(
  187. ctx context.Context,
  188. emitLogs bool,
  189. dataDirectory string,
  190. dialer common.NetDialer,
  191. address string,
  192. conjureConfig *ConjureConfig) (net.Conn, error) {
  193. return dial(
  194. ctx,
  195. emitLogs,
  196. dataDirectory,
  197. dialer,
  198. address,
  199. conjureConfig)
  200. }
// dial is the shared implementation of DialTapDance and DialConjure. A nil
// conjureConfig selects TapDance; a non-nil conjureConfig selects Conjure.
//
// dial performs the one-time package initialization, requires that ctx has a
// deadline, routes all underlying dials through a dialManager so they can be
// interrupted, and, for Conjure, selects a registrar (cached, API, or decoy)
// and maintains the in-memory registration cache. On success the returned
// conn is a *refractionConn.
func dial(
	ctx context.Context,
	emitLogs bool,
	dataDirectory string,
	dialer common.NetDialer,
	address string,
	conjureConfig *ConjureConfig) (net.Conn, error) {

	err := initRefractionNetworking(emitLogs, dataDirectory)
	if err != nil {
		return nil, errors.Trace(err)
	}

	// A deadline is mandatory; dials without one are rejected up front.
	if _, ok := ctx.Deadline(); !ok {
		return nil, errors.TraceNew("dial context has no timeout")
	}

	useConjure := conjureConfig != nil

	manager := newDialManager()

	refractionDialer := &refraction_networking_client.Dialer{
		TcpDialer:      manager.makeManagedDialer(dialer.DialContext),
		UseProxyHeader: true,
	}

	// Conjure state retained for caching decisions after the dial and for
	// metrics on the returned conn.
	conjureCached := false
	conjureDelay := time.Duration(0)

	var conjureCachedRegistration *refraction_networking_client.ConjureReg
	var conjureRecordRegistrar *recordRegistrar

	if useConjure {

		// Our strategy is to try one registration per dial attempt: a cached
		// registration, if it exists, or API or decoy registration, as configured.
		// This assumes Psiphon establishment will try/retry many candidates as
		// required, and that the desired mix of API/decoy registrations will be
		// configured and generated. In good network conditions, internal gotapdance
		// retries (via APIRegistrar.MaxRetries or APIRegistrar.SecondaryRegistrar)
		// are unlikely to start before the Conjure dial is canceled.

		// Caching registrations reduces average Conjure dial time by often
		// eliminating the registration phase. This is especially impactful for
		// short duration tunnels, such as on mobile. Caching also reduces domain
		// fronted traffic and load on the API registrar and decoys.
		//
		// We implement a simple in-memory registration cache with the following
		// behavior:
		//
		// - If a new registration succeeds, but the overall Conjure dial is
		//   _canceled_, the registration is optimistically cached.
		// - If the Conjure phantom dial fails, any associated cached registration
		//   is discarded.
		// - A cached registration's TTL is extended upon phantom dial success.
		// - If the configured TTL changes, the cache is cleared.
		//
		// Limitations:
		// - The cache is not persistent.
		// - There is no TTL extension during a long connection.
		// - Caching a successful registration when the phantom dial is canceled may
		//   skip the necessary "delay" step (however, an immediate re-establishment
		//   to the same candidate is unlikely in this case).
		//
		// TODO:
		// - Revisit when gotapdance adds its own caching.
		// - Consider "pre-registering" Conjure when already connected with a
		//   different protocol, so a Conjure registration is available on the next
		//   establishment; in this scenario, a tunneled API registration would not
		//   require domain fronting.

		refractionDialer.DarkDecoy = true

		// The pop operation removes the registration from the cache. This
		// eliminates the possibility of concurrent candidates (with the same cache
		// key) using and modifying the same registration, a potential race
		// condition. The popped cached registration must be reinserted in the cache
		// after canceling or success, but not on phantom dial failure.

		conjureCachedRegistration = conjureRegistrationCache.pop(conjureConfig)

		if conjureCachedRegistration != nil {

			refractionDialer.DarkDecoyRegistrar = &cachedRegistrar{
				registration: conjureCachedRegistration,
			}

			conjureCached = true
			conjureDelay = 0 // report no delay

		} else if conjureConfig.APIRegistrarBidirectionalURL != "" {

			if conjureConfig.APIRegistrarHTTPClient == nil {
				// While not a guaranteed check, if the APIRegistrarHTTPClient isn't set
				// then the API registration would certainly be unfronted, resulting in a
				// fingerprintable connection leak.
				return nil, errors.TraceNew("missing APIRegistrarHTTPClient")
			}

			refractionDialer.DarkDecoyRegistrar = &refraction_networking_client.APIRegistrarBidirectional{
				Endpoint:        conjureConfig.APIRegistrarBidirectionalURL,
				ConnectionDelay: conjureConfig.APIRegistrarDelay,
				MaxRetries:      0,
				Client:          conjureConfig.APIRegistrarHTTPClient,
			}

			conjureDelay = conjureConfig.APIRegistrarDelay

		} else if conjureConfig.DecoyRegistrarDialer != nil {

			refractionDialer.DarkDecoyRegistrar = &refraction_networking_client.DecoyRegistrar{
				TcpDialer: manager.makeManagedDialer(
					conjureConfig.DecoyRegistrarDialer.DialContext),
			}

			refractionDialer.Width = conjureConfig.DecoyRegistrarWidth

			// Limitation: the decoy registration delay is not currently exposed in the
			// gotapdance API.
			conjureDelay = -1 // don't report delay

		} else {

			return nil, errors.TraceNew("no conjure registrar specified")
		}

		if conjureCachedRegistration == nil && conjureConfig.RegistrationCacheTTL != 0 {

			// Record the registration result in order to cache it.
			conjureRecordRegistrar = &recordRegistrar{
				registrar: refractionDialer.DarkDecoyRegistrar,
			}
			refractionDialer.DarkDecoyRegistrar = conjureRecordRegistrar
		}

		// Map the configured Psiphon transport name to the gotapdance
		// transport type. The Min transport additionally wraps the TCP dialer
		// so that dialed conns are minTransportConns.
		switch conjureConfig.Transport {
		case protocol.CONJURE_TRANSPORT_MIN_OSSH:
			refractionDialer.Transport = refraction_networking_proto.TransportType_Min
			refractionDialer.TcpDialer = newMinTransportDialer(refractionDialer.TcpDialer)
		case protocol.CONJURE_TRANSPORT_OBFS4_OSSH:
			refractionDialer.Transport = refraction_networking_proto.TransportType_Obfs4
		default:
			return nil, errors.Tracef("invalid Conjure transport: %s", conjureConfig.Transport)
		}

		if conjureCachedRegistration != nil {

			// When using a cached registration, patch its TcpDialer to use the custom
			// dialer for this dial. In the non-cached code path, gotapdance will set
			// refractionDialer.TcpDialer into a new registration.
			conjureCachedRegistration.TcpDialer = refractionDialer.TcpDialer
		}
	}

	// If the dial context is cancelled, use dialManager to interrupt
	// refractionDialer.DialContext. See dialManager comment explaining why
	// refractionDialer.DialContext may block even when the input context is
	// cancelled.
	dialComplete := make(chan struct{})
	go func() {
		select {
		case <-ctx.Done():
		case <-dialComplete:
		}
		select {
		// Prioritize the dialComplete case.
		case <-dialComplete:
			return
		default:
		}
		manager.close()
	}()

	conn, err := refractionDialer.DialContext(ctx, "tcp", address)
	// Closing dialComplete releases the watcher goroutine above.
	close(dialComplete)

	if err != nil {
		// Call manager.close before updating cache, to synchronously shutdown dials
		// and ensure there are no further concurrent reads/writes to the recorded
		// registration before referencing it.
		manager.close()
	}

	// Cache (or put back) a successful registration. Also put back in the
	// specific error case where the phantom dial was canceled, as the
	// registration may still be valid. This operation implicitly extends the TTL
	// of a reused cached registration; we assume the Conjure station is also
	// extending the TTL by the same amount.
	//
	// Limitation: the cancel case shouldn't extend the TTL.

	if useConjure && (conjureCachedRegistration != nil || conjureRecordRegistrar != nil) {

		isCanceled := (err != nil && ctx.Err() == context.Canceled)

		if err == nil || isCanceled {

			registration := conjureCachedRegistration
			if registration == nil {
				// We assume gotapdance is no longer accessing the Registrar.
				registration = conjureRecordRegistrar.registration
			}

			// conjureRecordRegistrar.registration will be nil if there was no cached
			// registration _and_ registration didn't succeed before a cancel.
			if registration != nil {

				// Do not retain a reference to the custom dialer, as its context will not
				// be valid for future dials using this cached registration. Assumes that
				// gotapdance will no longer reference the TcpDialer now that the
				// connection is established.
				registration.TcpDialer = nil

				conjureRegistrationCache.put(conjureConfig, registration, isCanceled)
			}

		} else if conjureCachedRegistration != nil {

			// The dial failed for a reason other than cancellation. The popped
			// cached registration is not put back, which discards it.
			conjureConfig.Logger.WithTraceFields(
				common.LogFields{
					"diagnosticID": conjureConfig.DiagnosticID,
					"reason":       "phantom dial failed",
				}).Info(
				"drop cached registration")
		}
	}

	if err != nil {
		return nil, errors.Trace(err)
	}

	// The initial dial is complete; subsequent managed dials switch to the
	// manager's run context.
	manager.startUsingRunCtx()

	refractionConn := &refractionConn{
		Conn:    conn,
		manager: manager,
	}

	if useConjure {
		// Retain these values for logging metrics.
		refractionConn.isConjure = true
		refractionConn.conjureCached = conjureCached
		refractionConn.conjureDelay = conjureDelay
		refractionConn.conjureTransport = conjureConfig.Transport
	}

	return refractionConn, nil
}
  400. func DeleteCachedConjureRegistration(config *ConjureConfig) {
  401. conjureRegistrationCache.delete(config)
  402. }
  403. type registrationCache struct {
  404. mutex sync.Mutex
  405. TTL time.Duration
  406. cache *lrucache.Cache
  407. }
  408. func newRegistrationCache() *registrationCache {
  409. return &registrationCache{
  410. cache: lrucache.NewWithLRU(
  411. lrucache.NoExpiration,
  412. 1*time.Minute,
  413. REGISTRATION_CACHE_MAX_ENTRIES),
  414. }
  415. }
  416. func (c *registrationCache) put(
  417. config *ConjureConfig,
  418. registration *refraction_networking_client.ConjureReg,
  419. isCanceled bool) {
  420. c.mutex.Lock()
  421. defer c.mutex.Unlock()
  422. // Clear the entire cache if the configured TTL changes to avoid retaining
  423. // items for too long. This is expected to be an infrequent event. The
  424. // go-cache-lru API does not offer a mechanism to inspect and adjust the TTL
  425. // of all existing items.
  426. if c.TTL != config.RegistrationCacheTTL {
  427. c.cache.Flush()
  428. c.TTL = config.RegistrationCacheTTL
  429. }
  430. // Drop the cached registration if another entry is found under the same key.
  431. // Since the dial pops its entry out of the cache, finding an existing entry
  432. // implies that another tunnel establishment candidate with the same key has
  433. // successfully registered and connected (or canceled) in the meantime.
  434. // Prefer that newer cached registration.
  435. //
  436. // For Psiphon, one scenario resulting in this condition is that the first
  437. // dial to a given server, using a cached registration, is delayed long
  438. // enough that a new candidate for the same server has been started and
  439. // outpaced the first candidate.
  440. _, found := c.cache.Get(config.RegistrationCacheKey)
  441. if found {
  442. config.Logger.WithTraceFields(
  443. common.LogFields{
  444. "diagnosticID": config.DiagnosticID,
  445. "reason": "existing entry found",
  446. }).Info(
  447. "drop cached registration")
  448. return
  449. }
  450. reason := "connected"
  451. if isCanceled {
  452. reason = "canceled"
  453. }
  454. config.Logger.WithTraceFields(
  455. common.LogFields{
  456. "diagnosticID": config.DiagnosticID,
  457. "cacheSize": c.cache.ItemCount(),
  458. "reason": reason,
  459. }).Info(
  460. "put cached registration")
  461. c.cache.Set(
  462. config.RegistrationCacheKey,
  463. registration,
  464. c.TTL)
  465. }
  466. func (c *registrationCache) pop(
  467. config *ConjureConfig) *refraction_networking_client.ConjureReg {
  468. c.mutex.Lock()
  469. defer c.mutex.Unlock()
  470. // See TTL/Flush comment in put.
  471. if c.TTL != config.RegistrationCacheTTL {
  472. c.cache.Flush()
  473. c.TTL = config.RegistrationCacheTTL
  474. }
  475. entry, found := c.cache.Get(config.RegistrationCacheKey)
  476. config.Logger.WithTraceFields(
  477. common.LogFields{
  478. "diagnosticID": config.DiagnosticID,
  479. "cacheSize": c.cache.ItemCount(),
  480. "found": found,
  481. }).Info(
  482. "pop cached registration")
  483. if found {
  484. c.cache.Delete(config.RegistrationCacheKey)
  485. return entry.(*refraction_networking_client.ConjureReg)
  486. }
  487. return nil
  488. }
  489. func (c *registrationCache) delete(config *ConjureConfig) {
  490. c.mutex.Lock()
  491. defer c.mutex.Unlock()
  492. _, found := c.cache.Get(config.RegistrationCacheKey)
  493. config.Logger.WithTraceFields(
  494. common.LogFields{
  495. "diagnosticID": config.DiagnosticID,
  496. "found": found,
  497. }).Info(
  498. "delete cached registration")
  499. if found {
  500. c.cache.Delete(config.RegistrationCacheKey)
  501. }
  502. }
// conjureRegistrationCache is the package-level, in-memory cache of Conjure
// registrations used by dial and DeleteCachedConjureRegistration.
var conjureRegistrationCache = newRegistrationCache()
  504. type cachedRegistrar struct {
  505. registration *refraction_networking_client.ConjureReg
  506. }
  507. func (r *cachedRegistrar) Register(
  508. _ *refraction_networking_client.ConjureSession,
  509. _ context.Context) (*refraction_networking_client.ConjureReg, error) {
  510. return r.registration, nil
  511. }
  512. type recordRegistrar struct {
  513. registrar refraction_networking_client.Registrar
  514. registration *refraction_networking_client.ConjureReg
  515. }
  516. func (r *recordRegistrar) Register(
  517. session *refraction_networking_client.ConjureSession,
  518. ctx context.Context) (*refraction_networking_client.ConjureReg, error) {
  519. registration, err := r.registrar.Register(session, ctx)
  520. if err != nil {
  521. return nil, errors.Trace(err)
  522. }
  523. r.registration = registration
  524. return registration, nil
  525. }
  526. // minTransportConn buffers the first 32-byte random HMAC write performed by
  527. // Conjure TransportType_Min, and prepends it to the subsequent first write
  528. // made by OSSH. The purpose is to avoid a distinct fingerprint consisting of
  529. // the initial TCP data packet always containing exactly 32 bytes of payload.
  530. // The first write by OSSH will be a variable length multi-packet-sized
  531. // sequence of random bytes.
  532. type minTransportConn struct {
  533. net.Conn
  534. mutex sync.Mutex
  535. state int
  536. buffer []byte
  537. err error
  538. }
  539. const (
  540. stateMinTransportInit = iota
  541. stateMinTransportBufferedHMAC
  542. stateMinTransportWroteHMAC
  543. stateMinTransportFailed
  544. )
  545. func newMinTransportConn(conn net.Conn) *minTransportConn {
  546. return &minTransportConn{
  547. Conn: conn,
  548. state: stateMinTransportInit,
  549. }
  550. }
  551. func (conn *minTransportConn) Write(p []byte) (int, error) {
  552. conn.mutex.Lock()
  553. defer conn.mutex.Unlock()
  554. switch conn.state {
  555. case stateMinTransportInit:
  556. if len(p) != sha256.Size {
  557. conn.state = stateMinTransportFailed
  558. conn.err = errors.TraceNew("unexpected HMAC write size")
  559. return 0, conn.err
  560. }
  561. conn.buffer = make([]byte, sha256.Size)
  562. copy(conn.buffer, p)
  563. conn.state = stateMinTransportBufferedHMAC
  564. return sha256.Size, nil
  565. case stateMinTransportBufferedHMAC:
  566. conn.buffer = append(conn.buffer, p...)
  567. n, err := conn.Conn.Write(conn.buffer)
  568. if n < sha256.Size {
  569. conn.state = stateMinTransportFailed
  570. conn.err = errors.TraceNew("failed to write HMAC")
  571. if err == nil {
  572. // As Write must return an error when failing to write the entire buffer,
  573. // we don't expect to hit this case.
  574. err = conn.err
  575. }
  576. } else {
  577. conn.state = stateMinTransportWroteHMAC
  578. }
  579. n -= sha256.Size
  580. // Do not wrap Conn.Write errors, and do not return conn.err here.
  581. return n, err
  582. case stateMinTransportWroteHMAC:
  583. return conn.Conn.Write(p)
  584. case stateMinTransportFailed:
  585. return 0, conn.err
  586. default:
  587. return 0, errors.TraceNew("unexpected state")
  588. }
  589. }
  590. func newMinTransportDialer(dialer common.Dialer) common.Dialer {
  591. return func(ctx context.Context, network, address string) (net.Conn, error) {
  592. conn, err := dialer(ctx, network, address)
  593. if err != nil {
  594. return nil, errors.Trace(err)
  595. }
  596. return newMinTransportConn(conn), nil
  597. }
  598. }
  599. // dialManager tracks all dials performed by and dialed conns used by a
  600. // refraction_networking_client conn. dialManager.close interrupts/closes
  601. // all pending dials and established conns immediately. This ensures that
  602. // blocking calls within refraction_networking_client, such as tls.Handhake,
  603. // are interrupted:
  604. // E.g., https://github.com/refraction-networking/gotapdance/blob/4d84655dad2e242b0af0459c31f687b12085dcca/tapdance/conn_raw.go#L307
  605. // (...preceeding SetDeadline is insufficient for immediate cancellation.)
  606. type dialManager struct {
  607. ctxMutex sync.Mutex
  608. useRunCtx bool
  609. initialDialCtx context.Context
  610. runCtx context.Context
  611. stopRunning context.CancelFunc
  612. conns *common.Conns
  613. }
  614. func newDialManager() *dialManager {
  615. runCtx, stopRunning := context.WithCancel(context.Background())
  616. return &dialManager{
  617. runCtx: runCtx,
  618. stopRunning: stopRunning,
  619. conns: common.NewConns(),
  620. }
  621. }
  622. func (manager *dialManager) makeManagedDialer(dialer common.Dialer) common.Dialer {
  623. return func(ctx context.Context, network, address string) (net.Conn, error) {
  624. return manager.dialWithDialer(dialer, ctx, network, address)
  625. }
  626. }
  627. func (manager *dialManager) dialWithDialer(
  628. dialer common.Dialer,
  629. ctx context.Context,
  630. network string,
  631. address string) (net.Conn, error) {
  632. if network != "tcp" {
  633. return nil, errors.Tracef("unsupported network: %s", network)
  634. }
  635. // The context for this dial is either:
  636. // - ctx, during the initial refraction_networking_client.DialContext, when
  637. // this is Psiphon tunnel establishment.
  638. // - manager.runCtx after the initial refraction_networking_client.Dial
  639. // completes, in which case this is a TapDance protocol reconnection that
  640. // occurs periodically for already established tunnels.
  641. manager.ctxMutex.Lock()
  642. if manager.useRunCtx {
  643. // Preserve the random timeout configured by the TapDance client:
  644. // https://github.com/refraction-networking/gotapdance/blob/4d84655dad2e242b0af0459c31f687b12085dcca/tapdance/conn_raw.go#L263
  645. deadline, ok := ctx.Deadline()
  646. if !ok {
  647. return nil, errors.Tracef("unexpected nil deadline")
  648. }
  649. var cancelFunc context.CancelFunc
  650. ctx, cancelFunc = context.WithDeadline(manager.runCtx, deadline)
  651. defer cancelFunc()
  652. }
  653. manager.ctxMutex.Unlock()
  654. conn, err := dialer(ctx, network, address)
  655. if err != nil {
  656. return nil, errors.Trace(err)
  657. }
  658. // Fail immediately if CloseWrite isn't available in the underlying dialed
  659. // conn. The equivalent check in managedConn.CloseWrite isn't fatal and
  660. // TapDance will run in a degraded state.
  661. // Limitation: if the underlying conn _also_ passes through CloseWrite, this
  662. // check may be insufficient.
  663. if _, ok := conn.(common.CloseWriter); !ok {
  664. return nil, errors.TraceNew("underlying conn is not a CloseWriter")
  665. }
  666. conn = &managedConn{
  667. Conn: conn,
  668. manager: manager,
  669. }
  670. if !manager.conns.Add(conn) {
  671. conn.Close()
  672. return nil, errors.TraceNew("already closed")
  673. }
  674. return conn, nil
  675. }
  676. func (manager *dialManager) startUsingRunCtx() {
  677. manager.ctxMutex.Lock()
  678. manager.initialDialCtx = nil
  679. manager.useRunCtx = true
  680. manager.ctxMutex.Unlock()
  681. }
  682. func (manager *dialManager) close() {
  683. manager.conns.CloseAll()
  684. manager.stopRunning()
  685. }
  686. type managedConn struct {
  687. net.Conn
  688. manager *dialManager
  689. }
  690. // CloseWrite exposes the net.TCPConn.CloseWrite() functionality
  691. // required by TapDance.
  692. func (conn *managedConn) CloseWrite() error {
  693. if closeWriter, ok := conn.Conn.(common.CloseWriter); ok {
  694. return closeWriter.CloseWrite()
  695. }
  696. return errors.TraceNew("underlying conn is not a CloseWriter")
  697. }
  698. func (conn *managedConn) Close() error {
  699. // Remove must be invoked asynchronously, as this Close may be called by
  700. // conns.CloseAll, leading to a reentrant lock situation.
  701. go conn.manager.conns.Remove(conn)
  702. return conn.Conn.Close()
  703. }
  704. type refractionConn struct {
  705. net.Conn
  706. manager *dialManager
  707. isClosed int32
  708. isConjure bool
  709. conjureCached bool
  710. conjureDelay time.Duration
  711. conjureTransport string
  712. }
  713. func (conn *refractionConn) Close() error {
  714. conn.manager.close()
  715. err := conn.Conn.Close()
  716. atomic.StoreInt32(&conn.isClosed, 1)
  717. return err
  718. }
  719. func (conn *refractionConn) IsClosed() bool {
  720. return atomic.LoadInt32(&conn.isClosed) == 1
  721. }
  722. // GetMetrics implements the common.MetricsSource interface.
  723. func (conn *refractionConn) GetMetrics() common.LogFields {
  724. logFields := make(common.LogFields)
  725. if conn.isConjure {
  726. cached := "0"
  727. if conn.conjureCached {
  728. cached = "1"
  729. }
  730. logFields["conjure_cached"] = cached
  731. if conn.conjureDelay != -1 {
  732. logFields["conjure_delay"] = fmt.Sprintf("%d", conn.conjureDelay/time.Millisecond)
  733. }
  734. logFields["conjure_transport"] = conn.conjureTransport
  735. }
  736. return logFields
  737. }
  738. var initRefractionNetworkingOnce sync.Once
  739. func initRefractionNetworking(emitLogs bool, dataDirectory string) error {
  740. var initErr error
  741. initRefractionNetworkingOnce.Do(func() {
  742. if !emitLogs {
  743. refraction_networking_client.Logger().Out = ioutil.Discard
  744. }
  745. assetsDir := filepath.Join(dataDirectory, "refraction-networking")
  746. err := os.MkdirAll(assetsDir, 0700)
  747. if err != nil {
  748. initErr = errors.Trace(err)
  749. return
  750. }
  751. clientConfFileName := filepath.Join(assetsDir, "ClientConf")
  752. _, err = os.Stat(clientConfFileName)
  753. if err != nil && os.IsNotExist(err) {
  754. err = ioutil.WriteFile(clientConfFileName, getEmbeddedClientConf(), 0644)
  755. }
  756. if err != nil {
  757. initErr = errors.Trace(err)
  758. return
  759. }
  760. refraction_networking_client.AssetsSetDir(assetsDir)
  761. })
  762. return initErr
  763. }