refraction.go 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913
  1. // +build PSIPHON_ENABLE_REFRACTION_NETWORKING
  2. /*
  3. * Copyright (c) 2018, Psiphon Inc.
  4. * All rights reserved.
  5. *
  6. * This program is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. *
  19. */
  20. /*
  21. Package refraction wraps github.com/refraction-networking/gotapdance with
  22. net.Listener and net.Conn types that provide drop-in integration with Psiphon.
  23. */
  24. package refraction
  25. import (
  26. "context"
  27. "crypto/sha256"
  28. "fmt"
  29. "io/ioutil"
  30. "net"
  31. "os"
  32. "path/filepath"
  33. "sync"
  34. "sync/atomic"
  35. "time"
  36. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  37. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  38. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/protocol"
  39. "github.com/armon/go-proxyproto"
  40. lrucache "github.com/cognusion/go-cache-lru"
  41. refraction_networking_proto "github.com/refraction-networking/gotapdance/protobuf"
  42. refraction_networking_client "github.com/refraction-networking/gotapdance/tapdance"
  43. )
  44. const (
  45. READ_PROXY_PROTOCOL_HEADER_TIMEOUT = 5 * time.Second
  46. REGISTRATION_CACHE_MAX_ENTRIES = 256
  47. )
  48. // Enabled indicates if Refraction Networking functionality is enabled.
  49. func Enabled() bool {
  50. return true
  51. }
  52. // Listener is a net.Listener.
  53. type Listener struct {
  54. net.Listener
  55. }
  56. // Listen creates a new Refraction Networking listener.
  57. //
  58. // The Refraction Networking station (TapDance or Conjure) will send the
  59. // original client address via the HAProxy proxy protocol v1,
  60. // https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt. The original
  61. // client address is read and returned by accepted conns' RemoteAddr.
  62. // RemoteAddr _must_ be called non-concurrently before calling Read on
  63. // accepted conns as the HAProxy proxy protocol header reading logic sets
  64. // SetReadDeadline and performs a Read.
  65. func Listen(address string) (net.Listener, error) {
  66. tcpListener, err := net.Listen("tcp", address)
  67. if err != nil {
  68. return nil, errors.Trace(err)
  69. }
  70. // Setting a timeout ensures that reading the proxy protocol
  71. // header completes or times out and RemoteAddr will not block. See:
  72. // https://godoc.org/github.com/armon/go-proxyproto#Conn.RemoteAddr
  73. proxyListener := &proxyproto.Listener{
  74. Listener: tcpListener,
  75. ProxyHeaderTimeout: READ_PROXY_PROTOCOL_HEADER_TIMEOUT}
  76. stationListener := &stationListener{
  77. proxyListener: proxyListener,
  78. }
  79. return &Listener{Listener: stationListener}, nil
  80. }
  81. // stationListener uses the proxyproto.Listener SourceCheck callback to
  82. // capture and record the direct remote address, the station address, and
  83. // wraps accepted conns to provide station address metrics via GetMetrics.
  84. // These metrics enable identifying which station fronted a connection, which
  85. // is useful for network operations and troubleshooting.
  86. //
  87. // go-proxyproto.Conn.RemoteAddr reports the originating client IP address,
  88. // which is geolocated and recorded for metrics. The underlying conn's remote
  89. // address, the station address, is not accessible via the go-proxyproto API.
  90. //
  91. // stationListener is not safe for concurrent access.
  92. type stationListener struct {
  93. proxyListener *proxyproto.Listener
  94. }
  95. func (l *stationListener) Accept() (net.Conn, error) {
  96. var stationRemoteAddr net.Addr
  97. l.proxyListener.SourceCheck = func(addr net.Addr) (bool, error) {
  98. stationRemoteAddr = addr
  99. return true, nil
  100. }
  101. conn, err := l.proxyListener.Accept()
  102. if err != nil {
  103. return nil, err
  104. }
  105. if stationRemoteAddr == nil {
  106. return nil, errors.TraceNew("missing station address")
  107. }
  108. return &stationConn{
  109. Conn: conn,
  110. stationIPAddress: common.IPAddressFromAddr(stationRemoteAddr),
  111. }, nil
  112. }
  113. func (l *stationListener) Close() error {
  114. return l.proxyListener.Close()
  115. }
  116. func (l *stationListener) Addr() net.Addr {
  117. return l.proxyListener.Addr()
  118. }
  119. type stationConn struct {
  120. net.Conn
  121. stationIPAddress string
  122. }
  123. // IrregularTunnelError implements the common.IrregularIndicator interface.
  124. func (c *stationConn) IrregularTunnelError() error {
  125. // We expect a PROXY protocol header, but go-proxyproto does not produce an
  126. // error if the "PROXY " prefix is absent; instead the connection will
  127. // proceed. To detect this case, check if the go-proxyproto RemoteAddr IP
  128. // address matches the underlying connection IP address. When these values
  129. // match, there was no PROXY protocol header.
  130. //
  131. // Limitation: the values will match if there is a PROXY protocol header
  132. // containing the same IP address as the underlying connection. This is not
  133. // an expected case.
  134. if common.IPAddressFromAddr(c.RemoteAddr()) == c.stationIPAddress {
  135. return errors.TraceNew("unexpected station IP address")
  136. }
  137. return nil
  138. }
  139. // GetMetrics implements the common.MetricsSource interface.
  140. func (c *stationConn) GetMetrics() common.LogFields {
  141. logFields := make(common.LogFields)
  142. // Ensure we don't log a potential non-station IP address.
  143. if c.IrregularTunnelError() == nil {
  144. logFields["station_ip_address"] = c.stationIPAddress
  145. }
  146. return logFields
  147. }
  148. // DialTapDance establishes a new TapDance connection to a TapDance station
  149. // specified in the config assets and forwarding through to the Psiphon server
  150. // specified by address.
  151. //
  152. // The TapDance station config assets (which are also the Conjure station
  153. // assets) are read from dataDirectory/"refraction-networking". When no config
  154. // is found, default assets are paved.
  155. //
  156. // dialer specifies the custom dialer for underlying TCP dials.
  157. //
  158. // The input ctx is expected to have a timeout for the dial.
  159. //
  160. // Limitation: the parameters emitLogs and dataDirectory are used for one-time
  161. // initialization and are ignored after the first DialTapDance/Conjure call.
  162. func DialTapDance(
  163. ctx context.Context,
  164. emitLogs bool,
  165. dataDirectory string,
  166. dialer common.NetDialer,
  167. address string) (net.Conn, error) {
  168. return dial(
  169. ctx,
  170. emitLogs,
  171. dataDirectory,
  172. dialer,
  173. address,
  174. nil)
  175. }
  176. // DialConjure establishes a new Conjure connection to a Conjure station.
  177. //
  178. // dialer specifies the custom dialer to use for phantom dials. Additional
  179. // Conjure-specific parameters are specified in conjureConfig.
  180. //
  181. // See DialTapdance comment.
  182. func DialConjure(
  183. ctx context.Context,
  184. emitLogs bool,
  185. dataDirectory string,
  186. dialer common.NetDialer,
  187. address string,
  188. conjureConfig *ConjureConfig) (net.Conn, error) {
  189. return dial(
  190. ctx,
  191. emitLogs,
  192. dataDirectory,
  193. dialer,
  194. address,
  195. conjureConfig)
  196. }
  197. func dial(
  198. ctx context.Context,
  199. emitLogs bool,
  200. dataDirectory string,
  201. dialer common.NetDialer,
  202. address string,
  203. conjureConfig *ConjureConfig) (net.Conn, error) {
  204. err := initRefractionNetworking(emitLogs, dataDirectory)
  205. if err != nil {
  206. return nil, errors.Trace(err)
  207. }
  208. if _, ok := ctx.Deadline(); !ok {
  209. return nil, errors.TraceNew("dial context has no timeout")
  210. }
  211. useConjure := conjureConfig != nil
  212. manager := newDialManager()
  213. refractionDialer := &refraction_networking_client.Dialer{
  214. TcpDialer: manager.makeManagedDialer(dialer.DialContext),
  215. UseProxyHeader: true,
  216. }
  217. conjureCached := false
  218. conjureDelay := time.Duration(0)
  219. var conjureCachedRegistration *refraction_networking_client.ConjureReg
  220. var conjureRecordRegistrar *recordRegistrar
  221. if useConjure {
  222. // Our strategy is to try one registration per dial attempt: a cached
  223. // registration, if it exists, or API or decoy registration, as configured.
  224. // This assumes Psiphon establishment will try/retry many candidates as
  225. // required, and that the desired mix of API/decoy registrations will be
  226. // configured and generated. In good network conditions, internal gotapdance
  227. // retries (via APIRegistrar.MaxRetries or APIRegistrar.SecondaryRegistrar)
  228. // are unlikely to start before the Conjure dial is canceled.
  229. // Caching registrations reduces average Conjure dial time by often
  230. // eliminating the registration phase. This is especially impactful for
  231. // short duration tunnels, such as on mobile. Caching also reduces domain
  232. // fronted traffic and load on the API registrar and decoys.
  233. //
  234. // We implement a simple in-memory registration cache with the following
  235. // behavior:
  236. //
  237. // - If a new registration succeeds, but the overall Conjure dial is
  238. // _canceled_, the registration is optimistically cached.
  239. // - If the Conjure phantom dial fails, any associated cached registration
  240. // is discarded.
  241. // - A cached registration's TTL is extended upon phantom dial success.
  242. // - If the configured TTL changes, the cache is cleared.
  243. //
  244. // Limitations:
  245. // - The cache is not persistent.
  246. // - There is no TTL extension during a long connection.
  247. // - Caching a successful registration when the phantom dial is canceled may
  248. // skip the necessary "delay" step (however, an immediate re-establishment
  249. // to the same candidate is unlikely in this case).
  250. //
  251. // TODO:
  252. // - Revisit when gotapdance adds its own caching.
  253. // - Consider "pre-registering" Conjure when already connected with a
  254. // different protocol, so a Conjure registration is available on the next
  255. // establishment; in this scenario, a tunneled API registration would not
  256. // require domain fronting.
  257. refractionDialer.DarkDecoy = true
  258. // The pop operation removes the registration from the cache. This
  259. // eliminates the possibility of concurrent candidates (with the same cache
  260. // key) using and modifying the same registration, a potential race
  261. // condition. The popped cached registration must be reinserted in the cache
  262. // after canceling or success, but not on phantom dial failure.
  263. conjureCachedRegistration = conjureRegistrationCache.pop(conjureConfig)
  264. if conjureCachedRegistration != nil {
  265. refractionDialer.DarkDecoyRegistrar = &cachedRegistrar{
  266. registration: conjureCachedRegistration,
  267. }
  268. conjureCached = true
  269. conjureDelay = 0 // report no delay
  270. } else if conjureConfig.APIRegistrarURL != "" {
  271. if conjureConfig.APIRegistrarHTTPClient == nil {
  272. // While not a guaranteed check, if the APIRegistrarHTTPClient isn't set
  273. // then the API registration would certainly be unfronted, resulting in a
  274. // fingerprintable connection leak.
  275. return nil, errors.TraceNew("missing APIRegistrarHTTPClient")
  276. }
  277. refractionDialer.DarkDecoyRegistrar = &refraction_networking_client.APIRegistrar{
  278. Endpoint: conjureConfig.APIRegistrarURL,
  279. ConnectionDelay: conjureConfig.APIRegistrarDelay,
  280. MaxRetries: 0,
  281. Client: conjureConfig.APIRegistrarHTTPClient,
  282. }
  283. conjureDelay = conjureConfig.APIRegistrarDelay
  284. } else if conjureConfig.DecoyRegistrarDialer != nil {
  285. refractionDialer.DarkDecoyRegistrar = &refraction_networking_client.DecoyRegistrar{
  286. TcpDialer: manager.makeManagedDialer(
  287. conjureConfig.DecoyRegistrarDialer.DialContext),
  288. }
  289. refractionDialer.Width = conjureConfig.DecoyRegistrarWidth
  290. // Limitation: the decoy regsitration delay is not currently exposed in the
  291. // gotapdance API.
  292. conjureDelay = -1 // don't report delay
  293. } else {
  294. return nil, errors.TraceNew("no conjure registrar specified")
  295. }
  296. if conjureCachedRegistration == nil && conjureConfig.RegistrationCacheTTL != 0 {
  297. // Record the registration result in order to cache it.
  298. conjureRecordRegistrar = &recordRegistrar{
  299. registrar: refractionDialer.DarkDecoyRegistrar,
  300. }
  301. refractionDialer.DarkDecoyRegistrar = conjureRecordRegistrar
  302. }
  303. switch conjureConfig.Transport {
  304. case protocol.CONJURE_TRANSPORT_MIN_OSSH:
  305. refractionDialer.Transport = refraction_networking_proto.TransportType_Min
  306. refractionDialer.TcpDialer = newMinTransportDialer(refractionDialer.TcpDialer)
  307. case protocol.CONJURE_TRANSPORT_OBFS4_OSSH:
  308. refractionDialer.Transport = refraction_networking_proto.TransportType_Obfs4
  309. default:
  310. return nil, errors.Tracef("invalid Conjure transport: %s", conjureConfig.Transport)
  311. }
  312. if conjureCachedRegistration != nil {
  313. // When using a cached registration, patch its TcpDialer to use the custom
  314. // dialer for this dial. In the non-cached code path, gotapdance will set
  315. // refractionDialer.TcpDialer into a new registration.
  316. conjureCachedRegistration.TcpDialer = refractionDialer.TcpDialer
  317. }
  318. }
  319. // If the dial context is cancelled, use dialManager to interrupt
  320. // refractionDialer.DialContext. See dialManager comment explaining why
  321. // refractionDialer.DialContext may block even when the input context is
  322. // cancelled.
  323. dialComplete := make(chan struct{})
  324. go func() {
  325. select {
  326. case <-ctx.Done():
  327. case <-dialComplete:
  328. }
  329. select {
  330. // Prioritize the dialComplete case.
  331. case <-dialComplete:
  332. return
  333. default:
  334. }
  335. manager.close()
  336. }()
  337. conn, err := refractionDialer.DialContext(ctx, "tcp", address)
  338. close(dialComplete)
  339. if err != nil {
  340. // Call manager.close before updating cache, to synchronously shutdown dials
  341. // and ensure there are no further concurrent reads/writes to the recorded
  342. // registration before referencing it.
  343. manager.close()
  344. }
  345. // Cache (or put back) a successful registration. Also put back in the
  346. // specific error case where the phantom dial was canceled, as the
  347. // registration may still be valid. This operation implicitly extends the TTL
  348. // of a reused cached registration; we assume the Conjure station is also
  349. // extending the TTL by the same amount.
  350. //
  351. // Limitation: the cancel case shouldn't extend the TTL.
  352. if useConjure && (conjureCachedRegistration != nil || conjureRecordRegistrar != nil) {
  353. isCanceled := (err != nil && ctx.Err() == context.Canceled)
  354. if err == nil || isCanceled {
  355. registration := conjureCachedRegistration
  356. if registration == nil {
  357. // We assume gotapdance is no longer accessing the Registrar.
  358. registration = conjureRecordRegistrar.registration
  359. }
  360. // conjureRecordRegistrar.registration will be nil if there was no cached
  361. // registration _and_ registration didn't succeed before a cancel.
  362. if registration != nil {
  363. // Do not retain a reference to the custom dialer, as its context will not
  364. // be valid for future dials using this cached registration. Assumes that
  365. // gotapdance will no longer reference the TcpDialer now that the
  366. // connection is established.
  367. registration.TcpDialer = nil
  368. conjureRegistrationCache.put(conjureConfig, registration, isCanceled)
  369. }
  370. } else if conjureCachedRegistration != nil {
  371. conjureConfig.Logger.WithTraceFields(
  372. common.LogFields{
  373. "diagnosticID": conjureConfig.DiagnosticID,
  374. "reason": "phantom dial failed",
  375. }).Info(
  376. "drop cached registration")
  377. }
  378. }
  379. if err != nil {
  380. return nil, errors.Trace(err)
  381. }
  382. manager.startUsingRunCtx()
  383. refractionConn := &refractionConn{
  384. Conn: conn,
  385. manager: manager,
  386. }
  387. if useConjure {
  388. // Retain these values for logging metrics.
  389. refractionConn.isConjure = true
  390. refractionConn.conjureCached = conjureCached
  391. refractionConn.conjureDelay = conjureDelay
  392. refractionConn.conjureTransport = conjureConfig.Transport
  393. }
  394. return refractionConn, nil
  395. }
  396. func DeleteCachedConjureRegistration(config *ConjureConfig) {
  397. conjureRegistrationCache.delete(config)
  398. }
  399. type registrationCache struct {
  400. mutex sync.Mutex
  401. TTL time.Duration
  402. cache *lrucache.Cache
  403. }
  404. func newRegistrationCache() *registrationCache {
  405. return &registrationCache{
  406. cache: lrucache.NewWithLRU(
  407. lrucache.NoExpiration,
  408. 1*time.Minute,
  409. REGISTRATION_CACHE_MAX_ENTRIES),
  410. }
  411. }
  412. func (c *registrationCache) put(
  413. config *ConjureConfig,
  414. registration *refraction_networking_client.ConjureReg,
  415. isCanceled bool) {
  416. c.mutex.Lock()
  417. defer c.mutex.Unlock()
  418. // Clear the entire cache if the configured TTL changes to avoid retaining
  419. // items for too long. This is expected to be an infrequent event. The
  420. // go-cache-lru API does not offer a mechanism to inspect and adjust the TTL
  421. // of all existing items.
  422. if c.TTL != config.RegistrationCacheTTL {
  423. c.cache.Flush()
  424. c.TTL = config.RegistrationCacheTTL
  425. }
  426. // Drop the cached registration if another entry is found under the same key.
  427. // Since the dial pops its entry out of the cache, finding an existing entry
  428. // implies that another tunnel establishment candidate with the same key has
  429. // successfully registered and connected (or canceled) in the meantime.
  430. // Prefer that newer cached registration.
  431. //
  432. // For Psiphon, one scenario resulting in this condition is that the first
  433. // dial to a given server, using a cached registration, is delayed long
  434. // enough that a new candidate for the same server has been started and
  435. // outpaced the first candidate.
  436. _, found := c.cache.Get(config.RegistrationCacheKey)
  437. if found {
  438. config.Logger.WithTraceFields(
  439. common.LogFields{
  440. "diagnosticID": config.DiagnosticID,
  441. "reason": "existing entry found",
  442. }).Info(
  443. "drop cached registration")
  444. return
  445. }
  446. reason := "connected"
  447. if isCanceled {
  448. reason = "canceled"
  449. }
  450. config.Logger.WithTraceFields(
  451. common.LogFields{
  452. "diagnosticID": config.DiagnosticID,
  453. "cacheSize": c.cache.ItemCount(),
  454. "reason": reason,
  455. }).Info(
  456. "put cached registration")
  457. c.cache.Set(
  458. config.RegistrationCacheKey,
  459. registration,
  460. c.TTL)
  461. }
  462. func (c *registrationCache) pop(
  463. config *ConjureConfig) *refraction_networking_client.ConjureReg {
  464. c.mutex.Lock()
  465. defer c.mutex.Unlock()
  466. // See TTL/Flush comment in put.
  467. if c.TTL != config.RegistrationCacheTTL {
  468. c.cache.Flush()
  469. c.TTL = config.RegistrationCacheTTL
  470. }
  471. entry, found := c.cache.Get(config.RegistrationCacheKey)
  472. config.Logger.WithTraceFields(
  473. common.LogFields{
  474. "diagnosticID": config.DiagnosticID,
  475. "cacheSize": c.cache.ItemCount(),
  476. "found": found,
  477. }).Info(
  478. "pop cached registration")
  479. if found {
  480. c.cache.Delete(config.RegistrationCacheKey)
  481. return entry.(*refraction_networking_client.ConjureReg)
  482. }
  483. return nil
  484. }
  485. func (c *registrationCache) delete(config *ConjureConfig) {
  486. c.mutex.Lock()
  487. defer c.mutex.Unlock()
  488. _, found := c.cache.Get(config.RegistrationCacheKey)
  489. config.Logger.WithTraceFields(
  490. common.LogFields{
  491. "diagnosticID": config.DiagnosticID,
  492. "found": found,
  493. }).Info(
  494. "delete cached registration")
  495. if found {
  496. c.cache.Delete(config.RegistrationCacheKey)
  497. }
  498. }
  499. var conjureRegistrationCache = newRegistrationCache()
  500. type cachedRegistrar struct {
  501. registration *refraction_networking_client.ConjureReg
  502. }
  503. func (r *cachedRegistrar) Register(
  504. _ *refraction_networking_client.ConjureSession,
  505. _ context.Context) (*refraction_networking_client.ConjureReg, error) {
  506. return r.registration, nil
  507. }
  508. type recordRegistrar struct {
  509. registrar refraction_networking_client.Registrar
  510. registration *refraction_networking_client.ConjureReg
  511. }
  512. func (r *recordRegistrar) Register(
  513. session *refraction_networking_client.ConjureSession,
  514. ctx context.Context) (*refraction_networking_client.ConjureReg, error) {
  515. registration, err := r.registrar.Register(session, ctx)
  516. if err != nil {
  517. return nil, errors.Trace(err)
  518. }
  519. r.registration = registration
  520. return registration, nil
  521. }
  522. // minTransportConn buffers the first 32-byte random HMAC write performed by
  523. // Conjure TransportType_Min, and prepends it to the subsequent first write
  524. // made by OSSH. The purpose is to avoid a distinct fingerprint consisting of
  525. // the initial TCP data packet always containing exactly 32 bytes of payload.
  526. // The first write by OSSH will be a variable length multi-packet-sized
  527. // sequence of random bytes.
  528. type minTransportConn struct {
  529. net.Conn
  530. mutex sync.Mutex
  531. state int
  532. buffer []byte
  533. err error
  534. }
  535. const (
  536. stateMinTransportInit = iota
  537. stateMinTransportBufferedHMAC
  538. stateMinTransportWroteHMAC
  539. stateMinTransportFailed
  540. )
  541. func newMinTransportConn(conn net.Conn) *minTransportConn {
  542. return &minTransportConn{
  543. Conn: conn,
  544. state: stateMinTransportInit,
  545. }
  546. }
  547. func (conn *minTransportConn) Write(p []byte) (int, error) {
  548. conn.mutex.Lock()
  549. defer conn.mutex.Unlock()
  550. switch conn.state {
  551. case stateMinTransportInit:
  552. if len(p) != sha256.Size {
  553. conn.state = stateMinTransportFailed
  554. conn.err = errors.TraceNew("unexpected HMAC write size")
  555. return 0, conn.err
  556. }
  557. conn.buffer = make([]byte, sha256.Size)
  558. copy(conn.buffer, p)
  559. conn.state = stateMinTransportBufferedHMAC
  560. return sha256.Size, nil
  561. case stateMinTransportBufferedHMAC:
  562. conn.buffer = append(conn.buffer, p...)
  563. n, err := conn.Conn.Write(conn.buffer)
  564. if n < sha256.Size {
  565. conn.state = stateMinTransportFailed
  566. conn.err = errors.TraceNew("failed to write HMAC")
  567. if err == nil {
  568. // As Write must return an error when failing to write the entire buffer,
  569. // we don't expect to hit this case.
  570. err = conn.err
  571. }
  572. } else {
  573. conn.state = stateMinTransportWroteHMAC
  574. }
  575. n -= sha256.Size
  576. // Do not wrap Conn.Write errors, and do not return conn.err here.
  577. return n, err
  578. case stateMinTransportWroteHMAC:
  579. return conn.Conn.Write(p)
  580. case stateMinTransportFailed:
  581. return 0, conn.err
  582. default:
  583. return 0, errors.TraceNew("unexpected state")
  584. }
  585. }
  586. func newMinTransportDialer(dialer common.Dialer) common.Dialer {
  587. return func(ctx context.Context, network, address string) (net.Conn, error) {
  588. conn, err := dialer(ctx, network, address)
  589. if err != nil {
  590. return nil, errors.Trace(err)
  591. }
  592. return newMinTransportConn(conn), nil
  593. }
  594. }
  595. // dialManager tracks all dials performed by and dialed conns used by a
  596. // refraction_networking_client conn. dialManager.close interrupts/closes
  597. // all pending dials and established conns immediately. This ensures that
  598. // blocking calls within refraction_networking_client, such as tls.Handhake,
  599. // are interrupted:
  600. // E.g., https://github.com/refraction-networking/gotapdance/blob/4d84655dad2e242b0af0459c31f687b12085dcca/tapdance/conn_raw.go#L307
  601. // (...preceeding SetDeadline is insufficient for immediate cancellation.)
  602. type dialManager struct {
  603. ctxMutex sync.Mutex
  604. useRunCtx bool
  605. initialDialCtx context.Context
  606. runCtx context.Context
  607. stopRunning context.CancelFunc
  608. conns *common.Conns
  609. }
  610. func newDialManager() *dialManager {
  611. runCtx, stopRunning := context.WithCancel(context.Background())
  612. return &dialManager{
  613. runCtx: runCtx,
  614. stopRunning: stopRunning,
  615. conns: common.NewConns(),
  616. }
  617. }
  618. func (manager *dialManager) makeManagedDialer(dialer common.Dialer) common.Dialer {
  619. return func(ctx context.Context, network, address string) (net.Conn, error) {
  620. return manager.dialWithDialer(dialer, ctx, network, address)
  621. }
  622. }
  623. func (manager *dialManager) dialWithDialer(
  624. dialer common.Dialer,
  625. ctx context.Context,
  626. network string,
  627. address string) (net.Conn, error) {
  628. if network != "tcp" {
  629. return nil, errors.Tracef("unsupported network: %s", network)
  630. }
  631. // The context for this dial is either:
  632. // - ctx, during the initial refraction_networking_client.DialContext, when
  633. // this is Psiphon tunnel establishment.
  634. // - manager.runCtx after the initial refraction_networking_client.Dial
  635. // completes, in which case this is a TapDance protocol reconnection that
  636. // occurs periodically for already established tunnels.
  637. manager.ctxMutex.Lock()
  638. if manager.useRunCtx {
  639. // Preserve the random timeout configured by the TapDance client:
  640. // https://github.com/refraction-networking/gotapdance/blob/4d84655dad2e242b0af0459c31f687b12085dcca/tapdance/conn_raw.go#L263
  641. deadline, ok := ctx.Deadline()
  642. if !ok {
  643. return nil, errors.Tracef("unexpected nil deadline")
  644. }
  645. var cancelFunc context.CancelFunc
  646. ctx, cancelFunc = context.WithDeadline(manager.runCtx, deadline)
  647. defer cancelFunc()
  648. }
  649. manager.ctxMutex.Unlock()
  650. conn, err := dialer(ctx, network, address)
  651. if err != nil {
  652. return nil, errors.Trace(err)
  653. }
  654. // Fail immediately if CloseWrite isn't available in the underlying dialed
  655. // conn. The equivalent check in managedConn.CloseWrite isn't fatal and
  656. // TapDance will run in a degraded state.
  657. // Limitation: if the underlying conn _also_ passes through CloseWrite, this
  658. // check may be insufficient.
  659. if _, ok := conn.(common.CloseWriter); !ok {
  660. return nil, errors.TraceNew("underlying conn is not a CloseWriter")
  661. }
  662. conn = &managedConn{
  663. Conn: conn,
  664. manager: manager,
  665. }
  666. if !manager.conns.Add(conn) {
  667. conn.Close()
  668. return nil, errors.TraceNew("already closed")
  669. }
  670. return conn, nil
  671. }
  672. func (manager *dialManager) startUsingRunCtx() {
  673. manager.ctxMutex.Lock()
  674. manager.initialDialCtx = nil
  675. manager.useRunCtx = true
  676. manager.ctxMutex.Unlock()
  677. }
  678. func (manager *dialManager) close() {
  679. manager.conns.CloseAll()
  680. manager.stopRunning()
  681. }
  682. type managedConn struct {
  683. net.Conn
  684. manager *dialManager
  685. }
  686. // CloseWrite exposes the net.TCPConn.CloseWrite() functionality
  687. // required by TapDance.
  688. func (conn *managedConn) CloseWrite() error {
  689. if closeWriter, ok := conn.Conn.(common.CloseWriter); ok {
  690. return closeWriter.CloseWrite()
  691. }
  692. return errors.TraceNew("underlying conn is not a CloseWriter")
  693. }
  694. func (conn *managedConn) Close() error {
  695. // Remove must be invoked asynchronously, as this Close may be called by
  696. // conns.CloseAll, leading to a reentrant lock situation.
  697. go conn.manager.conns.Remove(conn)
  698. return conn.Conn.Close()
  699. }
  700. type refractionConn struct {
  701. net.Conn
  702. manager *dialManager
  703. isClosed int32
  704. isConjure bool
  705. conjureCached bool
  706. conjureDelay time.Duration
  707. conjureTransport string
  708. }
  709. func (conn *refractionConn) Close() error {
  710. conn.manager.close()
  711. err := conn.Conn.Close()
  712. atomic.StoreInt32(&conn.isClosed, 1)
  713. return err
  714. }
  715. func (conn *refractionConn) IsClosed() bool {
  716. return atomic.LoadInt32(&conn.isClosed) == 1
  717. }
  718. // GetMetrics implements the common.MetricsSource interface.
  719. func (conn *refractionConn) GetMetrics() common.LogFields {
  720. logFields := make(common.LogFields)
  721. if conn.isConjure {
  722. cached := "0"
  723. if conn.conjureCached {
  724. cached = "1"
  725. }
  726. logFields["conjure_cached"] = cached
  727. if conn.conjureDelay != -1 {
  728. logFields["conjure_delay"] = fmt.Sprintf("%d", conn.conjureDelay/time.Millisecond)
  729. }
  730. logFields["conjure_transport"] = conn.conjureTransport
  731. }
  732. return logFields
  733. }
  734. var initRefractionNetworkingOnce sync.Once
  735. func initRefractionNetworking(emitLogs bool, dataDirectory string) error {
  736. var initErr error
  737. initRefractionNetworkingOnce.Do(func() {
  738. if !emitLogs {
  739. refraction_networking_client.Logger().Out = ioutil.Discard
  740. }
  741. assetsDir := filepath.Join(dataDirectory, "refraction-networking")
  742. err := os.MkdirAll(assetsDir, 0700)
  743. if err != nil {
  744. initErr = errors.Trace(err)
  745. return
  746. }
  747. clientConfFileName := filepath.Join(assetsDir, "ClientConf")
  748. _, err = os.Stat(clientConfFileName)
  749. if err != nil && os.IsNotExist(err) {
  750. err = ioutil.WriteFile(clientConfFileName, getEmbeddedClientConf(), 0644)
  751. }
  752. if err != nil {
  753. initErr = errors.Trace(err)
  754. return
  755. }
  756. refraction_networking_client.AssetsSetDir(assetsDir)
  757. })
  758. return initErr
  759. }