conn.go 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835
  1. package socket
  2. import (
  3. "context"
  4. "io"
  5. "os"
  6. "sync/atomic"
  7. "syscall"
  8. "time"
  9. "golang.org/x/sys/unix"
  10. )
  11. // Lock in an expected public interface for convenience.
  12. var _ interface {
  13. io.ReadWriteCloser
  14. syscall.Conn
  15. SetDeadline(t time.Time) error
  16. SetReadDeadline(t time.Time) error
  17. SetWriteDeadline(t time.Time) error
  18. } = &Conn{}
  // Conn is a low-level network connection. Its file descriptor is wrapped in an
  // *os.File so that reads, writes, and deadlines go through Go's runtime network
  // poller, while raw system calls are issued through a syscall.RawConn.
  type Conn struct {
  	// closed is non-zero once Close has been called. It is only ever touched
  	// through sync/atomic and is deliberately kept as the first field.
  	closed uint32

  	// name labels this Conn; Accept passes the same name on to the Conns it
  	// creates for accepted descriptors.
  	name string

  	// isStream reports whether the socket is of type SOCK_STREAM rather than
  	// a packet-oriented type such as a UDP socket.
  	isStream bool

  	// zeroReadIsEOF reports whether a successful zero-byte read should be
  	// surfaced as io.EOF. It is false for message-based sockets.
  	zeroReadIsEOF bool

  	// fd is the file registered with the runtime network poller, and rc is
  	// the raw connection obtained from it for arbitrary system calls.
  	fd *os.File
  	rc syscall.RawConn
  }
  // Config holds optional settings applied by Socket when creating a Conn.
  type Config struct {
  	// NetNS selects the Linux network namespace in which the socket is
  	// created. It is not supported on other operating systems.
  	//
  	// When non-zero, Socket creates the descriptor through withNetNS using
  	// this value; Socket returns whatever error that call produces.
  	//
  	// When zero, Socket performs no namespace handling at all and creates the
  	// descriptor directly, so the Conn lives in whatever network namespace
  	// the calling thread is already in. This keeps Conn usable by
  	// applications that run without elevated privileges.
  	//
  	// Entering a network namespace is a privileged operation (root or
  	// CAP_SYS_ADMIN), so most applications should leave this field at 0.
  	NetNS int
  }
  61. // High-level methods which provide convenience over raw system calls.
  62. // Close closes the underlying file descriptor for the Conn, which also causes
  63. // all in-flight I/O operations to immediately unblock and return errors. Any
  64. // subsequent uses of Conn will result in EBADF.
  65. func (c *Conn) Close() error {
  66. // The caller has expressed an intent to close the socket, so immediately
  67. // increment s.closed to force further calls to result in EBADF before also
  68. // closing the file descriptor to unblock any outstanding operations.
  69. //
  70. // Because other operations simply check for s.closed != 0, we will permit
  71. // double Close, which would increment s.closed beyond 1.
  72. if atomic.AddUint32(&c.closed, 1) != 1 {
  73. // Multiple Close calls.
  74. return nil
  75. }
  76. return os.NewSyscallError("close", c.fd.Close())
  77. }
  78. // CloseRead shuts down the reading side of the Conn. Most callers should just
  79. // use Close.
  80. func (c *Conn) CloseRead() error { return c.Shutdown(unix.SHUT_RD) }
  81. // CloseWrite shuts down the writing side of the Conn. Most callers should just
  82. // use Close.
  83. func (c *Conn) CloseWrite() error { return c.Shutdown(unix.SHUT_WR) }
  84. // Read reads directly from the underlying file descriptor.
  85. func (c *Conn) Read(b []byte) (int, error) { return c.fd.Read(b) }
  86. // ReadContext reads from the underlying file descriptor with added support for
  87. // context cancelation.
  88. func (c *Conn) ReadContext(ctx context.Context, b []byte) (int, error) {
  89. var (
  90. n int
  91. err error
  92. )
  93. if c.isStream && len(b) > maxRW {
  94. b = b[:maxRW]
  95. }
  96. doErr := c.read(ctx, "read", func(fd int) error {
  97. n, err = unix.Read(fd, b)
  98. return err
  99. })
  100. switch {
  101. case doErr != nil:
  102. return 0, doErr
  103. case n == 0 && err == nil && c.zeroReadIsEOF:
  104. return 0, io.EOF
  105. }
  106. return n, os.NewSyscallError("read", err)
  107. }
  108. // Write writes directly to the underlying file descriptor.
  109. func (c *Conn) Write(b []byte) (int, error) { return c.fd.Write(b) }
  110. // WriteContext writes to the underlying file descriptor with added support for
  111. // context cancelation.
  112. func (c *Conn) WriteContext(ctx context.Context, b []byte) (int, error) {
  113. var (
  114. n, nn int
  115. err error
  116. )
  117. doErr := c.write(ctx, "write", func(fd int) error {
  118. max := len(b)
  119. if c.isStream && max-nn > maxRW {
  120. max = nn + maxRW
  121. }
  122. n, err = unix.Write(fd, b[nn:max])
  123. if n > 0 {
  124. nn += n
  125. }
  126. if nn == len(b) {
  127. return err
  128. }
  129. if n == 0 && err == nil {
  130. err = io.ErrUnexpectedEOF
  131. return nil
  132. }
  133. return err
  134. })
  135. if doErr != nil {
  136. return 0, doErr
  137. }
  138. return nn, os.NewSyscallError("write", err)
  139. }
  140. // SetDeadline sets both the read and write deadlines associated with the Conn.
  141. func (c *Conn) SetDeadline(t time.Time) error { return c.fd.SetDeadline(t) }
  142. // SetReadDeadline sets the read deadline associated with the Conn.
  143. func (c *Conn) SetReadDeadline(t time.Time) error { return c.fd.SetReadDeadline(t) }
  144. // SetWriteDeadline sets the write deadline associated with the Conn.
  145. func (c *Conn) SetWriteDeadline(t time.Time) error { return c.fd.SetWriteDeadline(t) }
  146. // ReadBuffer gets the size of the operating system's receive buffer associated
  147. // with the Conn.
  148. func (c *Conn) ReadBuffer() (int, error) {
  149. return c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_RCVBUF)
  150. }
  151. // WriteBuffer gets the size of the operating system's transmit buffer
  152. // associated with the Conn.
  153. func (c *Conn) WriteBuffer() (int, error) {
  154. return c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_SNDBUF)
  155. }
  156. // SetReadBuffer sets the size of the operating system's receive buffer
  157. // associated with the Conn.
  158. //
  159. // When called with elevated privileges on Linux, the SO_RCVBUFFORCE option will
  160. // be used to override operating system limits. Otherwise SO_RCVBUF is used
  161. // (which obeys operating system limits).
  162. func (c *Conn) SetReadBuffer(bytes int) error { return c.setReadBuffer(bytes) }
  163. // SetWriteBuffer sets the size of the operating system's transmit buffer
  164. // associated with the Conn.
  165. //
  166. // When called with elevated privileges on Linux, the SO_SNDBUFFORCE option will
  167. // be used to override operating system limits. Otherwise SO_SNDBUF is used
  168. // (which obeys operating system limits).
  169. func (c *Conn) SetWriteBuffer(bytes int) error { return c.setWriteBuffer(bytes) }
  170. // SyscallConn returns a raw network connection. This implements the
  171. // syscall.Conn interface.
  172. //
  173. // SyscallConn is intended for advanced use cases, such as getting and setting
  174. // arbitrary socket options using the socket's file descriptor. If possible,
  175. // those operations should be performed using methods on Conn instead.
  176. //
  177. // Once invoked, it is the caller's responsibility to ensure that operations
  178. // performed using Conn and the syscall.RawConn do not conflict with each other.
  179. func (c *Conn) SyscallConn() (syscall.RawConn, error) {
  180. if atomic.LoadUint32(&c.closed) != 0 {
  181. return nil, os.NewSyscallError("syscallconn", unix.EBADF)
  182. }
  183. // TODO(mdlayher): mutex or similar to enforce syscall.RawConn contract of
  184. // FD remaining valid for duration of calls?
  185. return c.rc, nil
  186. }
  187. // Socket wraps the socket(2) system call to produce a Conn. domain, typ, and
  188. // proto are passed directly to socket(2), and name should be a unique name for
  189. // the socket type such as "netlink" or "vsock".
  190. //
  191. // The cfg parameter specifies optional configuration for the Conn. If nil, no
  192. // additional configuration will be applied.
  193. //
  194. // If the operating system supports SOCK_CLOEXEC and SOCK_NONBLOCK, they are
  195. // automatically applied to typ to mirror the standard library's socket flag
  196. // behaviors.
  197. func Socket(domain, typ, proto int, name string, cfg *Config) (*Conn, error) {
  198. if cfg == nil {
  199. cfg = &Config{}
  200. }
  201. if cfg.NetNS == 0 {
  202. // Non-Linux or no network namespace.
  203. return socket(domain, typ, proto, name)
  204. }
  205. // Linux only: create Conn in the specified network namespace.
  206. return withNetNS(cfg.NetNS, func() (*Conn, error) {
  207. return socket(domain, typ, proto, name)
  208. })
  209. }
  210. // socket is the internal, cross-platform entry point for socket(2).
  211. func socket(domain, typ, proto int, name string) (*Conn, error) {
  212. var (
  213. fd int
  214. err error
  215. )
  216. for {
  217. fd, err = unix.Socket(domain, typ|socketFlags, proto)
  218. switch {
  219. case err == nil:
  220. // Some OSes already set CLOEXEC with typ.
  221. if !flagCLOEXEC {
  222. unix.CloseOnExec(fd)
  223. }
  224. // No error, prepare the Conn.
  225. return New(fd, name)
  226. case !ready(err):
  227. // System call interrupted or not ready, try again.
  228. continue
  229. case err == unix.EINVAL, err == unix.EPROTONOSUPPORT:
  230. // On Linux, SOCK_NONBLOCK and SOCK_CLOEXEC were introduced in
  231. // 2.6.27. On FreeBSD, both flags were introduced in FreeBSD 10.
  232. // EINVAL and EPROTONOSUPPORT check for earlier versions of these
  233. // OSes respectively.
  234. //
  235. // Mirror what the standard library does when creating file
  236. // descriptors: avoid racing a fork/exec with the creation of new
  237. // file descriptors, so that child processes do not inherit socket
  238. // file descriptors unexpectedly.
  239. //
  240. // For a more thorough explanation, see similar work in the Go tree:
  241. // func sysSocket in net/sock_cloexec.go, as well as the detailed
  242. // comment in syscall/exec_unix.go.
  243. syscall.ForkLock.RLock()
  244. fd, err = unix.Socket(domain, typ, proto)
  245. if err != nil {
  246. syscall.ForkLock.RUnlock()
  247. return nil, os.NewSyscallError("socket", err)
  248. }
  249. unix.CloseOnExec(fd)
  250. syscall.ForkLock.RUnlock()
  251. return New(fd, name)
  252. default:
  253. // Unhandled error.
  254. return nil, os.NewSyscallError("socket", err)
  255. }
  256. }
  257. }
  258. // FileConn returns a copy of the network connection corresponding to the open
  259. // file. It is the caller's responsibility to close the file when finished.
  260. // Closing the Conn does not affect the File, and closing the File does not
  261. // affect the Conn.
  262. func FileConn(f *os.File, name string) (*Conn, error) {
  263. // First we'll try to do fctnl(2) with F_DUPFD_CLOEXEC because we can dup
  264. // the file descriptor and set the flag in one syscall.
  265. fd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0)
  266. switch err {
  267. case nil:
  268. // OK, ready to set up non-blocking I/O.
  269. return New(fd, name)
  270. case unix.EINVAL:
  271. // The kernel rejected our fcntl(2), fall back to separate dup(2) and
  272. // setting close on exec.
  273. //
  274. // Mirror what the standard library does when creating file descriptors:
  275. // avoid racing a fork/exec with the creation of new file descriptors,
  276. // so that child processes do not inherit socket file descriptors
  277. // unexpectedly.
  278. syscall.ForkLock.RLock()
  279. fd, err := unix.Dup(fd)
  280. if err != nil {
  281. syscall.ForkLock.RUnlock()
  282. return nil, os.NewSyscallError("dup", err)
  283. }
  284. unix.CloseOnExec(fd)
  285. syscall.ForkLock.RUnlock()
  286. return New(fd, name)
  287. default:
  288. // Any other errors.
  289. return nil, os.NewSyscallError("fcntl", err)
  290. }
  291. }
  292. // New wraps an existing file descriptor to create a Conn. name should be a
  293. // unique name for the socket type such as "netlink" or "vsock".
  294. //
  295. // Most callers should use Socket or FileConn to construct a Conn. New is
  296. // intended for integrating with specific system calls which provide a file
  297. // descriptor that supports asynchronous I/O. The file descriptor is immediately
  298. // set to nonblocking mode and registered with Go's runtime network poller for
  299. // future I/O operations.
  300. //
  301. // Unlike FileConn, New does not duplicate the existing file descriptor in any
  302. // way. The returned Conn takes ownership of the underlying file descriptor.
  303. func New(fd int, name string) (*Conn, error) {
  304. // All Conn I/O is nonblocking for integration with Go's runtime network
  305. // poller. Depending on the OS this might already be set but it can't hurt
  306. // to set it again.
  307. if err := unix.SetNonblock(fd, true); err != nil {
  308. return nil, os.NewSyscallError("setnonblock", err)
  309. }
  310. // os.NewFile registers the non-blocking file descriptor with the runtime
  311. // poller, which is then used for most subsequent operations except those
  312. // that require raw I/O via SyscallConn.
  313. //
  314. // See also: https://golang.org/pkg/os/#NewFile
  315. f := os.NewFile(uintptr(fd), name)
  316. rc, err := f.SyscallConn()
  317. if err != nil {
  318. return nil, err
  319. }
  320. c := &Conn{
  321. name: name,
  322. fd: f,
  323. rc: rc,
  324. }
  325. sotype, err := c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_TYPE)
  326. if err != nil {
  327. return nil, err
  328. }
  329. c.isStream = sotype == unix.SOCK_STREAM
  330. c.zeroReadIsEOF = sotype != unix.SOCK_DGRAM && sotype != unix.SOCK_RAW
  331. return c, nil
  332. }
  333. // Low-level methods which provide raw system call access.
  334. // Accept wraps accept(2) or accept4(2) depending on the operating system, but
  335. // returns a Conn for the accepted connection rather than a raw file descriptor.
  336. //
  337. // If the operating system supports accept4(2) (which allows flags),
  338. // SOCK_CLOEXEC and SOCK_NONBLOCK are automatically applied to flags to mirror
  339. // the standard library's socket flag behaviors.
  340. //
  341. // If the operating system only supports accept(2) (which does not allow flags)
  342. // and flags is not zero, an error will be returned.
  343. func (c *Conn) Accept(flags int) (*Conn, unix.Sockaddr, error) {
  344. var (
  345. nfd int
  346. sa unix.Sockaddr
  347. err error
  348. )
  349. doErr := c.read(context.Background(), sysAccept, func(fd int) error {
  350. // Either accept(2) or accept4(2) depending on the OS.
  351. nfd, sa, err = accept(fd, flags|socketFlags)
  352. return err
  353. })
  354. if doErr != nil {
  355. return nil, nil, doErr
  356. }
  357. if err != nil {
  358. // sysAccept is either "accept" or "accept4" depending on the OS.
  359. return nil, nil, os.NewSyscallError(sysAccept, err)
  360. }
  361. // Successfully accepted a connection, wrap it in a Conn for use by the
  362. // caller.
  363. ac, err := New(nfd, c.name)
  364. if err != nil {
  365. return nil, nil, err
  366. }
  367. return ac, sa, nil
  368. }
  369. // Bind wraps bind(2).
  370. func (c *Conn) Bind(sa unix.Sockaddr) error {
  371. return c.controlErr(context.Background(), "bind", func(fd int) error {
  372. return unix.Bind(fd, sa)
  373. })
  374. }
  375. // Connect wraps connect(2). In order to verify that the underlying socket is
  376. // connected to a remote peer, Connect calls getpeername(2) and returns the
  377. // unix.Sockaddr from that call.
  378. func (c *Conn) Connect(ctx context.Context, sa unix.Sockaddr) (unix.Sockaddr, error) {
  379. const op = "connect"
  380. // TODO(mdlayher): it would seem that trying to connect to unbound vsock
  381. // listeners by calling Connect multiple times results in ECONNRESET for the
  382. // first and nil error for subsequent calls. Do we need to memoize the
  383. // error? Check what the stdlib behavior is.
  384. var (
  385. // Track progress between invocations of the write closure. We don't
  386. // have an explicit WaitWrite call like internal/poll does, so we have
  387. // to wait until the runtime calls the closure again to indicate we can
  388. // write.
  389. progress uint32
  390. // Capture closure sockaddr and error.
  391. rsa unix.Sockaddr
  392. err error
  393. )
  394. doErr := c.write(ctx, op, func(fd int) error {
  395. if atomic.AddUint32(&progress, 1) == 1 {
  396. // First call: initiate connect.
  397. return unix.Connect(fd, sa)
  398. }
  399. // Subsequent calls: the runtime network poller indicates fd is
  400. // writable. Check for errno.
  401. errno, gerr := c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_ERROR)
  402. if err := ctx.Err(); err != nil {
  403. return err
  404. }
  405. if gerr != nil {
  406. return gerr
  407. }
  408. if errno != 0 {
  409. // Connection is still not ready or failed. If errno indicates
  410. // the socket is not ready, we will wait for the next write
  411. // event. Otherwise we propagate this errno back to the as a
  412. // permanent error.
  413. uerr := unix.Errno(errno)
  414. err = uerr
  415. return uerr
  416. }
  417. // According to internal/poll, it's possible for the runtime network
  418. // poller to spuriously wake us and return errno 0 for SO_ERROR.
  419. // Make sure we are actually connected to a peer.
  420. peer, err := c.Getpeername()
  421. if err := ctx.Err(); err != nil {
  422. return err
  423. }
  424. if err != nil {
  425. // internal/poll unconditionally goes back to WaitWrite.
  426. // Synthesize an error that will do the same for us.
  427. return unix.EAGAIN
  428. }
  429. // Connection complete.
  430. rsa = peer
  431. return nil
  432. })
  433. if doErr != nil {
  434. return nil, doErr
  435. }
  436. if err == unix.EISCONN {
  437. // TODO(mdlayher): is this block obsolete with the addition of the
  438. // getsockopt SO_ERROR check above?
  439. //
  440. // EISCONN is reported if the socket is already established and should
  441. // not be treated as an error.
  442. // - Darwin reports this for at least TCP sockets
  443. // - Linux reports this for at least AF_VSOCK sockets
  444. return rsa, nil
  445. }
  446. return rsa, os.NewSyscallError(op, err)
  447. }
  448. // Getsockname wraps getsockname(2).
  449. func (c *Conn) Getsockname() (unix.Sockaddr, error) {
  450. const op = "getsockname"
  451. var (
  452. sa unix.Sockaddr
  453. err error
  454. )
  455. doErr := c.control(context.Background(), op, func(fd int) error {
  456. sa, err = unix.Getsockname(fd)
  457. return err
  458. })
  459. if doErr != nil {
  460. return nil, doErr
  461. }
  462. return sa, os.NewSyscallError(op, err)
  463. }
  464. // Getpeername wraps getpeername(2).
  465. func (c *Conn) Getpeername() (unix.Sockaddr, error) {
  466. const op = "getpeername"
  467. var (
  468. sa unix.Sockaddr
  469. err error
  470. )
  471. doErr := c.control(context.Background(), op, func(fd int) error {
  472. sa, err = unix.Getpeername(fd)
  473. return err
  474. })
  475. if doErr != nil {
  476. return nil, doErr
  477. }
  478. return sa, os.NewSyscallError(op, err)
  479. }
  480. // GetsockoptInt wraps getsockopt(2) for integer values.
  481. func (c *Conn) GetsockoptInt(level, opt int) (int, error) {
  482. const op = "getsockopt"
  483. var (
  484. value int
  485. err error
  486. )
  487. doErr := c.control(context.Background(), op, func(fd int) error {
  488. value, err = unix.GetsockoptInt(fd, level, opt)
  489. return err
  490. })
  491. if doErr != nil {
  492. return 0, doErr
  493. }
  494. return value, os.NewSyscallError(op, err)
  495. }
  496. // Listen wraps listen(2).
  497. func (c *Conn) Listen(n int) error {
  498. return c.controlErr(context.Background(), "listen", func(fd int) error {
  499. return unix.Listen(fd, n)
  500. })
  501. }
  502. // Recvmsg wraps recvmsg(2).
  503. func (c *Conn) Recvmsg(ctx context.Context, p, oob []byte, flags int) (int, int, int, unix.Sockaddr, error) {
  504. const op = "recvmsg"
  505. var (
  506. n, oobn, recvflags int
  507. from unix.Sockaddr
  508. err error
  509. )
  510. doErr := c.read(ctx, op, func(fd int) error {
  511. n, oobn, recvflags, from, err = unix.Recvmsg(fd, p, oob, flags)
  512. return err
  513. })
  514. switch {
  515. case doErr != nil:
  516. return 0, 0, 0, nil, doErr
  517. case n == 0 && err == nil && c.zeroReadIsEOF:
  518. return 0, 0, 0, nil, io.EOF
  519. }
  520. return n, oobn, recvflags, from, os.NewSyscallError(op, err)
  521. }
  522. // Recvfrom wraps recvfrom(2).
  523. func (c *Conn) Recvfrom(ctx context.Context, p []byte, flags int) (int, unix.Sockaddr, error) {
  524. const op = "recvfrom"
  525. var (
  526. n int
  527. addr unix.Sockaddr
  528. err error
  529. )
  530. doErr := c.read(ctx, op, func(fd int) error {
  531. n, addr, err = unix.Recvfrom(fd, p, flags)
  532. return err
  533. })
  534. switch {
  535. case doErr != nil:
  536. return 0, nil, doErr
  537. case n == 0 && err == nil && c.zeroReadIsEOF:
  538. return 0, nil, io.EOF
  539. }
  540. return n, addr, os.NewSyscallError(op, err)
  541. }
  542. // Sendmsg wraps sendmsg(2).
  543. func (c *Conn) Sendmsg(ctx context.Context, p, oob []byte, to unix.Sockaddr, flags int) (int, error) {
  544. var (
  545. n int
  546. err error
  547. )
  548. doErr := c.writeErr(ctx, "sendmsg", func(fd int) error {
  549. n, err = unix.SendmsgN(fd, p, oob, to, flags)
  550. return err
  551. })
  552. if doErr != nil {
  553. return 0, doErr
  554. }
  555. return n, err
  556. }
  557. // Sendto wraps sendto(2).
  558. func (c *Conn) Sendto(ctx context.Context, p []byte, flags int, to unix.Sockaddr) error {
  559. return c.writeErr(ctx, "sendto", func(fd int) error {
  560. return unix.Sendto(fd, p, flags, to)
  561. })
  562. }
  563. // SetsockoptInt wraps setsockopt(2) for integer values.
  564. func (c *Conn) SetsockoptInt(level, opt, value int) error {
  565. return c.controlErr(context.Background(), "setsockopt", func(fd int) error {
  566. return unix.SetsockoptInt(fd, level, opt, value)
  567. })
  568. }
  569. // Shutdown wraps shutdown(2).
  570. func (c *Conn) Shutdown(how int) error {
  571. return c.controlErr(context.Background(), "shutdown", func(fd int) error {
  572. return unix.Shutdown(fd, how)
  573. })
  574. }
  575. // Conn low-level read/write/control functions. These functions mirror the
  576. // syscall.RawConn APIs but the input closures return errors rather than
  577. // booleans. Any syscalls invoked within f should return their error to allow
  578. // the Conn to check for readiness with the runtime network poller, or to retry
  579. // operations which may have been interrupted by EINTR or similar.
  580. //
  581. // Note that errors from the input closure functions are not propagated to the
  582. // error return values of read/write/control, and the caller is still
  583. // responsible for error handling.
  584. // read executes f, a read function, against the associated file descriptor.
  585. // op is used to create an *os.SyscallError if the file descriptor is closed.
  586. //
  587. // It obeys context cancelation and the context must not be nil.
  588. func (c *Conn) read(ctx context.Context, op string, f func(fd int) error) error {
  589. if atomic.LoadUint32(&c.closed) != 0 {
  590. return os.NewSyscallError(op, unix.EBADF)
  591. }
  592. err := c.rc.Read(func(fd uintptr) bool {
  593. select {
  594. default:
  595. return ready(f(int(fd)))
  596. case <-ctx.Done():
  597. return ready(ctx.Err())
  598. }
  599. })
  600. if err := ctx.Err(); err != nil {
  601. return err
  602. }
  603. return err
  604. }
  605. // readErr wraps read to execute a function and capture its error result.
  606. // This is a convenience wrapper for functions which don't return any extra
  607. // values to capture in a closure.
  608. //
  609. // It obeys context cancelation and the context must not be nil.
  610. func (c *Conn) readErr(ctx context.Context, op string, f func(fd int) error) error {
  611. var err error
  612. doErr := c.read(ctx, op, func(fd int) error {
  613. return f(fd)
  614. })
  615. if doErr != nil {
  616. return doErr
  617. }
  618. return os.NewSyscallError(op, err)
  619. }
  620. // write executes f, a write function, against the associated file descriptor.
  621. // op is used to create an *os.SyscallError if the file descriptor is closed.
  622. //
  623. // It obeys context cancelation and the context must not be nil.
  624. func (c *Conn) write(ctx context.Context, op string, f func(fd int) error) error {
  625. if atomic.LoadUint32(&c.closed) != 0 {
  626. return os.NewSyscallError(op, unix.EBADF)
  627. }
  628. err := c.rc.Write(func(fd uintptr) (done bool) {
  629. select {
  630. default:
  631. return ready(f(int(fd)))
  632. case <-ctx.Done():
  633. return ready(ctx.Err())
  634. }
  635. })
  636. if err := ctx.Err(); err != nil {
  637. return err
  638. }
  639. return err
  640. }
  641. // writeErr wraps write to execute a function and capture its error result.
  642. // This is a convenience wrapper for functions which don't return any extra
  643. // values to capture in a closure.
  644. //
  645. // It obeys context cancelation and the context must not be nil.
  646. func (c *Conn) writeErr(ctx context.Context, op string, f func(fd int) error) error {
  647. var err error
  648. doErr := c.write(ctx, op, func(fd int) error {
  649. return f(fd)
  650. })
  651. if doErr != nil {
  652. return doErr
  653. }
  654. return os.NewSyscallError(op, err)
  655. }
  656. // control executes f, a control function, against the associated file
  657. // descriptor. op is used to create an *os.SyscallError if the file descriptor
  658. // is closed.
  659. //
  660. // It obeys context cancelation and the context must not be nil.
  661. func (c *Conn) control(ctx context.Context, op string, f func(fd int) error) error {
  662. if atomic.LoadUint32(&c.closed) != 0 {
  663. return os.NewSyscallError(op, unix.EBADF)
  664. }
  665. var cerr error
  666. err := c.rc.Control(func(fd uintptr) {
  667. // Repeatedly attempt the syscall(s) invoked by f until completion is
  668. // indicated by the return value of ready.
  669. for {
  670. if err := ctx.Err(); err != nil {
  671. cerr = err
  672. return
  673. }
  674. if ready(f(int(fd))) {
  675. return
  676. }
  677. }
  678. })
  679. if cerr != nil {
  680. err = cerr
  681. }
  682. return err
  683. }
  684. // controlErr wraps control to execute a function and capture its error result.
  685. // This is a convenience wrapper for functions which don't return any extra
  686. // values to capture in a closure.
  687. //
  688. // It obeys context cancelation and the context must not be nil.
  689. func (c *Conn) controlErr(ctx context.Context, op string, f func(fd int) error) error {
  690. var err error
  691. doErr := c.control(ctx, op, func(fd int) error {
  692. err = f(fd)
  693. return err
  694. })
  695. if doErr != nil {
  696. return doErr
  697. }
  698. return os.NewSyscallError(op, err)
  699. }
  700. // ready indicates readiness based on the value of err.
  701. func ready(err error) bool {
  702. switch err {
  703. case unix.EAGAIN, unix.EINPROGRESS, unix.EINTR:
  704. // When a socket is in non-blocking mode, we might see a variety of errors:
  705. // - EAGAIN: most common case for a socket read not being ready
  706. // - EINPROGRESS: reported by some sockets when first calling connect
  707. // - EINTR: system call interrupted, more frequently occurs in Go 1.14+
  708. // because goroutines can be asynchronously preempted
  709. //
  710. // Return false to let the poller wait for readiness. See the source code
  711. // for internal/poll.FD.RawRead for more details.
  712. return false
  713. case context.Canceled, context.DeadlineExceeded:
  714. // The caller canceled the operation.
  715. return true
  716. default:
  717. // Ready regardless of whether there was an error or no error.
  718. return true
  719. }
  720. }
  // maxRW is the largest number of bytes passed to a single read or write on a
  // stream socket (1 GiB).
  //
  // Darwin and FreeBSD can't read or write 2GB+ files at a time, even on 64-bit
  // systems, and the same is true of socket implementations on many systems.
  // See golang.org/issue/7812 and golang.org/issue/16266. 1GB is used instead
  // of, say, 2GB-1, to keep subsequent reads aligned.
  const maxRW = 1024 * 1024 * 1024