conn.go 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894
  1. package socket
  2. import (
  3. "context"
  4. "errors"
  5. "io"
  6. "os"
  7. "sync"
  8. "sync/atomic"
  9. "syscall"
  10. "time"
  11. "golang.org/x/sys/unix"
  12. )
  13. // Lock in an expected public interface for convenience.
  14. var _ interface {
  15. io.ReadWriteCloser
  16. syscall.Conn
  17. SetDeadline(t time.Time) error
  18. SetReadDeadline(t time.Time) error
  19. SetWriteDeadline(t time.Time) error
  20. } = &Conn{}
  21. // A Conn is a low-level network connection which integrates with Go's runtime
  22. // network poller to provide asynchronous I/O and deadline support.
  23. //
  24. // Many of a Conn's blocking methods support net.Conn deadlines as well as
  25. // cancelation via context. Note that passing a context with a deadline set will
  26. // override any of the previous deadlines set by calls to the SetDeadline family
  27. // of methods.
  28. type Conn struct {
  29. // Indicates whether or not Conn.Close has been called. Must be accessed
  30. // atomically. Atomics definitions must come first in the Conn struct.
  31. closed uint32
  32. // A unique name for the Conn which is also associated with derived file
  33. // descriptors such as those created by accept(2).
  34. name string
  35. // facts contains information we have determined about Conn to trigger
  36. // alternate behavior in certain functions.
  37. facts facts
  38. // Provides access to the underlying file registered with the runtime
  39. // network poller, and arbitrary raw I/O calls.
  40. fd *os.File
  41. rc syscall.RawConn
  42. }
  43. // facts contains facts about a Conn.
  44. type facts struct {
  45. // isStream reports whether this is a streaming descriptor, as opposed to a
  46. // packet-based descriptor like a UDP socket.
  47. isStream bool
  48. // zeroReadIsEOF reports Whether a zero byte read indicates EOF. This is
  49. // false for a message based socket connection.
  50. zeroReadIsEOF bool
  51. }
  52. // A Config contains options for a Conn.
  53. type Config struct {
  54. // NetNS specifies the Linux network namespace the Conn will operate in.
  55. // This option is unsupported on other operating systems.
  56. //
  57. // If set (non-zero), Conn will enter the specified network namespace and an
  58. // error will occur in Socket if the operation fails.
  59. //
  60. // If not set (zero), a best-effort attempt will be made to enter the
  61. // network namespace of the calling thread: this means that any changes made
  62. // to the calling thread's network namespace will also be reflected in Conn.
  63. // If this operation fails (due to lack of permissions or because network
  64. // namespaces are disabled by kernel configuration), Socket will not return
  65. // an error, and the Conn will operate in the default network namespace of
  66. // the process. This enables non-privileged use of Conn in applications
  67. // which do not require elevated privileges.
  68. //
  69. // Entering a network namespace is a privileged operation (root or
  70. // CAP_SYS_ADMIN are required), and most applications should leave this set
  71. // to 0.
  72. NetNS int
  73. }
  74. // High-level methods which provide convenience over raw system calls.
  75. // Close closes the underlying file descriptor for the Conn, which also causes
  76. // all in-flight I/O operations to immediately unblock and return errors. Any
  77. // subsequent uses of Conn will result in EBADF.
  78. func (c *Conn) Close() error {
  79. // The caller has expressed an intent to close the socket, so immediately
  80. // increment s.closed to force further calls to result in EBADF before also
  81. // closing the file descriptor to unblock any outstanding operations.
  82. //
  83. // Because other operations simply check for s.closed != 0, we will permit
  84. // double Close, which would increment s.closed beyond 1.
  85. if atomic.AddUint32(&c.closed, 1) != 1 {
  86. // Multiple Close calls.
  87. return nil
  88. }
  89. return os.NewSyscallError("close", c.fd.Close())
  90. }
  91. // CloseRead shuts down the reading side of the Conn. Most callers should just
  92. // use Close.
  93. func (c *Conn) CloseRead() error { return c.Shutdown(unix.SHUT_RD) }
  94. // CloseWrite shuts down the writing side of the Conn. Most callers should just
  95. // use Close.
  96. func (c *Conn) CloseWrite() error { return c.Shutdown(unix.SHUT_WR) }
  97. // Read reads directly from the underlying file descriptor.
  98. func (c *Conn) Read(b []byte) (int, error) { return c.fd.Read(b) }
  99. // ReadContext reads from the underlying file descriptor with added support for
  100. // context cancelation.
  101. func (c *Conn) ReadContext(ctx context.Context, b []byte) (int, error) {
  102. if c.facts.isStream && len(b) > maxRW {
  103. b = b[:maxRW]
  104. }
  105. n, err := readT(c, ctx, "read", func(fd int) (int, error) {
  106. return unix.Read(fd, b)
  107. })
  108. if n == 0 && err == nil && c.facts.zeroReadIsEOF {
  109. return 0, io.EOF
  110. }
  111. return n, os.NewSyscallError("read", err)
  112. }
  113. // Write writes directly to the underlying file descriptor.
  114. func (c *Conn) Write(b []byte) (int, error) { return c.fd.Write(b) }
  115. // WriteContext writes to the underlying file descriptor with added support for
  116. // context cancelation.
  117. func (c *Conn) WriteContext(ctx context.Context, b []byte) (int, error) {
  118. var (
  119. n, nn int
  120. err error
  121. )
  122. doErr := c.write(ctx, "write", func(fd int) error {
  123. max := len(b)
  124. if c.facts.isStream && max-nn > maxRW {
  125. max = nn + maxRW
  126. }
  127. n, err = unix.Write(fd, b[nn:max])
  128. if n > 0 {
  129. nn += n
  130. }
  131. if nn == len(b) {
  132. return err
  133. }
  134. if n == 0 && err == nil {
  135. err = io.ErrUnexpectedEOF
  136. return nil
  137. }
  138. return err
  139. })
  140. if doErr != nil {
  141. return 0, doErr
  142. }
  143. return nn, os.NewSyscallError("write", err)
  144. }
  145. // SetDeadline sets both the read and write deadlines associated with the Conn.
  146. func (c *Conn) SetDeadline(t time.Time) error { return c.fd.SetDeadline(t) }
  147. // SetReadDeadline sets the read deadline associated with the Conn.
  148. func (c *Conn) SetReadDeadline(t time.Time) error { return c.fd.SetReadDeadline(t) }
  149. // SetWriteDeadline sets the write deadline associated with the Conn.
  150. func (c *Conn) SetWriteDeadline(t time.Time) error { return c.fd.SetWriteDeadline(t) }
  151. // ReadBuffer gets the size of the operating system's receive buffer associated
  152. // with the Conn.
  153. func (c *Conn) ReadBuffer() (int, error) {
  154. return c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_RCVBUF)
  155. }
  156. // WriteBuffer gets the size of the operating system's transmit buffer
  157. // associated with the Conn.
  158. func (c *Conn) WriteBuffer() (int, error) {
  159. return c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_SNDBUF)
  160. }
  161. // SetReadBuffer sets the size of the operating system's receive buffer
  162. // associated with the Conn.
  163. //
  164. // When called with elevated privileges on Linux, the SO_RCVBUFFORCE option will
  165. // be used to override operating system limits. Otherwise SO_RCVBUF is used
  166. // (which obeys operating system limits).
  167. func (c *Conn) SetReadBuffer(bytes int) error { return c.setReadBuffer(bytes) }
  168. // SetWriteBuffer sets the size of the operating system's transmit buffer
  169. // associated with the Conn.
  170. //
  171. // When called with elevated privileges on Linux, the SO_SNDBUFFORCE option will
  172. // be used to override operating system limits. Otherwise SO_SNDBUF is used
  173. // (which obeys operating system limits).
  174. func (c *Conn) SetWriteBuffer(bytes int) error { return c.setWriteBuffer(bytes) }
  175. // SyscallConn returns a raw network connection. This implements the
  176. // syscall.Conn interface.
  177. //
  178. // SyscallConn is intended for advanced use cases, such as getting and setting
  179. // arbitrary socket options using the socket's file descriptor. If possible,
  180. // those operations should be performed using methods on Conn instead.
  181. //
  182. // Once invoked, it is the caller's responsibility to ensure that operations
  183. // performed using Conn and the syscall.RawConn do not conflict with each other.
  184. func (c *Conn) SyscallConn() (syscall.RawConn, error) {
  185. if atomic.LoadUint32(&c.closed) != 0 {
  186. return nil, os.NewSyscallError("syscallconn", unix.EBADF)
  187. }
  188. // TODO(mdlayher): mutex or similar to enforce syscall.RawConn contract of
  189. // FD remaining valid for duration of calls?
  190. return c.rc, nil
  191. }
  192. // Socket wraps the socket(2) system call to produce a Conn. domain, typ, and
  193. // proto are passed directly to socket(2), and name should be a unique name for
  194. // the socket type such as "netlink" or "vsock".
  195. //
  196. // The cfg parameter specifies optional configuration for the Conn. If nil, no
  197. // additional configuration will be applied.
  198. //
  199. // If the operating system supports SOCK_CLOEXEC and SOCK_NONBLOCK, they are
  200. // automatically applied to typ to mirror the standard library's socket flag
  201. // behaviors.
  202. func Socket(domain, typ, proto int, name string, cfg *Config) (*Conn, error) {
  203. if cfg == nil {
  204. cfg = &Config{}
  205. }
  206. if cfg.NetNS == 0 {
  207. // Non-Linux or no network namespace.
  208. return socket(domain, typ, proto, name)
  209. }
  210. // Linux only: create Conn in the specified network namespace.
  211. return withNetNS(cfg.NetNS, func() (*Conn, error) {
  212. return socket(domain, typ, proto, name)
  213. })
  214. }
  215. // socket is the internal, cross-platform entry point for socket(2).
  216. func socket(domain, typ, proto int, name string) (*Conn, error) {
  217. var (
  218. fd int
  219. err error
  220. )
  221. for {
  222. fd, err = unix.Socket(domain, typ|socketFlags, proto)
  223. switch {
  224. case err == nil:
  225. // Some OSes already set CLOEXEC with typ.
  226. if !flagCLOEXEC {
  227. unix.CloseOnExec(fd)
  228. }
  229. // No error, prepare the Conn.
  230. return New(fd, name)
  231. case !ready(err):
  232. // System call interrupted or not ready, try again.
  233. continue
  234. case err == unix.EINVAL, err == unix.EPROTONOSUPPORT:
  235. // On Linux, SOCK_NONBLOCK and SOCK_CLOEXEC were introduced in
  236. // 2.6.27. On FreeBSD, both flags were introduced in FreeBSD 10.
  237. // EINVAL and EPROTONOSUPPORT check for earlier versions of these
  238. // OSes respectively.
  239. //
  240. // Mirror what the standard library does when creating file
  241. // descriptors: avoid racing a fork/exec with the creation of new
  242. // file descriptors, so that child processes do not inherit socket
  243. // file descriptors unexpectedly.
  244. //
  245. // For a more thorough explanation, see similar work in the Go tree:
  246. // func sysSocket in net/sock_cloexec.go, as well as the detailed
  247. // comment in syscall/exec_unix.go.
  248. syscall.ForkLock.RLock()
  249. fd, err = unix.Socket(domain, typ, proto)
  250. if err != nil {
  251. syscall.ForkLock.RUnlock()
  252. return nil, os.NewSyscallError("socket", err)
  253. }
  254. unix.CloseOnExec(fd)
  255. syscall.ForkLock.RUnlock()
  256. return New(fd, name)
  257. default:
  258. // Unhandled error.
  259. return nil, os.NewSyscallError("socket", err)
  260. }
  261. }
  262. }
  263. // FileConn returns a copy of the network connection corresponding to the open
  264. // file. It is the caller's responsibility to close the file when finished.
  265. // Closing the Conn does not affect the File, and closing the File does not
  266. // affect the Conn.
  267. func FileConn(f *os.File, name string) (*Conn, error) {
  268. // First we'll try to do fctnl(2) with F_DUPFD_CLOEXEC because we can dup
  269. // the file descriptor and set the flag in one syscall.
  270. fd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0)
  271. switch err {
  272. case nil:
  273. // OK, ready to set up non-blocking I/O.
  274. return New(fd, name)
  275. case unix.EINVAL:
  276. // The kernel rejected our fcntl(2), fall back to separate dup(2) and
  277. // setting close on exec.
  278. //
  279. // Mirror what the standard library does when creating file descriptors:
  280. // avoid racing a fork/exec with the creation of new file descriptors,
  281. // so that child processes do not inherit socket file descriptors
  282. // unexpectedly.
  283. syscall.ForkLock.RLock()
  284. fd, err := unix.Dup(fd)
  285. if err != nil {
  286. syscall.ForkLock.RUnlock()
  287. return nil, os.NewSyscallError("dup", err)
  288. }
  289. unix.CloseOnExec(fd)
  290. syscall.ForkLock.RUnlock()
  291. return New(fd, name)
  292. default:
  293. // Any other errors.
  294. return nil, os.NewSyscallError("fcntl", err)
  295. }
  296. }
  297. // New wraps an existing file descriptor to create a Conn. name should be a
  298. // unique name for the socket type such as "netlink" or "vsock".
  299. //
  300. // Most callers should use Socket or FileConn to construct a Conn. New is
  301. // intended for integrating with specific system calls which provide a file
  302. // descriptor that supports asynchronous I/O. The file descriptor is immediately
  303. // set to nonblocking mode and registered with Go's runtime network poller for
  304. // future I/O operations.
  305. //
  306. // Unlike FileConn, New does not duplicate the existing file descriptor in any
  307. // way. The returned Conn takes ownership of the underlying file descriptor.
  308. func New(fd int, name string) (*Conn, error) {
  309. // All Conn I/O is nonblocking for integration with Go's runtime network
  310. // poller. Depending on the OS this might already be set but it can't hurt
  311. // to set it again.
  312. if err := unix.SetNonblock(fd, true); err != nil {
  313. return nil, os.NewSyscallError("setnonblock", err)
  314. }
  315. // os.NewFile registers the non-blocking file descriptor with the runtime
  316. // poller, which is then used for most subsequent operations except those
  317. // that require raw I/O via SyscallConn.
  318. //
  319. // See also: https://golang.org/pkg/os/#NewFile
  320. f := os.NewFile(uintptr(fd), name)
  321. rc, err := f.SyscallConn()
  322. if err != nil {
  323. return nil, err
  324. }
  325. c := &Conn{
  326. name: name,
  327. fd: f,
  328. rc: rc,
  329. }
  330. // Probe the file descriptor for socket settings.
  331. sotype, err := c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_TYPE)
  332. switch {
  333. case err == nil:
  334. // File is a socket, check its properties.
  335. c.facts = facts{
  336. isStream: sotype == unix.SOCK_STREAM,
  337. zeroReadIsEOF: sotype != unix.SOCK_DGRAM && sotype != unix.SOCK_RAW,
  338. }
  339. case errors.Is(err, unix.ENOTSOCK):
  340. // File is not a socket, treat it as a regular file.
  341. c.facts = facts{
  342. isStream: true,
  343. zeroReadIsEOF: true,
  344. }
  345. default:
  346. return nil, err
  347. }
  348. return c, nil
  349. }
  350. // Low-level methods which provide raw system call access.
  351. // Accept wraps accept(2) or accept4(2) depending on the operating system, but
  352. // returns a Conn for the accepted connection rather than a raw file descriptor.
  353. //
  354. // If the operating system supports accept4(2) (which allows flags),
  355. // SOCK_CLOEXEC and SOCK_NONBLOCK are automatically applied to flags to mirror
  356. // the standard library's socket flag behaviors.
  357. //
  358. // If the operating system only supports accept(2) (which does not allow flags)
  359. // and flags is not zero, an error will be returned.
  360. //
  361. // Accept obeys context cancelation and uses the deadline set on the context to
  362. // cancel accepting the next connection. If a deadline is set on ctx, this
  363. // deadline will override any previous deadlines set using SetDeadline or
  364. // SetReadDeadline. Upon return, the read deadline is cleared.
  365. func (c *Conn) Accept(ctx context.Context, flags int) (*Conn, unix.Sockaddr, error) {
  366. type ret struct {
  367. nfd int
  368. sa unix.Sockaddr
  369. }
  370. r, err := readT(c, ctx, sysAccept, func(fd int) (ret, error) {
  371. // Either accept(2) or accept4(2) depending on the OS.
  372. nfd, sa, err := accept(fd, flags|socketFlags)
  373. return ret{nfd, sa}, err
  374. })
  375. if err != nil {
  376. // internal/poll, context error, or user function error.
  377. return nil, nil, err
  378. }
  379. // Successfully accepted a connection, wrap it in a Conn for use by the
  380. // caller.
  381. ac, err := New(r.nfd, c.name)
  382. if err != nil {
  383. return nil, nil, err
  384. }
  385. return ac, r.sa, nil
  386. }
  387. // Bind wraps bind(2).
  388. func (c *Conn) Bind(sa unix.Sockaddr) error {
  389. return c.control("bind", func(fd int) error { return unix.Bind(fd, sa) })
  390. }
  391. // Connect wraps connect(2). In order to verify that the underlying socket is
  392. // connected to a remote peer, Connect calls getpeername(2) and returns the
  393. // unix.Sockaddr from that call.
  394. //
  395. // Connect obeys context cancelation and uses the deadline set on the context to
  396. // cancel connecting to a remote peer. If a deadline is set on ctx, this
  397. // deadline will override any previous deadlines set using SetDeadline or
  398. // SetWriteDeadline. Upon return, the write deadline is cleared.
  399. func (c *Conn) Connect(ctx context.Context, sa unix.Sockaddr) (unix.Sockaddr, error) {
  400. const op = "connect"
  401. // TODO(mdlayher): it would seem that trying to connect to unbound vsock
  402. // listeners by calling Connect multiple times results in ECONNRESET for the
  403. // first and nil error for subsequent calls. Do we need to memoize the
  404. // error? Check what the stdlib behavior is.
  405. var (
  406. // Track progress between invocations of the write closure. We don't
  407. // have an explicit WaitWrite call like internal/poll does, so we have
  408. // to wait until the runtime calls the closure again to indicate we can
  409. // write.
  410. progress uint32
  411. // Capture closure sockaddr and error.
  412. rsa unix.Sockaddr
  413. err error
  414. )
  415. doErr := c.write(ctx, op, func(fd int) error {
  416. if atomic.AddUint32(&progress, 1) == 1 {
  417. // First call: initiate connect.
  418. return unix.Connect(fd, sa)
  419. }
  420. // Subsequent calls: the runtime network poller indicates fd is
  421. // writable. Check for errno.
  422. errno, gerr := c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_ERROR)
  423. if gerr != nil {
  424. return gerr
  425. }
  426. if errno != 0 {
  427. // Connection is still not ready or failed. If errno indicates
  428. // the socket is not ready, we will wait for the next write
  429. // event. Otherwise we propagate this errno back to the as a
  430. // permanent error.
  431. uerr := unix.Errno(errno)
  432. err = uerr
  433. return uerr
  434. }
  435. // According to internal/poll, it's possible for the runtime network
  436. // poller to spuriously wake us and return errno 0 for SO_ERROR.
  437. // Make sure we are actually connected to a peer.
  438. peer, err := c.Getpeername()
  439. if err != nil {
  440. // internal/poll unconditionally goes back to WaitWrite.
  441. // Synthesize an error that will do the same for us.
  442. return unix.EAGAIN
  443. }
  444. // Connection complete.
  445. rsa = peer
  446. return nil
  447. })
  448. if doErr != nil {
  449. // internal/poll or context error.
  450. return nil, doErr
  451. }
  452. if err == unix.EISCONN {
  453. // TODO(mdlayher): is this block obsolete with the addition of the
  454. // getsockopt SO_ERROR check above?
  455. //
  456. // EISCONN is reported if the socket is already established and should
  457. // not be treated as an error.
  458. // - Darwin reports this for at least TCP sockets
  459. // - Linux reports this for at least AF_VSOCK sockets
  460. return rsa, nil
  461. }
  462. return rsa, os.NewSyscallError(op, err)
  463. }
  464. // Getsockname wraps getsockname(2).
  465. func (c *Conn) Getsockname() (unix.Sockaddr, error) {
  466. return controlT(c, "getsockname", unix.Getsockname)
  467. }
  468. // Getpeername wraps getpeername(2).
  469. func (c *Conn) Getpeername() (unix.Sockaddr, error) {
  470. return controlT(c, "getpeername", unix.Getpeername)
  471. }
  472. // GetsockoptICMPv6Filter wraps getsockopt(2) for *unix.ICMPv6Filter values.
  473. func (c *Conn) GetsockoptICMPv6Filter(level, opt int) (*unix.ICMPv6Filter, error) {
  474. return controlT(c, "getsockopt", func(fd int) (*unix.ICMPv6Filter, error) {
  475. return unix.GetsockoptICMPv6Filter(fd, level, opt)
  476. })
  477. }
  478. // GetsockoptInt wraps getsockopt(2) for integer values.
  479. func (c *Conn) GetsockoptInt(level, opt int) (int, error) {
  480. return controlT(c, "getsockopt", func(fd int) (int, error) {
  481. return unix.GetsockoptInt(fd, level, opt)
  482. })
  483. }
  484. // GetsockoptString wraps getsockopt(2) for string values.
  485. func (c *Conn) GetsockoptString(level, opt int) (string, error) {
  486. return controlT(c, "getsockopt", func(fd int) (string, error) {
  487. return unix.GetsockoptString(fd, level, opt)
  488. })
  489. }
  490. // Listen wraps listen(2).
  491. func (c *Conn) Listen(n int) error {
  492. return c.control("listen", func(fd int) error { return unix.Listen(fd, n) })
  493. }
  494. // Recvmsg wraps recvmsg(2).
  495. func (c *Conn) Recvmsg(ctx context.Context, p, oob []byte, flags int) (int, int, int, unix.Sockaddr, error) {
  496. type ret struct {
  497. n, oobn, recvflags int
  498. from unix.Sockaddr
  499. }
  500. r, err := readT(c, ctx, "recvmsg", func(fd int) (ret, error) {
  501. n, oobn, recvflags, from, err := unix.Recvmsg(fd, p, oob, flags)
  502. return ret{n, oobn, recvflags, from}, err
  503. })
  504. if r.n == 0 && err == nil && c.facts.zeroReadIsEOF {
  505. return 0, 0, 0, nil, io.EOF
  506. }
  507. return r.n, r.oobn, r.recvflags, r.from, err
  508. }
  509. // Recvfrom wraps recvfrom(2).
  510. func (c *Conn) Recvfrom(ctx context.Context, p []byte, flags int) (int, unix.Sockaddr, error) {
  511. type ret struct {
  512. n int
  513. addr unix.Sockaddr
  514. }
  515. out, err := readT(c, ctx, "recvfrom", func(fd int) (ret, error) {
  516. n, addr, err := unix.Recvfrom(fd, p, flags)
  517. return ret{n, addr}, err
  518. })
  519. if out.n == 0 && err == nil && c.facts.zeroReadIsEOF {
  520. return 0, nil, io.EOF
  521. }
  522. return out.n, out.addr, err
  523. }
  524. // Sendmsg wraps sendmsg(2).
  525. func (c *Conn) Sendmsg(ctx context.Context, p, oob []byte, to unix.Sockaddr, flags int) (int, error) {
  526. return writeT(c, ctx, "sendmsg", func(fd int) (int, error) {
  527. return unix.SendmsgN(fd, p, oob, to, flags)
  528. })
  529. }
  530. // Sendto wraps sendto(2).
  531. func (c *Conn) Sendto(ctx context.Context, p []byte, flags int, to unix.Sockaddr) error {
  532. return c.write(ctx, "sendto", func(fd int) error {
  533. return unix.Sendto(fd, p, flags, to)
  534. })
  535. }
  536. // SetsockoptICMPv6Filter wraps setsockopt(2) for *unix.ICMPv6Filter values.
  537. func (c *Conn) SetsockoptICMPv6Filter(level, opt int, filter *unix.ICMPv6Filter) error {
  538. return c.control("setsockopt", func(fd int) error {
  539. return unix.SetsockoptICMPv6Filter(fd, level, opt, filter)
  540. })
  541. }
  542. // SetsockoptInt wraps setsockopt(2) for integer values.
  543. func (c *Conn) SetsockoptInt(level, opt, value int) error {
  544. return c.control("setsockopt", func(fd int) error {
  545. return unix.SetsockoptInt(fd, level, opt, value)
  546. })
  547. }
  548. // SetsockoptString wraps setsockopt(2) for string values.
  549. func (c *Conn) SetsockoptString(level, opt int, value string) error {
  550. return c.control("setsockopt", func(fd int) error {
  551. return unix.SetsockoptString(fd, level, opt, value)
  552. })
  553. }
  554. // Shutdown wraps shutdown(2).
  555. func (c *Conn) Shutdown(how int) error {
  556. return c.control("shutdown", func(fd int) error { return unix.Shutdown(fd, how) })
  557. }
  558. // Conn low-level read/write/control functions. These functions mirror the
  559. // syscall.RawConn APIs but the input closures return errors rather than
  560. // booleans.
  561. // read wraps readT to execute a function and capture its error result. This is
  562. // a convenience wrapper for functions which don't return any extra values.
  563. func (c *Conn) read(ctx context.Context, op string, f func(fd int) error) error {
  564. _, err := readT(c, ctx, op, func(fd int) (struct{}, error) {
  565. return struct{}{}, f(fd)
  566. })
  567. return err
  568. }
  569. // write executes f, a write function, against the associated file descriptor.
  570. // op is used to create an *os.SyscallError if the file descriptor is closed.
  571. func (c *Conn) write(ctx context.Context, op string, f func(fd int) error) error {
  572. _, err := writeT(c, ctx, op, func(fd int) (struct{}, error) {
  573. return struct{}{}, f(fd)
  574. })
  575. return err
  576. }
  577. // readT executes c.rc.Read for op using the input function, returning a newly
  578. // allocated result T.
  579. func readT[T any](c *Conn, ctx context.Context, op string, f func(fd int) (T, error)) (T, error) {
  580. return rwT(c, rwContext[T]{
  581. Context: ctx,
  582. Type: read,
  583. Op: op,
  584. Do: f,
  585. })
  586. }
  587. // writeT executes c.rc.Write for op using the input function, returning a newly
  588. // allocated result T.
  589. func writeT[T any](c *Conn, ctx context.Context, op string, f func(fd int) (T, error)) (T, error) {
  590. return rwT(c, rwContext[T]{
  591. Context: ctx,
  592. Type: write,
  593. Op: op,
  594. Do: f,
  595. })
  596. }
  597. // readWrite indicates if an operation intends to read or write.
  598. type readWrite bool
  599. // Possible readWrite values.
  600. const (
  601. read readWrite = false
  602. write readWrite = true
  603. )
  604. // An rwContext provides arguments to rwT.
  605. type rwContext[T any] struct {
  606. // The caller's context passed for cancelation.
  607. Context context.Context
  608. // The type of an operation: read or write.
  609. Type readWrite
  610. // The name of the operation used in errors.
  611. Op string
  612. // The actual function to perform.
  613. Do func(fd int) (T, error)
  614. }
  615. // rwT executes c.rc.Read or c.rc.Write (depending on the value of rw.Type) for
  616. // rw.Op using the input function, returning a newly allocated result T.
  617. //
  618. // It obeys context cancelation and the rw.Context must not be nil.
  619. func rwT[T any](c *Conn, rw rwContext[T]) (T, error) {
  620. if atomic.LoadUint32(&c.closed) != 0 {
  621. // If the file descriptor is already closed, do nothing.
  622. return *new(T), os.NewSyscallError(rw.Op, unix.EBADF)
  623. }
  624. if err := rw.Context.Err(); err != nil {
  625. // Early exit due to context cancel.
  626. return *new(T), os.NewSyscallError(rw.Op, err)
  627. }
  628. var (
  629. // The read or write function used to access the runtime network poller.
  630. poll func(func(uintptr) bool) error
  631. // The read or write function used to set the matching deadline.
  632. deadline func(time.Time) error
  633. )
  634. if rw.Type == write {
  635. poll = c.rc.Write
  636. deadline = c.SetWriteDeadline
  637. } else {
  638. poll = c.rc.Read
  639. deadline = c.SetReadDeadline
  640. }
  641. var (
  642. // Whether or not the context carried a deadline we are actively using
  643. // for cancelation.
  644. setDeadline bool
  645. // Signals for the cancelation watcher goroutine.
  646. wg sync.WaitGroup
  647. doneC = make(chan struct{})
  648. // Atomic: reports whether we have to disarm the deadline.
  649. needDisarm atomic.Bool
  650. )
  651. // On cancel, clean up the watcher.
  652. defer func() {
  653. close(doneC)
  654. wg.Wait()
  655. }()
  656. if d, ok := rw.Context.Deadline(); ok {
  657. // The context has an explicit deadline. We will use it for cancelation
  658. // but disarm it after poll for the next call.
  659. if err := deadline(d); err != nil {
  660. return *new(T), err
  661. }
  662. setDeadline = true
  663. needDisarm.Store(true)
  664. } else {
  665. // The context does not have an explicit deadline. We have to watch for
  666. // cancelation so we can propagate that signal to immediately unblock
  667. // the runtime network poller.
  668. //
  669. // TODO(mdlayher): is it possible to detect a background context vs a
  670. // context with possible future cancel?
  671. wg.Add(1)
  672. go func() {
  673. defer wg.Done()
  674. select {
  675. case <-rw.Context.Done():
  676. // Cancel the operation. Make the caller disarm after poll
  677. // returns.
  678. needDisarm.Store(true)
  679. _ = deadline(time.Unix(0, 1))
  680. case <-doneC:
  681. // Nothing to do.
  682. }
  683. }()
  684. }
  685. var (
  686. t T
  687. err error
  688. )
  689. pollErr := poll(func(fd uintptr) bool {
  690. t, err = rw.Do(int(fd))
  691. return ready(err)
  692. })
  693. if needDisarm.Load() {
  694. _ = deadline(time.Time{})
  695. }
  696. if pollErr != nil {
  697. if rw.Context.Err() != nil || (setDeadline && errors.Is(pollErr, os.ErrDeadlineExceeded)) {
  698. // The caller canceled the operation or we set a deadline internally
  699. // and it was reached.
  700. //
  701. // Unpack a plain context error. We wait for the context to be done
  702. // to synchronize state externally. Otherwise we have noticed I/O
  703. // timeout wakeups when we set a deadline but the context was not
  704. // yet marked done.
  705. <-rw.Context.Done()
  706. return *new(T), os.NewSyscallError(rw.Op, rw.Context.Err())
  707. }
  708. // Error from syscall.RawConn methods. Conventionally the standard
  709. // library does not wrap internal/poll errors in os.NewSyscallError.
  710. return *new(T), pollErr
  711. }
  712. // Result from user function.
  713. return t, os.NewSyscallError(rw.Op, err)
  714. }
  715. // control executes Conn.control for op using the input function.
  716. func (c *Conn) control(op string, f func(fd int) error) error {
  717. _, err := controlT(c, op, func(fd int) (struct{}, error) {
  718. return struct{}{}, f(fd)
  719. })
  720. return err
  721. }
  722. // controlT executes c.rc.Control for op using the input function, returning a
  723. // newly allocated result T.
  724. func controlT[T any](c *Conn, op string, f func(fd int) (T, error)) (T, error) {
  725. if atomic.LoadUint32(&c.closed) != 0 {
  726. // If the file descriptor is already closed, do nothing.
  727. return *new(T), os.NewSyscallError(op, unix.EBADF)
  728. }
  729. var (
  730. t T
  731. err error
  732. )
  733. doErr := c.rc.Control(func(fd uintptr) {
  734. // Repeatedly attempt the syscall(s) invoked by f until completion is
  735. // indicated by the return value of ready or the context is canceled.
  736. //
  737. // The last values for t and err are captured outside of the closure for
  738. // use when the loop breaks.
  739. for {
  740. t, err = f(int(fd))
  741. if ready(err) {
  742. return
  743. }
  744. }
  745. })
  746. if doErr != nil {
  747. // Error from syscall.RawConn methods. Conventionally the standard
  748. // library does not wrap internal/poll errors in os.NewSyscallError.
  749. return *new(T), doErr
  750. }
  751. // Result from user function.
  752. return t, os.NewSyscallError(op, err)
  753. }
  754. // ready indicates readiness based on the value of err.
  755. func ready(err error) bool {
  756. switch err {
  757. case unix.EAGAIN, unix.EINPROGRESS, unix.EINTR:
  758. // When a socket is in non-blocking mode, we might see a variety of errors:
  759. // - EAGAIN: most common case for a socket read not being ready
  760. // - EINPROGRESS: reported by some sockets when first calling connect
  761. // - EINTR: system call interrupted, more frequently occurs in Go 1.14+
  762. // because goroutines can be asynchronously preempted
  763. //
  764. // Return false to let the poller wait for readiness. See the source code
  765. // for internal/poll.FD.RawRead for more details.
  766. return false
  767. default:
  768. // Ready regardless of whether there was an error or no error.
  769. return true
  770. }
  771. }
  772. // Darwin and FreeBSD can't read or write 2GB+ files at a time,
  773. // even on 64-bit systems.
  774. // The same is true of socket implementations on many systems.
  775. // See golang.org/issue/7812 and golang.org/issue/16266.
  776. // Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned.
  777. const maxRW = 1 << 30