conn.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. package socket
  2. import (
  3. "os"
  4. "sync/atomic"
  5. "syscall"
  6. "time"
  7. "golang.org/x/sys/unix"
  8. )
  9. // A Conn is a low-level network connection which integrates with Go's runtime
  10. // network poller to provide asynchronous I/O and deadline support.
  11. type Conn struct {
  12. // Indicates whether or not Conn.Close has been called. Must be accessed
  13. // atomically. Atomics definitions must come first in the Conn struct.
  14. closed uint32
  15. // A unique name for the Conn which is also associated with derived file
  16. // descriptors such as those created by accept(2).
  17. name string
  18. // Provides access to the underlying file registered with the runtime
  19. // network poller, and arbitrary raw I/O calls.
  20. fd *os.File
  21. rc syscall.RawConn
  22. }
  23. // High-level methods which provide convenience over raw system calls.
  24. // Close closes the underlying file descriptor for the Conn, which also causes
  25. // all in-flight I/O operations to immediately unblock and return errors. Any
  26. // subsequent uses of Conn will result in EBADF.
  27. func (c *Conn) Close() error {
  28. // The caller has expressed an intent to close the socket, so immediately
  29. // increment s.closed to force further calls to result in EBADF before also
  30. // closing the file descriptor to unblock any outstanding operations.
  31. //
  32. // Because other operations simply check for s.closed != 0, we will permit
  33. // double Close, which would increment s.closed beyond 1.
  34. if atomic.AddUint32(&c.closed, 1) != 1 {
  35. // Multiple Close calls.
  36. return nil
  37. }
  38. return os.NewSyscallError("close", c.fd.Close())
  39. }
  40. // Read implements io.Reader by reading directly from the underlying file
  41. // descriptor.
  42. func (c *Conn) Read(b []byte) (int, error) { return c.fd.Read(b) }
  43. // Write implements io.Writer by writing directly to the underlying file
  44. // descriptor.
  45. func (c *Conn) Write(b []byte) (int, error) { return c.fd.Write(b) }
  46. // SetDeadline sets both the read and write deadlines associated with the Conn.
  47. func (c *Conn) SetDeadline(t time.Time) error { return c.fd.SetDeadline(t) }
  48. // SetReadDeadline sets the read deadline associated with the Conn.
  49. func (c *Conn) SetReadDeadline(t time.Time) error { return c.fd.SetReadDeadline(t) }
  50. // SetWriteDeadline sets the write deadline associated with the Conn.
  51. func (c *Conn) SetWriteDeadline(t time.Time) error { return c.fd.SetWriteDeadline(t) }
  52. // ReadBuffer gets the size of the operating system's receive buffer associated
  53. // with the Conn.
  54. func (c *Conn) ReadBuffer() (int, error) {
  55. return c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_RCVBUF)
  56. }
  57. // WriteBuffer gets the size of the operating system's transmit buffer
  58. // associated with the Conn.
  59. func (c *Conn) WriteBuffer() (int, error) {
  60. return c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_SNDBUF)
  61. }
  62. // SetReadBuffer sets the size of the operating system's receive buffer
  63. // associated with the Conn.
  64. //
  65. // When called with elevated privileges on Linux, the SO_RCVBUFFORCE option will
  66. // be used to override operating system limits. Otherwise SO_RCVBUF is used
  67. // (which obeys operating system limits).
  68. func (c *Conn) SetReadBuffer(bytes int) error { return c.setReadBuffer(bytes) }
  69. // SetWriteBuffer sets the size of the operating system's transmit buffer
  70. // associated with the Conn.
  71. //
  72. // When called with elevated privileges on Linux, the SO_SNDBUFFORCE option will
  73. // be used to override operating system limits. Otherwise SO_SNDBUF is used
  74. // (which obeys operating system limits).
  75. func (c *Conn) SetWriteBuffer(bytes int) error { return c.setWriteBuffer(bytes) }
  76. // SyscallConn returns a raw network connection. This implements the
  77. // syscall.Conn interface.
  78. //
  79. // SyscallConn is intended for advanced use cases, such as getting and setting
  80. // arbitrary socket options using the socket's file descriptor. If possible,
  81. // those operations should be performed using methods on Conn instead.
  82. //
  83. // Once invoked, it is the caller's responsibility to ensure that operations
  84. // performed using Conn and the syscall.RawConn do not conflict with each other.
  85. func (c *Conn) SyscallConn() (syscall.RawConn, error) {
  86. if atomic.LoadUint32(&c.closed) != 0 {
  87. return nil, os.NewSyscallError("syscallconn", unix.EBADF)
  88. }
  89. // TODO(mdlayher): mutex or similar to enforce syscall.RawConn contract of
  90. // FD remaining valid for duration of calls?
  91. return c.rc, nil
  92. }
  93. // Socket wraps the socket(2) system call to produce a Conn. domain, typ, and
  94. // proto are passed directly to socket(2), and name should be a unique name for
  95. // the socket type such as "netlink" or "vsock".
  96. //
  97. // If the operating system supports SOCK_CLOEXEC and SOCK_NONBLOCK, they are
  98. // automatically applied to typ to mirror the standard library's socket flag
  99. // behaviors.
  100. func Socket(domain, typ, proto int, name string) (*Conn, error) {
  101. var (
  102. fd int
  103. err error
  104. )
  105. for {
  106. fd, err = unix.Socket(domain, typ|socketFlags, proto)
  107. switch {
  108. case err == nil:
  109. // Some OSes already set CLOEXEC with typ.
  110. if !flagCLOEXEC {
  111. unix.CloseOnExec(fd)
  112. }
  113. // No error, prepare the Conn.
  114. return newConn(fd, name)
  115. case !ready(err):
  116. // System call interrupted or not ready, try again.
  117. continue
  118. case err == unix.EINVAL, err == unix.EPROTONOSUPPORT:
  119. // On Linux, SOCK_NONBLOCK and SOCK_CLOEXEC were introduced in
  120. // 2.6.27. On FreeBSD, both flags were introduced in FreeBSD 10.
  121. // EINVAL and EPROTONOSUPPORT check for earlier versions of these
  122. // OSes respectively.
  123. //
  124. // Mirror what the standard library does when creating file
  125. // descriptors: avoid racing a fork/exec with the creation of new
  126. // file descriptors, so that child processes do not inherit socket
  127. // file descriptors unexpectedly.
  128. //
  129. // For a more thorough explanation, see similar work in the Go tree:
  130. // func sysSocket in net/sock_cloexec.go, as well as the detailed
  131. // comment in syscall/exec_unix.go.
  132. syscall.ForkLock.RLock()
  133. fd, err = unix.Socket(domain, typ, proto)
  134. if err == nil {
  135. unix.CloseOnExec(fd)
  136. }
  137. syscall.ForkLock.RUnlock()
  138. return newConn(fd, name)
  139. default:
  140. // Unhandled error.
  141. return nil, os.NewSyscallError("socket", err)
  142. }
  143. }
  144. }
  145. // TODO(mdlayher): consider exporting newConn as New?
  146. // newConn wraps an existing file descriptor to create a Conn. name should be a
  147. // unique name for the socket type such as "netlink" or "vsock".
  148. func newConn(fd int, name string) (*Conn, error) {
  149. // All Conn I/O is nonblocking for integration with Go's runtime network
  150. // poller. Depending on the OS this might already be set but it can't hurt
  151. // to set it again.
  152. if err := unix.SetNonblock(fd, true); err != nil {
  153. return nil, os.NewSyscallError("setnonblock", err)
  154. }
  155. // os.NewFile registers the non-blocking file descriptor with the runtime
  156. // poller, which is then used for most subsequent operations except those
  157. // that require raw I/O via SyscallConn.
  158. //
  159. // See also: https://golang.org/pkg/os/#NewFile
  160. f := os.NewFile(uintptr(fd), name)
  161. rc, err := f.SyscallConn()
  162. if err != nil {
  163. return nil, err
  164. }
  165. return &Conn{
  166. name: name,
  167. fd: f,
  168. rc: rc,
  169. }, nil
  170. }
  171. // Low-level methods which provide raw system call access.
  172. // Accept wraps accept(2) or accept4(2) depending on the operating system, but
  173. // returns a Conn for the accepted connection rather than a raw file descriptor.
  174. //
  175. // If the operating system supports accept4(2) (which allows flags),
  176. // SOCK_CLOEXEC and SOCK_NONBLOCK are automatically applied to flags to mirror
  177. // the standard library's socket flag behaviors.
  178. //
  179. // If the operating system only supports accept(2) (which does not allow flags)
  180. // and flags is not zero, an error will be returned.
  181. func (c *Conn) Accept(flags int) (*Conn, unix.Sockaddr, error) {
  182. var (
  183. nfd int
  184. sa unix.Sockaddr
  185. err error
  186. )
  187. doErr := c.read(sysAccept, func(fd int) error {
  188. // Either accept(2) or accept4(2) depending on the OS.
  189. nfd, sa, err = accept(fd, flags|socketFlags)
  190. return err
  191. })
  192. if doErr != nil {
  193. return nil, nil, doErr
  194. }
  195. if err != nil {
  196. // sysAccept is either "accept" or "accept4" depending on the OS.
  197. return nil, nil, os.NewSyscallError(sysAccept, err)
  198. }
  199. // Successfully accepted a connection, wrap it in a Conn for use by the
  200. // caller.
  201. ac, err := newConn(nfd, c.name)
  202. if err != nil {
  203. return nil, nil, err
  204. }
  205. return ac, sa, nil
  206. }
  207. // Bind wraps bind(2).
  208. func (c *Conn) Bind(sa unix.Sockaddr) error {
  209. const op = "bind"
  210. var err error
  211. doErr := c.control(op, func(fd int) error {
  212. err = unix.Bind(fd, sa)
  213. return err
  214. })
  215. if doErr != nil {
  216. return doErr
  217. }
  218. return os.NewSyscallError(op, err)
  219. }
  220. // Connect wraps connect(2).
  221. func (c *Conn) Connect(sa unix.Sockaddr) error {
  222. const op = "connect"
  223. var err error
  224. doErr := c.write(op, func(fd int) error {
  225. err = unix.Connect(fd, sa)
  226. return err
  227. })
  228. if doErr != nil {
  229. return doErr
  230. }
  231. if err == unix.EISCONN {
  232. // Darwin reports EISCONN if already connected, but the socket is
  233. // established and we don't need to report an error.
  234. return nil
  235. }
  236. return os.NewSyscallError(op, err)
  237. }
  238. // Getsockname wraps getsockname(2).
  239. func (c *Conn) Getsockname() (unix.Sockaddr, error) {
  240. const op = "getsockname"
  241. var (
  242. sa unix.Sockaddr
  243. err error
  244. )
  245. doErr := c.control(op, func(fd int) error {
  246. sa, err = unix.Getsockname(fd)
  247. return err
  248. })
  249. if doErr != nil {
  250. return nil, doErr
  251. }
  252. return sa, os.NewSyscallError(op, err)
  253. }
  254. // GetsockoptInt wraps getsockopt(2) for integer values.
  255. func (c *Conn) GetsockoptInt(level, opt int) (int, error) {
  256. const op = "getsockopt"
  257. var (
  258. value int
  259. err error
  260. )
  261. doErr := c.control(op, func(fd int) error {
  262. value, err = unix.GetsockoptInt(fd, level, opt)
  263. return err
  264. })
  265. if doErr != nil {
  266. return 0, doErr
  267. }
  268. return value, os.NewSyscallError(op, err)
  269. }
  270. // Listen wraps listen(2).
  271. func (c *Conn) Listen(n int) error {
  272. const op = "listen"
  273. var err error
  274. doErr := c.control(op, func(fd int) error {
  275. err = unix.Listen(fd, n)
  276. return err
  277. })
  278. if doErr != nil {
  279. return doErr
  280. }
  281. return os.NewSyscallError(op, err)
  282. }
  283. // Recvmsg wraps recvmsg(2).
  284. func (c *Conn) Recvmsg(p, oob []byte, flags int) (int, int, int, unix.Sockaddr, error) {
  285. const op = "recvmsg"
  286. var (
  287. n, oobn, recvflags int
  288. from unix.Sockaddr
  289. err error
  290. )
  291. doErr := c.read(op, func(fd int) error {
  292. n, oobn, recvflags, from, err = unix.Recvmsg(fd, p, oob, flags)
  293. return err
  294. })
  295. if doErr != nil {
  296. return 0, 0, 0, nil, doErr
  297. }
  298. return n, oobn, recvflags, from, os.NewSyscallError(op, err)
  299. }
  300. // Sendmsg wraps sendmsg(2).
  301. func (c *Conn) Sendmsg(p, oob []byte, to unix.Sockaddr, flags int) error {
  302. const op = "sendmsg"
  303. var err error
  304. doErr := c.write(op, func(fd int) error {
  305. err = unix.Sendmsg(fd, p, oob, to, flags)
  306. return err
  307. })
  308. if doErr != nil {
  309. return doErr
  310. }
  311. return os.NewSyscallError(op, err)
  312. }
  313. // SetsockoptInt wraps setsockopt(2) for integer values.
  314. func (c *Conn) SetsockoptInt(level, opt, value int) error {
  315. const op = "setsockopt"
  316. var err error
  317. doErr := c.control(op, func(fd int) error {
  318. err = unix.SetsockoptInt(fd, level, opt, value)
  319. return err
  320. })
  321. if doErr != nil {
  322. return doErr
  323. }
  324. return os.NewSyscallError(op, err)
  325. }
  326. // Conn low-level read/write/control functions. These functions mirror the
  327. // syscall.RawConn APIs but the input closures return errors rather than
  328. // booleans. Any syscalls invoked within f should return their error to allow
  329. // the Conn to check for readiness with the runtime network poller, or to retry
  330. // operations which may have been interrupted by EINTR or similar.
  331. //
  332. // Note that errors from the input closure functions are not propagated to the
  333. // error return values of read/write/control, and the caller is still
  334. // responsible for error handling.
  335. // read executes f, a read function, against the associated file descriptor.
  336. // op is used to create an *os.SyscallError if the file descriptor is closed.
  337. func (c *Conn) read(op string, f func(fd int) error) error {
  338. if atomic.LoadUint32(&c.closed) != 0 {
  339. return os.NewSyscallError(op, unix.EBADF)
  340. }
  341. return c.rc.Read(func(fd uintptr) bool {
  342. return ready(f(int(fd)))
  343. })
  344. }
  345. // write executes f, a write function, against the associated file descriptor.
  346. // op is used to create an *os.SyscallError if the file descriptor is closed.
  347. func (c *Conn) write(op string, f func(fd int) error) error {
  348. if atomic.LoadUint32(&c.closed) != 0 {
  349. return os.NewSyscallError(op, unix.EBADF)
  350. }
  351. return c.rc.Write(func(fd uintptr) bool {
  352. return ready(f(int(fd)))
  353. })
  354. }
  355. // control executes f, a control function, against the associated file
  356. // descriptor. op is used to create an *os.SyscallError if the file descriptor
  357. // is closed.
  358. func (c *Conn) control(op string, f func(fd int) error) error {
  359. if atomic.LoadUint32(&c.closed) != 0 {
  360. return os.NewSyscallError(op, unix.EBADF)
  361. }
  362. return c.rc.Control(func(fd uintptr) {
  363. // Repeatedly attempt the syscall(s) invoked by f until completion is
  364. // indicated by the return value of ready.
  365. for {
  366. if ready(f(int(fd))) {
  367. return
  368. }
  369. }
  370. })
  371. }
  372. // ready indicates readiness based on the value of err.
  373. func ready(err error) bool {
  374. // When a socket is in non-blocking mode, we might see EAGAIN or
  375. // EINPROGRESS. In that case, return false to let the poller wait for
  376. // readiness. See the source code for internal/poll.FD.RawRead for more
  377. // details.
  378. //
  379. // Starting in Go 1.14, goroutines are asynchronously preemptible. The 1.14
  380. // release notes indicate that applications should expect to see EINTR more
  381. // often on slow system calls (like recvmsg while waiting for input), so we
  382. // must handle that case as well.
  383. switch err {
  384. case unix.EAGAIN, unix.EINTR, unix.EINPROGRESS:
  385. // Not ready.
  386. return false
  387. default:
  388. // Ready regardless of whether there was an error or no error.
  389. return true
  390. }
  391. }