|
|
@@ -43,9 +43,8 @@ func IsSupported() bool {
|
|
|
}
|
|
|
|
|
|
const (
|
|
|
- netlinkSocketIOTimeout = 10 * time.Millisecond
|
|
|
- defaultSocketMark = 0x70736970 // "PSIP"
|
|
|
- appliedSpecCacheTTL = 1 * time.Minute
|
|
|
+ defaultSocketMark = 0x70736970 // "PSIP"
|
|
|
+ appliedSpecCacheTTL = 1 * time.Minute
|
|
|
)
|
|
|
|
|
|
// Manipulator is a SYN-ACK packet manipulator.
|
|
|
@@ -79,6 +78,9 @@ const (
|
|
|
//
|
|
|
// NFQUEUE with queue-bypass requires Linux kernel 2.6.39; 3.16 or later is
|
|
|
// validated and recommended.
|
|
|
+//
|
|
|
+// Due to use of NFQUEUE, larger than max socket buffer sizes, and raw
|
|
|
+// sockets, Manipulator requires CAP_NET_ADMIN and CAP_NET_RAW.
|
|
|
type Manipulator struct {
|
|
|
config *Config
|
|
|
mutex sync.Mutex
|
|
|
@@ -195,14 +197,19 @@ func (m *Manipulator) Start() (retErr error) {
|
|
|
// payload, this size should be more than sufficient.
|
|
|
maxPacketLen := uint32(1500)
|
|
|
|
|
|
- // Use the kernel default of 1024:
|
|
|
+ // The kernel default is 1024:
|
|
|
// https://github.com/torvalds/linux/blob/cd8dead0c39457e58ec1d36db93aedca811d48f1/net/netfilter/nfnetlink_queue.c#L51,
|
|
|
// via https://github.com/florianl/go-nfqueue/issues/3.
|
|
|
- maxQueueLen := uint32(1024)
|
|
|
+ // We use a larger queue size to accomodate more concurrent SYN-ACK packets.
|
|
|
+ maxQueueLen := uint32(2048)
|
|
|
|
|
|
- // Note: runContext alone is not sufficient to interrupt the
|
|
|
- // nfqueue.socketCallback goroutine spawned by nfqueue.Register; timeouts
|
|
|
- // must be set. See comment in Manipulator.Stop.
|
|
|
+ // Timeout note: on a small subset of production servers, we have found that
|
|
|
+ // setting a non-zero read timeout results in occasional "orphaned" packets
|
|
|
+ // which remain in the queue but are not delivered to handleInterceptedPacket
|
|
|
+ // for a verdict. This phenomenon leads to a stall in nfqueue processing once
|
|
|
+ // the queue fills up with packers apparently awaiting a verdict. The shorter
|
|
|
+ // the timeout, the faster that orphaned packets accumulate. With no timeout,
|
|
|
+ // and reads in blocking mode, this phenomenon does not occur.
|
|
|
|
|
|
m.nfqueue, err = nfqueue.Open(
|
|
|
&nfqueue.Config{
|
|
|
@@ -211,8 +218,8 @@ func (m *Manipulator) Start() (retErr error) {
|
|
|
MaxQueueLen: maxQueueLen,
|
|
|
Copymode: nfqueue.NfQnlCopyPacket,
|
|
|
Logger: newNfqueueLogger(m.config.Logger),
|
|
|
- ReadTimeout: netlinkSocketIOTimeout,
|
|
|
- WriteTimeout: netlinkSocketIOTimeout,
|
|
|
+ ReadTimeout: 0,
|
|
|
+ WriteTimeout: 0,
|
|
|
})
|
|
|
if err != nil {
|
|
|
return errors.Trace(err)
|
|
|
@@ -223,6 +230,16 @@ func (m *Manipulator) Start() (retErr error) {
|
|
|
}
|
|
|
}()
|
|
|
|
|
|
+ // Set a netlink socket receive buffer size that is significantly larger than
|
|
|
+ // the typical default of 212992. This avoids ENOBUFS in the case of many
|
|
|
+ // netlink messages from the kernel (capped by the max queue size). Note that
|
|
|
+ // the CAP_NET_ADMIN may be required when this exceeds the configured max
|
|
|
+ // buffer size.
|
|
|
+ err = m.nfqueue.Con.SetReadBuffer(1703936)
|
|
|
+ if err != nil {
|
|
|
+ return errors.Trace(err)
|
|
|
+ }
|
|
|
+
|
|
|
runContext, stopRunning := context.WithCancel(context.Background())
|
|
|
defer func() {
|
|
|
if retErr != nil {
|
|
|
@@ -252,23 +269,13 @@ func (m *Manipulator) Stop() {
|
|
|
return
|
|
|
}
|
|
|
|
|
|
- m.stopRunning()
|
|
|
+ // Call stopRunning before interrupting the blocked read; this ensures that
|
|
|
+ // the nfqueue socketCallback loop will exit after the read is interrupted.
|
|
|
|
|
|
- // stopRunning will cancel the context passed into nfqueue.Register. The
|
|
|
- // goroutine spawned by Register, nfqueue.socketCallback, polls the context
|
|
|
- // after a read timeout:
|
|
|
- // https://github.com/florianl/go-nfqueue/blob/1e38df738c06deffbac08da8fec4b7c28a69b918/nfqueue_gteq_1.12.go#L138-L146
|
|
|
- //
|
|
|
- // There's no stop synchronization exposed by nfqueue. Calling nfqueue.Close
|
|
|
- // while socketCallback is still running can result in errors such as
|
|
|
- // "nfqueuenfqueue_gteq_1.12.go:134: Could not unbind from queue: netlink
|
|
|
- // send: sendmsg: bad file descriptor".
|
|
|
- //
|
|
|
- // To avoid invalid file descriptor operations and spurious error messages,
|
|
|
- // sleep for two polling periods, which should be sufficient, in most cases,
|
|
|
- // for socketCallback to poll the context and exit.
|
|
|
+ m.stopRunning()
|
|
|
|
|
|
- time.Sleep(2 * netlinkSocketIOTimeout)
|
|
|
+ // Interrupt a blocked read.
|
|
|
+ m.nfqueue.Con.SetDeadline(time.Unix(0, 1))
|
|
|
|
|
|
m.nfqueue.Close()
|
|
|
|