Patches contributed by the University of Waterloo


commit 8a6de2627fd37b76c6e8e77fa6c0fe82888e3fc3
Author: Martin Karsten <mkarsten@uwaterloo.ca>
Date:   Sat Nov 9 05:02:34 2024 +0000

    eventpoll: Control irq suspension for prefer_busy_poll
    
    When events are reported to userland and prefer_busy_poll is set, irqs
    are temporarily suspended using napi_suspend_irqs.
    
    If no events are found and ep_poll would go to sleep, irq suspension is
    cancelled using napi_resume_irqs.
    
    Signed-off-by: Martin Karsten <mkarsten@uwaterloo.ca>
    Co-developed-by: Joe Damato <jdamato@fastly.com>
    Signed-off-by: Joe Damato <jdamato@fastly.com>
    Tested-by: Joe Damato <jdamato@fastly.com>
    Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
    Acked-by: Stanislav Fomichev <sdf@fomichev.me>
    Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
    Link: https://patch.msgid.link/20241109050245.191288-5-jdamato@fastly.com
    Signed-off-by: Jakub Kicinski <kuba@kernel.org>

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index f9e0d9307dad..83bcb559b89f 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -457,6 +457,8 @@ static bool ep_busy_loop(struct eventpoll *ep, int nonblock)
 		 * it back in when we have moved a socket with a valid NAPI
 		 * ID onto the ready list.
 		 */
+		if (prefer_busy_poll)
+			napi_resume_irqs(napi_id);
 		ep->napi_id = 0;
 		return false;
 	}
@@ -540,6 +542,22 @@ static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
 	}
 }
 
+static void ep_suspend_napi_irqs(struct eventpoll *ep)
+{
+	unsigned int napi_id = READ_ONCE(ep->napi_id);
+
+	if (napi_id >= MIN_NAPI_ID && READ_ONCE(ep->prefer_busy_poll))
+		napi_suspend_irqs(napi_id);
+}
+
+static void ep_resume_napi_irqs(struct eventpoll *ep)
+{
+	unsigned int napi_id = READ_ONCE(ep->napi_id);
+
+	if (napi_id >= MIN_NAPI_ID && READ_ONCE(ep->prefer_busy_poll))
+		napi_resume_irqs(napi_id);
+}
+
 #else
 
 static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock)
@@ -557,6 +575,14 @@ static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
 	return -EOPNOTSUPP;
 }
 
+static void ep_suspend_napi_irqs(struct eventpoll *ep)
+{
+}
+
+static void ep_resume_napi_irqs(struct eventpoll *ep)
+{
+}
+
 #endif /* CONFIG_NET_RX_BUSY_POLL */
 
 /*
@@ -788,6 +814,7 @@ static bool ep_refcount_dec_and_test(struct eventpoll *ep)
 
 static void ep_free(struct eventpoll *ep)
 {
+	ep_resume_napi_irqs(ep);
 	mutex_destroy(&ep->mtx);
 	free_uid(ep->user);
 	wakeup_source_unregister(ep->ws);
@@ -2005,8 +2032,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 			 * trying again in search of more luck.
 			 */
 			res = ep_send_events(ep, events, maxevents);
-			if (res)
+			if (res) {
+				if (res > 0)
+					ep_suspend_napi_irqs(ep);
 				return res;
+			}
 		}
 
 		if (timed_out)
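
A minimal userspace sketch of how an application opts in to irq
suspension, assuming a kernel with CONFIG_NET_RX_BUSY_POLL and this
series applied. The struct and ioctl definitions mirror
include/uapi/linux/eventpoll.h for libcs that do not yet expose them;
the irq_suspend_timeout itself is set per NAPI via netlink (see the
napi-set example further down), and since the epoll context tracks a
single NAPI ID, all sockets in the set should come from the same queue.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>

#ifndef EPIOCSPARAMS
struct epoll_params {
	uint32_t busy_poll_usecs;
	uint16_t busy_poll_budget;
	uint8_t prefer_busy_poll;
	uint8_t __pad;			/* pad to a multiple of 64 bits */
};
#define EPOLL_IOC_TYPE 0x8A
#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
#endif

int main(void)
{
	struct epoll_params params = {
		.busy_poll_usecs = 0,	/* 0 still polls once, see next patch */
		.busy_poll_budget = 64,
		.prefer_busy_poll = 1,	/* required for irq suspension */
	};
	int epfd = epoll_create1(0);

	if (epfd < 0 || ioctl(epfd, EPIOCSPARAMS, &params) < 0) {
		perror("epoll busy-poll setup");
		return EXIT_FAILURE;
	}
	/* Usual epoll_ctl()/epoll_wait() loop from here: while epoll_wait()
	 * keeps returning events, ep_suspend_napi_irqs() keeps device irqs
	 * masked; once it would block, ep_resume_napi_irqs() hands
	 * interrupt processing back to the stack.
	 */
	return 0;
}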

commit ab5b28b007a7ab3edeb0a5e1d04669945ddb1d37
Author: Martin Karsten <mkarsten@uwaterloo.ca>
Date:   Sat Nov 9 05:02:33 2024 +0000

    eventpoll: Trigger napi_busy_loop, if prefer_busy_poll is set
    
    Setting prefer_busy_poll now leads to an effectively nonblocking
    iteration through napi_busy_loop, even when busy_poll_usecs is 0.
    
    Signed-off-by: Martin Karsten <mkarsten@uwaterloo.ca>
    Co-developed-by: Joe Damato <jdamato@fastly.com>
    Signed-off-by: Joe Damato <jdamato@fastly.com>
    Tested-by: Joe Damato <jdamato@fastly.com>
    Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
    Acked-by: Stanislav Fomichev <sdf@fomichev.me>
    Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
    Link: https://patch.msgid.link/20241109050245.191288-4-jdamato@fastly.com
    Signed-off-by: Jakub Kicinski <kuba@kernel.org>

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1ae4542f0bd8..f9e0d9307dad 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -420,7 +420,9 @@ static bool busy_loop_ep_timeout(unsigned long start_time,
 
 static bool ep_busy_loop_on(struct eventpoll *ep)
 {
-	return !!READ_ONCE(ep->busy_poll_usecs) || net_busy_loop_on();
+	return !!READ_ONCE(ep->busy_poll_usecs) ||
+	       READ_ONCE(ep->prefer_busy_poll) ||
+	       net_busy_loop_on();
 }
 
 static bool ep_busy_loop_end(void *p, unsigned long start_time)
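
ep_busy_loop_on() returning true for prefer_busy_poll alone is what
triggers the poll pass; the existing timeout check then bounds it to a
single iteration when busy_poll_usecs is 0. A standalone sketch of that
bound, with the current time passed in explicitly and the microsecond
values purely illustrative:

#include <stdbool.h>
#include <stdio.h>

/* Shape of busy_loop_ep_timeout(): with busy_poll_usecs == 0 the deadline
 * is effectively already past, so the first loop_end check stops the busy
 * loop after napi_busy_loop() has done one nonblocking pass.
 */
static bool busy_loop_ep_timeout(unsigned long start_time_us,
				 unsigned long busy_poll_usecs,
				 unsigned long now_us)
{
	return now_us > start_time_us + busy_poll_usecs;
}

int main(void)
{
	printf("%d\n", busy_loop_ep_timeout(1000, 0, 1001));	/* 1: stop at once */
	printf("%d\n", busy_loop_ep_timeout(1000, 64, 1001));	/* 0: keep polling */
	return 0;
}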

commit 3fcbecbdeb048dfd1bea824f4276717fed02d10e
Author: Martin Karsten <mkarsten@uwaterloo.ca>
Date:   Sat Nov 9 05:02:32 2024 +0000

    net: Add control functions for irq suspension
    
    The napi_suspend_irqs routine bootstraps irq suspension by elongating
    the defer timeout to irq_suspend_timeout.
    
    The napi_resume_irqs routine effectively cancels irq suspension by
    forcing the napi to be scheduled immediately.
    
    Signed-off-by: Martin Karsten <mkarsten@uwaterloo.ca>
    Co-developed-by: Joe Damato <jdamato@fastly.com>
    Signed-off-by: Joe Damato <jdamato@fastly.com>
    Tested-by: Joe Damato <jdamato@fastly.com>
    Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
    Acked-by: Stanislav Fomichev <sdf@fomichev.me>
    Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
    Link: https://patch.msgid.link/20241109050245.191288-3-jdamato@fastly.com
    Signed-off-by: Jakub Kicinski <kuba@kernel.org>

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index f03040baaefd..c858270141bc 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -52,6 +52,9 @@ void napi_busy_loop_rcu(unsigned int napi_id,
 			bool (*loop_end)(void *, unsigned long),
 			void *loop_end_arg, bool prefer_busy_poll, u16 budget);
 
+void napi_suspend_irqs(unsigned int napi_id);
+void napi_resume_irqs(unsigned int napi_id);
+
 #else /* CONFIG_NET_RX_BUSY_POLL */
 static inline unsigned long net_busy_loop_on(void)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index 4d910872963f..13d00fc10f55 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6507,6 +6507,43 @@ void napi_busy_loop(unsigned int napi_id,
 }
 EXPORT_SYMBOL(napi_busy_loop);
 
+void napi_suspend_irqs(unsigned int napi_id)
+{
+	struct napi_struct *napi;
+
+	rcu_read_lock();
+	napi = napi_by_id(napi_id);
+	if (napi) {
+		unsigned long timeout = napi_get_irq_suspend_timeout(napi);
+
+		if (timeout)
+			hrtimer_start(&napi->timer, ns_to_ktime(timeout),
+				      HRTIMER_MODE_REL_PINNED);
+	}
+	rcu_read_unlock();
+}
+
+void napi_resume_irqs(unsigned int napi_id)
+{
+	struct napi_struct *napi;
+
+	rcu_read_lock();
+	napi = napi_by_id(napi_id);
+	if (napi) {
+		/* If irq_suspend_timeout is set to 0 between the call to
+		 * napi_suspend_irqs and now, the original value still
+		 * determines the safety timeout as intended and napi_watchdog
+		 * will resume irq processing.
+		 */
+		if (napi_get_irq_suspend_timeout(napi)) {
+			local_bh_disable();
+			napi_schedule(napi);
+			local_bh_enable();
+		}
+	}
+	rcu_read_unlock();
+}
+
 #endif /* CONFIG_NET_RX_BUSY_POLL */
 
 static void __napi_hash_add_with_id(struct napi_struct *napi,

commit 5dc51ec86df6e2214d8398079c1e31736593ab53
Author: Martin Karsten <mkarsten@uwaterloo.ca>
Date:   Sat Nov 9 05:02:31 2024 +0000

    net: Add napi_struct parameter irq_suspend_timeout
    
    Add a per-NAPI IRQ suspension parameter, which can be get/set with
    netdev-genl.
    
    This patch doesn't change any behavior but prepares the code for other
    changes in the following commits which use irq_suspend_timeout as a
    timeout for IRQ suspension.
    
    Signed-off-by: Martin Karsten <mkarsten@uwaterloo.ca>
    Co-developed-by: Joe Damato <jdamato@fastly.com>
    Signed-off-by: Joe Damato <jdamato@fastly.com>
    Tested-by: Joe Damato <jdamato@fastly.com>
    Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
    Acked-by: Stanislav Fomichev <sdf@fomichev.me>
    Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
    Link: https://patch.msgid.link/20241109050245.191288-2-jdamato@fastly.com
    Signed-off-by: Jakub Kicinski <kuba@kernel.org>

diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index f9cb97d6106c..cbb544bd6c84 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -263,6 +263,11 @@ attribute-sets:
              the end of a NAPI cycle. This may add receive latency in exchange
              for reducing the number of frames processed by the network stack.
         type: uint
+      -
+        name: irq-suspend-timeout
+        doc: The timeout, in nanoseconds, of how long to suspend irq
+             processing, if event polling finds events
+        type: uint
   -
     name: queue
     attributes:
@@ -653,6 +658,7 @@ operations:
             - pid
             - defer-hard-irqs
             - gro-flush-timeout
+            - irq-suspend-timeout
       dump:
         request:
           attributes:
@@ -704,6 +710,7 @@ operations:
             - id
             - defer-hard-irqs
             - gro-flush-timeout
+            - irq-suspend-timeout
 
 kernel-family:
   headers: [ "linux/list.h"]
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index df4483598628..0aae346d919e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -348,6 +348,7 @@ struct gro_list {
  */
 struct napi_config {
 	u64 gro_flush_timeout;
+	u64 irq_suspend_timeout;
 	u32 defer_hard_irqs;
 	unsigned int napi_id;
 };
@@ -384,6 +385,7 @@ struct napi_struct {
 	struct hrtimer		timer;
 	struct task_struct	*thread;
 	unsigned long		gro_flush_timeout;
+	unsigned long		irq_suspend_timeout;
 	u32			defer_hard_irqs;
 	/* control-path-only fields follow */
 	struct list_head	dev_list;
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index e3ebb49f60d2..e4be227d3ad6 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -124,6 +124,7 @@ enum {
 	NETDEV_A_NAPI_PID,
 	NETDEV_A_NAPI_DEFER_HARD_IRQS,
 	NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
+	NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
 
 	__NETDEV_A_NAPI_MAX,
 	NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
diff --git a/net/core/dev.c b/net/core/dev.c
index 6a31152e4606..4d910872963f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6666,6 +6666,7 @@ static void napi_restore_config(struct napi_struct *n)
 {
 	n->defer_hard_irqs = n->config->defer_hard_irqs;
 	n->gro_flush_timeout = n->config->gro_flush_timeout;
+	n->irq_suspend_timeout = n->config->irq_suspend_timeout;
 	/* a NAPI ID might be stored in the config, if so use it. if not, use
 	 * napi_hash_add to generate one for us. It will be saved to the config
 	 * in napi_disable.
@@ -6680,6 +6681,7 @@ static void napi_save_config(struct napi_struct *n)
 {
 	n->config->defer_hard_irqs = n->defer_hard_irqs;
 	n->config->gro_flush_timeout = n->gro_flush_timeout;
+	n->config->irq_suspend_timeout = n->irq_suspend_timeout;
 	n->config->napi_id = n->napi_id;
 	napi_hash_del(n);
 }
diff --git a/net/core/dev.h b/net/core/dev.h
index 7881bced70a9..d043dee25a68 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -236,6 +236,31 @@ static inline void netdev_set_gro_flush_timeout(struct net_device *netdev,
 		netdev->napi_config[i].gro_flush_timeout = timeout;
 }
 
+/**
+ * napi_get_irq_suspend_timeout - get the irq_suspend_timeout
+ * @n: napi struct to get the irq_suspend_timeout from
+ *
+ * Return: the per-NAPI value of the irq_suspend_timeout field.
+ */
+static inline unsigned long
+napi_get_irq_suspend_timeout(const struct napi_struct *n)
+{
+	return READ_ONCE(n->irq_suspend_timeout);
+}
+
+/**
+ * napi_set_irq_suspend_timeout - set the irq_suspend_timeout for a napi
+ * @n: napi struct to set the irq_suspend_timeout
+ * @timeout: timeout value to set
+ *
+ * napi_set_irq_suspend_timeout sets the per-NAPI irq_suspend_timeout
+ */
+static inline void napi_set_irq_suspend_timeout(struct napi_struct *n,
+						unsigned long timeout)
+{
+	WRITE_ONCE(n->irq_suspend_timeout, timeout);
+}
+
 int rps_cpumask_housekeeping(struct cpumask *mask);
 
 #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index 21de7e10be16..a89cbd8d87c3 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -92,10 +92,11 @@ static const struct nla_policy netdev_bind_rx_nl_policy[NETDEV_A_DMABUF_FD + 1]
 };
 
 /* NETDEV_CMD_NAPI_SET - do */
-static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT + 1] = {
+static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT + 1] = {
 	[NETDEV_A_NAPI_ID] = { .type = NLA_U32, },
 	[NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range),
 	[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, },
+	[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, },
 };
 
 /* Ops table for netdev */
@@ -186,7 +187,7 @@ static const struct genl_split_ops netdev_nl_ops[] = {
 		.cmd		= NETDEV_CMD_NAPI_SET,
 		.doit		= netdev_nl_napi_set_doit,
 		.policy		= netdev_napi_set_nl_policy,
-		.maxattr	= NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
+		.maxattr	= NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
 		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
 	},
 };
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index b49c3b4e5fbe..765ce7c9d73b 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -161,6 +161,7 @@ static int
 netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
 			const struct genl_info *info)
 {
+	unsigned long irq_suspend_timeout;
 	unsigned long gro_flush_timeout;
 	u32 napi_defer_hard_irqs;
 	void *hdr;
@@ -196,6 +197,11 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
 			napi_defer_hard_irqs))
 		goto nla_put_failure;
 
+	irq_suspend_timeout = napi_get_irq_suspend_timeout(napi);
+	if (nla_put_uint(rsp, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
+			 irq_suspend_timeout))
+		goto nla_put_failure;
+
 	gro_flush_timeout = napi_get_gro_flush_timeout(napi);
 	if (nla_put_uint(rsp, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
 			 gro_flush_timeout))
@@ -306,6 +312,7 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 static int
 netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info)
 {
+	u64 irq_suspend_timeout = 0;
 	u64 gro_flush_timeout = 0;
 	u32 defer = 0;
 
@@ -314,6 +321,11 @@ netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info)
 		napi_set_defer_hard_irqs(napi, defer);
 	}
 
+	if (info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]) {
+		irq_suspend_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]);
+		napi_set_irq_suspend_timeout(napi, irq_suspend_timeout);
+	}
+
 	if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) {
 		gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]);
 		napi_set_gro_flush_timeout(napi, gro_flush_timeout);
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index e3ebb49f60d2..e4be227d3ad6 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -124,6 +124,7 @@ enum {
 	NETDEV_A_NAPI_PID,
 	NETDEV_A_NAPI_DEFER_HARD_IRQS,
 	NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
+	NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
 
 	__NETDEV_A_NAPI_MAX,
 	NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
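
For reference, a hypothetical way to set and read back the new attribute
with the YNL CLI (tools/net/ynl/cli.py); the NAPI ID 345 and the 20 ms
(20000000 ns) value are placeholders:

$ tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \
      --do napi-set --json='{"id": 345, "irq-suspend-timeout": 20000000}'
$ tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \
      --do napi-get --json='{"id": 345}'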

commit b9ca079dd6b09e08863aa998edf5c47597806c05
Author: Martin Karsten <mkarsten@uwaterloo.ca>
Date:   Tue Aug 6 12:33:01 2024 +0000

    eventpoll: Annotate data-race of busy_poll_usecs
    
    A struct eventpoll's busy_poll_usecs field can be modified via a user
    ioctl at any time. All reads of this field should be annotated with
    READ_ONCE.
    
    Fixes: 85455c795c07 ("eventpoll: support busy poll per epoll instance")
    Cc: stable@vger.kernel.org
    Signed-off-by: Martin Karsten <mkarsten@uwaterloo.ca>
    Link: https://lore.kernel.org/r/20240806123301.167557-1-jdamato@fastly.com
    Reviewed-by: Joe Damato <jdamato@fastly.com>
    Signed-off-by: Christian Brauner <brauner@kernel.org>

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 6c0a1e9715ea..145f5349c612 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -420,7 +420,7 @@ static bool busy_loop_ep_timeout(unsigned long start_time,
 
 static bool ep_busy_loop_on(struct eventpoll *ep)
 {
-	return !!ep->busy_poll_usecs || net_busy_loop_on();
+	return !!READ_ONCE(ep->busy_poll_usecs) || net_busy_loop_on();
 }
 
 static bool ep_busy_loop_end(void *p, unsigned long start_time)
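
The pattern in miniature, as a compilable userspace sketch; the macros
approximate the kernel's volatile-access semantics, and the writer
thread stands in for the EPIOCSPARAMS ioctl path that can modify the
field at any time:

#include <pthread.h>
#include <stdio.h>

#define READ_ONCE(x)	(*(const volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, val)	(*(volatile __typeof__(x) *)&(x) = (val))

static unsigned int busy_poll_usecs;	/* stand-in for ep->busy_poll_usecs */

static void *ioctl_side(void *arg)	/* concurrent writer */
{
	(void)arg;
	WRITE_ONCE(busy_poll_usecs, 64);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, ioctl_side, NULL);
	/* Without READ_ONCE the compiler may tear, refetch, or cache this
	 * load; the annotation forces a single whole-width access.
	 */
	int on = !!READ_ONCE(busy_poll_usecs);

	pthread_join(t, NULL);
	printf("busy loop on: %d\n", on);
	return 0;
}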

commit 5080f39e8c72e01cf37e8359023e7018e2a4901e
Author: Nik Unger <njunger@uwaterloo.ca>
Date:   Mon Mar 13 10:16:58 2017 -0700

    netem: apply correct delay when rate throttling
    
    I recently reported on the netem list that iperf network benchmarks
    show unexpected results when a bandwidth throttling rate has been
    configured for netem. Specifically:
    
    1) The measured link bandwidth *increases* when a higher delay is added
    2) The measured link bandwidth appears higher than the specified limit
    3) The measured link bandwidth for the same very slow settings varies significantly across
      machines
    
    The issue can be reproduced by using tc to configure netem with a
    512kbit rate and various (none, 1us, 50ms, 100ms, 200ms) delays on a
    veth pair between network namespaces, and then using iperf (or any
    other network benchmarking tool) to test throughput. Complete detailed
    instructions are in the original email chain here:
    https://lists.linuxfoundation.org/pipermail/netem/2017-February/001672.html
    
    There appear to be two underlying bugs causing these effects:
    
    - The first issue causes long delays when the rate is slow and no
      delay is configured (e.g., "rate 512kbit"). This is because SKBs are
      not orphaned when no delay is configured, so orphaning does not
      occur until *after* the rate-induced delay has been applied. For
      this reason, adding a tiny delay (e.g., "rate 512kbit delay 1us")
      dramatically increases the measured bandwidth.
    
    - The second issue is that rate-induced delays are not correctly
      applied, allowing SKB delays to occur in parallel. The intended
      approach is to compute the delay for an SKB and to add this delay to
      the end of the current queue. However, the code does not detect
      existing SKBs in the queue due to improperly testing sch->q.qlen,
      which is nonzero even when packets exist only in the
      rbtree. Consequently, new SKBs do not wait for the current queue to
      empty. When packet delays vary significantly (e.g., if packet sizes
      are different), then this also causes unintended reordering.
    
    I modified the code to expect a delay (and orphan the SKB) when a rate
    is configured. I also added some defensive tests that correctly find
    the latest scheduled delivery time, even if it is (unexpectedly) for a
    packet in sch->q. I have tested these changes on the latest kernel
    (4.11.0-rc1+) and the iperf / ping test results are as expected.
    
    Signed-off-by: Nik Unger <njunger@uwaterloo.ca>
    Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index c8bb62a1e744..94b4928ad413 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -462,7 +462,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	/* If a delay is expected, orphan the skb. (orphaning usually takes
 	 * place at TX completion time, so _before_ the link transit delay)
 	 */
-	if (q->latency || q->jitter)
+	if (q->latency || q->jitter || q->rate)
 		skb_orphan_partial(skb);
 
 	/*
@@ -530,21 +530,31 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		now = psched_get_time();
 
 		if (q->rate) {
-			struct sk_buff *last;
+			struct netem_skb_cb *last = NULL;
+
+			if (sch->q.tail)
+				last = netem_skb_cb(sch->q.tail);
+			if (q->t_root.rb_node) {
+				struct sk_buff *t_skb;
+				struct netem_skb_cb *t_last;
+
+				t_skb = netem_rb_to_skb(rb_last(&q->t_root));
+				t_last = netem_skb_cb(t_skb);
+				if (!last ||
+				    t_last->time_to_send > last->time_to_send) {
+					last = t_last;
+				}
+			}
 
-			if (sch->q.qlen)
-				last = sch->q.tail;
-			else
-				last = netem_rb_to_skb(rb_last(&q->t_root));
 			if (last) {
 				/*
 				 * Last packet in queue is reference point (now),
 				 * calculate this time bonus and subtract
 				 * from delay.
 				 */
-				delay -= netem_skb_cb(last)->time_to_send - now;
+				delay -= last->time_to_send - now;
 				delay = max_t(psched_tdiff_t, 0, delay);
-				now = netem_skb_cb(last)->time_to_send;
+				now = last->time_to_send;
 			}
 
 			delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q);
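
The corrected reference-point arithmetic in miniature: a standalone
sketch where times are plain ticks, transmit_time stands in for
packet_len_2_sched_time(qdisc_pkt_len(skb), q), and a last_time_to_send
of 0 denotes an empty queue:

#include <stdio.h>

typedef long long psched_tdiff_t;

/* The latest scheduled delivery time (whether from sch->q or the rbtree)
 * becomes the reference point, so each new packet waits for the queue to
 * drain instead of being delayed in parallel.
 */
static psched_tdiff_t next_time_to_send(psched_tdiff_t now,
					psched_tdiff_t last_time_to_send,
					psched_tdiff_t delay,
					psched_tdiff_t transmit_time)
{
	if (last_time_to_send) {
		delay -= last_time_to_send - now;	/* subtract the time bonus */
		if (delay < 0)				/* max_t(psched_tdiff_t, 0, delay) */
			delay = 0;
		now = last_time_to_send;
	}
	return now + delay + transmit_time;
}

int main(void)
{
	/* Empty queue: 5 ticks of configured delay, 10 ticks of transmit time. */
	printf("%lld\n", next_time_to_send(100, 0, 5, 10));	/* 115 */
	/* A second packet enqueued at t=100, while the first departs at 115,
	 * now serializes behind it (125) rather than also departing at 115.
	 */
	printf("%lld\n", next_time_to_send(100, 115, 5, 10));	/* 125 */
	return 0;
}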

commit e0301d0d28a2e55da5b68338d6b9f933620da63b
Author: Adrian Nicoara <anicoara@uwaterloo.ca>
Date:   Mon Sep 8 15:02:49 2014 -0400

    staging: ozwpan: use kmalloc_array over kmalloc with multiply
    
    Cleanup checkpatch.pl warnings.
    
    Signed-off-by: Adrian Nicoara <anicoara@uwaterloo.ca>
    Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/staging/ozwpan/ozhcd.c b/drivers/staging/ozwpan/ozhcd.c
index ba2168f53559..e88045228607 100644
--- a/drivers/staging/ozwpan/ozhcd.c
+++ b/drivers/staging/ozwpan/ozhcd.c
@@ -1315,8 +1315,8 @@ static int oz_build_endpoints_for_config(struct usb_hcd *hcd,
 	if (num_iface) {
 		struct oz_interface *iface;
 
-		iface = kmalloc(num_iface*sizeof(struct oz_interface),
-				mem_flags | __GFP_ZERO);
+		iface = kmalloc_array(num_iface, sizeof(struct oz_interface),
+					mem_flags | __GFP_ZERO);
 		if (!iface)
 			return -ENOMEM;
 		spin_lock_bh(&ozhcd->hcd_lock);

commit 4bb3f83d47a60109d8bc9d47e547392d9b6c390c
Author: Adrian Nicoara <anicoara@uwaterloo.ca>
Date:   Mon Sep 8 14:43:44 2014 -0400

    staging: ozwpan: fix redundant return in void function
    
    Cleanup checkpatch.pl warnings.
    
    Signed-off-by: Adrian Nicoara <anicoara@uwaterloo.ca>
    Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/staging/ozwpan/ozproto.c b/drivers/staging/ozwpan/ozproto.c
index cae0e6f5ae68..3d3a3a890f73 100644
--- a/drivers/staging/ozwpan/ozproto.c
+++ b/drivers/staging/ozwpan/ozproto.c
@@ -112,7 +112,6 @@ static void oz_send_conn_rsp(struct oz_pd *pd, u8 status)
 	}
 	oz_dbg(ON, "TX: OZ_ELT_CONNECT_RSP %d", status);
 	dev_queue_xmit(skb);
-	return;
 }
 
 /*

commit d75b6c6154267618363f0126bd50c73f8d448a24
Author: Adrian Nicoara <anicoara@uwaterloo.ca>
Date:   Mon Sep 8 14:41:48 2014 -0400

    staging: ozwpan: fix redundant else after break or return
    
    Cleanup checkpatch.pl warnings.
    
    Signed-off-by: Adrian Nicoara <anicoara@uwaterloo.ca>
    Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/staging/ozwpan/ozhcd.c b/drivers/staging/ozwpan/ozhcd.c
index b30c4d87c25e..ba2168f53559 100644
--- a/drivers/staging/ozwpan/ozhcd.c
+++ b/drivers/staging/ozwpan/ozhcd.c
@@ -1010,10 +1010,9 @@ void oz_hcd_data_ind(void *hport, u8 endpoint, const u8 *data, int data_len)
 			urb->actual_length = copy_len;
 			oz_complete_urb(port->ozhcd->hcd, urb, 0);
 			return;
-		} else {
-			oz_dbg(ON, "buffering frame as URB is not available\n");
-			oz_hcd_buffer_data(ep, data, data_len);
 		}
+		oz_dbg(ON, "buffering frame as URB is not available\n");
+		oz_hcd_buffer_data(ep, data, data_len);
 		break;
 	case USB_ENDPOINT_XFER_ISOC:
 		oz_hcd_buffer_data(ep, data, data_len);
@@ -1903,8 +1902,7 @@ static int oz_hcd_hub_status_data(struct usb_hcd *hcd, char *buf)
 	spin_unlock_bh(&ozhcd->hcd_lock);
 	if (buf[0] != 0 || buf[1] != 0)
 		return 2;
-	else
-		return 0;
+	return 0;
 }
 
 /*
diff --git a/drivers/staging/ozwpan/ozpd.c b/drivers/staging/ozwpan/ozpd.c
index 26c104946a82..852c288aaf13 100644
--- a/drivers/staging/ozwpan/ozpd.c
+++ b/drivers/staging/ozwpan/ozpd.c
@@ -496,11 +496,10 @@ static int oz_send_next_queued_frame(struct oz_pd *pd, int more_data)
 			oz_dbg(TX_FRAMES, "Sending ISOC Frame, nb_isoc= %d\n",
 			       pd->nb_queued_isoc_frames);
 			return 0;
-		} else {
-			kfree_skb(skb);
-			oz_dbg(TX_FRAMES, "Dropping ISOC Frame>\n");
-			return -1;
 		}
+		kfree_skb(skb);
+		oz_dbg(TX_FRAMES, "Dropping ISOC Frame>\n");
+		return -1;
 	}
 
 	pd->last_sent_frame = e;
@@ -813,8 +812,7 @@ int oz_send_isoc_unit(struct oz_pd *pd, u8 ep_num, const u8 *data, int len)
 			atomic_inc(&g_submitted_isoc);
 			if (dev_queue_xmit(skb) < 0)
 				return -1;
-			else
-				return 0;
+			return 0;
 		}
 
 out:	kfree_skb(skb);

commit ce6880e17508b31e04628a71d4691ef4a00a8b3f
Author: Adrian Nicoara <anicoara@uwaterloo.ca>
Date:   Mon Sep 8 14:39:58 2014 -0400

    staging: ozwpan: fix missing blank line after declaration
    
    Cleanup checkpatch.pl warnings.
    
    Signed-off-by: Adrian Nicoara <anicoara@uwaterloo.ca>
    Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/staging/ozwpan/ozcdev.c b/drivers/staging/ozwpan/ozcdev.c
index c73d3969a6b2..da0e1fd50f26 100644
--- a/drivers/staging/ozwpan/ozcdev.c
+++ b/drivers/staging/ozwpan/ozcdev.c
@@ -263,6 +263,7 @@ static long oz_cdev_ioctl(struct file *filp, unsigned int cmd,
 	switch (cmd) {
 	case OZ_IOCTL_GET_PD_LIST: {
 			struct oz_pd_list list;
+
 			oz_dbg(ON, "OZ_IOCTL_GET_PD_LIST\n");
 			memset(&list, 0, sizeof(list));
 			list.count = oz_get_pd_list(list.addr, OZ_MAX_PDS);
@@ -273,6 +274,7 @@ static long oz_cdev_ioctl(struct file *filp, unsigned int cmd,
 		break;
 	case OZ_IOCTL_SET_ACTIVE_PD: {
 			u8 addr[ETH_ALEN];
+
 			oz_dbg(ON, "OZ_IOCTL_SET_ACTIVE_PD\n");
 			if (copy_from_user(addr, (void __user *)arg, ETH_ALEN))
 				return -EFAULT;
@@ -281,6 +283,7 @@ static long oz_cdev_ioctl(struct file *filp, unsigned int cmd,
 		break;
 	case OZ_IOCTL_GET_ACTIVE_PD: {
 			u8 addr[ETH_ALEN];
+
 			oz_dbg(ON, "OZ_IOCTL_GET_ACTIVE_PD\n");
 			spin_lock_bh(&g_cdev.lock);
 			ether_addr_copy(addr, g_cdev.active_addr);
@@ -292,6 +295,7 @@ static long oz_cdev_ioctl(struct file *filp, unsigned int cmd,
 	case OZ_IOCTL_ADD_BINDING:
 	case OZ_IOCTL_REMOVE_BINDING: {
 			struct oz_binding_info b;
+
 			if (copy_from_user(&b, (void __user *)arg,
 				sizeof(struct oz_binding_info))) {
 				return -EFAULT;
@@ -320,6 +324,7 @@ static unsigned int oz_cdev_poll(struct file *filp, poll_table *wait)
 	spin_lock_bh(&dev->lock);
 	if (dev->active_pd) {
 		struct oz_serial_ctx *ctx = oz_cdev_claim_ctx(dev->active_pd);
+
 		if (ctx) {
 			if (ctx->rd_in != ctx->rd_out)
 				ret |= POLLIN | POLLRDNORM;
diff --git a/drivers/staging/ozwpan/ozeltbuf.c b/drivers/staging/ozwpan/ozeltbuf.c
index 400cc75279b0..01b25da44241 100644
--- a/drivers/staging/ozwpan/ozeltbuf.c
+++ b/drivers/staging/ozwpan/ozeltbuf.c
@@ -174,6 +174,7 @@ int oz_queue_elt_info(struct oz_elt_buf *buf, u8 isoc, u8 id,
 			== OZ_USB_ENDPOINT_DATA) &&
 			(body->format == OZ_DATA_F_ISOC_FIXED)) {
 			u8 unit_count = body->frame_number;
+
 			body->frame_number = st->frame_number;
 			st->frame_number += unit_count;
 		}
diff --git a/drivers/staging/ozwpan/ozpd.c b/drivers/staging/ozwpan/ozpd.c
index 772641011a44..26c104946a82 100644
--- a/drivers/staging/ozwpan/ozpd.c
+++ b/drivers/staging/ozwpan/ozpd.c
@@ -106,6 +106,7 @@ struct oz_pd *oz_pd_alloc(const u8 *mac_addr)
 
 	if (pd) {
 		int i;
+
 		atomic_set(&pd->ref_count, 2);
 		for (i = 0; i < OZ_NB_APPS; i++)
 			spin_lock_init(&pd->app_lock[i]);
@@ -153,6 +154,7 @@ static void oz_pd_free(struct work_struct *work)
 
 	list_for_each_safe(e, n, &pd->tx_queue) {
 		struct oz_tx_frame *f = list_entry(e, struct oz_tx_frame, link);
+
 		if (f->skb != NULL)
 			kfree_skb(f->skb);
 		oz_retire_frame(pd, f);
@@ -249,6 +251,7 @@ void oz_pd_heartbeat(struct oz_pd *pd, u16 apps)
 		hrtimer_cancel(&pd->heartbeat);
 	if (pd->mode & OZ_F_ISOC_ANYTIME) {
 		int count = 8;
+
 		while (count-- && (oz_send_isoc_frame(pd) >= 0))
 			;
 	}
@@ -752,6 +755,7 @@ int oz_send_isoc_unit(struct oz_pd *pd, u8 ep_num, const u8 *data, int len)
 	} else {
 		struct oz_hdr oz;
 		struct oz_isoc_large iso;
+
 		spin_lock_bh(&pd->stream_lock);
 		iso.frame_number = st->frame_num;
 		st->frame_num += nb_units;
@@ -774,8 +778,10 @@ int oz_send_isoc_unit(struct oz_pd *pd, u8 ep_num, const u8 *data, int len)
 		if (!(pd->mode & OZ_F_ISOC_ANYTIME)) {
 			struct oz_tx_frame *isoc_unit = NULL;
 			int nb = pd->nb_queued_isoc_frames;
+
 			if (nb >= pd->isoc_latency) {
 				struct oz_tx_frame *f;
+
 				oz_dbg(TX_FRAMES, "Dropping ISOC Unit nb= %d\n",
 				       nb);
 				spin_lock(&pd->tx_frame_lock);
diff --git a/drivers/staging/ozwpan/ozproto.c b/drivers/staging/ozwpan/ozproto.c
index af3da3ebabc2..cae0e6f5ae68 100644
--- a/drivers/staging/ozwpan/ozproto.c
+++ b/drivers/staging/ozwpan/ozproto.c
@@ -179,6 +179,7 @@ static struct oz_pd *oz_connect_req(struct oz_pd *cur_pd, struct oz_elt *elt,
 	} else {
 		struct oz_pd *pd2 = NULL;
 		struct list_head *e;
+
 		pd = oz_pd_alloc(pd_addr);
 		if (pd == NULL)
 			return NULL;
@@ -262,6 +263,7 @@ static struct oz_pd *oz_connect_req(struct oz_pd *cur_pd, struct oz_elt *elt,
 		u16 start_apps = new_apps & ~pd->total_apps & ~0x1;
 		u16 stop_apps = pd->total_apps & ~new_apps & ~0x1;
 		u16 resume_apps = new_apps & pd->paused_apps  & ~0x1;
+
 		spin_unlock_bh(&g_polling_lock);
 		oz_pd_set_state(pd, OZ_PD_S_CONNECTED);
 		oz_dbg(ON, "new_apps=0x%x total_apps=0x%x paused_apps=0x%x\n",
@@ -383,6 +385,7 @@ static void oz_rx_frame(struct sk_buff *skb)
 		if ((oz_hdr->control & OZ_F_ACK_REQUESTED) &&
 				(pd->state == OZ_PD_S_CONNECTED)) {
 			int backlog = pd->nb_queued_frames;
+
 			pd->trigger_pkt_num = pkt_num;
 			/* Send queued frames */
 			oz_send_queued_frames(pd, backlog);
@@ -781,6 +784,7 @@ int oz_protocol_init(char *devs)
 		oz_binding_add(NULL);
 	} else {
 		char d[32];
+
 		while (*devs) {
 			devs = oz_get_next_device_name(devs, d, sizeof(d));
 			if (d[0])
diff --git a/drivers/staging/ozwpan/ozusbsvc.c b/drivers/staging/ozwpan/ozusbsvc.c
index db4a3876886a..bf15dc301cb5 100644
--- a/drivers/staging/ozwpan/ozusbsvc.c
+++ b/drivers/staging/ozwpan/ozusbsvc.c
@@ -128,6 +128,7 @@ void oz_usb_stop(struct oz_pd *pd, int pause)
 	spin_unlock_bh(&pd->app_lock[OZ_APPID_USB]);
 	if (usb_ctx) {
 		struct timespec ts, now;
+
 		getnstimeofday(&ts);
 		oz_dbg(ON, "USB service stopping...\n");
 		usb_ctx->stopped = 1;
@@ -235,6 +236,7 @@ int oz_usb_stream_delete(void *hpd, u8 ep_num)
 
 	if (usb_ctx) {
 		struct oz_pd *pd = usb_ctx->pd;
+
 		if (pd) {
 			oz_dbg(ON, "%s: (0x%x)\n", __func__, ep_num);
 			if (pd->mode & OZ_F_ISOC_NO_ELTS) {
diff --git a/drivers/staging/ozwpan/ozusbsvc1.c b/drivers/staging/ozwpan/ozusbsvc1.c
index 12bb236174dc..be7ee01c50ab 100644
--- a/drivers/staging/ozwpan/ozusbsvc1.c
+++ b/drivers/staging/ozwpan/ozusbsvc1.c
@@ -213,6 +213,7 @@ int oz_usb_control_req(void *hpd, u8 req_id, struct usb_ctrlrequest *setup,
 		case USB_REQ_SET_INTERFACE: {
 				u8 if_num = (u8)windex;
 				u8 alt = (u8)wvalue;
+
 				rc = oz_usb_set_interface_req(hpd, req_id,
 					if_num, alt);
 			}
@@ -254,6 +255,7 @@ int oz_usb_send_isoc(void *hpd, u8 ep_num, struct urb *urb)
 	if (pd->mode & OZ_F_ISOC_NO_ELTS) {
 		for (i = 0; i < urb->number_of_packets; i++) {
 			u8 *data;
+
 			desc = &urb->iso_frame_desc[i];
 			data = ((u8 *)urb->transfer_buffer)+desc->offset;
 			oz_send_isoc_unit(pd, ep_num, data, desc->length);
@@ -271,6 +273,7 @@ int oz_usb_send_isoc(void *hpd, u8 ep_num, struct urb *urb)
 		int unit_count;
 		int unit_size;
 		int rem;
+
 		if (ei == NULL)
 			return -1;
 		rem = MAX_ISOC_FIXED_DATA;
@@ -340,6 +343,7 @@ static void oz_usb_handle_ep_data(struct oz_usb_ctx *usb_ctx,
 			u8 *data = body->data;
 			int count;
 			int i;
+
 			if (!unit_size)
 				break;
 			count = data_len/unit_size;