10 files changed, 229 insertions, 22 deletions
diff --git a/CORE/CLD_TXRX/TXRX/ol_tx.c b/CORE/CLD_TXRX/TXRX/ol_tx.c
index 737912d15941..dfaf85bccc92 100644
--- a/CORE/CLD_TXRX/TXRX/ol_tx.c
+++ b/CORE/CLD_TXRX/TXRX/ol_tx.c
@@ -111,6 +111,129 @@ ol_tx_ll(ol_txrx_vdev_handle vdev, adf_nbuf_t msdu_list)
     return NULL; /* all MSDUs were accepted */
 }
 
+#ifdef QCA_SUPPORT_TXRX_VDEV_PAUSE_LL
+
+#define OL_TX_VDEV_PAUSE_QUEUE_SEND_MARGIN 20 /* free tx descs left unused */
+#define OL_TX_VDEV_PAUSE_QUEUE_SEND_PERIOD_MS 5 /* ll_pause.timer period */
+static void
+ol_tx_vdev_ll_pause_queue_send_base(struct ol_txrx_vdev_t *vdev)
+{
+    int max_to_accept;
+    /* Drain the vdev's pause-queue backlog via ol_tx_ll; no-op if paused. */
+    if (vdev->ll_pause.is_paused == A_TRUE) {
+        return;
+    }
+
+    adf_os_spin_lock_bh(&vdev->ll_pause.mutex);
+
+    /*
+     * Send as much of the backlog as possible, but leave a margin of
+     * unallocated tx descriptors for new frames from other vdevs.
+     * num_free is sampled once here, without taking pdev->tx_mutex, so
+     * the budget is only an estimate (see the reject handling below).
+     * Ideally a scheduler would not only leave that margin, but also
+     * fairly apportion the tx descriptors between multiple vdevs that
+     * have backlogs in their pause queues.
+     * However, the fairness benefit of having a scheduler for frames
+     * from multiple vdevs' pause queues is not sufficient to outweigh
+     * the extra complexity.
+     */
+    max_to_accept =
+        vdev->pdev->tx_desc.num_free - OL_TX_VDEV_PAUSE_QUEUE_SEND_MARGIN;
+    while (max_to_accept > 0 && vdev->ll_pause.txq.depth) {
+        adf_nbuf_t tx_msdu;
+        max_to_accept--;
+        vdev->ll_pause.txq.depth--;
+        tx_msdu = vdev->ll_pause.txq.head; /* pop the oldest queued frame */
+        vdev->ll_pause.txq.head = adf_nbuf_next(tx_msdu);
+        if (NULL == vdev->ll_pause.txq.head) {
+            vdev->ll_pause.txq.tail = NULL;
+        }
+        adf_nbuf_set_next(tx_msdu, NULL); /* send it as a one-frame list */
+        tx_msdu = ol_tx_ll(vdev, tx_msdu);
+        /*
+         * It is unexpected that ol_tx_ll would reject the frame,
+         * since we checked that there's room for it, though there's
+         * an infinitesimal possibility that between the time we checked
+         * the room available and now, a concurrent batch of tx frames
+         * used up all the room.
+         * For simplicity, just drop the frame.
+         */
+        if (tx_msdu) {
+            adf_nbuf_tx_free(tx_msdu, 1 /* error */);
+        }
+    }
+    if (vdev->ll_pause.txq.depth) { /* backlog remains: retry on the timer */
+        adf_os_timer_start(
+                &vdev->ll_pause.timer, OL_TX_VDEV_PAUSE_QUEUE_SEND_PERIOD_MS);
+    }
+
+    adf_os_spin_unlock_bh(&vdev->ll_pause.mutex);
+}
+/* Queue frames up to cfg.ll_pause_txq_limit; return those that don't fit. */
+static adf_nbuf_t
+ol_tx_vdev_pause_queue_append(struct ol_txrx_vdev_t *vdev, adf_nbuf_t msdu_list)
+{
+    adf_os_spin_lock_bh(&vdev->ll_pause.mutex);
+    while (msdu_list &&
+            vdev->ll_pause.txq.depth < vdev->pdev->cfg.ll_pause_txq_limit)
+    {
+        adf_nbuf_t next = adf_nbuf_next(msdu_list);
+
+        vdev->ll_pause.txq.depth++;
+        if (!vdev->ll_pause.txq.head) {
+            vdev->ll_pause.txq.head = msdu_list;
+            vdev->ll_pause.txq.tail = msdu_list;
+        } else {
+            adf_nbuf_set_next(vdev->ll_pause.txq.tail, msdu_list);
+        }
+        vdev->ll_pause.txq.tail = msdu_list;
+
+        msdu_list = next;
+    }
+    if (vdev->ll_pause.txq.tail) { /* cut the queue off from rejected frames */
+        adf_nbuf_set_next(vdev->ll_pause.txq.tail, NULL);
+    }
+    adf_os_spin_unlock_bh(&vdev->ll_pause.mutex);
+    /* arm the drain timer (done unconditionally, even while paused) */
+    adf_os_timer_start(
+            &vdev->ll_pause.timer, OL_TX_VDEV_PAUSE_QUEUE_SEND_PERIOD_MS);
+    /* NULL if everything was queued, else the frames over the limit */
+    return msdu_list;
+}
+
+/*
+ * LL tx entry point with vdev pause support: queue frames while paused or
+ * behind a backlog, else send directly.  Returns the frames not accepted.
+ */
+adf_nbuf_t
+ol_tx_ll_queue(ol_txrx_vdev_handle vdev, adf_nbuf_t msdu_list)
+{
+    if (vdev->ll_pause.is_paused == A_TRUE) {
+        msdu_list = ol_tx_vdev_pause_queue_append(vdev, msdu_list);
+    } else {
+        if (vdev->ll_pause.txq.depth > 0) {
+            /* not paused, but a backlog remains from a prior pause */
+            msdu_list = ol_tx_vdev_pause_queue_append(vdev, msdu_list);
+            /* send as many frames as possible from the vdev's backlog */
+            ol_tx_vdev_ll_pause_queue_send_base(vdev);
+        } else {
+            /* not paused, and no backlog - send the new frames */
+            msdu_list = ol_tx_ll(vdev, msdu_list);
+        }
+    }
+    return msdu_list;
+}
+#endif
+/* ll_pause.timer callback, also called on unpause; context is the vdev. */
+void ol_tx_vdev_ll_pause_queue_send(void *context)
+{
+#ifdef QCA_SUPPORT_TXRX_VDEV_PAUSE_LL
+    struct ol_txrx_vdev_t *vdev = (struct ol_txrx_vdev_t *) context;
+    ol_tx_vdev_ll_pause_queue_send_base(vdev);
+#endif
+}
+
 static inline int
 OL_TXRX_TX_IS_RAW(enum ol_tx_spec tx_spec)
 {
diff --git a/CORE/CLD_TXRX/TXRX/ol_tx.h b/CORE/CLD_TXRX/TXRX/ol_tx.h
index 5316b83594db..b9830335af0b 100644
--- a/CORE/CLD_TXRX/TXRX/ol_tx.h
+++ b/CORE/CLD_TXRX/TXRX/ol_tx.h
@@ -42,6 +42,17 @@ adf_nbuf_t
 ol_tx_ll(ol_txrx_vdev_handle vdev, adf_nbuf_t msdu_list);
 
 adf_nbuf_t
+ol_tx_ll_queue(ol_txrx_vdev_handle vdev, adf_nbuf_t msdu_list);
+
+#ifdef QCA_SUPPORT_TXRX_VDEV_PAUSE_LL
+#define OL_TX_LL ol_tx_ll_queue
+#else
+#define OL_TX_LL ol_tx_ll
+#endif
+
+void ol_tx_vdev_ll_pause_queue_send(void *context);
+
+adf_nbuf_t
 ol_tx_non_std_ll(
     ol_txrx_vdev_handle data_vdev,
     enum ol_tx_spec tx_spec,
diff --git a/CORE/CLD_TXRX/TXRX/ol_tx_desc.c b/CORE/CLD_TXRX/TXRX/ol_tx_desc.c
index 3bd802602aee..cb817820f7b1 100644
--- a/CORE/CLD_TXRX/TXRX/ol_tx_desc.c
+++ b/CORE/CLD_TXRX/TXRX/ol_tx_desc.c
@@ -60,6 +60,7 @@ ol_tx_desc_alloc(struct ol_txrx_pdev_t *pdev)
 
     adf_os_spin_lock_bh(&pdev->tx_mutex);
     if (pdev->tx_desc.freelist) {
+        pdev->tx_desc.num_free--;
         tx_desc = &pdev->tx_desc.freelist->tx_desc;
         pdev->tx_desc.freelist = pdev->tx_desc.freelist->next;
     }
@@ -97,6 +98,7 @@ ol_tx_desc_free(struct ol_txrx_pdev_t *pdev, struct ol_tx_desc_t *tx_desc)
     adf_os_spin_lock_bh(&pdev->tx_mutex);
     ((union ol_tx_desc_list_elem_t *) tx_desc)->next = pdev->tx_desc.freelist;
     pdev->tx_desc.freelist = (union ol_tx_desc_list_elem_t *) tx_desc;
+    pdev->tx_desc.num_free++;
     adf_os_spin_unlock_bh(&pdev->tx_mutex);
 }
 
diff --git a/CORE/CLD_TXRX/TXRX/ol_tx_queue.c b/CORE/CLD_TXRX/TXRX/ol_tx_queue.c
index b2f7029647ee..661645af6e1a 100644
--- a/CORE/CLD_TXRX/TXRX/ol_tx_queue.c
+++ b/CORE/CLD_TXRX/TXRX/ol_tx_queue.c
@@ -36,6 +36,7 @@
 #include <ol_txrx_internal.h> /* TXRX_ASSERT1, etc. */
 #include <ol_txrx_types.h>    /* pdev stats */
 #include <ol_tx_desc.h>       /* ol_tx_desc, ol_tx_desc_frame_list_free */
+#include <ol_tx.h>            /* ol_tx_vdev_ll_pause_queue_send */
 #include <ol_tx_sched.h>      /* ol_tx_sched_notify, etc. */
 #include <ol_tx_queue.h>
 #include <ol_txrx_dbg.h>      /* ENABLE_TX_QUEUE_LOG */
@@ -463,49 +464,65 @@ ol_txrx_peer_tid_unpause(ol_txrx_peer_handle peer, int tid)
     TX_SCHED_DEBUG_PRINT("Leave %s\n", __func__);
 }
 
+#endif /* defined(CONFIG_HL_SUPPORT) */
+
+#if defined(CONFIG_HL_SUPPORT) || defined(QCA_SUPPORT_TXRX_VDEV_PAUSE_LL)
+
 void
 ol_txrx_vdev_pause(ol_txrx_vdev_handle vdev)
 {
-    struct ol_txrx_pdev_t *pdev = vdev->pdev;
-    struct ol_txrx_peer_t *peer;
-
     /* TO DO: log the queue pause */
-
     /* acquire the mutex lock, since we'll be modifying the queues */
     TX_SCHED_DEBUG_PRINT("Enter %s\n", __func__);
-    adf_os_spin_lock(&pdev->tx_queue_spinlock);
 
-    TAILQ_FOREACH(peer, &vdev->peer_list, peer_list_elem) {
-        ol_txrx_peer_pause_base(pdev, peer);
+    if (vdev->pdev->cfg.is_high_latency) {
+#if defined(CONFIG_HL_SUPPORT)
+        struct ol_txrx_pdev_t *pdev = vdev->pdev;
+        struct ol_txrx_peer_t *peer;
+        adf_os_spin_lock(&pdev->tx_queue_spinlock);
+        TAILQ_FOREACH(peer, &vdev->peer_list, peer_list_elem) {
+            ol_txrx_peer_pause_base(pdev, peer);
+        }
+        adf_os_spin_unlock(&pdev->tx_queue_spinlock);
+#endif /* defined(CONFIG_HL_SUPPORT) */
+    } else {
+        vdev->ll_pause.is_paused = A_TRUE;
     }
 
-    adf_os_spin_unlock(&pdev->tx_queue_spinlock);
     TX_SCHED_DEBUG_PRINT("Leave %s\n", __func__);
 }
 
 void
 ol_txrx_vdev_unpause(ol_txrx_vdev_handle vdev)
 {
-    struct ol_txrx_pdev_t *pdev = vdev->pdev;
-    struct ol_txrx_peer_t *peer;
-
     /* TO DO: log the queue unpause */
-
     /* acquire the mutex lock, since we'll be modifying the queues */
     TX_SCHED_DEBUG_PRINT("Enter %s\n", __func__);
-    adf_os_spin_lock(&pdev->tx_queue_spinlock);
 
-    TAILQ_FOREACH(peer, &vdev->peer_list, peer_list_elem) {
-        int i;
-        for (i = 0; i < ARRAY_LEN(peer->txqs); i++) {
-            ol_txrx_peer_tid_unpause_base(pdev, peer, i);
+    if (vdev->pdev->cfg.is_high_latency) {
+#if defined(CONFIG_HL_SUPPORT)
+        struct ol_txrx_pdev_t *pdev = vdev->pdev;
+        struct ol_txrx_peer_t *peer;
+        adf_os_spin_lock(&pdev->tx_queue_spinlock);
+
+        TAILQ_FOREACH(peer, &vdev->peer_list, peer_list_elem) {
+            int i;
+            for (i = 0; i < ARRAY_LEN(peer->txqs); i++) {
+                ol_txrx_peer_tid_unpause_base(pdev, peer, i);
+            }
         }
+        adf_os_spin_unlock(&pdev->tx_queue_spinlock);
+#endif /* defined(CONFIG_HL_SUPPORT) */
+    } else {
+        vdev->ll_pause.is_paused = A_FALSE;
+        ol_tx_vdev_ll_pause_queue_send(vdev);
     }
-
-    adf_os_spin_unlock(&pdev->tx_queue_spinlock);
     TX_SCHED_DEBUG_PRINT("Leave %s\n", __func__);
 }
 
+#endif // defined(CONFIG_HL_SUPPORT) || defined(QCA_SUPPORT_TXRX_VDEV_PAUSE_LL)
+
+#if defined(CONFIG_HL_SUPPORT)
 
 /*--- ADDBA triggering functions --------------------------------------------*/
 
diff --git a/CORE/CLD_TXRX/TXRX/ol_tx_send.c b/CORE/CLD_TXRX/TXRX/ol_tx_send.c
index edc38496e209..29c4f1cec3c0 100644
--- a/CORE/CLD_TXRX/TXRX/ol_tx_send.c
+++ b/CORE/CLD_TXRX/TXRX/ol_tx_send.c
@@ -491,6 +491,7 @@ ol_tx_completion_handler(
         tx_desc_last->next = pdev->tx_desc.freelist;
         pdev->tx_desc.freelist = lcl_freelist; 
         adf_os_spin_unlock(&pdev->tx_mutex);
+        pdev->tx_desc.num_free += (u_int16_t) num_msdus;
     } else {
         ol_tx_desc_frame_list_free(pdev, &tx_descs, status != htt_tx_status_ok);
     }
diff --git a/CORE/CLD_TXRX/TXRX/ol_txrx.c b/CORE/CLD_TXRX/TXRX/ol_txrx.c
index 56ca9ecb9607..4ebf05103559 100644
--- a/CORE/CLD_TXRX/TXRX/ol_txrx.c
+++ b/CORE/CLD_TXRX/TXRX/ol_txrx.c
@@ -346,6 +346,7 @@ ol_txrx_pdev_attach(
     }
 
     /* link SW tx descs into a freelist */
+    pdev->tx_desc.num_free = desc_pool_size;
     pdev->tx_desc.freelist = &pdev->tx_desc.array[0];
     for (i = 0; i < desc_pool_size-1; i++) {
         pdev->tx_desc.array[i].next = &pdev->tx_desc.array[i+1];
@@ -560,6 +561,8 @@ ol_txrx_pdev_attach(
 
     OL_TXRX_LOCAL_PEER_ID_POOL_INIT(pdev);
 
+    pdev->cfg.ll_pause_txq_limit = ol_tx_cfg_max_tx_queue_depth_ll(ctrl_pdev);
+
 #ifdef QCA_COMPUTE_TX_DELAY
     adf_os_mem_zero(&pdev->tx_delay, sizeof(pdev->tx_delay));
     adf_os_spinlock_init(&pdev->tx_delay.mutex);
@@ -785,6 +788,17 @@ ol_txrx_vdev_attach(
     }
     #endif /* defined(CONFIG_HL_SUPPORT) */
 
+    /* the pause-queue lock must be initialized before its first use */
+    adf_os_spinlock_init(&vdev->ll_pause.mutex);
+    vdev->ll_pause.is_paused = A_FALSE;
+    vdev->ll_pause.txq.head = vdev->ll_pause.txq.tail = NULL;
+    vdev->ll_pause.txq.depth = 0;
+    adf_os_timer_init(
+            pdev->osdev,
+            &vdev->ll_pause.timer,
+            ol_tx_vdev_ll_pause_queue_send,
+            vdev);
+
     /* add this vdev into the pdev's list */
     TAILQ_INSERT_TAIL(&pdev->vdev_list, vdev, vdev_list_elem);
 
@@ -814,7 +826,7 @@ void ol_txrx_osif_vdev_register(ol_txrx_vdev_handle vdev,
 		txrx_ops->tx.std = vdev->tx = ol_tx_hl;
 		txrx_ops->tx.non_std = ol_tx_non_std_hl;
 	} else {
-		txrx_ops->tx.std = vdev->tx = ol_tx_ll;
+        txrx_ops->tx.std = vdev->tx = OL_TX_LL;
 		txrx_ops->tx.non_std = ol_tx_non_std_ll;
 	}
 }
@@ -877,6 +889,14 @@ ol_txrx_vdev_detach(
     }
     #endif /* defined(CONFIG_HL_SUPPORT) */
 
+    adf_os_timer_cancel(&vdev->ll_pause.timer);
+    adf_os_timer_free(&vdev->ll_pause.timer);
+    while (vdev->ll_pause.txq.head) {
+        adf_nbuf_t next = adf_nbuf_next(vdev->ll_pause.txq.head);
+        adf_nbuf_tx_free(vdev->ll_pause.txq.head, 1 /* error */);
+        vdev->ll_pause.txq.head = next;
+    }
+
     /* remove the vdev from its parent pdev's list */
     TAILQ_REMOVE(&pdev->vdev_list, vdev, vdev_list_elem);
 
diff --git a/CORE/CLD_TXRX/TXRX/ol_txrx_types.h b/CORE/CLD_TXRX/TXRX/ol_txrx_types.h
index 07b56e0eb085..cbc9889bcef3 100644
--- a/CORE/CLD_TXRX/TXRX/ol_txrx_types.h
+++ b/CORE/CLD_TXRX/TXRX/ol_txrx_types.h
@@ -363,6 +363,7 @@ struct ol_txrx_pdev_t {
 	struct {
 		int is_high_latency;
 		int host_addba;
+		int ll_pause_txq_limit;
 	} cfg;
 
 	/* WDI subscriber's event list */
@@ -458,6 +459,7 @@ struct ol_txrx_pdev_t {
 	/* tx descriptor pool */
 	struct {
 		u_int16_t pool_size;
+		u_int16_t num_free;
 		union ol_tx_desc_list_elem_t *array;
 		union ol_tx_desc_list_elem_t *freelist;
 	} tx_desc;
@@ -693,8 +695,21 @@ struct ol_txrx_vdev_t {
 #if defined(CONFIG_HL_SUPPORT)
 	struct ol_tx_frms_queue_t txqs[OL_TX_VDEV_NUM_QUEUES];
 #endif
+
+	struct { /* per-vdev LL tx pause state */
+		struct { /* singly-linked list of held tx frames */
+			adf_nbuf_t head; /* next frame to send */
+			adf_nbuf_t tail; /* most recently queued frame */
+			int depth; /* number of frames in the list */
+		} txq;
+		a_bool_t is_paused; /* set by ol_txrx_vdev_pause/unpause */
+		adf_os_spinlock_t mutex; /* held by the txq append/send paths */
+		adf_os_timer_t timer; /* runs ol_tx_vdev_ll_pause_queue_send */
+	} ll_pause;
+
 };
 
+
 struct ol_rx_reorder_array_elem_t {
 	adf_nbuf_t head;
 	adf_nbuf_t tail;
diff --git a/CORE/SERVICES/COMMON/ol_cfg.h b/CORE/SERVICES/COMMON/ol_cfg.h
index 5d10c8a64c59..40594bad3b7d 100644
--- a/CORE/SERVICES/COMMON/ol_cfg.h
+++ b/CORE/SERVICES/COMMON/ol_cfg.h
@@ -385,5 +385,22 @@ ol_cfg_addba_retry(ol_pdev_handle pdev)
     return 0; /* disabled for now */
 }
 
+/**
+ * @brief How many frames to hold in a paused vdev's tx queue in LL systems
+ */
+static inline int
+ol_tx_cfg_max_tx_queue_depth_ll(ol_pdev_handle pdev)
+{
+    /*
+     * Store up to 700 frames for a paused vdev; pdev is currently unused,
+     * so the limit is the same for every pdev.
+     * Example (presumably assuming ~1500-byte frames - TODO confirm): a vdev
+     * offered 300 Mbps queues 700 frames in about 28 ms of pause; a 600 Mbps
+     * PHY then needs about 28 ms to drain those 700 frames plus the 700 that
+     * arrive meanwhile.  So the PHY stays fully utilized in a MCC system
+     * whose channel-switching period is 56 ms or less.
+     */
+    return 700;
+}
 
 #endif /* _OL_CFG__H_ */
diff --git a/CORE/SERVICES/COMMON/ol_txrx_ctrl_api.h b/CORE/SERVICES/COMMON/ol_txrx_ctrl_api.h
index a982759b6faa..46c076739f80 100644
--- a/CORE/SERVICES/COMMON/ol_txrx_ctrl_api.h
+++ b/CORE/SERVICES/COMMON/ol_txrx_ctrl_api.h
@@ -281,7 +281,7 @@ ol_txrx_tx_release(
  *
  * @param data_vdev - the virtual device being paused
  */
-#if defined(CONFIG_HL_SUPPORT)
+#if defined(CONFIG_HL_SUPPORT) || defined(QCA_SUPPORT_TXRX_VDEV_PAUSE_LL)
 void
 ol_txrx_vdev_pause(ol_txrx_vdev_handle data_vdev);
 #else
@@ -296,7 +296,7 @@ ol_txrx_vdev_pause(ol_txrx_vdev_handle data_vdev);
  *
  * @param data_vdev - the virtual device being unpaused
  */
-#if defined(CONFIG_HL_SUPPORT)
+#if defined(CONFIG_HL_SUPPORT) || defined(QCA_SUPPORT_TXRX_VDEV_PAUSE_LL)
 void
 ol_txrx_vdev_unpause(ol_txrx_vdev_handle data_vdev);
 #else
diff --git a/Kbuild b/Kbuild
index 7314e91a5651..7977ab48763c 100755
--- a/Kbuild
+++ b/Kbuild
@@ -822,6 +822,7 @@ CDEFINES :=	-DANI_LITTLE_BYTE_ENDIAN \
 		-DWLAN_FEATURE_HOLD_RX_WAKELOCK \
 		-DWLAN_SOFTAP_VSTA_FEATURE \
 		-DWLAN_FEATURE_ROAM_SCAN_OFFLOAD \
+		-DQCA_SUPPORT_TXRX_VDEV_PAUSE_LL
 
 ifeq ($(CONFIG_QCA_WIFI_2_0), 0)
 CDEFINES +=	-DWLANTL_DEBUG