社区微信群开通啦,扫一扫抢先加入社区官方微信群
社区微信群
// 定义pacing_tasklet:
/* include/net/tcp.h */
struct pacing_tasklet {
struct tasklet_struct tasklet;
struct list_head head; /* queue of tcp sockets */
};
extern struct pacing_tasklet pacing_tasklet;
/* net/ipv4/tcp_output.c */
// Per-CPU instance of the pacing tasklet (one struct pacing_tasklet per CPU).
DEFINE_PER_CPU(struct pacing_tasklet, pacing_tasklet);
/*
 * Per-socket pacing work, split out of the tasklet action only to keep
 * that function short.
 *
 * Called by tcp_pacing_func() between bh_lock_sock() and bh_unlock_sock().
 *
 * @sk: TCP socket whose pacing timer has fired.
 */
static void tcp_pacing_handler(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!sysctl_tcp_pacing || !tp->pacing.pacing)
		return;

	if (sock_owned_by_user(sk)) {
		/*
		 * Delegate the work to tcp_release_cb().  The reference taken
		 * here is dropped there with __sock_put().
		 */
		if (!test_and_set_bit(TCP_PACING_TIMER_DEFERRED, &tp->tsq_flags))
			sock_hold(sk);
		/*
		 * Do not fall through to sk_mem_reclaim(): the socket is
		 * currently owned by a user context, so its memory accounting
		 * must not be touched from here.
		 */
		return;
	}

	/* Nothing to transmit on a closed socket or an empty send queue. */
	if (sk->sk_state != TCP_CLOSE && sk->sk_send_head)
		tcp_push_pending_frames(sk);

	if (tcp_memory_pressure)
		sk_mem_reclaim(sk);
}
// pacing tasklet的action函数
static void tcp_pacing_func(unsigned long data)
{
struct pacing_tasklet *pacing = (struct pacing_tasklet *)data;
LIST_HEAD(list);
unsigned long flags;
struct list_head *q, *n;
struct tcp_sock *tp;
struct sock *sk;
local_irq_save(flags);
list_splice_init(&pacing->head, &list);
local_irq_restore(flags);
list_for_each_safe(q, n, &list) {
tp = list_entry(q, struct tcp_sock, pacing_node);
list_del(&tp->pacing_node);
sk = (struct sock *)tp;
bh_lock_sock(sk);
tcp_pacing_handler(sk);
bh_unlock_sock(sk);
clear_bit(PACING_QUEUED, &tp->tsq_flags);
}
}
/*
 * Initialise the per-CPU pacing tasklets (modelled on the TSQ tasklet
 * setup): give every possible CPU an empty socket queue and bind its
 * tasklet to tcp_pacing_func(), passing the per-CPU structure as the
 * tasklet argument.
 *
 * NOTE(review): the previous body was a pasted copy of the
 * tcp_pacing_func() loop and referenced undeclared variables.  The
 * mainline version of this function also initialises the per-CPU TSQ
 * tasklet; that part is not visible in this excerpt and must be kept
 * when merging -- confirm against the full tree.
 */
void __init tcp_tasklet_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct pacing_tasklet *pacing = &per_cpu(pacing_tasklet, cpu);

		INIT_LIST_HEAD(&pacing->head);
		tasklet_init(&pacing->tasklet,
			     tcp_pacing_func,
			     (unsigned long)pacing);
	}
}
/* net/ipv4/tcp_timer.c */

/*
 * (Re)arm the per-socket pacing hrtimer.
 *
 * @sk:      socket whose pacing timer is to be armed.
 * @expires: absolute expiry time in nanoseconds (the timer is started in
 *           HRTIMER_MODE_ABS_PINNED mode).
 *
 * Does nothing when pacing is disabled globally (sysctl_tcp_pacing) or
 * for this socket (tp->pacing.pacing).
 */
void tcp_pacing_reset_timer(struct sock *sk, u64 expires)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!sysctl_tcp_pacing || !tp->pacing.pacing)
		return;

	hrtimer_start(&sk->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
}
/*
 * Expiry callback of the per-socket pacing hrtimer.
 *
 * Does not transmit anything itself: it only puts the socket on this
 * CPU's pacing list and schedules the pacing tasklet, which performs the
 * actual work.  The PACING_QUEUED bit ensures a socket is linked at most
 * once.
 *
 * Always returns HRTIMER_NORESTART.
 */
static enum hrtimer_restart tcp_pacing_timer(struct hrtimer *timer)
{
	struct sock *sk = container_of(timer, struct sock, timer);
	struct tcp_sock *tp = tcp_sk(sk);
	struct pacing_tasklet *ptl;
	unsigned long irqflags;

	/* Already queued: nothing more to do. */
	if (test_and_set_bit(PACING_QUEUED, &tp->tsq_flags))
		return HRTIMER_NORESTART;

	/* Only schedule the tasklet here; its action runs later. */
	local_irq_save(irqflags);
	ptl = this_cpu_ptr(&pacing_tasklet);
	list_add(&tp->pacing_node, &ptl->head);
	tasklet_schedule(&ptl->tasklet);
	local_irq_restore(irqflags);

	return HRTIMER_NORESTART;
}
// 初始化
void tcp_init_xmit_timers(struct sock *sk)
{
inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
&tcp_keepalive_timer);
hrtimer_init(&sk->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
sk->timer.function = &tcp_pacing_timer;
}
3. tcp_write_xmit 中的判断:
/* net/ipv4/tcp_output.c */
// Excerpt of tcp_write_xmit() showing only the pacing-related checks inside
// the send loop; "..." marks code elided by the author.
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp)
{
...
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
// Timestamp in nanoseconds, compared against tp->pacing.next_to_send below.
u64 now = ktime_get_ns();
...
cwnd_quota = tcp_cwnd_test(tp, skb);
if (!cwnd_quota) {
if (push_one == 2)
/* Force out a loss probe pkt. */
cwnd_quota = 1;
// Author's note: this is the novel part. Since the pacing rate is itself
// derived from cwnd, a socket that is being paced does not need the cwnd
// check as well, so the loop only stops here when pacing is off. Be careful
// with BBR: there the pacing rate is NOT derived from cwnd; on the contrary,
// cwnd is derived from the pacing rate.
else if(tp->pacing.pacing == 0)
break;
}
// The advertised (receive) window has nothing to do with network congestion,
// so it still has to be checked.
if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
break;
// Same logic as in the plain-timer version of pacing.
if (sysctl_tcp_pacing && tp->pacing.pacing == 1) {
u32 plen;
u64 rate, len;
// Too early for the next packet: arm the pacing timer and stop sending.
if (now < tp->pacing.next_to_send) {
tcp_pacing_reset_timer(sk, tp->pacing.next_to_send);
break;
}
// A non-zero sysctl_tcp_rate overrides the socket's own sk_pacing_rate.
rate = sysctl_tcp_rate ? sysctl_tcp_rate:sk->sk_pacing_rate;
plen = skb->len + MAX_HEADER;
// Time this skb occupies at the given rate: plen * NSEC_PER_SEC / rate.
len = (u64)plen * NSEC_PER_SEC;
// NOTE(review): when rate is 0 the division is skipped and len stays at
// plen * NSEC_PER_SEC, i.e. a very large gap -- confirm this is intended.
if (rate)
do_div(len, rate);
tp->pacing.next_to_send = now + len;
// cwnd_quota can still be 0 here because the cwnd break above is skipped
// while pacing is on; force a quota of one.
if (cwnd_quota == 0)
cwnd_quota = 1;
}
if (tso_segs == 1) {
...
}
/* net/ipv4/tcp_output.c */
// Excerpt of tcp_release_cb() showing only the handling of deferred pacing
// work; "..." marks code elided by the author.
void tcp_release_cb(struct sock *sk)
{
...
// TCP_PACING_TIMER_DEFERRED is set by tcp_pacing_handler() when it finds the
// socket owned by user context; the postponed transmit is done here instead.
if (flags & (1UL << TCP_PACING_TIMER_DEFERRED)) {
if(sk->sk_send_head) {
tcp_push_pending_frames(sk);
}
// Drops the reference taken with sock_hold() when the deferred bit was set.
__sock_put(sk);
}
...
}
首先看标准cubic算法的曲线:
CTMB,垃圾!都他妈的是垃圾!
其吞吐量曲线如下图所示:
然后再看我的pacing曲线:
然后再看看吞吐量的图!我虽然没有上过大学,其实我也是不屑于大学的,我的圈子里,都是硕博连读的,好久不回一次国,而我,不知本科为何?!那么看看结果吧:
-------------------------------------
最后看看我最初的愿景。如果觉得我的文章对您有用,请随意打赏。你的支持将鼓励我继续创作!