安卓内核内存回收
背景
上层业务常常需要内核提供一些定制的内存回收接口,内核层自身也可能要对内存机制做优化,这些都需要在原有内存回收机制上做一些hook操作。因此,了解Linux内核的内存回收流程很重要。
本章节主要讲kswapd线程:当空闲内存低于low watermark时,kswapd会被唤醒并开始回收内存。
内存回收步骤
- kswapd初始化
1、设置每次swap的page数
2、创建kswapd线程,每个numa节点对应一个kswapd线程。
- 执行kswapd()
1、进入for死循环
2、唤醒kcompactd整理内存碎片后,进入睡眠。
3、被唤醒后,执行回收
内存回收详细步骤
- kswapd初始化
/*
 * Boot-time initialization of kswapd.
 *
 * Abridged excerpt: the declaration of nid and the return statement are
 * not shown.
 */
static int __init kswapd_init(void)
{
    /*
     * Set page_cluster, which decides how many pages each swap in/out
     * handles (2^page_cluster).
     */
    swap_setup();

    /* Walk every NUMA node in the N_MEMORY state ... */
    for_each_node_state(nid, N_MEMORY)
        /* ... and create a kswapd thread for it. */
        kswapd_run(nid);
}
void kswapd_run(int nid) { //为每个节点id 创建kswapd线程pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
}
- 执行kswapd()
// kswapd整个生命都在这儿
static int kswapd(void *p) {pg_data_t *pgdat = (pg_data_t *)p; //每个numa节点,通过pg_data_t描述物理内存布局for ( ; ; ) {//判断kswapd是否进入睡眠,并让出cpu。kswapd_try_to_sleep(pgdat, alloc_order, reclaim_order, highest_zoneidx);// 回收pagereclaim_order = balance_pgdat(pgdat, alloc_order, highest_zoneidx);}
}
/*
 * Try to put kswapd to sleep: first a short nap, then a full sleep.
 *
 * Abridged excerpt: the declarations of wait and remaining are not shown.
 */
static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
                                unsigned int highest_zoneidx)
{
    /* Add the kswapd thread to this memory node's wait queue. */
    prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);

    /*
     * Take a short nap first; the purpose is to wake the memory compaction
     * thread so it can defragment memory.
     */
    if (prepare_kswapd_sleep(pgdat, reclaim_order, highest_zoneidx)) {
        /* Wake the memory compaction thread. */
        wakeup_kcompactd(pgdat, alloc_order, highest_zoneidx);

        /* Short nap with a timeout of HZ/10. */
        remaining = schedule_timeout(HZ/10);

        finish_wait(&pgdat->kswapd_wait, &wait);
        prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
    }

    /*
     * Only sleep for real when remaining is zero and
     * prepare_kswapd_sleep() still agrees.
     */
    if (!remaining && prepare_kswapd_sleep(pgdat, reclaim_order, highest_zoneidx)) {
        /* Full sleep: give up the CPU. */
        if (!kthread_should_stop())
            schedule();
    }

    finish_wait(&pgdat->kswapd_wait, &wait);
}
/*
 * Do the actual page reclaim for one node.
 *
 * Abridged excerpt: the declaration of raise_priority, the code that
 * initializes and changes sc.priority, and the return statement are not
 * shown.
 */
static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
{
    /* Control block for this page reclaim. */
    struct scan_control sc = {
        .gfp_mask = GFP_KERNEL,
        .order = order,
        .may_unmap = 1,
    };

    do {
        /* Core reclaim function. */
        if (kswapd_shrink_node(pgdat, &sc))
            raise_priority = false;
    } while (sc.priority >= 1);
}
/*
 * Compute a reclaim target for the node, reclaim from it, and report
 * whether at least as many pages were scanned as the target.
 *
 * Abridged excerpt: the declarations of zone and z are not shown.
 */
static bool kswapd_shrink_node(pg_data_t *pgdat, struct scan_control *sc)
{
    sc->nr_to_reclaim = 0;
    for (z = 0; z <= sc->reclaim_idx; z++) {
        zone = pgdat->node_zones + z;
        if (!managed_zone(zone))
            continue;

        /* Accumulate the number of pages we expect to reclaim. */
        sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX);
    }

    /* Reclaim from the memory node. */
    shrink_node(pgdat, sc);

    return sc->nr_scanned >= sc->nr_to_reclaim;
}
/*
 * Reclaim from one node, repeating while should_continue_reclaim() asks
 * for another pass.
 *
 * Abridged excerpt: the declarations and initialization of reclaim_state
 * and nr_reclaimed are not shown.
 */
static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
{
again:
    /* Call down into the function that does the real reclaim work. */
    shrink_node_memcgs(pgdat, sc);

    if (reclaim_state) {
        /* Count the slab part as reclaimed, then reset the slab counter. */
        sc->nr_reclaimed += reclaim_state->reclaimed_slab;
        reclaim_state->reclaimed_slab = 0;
    }

    /* Reclaim has not met the requirement yet: go around again. */
    if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, sc))
        goto again;
}
/*
 * Run LRU and slab reclaim for each memcg returned by mem_cgroup_iter().
 *
 * Abridged excerpt: the declarations and initialization of lruvec, memcg
 * and target_memcg are not shown.
 */
static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
{
    do {
        /* LRU-related reclaim. */
        shrink_lruvec(lruvec, sc);

        /* Slab-related reclaim. */
        shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority);
    } while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
}
源码
参考kernel 6.1