Jimmy Chen

A Programmer

(原创)基于Android 8.1之lowmemorykiller源码分析

  在使用Android的过程中,我们经常性的会按Home键返回到桌面,然后打开其他的应用进行使用,而此时前一个应用会驻留在内存中,当再次打开该应用时就可以直接显示使用。通过这种方法可以提升用户体验以及提高应用打开速度。但是系统内存是有限的,不可能一直将全部应用驻留在系统内存中,所以lowmemorykiller就诞生了。lowmemorykiller,如其名低内存杀手,作用是当内存处于低水平时,杀死系统中余留的暂时还不用的进程,以释放内存。下面我们直接分析lowmemorykiller的源码实现

lmkd启动

  lmkd是在init执行阶段作为守护进程启动的

service lmkd /system/bin/lmkd
    class core
    group root readproc
    critical
    socket lmkd seqpacket 0660 system system
    writepid /dev/cpuset/system-background/tasks

  从上面的代码中可以了解到,lmkd在启动的过程中要创建一个名为lmkd的socket。查看lmkd编译的Android.bp文件可以知道,lmkd是由system/core/lmkd/lmkd.c编译而成。下面直接分析lmkd.c文件的源码

lmkd.c源码分析

main方法

  既然lmkd是一个可执行程序,那么可以直接从main方法开始分析。

// lmkd entry point: read tuning properties, pin the daemon in RAM, raise
// its scheduling priority, then run init() and enter the event loop.
int main(int argc __unused, char **argv __unused) {
    struct sched_param param = {
            .sched_priority = 1,
    };

    // Read tunables published as system properties (with defaults)
    medium_oomadj = property_get_int32("ro.lmk.medium", 800);
    critical_oomadj = property_get_int32("ro.lmk.critical", 0);
    debug_process_killing = property_get_bool("ro.lmk.debug", false);
    enable_pressure_upgrade = property_get_bool("ro.lmk.critical_upgrade", false);
    upgrade_pressure = (int64_t)property_get_int32("ro.lmk.upgrade_pressure", 50);
    downgrade_pressure = (int64_t)property_get_int32("ro.lmk.downgrade_pressure", 60);
    is_go_device = property_get_bool("ro.config.low_ram", false);

    // Lock all current and future pages in RAM so the killer itself is
    // never paged out while the system is under memory pressure
    mlockall(MCL_FUTURE);

    // Switch this thread to the SCHED_FIFO real-time policy; under
    // SCHED_FIFO priorities run from 1 (low) to 99 (high), so even
    // priority 1 preempts ordinary SCHED_OTHER threads.
    // Fixed: the argument was garbled as "¶m" (mojibake of "&param").
    sched_setscheduler(0, SCHED_FIFO, &param);

    // init() returns 0 on success; only then enter the loop that
    // waits on the lmkd control socket
    if (!init())
        mainloop();

    ALOGI("exiting");
    return 0;
}

  上面的代码主要集中在后面的两个方法,一个是init方法,另一个是mainloop方法。

init方法

// One-time setup: create the epoll instance, take over the "lmkd" control
// socket created by init(8), start listening on it, and register
// ctrl_connect_handler for incoming connections.
// Returns 0 on success, -1 on any failure.
static int init(void) {
    // epoll registration record reused below
    struct epoll_event epev;
    int i;
    int ret;

    // Page size in KiB; fall back to the compile-time PAGE_SIZE on error
    page_k = sysconf(_SC_PAGESIZE);
    if (page_k == -1)
        page_k = PAGE_SIZE;
    page_k /= 1024;

    // Create the epoll file descriptor that drives the whole daemon
    epollfd = epoll_create(MAX_EPOLL_EVENTS);
    if (epollfd == -1) {
        ALOGE("epoll_create failed (errno=%d)", errno);
        return -1;
    }

    // Take ownership of the "lmkd" socket declared in the init .rc file
    ctrl_lfd = android_get_control_socket("lmkd");
    if (ctrl_lfd < 0) {
        ALOGE("get lmkd control socket failed");
        return -1;
    }

    // Listen on the lmkd socket (backlog of 1 — only AMS connects)
    ret = listen(ctrl_lfd, 1);
    if (ret < 0) {
        ALOGE("lmkd control socket listen failed (errno=%d)", errno);
        return -1;
    }

    // Wake up when the listening socket becomes readable (a connection)
    epev.events = EPOLLIN;
    // Handler invoked by mainloop() when the event fires
    epev.data.ptr = (void *)ctrl_connect_handler;
    // Add the listening socket to the epoll set
    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ctrl_lfd, &epev) == -1) {
        ALOGE("epoll_ctl for lmkd control socket failed (errno=%d)", errno);
        return -1;
    }
    maxevents++;

    // Check write access to the in-kernel driver's minfree parameter:
    // #define INKERNEL_MINFREE_PATH "/sys/module/lowmemorykiller/parameters/minfree"
    has_inkernel_module = !access(INKERNEL_MINFREE_PATH, W_OK);

    // Prefer the in-kernel killer except on Android Go (low-RAM) devices
    use_inkernel_interface = has_inkernel_module && !is_go_device;

    if (use_inkernel_interface) {
        ALOGI("Using in-kernel low memory killer interface");
    } else {
        // Userspace mode: subscribe to memcg memory-pressure events
        ret = init_mp_medium();
        ret |= init_mp_critical();
        if (ret)
            ALOGE("Kernel does not support memory pressure events or in-kernel low memory killer");
    }

    // Initialize every bucket of procadjslot_list (one circular doubly
    // linked list per oom_score_adj slot) to point at itself, i.e. empty
    for (i = 0; i <= ADJTOSLOT(OOM_SCORE_ADJ_MAX); i++) {
        procadjslot_list[i].next = &procadjslot_list[i];
        procadjslot_list[i].prev = &procadjslot_list[i];
    }

    return 0;
}

  下面看看procadjslot_list的定义

// Node of a circular doubly linked list
struct adjslot_list {
    struct adjslot_list *next;
    struct adjslot_list *prev;
};

// One list head per oom_score_adj slot; processes are bucketed by adj value
static struct adjslot_list procadjslot_list[ADJTOSLOT(OOM_SCORE_ADJ_MAX) + 1];

  init方法就分析到这里,下面接着看mainloop方法。

mainloop方法

// Event loop: block in epoll_wait forever and dispatch each ready event to
// the handler stored in its data.ptr (set up in init()/ctrl_connect_handler).
static void mainloop(void) {
    // Loop forever
    while (1) {
        struct epoll_event events[maxevents];
        int nevents;
        int i;

        ctrl_dfd_reopened = 0;
        // init() registered the lmkd listening socket with epollfd;
        // block (timeout -1) until some registered fd becomes ready
        nevents = epoll_wait(epollfd, events, maxevents, -1);

        if (nevents == -1) {
            // EINTR is benign — just retry
            if (errno == EINTR)
                continue;
            ALOGE("epoll_wait failed (errno=%d)", errno);
            continue;
        }

        // Dispatch each ready event to its registered handler
        for (i = 0; i < nevents; ++i) {
            if (events[i].events & EPOLLERR)
                ALOGD("EPOLLERR on event #%d", i);
            if (events[i].data.ptr)
                (*(void (*)(uint32_t))events[i].data.ptr)(events[i].events);
        }
    }
}

  上面epoll_wait等到lmkd socket的listen事件到来后,会调用event.data.ptr所指向的方法。在init方法中,我们已将event.data.ptr指向ctrl_connect_handler方法,所以这里调用的是ctrl_connect_handler方法。下面分析ctrl_connect_handler完成的内容:

ctrl_connect_handler方法

// Accepts a connection on the lmkd listening socket (normally from
// ActivityManager) and registers the data socket with epoll so that
// ctrl_data_handler is called when commands arrive.
static void ctrl_connect_handler(uint32_t events __unused) {
    // epoll registration record for the freshly accepted data socket
    struct epoll_event epev;

    // Only one client is supported: drop any existing connection first
    if (ctrl_dfd >= 0) {
        ctrl_data_close();
        ctrl_dfd_reopened = 1;
    }

    // Accept the pending connection on the lmkd socket
    ctrl_dfd = accept(ctrl_lfd, NULL, NULL);

    if (ctrl_dfd < 0) {
        ALOGE("lmkd control socket accept failed; errno=%d", errno);
        return;
    }

    ALOGI("ActivityManager connected");
    maxevents++;
    // Wake on readable data from the client
    epev.events = EPOLLIN;
    // Commands on this connection are handled by ctrl_data_handler
    epev.data.ptr = (void *)ctrl_data_handler;
    // Add the accepted data socket to the epoll set
    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ctrl_dfd, &epev) == -1) {
        ALOGE("epoll_ctl for data connection socket failed; errno=%d", errno);
        ctrl_data_close();
        return;
    }
}

  ctrl_connect_handler方法在处理lmkd socket的listen事件时,会向epoll注册另一个epoll事件,用于处理lmkd socket经accept后的数据读事件,该事件的处理方法为ctrl_data_handler,下面接着分析ctrl_data_handler方法

ctrl_data_handler方法

// Handles events on the accepted data socket: a hang-up means the client
// (ActivityManager) went away; readable data means a command packet.
static void ctrl_data_handler(uint32_t events) {
    if (events & EPOLLHUP) {
        ALOGI("ActivityManager disconnected");
        // Skip the close if the fd was already replaced by a reconnect
        if (!ctrl_dfd_reopened)
            ctrl_data_close();
    } else if (events & EPOLLIN) {
        // EPOLLIN: the socket has a command packet to read
        ctrl_command_handler();
    }
}

// Reads one command packet from the data socket, validates its argument
// count, and dispatches to the matching cmd_* handler.
// Packet layout: network-order int command code followed by int arguments.
static void ctrl_command_handler(void) {
    // Buffer holding the raw packet, viewed as an array of ints
    int ibuf[CTRL_PACKET_MAX / sizeof(int)];
    int len;
    int cmd = -1;
    int nargs;
    int targets;

    // Read the packet into ibuf
    len = ctrl_data_read((char *)ibuf, CTRL_PACKET_MAX);
    if (len <= 0)
        return;

    // Number of arguments after the command word
    nargs = len / sizeof(int) - 1;
    if (nargs < 0)
        goto wronglen;

    // First int of the packet is the command code (network byte order)
    cmd = ntohl(ibuf[0]);

    // Dispatch on the command code; each command expects a fixed argument
    // shape — see the lmk_cmd enum discussion after this listing
    switch(cmd) {
    case LMK_TARGET:
        // Arguments come in <minfree, adj> pairs; reject odd counts or
        // more pairs than the lowmem_adj table can hold
        targets = nargs / 2;
        if (nargs & 0x1 || targets > (int)ARRAY_SIZE(lowmem_adj))
            goto wronglen;
        // Handle LMK_TARGET
        cmd_target(targets, &ibuf[1]);
        break;
    case LMK_PROCPRIO:
        if (nargs != 3)
            goto wronglen;
        // Handle LMK_PROCPRIO: <pid> <uid> <oomadj>
        cmd_procprio(ntohl(ibuf[1]), ntohl(ibuf[2]), ntohl(ibuf[3]));
        break;
    case LMK_PROCREMOVE:
        if (nargs != 1)
            goto wronglen;
        // Handle LMK_PROCREMOVE: <pid>
        cmd_procremove(ntohl(ibuf[1]));
        break;
    default:
        ALOGE("Received unknown command code %d", cmd);
        return;
    }

    return;

wronglen:
    ALOGE("Wrong control socket read length cmd=%d len=%d", cmd, len);
}

  ctrl_command_handler能够处理三种类型的command,command在代码中用enum列出

enum lmk_cmd {
    // Update the system-wide minfree/adj tables;
    // sent by the framework from ProcessList.updateOomLevels()
    LMK_TARGET,
    // Update one process's oom_score_adj;
    // sent by the framework from ProcessList.setOomAdj()
    LMK_PROCPRIO,
    // Remove a process from lmkd's bookkeeping;
    // sent by the framework from ProcessList.remove()
    LMK_PROCREMOVE,
};

  上面三种command后续所带的参数使用如下

LMK_TARGET <minfree> <minkillprio> ... (up to 6 pairs)
LMK_PROCPRIO <pid> <uid> <prio>
LMK_PROCREMOVE <pid>

cmd_target方法

// LMK_TARGET handler: store the <minfree, adj> pairs sent by the framework
// and, when the in-kernel driver is present, write them to its sysfs
// parameters as comma-separated strings.
static void cmd_target(int ntargets, int *params) {
    int i;

    // Refuse more pairs than the tables can hold
    if (ntargets > (int)ARRAY_SIZE(lowmem_adj))
        return;

    // Record the framework-supplied values (still network byte order in
    // *params) into the lowmem_minfree and lowmem_adj tables
    for (i = 0; i < ntargets; i++) {
        lowmem_minfree[i] = ntohl(*params++);
        lowmem_adj[i] = ntohl(*params++);
    }

    lowmem_targets_size = ntargets;

    if (has_inkernel_module) {
        char minfreestr[128];
        char killpriostr[128];

        minfreestr[0] = '\0';
        killpriostr[0] = '\0';

        // Build comma-separated value strings from lowmem_minfree and
        // lowmem_adj, e.g. "14746,18432,22118,25805,55000,70000"
        for (i = 0; i < lowmem_targets_size; i++) {
            char val[40];

            if (i) {
                strlcat(minfreestr, ",", sizeof(minfreestr));
                strlcat(killpriostr, ",", sizeof(killpriostr));
            }

            // When not using the in-kernel interface, write zeros to
            // effectively neutralize the kernel driver
            snprintf(val, sizeof(val), "%d", use_inkernel_interface ? lowmem_minfree[i] : 0);
            strlcat(minfreestr, val, sizeof(minfreestr));
            snprintf(val, sizeof(val), "%d", use_inkernel_interface ? lowmem_adj[i] : 0);
            strlcat(killpriostr, val, sizeof(killpriostr));
        }

        // Write the minfree string to /sys/module/lowmemorykiller/parameters/minfree
        writefilestring(INKERNEL_MINFREE_PATH, minfreestr);
        // Write the adj string to /sys/module/lowmemorykiller/parameters/adj
        writefilestring(INKERNEL_ADJ_PATH, killpriostr);
    }
}

  所以cmd_target要做的内容很简单也很好理解,就是将framework传过来的数值记录到数组中,然后将这两个数组组织成字符串,再写入到内核对应的位置即可。接下来分析另一个command的处理方法cmd_procprio

cmd_procprio方法

// LMK_PROCPRIO handler: write the given oomadj into the target process's
// /proc/<pid>/oom_score_adj; in userspace mode, additionally set a memcg
// soft limit for the process and track it in lmkd's own tables.
static void cmd_procprio(int pid, int uid, int oomadj) {
    struct proc *procp;
    char path[80];
    char val[20];
    int soft_limit_mult;

    // Validate oomadj against the legal range
    if (oomadj < OOM_SCORE_ADJ_MIN || oomadj > OOM_SCORE_ADJ_MAX) {
        ALOGE("Invalid PROCPRIO oomadj argument %d", oomadj);
        return;
    }

    // Build the per-process path from the pid
    snprintf(path, sizeof(path), "/proc/%d/oom_score_adj", pid);
    snprintf(val, sizeof(val), "%d", oomadj);
    // Write oomadj into the process's oom_score_adj
    writefilestring(path, val);

    // With the in-kernel interface (the common case — it was set during
    // init on this device) the kernel driver does the rest; return here
    if (use_inkernel_interface)
        return;

    // Userspace mode below: map the oomadj to a memcg soft-limit multiplier
    if (oomadj >= 900) {
        soft_limit_mult = 0;
    } else if (oomadj >= 800) {
        soft_limit_mult = 0;
    } else if (oomadj >= 700) {
        soft_limit_mult = 0;
    } else if (oomadj >= 600) {
        // Launcher should be perceptible, don't kill it.
        oomadj = 200;
        soft_limit_mult = 1;
    } else if (oomadj >= 500) {
        soft_limit_mult = 0;
    } else if (oomadj >= 400) {
        soft_limit_mult = 0;
    } else if (oomadj >= 300) {
        soft_limit_mult = 1;
    } else if (oomadj >= 200) {
        soft_limit_mult = 2;
    } else if (oomadj >= 100) {
        soft_limit_mult = 10;
    } else if (oomadj >=   0) {
        soft_limit_mult = 20;
    } else {
        // Persistent processes will have a large
        // soft limit 512MB.
        soft_limit_mult = 64;
    }

    // Build the per-app memcg path from uid and pid
    snprintf(path, sizeof(path), "/dev/memcg/apps/uid_%d/pid_%d/memory.soft_limit_in_bytes", uid, pid);
    // Soft limit in bytes = multiplier * 8 MiB
    snprintf(val, sizeof(val), "%d", soft_limit_mult * EIGHT_MEGA);
    // Write the soft limit for this process's memcg
    writefilestring(path, val);

    // Look up the process in lmkd's own bookkeeping
    procp = pid_lookup(pid);
    if (!procp) {
            // Unknown pid: a new process — allocate and insert a proc record
            procp = malloc(sizeof(struct proc));
            if (!procp) {
                // Oh, the irony.  May need to rebuild our state.
                return;
            }

            procp->pid = pid;
            procp->uid = uid;
            procp->oomadj = oomadj;
            proc_insert(procp);
    } else {
        // Known pid: move the record to the bucket of its new adj value
        proc_unslot(procp);
        procp->oomadj = oomadj;
        proc_slot(procp);
    }
}

  因为我们现在使用的是内核接口,所以只需将oomadj数值写入到/proc/[pid]/oom_score_adj中即可起到更新进程的oomadj的效果。接下来分析最后一个command处理方法cmd_procremove

cmd_procremove方法

// LMK_PROCREMOVE handler: drop a process from lmkd's tables.
// A no-op when the in-kernel driver is in charge.
static void cmd_procremove(int pid) {
    // Nothing to track in in-kernel mode
    if (use_inkernel_interface)
        return;

    pid_remove(pid);
}

// Userspace-mode only: unlink the proc record for `pid` from the pid hash
// chain and its adj-slot list, then free it.
// Returns 0 on success, -1 if the pid is not tracked.
static int pid_remove(int pid) {
    int hval = pid_hashfn(pid);
    struct proc *procp;
    struct proc *prevp;

    // Walk the hash bucket's chain looking for pid, tracking the
    // predecessor so the chain can be relinked
    for (procp = pidhash[hval], prevp = NULL; procp && procp->pid != pid;
         procp = procp->pidhash_next)
            prevp = procp;

    if (!procp)
        return -1;

    // Unlink: bucket head if no predecessor, otherwise splice around it
    if (!prevp)
        pidhash[hval] = procp->pidhash_next;
    else
        prevp->pidhash_next = procp->pidhash_next;

    // Also remove from the per-adj slot list, then release the record
    proc_unslot(procp);
    free(procp);
    return 0;
}

  如果使用内核接口,这个方法最是简单了,什么都不用做。OK,系统侧lmkd对应源码到这里就分析完了。看着还是挺简单的,至于framework层我们这里只分析向底层发送这三种command部分的代码,具体的调用待后面有机会遇到再分析好了。

ProcessList.java lmkd部分源码分析

updateOomLevels方法分析

/**
 * Computes the minfree/adj tables from total RAM and display size, then
 * (when {@code write} is true) packs them into an LMK_TARGET packet and
 * sends it to lmkd via writeLmkd.
 */
private void updateOomLevels(int displayWidth, int displayHeight, boolean write) {
    // Scale factor from total RAM: 0 at 350MB, 1 at 700MB
    float scaleMem = ((float)(mTotalMemMb-350))/(700-350);

    // Scale factor from display resolution between 480x800 and 1280x800
    int minSize = 480*800;  //  384000
    int maxSize = 1280*800; // 1024000  230400 870400  .264
    float scaleDisp = ((float)(displayWidth*displayHeight)-minSize)/(maxSize-minSize);
    if (false) {
        Slog.i("XXXXXX", "scaleMem=" + scaleMem);
        Slog.i("XXXXXX", "scaleDisp=" + scaleDisp + " dw=" + displayWidth
                + " dh=" + displayHeight);
    }

    // Take the larger of the memory and display scale factors
    float scale = scaleMem > scaleDisp ? scaleMem : scaleDisp;
    // Clamp to [0, 1]; e.g. on a 2GB-RAM device scale ends up at 1
    if (scale < 0) scale = 0;
    else if (scale > 1) scale = 1;
    int minfree_adj = Resources.getSystem().getInteger(
            com.android.internal.R.integer.config_lowMemoryKillerMinFreeKbytesAdjust);
    int minfree_abs = Resources.getSystem().getInteger(
            com.android.internal.R.integer.config_lowMemoryKillerMinFreeKbytesAbsolute);
    if (false) {
        Slog.i("XXXXXX", "minfree_adj=" + minfree_adj + " minfree_abs=" + minfree_abs);
    }

    // Whether this is a 64-bit build
    final boolean is64bit = Build.SUPPORTED_64_BIT_ABIS.length > 0;

    // Fill mOomMinFree by interpolating between mOomMinFreeLow and
    // mOomMinFreeHigh with the scale factor
    for (int i=0; i < mOomAdj.length; i++) {
        int low = mOomMinFreeLow[i];
        int high = mOomMinFreeHigh[i];
        // On 64-bit, raise the high levels for the last two (cached) slots
        if (is64bit) {
            // Increase the high min-free levels for cached processes for 64-bit
            if (i == 4) high = (high*3)/2;
            else if (i == 5) high = (high*7)/4;
        }
        // With scale == 1 this reduces to the high value
        mOomMinFree[i] = (int)(low + ((high-low)*scale));
    }

    // Absolute override from config: rescale the whole table to it
    if (minfree_abs >= 0) {
        for (int i=0; i < mOomAdj.length; i++) {
            mOomMinFree[i] = (int)((float)minfree_abs * mOomMinFree[i]
                    / mOomMinFree[mOomAdj.length - 1]);
        }
    }

    // Relative adjustment from config, proportionally applied per slot
    if (minfree_adj != 0) {
        for (int i=0; i < mOomAdj.length; i++) {
            mOomMinFree[i] += (int)((float)minfree_adj * mOomMinFree[i]
                    / mOomMinFree[mOomAdj.length - 1]);
            if (mOomMinFree[i] < 0) {
                mOomMinFree[i] = 0;
            }
        }
    }

    mCachedRestoreLevel = (getMemLevel(ProcessList.CACHED_APP_MAX_ADJ)/1024) / 3;

    // Extra free kbytes reserved for the display (3 full-screen 32bpp buffers)
    int reserve = displayWidth * displayHeight * 4 * 3 / 1024;
    int reserve_adj = Resources.getSystem().getInteger(com.android.internal.R.integer.config_extraFreeKbytesAdjust);
    int reserve_abs = Resources.getSystem().getInteger(com.android.internal.R.integer.config_extraFreeKbytesAbsolute);

    if (reserve_abs >= 0) {
        reserve = reserve_abs;
    }

    if (reserve_adj != 0) {
        reserve += reserve_adj;
        if (reserve < 0) {
            reserve = 0;
        }
    }

    // When asked to persist, pack an LMK_TARGET packet (minfree in pages,
    // adj) and hand it to lmkd through writeLmkd
    if (write) {
        ByteBuffer buf = ByteBuffer.allocate(4 * (2*mOomAdj.length + 1));
        buf.putInt(LMK_TARGET);
        for (int i=0; i < mOomAdj.length; i++) {
            buf.putInt((mOomMinFree[i]*1024)/PAGE_SIZE);
            buf.putInt(mOomAdj[i]);
        }

        writeLmkd(buf);
        SystemProperties.set("sys.sysctl.extra_free_kbytes", Integer.toString(reserve));
    }
    // GB: 2048,3072,4096,6144,7168,8192
    // HC: 8192,10240,12288,14336,16384,20480
}

  updateOomLevels方法前面只是简单的计算出oomMinFree数组的值和oomAdj值,然后通过writeLmkd将数据发送给lmkd。下面看看writeLmkd方法

writeLmkd方法

/**
 * Sends a command packet to lmkd, retrying the socket connection up to
 * three times (with a 1s pause between attempts) before giving up.
 */
private static void writeLmkd(ByteBuffer buf) {

    // Up to three attempts to (re)open the lmkd socket and write
    for (int i = 0; i < 3; i++) {
        if (sLmkdSocket == null) {
                if (openLmkdSocket() == false) {
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException ie) {
                    }
                    continue;
                }
        }

        try {
            // Write the packet bytes to the socket
            sLmkdOutputStream.write(buf.array(), 0, buf.position());
            return;
        } catch (IOException ex) {
            Slog.w(TAG, "Error writing to lowmemorykiller socket");

            // Close and null the socket so the next attempt reconnects
            try {
                sLmkdSocket.close();
            } catch (IOException ex2) {
            }

            sLmkdSocket = null;
        }
    }
}

/**
 * Connects to the reserved local socket named "lmkd" and caches the
 * socket and its output stream. Returns true on success, false otherwise.
 */
private static boolean openLmkdSocket() {
    try {
        // Create a SEQPACKET local socket (matches the .rc declaration)
        sLmkdSocket = new LocalSocket(LocalSocket.SOCKET_SEQPACKET);
        // Connect to the reserved-namespace socket named "lmkd"
        sLmkdSocket.connect(
            new LocalSocketAddress("lmkd",
                    LocalSocketAddress.Namespace.RESERVED));
        // Cache the output stream used by writeLmkd
        sLmkdOutputStream = sLmkdSocket.getOutputStream();
    } catch (IOException ex) {
        Slog.w(TAG, "lowmemorykiller daemon socket open failed");
        sLmkdSocket = null;
        return false;
    }

    return true;
}

  LMK_PROCREMOVE和LMK_PROCPRIO这两个command的处理代码比较简单,这里博主就不多做分析了。

内核层lowmemorykiller驱动分析

  上面所讲的代码只是讲述对lowmemorykiller中adj、minfree或者应用的adj进行设置,并没有涉及到具体的kill应用过程。而真正kill应用触发就是在内核层的lowmemorykiller驱动中。所以lowmemorykiller驱动的源码我们还是要继续坚持分析的。

  lowmemorykiller是内核的一个驱动,驱动注册的时候都会触发其init方法调用,所以我们可以以init方法调用作为突破口来分析这个驱动的实现和功能

init方法调用

// Driver init: register the shrinker with the kernel's shrinker list and
// subscribe to vmpressure notifications.
static int __init lowmem_init(void)
{
    register_shrinker(&lowmem_shrinker);
    vmpressure_notifier_register(&lmk_vmpr_nb);
    return 0;
}

  init方法中注册了一个shrinker到内核的shrinker链表。当内存不足时 kswapd 线程会遍历一张 shrinker 链表,并回调已注册的 shrinker 函数来回收内存 page,kswapd 还会周期性唤醒来执行内存操作。每个 zone 维护 active_list 和 inactive_list 链表,内核根据页面活动状态将 page 在这两个链表之间移动,最终通过 shrink_slab 和 shrink_zone 来回收内存页,有兴趣想进一步了解 linux 内存回收机制,可自行研究。

  接着我们查看lowmem_shrinker结构中包含哪些方法

// Shrinker callbacks: lowmem_count reports reclaimable pages,
// lowmem_scan does the actual kill when memory runs low
static struct shrinker lowmem_shrinker = {
    .scan_objects = lowmem_scan,
    .count_objects = lowmem_count,
    .seeks = DEFAULT_SEEKS * 16
};

lowmem_scan方法

  当系统内存不足时会回调lowmem_scan方法来kill应用以达到释放内存的效果。

// Shrinker scan callback, invoked by the kernel under memory pressure:
// derive a minimum oom_score_adj from the current free memory and the
// minfree table, pick the "worst" eligible process (highest adj, ties
// broken by largest RSS), and SIGKILL it.
// Returns the number of pages expected to be freed.
static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
{
    struct task_struct *tsk;
    struct task_struct *selected = NULL;
    unsigned long rem = 0;
    int tasksize;
    int i;
    int ret = 0;
    short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
    int minfree = 0;
    int selected_tasksize = 0;
    short selected_oom_score_adj;
    int array_size = ARRAY_SIZE(lowmem_adj);
    int other_free;
    int other_file;

    // Serialize scans; bail out if interrupted while waiting
    if (mutex_lock_interruptible(&scan_mutex) < 0)
        return 0;

    // Free pages currently available
    other_free = global_page_state(NR_FREE_PAGES);

    // Reclaimable file-backed pages (excluding shmem, unevictable and
    // swap-cache pages); zero if the accounting would go negative
    if (global_page_state(NR_SHMEM) + total_swapcache_pages() <
        global_page_state(NR_FILE_PAGES) + zcache_pages())
        other_file = global_page_state(NR_FILE_PAGES) + zcache_pages() -
                        global_page_state(NR_SHMEM) -
                        global_page_state(NR_UNEVICTABLE) -
                        total_swapcache_pages();
    else
        other_file = 0;

    tune_lmk_param(&other_free, &other_file, sc);

    // Clamp to the number of entries actually written via module params
    if (lowmem_adj_size < array_size)
        array_size = lowmem_adj_size;
    if (lowmem_minfree_size < array_size)
        array_size = lowmem_minfree_size;

    // Find the adj threshold matching the current free-memory level:
    // first minfree level that both free and file counts fall below
    for (i = 0; i < array_size; i++) {
        minfree = lowmem_minfree[i];
        if (other_free < minfree && other_file < minfree) {
            min_score_adj = lowmem_adj[i];
            break;
        }
    }

    // Allow platform code to adjust the minimum adj score
    ret = adjust_minadj(&min_score_adj);

    lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n",
            sc->nr_to_scan, sc->gfp_mask, other_free,
            other_file, min_score_adj);

    // Still OOM_SCORE_ADJ_MAX + 1 means no threshold was crossed:
    // enough memory remains, nothing to kill
    if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
        trace_almk_shrink(0, ret, other_free, other_file, 0);
        lowmem_print(5, "lowmem_scan %lu, %x, return 0\n",
                 sc->nr_to_scan, sc->gfp_mask);
        mutex_unlock(&scan_mutex);
        return 0;
    }

    selected_oom_score_adj = min_score_adj;

    rcu_read_lock();
    // Walk every process in the system
    for_each_process(tsk) {
        struct task_struct *p;
        short oom_score_adj;

        // Skip kernel threads
        if (tsk->flags & PF_KTHREAD)
            continue;

        // Skip tasks whose mm has already been released
        if (test_task_flag(tsk, TIF_MM_RELEASED))
            continue;

        // If a previous kill is still pending, give it time to finish
        // instead of killing again
        if (time_before_eq(jiffies, lowmem_deathpending_timeout)) {
            if (test_task_flag(tsk, TIF_MEMDIE)) {
                rcu_read_unlock();
                /* give the system time to free up the memory */
                msleep_interruptible(20);
                mutex_unlock(&scan_mutex);
                return 0;
            }
        }

        p = find_lock_task_mm(tsk);
        if (!p)
            continue;

        // Below the threshold means the process is too important to
        // kill at this pressure level — skip it
        oom_score_adj = p->signal->oom_score_adj;
        if (oom_score_adj < min_score_adj) {
            task_unlock(p);
            continue;
        }
        // At or above the threshold: candidate for killing.
        // Record its resident set size first.
        tasksize = get_mm_rss(p->mm);
        task_unlock(p);
        if (tasksize <= 0)
            continue;
        // Compare with the currently selected victim, if any
        if (selected) {
            // A lower adj than the current victim means this process is
            // more important — keep the current victim
            if (oom_score_adj < selected_oom_score_adj)
                continue;
            // Equal adj but smaller footprint — also keep the current one
            if (oom_score_adj == selected_oom_score_adj &&
                tasksize <= selected_tasksize)
                continue;
        }
        // This process is a better victim (higher adj, or same adj with a
        // bigger footprint): remember it and continue scanning
        selected = p;
        selected_tasksize = tasksize;
        selected_oom_score_adj = oom_score_adj;
        lowmem_print(3, "select '%s' (%d), adj %hd, size %d, to kill\n",
                 p->comm, p->pid, oom_score_adj, tasksize);
    }
    // A victim was found
    if (selected) {
        // Sizes in kB for tracing and logging
        long cache_size = other_file * (long)(PAGE_SIZE / 1024);
        long cache_limit = minfree * (long)(PAGE_SIZE / 1024);
        long free = other_free * (long)(PAGE_SIZE / 1024);
        trace_lowmemory_kill(selected, cache_size, cache_limit, free);
        // Log the kill and the memory state that triggered it
        lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" \
                "   to free %ldkB on behalf of '%s' (%d) because\n" \
                "   cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" \
                "   Free memory is %ldkB above reserved.\n" \
                "   Free CMA is %ldkB\n" \
                "   Total reserve is %ldkB\n" \
                "   Total free pages is %ldkB\n" \
                "   Total file cache is %ldkB\n" \
                "   Total zcache is %ldkB\n" \
                "   GFP mask is 0x%x\n",
                 selected->comm, selected->pid,
                 selected_oom_score_adj,
                 selected_tasksize * (long)(PAGE_SIZE / 1024),
                 current->comm, current->pid,
                 cache_size, cache_limit,
                 min_score_adj,
                 other_free * (long)(PAGE_SIZE / 1024),
                 global_page_state(NR_FREE_CMA_PAGES) *
                (long)(PAGE_SIZE / 1024),
                 totalreserve_pages * (long)(PAGE_SIZE / 1024),
                 global_page_state(NR_FREE_PAGES) *
                (long)(PAGE_SIZE / 1024),
                 global_page_state(NR_FILE_PAGES) *
                (long)(PAGE_SIZE / 1024),
                 (long)zcache_pages() * (long)(PAGE_SIZE / 1024),
                 sc->gfp_mask);

        if (lowmem_debug_level >= 2 && selected_oom_score_adj == 0) {
            show_mem(SHOW_MEM_FILTER_NODES);
            dump_tasks(NULL, NULL);
        }

        lowmem_deathpending_timeout = jiffies + HZ;
        set_tsk_thread_flag(selected, TIF_MEMDIE);
        // Deliver SIGKILL to the selected process
        send_sig(SIGKILL, selected, 0);
        // Account the pages expected to be freed
        rem += selected_tasksize;
        rcu_read_unlock();
        /* give the system time to free up the memory */
        msleep_interruptible(20);
        trace_almk_shrink(selected_tasksize, ret,
            other_free, other_file, selected_oom_score_adj);
    } else {
        trace_almk_shrink(1, ret, other_free, other_file, 0);
        rcu_read_unlock();
    }

    lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n",
             sc->nr_to_scan, sc->gfp_mask, rem);
    mutex_unlock(&scan_mutex);
    return rem;
}

  这段代码的主要思想就是首先获取当前内存剩余量,根据剩余量获取到对应的min free adj值;接着遍历当前系统中的所有进程,从中挑选出进程的oom adj最大者,如果存在进程oom adj值相同,则挑选出其中占用内存最大的那个进程;最后向这个进程发送SIGKILL信号,以达到杀死该进程释放内存的效果。这里博主有个思考:如果系统的进程非常多,当内存不足进行lowmemorykiller时每次都需要遍历一遍所有进程,那效率岂不是很低,这里有没有优化的方法呢?留待给大家思考,有好的想法也可以留言交流。接下来就剩下最后一个方法了。

lowmem_count方法

// Shrinker count callback: report the total of active/inactive anonymous
// and file pages as potentially reclaimable; 0 when the killer is disabled.
static unsigned long lowmem_count(struct shrinker *s,
                  struct shrink_control *sc)
{
    if (!enable_lmk)
        return 0;

    return global_page_state(NR_ACTIVE_ANON) +
        global_page_state(NR_ACTIVE_FILE) +
        global_page_state(NR_INACTIVE_ANON) +
        global_page_state(NR_INACTIVE_FILE);
}

  也很简单,这里只是简单统计各部分占用的内存大小,然后将其返回。

  好了,这一篇就到这里了。内容多了一点点,但是都是比较好理解的。

发表评论

电子邮件地址不会被公开。 必填项已用*标注

This site uses Akismet to reduce spam. Learn how your comment data is processed.

%d 博主赞过: