lmkd: Use process_mrelease to reap the target process from a thread am: 7c3addb2a1
Original change: https://android-review.googlesource.com/c/platform/system/memory/lmkd/+/1904583 Change-Id: I21a13a46890d2eee9ddd66cbeaec1c8f3e9317f4
This commit is contained in:
commit
c4067fc9f2
|
|
@ -12,7 +12,10 @@ cc_defaults {
|
|||
cc_binary {
|
||||
name: "lmkd",
|
||||
|
||||
srcs: ["lmkd.cpp"],
|
||||
srcs: [
|
||||
"lmkd.cpp",
|
||||
"reaper.cpp",
|
||||
],
|
||||
shared_libs: [
|
||||
"libcutils",
|
||||
"liblog",
|
||||
|
|
|
|||
145
lmkd.cpp
145
lmkd.cpp
|
|
@ -16,12 +16,10 @@
|
|||
|
||||
#define LOG_TAG "lowmemorykiller"
|
||||
|
||||
#include <dirent.h>
|
||||
#include <errno.h>
|
||||
#include <inttypes.h>
|
||||
#include <pwd.h>
|
||||
#include <sched.h>
|
||||
#include <signal.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
|
@ -30,12 +28,9 @@
|
|||
#include <sys/eventfd.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/pidfd.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/sysinfo.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
|
|
@ -47,10 +42,9 @@
|
|||
#include <log/log_event_list.h>
|
||||
#include <log/log_time.h>
|
||||
#include <private/android_filesystem_config.h>
|
||||
#include <processgroup/processgroup.h>
|
||||
#include <psi/psi.h>
|
||||
#include <system/thread_defs.h>
|
||||
|
||||
#include "reaper.h"
|
||||
#include "statslog.h"
|
||||
|
||||
#define BPF_FD_JUST_USE_INT
|
||||
|
|
@ -228,6 +222,8 @@ static struct psi_threshold psi_thresholds[VMPRESS_LEVEL_COUNT] = {
|
|||
};
|
||||
|
||||
static android_log_context ctx;
|
||||
static Reaper reaper;
|
||||
static int reaper_comm_fd[2];
|
||||
|
||||
enum polling_update {
|
||||
POLLING_DO_NOT_CHANGE,
|
||||
|
|
@ -277,9 +273,9 @@ static struct event_handler_info vmpressure_hinfo[VMPRESS_LEVEL_COUNT];
|
|||
|
||||
/*
|
||||
* 1 ctrl listen socket, 3 ctrl data socket, 3 memory pressure levels,
|
||||
* 1 lmk events + 1 fd to wait for process death
|
||||
* 1 lmk events + 1 fd to wait for process death + 1 fd to receive kill failure notifications
|
||||
*/
|
||||
#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT + 1 + 1)
|
||||
#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT + 1 + 1 + 1)
|
||||
static int epollfd;
|
||||
static int maxevents;
|
||||
|
||||
|
|
@ -2008,42 +2004,6 @@ static struct proc *proc_get_heaviest(int oomadj) {
|
|||
return maxprocp;
|
||||
}
|
||||
|
||||
static void set_process_group_and_prio(int pid, const std::vector<std::string>& profiles,
|
||||
int prio) {
|
||||
DIR* d;
|
||||
char proc_path[PATH_MAX];
|
||||
struct dirent* de;
|
||||
|
||||
snprintf(proc_path, sizeof(proc_path), "/proc/%d/task", pid);
|
||||
if (!(d = opendir(proc_path))) {
|
||||
ALOGW("Failed to open %s; errno=%d: process pid(%d) might have died", proc_path, errno,
|
||||
pid);
|
||||
return;
|
||||
}
|
||||
|
||||
while ((de = readdir(d))) {
|
||||
int t_pid;
|
||||
|
||||
if (de->d_name[0] == '.') continue;
|
||||
t_pid = atoi(de->d_name);
|
||||
|
||||
if (!t_pid) {
|
||||
ALOGW("Failed to get t_pid for '%s' of pid(%d)", de->d_name, pid);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (setpriority(PRIO_PROCESS, t_pid, prio) && errno != ESRCH) {
|
||||
ALOGW("Unable to raise priority of killing t_pid (%d): errno=%d", t_pid, errno);
|
||||
}
|
||||
|
||||
if (!SetTaskProfiles(t_pid, profiles, true)) {
|
||||
ALOGW("Failed to set task_profiles on pid(%d) t_pid(%d)", pid, t_pid);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
closedir(d);
|
||||
}
|
||||
|
||||
static bool is_kill_pending(void) {
|
||||
char buf[24];
|
||||
|
||||
|
|
@ -2114,6 +2074,19 @@ static void kill_done_handler(int data __unused, uint32_t events __unused,
|
|||
poll_params->update = POLLING_RESUME;
|
||||
}
|
||||
|
||||
static void kill_fail_handler(int data __unused, uint32_t events __unused,
|
||||
struct polling_params *poll_params) {
|
||||
int pid;
|
||||
|
||||
// Extract pid from the communication pipe. Clearing the pipe this way allows further
|
||||
// epoll_wait calls to sleep until the next event.
|
||||
if (TEMP_FAILURE_RETRY(read(reaper_comm_fd[0], &pid, sizeof(pid))) != sizeof(pid)) {
|
||||
ALOGE("thread communication read failed: %s", strerror(errno));
|
||||
}
|
||||
stop_wait_for_proc_kill(false);
|
||||
poll_params->update = POLLING_RESUME;
|
||||
}
|
||||
|
||||
static void start_wait_for_proc_kill(int pid_or_fd) {
|
||||
static struct event_handler_info kill_done_hinfo = { 0, kill_done_handler };
|
||||
struct epoll_event epev;
|
||||
|
|
@ -2149,7 +2122,7 @@ static int kill_one_process(struct proc* procp, int min_oom_score, struct kill_i
|
|||
int pidfd = procp->pidfd;
|
||||
uid_t uid = procp->uid;
|
||||
char *taskname;
|
||||
int r;
|
||||
int kill_result;
|
||||
int result = -1;
|
||||
struct memory_stat *mem_st;
|
||||
struct kill_stat kill_st;
|
||||
|
|
@ -2188,29 +2161,21 @@ static int kill_one_process(struct proc* procp, int min_oom_score, struct kill_i
|
|||
|
||||
snprintf(desc, sizeof(desc), "lmk,%d,%d,%d,%d,%d", pid, ki ? (int)ki->kill_reason : -1,
|
||||
procp->oomadj, min_oom_score, ki ? ki->max_thrashing : -1);
|
||||
|
||||
trace_kill_start(pid, desc);
|
||||
|
||||
/* CAP_KILL required */
|
||||
if (pidfd < 0) {
|
||||
start_wait_for_proc_kill(pid);
|
||||
r = kill(pid, SIGKILL);
|
||||
} else {
|
||||
start_wait_for_proc_kill(pidfd);
|
||||
r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
|
||||
}
|
||||
start_wait_for_proc_kill(pidfd < 0 ? pid : pidfd);
|
||||
kill_result = reaper.kill({ pidfd, pid });
|
||||
|
||||
trace_kill_end();
|
||||
|
||||
if (r) {
|
||||
if (kill_result) {
|
||||
stop_wait_for_proc_kill(false);
|
||||
ALOGE("kill(%d): errno=%d", pid, errno);
|
||||
/* Delete process record even when we fail to kill so that we don't get stuck on it */
|
||||
goto out;
|
||||
}
|
||||
|
||||
set_process_group_and_prio(pid, {"CPUSET_SP_FOREGROUND", "SCHED_SP_FOREGROUND"},
|
||||
ANDROID_PRIORITY_HIGHEST);
|
||||
|
||||
last_kill_tm = *tm;
|
||||
|
||||
inc_killcnt(procp->oomadj);
|
||||
|
|
@ -3158,6 +3123,63 @@ static void destroy_monitors() {
|
|||
}
|
||||
}
|
||||
|
||||
/* Closes both ends of the reaper communication pipe, read end first. */
static void drop_reaper_comm() {
    for (int end = 0; end < 2; end++) {
        close(reaper_comm_fd[end]);
    }
}
|
||||
|
||||
static bool setup_reaper_comm() {
|
||||
if (pipe(reaper_comm_fd)) {
|
||||
ALOGE("pipe failed: %s", strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Ensure main thread never blocks on read
|
||||
int flags = fcntl(reaper_comm_fd[0], F_GETFL);
|
||||
if (fcntl(reaper_comm_fd[0], F_SETFL, flags | O_NONBLOCK)) {
|
||||
ALOGE("fcntl failed: %s", strerror(errno));
|
||||
drop_reaper_comm();
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool init_reaper() {
|
||||
if (!reaper.is_reaping_supported()) {
|
||||
ALOGI("Process reaping is not supported");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!setup_reaper_comm()) {
|
||||
ALOGE("Failed to create thread communication channel");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Setup epoll handler
|
||||
struct epoll_event epev;
|
||||
static struct event_handler_info kill_failed_hinfo = { 0, kill_fail_handler };
|
||||
epev.events = EPOLLIN;
|
||||
epev.data.ptr = (void *)&kill_failed_hinfo;
|
||||
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, reaper_comm_fd[0], &epev)) {
|
||||
ALOGE("epoll_ctl failed: %s", strerror(errno));
|
||||
drop_reaper_comm();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!reaper.init(reaper_comm_fd[1])) {
|
||||
ALOGE("Failed to initialize reaper object");
|
||||
if (epoll_ctl(epollfd, EPOLL_CTL_DEL, reaper_comm_fd[0], &epev)) {
|
||||
ALOGE("epoll_ctl failed: %s", strerror(errno));
|
||||
}
|
||||
drop_reaper_comm();
|
||||
return false;
|
||||
}
|
||||
maxevents++;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int init(void) {
|
||||
static struct event_handler_info kernel_poll_hinfo = { 0, kernel_event_handler };
|
||||
struct reread_data file_data = {
|
||||
|
|
@ -3480,6 +3502,8 @@ static void update_props() {
|
|||
thrashing_limit_pct * 2));
|
||||
swap_util_max = clamp(0, 100, GET_LMK_PROPERTY(int32, "swap_util_max", 100));
|
||||
filecache_min_kb = GET_LMK_PROPERTY(int64, "filecache_min_kb", 0);
|
||||
|
||||
reaper.enable_debug(debug_process_killing);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
|
@ -3521,6 +3545,11 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
}
|
||||
|
||||
if (init_reaper()) {
|
||||
ALOGI("Process reaper initialized with %d threads in the pool",
|
||||
reaper.thread_cnt());
|
||||
}
|
||||
|
||||
mainloop();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,248 @@
|
|||
/*
|
||||
* Copyright 2021 Google, Inc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#define LOG_TAG "lowmemorykiller"
|
||||
|
||||
#include <dirent.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <log/log.h>
|
||||
#include <signal.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <sys/pidfd.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/sysinfo.h>
|
||||
#include <sys/types.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <processgroup/processgroup.h>
|
||||
#include <system/thread_defs.h>
|
||||
|
||||
#include "reaper.h"
|
||||
|
||||
#define NS_PER_MS (NS_PER_SEC / MS_PER_SEC)
|
||||
#define THREAD_POOL_SIZE 2
|
||||
|
||||
#ifndef __NR_process_mrelease
|
||||
#define __NR_process_mrelease 448
|
||||
#endif
|
||||
|
||||
/* Thin wrapper that issues the process_mrelease syscall by number and returns its result. */
static int process_mrelease(int pidfd, unsigned int flags) {
    const long rc = syscall(__NR_process_mrelease, pidfd, flags);

    return static_cast<int>(rc);
}
|
||||
|
||||
static inline long get_time_diff_ms(struct timespec *from,
|
||||
struct timespec *to) {
|
||||
return (to->tv_sec - from->tv_sec) * (long)MS_PER_SEC +
|
||||
(to->tv_nsec - from->tv_nsec) / (long)NS_PER_MS;
|
||||
}
|
||||
|
||||
static void* reaper_main(void* param) {
|
||||
Reaper *reaper = static_cast<Reaper*>(param);
|
||||
struct timespec start_tm, end_tm;
|
||||
struct Reaper::target_proc target;
|
||||
pid_t tid = gettid();
|
||||
|
||||
// Ensure the thread does not use little cores
|
||||
if (!SetTaskProfiles(tid, {"CPUSET_SP_FOREGROUND"}, true)) {
|
||||
ALOGE("Failed to assign cpuset to the reaper thread");
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
target = reaper->dequeue_request();
|
||||
|
||||
if (reaper->debug_enabled()) {
|
||||
clock_gettime(CLOCK_MONOTONIC_COARSE, &start_tm);
|
||||
}
|
||||
|
||||
if (pidfd_send_signal(target.pidfd, SIGKILL, NULL, 0)) {
|
||||
// Inform the main thread about failure to kill
|
||||
reaper->notify_kill_failure(target.pid);
|
||||
goto done;
|
||||
}
|
||||
if (process_mrelease(target.pidfd, 0)) {
|
||||
ALOGE("process_mrelease %d failed: %s", target.pidfd, strerror(errno));
|
||||
goto done;
|
||||
}
|
||||
if (reaper->debug_enabled()) {
|
||||
clock_gettime(CLOCK_MONOTONIC_COARSE, &end_tm);
|
||||
ALOGI("Process %d was reaped in %ldms", target.pid,
|
||||
get_time_diff_ms(&start_tm, &end_tm));
|
||||
}
|
||||
done:
|
||||
close(target.pidfd);
|
||||
reaper->request_complete();
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool Reaper::is_reaping_supported() {
|
||||
static enum {
|
||||
UNKNOWN,
|
||||
SUPPORTED,
|
||||
UNSUPPORTED
|
||||
} reap_support = UNKNOWN;
|
||||
|
||||
if (reap_support == UNKNOWN) {
|
||||
if (process_mrelease(-1, 0) && errno == ENOSYS) {
|
||||
reap_support = UNSUPPORTED;
|
||||
} else {
|
||||
reap_support = SUPPORTED;
|
||||
}
|
||||
}
|
||||
return reap_support == SUPPORTED;
|
||||
}
|
||||
|
||||
bool Reaper::init(int comm_fd) {
|
||||
char name[16];
|
||||
|
||||
if (thread_cnt_ > 0) {
|
||||
// init should not be called multiple times
|
||||
return false;
|
||||
}
|
||||
|
||||
thread_pool_ = new pthread_t[THREAD_POOL_SIZE];
|
||||
for (int i = 0; i < THREAD_POOL_SIZE; i++) {
|
||||
if (pthread_create(&thread_pool_[thread_cnt_], NULL, reaper_main, this)) {
|
||||
ALOGE("pthread_create failed: %s", strerror(errno));
|
||||
continue;
|
||||
}
|
||||
snprintf(name, sizeof(name), "lmkd_reaper%d", thread_cnt_);
|
||||
if (pthread_setname_np(thread_pool_[thread_cnt_], name)) {
|
||||
ALOGW("pthread_setname_np failed: %s", strerror(errno));
|
||||
}
|
||||
thread_cnt_++;
|
||||
}
|
||||
|
||||
if (!thread_cnt_) {
|
||||
delete[] thread_pool_;
|
||||
return false;
|
||||
}
|
||||
|
||||
queue_.reserve(thread_cnt_);
|
||||
comm_fd_ = comm_fd;
|
||||
return true;
|
||||
}
|
||||
|
||||
static void set_process_group_and_prio(int pid, const std::vector<std::string>& profiles,
|
||||
int prio) {
|
||||
DIR* d;
|
||||
char proc_path[PATH_MAX];
|
||||
struct dirent* de;
|
||||
|
||||
snprintf(proc_path, sizeof(proc_path), "/proc/%d/task", pid);
|
||||
if (!(d = opendir(proc_path))) {
|
||||
ALOGW("Failed to open %s; errno=%d: process pid(%d) might have died", proc_path, errno,
|
||||
pid);
|
||||
return;
|
||||
}
|
||||
|
||||
while ((de = readdir(d))) {
|
||||
int t_pid;
|
||||
|
||||
if (de->d_name[0] == '.') continue;
|
||||
t_pid = atoi(de->d_name);
|
||||
|
||||
if (!t_pid) {
|
||||
ALOGW("Failed to get t_pid for '%s' of pid(%d)", de->d_name, pid);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (setpriority(PRIO_PROCESS, t_pid, prio) && errno != ESRCH) {
|
||||
ALOGW("Unable to raise priority of killing t_pid (%d): errno=%d", t_pid, errno);
|
||||
}
|
||||
|
||||
if (!SetTaskProfiles(t_pid, profiles, true)) {
|
||||
ALOGW("Failed to set task_profiles on pid(%d) t_pid(%d)", pid, t_pid);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
closedir(d);
|
||||
}
|
||||
|
||||
bool Reaper::async_kill(const struct target_proc& target) {
|
||||
if (target.pidfd == -1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!thread_cnt_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
mutex_.lock();
|
||||
if (active_requests_ >= thread_cnt_) {
|
||||
mutex_.unlock();
|
||||
return false;
|
||||
}
|
||||
active_requests_++;
|
||||
|
||||
// Duplicate pidfd instead of reusing the original one to avoid synchronization and refcounting
|
||||
// when both reaper and main threads are using or closing the pidfd
|
||||
queue_.push_back({ dup(target.pidfd), target.pid });
|
||||
// Wake up a reaper thread
|
||||
cond_.notify_one();
|
||||
mutex_.unlock();
|
||||
|
||||
set_process_group_and_prio(target.pid, {"CPUSET_SP_FOREGROUND", "SCHED_SP_FOREGROUND"},
|
||||
ANDROID_PRIORITY_HIGHEST);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int Reaper::kill(const struct target_proc& target) {
|
||||
/* CAP_KILL required */
|
||||
if (target.pidfd < 0) {
|
||||
return ::kill(target.pid, SIGKILL);
|
||||
}
|
||||
|
||||
if (async_kill(target)) {
|
||||
// we assume the kill will be successful and if it fails we will be notified
|
||||
return 0;
|
||||
}
|
||||
|
||||
return pidfd_send_signal(target.pidfd, SIGKILL, NULL, 0);
|
||||
}
|
||||
|
||||
/*
 * Blocks until the request queue is non-empty, then removes and returns the most recently
 * queued request. The queue is accessed with mutex_ held.
 */
Reaper::target_proc Reaper::dequeue_request() {
    std::unique_lock<std::mutex> guard(mutex_);

    // Sleep on the condition variable for as long as there is nothing to do
    cond_.wait(guard, [this] { return !queue_.empty(); });

    struct target_proc next = queue_.back();
    queue_.pop_back();

    return next;
}
|
||||
|
||||
void Reaper::request_complete() {
|
||||
std::scoped_lock<std::mutex> lock(mutex_);
|
||||
active_requests_--;
|
||||
}
|
||||
|
||||
void Reaper::notify_kill_failure(int pid) {
|
||||
std::scoped_lock<std::mutex> lock(mutex_);
|
||||
|
||||
ALOGE("Failed to kill process %d", pid);
|
||||
if (TEMP_FAILURE_RETRY(write(comm_fd_, &pid, sizeof(pid))) != sizeof(pid)) {
|
||||
ALOGE("thread communication write failed: %s", strerror(errno));
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Copyright 2021 Google, Inc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
/*
 * Pool of threads that kill processes on behalf of the caller. Requests are passed to the
 * threads through a mutex-protected queue; kill failures are reported back by writing the pid
 * to a file descriptor supplied in init().
 */
class Reaper {
public:
    // A process to kill, identified by pidfd (negative when not available) and pid.
    struct target_proc {
        int pidfd;
        int pid;
    };
private:
    // mutex_ and cond_ are used to wake up the reaper thread.
    std::mutex mutex_;
    std::condition_variable cond_;
    // mutex_ protects queue_ and active_requests_ access.
    std::vector<struct target_proc> queue_;
    int active_requests_;
    // write side of the pipe to communicate kill failures with the main thread
    int comm_fd_;
    int thread_cnt_;
    pthread_t* thread_pool_;
    bool debug_enabled_;

    bool async_kill(const struct target_proc& target);
public:
    // Every member gets a defined value: comm_fd_ and thread_pool_ are only assigned by
    // init() and would otherwise hold indeterminate values until then.
    Reaper()
        : active_requests_(0),
          comm_fd_(-1),
          thread_cnt_(0),
          thread_pool_(nullptr),
          debug_enabled_(false) {}

    static bool is_reaping_supported();

    // starts the thread pool; comm_fd receives the pids of failed kills
    bool init(int comm_fd);
    int thread_cnt() const { return thread_cnt_; }
    void enable_debug(bool enable) { debug_enabled_ = enable; }
    bool debug_enabled() const { return debug_enabled_; }

    // return 0 on success or error code returned by the syscall
    int kill(const struct target_proc& target);
    // below members are used only by reaper_main
    target_proc dequeue_request();
    void request_complete();
    void notify_kill_failure(int pid);
};
|
||||
Loading…
Reference in New Issue