Signed-off-by: Wenchao Hao <haowenchao22@gmail.com>
This commit is contained in:
Wenchao Hao 2025-05-10 20:23:22 +08:00
commit c1e8f3652c
6 changed files with 496 additions and 0 deletions

1
applist Executable file
View File

@ -0,0 +1 @@
com.ss.android.ugc.aweme

132
cold_start_app.sh Executable file
View File

@ -0,0 +1,132 @@
#!/usr/bin/env bash
# cold_start_app.sh - capture one cold start of an Android app over adb.
#
# Usage: cold_start_app.sh <package> <round>
#   <package>  application package name, handed to simpleperf's --app option
#   <round>    label for this run; it is embedded in every output file name
#
# This opening section snapshots the 64kB anonymous-fault hugepage counters
# (they are read again after the trace so the increase can be computed),
# starts a 3 second "simpleperf stat" in the background and tells the
# operator to launch the app.
if [ "$#" -lt 2 ]; then
  echo "usage: ${0##*/} <package> <round>" >&2
  exit 2
fi
app=$1
round=$2

thp_stats=/sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats
prev_alloc=$(adb shell cat "$thp_stats/anon_fault_alloc")
prev_alloc_fallback=$(adb shell cat "$thp_stats/anon_fault_fallback")

# The whole remote command is one quoted word so that the package name and
# round label are never word-split or glob-expanded by the local shell.
adb shell "simpleperf stat -e dTLB-loads,dTLB-load-misses,iTLB-loads,iTLB-load-misses,page-faults,raw-stall-backend-mem --app ${app} --duration 3 -o /data/local/tmp/simpleperf-${app}-${round}.txt" &
echo "即将抓${app}的perfetto, 请1秒后启动应用"
# Record a perfetto trace on the device. The text-format trace config is fed
# to perfetto on stdin ("-c - --txt"); the heredoc delimiter is quoted because
# the config contains nothing the shell should expand. The trace is written to
# a file on the device named after the package and the round label.
trace_file="/data/misc/perfetto-traces/trace-${app}-${round}.ptrace"
adb shell perfetto -c - --txt -o "$trace_file" <<'EOF'
buffers: {
size_kb: 707200
fill_policy: DISCARD
}
buffers: {
size_kb: 707200
fill_policy: DISCARD
}
data_sources: {
config {
name: "linux.process_stats"
target_buffer: 1
process_stats_config {
scan_all_processes_on_start: true
proc_stats_poll_ms: 1000
}
}
}
data_sources: {
config {
name: "android.surfaceflinger.frametimeline"
}
}
data_sources: {
config {
name: "linux.sys_stats"
sys_stats_config {
meminfo_period_ms: 250
meminfo_counters: MEMINFO_MEM_FREE
meminfo_counters: MEMINFO_MEM_AVAILABLE
cpufreq_period_ms: 250
}
}
}
data_sources: {
config {
name: "linux.ftrace"
ftrace_config {
buffer_size_kb: 81920
drain_period_ms: 100
symbolize_ksyms: true
ftrace_events: "power/cpu_frequency"
ftrace_events: "sched/sched_switch"
ftrace_events: "power/suspend_resume"
ftrace_events: "sched/sched_wakeup"
ftrace_events: "sched/sched_wakeup_new"
ftrace_events: "sched/sched_waking"
ftrace_events: "power/cpu_idle"
ftrace_events: "vmscan/mm_vmscan_kswapd_wake"
ftrace_events: "vmscan/mm_vmscan_kswapd_sleep"
ftrace_events: "vmscan/mm_vmscan_direct_reclaim_begin"
ftrace_events: "vmscan/mm_vmscan_direct_reclaim_end"
ftrace_events: "compaction/mm_compaction_begin"
ftrace_events: "compaction/mm_compaction_end"
ftrace_events: "mm_filemap_add_to_page_cache"
ftrace_events: "mm_filemap_delete_from_page_cache"
ftrace_events: "sched/sched_process_exit"
ftrace_events: "sched/sched_process_free"
ftrace_events: "task/task_newtask"
ftrace_events: "task/task_rename"
ftrace_events: "lowmemorykiller/lowmemory_kill"
ftrace_events: "oom/oom_score_adj_update"
ftrace_events: "ftrace/print"
ftrace_events: "binder/*"
ftrace_events: "power/gpu_frequency"
ftrace_events: "sched/sched_blocked_reason"
atrace_categories: "input"
atrace_categories: "gfx"
atrace_categories: "view"
atrace_categories: "webview"
atrace_categories: "camera"
atrace_categories: "dalvik"
atrace_categories: "power"
atrace_categories: "wm"
atrace_categories: "am"
atrace_categories: "ss"
atrace_categories: "sched"
atrace_categories: "freq"
atrace_categories: "binder_driver"
atrace_categories: "aidl"
atrace_categories: "binder_lock"
atrace_apps: "*"
}
}
}
duration_ms: 10000
write_into_file: true
flush_period_ms: 30000
incremental_state_config {
clear_period_ms: 5000
}
EOF
# The dumpsys meminfo call further down allocates huge pages itself, so the
# hugepage counters are sampled before it runs.
thp_stats=/sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats
alloc=$(adb shell cat "$thp_stats/anon_fault_alloc")
alloc_fallback=$(adb shell cat "$thp_stats/anon_fault_fallback")

# Every artefact of this script is written to ./out; create it up front so
# the redirections below cannot fail on a fresh checkout.
mkdir -p out

adb shell "dumpsys meminfo --package ${app}" > "out/meminfo-${app}-${round}.txt"

# Huge page allocation success rate over the traced interval:
#   successful allocations / (successful allocations + fallbacks)
# When neither counter moved the ratio is reported as 1.
inc_alloc=$((alloc - prev_alloc))
inc_alloc_fallback=$((alloc_fallback - prev_alloc_fallback))
inc_total=$((inc_alloc + inc_alloc_fallback))
if [ "$inc_total" -ne 0 ]; then
  inc_ratio=$(echo "scale=4; $inc_alloc / $inc_total" | bc)
else
  inc_ratio=1
fi
# printf instead of "echo -e": the tab is emitted the same way by every shell.
printf '%s\t%s\n' "$inc_ratio" "$inc_total" > "out/ratio-${app}-${round}.txt"

# simpleperf was started as a background job earlier in this script; wait for
# it so that its report is complete before it is pulled and deleted.
wait

adb pull "/data/misc/perfetto-traces/trace-${app}-${round}.ptrace" out/
adb pull "/data/local/tmp/simpleperf-${app}-${round}.txt" out/
adb shell "rm /data/misc/perfetto-traces/trace-${app}-${round}.ptrace"
adb shell "rm /data/local/tmp/simpleperf-${app}-${round}.txt"

217
handle_perfetto.py Executable file
View File

@ -0,0 +1,217 @@
import pandas as pd
from perfetto.trace_processor import TraceProcessor, TraceProcessorConfig
import os
import re
import readline
import sys
import pandas as pd
def _state_time_sql(state, alias, lstart, lend):
    """Build the sub-select summing the time a thread spent in one state.

    The sum covers the thread_state rows of the slice's thread that overlap
    both the slice and the [lstart, lend] window; each row's duration is
    clipped to that intersection.

    state  -- thread_state.state value to match (e.g. 'Running', 'S', 'D').
    alias  -- column alias given to the result.
    lstart -- window start timestamp.
    lend   -- window end timestamp.

    Returns the SQL fragment including its trailing comma.
    """
    win_start = f"MAX(slice.ts,{lstart})"
    win_end = f"MIN((slice.ts+slice.dur),{lend})"
    return (
        f"(select total(case when thread_state.ts < {win_start} "
        f"then MIN({win_end} ,thread_state.ts+thread_state.dur)-{win_start} "
        f"when (thread_state.ts+thread_state.dur) > {win_end} "
        f"then ({win_end} -MAX(thread_state.ts,{win_start})) "
        f"else thread_state.dur end) "
        f"from thread_state where thread_state.utid=thread.utid "
        f"and thread_state.state='{state}' "
        f"and thread_state.ts < {lend} and (thread_state.ts+thread_state.dur) > {lstart} "
        f"and thread_state.ts < (slice.ts+slice.dur) and (thread_state.ts+thread_state.dur) > slice.ts) "
        f"as {alias},"
    )


def query_sql(lstart, lend, slice_name, cond):
    """Return the SQL selecting slices that overlap a time window.

    lstart, lend -- window bounds; slices must satisfy
                    ts < lend and ts + dur > lstart.
    slice_name   -- prefix of the slice name (matched with LIKE '<prefix>%').
                    Single quotes are doubled so they cannot end the literal.
    cond         -- extra SQL condition, inserted verbatim after AND.

    Each result row carries the slice's identity (process/thread names and
    ids), the slice bounds clipped to the window (lstart, lend, ldur) and the
    clipped time its thread spent in the states Running, S, D, R and R+
    (total_running, total_s, total_d, total_r, total_rr). Rows are ordered by
    slice duration, longest first.
    """
    # The five per-state columns are the same sub-select with a different
    # state literal and alias, so they are generated rather than repeated.
    state_columns = "\n".join(
        _state_time_sql(state, alias, lstart, lend)
        for state, alias in (
            ("Running", "total_running"),
            ("S", "total_s"),
            ("D", "total_d"),
            ("R", "total_r"),
            ("R+", "total_rr"),
        )
    )
    quoted_name = str(slice_name).replace("'", "''")
    return f"""
select slice_id,process.name as process_name,process.upid,process.pid,thread.name as thread_name,thread.tid,slice.ts,slice.dur,
MAX(slice.ts,{lstart}) as lstart, MIN(slice.ts+slice.dur,{lend}) as lend,
(case when slice.ts < {lstart} then MIN(slice.ts+slice.dur,{lend})-{lstart} when (slice.ts+slice.dur) > {lend} then ({lend} -MAX({lstart}, slice.ts)) else slice.dur end) as ldur,
{state_columns}
slice.name from slice JOIN thread_track ON slice.track_id = thread_track.id JOIN thread USING(utid) JOIN process USING(upid) WHERE slice.dur > 0 and slice.ts < {lend} and (slice.ts+slice.dur) > {lstart} and slice.name like '{quoted_name}%' AND {cond} order by slice.dur desc"""
def perfetto_parse(slice_name):
# Summarise one app launch from the perfetto trace named by sys.argv[1].
#
# slice_name: prefix of the slice names to analyse (used as LIKE '<prefix>%').
#
# The trace is opened through TraceProcessor with a hard-coded path to the
# trace_processor_shell binary. android_startups is queried and, for startups
# whose package is neither com.miui.home nor com.miui.securitycenter, the
# launch duration (ts_end - ts) and the startup type are printed.
#
# NOTE(review): this view of the file has lost its indentation, so the nesting
# of several statements (notably the bare `return` after the first print and
# the `continue` in the per-slice loop) cannot be confirmed from here. If that
# `return` is always reached, everything below it is dead code - verify
# against the real file before relying on the breakdown table.
# NOTE(review): TraceProcessor/TraceProcessorConfig are already imported at
# module level; the local import repeats that.
from perfetto.trace_processor import TraceProcessor, TraceProcessorConfig
tp = TraceProcessor(
trace=sys.argv[1],
config=TraceProcessorConfig(bin_path='/home/mi/mthptest/trace_processor_shell'))
# Load the android.startup.startups module and fetch every recorded startup.
qr_it = tp.query(f"""
INCLUDE PERFETTO MODULE android.startup.startups;
SELECT * FROM android_startups """)
# Launch window and package of the startup being analysed; they stay 0/None
# when no startup row passes the package filter below.
lstart = 0
lend = 0
pkg = None
for r in qr_it:
if r.package != 'com.miui.home' and r.package != 'com.miui.securitycenter':
lstart = r.ts
lend = r.ts_end
pkg = r.package
#print(f"{pkg}: lacunch time:{lend-lstart} {r.startup_type} [{slice_name}]")
# This "<duration> <startup_type>" line is the output run.sh parses.
print(f"{lend-lstart} {r.startup_type}")
return
# Thread states of the process's main thread that overlap the launch window.
qr_it = tp.query(f"""
select thread_state.id,thread_state.utid,thread_state.ts,thread_state.dur,thread_state.state,thread_state.waker_utid from thread_state JOIN thread USING(utid) JOIN process USING(upid) where thread_state.ts < {lend} and (thread_state.ts+thread_state.dur) > {lstart} and thread.is_main_thread=1 and process.name='{pkg}'
""")
# Walk the states: remember the most recent 'S' state and, when an 'R' state
# is seen while that flag is set, record [waker_utid, sleep ts, sleep dur].
# The flag is cleared again by the `if s` below.
s = False
sts = 0
sdur = 0
sleep_list = []
for q in qr_it:
if q.state == 'S':
sts = q.ts
sdur = q.dur
s = True
continue
elif q.state == 'R' and s:
sleep_list.append([q.waker_utid,sts,sdur])
if s:
s = False
# app_main_thread_id = q.utid
# Also analyse the main thread itself over the whole launch window; q is the
# last row iterated above.
if len(qr_it) > 0:
sleep_list.append([q.utid, lstart, lend-lstart])
# One row per duration bucket. The ten slots line up with the DataFrame
# columns built at the end:
# time, count, running, s, d, r, r+, dr_total, d_count, dr_count.
launch_info = [[">1ms",0,0,0,0,0,0,0,0,0],
["500us-1ms",0,0,0,0,0,0,0,0,0],
["100us-500us",0,0,0,0,0,0,0,0,0],
["10us-100us",0,0,0,0,0,0,0,0,0],
["<10us",0,0,0,0,0,0,0,0,0]]
# Totals row; slot 0 accumulates durations here instead of holding a label.
launch_info_total = [0,0,0,0,0,0,0,0,0,0]
max_time, min_time = 0, 1000000000
# Each entry is [utid, window start, window duration].
for sl in sleep_list:
qutid = sl[0]
qlstart = sl[1]
qlend = sl[1] + sl[2]
# print(len(qr_it))
# Slices named 'mm_vmscan_direct_reclaim' on this thread that overlap the
# window and whose parent slice name starts with slice_name. dp_id is the
# parent's id and ldur the parent's duration clipped to the window.
vmq = tp.query(f"""
select tmp.id as dp_id,(case when tmp.ts < {qlstart} then MIN(tmp.ts+tmp.dur,{qlend})-{qlstart} when (tmp.ts+tmp.dur) > {qlend} then ({qlend} -MAX({qlstart}, tmp.ts)) else tmp.dur end) as ldur
from slice JOIN(slice) as tmp ON slice.parent_id=tmp.id JOIN thread_track ON slice.track_id = thread_track.id
where thread_track.utid={qutid} and slice.name='mm_vmscan_direct_reclaim'
and tmp.name like '{slice_name}%' and slice.ts < {qlend} and (slice.ts+slice.dur) > {qlstart}
""")
# Per bucket: number of direct-reclaim rows, number of distinct parents, and
# the set of parent ids already counted.
vm_len = [0,0,0,0,0]
vm_count = [0, 0, 0, 0, 0]
vm_slice = [set(), set(), set(), set(), set()]
# Bucket by the clipped parent duration. The thresholds 1000000 / 500000 /
# 100000 / 10000 pair with the '>1ms' ... '<10us' labels, so durations are
# presumably nanoseconds - TODO confirm. Slot 7 (dr_total) counts every row,
# slot 9 (dr_count) counts each parent id once per bucket.
for qr in vmq:
if qr.ldur > 1000000:
vm_len[0] += 1
launch_info[0][7] += 1
if qr.dp_id not in vm_slice[0]:
vm_slice[0].add(qr.dp_id)
vm_count[0] += 1
launch_info[0][9] += 1
# print(qutid, vm_slice[0])
elif qr.ldur > 500000:
vm_len[1] += 1
launch_info[1][7] += 1
if qr.dp_id not in vm_slice[1]:
vm_slice[1].add(qr.dp_id)
vm_count[1] += 1
launch_info[1][9] += 1
elif qr.ldur > 100000:
vm_len[2] += 1
launch_info[2][7] += 1
if qr.dp_id not in vm_slice[2]:
vm_slice[2].add(qr.dp_id)
vm_count[2] += 1
launch_info[2][9] += 1
elif qr.ldur > 10000:
vm_len[3] += 1
launch_info[3][7] += 1
if qr.dp_id not in vm_slice[3]:
vm_slice[3].add(qr.dp_id)
vm_count[3] += 1
launch_info[3][9] += 1
else:
vm_len[4] += 1
launch_info[4][7] += 1
if qr.dp_id not in vm_slice[4]:
vm_slice[4].add(qr.dp_id)
vm_count[4] += 1
launch_info[4][9] += 1
launch_info_total[7] += sum(vm_len)
launch_info_total[9] += sum(vm_count)
# print(vm_slice)
# if len(vm_slice[0]) > 0:
# print(qutid, qlstart, qlend, vm_slice[0])
# Now the matching slices themselves on this thread, with the per-state time
# breakdown produced by query_sql().
qr_it = tp.query(query_sql(qlstart, qlend, slice_name, f"thread.utid={qutid}"))
for qr in qr_it:
slid = qr.slice_id
# qr_it_tmp = tp.query(f"select count(*) as count from slice where parent_id={slid} and name='mm_vmscan_direct_reclaim'")
# vm_len = len(qr_it_tmp)
# for qt in qr_it_tmp:
# print("qr_it_tmp", qt.count)
# vm_len = qt.count
# if len(vm_slice[0]) > 0:
# print(qr)
# When the per-state times do not add up to the clipped duration, the
# dr_count credit this slice received above is taken back.
# NOTE(review): the `continue` presumably skips such a slice entirely -
# its nesting level is not visible here, confirm.
if (qr.total_running + qr.total_s + qr.total_d + qr.total_r + qr.total_rr) != qr.ldur:
for i in range(len(vm_slice)):
if qr.slice_id in vm_slice[i]:
launch_info[i][9] -= 1
continue
# pass
launch_info_total[0] += qr.ldur
launch_info_total[1] += 1
launch_info_total[2] += qr.total_running
launch_info_total[3] += qr.total_s
launch_info_total[4] += qr.total_d
launch_info_total[5] += qr.total_r
launch_info_total[6] += qr.total_rr
# idx selects the state with the largest share (0 running, 1 s, 2 d, 3 r,
# 4 r+); idx + 2 is the matching launch_info column.
tmp_list = [qr.total_running, qr.total_s, qr.total_d, qr.total_r, qr.total_rr]
idx = tmp_list.index(max(tmp_list))
# dc is True when the slice spent any time in state D; adding the bool to a
# counter adds 0 or 1.
dc = qr.total_d > 0
launch_info_total[8] += dc
max_time = max(max_time, qr.ldur)
min_time = min(min_time, qr.ldur)
# debug
#if qr.ldur > 100000:
# print(qr)
# Same duration buckets as for the direct-reclaim rows above.
if qr.ldur > 1000000:
#print(qr)
launch_info[0][1] += 1
launch_info[0][idx+2] += 1
launch_info[0][8] += dc
# print(qr.slice_id, qr.ldur, qr.total_running)
# launch_info[0][7] += vm_len[0]
elif qr.ldur > 500000:
launch_info[1][1] += 1
launch_info[1][idx + 2] += 1
launch_info[1][8] += dc
# launch_info[1][7] += vm_len[1]
elif qr.ldur > 100000:
launch_info[2][1] += 1
launch_info[2][idx + 2] += 1
launch_info[2][8] += dc
# launch_info[2][7] += vm_len[2]
elif qr.ldur > 10000:
#if qr.ldur > 50000 and qr.total_rr==0:
# print(qr)
launch_info[3][1] += 1
launch_info[3][idx + 2] += 1
launch_info[3][8] += dc
# launch_info[3][7] += vm_len[3]
else:
launch_info[4][1] += 1
launch_info[4][idx + 2] += 1
launch_info[4][8] += dc
# launch_info[4][7] += vm_len[4]
# Append the totals row and print the table, with the d_count column moved to
# position 5.
launch_info.append(launch_info_total)
df = pd.DataFrame(launch_info, columns=['time', 'count', 'running', 's', 'd', 'r', 'r+','dr_total','d_count','dr_count'], index=None)
d_count = df.pop("d_count")
df.insert(5, 'd_count', d_count)
print(df.to_string())
print(f"max:{max_time} min:{min_time}")
# print(launch_info)
# Same breakdown for every matching slice of the whole process over the
# launch window. The sums end up in launch_ret, which is only consumed by the
# commented-out print below.
qr_it = tp.query(query_sql(lstart, lend, slice_name, f"process.name='{pkg}'"))
launch_ret = [0,0,0,0,0,0]
for q in qr_it:
launch_ret[0] += q.ldur
launch_ret[1] += q.total_running
launch_ret[2] += q.total_s
launch_ret[3] += q.total_d
launch_ret[4] += q.total_r
launch_ret[5] += q.total_rr
# print(q)
count = len(qr_it)
launch_ret.insert(0, count)
#print(f"{pkg}: ", launch_ret)
def main():
    """Command-line entry point.

    Expects two arguments: the trace file (read by perfetto_parse through
    sys.argv[1]) and the slice-name prefix (sys.argv[2], passed on
    explicitly). With fewer arguments a usage line is written to stderr and
    the process exits with status 2 instead of failing with an IndexError.
    """
    if len(sys.argv) < 3:
        print(f"usage: {sys.argv[0]} <trace-file> <slice-name-prefix>", file=sys.stderr)
        raise SystemExit(2)
    perfetto_parse(sys.argv[2])


if __name__ == '__main__':
    main()

91
perfetto_config_perfUser.txt Executable file
View File

@ -0,0 +1,91 @@
# Perfetto trace config in text-proto form.
# It lists the same buffers, data sources and ftrace/atrace selections as the
# config embedded in cold_start_app.sh; the only value that differs is
# duration_ms (60000 here, 10000 there).

# Two trace buffers of 707200 KB each, both with the DISCARD fill policy.
buffers: {
size_kb: 707200
fill_policy: DISCARD
}
buffers: {
size_kb: 707200
fill_policy: DISCARD
}
# Process stats: scan all processes at start, then poll every 1000 ms.
# This is the only data source that names a buffer explicitly (index 1).
data_sources: {
config {
name: "linux.process_stats"
target_buffer: 1
process_stats_config {
scan_all_processes_on_start: true
proc_stats_poll_ms: 1000
}
}
}
# SurfaceFlinger frame timeline, no extra options.
data_sources: {
config {
name: "android.surfaceflinger.frametimeline"
}
}
# System stats: MemFree / MemAvailable and CPU frequency, each every 250 ms.
data_sources: {
config {
name: "linux.sys_stats"
sys_stats_config {
meminfo_period_ms: 250
meminfo_counters: MEMINFO_MEM_FREE
meminfo_counters: MEMINFO_MEM_AVAILABLE
cpufreq_period_ms: 250
}
}
}
# ftrace: scheduler, power, vmscan (kswapd and direct reclaim), compaction,
# page-cache, task lifetime, low-memory-killer / OOM and binder events, plus
# the atrace categories listed below for all apps.
data_sources: {
config {
name: "linux.ftrace"
ftrace_config {
buffer_size_kb: 81920
drain_period_ms: 100
symbolize_ksyms: true
ftrace_events: "power/cpu_frequency"
ftrace_events: "sched/sched_switch"
ftrace_events: "power/suspend_resume"
ftrace_events: "sched/sched_wakeup"
ftrace_events: "sched/sched_wakeup_new"
ftrace_events: "sched/sched_waking"
ftrace_events: "power/cpu_idle"
ftrace_events: "vmscan/mm_vmscan_kswapd_wake"
ftrace_events: "vmscan/mm_vmscan_kswapd_sleep"
ftrace_events: "vmscan/mm_vmscan_direct_reclaim_begin"
ftrace_events: "vmscan/mm_vmscan_direct_reclaim_end"
ftrace_events: "compaction/mm_compaction_begin"
ftrace_events: "compaction/mm_compaction_end"
# NOTE(review): the next two entries carry no "group/" prefix, unlike every
# other event in this list - confirm that is intended.
ftrace_events: "mm_filemap_add_to_page_cache"
ftrace_events: "mm_filemap_delete_from_page_cache"
ftrace_events: "sched/sched_process_exit"
ftrace_events: "sched/sched_process_free"
ftrace_events: "task/task_newtask"
ftrace_events: "task/task_rename"
ftrace_events: "lowmemorykiller/lowmemory_kill"
ftrace_events: "oom/oom_score_adj_update"
ftrace_events: "ftrace/print"
ftrace_events: "binder/*"
ftrace_events: "power/gpu_frequency"
ftrace_events: "sched/sched_blocked_reason"
atrace_categories: "input"
atrace_categories: "gfx"
atrace_categories: "view"
atrace_categories: "webview"
atrace_categories: "camera"
atrace_categories: "dalvik"
atrace_categories: "power"
atrace_categories: "wm"
atrace_categories: "am"
atrace_categories: "ss"
atrace_categories: "sched"
atrace_categories: "freq"
atrace_categories: "binder_driver"
atrace_categories: "aidl"
atrace_categories: "binder_lock"
atrace_apps: "*"
}
}
}
# Trace for 60000 ms, writing into the output file, with a 30000 ms flush
# period and incremental state cleared every 5000 ms.
duration_ms: 60000
write_into_file: true
flush_period_ms: 30000
incremental_state_config {
clear_period_ms: 5000
}

55
run.sh Executable file
View File

@ -0,0 +1,55 @@
#!/usr/bin/env bash
# run.sh - drive the cold-start measurement for every package in ./applist.
#
# Usage: run.sh [rounds]        (rounds defaults to 2)
#
# For each round and package the script runs cold_start_app.sh, sends the
# HOME key, extracts the launch time from the trace with handle_perfetto.py
# and appends one line to ./result.
#
#output: round app start_time start_type ratio total rss pss pagefault itlbmiss dtlbmiss cpustalled
rounds=${1:-2}

# Print the 4th field of every line in file $2 that contains the event name $1.
perf_stat() {
  awk -v ev="$1" 'index($0, ev) { print $4 }' "$2"
}

# cold_start_app.sh writes all of its artefacts below ./out.
mkdir -p out

# The header names the twelve values written per row, in the same order.
echo "round app start_time start_type ratio total rss pss pagefault itlbmiss dtlbmiss cpustalled" > result

# applist holds whitespace-separated package names. read returns non-zero
# when it reaches end of file, which is the normal case here.
apps=()
read -r -d '' -a apps < applist || true

for ((round = 1; round <= rounds; round++)); do
  for app in "${apps[@]}"; do
    echo "开始抓应用 $app"
    ./cold_start_app.sh "$app" "$round"
    adb shell input keyevent 3

    # Launch time: handle_perfetto.py prints exactly one
    # "<duration> <startup_type>" line when it found the startup. Anything
    # else (no output, or the multi-line table) counts as "no startup".
    startup=$(python3 handle_perfetto.py "out/trace-${app}-${round}.ptrace" mm)
    if [[ -n "$startup" && "$startup" != *$'\n'* ]]; then
      read -r start_time start_type _ <<< "$startup"
    else
      start_time=0
      start_type="NONE"
    fi

    # PSS / RSS: a package can have several processes in the dump, each with
    # its own "TOTAL PSS" line; the last one is used.
    meminfo="out/meminfo-${app}-${round}.txt"
    pss=$(awk '/TOTAL PSS/ { v = $3 } END { print v }' "$meminfo")
    rss=$(awk '/TOTAL PSS/ { v = $6 } END { print v }' "$meminfo")

    perf="out/simpleperf-${app}-${round}.txt"
    pagefaults=$(perf_stat 'page-faults' "$perf")
    itlbmiss=$(perf_stat 'iTLB-load-misses' "$perf")
    dtlbmiss=$(perf_stat 'dTLB-load-misses' "$perf")
    cpustall=$(perf_stat 'raw-stall-backend-mem' "$perf")

    # Huge page statistics written by cold_start_app.sh: "<ratio>\t<total>".
    ratio_file="out/ratio-${app}-${round}.txt"
    inc_ratio=$(awk '{ print $1 }' "$ratio_file")
    inc_total=$(awk '{ print $2 }' "$ratio_file")

    printf '%s %s %s %s %s %s %s %s %s %s %s %s\n' \
      "$round" "$app" "$start_time" "$start_type" "$inc_ratio" "$inc_total" \
      "$rss" "$pss" "$pagefaults" "$itlbmiss" "$dtlbmiss" "$cpustall" >> result

    echo next
    read -r -p "输入命令 " cmd
    if [[ "$cmd" != "c" ]]; then
      # Anything but "c" ends the whole run. A plain `break` would only leave
      # the app loop, and the next round would start regardless.
      break 2
    fi
  done
done

BIN
trace_processor_shell Executable file

Binary file not shown.