qemu記憶體訪問分析
阿新 • • 發佈:2019-01-07
qemu TLB表項:
(說明:以下為全文摘要)訪問記憶體時先查 TLB 是否命中:如果命中,直接從 hva 地址中返回;
如果沒有命中,則需要通過 mmu 獲取該虛擬地址所對應的實體地址,對 tlb 進行填充。
若該虛擬地址沒有對應的實體地址或者許可權不夠等情況,cpu 就會出現 page_fault 異常;
如果轉換成功,更新 tlb;否則,儲存出錯資訊。
qemu 並沒有將記憶體訪問完全用 IR 表示(那樣會增加翻譯難度,也會佔用大量的 code_buffer),
而是使用外調函式來實現:在對 target 翻譯過程中,記憶體訪問操作會生成呼叫
qemu_ld/st_helpers 中函式的中間碼,由這些函式完成 target 的虛擬地址和實體地址轉換的工作。
qemu 記憶體訪問過程:guest virtual addr (GVA) → guest physical addr (GPA) → host virtual addr (HVA)。其中 GVA->HVA 由 qemu 負責完成,HVA->HPA 由 host 作業系統完成。tlb 的結構如下,addr_xxx 表示 GVA 地址,同時也表示了執行許可權;addend = hva_base – gva_base(即 TLB 表項中快取的 HVA 與 GVA 之差,GVA 加上它即得 HVA);
1.get_page_addr_code 會首先檢視 tlb 是否命中,如果沒有命中就 ldub_code 走 mmu 翻譯這個分支,否則直接獲取 hva。typedef struct CPUTLBEntry { target_ulong addr_read; // 可讀 target_ulong addr_write; // 可寫 target_ulong addr_code; // 可執行 unsigned long addend; } CPUTLBEntry;
2.TLB 沒有命中時,會通過 ldub_code,這個函式是由下面的巨集產生。這個巨集首先會在 tlb 檢查下是313 /* NOTE: this function can trigger an exception */ 314 /* NOTE2: the returned address is not exactly the physical address: it 315 is the offset relative to phys_ram_base */ 316 tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) 317 { 318 int mmu_idx, page_index, pd; 319 void *p; 320 MemoryRegion *mr; 321 322 page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); 323 mmu_idx = cpu_mmu_index(env1); 324 if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code != 325 (addr & TARGET_PAGE_MASK))) { 326 #ifdef CONFIG_TCG_PASS_AREG0 327 cpu_ldub_code(env1, addr); 328 #else 329 ldub_code(addr); 330 #endif 331 } 332 pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK; 333 mr = iotlb_to_region(pd); 334 if (memory_region_is_unassigned(mr)) { 335 #if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC) 336 cpu_unassigned_access(env1, addr, 0, 1, 0, 4); 337 #else 338 cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x" 339 TARGET_FMT_lx "\n", addr); 340 #endif 341 } 342 p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend); 343 return qemu_ram_addr_from_host_nofail(p); 344 }
否命中,如果命中直接從 hva 地址中返回,否則還是通過 mmu 來獲取。
3.在這個模板中會對 tlb 進行查詢,看是否命中,如果命中,還要根據是 io 還是 ram 進行分別處理;95 static inline RES_TYPE 96 glue(glue(glue(CPU_PREFIX, ld), USUFFIX), MEMSUFFIX)(ENV_PARAM 97 target_ulong ptr) 98 { 99 int page_index; 100 RES_TYPE res; 101 target_ulong addr; 102 int mmu_idx; 103 104 addr = ptr; 105 page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); 106 mmu_idx = CPU_MMU_INDEX; 107 if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ != 108 (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { 109 res = glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_VAR 110 addr, 111 mmu_idx); 112 } else { 113 uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend; 114 res = glue(glue(ld, USUFFIX), _raw)(hostaddr); 115 } 116 return res; 117 }
如果沒有命中,則需要通過 mmu 獲取該虛擬地址所對應的實體地址,對 tlb 進行填充。
106 DATA_TYPE
107 glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
108 target_ulong addr,
109 int mmu_idx)
110 {
111 DATA_TYPE res;
112 int index;
113 target_ulong tlb_addr;
114 target_phys_addr_t ioaddr;
115 uintptr_t retaddr;
116
117 /* test if there is match for unaligned or IO access */
118 /* XXX: could done more in memory macro in a non portable way */
119 index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
120 redo:
121 tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
122 if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
123 if (tlb_addr & ~TARGET_PAGE_MASK) {
124 /* IO access */
125 if ((addr & (DATA_SIZE - 1)) != 0)
126 goto do_unaligned_access;
127 retaddr = GETPC();
128 ioaddr = env->iotlb[mmu_idx][index];
129 res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
130 } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
131 /* slow unaligned access (it spans two pages or IO) */
132 do_unaligned_access:
133 retaddr = GETPC();
134 #ifdef ALIGNED_ONLY
135 do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
136 #endif
137 res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr,
138 mmu_idx, retaddr);
139 } else {
140 /* unaligned/aligned access in the same page */
141 uintptr_t addend;
142 #ifdef ALIGNED_ONLY
143 if ((addr & (DATA_SIZE - 1)) != 0) {
144 retaddr = GETPC();
145 do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
146 }
147 #endif
148 addend = env->tlb_table[mmu_idx][index].addend;
149 res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(intptr_t)
150 (addr + addend));
151 }
152 } else {
153 /* the page is not in the TLB : fill it */
154 retaddr = GETPC();
155 #ifdef ALIGNED_ONLY
156 if ((addr & (DATA_SIZE - 1)) != 0)
157 do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
158 #endif
159 tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
160 goto redo;
161 }
162 return res;
163 }
4.以 arm 為例,tlb_fill 會通過 cpu_arm_handle_mmu_fault 對虛實地址轉換進行處理,如果該虛擬地址沒有對應的實體地址或者許可權不夠等情況,cpu 就會出現 page_fault 異常。
72 /* try to fill the TLB and return an exception if error. If retaddr is
73 NULL, it means that the function was called in C code (i.e. not
74 from generated code or from helper.c) */
75 /* XXX: fix it to restore all registers */
76 void tlb_fill(CPUARMState *env1, target_ulong addr, int is_write, int mmu_idx,
77 uintptr_t retaddr)
78 {
79 TranslationBlock *tb;
80 CPUARMState *saved_env;
81 int ret;
82
83 saved_env = env;
84 env = env1;
85 ret = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx);
86 if (unlikely(ret)) {
87 if (retaddr) {
88 /* now we have a real cpu fault */
89 tb = tb_find_pc(retaddr);
90 if (tb) {
91 /* the PC is inside the translated code. It means that we have
92 a virtual CPU fault */
93 cpu_restore_state(tb, env, retaddr);
94 }
95 }
96 raise_exception(env->exception_index);
97 }
98 env = saved_env;
99 }
5. cpu_arm_handle_mmu_fault 裡面主要是 page_walk,檢查是否存在對應的實體地址和許可權。如果有,更新 tlb;否則,儲存出錯資訊。
2122 int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address,
2123 int access_type, int mmu_idx)
2124 {
2125 uint32_t phys_addr;
2126 target_ulong page_size;
2127 int prot;
2128 int ret, is_user;
2129
2130 is_user = mmu_idx == MMU_USER_IDX;
2131 ret = get_phys_addr(env, address, access_type, is_user, &phys_addr, &prot,
2132 &page_size);
2133 if (ret == 0) {
2134 /* Map a single [sub]page. */
2135 phys_addr &= ~(uint32_t)0x3ff;
2136 address &= ~(uint32_t)0x3ff;
2137 tlb_set_page (env, address, phys_addr, prot, mmu_idx, page_size);
2138 return 0;
2139 }
2140
2141 if (access_type == 2) {
2142 env->cp15.c5_insn = ret;
2143 env->cp15.c6_insn = address;
2144 env->exception_index = EXCP_PREFETCH_ABORT;
2145 } else {
2146 env->cp15.c5_data = ret;
2147 if (access_type == 1 && arm_feature(env, ARM_FEATURE_V6))
2148 env->cp15.c5_data |= (1 << 11);
2149 env->cp15.c6_data = address;
2150 env->exception_index = EXCP_DATA_ABORT;
2151 }
2152 return 1;
2153 }
qemu 的記憶體訪問模擬,qemu 並沒有將記憶體訪問用 IR 表示,這樣一方面會增加翻譯難度,同時也佔用大量的 code_buffer。Qemu 使用了外調函式來實現這個功能。在對 target 翻譯過程中,對於記憶體訪問操
作會生成如下形式的中間碼:
tmp = gen_ld16s(addr, IS_USER(s));
783 static inline TCGv gen_ld16s(TCGv addr, int index)
784 {
785 TCGv tmp = tcg_temp_new_i32();
786 tcg_gen_qemu_ld16s(tmp, addr, index);
787 return tmp;
788 }
接下來在 tcg 翻譯將中間碼翻譯成 host 機器碼時,會首先查詢 tlb,如果命中就直接返回結果;否則就呼叫以下 qemu_ld/st_helpers 中的函式進行 target 的虛擬地址和實體地址轉換的工作。
928 #ifdef CONFIG_SOFTMMU
929
930 #include "../../softmmu_defs.h"
931
932 #ifdef CONFIG_TCG_PASS_AREG0
933 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
934 int mmu_idx) */
935 static const void * const qemu_ld_helpers[4] = {
936 helper_ldb_mmu,
937 helper_ldw_mmu,
938 helper_ldl_mmu,
939 helper_ldq_mmu,
940 };
941
942 /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
943 uintxx_t val, int mmu_idx) */
944 static const void * const qemu_st_helpers[4] = {
945 helper_stb_mmu,
946 helper_stw_mmu,
947 helper_stl_mmu,
948 helper_stq_mmu,
949 };
950 #else
951 /* legacy helper signature: __ld_mmu(target_ulong addr, int
952 mmu_idx) */
953 static void *qemu_ld_helpers[4] = {
954 __ldb_mmu,
955 __ldw_mmu,
956 __ldl_mmu,
957 __ldq_mmu,
958 };
959
960 /* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
961 int mmu_idx) */
962 static void *qemu_st_helpers[4] = {
963 __stb_mmu,
964 __stw_mmu,
965 __stl_mmu,
966 __stq_mmu,
967 };
968 #endif
969 #endif
971 #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
972
973 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
974 {
975 int addr_reg, data_reg, data_reg2, bswap;
976 #ifdef CONFIG_SOFTMMU
977 int mem_index, s_bits;
978 # if TARGET_LONG_BITS == 64
979 int addr_reg2;
980 # endif
981 uint32_t *label_ptr;
982 #endif
983
984 #ifdef TARGET_WORDS_BIGENDIAN
985 bswap = 1;
986 #else
987 bswap = 0;
988 #endif
989 data_reg = *args++;
990 if (opc == 3)
991 data_reg2 = *args++;
992 else
993 data_reg2 = 0; /* suppress warning */
994 addr_reg = *args++;
995 #ifdef CONFIG_SOFTMMU
996 # if TARGET_LONG_BITS == 64
997 addr_reg2 = *args++;
998 # endif
999 mem_index = *args;
1000 s_bits = opc & 3;
1001
1002 /* Should generate something like the following:
1003 * shr r8, addr_reg, #TARGET_PAGE_BITS
1004 * and r0, r8, #(CPU_TLB_SIZE - 1) @ Assumption: CPU_TLB_BITS <= 8
1005 * add r0, env, r0 lsl #CPU_TLB_ENTRY_BITS
1006 */
1007 # if CPU_TLB_BITS > 8
1008 # error
1009 # endif
1010 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_R8,
1011 0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
1012 tcg_out_dat_imm(s, COND_AL, ARITH_AND,
1013 TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
1014 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0,
1015 TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
1016 /* In the
1017 * ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_read))]
1018 * below, the offset is likely to exceed 12 bits if mem_index != 0 and
1019 * not exceed otherwise, so use an
1020 * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
1021 * before.
1022 */
1023 if (mem_index)
1024 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
1025 (mem_index << (TLB_SHIFT & 1)) |
1026 ((16 - (TLB_SHIFT >> 1)) << 8));
1027 tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
1028 offsetof(CPUArchState, tlb_table[0][0].addr_read));
1029 tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
1030 TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
1031 /* Check alignment. */
1032 if (s_bits)
1033 tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
1034 0, addr_reg, (1 << s_bits) - 1);
1035 # if TARGET_LONG_BITS == 64
1036 /* XXX: possibly we could use a block data load or writeback in
1037 * the first access. */
1038 tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
1039 offsetof(CPUArchState, tlb_table[0][0].addr_read) + 4);
1040 tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
1041 TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
1042 # endif
1043 tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
1044 offsetof(CPUArchState, tlb_table[0][0].addend));
1045
1046 switch (opc) {
1047 case 0:
1048 tcg_out_ld8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
1049 break;
1050 case 0 | 4:
1051 tcg_out_ld8s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
1052 break;
1053 case 1:
1054 tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
1055 if (bswap) {
1056 tcg_out_bswap16(s, COND_EQ, data_reg, data_reg);
1057 }
1058 break;
1059 case 1 | 4:
1060 if (bswap) {
1061 tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
1062 tcg_out_bswap16s(s, COND_EQ, data_reg, data_reg);
1063 } else {
1064 tcg_out_ld16s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
1065 }
1066 break;
1067 case 2:
1068 default:
1069 tcg_out_ld32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
1070 if (bswap) {
1071 tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
1072 }
1073 break;
1074 case 3:
1075 if (bswap) {
1076 tcg_out_ld32_rwb(s, COND_EQ, data_reg2, TCG_REG_R1, addr_reg);
1077 tcg_out_ld32_12(s, COND_EQ, data_reg, TCG_REG_R1, 4);
1078 tcg_out_bswap32(s, COND_EQ, data_reg2, data_reg2);
1079 tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
1080 } else {
1081 tcg_out_ld32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg);
1082 tcg_out_ld32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4);
1083 }
1084 break;
1085 }
1086
1087 label_ptr = (void *) s->code_ptr;
1088 tcg_out_b_noaddr(s, COND_EQ);
1089
1090 /* TODO: move this code to where the constants pool will be */
1091 if (addr_reg != TCG_REG_R0) {
1092 tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
1093 TCG_REG_R0, 0, addr_reg, SHIFT_IMM_LSL(0));
1094 }
1095 # if TARGET_LONG_BITS == 32
1096 tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R1, 0, mem_index);
1097 # else
1098 tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
1099 TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0));
1100 tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index);
1101 # endif
1102 #ifdef CONFIG_TCG_PASS_AREG0
1103 /* XXX/FIXME: suboptimal and incorrect for 64 bit */
1104 tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
1105 tcg_target_call_iarg_regs[2], 0,
1106 tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0));
1107 tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
1108 tcg_target_call_iarg_regs[1], 0,
1109 tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0));
1110
1111 tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
1112 tcg_target_call_iarg_regs[0], 0, TCG_AREG0,
1113 SHIFT_IMM_LSL(0));
1114 #endif
1115 tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);
1116
1117 switch (opc) {
1118 case 0 | 4:
1119 tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0);
1120 break;
1121 case 1 | 4:
1122 tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0);
1123 break;
1124 case 0:
1125 case 1:
1126 case 2:
1127 default:
1128 if (data_reg != TCG_REG_R0) {
1129 tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
1130 data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0));
1131 }
1132 break;
1133 case 3:
1134 if (data_reg != TCG_REG_R0) {
1135 tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
1136 data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0));
1137 }
1138 if (data_reg2 != TCG_REG_R1) {
1139 tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
1140 data_reg2, 0, TCG_REG_R1, SHIFT_IMM_LSL(0));
1141 }
1142 break;
1143 }
1144
1145 reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
1146 #else /* !CONFIG_SOFTMMU */
1147 if (GUEST_BASE) {
1148 uint32_t offset = GUEST_BASE;
1149 int i;
1150 int rot;
1151
1152 while (offset) {
1153 i = ctz32(offset) & ~1;
1154 rot = ((32 - i) << 7) & 0xf00;
1155
1156 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R8, addr_reg,
1157 ((offset >> i) & 0xff) | rot);
1158 addr_reg = TCG_REG_R8;
1159 offset &= ~(0xff << i);
1160 }
1161 }
1162 switch (opc) {
1163 case 0:
1164 tcg_out_ld8_12(s, COND_AL, data_reg, addr_reg, 0);
1165 break;
1166 case 0 | 4:
1167 tcg_out_ld8s_8(s, COND_AL, data_reg, addr_reg, 0);
1168 break;
1169 case 1:
1170 tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
1171 if (bswap) {
1172 tcg_out_bswap16(s, COND_AL, data_reg, data_reg);
1173 }
1174 break;
1175 case 1 | 4:
1176 if (bswap) {
1177 tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
1178 tcg_out_bswap16s(s, COND_AL, data_reg, data_reg);
1179 } else {
1180 tcg_out_ld16s_8(s, COND_AL, data_reg, addr_reg, 0);
1181 }
1182 break;
1183 case 2:
1184 default:
1185 tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, 0);
1186 if (bswap) {
1187 tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
1188 }
1189 break;
1190 case 3:
1191 /* TODO: use block load -
1192 * check that data_reg2 > data_reg or the other way */
1193 if (data_reg == addr_reg) {
1194 tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
1195 tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
1196 } else {
1197 tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
1198 tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
1199 }
1200 if (bswap) {
1201 tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
1202 tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2);
1203 }
1204 break;
1205 }
1206 #endif
1207 }
上面的記憶體訪問中將查詢 tlb 的部分直接翻譯成了 host 指令,而對於 target 的 mmu 轉換則使用了外調函式來實現。這樣對提升速度是有好處的。