
QEMU Memory Access Analysis

QEMU's TLB entry:

       QEMU's memory access path is: guest virtual address (GVA) → guest physical address (GPA) → host virtual address (HVA). The GVA→HVA step is handled by QEMU; HVA→HPA is handled by the host operating system. The software TLB entry is structured as shown below. Each addr_xxx field holds a page-aligned GVA tag, and which field is valid also encodes the access permission; addend = hva_base − gva_base, i.e. the delta that turns any GVA inside the page into the corresponding HVA.

typedef struct CPUTLBEntry {
    target_ulong addr_read;   /* GVA tag if the page is readable */
    target_ulong addr_write;  /* GVA tag if the page is writable */
    target_ulong addr_code;   /* GVA tag if the page is executable */
    unsigned long addend;     /* hva - gva for this page */
} CPUTLBEntry;
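
To make the addend concrete, here is a minimal sketch (not QEMU code) of how a matching entry turns a GVA into an HVA; it assumes target_ulong is the guest word size and that the entry has already matched the lookup:

/* Minimal sketch, assuming the entry's tag already equals
 * (gva & TARGET_PAGE_MASK), i.e. the TLB lookup hit. */
static inline void *gva_to_hva(CPUTLBEntry *e, target_ulong gva)
{
    /* addend was set at fill time to hva_of_page - gva_of_page, so the
       same delta works for every byte offset inside the page. */
    return (void *)((uintptr_t)gva + e->addend);
}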
1. get_page_addr_code first checks whether the TLB hits. On a miss it calls ldub_code (or cpu_ldub_code), which goes through the MMU translation path; otherwise the HVA is computed directly.
/* NOTE: this function can trigger an exception */
/* NOTE2: the returned address is not exactly the physical address: it
   is the offset relative to phys_ram_base */
tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
{
    int mmu_idx, page_index, pd;
    void *p;
    MemoryRegion *mr;

    page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    mmu_idx = cpu_mmu_index(env1);
    if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
                 (addr & TARGET_PAGE_MASK))) {
#ifdef CONFIG_TCG_PASS_AREG0
        cpu_ldub_code(env1, addr);
#else
        ldub_code(addr);
#endif
    }
    pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
    mr = iotlb_to_region(pd);
    if (memory_region_is_unassigned(mr)) {
#if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC)
        cpu_unassigned_access(env1, addr, 0, 1, 0, 4);
#else
        cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x"
                  TARGET_FMT_lx "\n", addr);
#endif
    }
    p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
    return qemu_ram_addr_from_host_nofail(p);
}
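As a concrete illustration of the index/tag split used above, here is a standalone example with assumed values typical for this era of QEMU (4 KiB pages, TARGET_PAGE_BITS = 12, and a 256-entry TLB, i.e. CPU_TLB_BITS = 8):

#include <stdint.h>
#include <stdio.h>

#define TARGET_PAGE_BITS 12                           /* assumed: 4 KiB pages */
#define TARGET_PAGE_MASK (~((1u << TARGET_PAGE_BITS) - 1))
#define CPU_TLB_SIZE 256                              /* assumed: CPU_TLB_BITS = 8 */

int main(void)
{
    uint32_t addr = 0x40123abc;
    /* Middle bits of the GVA pick the direct-mapped TLB slot... */
    unsigned page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    /* ...and the page-aligned GVA is the tag compared against addr_code. */
    uint32_t tag = addr & TARGET_PAGE_MASK;
    printf("index=%u tag=0x%08x\n", page_index, (unsigned)tag);
    /* prints: index=35 tag=0x40123000 */
    return 0;
}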
2. On a TLB miss, ldub_code is invoked. This function is generated by the macro below: it first checks the TLB itself, and on a hit loads directly from the HVA; otherwise it still goes through the MMU helper.
static inline RES_TYPE
glue(glue(glue(CPU_PREFIX, ld), USUFFIX), MEMSUFFIX)(ENV_PARAM
                                                     target_ulong ptr)
{
    int page_index;
    RES_TYPE res;
    target_ulong addr;
    int mmu_idx;

    addr = ptr;
    page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    mmu_idx = CPU_MMU_INDEX;
    if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                 (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
        res = glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_VAR
                                                                     addr,
                                                                     mmu_idx);
    } else {
        uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
        res = glue(glue(ld, USUFFIX), _raw)(hostaddr);
    }
    return res;
}
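The glue() macros simply paste tokens together, so each instantiation of this template yields a concretely named function. For illustration, with CPU_PREFIX = cpu_, USUFFIX = ub, and MEMSUFFIX = _code, the template produces the cpu_ldub_code called from get_page_addr_code above; schematically:

/* Schematic expansion only; the actual RES_TYPE and ENV_PARAM depend on
   the per-instantiation defines in the including header. */
RES_TYPE cpu_ldub_code(CPUArchState *env, target_ulong ptr);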
3. The template below performs the TLB lookup. On a hit, IO and RAM accesses are handled separately; on a miss, the MMU is used to obtain the physical address corresponding to the virtual address, and the TLB is filled.
DATA_TYPE
glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
                                                       target_ulong addr,
                                                       int mmu_idx)
{
    DATA_TYPE res;
    int index;
    target_ulong tlb_addr;
    target_phys_addr_t ioaddr;
    uintptr_t retaddr;

    /* test if there is match for unaligned or IO access */
    /* XXX: could done more in memory macro in a non portable way */
    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
 redo:
    tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
    if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
        if (tlb_addr & ~TARGET_PAGE_MASK) {
            /* IO access */
            if ((addr & (DATA_SIZE - 1)) != 0)
                goto do_unaligned_access;
            retaddr = GETPC();
            ioaddr = env->iotlb[mmu_idx][index];
            res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
        } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
            /* slow unaligned access (it spans two pages or IO) */
        do_unaligned_access:
            retaddr = GETPC();
#ifdef ALIGNED_ONLY
            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
#endif
            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr,
                                                         mmu_idx, retaddr);
        } else {
            /* unaligned/aligned access in the same page */
            uintptr_t addend;
#ifdef ALIGNED_ONLY
            if ((addr & (DATA_SIZE - 1)) != 0) {
                retaddr = GETPC();
                do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
            }
#endif
            addend = env->tlb_table[mmu_idx][index].addend;
            res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(intptr_t)
                                                (addr + addend));
        }
    } else {
        /* the page is not in the TLB : fill it */
        retaddr = GETPC();
#ifdef ALIGNED_ONLY
        if ((addr & (DATA_SIZE - 1)) != 0)
            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
#endif
        tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
        goto redo;
    }
    return res;
}
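One detail worth calling out is the cross-page test ((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE, which routes an access to the slow path when it straddles a page boundary. A self-contained sketch with assumed 4 KiB pages:

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SIZE 0x1000u               /* assumed 4 KiB guest pages */
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* True when an access of data_size bytes at addr does not fit in one page
   and therefore must take the slow path. */
static bool spans_two_pages(uint32_t addr, unsigned data_size)
{
    return ((addr & ~PAGE_MASK) + data_size - 1) >= PAGE_SIZE;
}

/* spans_two_pages(0x2FFE, 4) -> true:  bytes 0x2FFE..0x3001 cross a page.
   spans_two_pages(0x2FFC, 4) -> false: bytes 0x2FFC..0x2FFF stay inside. */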
4. Taking ARM as an example, tlb_fill performs the guest virtual-to-physical translation through cpu_arm_handle_mmu_fault. If the virtual address has no corresponding physical address, or the permissions are insufficient, the CPU takes a page-fault exception.
/* try to fill the TLB and return an exception if error. If retaddr is
   NULL, it means that the function was called in C code (i.e. not
   from generated code or from helper.c) */
/* XXX: fix it to restore all registers */
void tlb_fill(CPUARMState *env1, target_ulong addr, int is_write, int mmu_idx,
              uintptr_t retaddr)
{
    TranslationBlock *tb;
    CPUARMState *saved_env;
    int ret;

    saved_env = env;
    env = env1;
    ret = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx);
    if (unlikely(ret)) {
        if (retaddr) {
            /* now we have a real cpu fault */
            tb = tb_find_pc(retaddr);
            if (tb) {
                /* the PC is inside the translated code. It means that we have
                   a virtual CPU fault */
                cpu_restore_state(tb, env, retaddr);
            }
        }
        raise_exception(env->exception_index);
    }
    env = saved_env;
}
5. cpu_arm_handle_mmu_fault mainly performs the page walk, checking whether a corresponding physical address exists and whether the permissions allow the access. If so, it updates the TLB; otherwise it records the fault information.

int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address,
                              int access_type, int mmu_idx)
{
    uint32_t phys_addr;
    target_ulong page_size;
    int prot;
    int ret, is_user;

    is_user = mmu_idx == MMU_USER_IDX;
    ret = get_phys_addr(env, address, access_type, is_user, &phys_addr, &prot,
                        &page_size);
    if (ret == 0) {
        /* Map a single [sub]page.  */
        phys_addr &= ~(uint32_t)0x3ff;
        address &= ~(uint32_t)0x3ff;
        tlb_set_page (env, address, phys_addr, prot, mmu_idx, page_size);
        return 0;
    }

    if (access_type == 2) {
        env->cp15.c5_insn = ret;
        env->cp15.c6_insn = address;
        env->exception_index = EXCP_PREFETCH_ABORT;
    } else {
        env->cp15.c5_data = ret;
        if (access_type == 1 && arm_feature(env, ARM_FEATURE_V6))
            env->cp15.c5_data |= (1 << 11);
        env->cp15.c6_data = address;
        env->exception_index = EXCP_DATA_ABORT;
    }
    return 1;
}
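On the success path, tlb_set_page is what writes the CPUTLBEntry shown at the top. A conceptual sketch of that fill (not the real tlb_set_page, which also encodes IO and dirty-page flags in the low tag bits; PAGE_READ/PAGE_WRITE/PAGE_EXEC are QEMU's protection bits, and -1 here is a simplified "no permission" marker):

/* Conceptual sketch only: fill one entry after a successful page walk.
 * host_va is assumed to be where QEMU mapped the backing guest-RAM page. */
static void fill_entry_sketch(CPUTLBEntry *e, target_ulong gva,
                              uintptr_t host_va, int prot)
{
    target_ulong tag = gva & TARGET_PAGE_MASK;

    e->addr_read  = (prot & PAGE_READ)  ? tag : (target_ulong)-1;
    e->addr_write = (prot & PAGE_WRITE) ? tag : (target_ulong)-1;
    e->addr_code  = (prot & PAGE_EXEC)  ? tag : (target_ulong)-1;
    e->addend     = host_va - tag;   /* later lookups: hva = gva + addend */
}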
       Regarding QEMU's memory access emulation: QEMU does not lower memory accesses entirely into IR, since that would both increase translation complexity and consume a large amount of code_buffer space. Instead, QEMU calls out to helper functions. During target translation, a memory access operation generates intermediate code of the following form:
tmp = gen_ld16s(addr, IS_USER(s));

static inline TCGv gen_ld16s(TCGv addr, int index)
{
    TCGv tmp = tcg_temp_new_i32();
    tcg_gen_qemu_ld16s(tmp, addr, index);
    return tmp;
}
Then, when TCG translates the intermediate code into host machine code, it first emits an inline TLB lookup; on a hit the result is returned directly, otherwise one of the following qemu_ld/st_helpers functions is called to perform the target's virtual-to-physical translation.
#ifdef CONFIG_SOFTMMU

#include "../../softmmu_defs.h"

#ifdef CONFIG_TCG_PASS_AREG0
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
   int mmu_idx) */
static const void * const qemu_ld_helpers[4] = {
    helper_ldb_mmu,
    helper_ldw_mmu,
    helper_ldl_mmu,
    helper_ldq_mmu,
};

/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
   uintxx_t val, int mmu_idx) */
static const void * const qemu_st_helpers[4] = {
    helper_stb_mmu,
    helper_stw_mmu,
    helper_stl_mmu,
    helper_stq_mmu,
};
#else
/* legacy helper signature: __ld_mmu(target_ulong addr, int
   mmu_idx) */
static void *qemu_ld_helpers[4] = {
    __ldb_mmu,
    __ldw_mmu,
    __ldl_mmu,
    __ldq_mmu,
};

/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
   int mmu_idx) */
static void *qemu_st_helpers[4] = {
    __stb_mmu,
    __stw_mmu,
    __stl_mmu,
    __stq_mmu,
};
#endif
#endif
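Both tables are indexed by access size. In tcg_out_qemu_ld below, s_bits = opc & 3 selects the entry; this just restates the indexing visible in the listed code:

/* s_bits: 0 -> 8-bit, 1 -> 16-bit, 2 -> 32-bit, 3 -> 64-bit access */
tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);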
#define TLB_SHIFT       (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)

static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
{
    int addr_reg, data_reg, data_reg2, bswap;
#ifdef CONFIG_SOFTMMU
    int mem_index, s_bits;
# if TARGET_LONG_BITS == 64
    int addr_reg2;
# endif
    uint32_t *label_ptr;
#endif

#ifdef TARGET_WORDS_BIGENDIAN
    bswap = 1;
#else
    bswap = 0;
#endif
    data_reg = *args++;
    if (opc == 3)
        data_reg2 = *args++;
    else
        data_reg2 = 0; /* suppress warning */
    addr_reg = *args++;
#ifdef CONFIG_SOFTMMU
# if TARGET_LONG_BITS == 64
    addr_reg2 = *args++;
# endif
    mem_index = *args;
    s_bits = opc & 3;

    /* Should generate something like the following:
     *  shr r8, addr_reg, #TARGET_PAGE_BITS
     *  and r0, r8, #(CPU_TLB_SIZE - 1)   @ Assumption: CPU_TLB_BITS <= 8
     *  add r0, env, r0 lsl #CPU_TLB_ENTRY_BITS
     */
#  if CPU_TLB_BITS > 8
#   error
#  endif
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_R8,
                    0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
    tcg_out_dat_imm(s, COND_AL, ARITH_AND,
                    TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0,
                    TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
    /* In the
     *  ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_read))]
     * below, the offset is likely to exceed 12 bits if mem_index != 0 and
     * not exceed otherwise, so use an
     *  add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
     * before.
     */
    if (mem_index)
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
                        (mem_index << (TLB_SHIFT & 1)) |
                        ((16 - (TLB_SHIFT >> 1)) << 8));
    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
                    offsetof(CPUArchState, tlb_table[0][0].addr_read));
    tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
                    TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
    /* Check alignment.  */
    if (s_bits)
        tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
                        0, addr_reg, (1 << s_bits) - 1);
#  if TARGET_LONG_BITS == 64
    /* XXX: possibly we could use a block data load or writeback in
     * the first access.  */
    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
                    offsetof(CPUArchState, tlb_table[0][0].addr_read) + 4);
    tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
                    TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
#  endif
    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
                    offsetof(CPUArchState, tlb_table[0][0].addend));

    switch (opc) {
    case 0:
        tcg_out_ld8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
        break;
    case 0 | 4:
        tcg_out_ld8s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
        break;
    case 1:
        tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
        if (bswap) {
            tcg_out_bswap16(s, COND_EQ, data_reg, data_reg);
        }
        break;
    case 1 | 4:
        if (bswap) {
            tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
            tcg_out_bswap16s(s, COND_EQ, data_reg, data_reg);
        } else {
            tcg_out_ld16s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
        }
        break;
    case 2:
    default:
        tcg_out_ld32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
        if (bswap) {
            tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
        }
        break;
    case 3:
        if (bswap) {
            tcg_out_ld32_rwb(s, COND_EQ, data_reg2, TCG_REG_R1, addr_reg);
            tcg_out_ld32_12(s, COND_EQ, data_reg, TCG_REG_R1, 4);
            tcg_out_bswap32(s, COND_EQ, data_reg2, data_reg2);
            tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
        } else {
            tcg_out_ld32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg);
            tcg_out_ld32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4);
        }
        break;
    }

    label_ptr = (void *) s->code_ptr;
    tcg_out_b_noaddr(s, COND_EQ);

    /* TODO: move this code to where the constants pool will be */
    if (addr_reg != TCG_REG_R0) {
        tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                        TCG_REG_R0, 0, addr_reg, SHIFT_IMM_LSL(0));
    }
# if TARGET_LONG_BITS == 32
    tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R1, 0, mem_index);
# else
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                    TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0));
    tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index);
# endif
#ifdef CONFIG_TCG_PASS_AREG0
    /* XXX/FIXME: suboptimal and incorrect for 64 bit */
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                    tcg_target_call_iarg_regs[2], 0,
                    tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0));
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                    tcg_target_call_iarg_regs[1], 0,
                    tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0));

    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                    tcg_target_call_iarg_regs[0], 0, TCG_AREG0,
                    SHIFT_IMM_LSL(0));
#endif
    tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);

    switch (opc) {
    case 0 | 4:
        tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0);
        break;
    case 1 | 4:
        tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0);
        break;
    case 0:
    case 1:
    case 2:
    default:
        if (data_reg != TCG_REG_R0) {
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                            data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0));
        }
        break;
    case 3:
        if (data_reg != TCG_REG_R0) {
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                            data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0));
        }
        if (data_reg2 != TCG_REG_R1) {
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                            data_reg2, 0, TCG_REG_R1, SHIFT_IMM_LSL(0));
        }
        break;
    }

    reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
#else /* !CONFIG_SOFTMMU */
    if (GUEST_BASE) {
        uint32_t offset = GUEST_BASE;
        int i;
        int rot;

        while (offset) {
            i = ctz32(offset) & ~1;
            rot = ((32 - i) << 7) & 0xf00;

            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R8, addr_reg,
                            ((offset >> i) & 0xff) | rot);
            addr_reg = TCG_REG_R8;
            offset &= ~(0xff << i);
        }
    }
    switch (opc) {
    case 0:
        tcg_out_ld8_12(s, COND_AL, data_reg, addr_reg, 0);
        break;
    case 0 | 4:
        tcg_out_ld8s_8(s, COND_AL, data_reg, addr_reg, 0);
        break;
    case 1:
        tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
        if (bswap) {
            tcg_out_bswap16(s, COND_AL, data_reg, data_reg);
        }
        break;
    case 1 | 4:
        if (bswap) {
            tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
            tcg_out_bswap16s(s, COND_AL, data_reg, data_reg);
        } else {
            tcg_out_ld16s_8(s, COND_AL, data_reg, addr_reg, 0);
        }
        break;
    case 2:
    default:
        tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, 0);
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
        }
        break;
    case 3:
        /* TODO: use block load -
         * check that data_reg2 > data_reg or the other way */
        if (data_reg == addr_reg) {
            tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
            tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
        } else {
            tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
            tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
        }
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
            tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2);
        }
        break;
    }
#endif
}
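To summarize what tcg_out_qemu_ld emits on an ARM host, the fast path looks roughly like this (a sketch assembled from the comments in the code above, not an actual disassembly):

/* Emitted fast path (conceptual ARM host code):
 *   mov   r8, addr_reg, lsr #TARGET_PAGE_BITS    @ guest page number
 *   and   r0, r8, #(CPU_TLB_SIZE - 1)            @ TLB index
 *   add   r0, env, r0, lsl #CPU_TLB_ENTRY_BITS   @ &tlb_table[mmu][index]
 *   ldr   r1, [r0, #offsetof(..., addr_read)]    @ load the tag
 *   cmp   r1, r8, lsl #TARGET_PAGE_BITS          @ tag match?
 *   ldreq r1, [r0, #offsetof(..., addend)]       @ hit: load addend
 *   ldreq data, [addr_reg, r1]                   @ hit: load via HVA
 *   beq   done
 *   ... set up args, call qemu_ld_helpers[s_bits] ...  @ miss: slow path
 * done:
 */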
       In the memory access path above, the TLB lookup itself is translated directly into host instructions, while the target's MMU translation is implemented by calling out to helper functions. This split is good for speed: the common case, a TLB hit, is handled entirely inside the generated code without any function call.