Commit 852d481faf7070ac6e46653b77f6c9ecbdfb9efc
1 parent
714fa308
SH: Improve movca.l/ocbi emulation.
Author: Vladimir Prus <vladimir@codesourcery.com>
Fix movca.l/ocbi emulation.
* target-sh4/cpu.h (memory_content): New.
(CPUSH4State): New fields movcal_backup and movcal_backup_tail.
* target-sh4/helper.h (helper_movcal)
(helper_discard_movcal_backup, helper_ocbi): New.
* target-sh4/op_helper.c (helper_movcal)
(helper_discard_movcal_backup, helper_ocbi): New.
* target-sh4/translate.c (DisasContext): New field has_movcal.
(sh4_defs): Update CVS for SH7785.
(cpu_sh4_init): Initialize env->movcal_backup_tail.
(_decode_opc): Discard movca.l-backup.
Make use of helper_movcal and helper_ocbi.
(gen_intermediate_code_internal): Initialize has_movcal to 1.
Thanks to Shin-ichiro KAWASAKI and Paul Mundt for valuable feedback.
Acked-by: Edgar E. Iglesias <edgar.iglesias@gmail.com>
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@6966 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
5 changed files
with
156 additions
and
5 deletions
target-sh4/cpu.h
| @@ -100,6 +100,12 @@ enum sh_features { | @@ -100,6 +100,12 @@ enum sh_features { | ||
| 100 | SH_FEATURE_BCR3_AND_BCR4 = 2, | 100 | SH_FEATURE_BCR3_AND_BCR4 = 2, |
| 101 | }; | 101 | }; |
| 102 | 102 | ||
/*
 * One remembered 32-bit memory word: the address it lives at and the
 * value to put back there.  Nodes are chained through `next` into a
 * singly linked list whose last node has next == NULL.
 */
typedef struct memory_content memory_content;

struct memory_content {
    uint32_t address;       /* address the saved word belongs to */
    uint32_t value;         /* saved 32-bit content for that address */
    memory_content *next;   /* following node, or NULL at the end */
};
| 108 | + | ||
| 103 | typedef struct CPUSH4State { | 109 | typedef struct CPUSH4State { |
| 104 | int id; /* CPU model */ | 110 | int id; /* CPU model */ |
| 105 | 111 | ||
| @@ -148,6 +154,8 @@ typedef struct CPUSH4State { | @@ -148,6 +154,8 @@ typedef struct CPUSH4State { | ||
| 148 | tlb_t itlb[ITLB_SIZE]; /* instruction translation table */ | 154 | tlb_t itlb[ITLB_SIZE]; /* instruction translation table */ |
| 149 | void *intc_handle; | 155 | void *intc_handle; |
| 150 | int intr_at_halt; /* SR_BL ignored during sleep */ | 156 | int intr_at_halt; /* SR_BL ignored during sleep */ |
| 157 | + memory_content *movcal_backup; | ||
| 158 | + memory_content **movcal_backup_tail; | ||
| 151 | } CPUSH4State; | 159 | } CPUSH4State; |
| 152 | 160 | ||
| 153 | CPUSH4State *cpu_sh4_init(const char *cpu_model); | 161 | CPUSH4State *cpu_sh4_init(const char *cpu_model); |
| @@ -162,6 +170,8 @@ void sh4_cpu_list(FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...)); | @@ -162,6 +170,8 @@ void sh4_cpu_list(FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...)); | ||
| 162 | void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr, | 170 | void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr, |
| 163 | uint32_t mem_value); | 171 | uint32_t mem_value); |
| 164 | 172 | ||
| 173 | +int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr); | ||
| 174 | + | ||
| 165 | static inline void cpu_set_tls(CPUSH4State *env, target_ulong newtls) | 175 | static inline void cpu_set_tls(CPUSH4State *env, target_ulong newtls) |
| 166 | { | 176 | { |
| 167 | env->gbr = newtls; | 177 | env->gbr = newtls; |
| @@ -293,6 +303,8 @@ static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb) | @@ -293,6 +303,8 @@ static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb) | ||
| 293 | env->flags = tb->flags; | 303 | env->flags = tb->flags; |
| 294 | } | 304 | } |
| 295 | 305 | ||
| 306 | +#define TB_FLAG_PENDING_MOVCA (1 << 4) | ||
| 307 | + | ||
| 296 | static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, | 308 | static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, |
| 297 | target_ulong *cs_base, int *flags) | 309 | target_ulong *cs_base, int *flags) |
| 298 | { | 310 | { |
| @@ -302,7 +314,8 @@ static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, | @@ -302,7 +314,8 @@ static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, | ||
| 302 | | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME)) /* Bits 0- 3 */ | 314 | | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME)) /* Bits 0- 3 */ |
| 303 | | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */ | 315 | | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */ |
| 304 | | (env->sr & (SR_MD | SR_RB)) /* Bits 29-30 */ | 316 | | (env->sr & (SR_MD | SR_RB)) /* Bits 29-30 */ |
| 305 | - | (env->sr & SR_FD); /* Bit 15 */ | 317 | + | (env->sr & SR_FD) /* Bit 15 */ |
| 318 | + | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 4 */ | ||
| 306 | } | 319 | } |
| 307 | 320 | ||
| 308 | #endif /* _CPU_SH4_H */ | 321 | #endif /* _CPU_SH4_H */ |
target-sh4/helper.c
| @@ -644,4 +644,48 @@ void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr, | @@ -644,4 +644,48 @@ void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr, | ||
| 644 | } | 644 | } |
| 645 | } | 645 | } |
| 646 | 646 | ||
| 647 | +int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr) | ||
| 648 | +{ | ||
| 649 | + int n; | ||
| 650 | + int use_asid = (env->mmucr & MMUCR_SV) == 0 || (env->sr & SR_MD) == 0; | ||
| 651 | + | ||
| 652 | + /* check area */ | ||
| 653 | + if (env->sr & SR_MD) { | ||
| 654 | + /* For previledged mode, P2 and P4 area is not cachable. */ | ||
| 655 | + if ((0xA0000000 <= addr && addr < 0xC0000000) || 0xE0000000 <= addr) | ||
| 656 | + return 0; | ||
| 657 | + } else { | ||
| 658 | + /* For user mode, only U0 area is cachable. */ | ||
| 659 | + if (0x80000000 <= addr) | ||
| 660 | + return 0; | ||
| 661 | + } | ||
| 662 | + | ||
| 663 | + /* | ||
| 664 | + * TODO : Evaluate CCR and check if the cache is on or off. | ||
| 665 | + * Now CCR is not in CPUSH4State, but in SH7750State. | ||
| 666 | + * When you move the ccr inot CPUSH4State, the code will be | ||
| 667 | + * as follows. | ||
| 668 | + */ | ||
| 669 | +#if 0 | ||
| 670 | + /* check if operand cache is enabled or not. */ | ||
| 671 | + if (!(env->ccr & 1)) | ||
| 672 | + return 0; | ||
| 673 | +#endif | ||
| 674 | + | ||
| 675 | + /* if MMU is off, no check for TLB. */ | ||
| 676 | + if (env->mmucr & MMUCR_AT) | ||
| 677 | + return 1; | ||
| 678 | + | ||
| 679 | + /* check TLB */ | ||
| 680 | + n = find_tlb_entry(env, addr, env->itlb, ITLB_SIZE, use_asid); | ||
| 681 | + if (n >= 0) | ||
| 682 | + return env->itlb[n].c; | ||
| 683 | + | ||
| 684 | + n = find_tlb_entry(env, addr, env->utlb, UTLB_SIZE, use_asid); | ||
| 685 | + if (n >= 0) | ||
| 686 | + return env->utlb[n].c; | ||
| 687 | + | ||
| 688 | + return 0; | ||
| 689 | +} | ||
| 690 | + | ||
| 647 | #endif | 691 | #endif |
target-sh4/helper.h
| @@ -9,6 +9,10 @@ DEF_HELPER_0(debug, void) | @@ -9,6 +9,10 @@ DEF_HELPER_0(debug, void) | ||
| 9 | DEF_HELPER_1(sleep, void, i32) | 9 | DEF_HELPER_1(sleep, void, i32) |
| 10 | DEF_HELPER_1(trapa, void, i32) | 10 | DEF_HELPER_1(trapa, void, i32) |
| 11 | 11 | ||
| 12 | +DEF_HELPER_2(movcal, void, i32, i32) | ||
| 13 | +DEF_HELPER_0(discard_movcal_backup, void) | ||
| 14 | +DEF_HELPER_1(ocbi, void, i32) | ||
| 15 | + | ||
| 12 | DEF_HELPER_2(addv, i32, i32, i32) | 16 | DEF_HELPER_2(addv, i32, i32, i32) |
| 13 | DEF_HELPER_2(addc, i32, i32, i32) | 17 | DEF_HELPER_2(addc, i32, i32, i32) |
| 14 | DEF_HELPER_2(subv, i32, i32, i32) | 18 | DEF_HELPER_2(subv, i32, i32, i32) |
target-sh4/op_helper.c
| @@ -18,6 +18,7 @@ | @@ -18,6 +18,7 @@ | ||
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA | 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA |
| 19 | */ | 19 | */ |
| 20 | #include <assert.h> | 20 | #include <assert.h> |
| 21 | +#include <stdlib.h> | ||
| 21 | #include "exec.h" | 22 | #include "exec.h" |
| 22 | #include "helper.h" | 23 | #include "helper.h" |
| 23 | 24 | ||
| @@ -122,6 +123,57 @@ void helper_trapa(uint32_t tra) | @@ -122,6 +123,57 @@ void helper_trapa(uint32_t tra) | ||
| 122 | cpu_loop_exit(); | 123 | cpu_loop_exit(); |
| 123 | } | 124 | } |
| 124 | 125 | ||
| 126 | +void helper_movcal(uint32_t address, uint32_t value) | ||
| 127 | +{ | ||
| 128 | + if (cpu_sh4_is_cached (env, address)) | ||
| 129 | + { | ||
| 130 | + memory_content *r = malloc (sizeof(memory_content)); | ||
| 131 | + r->address = address; | ||
| 132 | + r->value = value; | ||
| 133 | + r->next = NULL; | ||
| 134 | + | ||
| 135 | + *(env->movcal_backup_tail) = r; | ||
| 136 | + env->movcal_backup_tail = &(r->next); | ||
| 137 | + } | ||
| 138 | +} | ||
| 139 | + | ||
| 140 | +void helper_discard_movcal_backup(void) | ||
| 141 | +{ | ||
| 142 | + memory_content *current = env->movcal_backup; | ||
| 143 | + | ||
| 144 | + while(current) | ||
| 145 | + { | ||
| 146 | + memory_content *next = current->next; | ||
| 147 | + free (current); | ||
| 148 | + env->movcal_backup = current = next; | ||
| 149 | + if (current == 0) | ||
| 150 | + env->movcal_backup_tail = &(env->movcal_backup); | ||
| 151 | + } | ||
| 152 | +} | ||
| 153 | + | ||
| 154 | +void helper_ocbi(uint32_t address) | ||
| 155 | +{ | ||
| 156 | + memory_content **current = &(env->movcal_backup); | ||
| 157 | + while (*current) | ||
| 158 | + { | ||
| 159 | + uint32_t a = (*current)->address; | ||
| 160 | + if ((a & ~0x1F) == (address & ~0x1F)) | ||
| 161 | + { | ||
| 162 | + memory_content *next = (*current)->next; | ||
| 163 | + stl(a, (*current)->value); | ||
| 164 | + | ||
| 165 | + if (next == 0) | ||
| 166 | + { | ||
| 167 | + env->movcal_backup_tail = current; | ||
| 168 | + } | ||
| 169 | + | ||
| 170 | + free (*current); | ||
| 171 | + *current = next; | ||
| 172 | + break; | ||
| 173 | + } | ||
| 174 | + } | ||
| 175 | +} | ||
| 176 | + | ||
| 125 | uint32_t helper_addc(uint32_t arg0, uint32_t arg1) | 177 | uint32_t helper_addc(uint32_t arg0, uint32_t arg1) |
| 126 | { | 178 | { |
| 127 | uint32_t tmp0, tmp1; | 179 | uint32_t tmp0, tmp1; |
target-sh4/translate.c
| @@ -50,6 +50,7 @@ typedef struct DisasContext { | @@ -50,6 +50,7 @@ typedef struct DisasContext { | ||
| 50 | uint32_t delayed_pc; | 50 | uint32_t delayed_pc; |
| 51 | int singlestep_enabled; | 51 | int singlestep_enabled; |
| 52 | uint32_t features; | 52 | uint32_t features; |
| 53 | + int has_movcal; | ||
| 53 | } DisasContext; | 54 | } DisasContext; |
| 54 | 55 | ||
| 55 | #if defined(CONFIG_USER_ONLY) | 56 | #if defined(CONFIG_USER_ONLY) |
| @@ -283,6 +284,7 @@ CPUSH4State *cpu_sh4_init(const char *cpu_model) | @@ -283,6 +284,7 @@ CPUSH4State *cpu_sh4_init(const char *cpu_model) | ||
| 283 | env = qemu_mallocz(sizeof(CPUSH4State)); | 284 | env = qemu_mallocz(sizeof(CPUSH4State)); |
| 284 | env->features = def->features; | 285 | env->features = def->features; |
| 285 | cpu_exec_init(env); | 286 | cpu_exec_init(env); |
| 287 | + env->movcal_backup_tail = &(env->movcal_backup); | ||
| 286 | sh4_translate_init(); | 288 | sh4_translate_init(); |
| 287 | env->cpu_model_str = cpu_model; | 289 | env->cpu_model_str = cpu_model; |
| 288 | cpu_sh4_reset(env); | 290 | cpu_sh4_reset(env); |
| @@ -495,6 +497,37 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) | @@ -495,6 +497,37 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) | ||
| 495 | 497 | ||
| 496 | static void _decode_opc(DisasContext * ctx) | 498 | static void _decode_opc(DisasContext * ctx) |
| 497 | { | 499 | { |
| 500 | + /* This code tries to make movca.l emulation sufficiently | |
| 501 | + accurate for Linux purposes. This instruction writes | |
| 502 | + memory, and prior to that, always allocates a cache line. | |
| 503 | + It is used in two contexts: | |
| 504 | + - in memcpy, where data is copied in blocks, the first write | |
| 505 | + to a block uses movca.l for performance. | |
| 506 | + - in arch/sh/mm/cache-sh4.c, the movca.l + ocbi combination is used | |
| 507 | + to flush the cache. Here, the data written by movca.l is never | |
| 508 | + written to memory, and the data written is just bogus. | |
| 509 | + | |
| 510 | + To simulate this, when we emulate movca.l we store the value to memory, | |
| 511 | + but we also remember the previous content. If we see ocbi, we check | |
| 512 | + if movca.l for that address was done previously. If so, the write should | |
| 513 | + not have hit the memory, so we restore the previous content. | |
| 514 | + When we see an instruction that is neither movca.l | |
| 515 | + nor ocbi, the previous content is discarded. | |
| 516 | + | |
| 517 | + To optimize, we only try to flush stores when we're at the start of | |
| 518 | + a TB, or if we already saw movca.l in this TB and did not flush stores | |
| 519 | + yet. */ | |
| 520 | + if (ctx->has_movcal) | ||
| 521 | + { | ||
| 522 | + int opcode = ctx->opcode & 0xf0ff; | ||
| 523 | + if (opcode != 0x0093 /* ocbi */ | ||
| 524 | + && opcode != 0x00c3 /* movca.l */) | ||
| 525 | + { | ||
| 526 | + gen_helper_discard_movcal_backup (); | ||
| 527 | + ctx->has_movcal = 0; | ||
| 528 | + } | ||
| 529 | + } | ||
| 530 | + | ||
| 498 | #if 0 | 531 | #if 0 |
| 499 | fprintf(stderr, "Translating opcode 0x%04x\n", ctx->opcode); | 532 | fprintf(stderr, "Translating opcode 0x%04x\n", ctx->opcode); |
| 500 | #endif | 533 | #endif |
| @@ -1545,7 +1578,13 @@ static void _decode_opc(DisasContext * ctx) | @@ -1545,7 +1578,13 @@ static void _decode_opc(DisasContext * ctx) | ||
| 1545 | } | 1578 | } |
| 1546 | return; | 1579 | return; |
| 1547 | case 0x00c3: /* movca.l R0,@Rm */ | 1580 | case 0x00c3: /* movca.l R0,@Rm */ |
| 1548 | - tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx); | 1581 | + { |
| 1582 | + TCGv val = tcg_temp_new(); | ||
| 1583 | + tcg_gen_qemu_ld32u(val, REG(B11_8), ctx->memidx); | ||
| 1584 | + gen_helper_movcal (REG(B11_8), val); | ||
| 1585 | + tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx); | ||
| 1586 | + } | ||
| 1587 | + ctx->has_movcal = 1; | ||
| 1549 | return; | 1588 | return; |
| 1550 | case 0x40a9: | 1589 | case 0x40a9: |
| 1551 | /* MOVUA.L @Rm,R0 (Rm) -> R0 | 1590 | /* MOVUA.L @Rm,R0 (Rm) -> R0 |
| @@ -1594,9 +1633,7 @@ static void _decode_opc(DisasContext * ctx) | @@ -1594,9 +1633,7 @@ static void _decode_opc(DisasContext * ctx) | ||
| 1594 | break; | 1633 | break; |
| 1595 | case 0x0093: /* ocbi @Rn */ | 1634 | case 0x0093: /* ocbi @Rn */ |
| 1596 | { | 1635 | { |
| 1597 | - TCGv dummy = tcg_temp_new(); | ||
| 1598 | - tcg_gen_qemu_ld32s(dummy, REG(B11_8), ctx->memidx); | ||
| 1599 | - tcg_temp_free(dummy); | 1636 | + gen_helper_ocbi (REG(B11_8)); |
| 1600 | } | 1637 | } |
| 1601 | return; | 1638 | return; |
| 1602 | case 0x00a3: /* ocbp @Rn */ | 1639 | case 0x00a3: /* ocbp @Rn */ |
| @@ -1876,6 +1913,7 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb, | @@ -1876,6 +1913,7 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb, | ||
| 1876 | ctx.tb = tb; | 1913 | ctx.tb = tb; |
| 1877 | ctx.singlestep_enabled = env->singlestep_enabled; | 1914 | ctx.singlestep_enabled = env->singlestep_enabled; |
| 1878 | ctx.features = env->features; | 1915 | ctx.features = env->features; |
| 1916 | + ctx.has_movcal = (tb->flags & TB_FLAG_PENDING_MOVCA); | ||
| 1879 | 1917 | ||
| 1880 | #ifdef DEBUG_DISAS | 1918 | #ifdef DEBUG_DISAS |
| 1881 | qemu_log_mask(CPU_LOG_TB_CPU, | 1919 | qemu_log_mask(CPU_LOG_TB_CPU, |