Commit 852d481faf7070ac6e46653b77f6c9ecbdfb9efc
1 parent
714fa308
SH: Improve movca.l/ocbi emulation.
Author: Vladimir Prus <vladimir@codesourcery.com> Fix movca.l/ocbi emulation. * target-sh4/cpu.h (memory_content): New. (CPUSH4State): New fields movcal_backup and movcal_backup_tail. * target-sh4/helper.h (helper_movcal) (helper_discard_movcal_backup, helper_ocbi): New. * target-sh4/op_helper.c (helper_movcal) (helper_discard_movcal_backup, helper_ocbi): New. * target-sh4/translate.c (DisasContext): New field has_movcal. (sh4_defs): Update CVS for SH7785. (cpu_sh4_init): Initialize env->movcal_backup_tail. (_decode_opc): Discard movca.l-backup. Make use of helper_movcal and helper_ocbi. (gen_intermediate_code_internal): Initialize has_movcal to 1. Thanks to Shin-ichiro KAWASAKI and Paul Mundt for valuable feedback. Acked-by: Edgar E. Iglesias <edgar.iglesias@gmail.com> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@6966 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
5 changed files
with
156 additions
and
5 deletions
target-sh4/cpu.h
@@ -100,6 +100,12 @@ enum sh_features { | @@ -100,6 +100,12 @@ enum sh_features { | ||
100 | SH_FEATURE_BCR3_AND_BCR4 = 2, | 100 | SH_FEATURE_BCR3_AND_BCR4 = 2, |
101 | }; | 101 | }; |
102 | 102 | ||
/*
 * One remembered memory word: an address together with a 32-bit value
 * recorded for it.  Entries are chained into a singly linked list through
 * `next`; the last entry of a list has `next == NULL`.
 */
struct memory_content {
    uint32_t address;               /* address the saved word belongs to */
    uint32_t value;                 /* 32-bit value saved for that address */
    struct memory_content *next;    /* following entry, NULL at the tail */
};

typedef struct memory_content memory_content;
108 | + | ||
103 | typedef struct CPUSH4State { | 109 | typedef struct CPUSH4State { |
104 | int id; /* CPU model */ | 110 | int id; /* CPU model */ |
105 | 111 | ||
@@ -148,6 +154,8 @@ typedef struct CPUSH4State { | @@ -148,6 +154,8 @@ typedef struct CPUSH4State { | ||
148 | tlb_t itlb[ITLB_SIZE]; /* instruction translation table */ | 154 | tlb_t itlb[ITLB_SIZE]; /* instruction translation table */ |
149 | void *intc_handle; | 155 | void *intc_handle; |
150 | int intr_at_halt; /* SR_BL ignored during sleep */ | 156 | int intr_at_halt; /* SR_BL ignored during sleep */ |
157 | + memory_content *movcal_backup; | ||
158 | + memory_content **movcal_backup_tail; | ||
151 | } CPUSH4State; | 159 | } CPUSH4State; |
152 | 160 | ||
153 | CPUSH4State *cpu_sh4_init(const char *cpu_model); | 161 | CPUSH4State *cpu_sh4_init(const char *cpu_model); |
@@ -162,6 +170,8 @@ void sh4_cpu_list(FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...)); | @@ -162,6 +170,8 @@ void sh4_cpu_list(FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...)); | ||
162 | void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr, | 170 | void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr, |
163 | uint32_t mem_value); | 171 | uint32_t mem_value); |
164 | 172 | ||
173 | +int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr); | ||
174 | + | ||
165 | static inline void cpu_set_tls(CPUSH4State *env, target_ulong newtls) | 175 | static inline void cpu_set_tls(CPUSH4State *env, target_ulong newtls) |
166 | { | 176 | { |
167 | env->gbr = newtls; | 177 | env->gbr = newtls; |
@@ -293,6 +303,8 @@ static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb) | @@ -293,6 +303,8 @@ static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb) | ||
293 | env->flags = tb->flags; | 303 | env->flags = tb->flags; |
294 | } | 304 | } |
295 | 305 | ||
306 | +#define TB_FLAG_PENDING_MOVCA (1 << 4) | ||
307 | + | ||
296 | static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, | 308 | static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, |
297 | target_ulong *cs_base, int *flags) | 309 | target_ulong *cs_base, int *flags) |
298 | { | 310 | { |
@@ -302,7 +314,8 @@ static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, | @@ -302,7 +314,8 @@ static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, | ||
302 | | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME)) /* Bits 0- 3 */ | 314 | | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME)) /* Bits 0- 3 */ |
303 | | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */ | 315 | | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */ |
304 | | (env->sr & (SR_MD | SR_RB)) /* Bits 29-30 */ | 316 | | (env->sr & (SR_MD | SR_RB)) /* Bits 29-30 */ |
305 | - | (env->sr & SR_FD); /* Bit 15 */ | 317 | + | (env->sr & SR_FD) /* Bit 15 */ |
318 | + | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 4 */ | ||
306 | } | 319 | } |
307 | 320 | ||
308 | #endif /* _CPU_SH4_H */ | 321 | #endif /* _CPU_SH4_H */ |
target-sh4/helper.c
@@ -644,4 +644,48 @@ void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr, | @@ -644,4 +644,48 @@ void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr, | ||
644 | } | 644 | } |
645 | } | 645 | } |
646 | 646 | ||
647 | +int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr) | ||
648 | +{ | ||
649 | + int n; | ||
650 | + int use_asid = (env->mmucr & MMUCR_SV) == 0 || (env->sr & SR_MD) == 0; | ||
651 | + | ||
652 | + /* check area */ | ||
653 | + if (env->sr & SR_MD) { | ||
654 | + /* For previledged mode, P2 and P4 area is not cachable. */ | ||
655 | + if ((0xA0000000 <= addr && addr < 0xC0000000) || 0xE0000000 <= addr) | ||
656 | + return 0; | ||
657 | + } else { | ||
658 | + /* For user mode, only U0 area is cachable. */ | ||
659 | + if (0x80000000 <= addr) | ||
660 | + return 0; | ||
661 | + } | ||
662 | + | ||
663 | + /* | ||
664 | + * TODO : Evaluate CCR and check if the cache is on or off. | ||
665 | + * Now CCR is not in CPUSH4State, but in SH7750State. | ||
666 | + * When you move the ccr inot CPUSH4State, the code will be | ||
667 | + * as follows. | ||
668 | + */ | ||
669 | +#if 0 | ||
670 | + /* check if operand cache is enabled or not. */ | ||
671 | + if (!(env->ccr & 1)) | ||
672 | + return 0; | ||
673 | +#endif | ||
674 | + | ||
675 | + /* if MMU is off, no check for TLB. */ | ||
676 | + if (env->mmucr & MMUCR_AT) | ||
677 | + return 1; | ||
678 | + | ||
679 | + /* check TLB */ | ||
680 | + n = find_tlb_entry(env, addr, env->itlb, ITLB_SIZE, use_asid); | ||
681 | + if (n >= 0) | ||
682 | + return env->itlb[n].c; | ||
683 | + | ||
684 | + n = find_tlb_entry(env, addr, env->utlb, UTLB_SIZE, use_asid); | ||
685 | + if (n >= 0) | ||
686 | + return env->utlb[n].c; | ||
687 | + | ||
688 | + return 0; | ||
689 | +} | ||
690 | + | ||
647 | #endif | 691 | #endif |
target-sh4/helper.h
@@ -9,6 +9,10 @@ DEF_HELPER_0(debug, void) | @@ -9,6 +9,10 @@ DEF_HELPER_0(debug, void) | ||
9 | DEF_HELPER_1(sleep, void, i32) | 9 | DEF_HELPER_1(sleep, void, i32) |
10 | DEF_HELPER_1(trapa, void, i32) | 10 | DEF_HELPER_1(trapa, void, i32) |
11 | 11 | ||
12 | +DEF_HELPER_2(movcal, void, i32, i32) | ||
13 | +DEF_HELPER_0(discard_movcal_backup, void) | ||
14 | +DEF_HELPER_1(ocbi, void, i32) | ||
15 | + | ||
12 | DEF_HELPER_2(addv, i32, i32, i32) | 16 | DEF_HELPER_2(addv, i32, i32, i32) |
13 | DEF_HELPER_2(addc, i32, i32, i32) | 17 | DEF_HELPER_2(addc, i32, i32, i32) |
14 | DEF_HELPER_2(subv, i32, i32, i32) | 18 | DEF_HELPER_2(subv, i32, i32, i32) |
target-sh4/op_helper.c
@@ -18,6 +18,7 @@ | @@ -18,6 +18,7 @@ | ||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA | 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA |
19 | */ | 19 | */ |
20 | #include <assert.h> | 20 | #include <assert.h> |
21 | +#include <stdlib.h> | ||
21 | #include "exec.h" | 22 | #include "exec.h" |
22 | #include "helper.h" | 23 | #include "helper.h" |
23 | 24 | ||
@@ -122,6 +123,57 @@ void helper_trapa(uint32_t tra) | @@ -122,6 +123,57 @@ void helper_trapa(uint32_t tra) | ||
122 | cpu_loop_exit(); | 123 | cpu_loop_exit(); |
123 | } | 124 | } |
124 | 125 | ||
126 | +void helper_movcal(uint32_t address, uint32_t value) | ||
127 | +{ | ||
128 | + if (cpu_sh4_is_cached (env, address)) | ||
129 | + { | ||
130 | + memory_content *r = malloc (sizeof(memory_content)); | ||
131 | + r->address = address; | ||
132 | + r->value = value; | ||
133 | + r->next = NULL; | ||
134 | + | ||
135 | + *(env->movcal_backup_tail) = r; | ||
136 | + env->movcal_backup_tail = &(r->next); | ||
137 | + } | ||
138 | +} | ||
139 | + | ||
140 | +void helper_discard_movcal_backup(void) | ||
141 | +{ | ||
142 | + memory_content *current = env->movcal_backup; | ||
143 | + | ||
144 | + while(current) | ||
145 | + { | ||
146 | + memory_content *next = current->next; | ||
147 | + free (current); | ||
148 | + env->movcal_backup = current = next; | ||
149 | + if (current == 0) | ||
150 | + env->movcal_backup_tail = &(env->movcal_backup); | ||
151 | + } | ||
152 | +} | ||
153 | + | ||
154 | +void helper_ocbi(uint32_t address) | ||
155 | +{ | ||
156 | + memory_content **current = &(env->movcal_backup); | ||
157 | + while (*current) | ||
158 | + { | ||
159 | + uint32_t a = (*current)->address; | ||
160 | + if ((a & ~0x1F) == (address & ~0x1F)) | ||
161 | + { | ||
162 | + memory_content *next = (*current)->next; | ||
163 | + stl(a, (*current)->value); | ||
164 | + | ||
165 | + if (next == 0) | ||
166 | + { | ||
167 | + env->movcal_backup_tail = current; | ||
168 | + } | ||
169 | + | ||
170 | + free (*current); | ||
171 | + *current = next; | ||
172 | + break; | ||
173 | + } | ||
174 | + } | ||
175 | +} | ||
176 | + | ||
125 | uint32_t helper_addc(uint32_t arg0, uint32_t arg1) | 177 | uint32_t helper_addc(uint32_t arg0, uint32_t arg1) |
126 | { | 178 | { |
127 | uint32_t tmp0, tmp1; | 179 | uint32_t tmp0, tmp1; |
target-sh4/translate.c
@@ -50,6 +50,7 @@ typedef struct DisasContext { | @@ -50,6 +50,7 @@ typedef struct DisasContext { | ||
50 | uint32_t delayed_pc; | 50 | uint32_t delayed_pc; |
51 | int singlestep_enabled; | 51 | int singlestep_enabled; |
52 | uint32_t features; | 52 | uint32_t features; |
53 | + int has_movcal; | ||
53 | } DisasContext; | 54 | } DisasContext; |
54 | 55 | ||
55 | #if defined(CONFIG_USER_ONLY) | 56 | #if defined(CONFIG_USER_ONLY) |
@@ -283,6 +284,7 @@ CPUSH4State *cpu_sh4_init(const char *cpu_model) | @@ -283,6 +284,7 @@ CPUSH4State *cpu_sh4_init(const char *cpu_model) | ||
283 | env = qemu_mallocz(sizeof(CPUSH4State)); | 284 | env = qemu_mallocz(sizeof(CPUSH4State)); |
284 | env->features = def->features; | 285 | env->features = def->features; |
285 | cpu_exec_init(env); | 286 | cpu_exec_init(env); |
287 | + env->movcal_backup_tail = &(env->movcal_backup); | ||
286 | sh4_translate_init(); | 288 | sh4_translate_init(); |
287 | env->cpu_model_str = cpu_model; | 289 | env->cpu_model_str = cpu_model; |
288 | cpu_sh4_reset(env); | 290 | cpu_sh4_reset(env); |
@@ -495,6 +497,37 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) | @@ -495,6 +497,37 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) | ||
495 | 497 | ||
496 | static void _decode_opc(DisasContext * ctx) | 498 | static void _decode_opc(DisasContext * ctx) |
497 | { | 499 | { |
500 | + /* This code tries to make movca.l emulation sufficiently | |
501 | + accurate for Linux purposes. This instruction writes | |
502 | + memory, and prior to that, always allocates a cache line. | |
503 | + It is used in two contexts: | |
504 | + - in memcpy, where data is copied in blocks, the first write | |
505 | + to a block uses movca.l for performance. | |
506 | + - in arch/sh/mm/cache-sh4.c, the movca.l + ocbi combination is used | |
507 | + to flush the cache. Here, the data written by movca.l is never | |
508 | + written to memory, and the data written is just bogus. | |
509 | + | |
510 | + To simulate this, when we emulate movca.l we store the value to memory, | |
511 | + but we also remember the previous content. If we see ocbi, we check | |
512 | + if movca.l for that address was done previously. If so, the write should | |
513 | + not have hit the memory, so we restore the previous content. | |
514 | + When we see an instruction that is neither movca.l | ||
515 | + nor ocbi, the previous content is discarded. | ||
516 | + | ||
517 | + To optimize, we only try to flush stores when we're at the start of | ||
518 | + TB, or if we already saw movca.l in this TB and did not flush stores | ||
519 | + yet. */ | ||
520 | + if (ctx->has_movcal) | ||
521 | + { | ||
522 | + int opcode = ctx->opcode & 0xf0ff; | ||
523 | + if (opcode != 0x0093 /* ocbi */ | ||
524 | + && opcode != 0x00c3 /* movca.l */) | ||
525 | + { | ||
526 | + gen_helper_discard_movcal_backup (); | ||
527 | + ctx->has_movcal = 0; | ||
528 | + } | ||
529 | + } | ||
530 | + | ||
498 | #if 0 | 531 | #if 0 |
499 | fprintf(stderr, "Translating opcode 0x%04x\n", ctx->opcode); | 532 | fprintf(stderr, "Translating opcode 0x%04x\n", ctx->opcode); |
500 | #endif | 533 | #endif |
@@ -1545,7 +1578,13 @@ static void _decode_opc(DisasContext * ctx) | @@ -1545,7 +1578,13 @@ static void _decode_opc(DisasContext * ctx) | ||
1545 | } | 1578 | } |
1546 | return; | 1579 | return; |
1547 | case 0x00c3: /* movca.l R0,@Rm */ | 1580 | case 0x00c3: /* movca.l R0,@Rm */ |
1548 | - tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx); | 1581 | + { |
1582 | + TCGv val = tcg_temp_new(); | ||
1583 | + tcg_gen_qemu_ld32u(val, REG(B11_8), ctx->memidx); | ||
1584 | + gen_helper_movcal (REG(B11_8), val); | ||
1585 | + tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx); | ||
1586 | + } | ||
1587 | + ctx->has_movcal = 1; | ||
1549 | return; | 1588 | return; |
1550 | case 0x40a9: | 1589 | case 0x40a9: |
1551 | /* MOVUA.L @Rm,R0 (Rm) -> R0 | 1590 | /* MOVUA.L @Rm,R0 (Rm) -> R0 |
@@ -1594,9 +1633,7 @@ static void _decode_opc(DisasContext * ctx) | @@ -1594,9 +1633,7 @@ static void _decode_opc(DisasContext * ctx) | ||
1594 | break; | 1633 | break; |
1595 | case 0x0093: /* ocbi @Rn */ | 1634 | case 0x0093: /* ocbi @Rn */ |
1596 | { | 1635 | { |
1597 | - TCGv dummy = tcg_temp_new(); | ||
1598 | - tcg_gen_qemu_ld32s(dummy, REG(B11_8), ctx->memidx); | ||
1599 | - tcg_temp_free(dummy); | 1636 | + gen_helper_ocbi (REG(B11_8)); |
1600 | } | 1637 | } |
1601 | return; | 1638 | return; |
1602 | case 0x00a3: /* ocbp @Rn */ | 1639 | case 0x00a3: /* ocbp @Rn */ |
@@ -1876,6 +1913,7 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb, | @@ -1876,6 +1913,7 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb, | ||
1876 | ctx.tb = tb; | 1913 | ctx.tb = tb; |
1877 | ctx.singlestep_enabled = env->singlestep_enabled; | 1914 | ctx.singlestep_enabled = env->singlestep_enabled; |
1878 | ctx.features = env->features; | 1915 | ctx.features = env->features; |
1916 | + ctx.has_movcal = (tb->flags & TB_FLAG_PENDING_MOVCA); | ||
1879 | 1917 | ||
1880 | #ifdef DEBUG_DISAS | 1918 | #ifdef DEBUG_DISAS |
1881 | qemu_log_mask(CPU_LOG_TB_CPU, | 1919 | qemu_log_mask(CPU_LOG_TB_CPU, |