Commit 852d481faf7070ac6e46653b77f6c9ecbdfb9efc
1 parent
714fa308
SH: Improve movca.l/ocbi emulation.
Author: Vladimir Prus <vladimir@codesourcery.com> Fix movcal.l/ocbi emulation. * target-sh4/cpu.h (memory_content): New. (CPUSH4State): New fields movcal_backup and movcal_backup_tail. * target-sh4/helper.h (helper_movcal) (helper_discard_movcal_backup, helper_ocbi): New. * target-sh4/op_helper.c (helper_movcal) (helper_discard_movcal_backup, helper_ocbi): New. * target-sh4/translate.c (DisasContext): New field has_movcal. (sh4_defs): Update CVS for SH7785. (cpu_sh4_init): Initialize env->movcal_backup_tail. (_decode_opc): Discard movca.l-backup. Make use of helper_movcal and helper_ocbi. (gen_intermediate_code_internal): Initialize has_movcal to 1. Thanks to Shin-ichiro KAWASAKI and Paul Mundt for valuable feedback. Acked-by: Edgar E. Iglesias <edgar.iglesias@gmail.com> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@6966 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
5 changed files
with
156 additions
and
5 deletions
target-sh4/cpu.h
... | ... | @@ -100,6 +100,12 @@ enum sh_features { |
100 | 100 | SH_FEATURE_BCR3_AND_BCR4 = 2, |
101 | 101 | }; |
102 | 102 | |
/*
 * One saved longword of guest memory, kept as a node of a singly linked
 * list.  helper_movcal() appends a node per recorded movca.l store and
 * helper_ocbi() writes `value` back to `address` to undo such a store.
 */
typedef struct memory_content memory_content;

struct memory_content {
    uint32_t address;       /* guest address the recorded store targeted */
    uint32_t value;         /* longword to put back at `address` on rollback */
    memory_content *next;   /* following node; NULL marks the end of the list */
};
108 | + | |
103 | 109 | typedef struct CPUSH4State { |
104 | 110 | int id; /* CPU model */ |
105 | 111 | |
... | ... | @@ -148,6 +154,8 @@ typedef struct CPUSH4State { |
148 | 154 | tlb_t itlb[ITLB_SIZE]; /* instruction translation table */ |
149 | 155 | void *intc_handle; |
150 | 156 | int intr_at_halt; /* SR_BL ignored during sleep */ |
157 | + memory_content *movcal_backup; | |
158 | + memory_content **movcal_backup_tail; | |
151 | 159 | } CPUSH4State; |
152 | 160 | |
153 | 161 | CPUSH4State *cpu_sh4_init(const char *cpu_model); |
... | ... | @@ -162,6 +170,8 @@ void sh4_cpu_list(FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...)); |
162 | 170 | void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr, |
163 | 171 | uint32_t mem_value); |
164 | 172 | |
173 | +int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr); | |
174 | + | |
165 | 175 | static inline void cpu_set_tls(CPUSH4State *env, target_ulong newtls) |
166 | 176 | { |
167 | 177 | env->gbr = newtls; |
... | ... | @@ -293,6 +303,8 @@ static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb) |
293 | 303 | env->flags = tb->flags; |
294 | 304 | } |
295 | 305 | |
306 | +#define TB_FLAG_PENDING_MOVCA (1 << 4) | |
307 | + | |
296 | 308 | static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, |
297 | 309 | target_ulong *cs_base, int *flags) |
298 | 310 | { |
... | ... | @@ -302,7 +314,8 @@ static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, |
302 | 314 | | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME)) /* Bits 0- 3 */ |
303 | 315 | | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */ |
304 | 316 | | (env->sr & (SR_MD | SR_RB)) /* Bits 29-30 */ |
305 | - | (env->sr & SR_FD); /* Bit 15 */ | |
317 | + | (env->sr & SR_FD) /* Bit 15 */ | |
318 | + | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 4 */ | |
306 | 319 | } |
307 | 320 | |
308 | 321 | #endif /* _CPU_SH4_H */ | ... | ... |
target-sh4/helper.c
... | ... | @@ -644,4 +644,48 @@ void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr, |
644 | 644 | } |
645 | 645 | } |
646 | 646 | |
647 | +int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr) | |
648 | +{ | |
649 | + int n; | |
650 | + int use_asid = (env->mmucr & MMUCR_SV) == 0 || (env->sr & SR_MD) == 0; | |
651 | + | |
652 | + /* check area */ | |
653 | + if (env->sr & SR_MD) { | |
654 | + /* For previledged mode, P2 and P4 area is not cachable. */ | |
655 | + if ((0xA0000000 <= addr && addr < 0xC0000000) || 0xE0000000 <= addr) | |
656 | + return 0; | |
657 | + } else { | |
658 | + /* For user mode, only U0 area is cachable. */ | |
659 | + if (0x80000000 <= addr) | |
660 | + return 0; | |
661 | + } | |
662 | + | |
663 | + /* | |
664 | + * TODO : Evaluate CCR and check if the cache is on or off. | |
665 | + * Now CCR is not in CPUSH4State, but in SH7750State. | |
666 | + * When you move the ccr inot CPUSH4State, the code will be | |
667 | + * as follows. | |
668 | + */ | |
669 | +#if 0 | |
670 | + /* check if operand cache is enabled or not. */ | |
671 | + if (!(env->ccr & 1)) | |
672 | + return 0; | |
673 | +#endif | |
674 | + | |
675 | + /* if MMU is off, no check for TLB. */ | |
676 | + if (env->mmucr & MMUCR_AT) | |
677 | + return 1; | |
678 | + | |
679 | + /* check TLB */ | |
680 | + n = find_tlb_entry(env, addr, env->itlb, ITLB_SIZE, use_asid); | |
681 | + if (n >= 0) | |
682 | + return env->itlb[n].c; | |
683 | + | |
684 | + n = find_tlb_entry(env, addr, env->utlb, UTLB_SIZE, use_asid); | |
685 | + if (n >= 0) | |
686 | + return env->utlb[n].c; | |
687 | + | |
688 | + return 0; | |
689 | +} | |
690 | + | |
647 | 691 | #endif | ... | ... |
target-sh4/helper.h
... | ... | @@ -9,6 +9,10 @@ DEF_HELPER_0(debug, void) |
9 | 9 | DEF_HELPER_1(sleep, void, i32) |
10 | 10 | DEF_HELPER_1(trapa, void, i32) |
11 | 11 | |
12 | +DEF_HELPER_2(movcal, void, i32, i32) | |
13 | +DEF_HELPER_0(discard_movcal_backup, void) | |
14 | +DEF_HELPER_1(ocbi, void, i32) | |
15 | + | |
12 | 16 | DEF_HELPER_2(addv, i32, i32, i32) |
13 | 17 | DEF_HELPER_2(addc, i32, i32, i32) |
14 | 18 | DEF_HELPER_2(subv, i32, i32, i32) | ... | ... |
target-sh4/op_helper.c
... | ... | @@ -18,6 +18,7 @@ |
18 | 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA |
19 | 19 | */ |
20 | 20 | #include <assert.h> |
21 | +#include <stdlib.h> | |
21 | 22 | #include "exec.h" |
22 | 23 | #include "helper.h" |
23 | 24 | |
... | ... | @@ -122,6 +123,57 @@ void helper_trapa(uint32_t tra) |
122 | 123 | cpu_loop_exit(); |
123 | 124 | } |
124 | 125 | |
126 | +void helper_movcal(uint32_t address, uint32_t value) | |
127 | +{ | |
128 | + if (cpu_sh4_is_cached (env, address)) | |
129 | + { | |
130 | + memory_content *r = malloc (sizeof(memory_content)); | |
131 | + r->address = address; | |
132 | + r->value = value; | |
133 | + r->next = NULL; | |
134 | + | |
135 | + *(env->movcal_backup_tail) = r; | |
136 | + env->movcal_backup_tail = &(r->next); | |
137 | + } | |
138 | +} | |
139 | + | |
140 | +void helper_discard_movcal_backup(void) | |
141 | +{ | |
142 | + memory_content *current = env->movcal_backup; | |
143 | + | |
144 | + while(current) | |
145 | + { | |
146 | + memory_content *next = current->next; | |
147 | + free (current); | |
148 | + env->movcal_backup = current = next; | |
149 | + if (current == 0) | |
150 | + env->movcal_backup_tail = &(env->movcal_backup); | |
151 | + } | |
152 | +} | |
153 | + | |
154 | +void helper_ocbi(uint32_t address) | |
155 | +{ | |
156 | + memory_content **current = &(env->movcal_backup); | |
157 | + while (*current) | |
158 | + { | |
159 | + uint32_t a = (*current)->address; | |
160 | + if ((a & ~0x1F) == (address & ~0x1F)) | |
161 | + { | |
162 | + memory_content *next = (*current)->next; | |
163 | + stl(a, (*current)->value); | |
164 | + | |
165 | + if (next == 0) | |
166 | + { | |
167 | + env->movcal_backup_tail = current; | |
168 | + } | |
169 | + | |
170 | + free (*current); | |
171 | + *current = next; | |
172 | + break; | |
173 | + } | |
174 | + } | |
175 | +} | |
176 | + | |
125 | 177 | uint32_t helper_addc(uint32_t arg0, uint32_t arg1) |
126 | 178 | { |
127 | 179 | uint32_t tmp0, tmp1; | ... | ... |
target-sh4/translate.c
... | ... | @@ -50,6 +50,7 @@ typedef struct DisasContext { |
50 | 50 | uint32_t delayed_pc; |
51 | 51 | int singlestep_enabled; |
52 | 52 | uint32_t features; |
53 | + int has_movcal; | |
53 | 54 | } DisasContext; |
54 | 55 | |
55 | 56 | #if defined(CONFIG_USER_ONLY) |
... | ... | @@ -283,6 +284,7 @@ CPUSH4State *cpu_sh4_init(const char *cpu_model) |
283 | 284 | env = qemu_mallocz(sizeof(CPUSH4State)); |
284 | 285 | env->features = def->features; |
285 | 286 | cpu_exec_init(env); |
287 | + env->movcal_backup_tail = &(env->movcal_backup); | |
286 | 288 | sh4_translate_init(); |
287 | 289 | env->cpu_model_str = cpu_model; |
288 | 290 | cpu_sh4_reset(env); |
... | ... | @@ -495,6 +497,37 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) |
495 | 497 | |
496 | 498 | static void _decode_opc(DisasContext * ctx) |
497 | 499 | { |
500 | + /* This code tries to make movca.l emulation sufficiently | |
501 | + accurate for Linux purposes. This instruction writes | |
502 | + memory, and prior to that, always allocates a cache line. | |
503 | + It is used in two contexts: | |
504 | + - in memcpy, where data is copied in blocks, the first write | |
505 | + to a block uses movca.l for performance. | |
506 | + - in arch/sh/mm/cache-sh4.c, the movca.l + ocbi combination is used | |
507 | + to flush the cache. Here, the data written by movca.l is never | |
508 | + written to memory, and the data written is just bogus. | |
509 | + | |
510 | + To simulate this, when we see movca.l we store the value to memory, | |
511 | + but we also remember the previous content. If we see ocbi, we check | |
512 | + if movca.l for that address was done previously. If so, the write should | |
513 | + not have hit the memory, so we restore the previous content. | |
514 | + When we see an instruction that is neither movca.l | |
515 | + nor ocbi, the previous content is discarded. | |
516 | + | |
517 | + To optimize, we only try to flush stores when we're at the start of | |
518 | + TB, or if we already saw movca.l in this TB and did not flush stores | |
519 | + yet. */ | |
520 | + if (ctx->has_movcal) | |
521 | + { | |
522 | + int opcode = ctx->opcode & 0xf0ff; | |
523 | + if (opcode != 0x0093 /* ocbi */ | |
524 | + && opcode != 0x00c3 /* movca.l */) | |
525 | + { | |
526 | + gen_helper_discard_movcal_backup (); | |
527 | + ctx->has_movcal = 0; | |
528 | + } | |
529 | + } | |
530 | + | |
498 | 531 | #if 0 |
499 | 532 | fprintf(stderr, "Translating opcode 0x%04x\n", ctx->opcode); |
500 | 533 | #endif |
... | ... | @@ -1545,7 +1578,13 @@ static void _decode_opc(DisasContext * ctx) |
1545 | 1578 | } |
1546 | 1579 | return; |
1547 | 1580 | case 0x00c3: /* movca.l R0,@Rm */ |
1548 | - tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx); | |
1581 | + { | |
1582 | + TCGv val = tcg_temp_new(); | |
1583 | + tcg_gen_qemu_ld32u(val, REG(B11_8), ctx->memidx); | |
1584 | + gen_helper_movcal (REG(B11_8), val); | |
1585 | + tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx); | |
1586 | + } | |
1587 | + ctx->has_movcal = 1; | |
1549 | 1588 | return; |
1550 | 1589 | case 0x40a9: |
1551 | 1590 | /* MOVUA.L @Rm,R0 (Rm) -> R0 |
... | ... | @@ -1594,9 +1633,7 @@ static void _decode_opc(DisasContext * ctx) |
1594 | 1633 | break; |
1595 | 1634 | case 0x0093: /* ocbi @Rn */ |
1596 | 1635 | { |
1597 | - TCGv dummy = tcg_temp_new(); | |
1598 | - tcg_gen_qemu_ld32s(dummy, REG(B11_8), ctx->memidx); | |
1599 | - tcg_temp_free(dummy); | |
1636 | + gen_helper_ocbi (REG(B11_8)); | |
1600 | 1637 | } |
1601 | 1638 | return; |
1602 | 1639 | case 0x00a3: /* ocbp @Rn */ |
... | ... | @@ -1876,6 +1913,7 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb, |
1876 | 1913 | ctx.tb = tb; |
1877 | 1914 | ctx.singlestep_enabled = env->singlestep_enabled; |
1878 | 1915 | ctx.features = env->features; |
1916 | + ctx.has_movcal = (tb->flags & TB_FLAG_PENDING_MOVCA); | |
1879 | 1917 | |
1880 | 1918 | #ifdef DEBUG_DISAS |
1881 | 1919 | qemu_log_mask(CPU_LOG_TB_CPU, | ... | ... |