Commit 5df3a524fc0b923cf2e5e1883ff550d055d36eb5
1 parent
430c1ad1
TCG constant propagation.
Add support for constant propagation to TCG. This has to be paired with the liveness analysis to remove the dead code. Not all possible operations are covered, but the most common ones are. This improves the code generation for several ARM instructions, like MVN (immediate), and it may help other targets as well. v1 -> v2: Added profiling code and hopefully fixed for 64-bit TCG targets. v2 -> v3: Another attempt at fixing the support for 64-bit TCG targets. Signed-off-by: Filip Navara <filip.navara@gmail.com>
Showing
2 changed files
with
187 additions
and
0 deletions
tcg/tcg.c
... | ... | @@ -1025,7 +1025,183 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs) |
1025 | 1025 | #endif |
1026 | 1026 | tdefs++; |
1027 | 1027 | } |
1028 | +} | |
1029 | + | |
1030 | +static void tcg_const_analysis(TCGContext *s) | |
1031 | +{ | |
1032 | + int nb_cargs, nb_iargs, nb_oargs, dest, src, src2, del_args, i; | |
1033 | + TCGArg *args; | |
1034 | + uint16_t op; | |
1035 | + uint16_t *opc_ptr; | |
1036 | + const TCGOpDef *def; | |
1037 | + uint8_t *const_temps; | |
1038 | + tcg_target_ulong *temp_values; | |
1039 | + tcg_target_ulong val; | |
1040 | + tcg_target_ulong dest_val, src_val, src2_val; | |
1041 | + | |
1042 | + const_temps = tcg_malloc(s->nb_temps); | |
1043 | + memset(const_temps, 0, s->nb_temps); | |
1044 | + temp_values = tcg_malloc(s->nb_temps * sizeof(uint32_t)); | |
1045 | + | |
1046 | + opc_ptr = gen_opc_buf; | |
1047 | + args = gen_opparam_buf; | |
1048 | + while (opc_ptr < gen_opc_ptr) { | |
1049 | + op = *opc_ptr; | |
1050 | + def = &tcg_op_defs[op]; | |
1051 | + nb_oargs = def->nb_oargs; | |
1052 | + nb_iargs = def->nb_iargs; | |
1053 | + nb_cargs = def->nb_cargs; | |
1054 | + del_args = 0; | |
1055 | + | |
1056 | + switch(op) { | |
1057 | + case INDEX_op_movi_i32: | |
1058 | +#if TCG_TARGET_REG_BITS == 64 | |
1059 | + case INDEX_op_movi_i64: | |
1060 | +#endif | |
1061 | + dest = args[0]; | |
1062 | + val = args[1]; | |
1063 | + const_temps[dest] = 1; | |
1064 | + temp_values[dest] = val; | |
1065 | + break; | |
1066 | + case INDEX_op_mov_i32: | |
1067 | +#if TCG_TARGET_REG_BITS == 64 | |
1068 | + case INDEX_op_mov_i64: | |
1069 | +#endif | |
1070 | + dest = args[0]; | |
1071 | + src = args[1]; | |
1072 | + const_temps[dest] = const_temps[src]; | |
1073 | + temp_values[dest] = temp_values[src]; | |
1074 | + break; | |
1075 | +#ifdef TCG_TARGET_HAS_not_i32 | |
1076 | + case INDEX_op_not_i32: | |
1077 | + dest = args[0]; | |
1078 | + src = args[1]; | |
1079 | + if (const_temps[src]) { | |
1080 | + const_temps[dest] = 1; | |
1081 | + *opc_ptr = INDEX_op_movi_i32; | |
1082 | + args[1] = temp_values[dest] = ~temp_values[src] & 0xffffffff; | |
1083 | + } else { | |
1084 | + const_temps[dest] = 0; | |
1085 | + } | |
1086 | + break; | |
1087 | +#endif | |
1088 | +#ifdef TCG_TARGET_HAS_not_i64 | |
1089 | + case INDEX_op_not_i64: | |
1090 | + dest = args[0]; | |
1091 | + src = args[1]; | |
1092 | + if (const_temps[src]) { | |
1093 | + const_temps[dest] = 1; | |
1094 | + *opc_ptr = INDEX_op_movi_i64; | |
1095 | + args[1] = temp_values[dest] = ~temp_values[src]; | |
1096 | + } else { | |
1097 | + const_temps[dest] = 0; | |
1098 | + } | |
1099 | + break; | |
1100 | +#endif | |
1101 | + case INDEX_op_add_i32: | |
1102 | + case INDEX_op_sub_i32: | |
1103 | + case INDEX_op_mul_i32: | |
1104 | + case INDEX_op_and_i32: | |
1105 | + case INDEX_op_or_i32: | |
1106 | + case INDEX_op_xor_i32: | |
1107 | + case INDEX_op_shl_i32: | |
1108 | + case INDEX_op_shr_i32: | |
1109 | + dest = args[0]; | |
1110 | + src = args[1]; | |
1111 | + src2 = args[2]; | |
1112 | + if (const_temps[src] && const_temps[src2]) { | |
1113 | + src_val = temp_values[src]; | |
1114 | + src2_val = temp_values[src2]; | |
1115 | + const_temps[dest] = 1; | |
1116 | + switch (op) { | |
1117 | + case INDEX_op_add_i32: dest_val = src_val + src2_val; break; | |
1118 | + case INDEX_op_sub_i32: dest_val = src_val - src2_val; break; | |
1119 | + case INDEX_op_mul_i32: dest_val = src_val * src2_val; break; | |
1120 | + case INDEX_op_and_i32: dest_val = src_val & src2_val; break; | |
1121 | + case INDEX_op_or_i32: dest_val = src_val | src2_val; break; | |
1122 | + case INDEX_op_xor_i32: dest_val = src_val ^ src2_val; break; | |
1123 | + case INDEX_op_shl_i32: dest_val = src_val << src2_val; break; | |
1124 | + case INDEX_op_shr_i32: dest_val = src_val >> src2_val; break; | |
1125 | + default: tcg_abort(); return; | |
1126 | + } | |
1127 | + *opc_ptr = INDEX_op_movi_i32; | |
1128 | + args[1] = temp_values[dest] = dest_val & 0xffffffff; | |
1129 | + del_args = 1; | |
1130 | + } else { | |
1131 | + const_temps[dest] = 0; | |
1132 | + } | |
1133 | + break; | |
1134 | +#if TCG_TARGET_REG_BITS == 64 | |
1135 | + case INDEX_op_add_i64: | |
1136 | + case INDEX_op_sub_i64: | |
1137 | + case INDEX_op_mul_i64: | |
1138 | + case INDEX_op_and_i64: | |
1139 | + case INDEX_op_or_i64: | |
1140 | + case INDEX_op_xor_i64: | |
1141 | + case INDEX_op_shl_i64: | |
1142 | + case INDEX_op_shr_i64: | |
1143 | + dest = args[0]; | |
1144 | + src = args[1]; | |
1145 | + src2 = args[2]; | |
1146 | + if (const_temps[src] && const_temps[src2]) { | |
1147 | + src_val = temp_values[src]; | |
1148 | + src2_val = temp_values[src2]; | |
1149 | + const_temps[dest] = 1; | |
1150 | + switch (op) { | |
1151 | + case INDEX_op_add_i64: dest_val = src_val + src2_val; break; | |
1152 | + case INDEX_op_sub_i64: dest_val = src_val - src2_val; break; | |
1153 | + case INDEX_op_mul_i64: dest_val = src_val * src2_val; break; | |
1154 | + case INDEX_op_and_i64: dest_val = src_val & src2_val; break; | |
1155 | + case INDEX_op_or_i64: dest_val = src_val | src2_val; break; | |
1156 | + case INDEX_op_xor_i64: dest_val = src_val ^ src2_val; break; | |
1157 | + case INDEX_op_shl_i64: dest_val = src_val << src2_val; break; | |
1158 | + case INDEX_op_shr_i64: dest_val = src_val >> src2_val; break; | |
1159 | + default: tcg_abort(); return; | |
1160 | + } | |
1161 | + *opc_ptr = INDEX_op_movi_i64; | |
1162 | + args[1] = temp_values[dest] = dest_val; | |
1163 | + del_args = 1; | |
1164 | + } else { | |
1165 | + const_temps[dest] = 0; | |
1166 | + } | |
1167 | + break; | |
1168 | +#endif | |
1169 | + case INDEX_op_call: | |
1170 | + nb_oargs = args[0] >> 16; | |
1171 | + nb_iargs = args[0] & 0xffff; | |
1172 | + nb_cargs = def->nb_cargs; | |
1173 | + args++; | |
1174 | + for (i = 0; i < nb_oargs; i++) { | |
1175 | + const_temps[args[i]] = 0; | |
1176 | + } | |
1177 | + break; | |
1178 | + case INDEX_op_nopn: | |
1179 | + /* variable number of arguments */ | |
1180 | + nb_cargs = args[0]; | |
1181 | + break; | |
1182 | + case INDEX_op_set_label: | |
1183 | + memset(const_temps, 0, s->nb_temps); | |
1184 | + break; | |
1185 | + default: | |
1186 | + if (def->flags & TCG_OPF_BB_END) { | |
1187 | + memset(const_temps, 0, s->nb_temps); | |
1188 | + } else { | |
1189 | + for (i = 0; i < nb_oargs; i++) { | |
1190 | + const_temps[args[i]] = 0; | |
1191 | + } | |
1192 | + } | |
1193 | + break; | |
1194 | + } | |
1195 | + opc_ptr++; | |
1196 | + args += nb_iargs + nb_oargs + nb_cargs - del_args; | |
1197 | + if (del_args > 0) { | |
1198 | + gen_opparam_ptr -= del_args; | |
1199 | + memmove(args, args + del_args, (gen_opparam_ptr - args) * sizeof(*args)); | |
1200 | + } | |
1201 | + } | |
1028 | 1202 | |
1203 | + if (args != gen_opparam_ptr) | |
1204 | + tcg_abort(); | |
1029 | 1205 | } |
1030 | 1206 | |
1031 | 1207 | #ifdef USE_LIVENESS_ANALYSIS |
... | ... | @@ -1896,6 +2072,14 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, |
1896 | 2072 | #endif |
1897 | 2073 | |
1898 | 2074 | #ifdef CONFIG_PROFILER |
2075 | + s->const_time -= profile_getclock(); | |
2076 | +#endif | |
2077 | + tcg_const_analysis(s); | |
2078 | +#ifdef CONFIG_PROFILER | |
2079 | + s->const_time += profile_getclock(); | |
2080 | +#endif | |
2081 | + | |
2082 | +#ifdef CONFIG_PROFILER | |
1899 | 2083 | s->la_time -= profile_getclock(); |
1900 | 2084 | #endif |
1901 | 2085 | tcg_liveness_analysis(s); |
... | ... | @@ -2068,6 +2252,8 @@ void tcg_dump_info(FILE *f, |
2068 | 2252 | (double)s->interm_time / tot * 100.0); |
2069 | 2253 | cpu_fprintf(f, " gen_code time %0.1f%%\n", |
2070 | 2254 | (double)s->code_time / tot * 100.0); |
2255 | + cpu_fprintf(f, "const/code time %0.1f%%\n", | |
2256 | + (double)s->const_time / (s->code_time ? s->code_time : 1) * 100.0); | |
2071 | 2257 | cpu_fprintf(f, "liveness/code time %0.1f%%\n", |
2072 | 2258 | (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); |
2073 | 2259 | cpu_fprintf(f, "cpu_restore count %" PRId64 "\n", | ... | ... |