Eliminate sign and zero extensions in PPC generated code
A new module is introduced 'extelim.c' and a new RTL pass is introduced.
The '-f[no-]extelim' flag controls this pass and is enabled at -O2 and above.
The algorithm is based on the paper "Effective Sign Extension Elimination", Kawahito et al.
More details on implementation in the extelim.c module.
--- gcc-4.6-branch-clean/gcc/opts.c 2011-07-27 12:02:02.483850879 -0500
+++ gcc-4.6-branch/gcc/opts.c 2011-07-25 17:59:00.911975444 -0500
@@ -492,6 +492,7 @@
{ OPT_LEVELS_2_PLUS, OPT_falign_jumps, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_falign_labels, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_falign_functions, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_fextelim, NULL, 1 },
/* -O3 optimizations. */
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
--- gcc-4.6-branch-clean/gcc/tree-pass.h 2011-07-27 12:02:02.485981448 -0500
+++ gcc-4.6-branch/gcc/tree-pass.h 2011-07-25 17:59:00.912976334 -0500
@@ -483,6 +483,7 @@
extern struct rtl_opt_pass pass_initial_value_sets;
extern struct rtl_opt_pass pass_unshare_all_rtl;
extern struct rtl_opt_pass pass_instantiate_virtual_regs;
+extern struct rtl_opt_pass pass_rtl_extelim;
extern struct rtl_opt_pass pass_rtl_fwprop;
extern struct rtl_opt_pass pass_rtl_fwprop_addr;
extern struct rtl_opt_pass pass_jump2;
--- gcc-4.6-branch-clean/gcc/timevar.def 2011-07-27 12:02:02.487999008 -0500
+++ gcc-4.6-branch/gcc/timevar.def 2011-07-25 17:59:00.913979563 -0500
@@ -180,6 +180,7 @@
DEFTIMEVAR (TV_VARCONST , "varconst")
DEFTIMEVAR (TV_LOWER_SUBREG , "lower subreg")
DEFTIMEVAR (TV_JUMP , "jump")
+DEFTIMEVAR (TV_EXTELIM , "extension elimination")
DEFTIMEVAR (TV_FWPROP , "forward prop")
DEFTIMEVAR (TV_CSE , "CSE")
DEFTIMEVAR (TV_DCE , "dead code elimination")
--- gcc-4.6-branch-clean/gcc/common.opt 2011-07-27 12:02:02.490978128 -0500
+++ gcc-4.6-branch/gcc/common.opt 2011-07-25 17:59:00.915979093 -0500
@@ -996,6 +996,10 @@
Common Report Var(flag_eliminate_dwarf2_dups)
Perform DWARF2 duplicate elimination
+fextelim
+Common Report Var(flag_extelim)
+Perform zero/sign extension removal
+
fipa-sra
Common Report Var(flag_ipa_sra) Init(0) Optimization
Perform interprocedural reduction of aggregates
--- gcc-4.6-branch-clean/gcc/Makefile.in 2011-07-27 12:02:02.498976606 -0500
+++ gcc-4.6-branch/gcc/Makefile.in 2011-07-25 17:59:00.919975303 -0500
@@ -1233,6 +1233,7 @@
explow.o \
expmed.o \
expr.o \
+ extelim.o \
final.o \
fixed-value.o \
fold-const.o \
@@ -2891,6 +2892,11 @@
reload.h langhooks.h intl.h $(TM_P_H) $(TARGET_H) \
tree-iterator.h gt-expr.h $(MACHMODE_H) $(TIMEVAR_H) $(TREE_FLOW_H) \
$(TREE_PASS_H) $(DF_H) $(DIAGNOSTIC_H) vecprim.h $(SSAEXPAND_H)
+extelim.o : extelim.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
+ $(TREE_H) $(TM_P_H) $(FLAGS_H) $(REGS_H) hard-reg-set.h $(BASIC_BLOCK_H) \
+ insn-config.h $(FUNCTION_H) $(EXPR_H) $(INSN_ATTR_H) $(RECOG_H) \
+ toplev.h $(TARGET_H) $(TIMEVAR_H) $(OPTABS_H) insn-codes.h rtlhooks-def.h \
+ output.h $(PARAMS_H) $(TREE_PASS_H) $(CGRAPH_H) $(DF_H)
dojump.o : dojump.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) \
$(FLAGS_H) $(FUNCTION_H) $(EXPR_H) $(OPTABS_H) $(INSN_ATTR_H) insn-config.h \
langhooks.h $(GGC_H) gt-dojump.h vecprim.h $(BASIC_BLOCK_H) output.h
--- gcc-4.6-branch-clean/gcc/passes.c 2011-07-27 12:02:02.502976386 -0500
+++ gcc-4.6-branch/gcc/passes.c 2011-07-25 17:59:00.922975752 -0500
@@ -990,6 +990,7 @@
NEXT_PASS (pass_web);
NEXT_PASS (pass_rtl_cprop);
NEXT_PASS (pass_cse2);
+ NEXT_PASS (pass_rtl_extelim);
NEXT_PASS (pass_rtl_dse1);
NEXT_PASS (pass_rtl_fwprop_addr);
NEXT_PASS (pass_inc_dec);
--- gcc-4.6.1-clean/gcc/extelim.c 1969-12-31 18:00:00.000000000 -0600
+++ gcc-4.6.1/gcc/extelim.c 2011-11-14 15:43:10.041143996 -0600
@@ -0,0 +1,3407 @@
+/* Redundant extension elimination
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by John Russo (john.russo@freescale.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/*
+PURPOSE: Implement a method for eliminating redundant and superfluous sign
+and zero extension instructions from 64-bit PPC e5500 generated code.
+
+MOTIVATING EXAMPLE:
+The Nullstone loop_6.c kernel looks like:
+ int i;
+ int a[100];
+
+ ref_int_p (&a[0]);
+
+ for (i=2; i<100; i++)
+ a[i] = a[i-1] + a[i-2];
+
+The final, generated code for the loop body is:
+
+32-bit 64-bit
+add r25,r11,r0 add r5,r5,r8
+add r26,r0,r25 addi r4,r4,12
+stw r25,0(r9) add r27,r5,r8
+add r27,r25,r26 stw r5,0(r9)
+stw r26,4(r9) extsw r12,r27 <===
+add r28,r26,r27 stw r27,4(r9)
+stw r27,8(r9) add r6,r5,r12
+add r29,r27,r28 add r28,r6,r12
+stw r28,12(r9) stw r6,8(r9)
+add r30,r28,r29 extsw r0,r28 <===
+stw r29,16(r9) stw r28,12(r9)
+add r12,r29,r30 add r7,r6,r0
+stw r30,20(r9) add r29,r7,r0
+add r3,r30,r12 stw r7,16(r9)
+stw r12,24(r9) extsw r3,r29 <===
+add r4,r12,r3 stw r29,20(r9)
+stw r3,28(r9) add r10,r7,r3
+add r5,r3,r4 add r30,r10,r3
+stw r4,32(r9) stw r10,24(r9)
+add r6,r4,r5 extsw r8,r30 <===
+stw r5,36(r9) stw r30,28(r9)
+add r7,r5,r6 add r11,r10,r8
+stw r6,40(r9) add r12,r11,r8
+add r8,r6,r7 stw r11,32(r9)
+stw r7,44(r9) extsw r26,r12 <===
+add r10,r7,r8 stw r12,36(r9)
+stw r8,48(r9) add r0,r11,r26
+add r11,r8,r10 add r3,r0,r26
+stw r10,52(r9) stw r0,40(r9)
+add r0,r10,r11 subfic r26,r4,100
+stw r11,56(r9) stw r3,44(r9)
+stw r0,60(r9) extsw r5,r0 <===
+addi r9,r9,64 extsw r8,r3 <===
+bdnz+ 10000640 extsw r4,r4 <===
+ clrldi r26,r26,32
+ addi r9,r9,48
+ bdnz+ 10000890
+
+GENERAL APPROACH:
+Consider a machine whose native register size is 64-bits
+
+0 3132 63
+|-----------||-----------|
+
+where bit 63 is the LSB and bit 0 is the MSB of a long int
+and bit 63 is the LSB and bit 32 is the MSB of an int.
+
+Sign and zero extension are inserted to RTL to preserve the
+operation's semantics when the operands used are not the
+native register size since normally the machine only performs
+the operation using a native register size. In practice, many
+of the inserted extensions are not necessary.
+
+First, the extension may simply be redundant. That is, the
+same operation is performed on the same operands. The redundant
+extensions can be eliminated.
+
+Secondly, if the extended portion of the register (the "upper" bits)
+are not essential to the calculations performed on the output of the
+extension, then the extension is not necessary. For example, given
+int (32-bit) inputs and outputs:
+
+c = a + b
+d = sxt(c)
+e = d + 1;
+
+The "upper" bits of d (bit 0-31) do not affect the calculation
+of e. It doesn't matter what the "upper" bits of d are, the int result
+e is the same regardless of the sxt instruction.
+
+Thirdly, the extensions may not be necessary if the operands are
+already extended and the operation preserves the extended bits.
+
+a = mem[&b] ; sign extending load
+c = a + 1
+d = sxt(c)
+
+Here, a is generated by a sign extending load, the operation
+does nothing to invalidate the extension to c, thus the extension
+on c to d is not necessary.
+
+In each case, the redundant extension must be replaced by a copy,
+with the copy to be optimized out in later phases.
+
+The three cases described above form the general idea behind the
+algorithms implemented here to eliminate redundant and unnecessary
+extensions.
+
+Sign extensions do not have to be preserved for overflow conditions
+since signed overflow behavior is not defined in C. For example,
+take a 16-bit variable in a 32-bit register. It is ok
+for 0x0000_7fff to overflow to 0x0000_8000 and not 0xffff_8000.
+This implies that it is not necessary to preserve the sign
+extension.
+
+Zero extensions that guard unsigned overflow need to be preserved
+because unsigned arithmetic wraps modulo 2^N. For example, a 16-bit
+unsigned overflow of 0x0000_FFFF must be 0x0000_0000 in a 32-bit register,
+not 0x0001_0000. In order to remove the unsigned zero extension,
+we would need to range check the variable to be sure it doesn't
+overflow.
+
+RTL ANALYSIS:
+I looked at the RTL representation after RTL generation (.expand) and
+after the first forward propagation (.fwprop1). Since RTL is not compact
+when printing out, I reduced the .fwprop1 RTL to this pseudocode:
+
+(note: sxt,zxt mean double word length, 64-bit, extension).
+
+(1) r198 = m[r113+ #112] ; load a[0]
+(2) r174 = sxt(r198)
+(3) r199 = m[r113+ #116] ; load a[1]
+(4) r186 = sxt(r199)
+(5) r181 = r113 + #120 ; load &a[2]
+(6) r180 = 2 ; i = 2
+(7) L1:
+(8) r200 = r174 + r186 ; t1 = a[i-1] + a[i-2]
+(9) r174 = sxt(r200)
+(10) m[r181] = r200 ; a[i] = t1
+(11) r201 = r200 + r186 ; t2 = t1 + a[i-1]
+(12) r186 = sxt(r201)
+(13) m[r181+4] = r201 ; a[i+1] = t2
+(14) r202 = r180 + 2 ; i += 2
+(14.1) r180 = sxt(r202)
+(15) r203 = 100 - r202 ; used to calc loop remainder
+(16) r185 = zxt(r203) ; used to calc loop remainder
+(17) r181 = r181 + 8 ; address induction var
+(18) ccr204 = cmp(r202,#98) ; set CC
+(19) BNE ccr204,L1 ; branch
+
+In the pseudo-code, you see several sign extension candidates: (2),(4),
+(9), (12), (14.1), (16).
+
+ALGORITHM:
+To eliminate the extra sign ext you have to look at (1) the definitions
+of the source of the sign extensions and/or (2) look at the uses of the target
+of the sign extensions. In either case, if doing a global elimination
+pass, you'll need def-use chain information.
+
+The algorithms are recursive. Using the use/def and def/use chains
+we attempt to find ultimately whether the extension is relevant
+or not.
+
+
+Example 1.
+Extensions (2) and (4) are not put in the candidate list because
+they are combined into a load/ext pair that is ultimately generated
+as sign extending loads.
+
+Take the sign extension at (9), r174 = sxt(r200).
+Def analysis shows that r200 is defined by 2 registers, thus no
+further def analysis recursion can occur.
+Use analysis. Find all the uses of r174. There is 1 use at (8) r200 = r174 + r186.
+The extension does not affect the add operation results. Continuing, we look at
+the uses of r200 to see if the results of operations on r200 need the sign extended bits.
+We see 2 uses of r200 at (10) and (11). (10) is a 32-bit store of r200,
+so the sign extended bits are irrelevant. (11), however, is an unknown,
+so we must look at the uses of this result, r201. A similar sequence
+occurs for r201 when it defines r186. Looking at the uses of r186 at
+(8) and (11), we have already visited those statements so they have
+been covered already. So it appears that the sxt to r174 at (9) ultimately
+dead-ends to a store instruction that doesn't care about the sign extended
+bits. The sxt at (9) can be removed.
+
+The remaining extensions are processed similarly.
+
+PROGRAM STRUCTURE:
+
+extension elimination -- main entry point
+ find extensions -- identify extension candidates
+ extension duplication -- insert extension at strategic points to
+ enable removal of extensions at more frequently
+ executed points.
+ find extensions -- recreate extension candidate list
+ sort extensions -- sort extension candidate list by loop depth
+ for each ext in list -- process each extension candidate
+ eliminate one extension
+ replace marked candidates with copy -- optimize the extension
+
+PSEUDOCODE:
+
+Create working list of sign extensions, sxt_list
+
+For each insn, insn_sxt, in sxt_list
+ ext_needed = true
+ For all insns, insn_def, that DEFINE and REACH the SOURCE_REG(insn_sxt)
+ ext_needed = analyze_def(insn_def, insn_sxt)
+ if (ext_needed)
+ break;
+ end_loop
+ if (ext_needed)
+ For all insns, insn_use, that USE and are REACHED by the DEST_REG(insn_sxt)
+ ext_needed = analyze_use(insn_use, insn_sxt)
+ if (ext_needed)
+ break;
+ end_loop
+
+ if (!ext_needed)
+ mark_for_replace_with_copy(insn_sxt)
+end_loop
+
+For each insn, insn_sxt, in sxt_list
+ if (insn_sxt is marked for replacement)
+ replace_insn_with_copy(insn_sxt)
+end_loop
+
+--------------------------
+function: analyze_def(def)
+---------------------------
+return true if extension is needed, false otherwise.
+
+destination_operand = defined operand of source
+source_operand = source operand of def
+
+if (have_seen_this_insn_already (def))
+ return true;
+
+set_seen_this_insn_flag (def)
+
+analysis_result = analyze_result_def (def)
+switch (analysis_result)
+ case source_operand_is_extended:
+ return false
+ case stop_recursion:
+ return true
+ case continue_recursion:
+ break;
+
+ext_needed = true;
+
+For all insns, insn_def, that DEFINE and REACH the register of source_operand
+ ext_needed = analyze_def(insn_def)
+ if (ext_needed)
+ break;
+end_loop
+
+return ext_needed
+
+--------------------------
+function: analyze_use(use)
+---------------------------
+return true if extension is needed, false otherwise.
+
+destination_operand = destination operand of use
+source_operand = source operand of use
+
+if (have_seen_this_insn_already (use))
+ return false;
+
+set_seen_this_insn_flag (use)
+
+analysis_result = analyze_result_use (use)
+switch (analysis_result)
+ case low_bits_not_affected_by_use:
+ return false
+ case low_bits_affected_by_use:
+ return true
+ case look_at_uses_of_destination_operand
+ break;
+
+ext_needed = true;
+For all insns, insn_use, that USE the register of destination_operand
+ ext_needed = analyze_use(insn_use)
+ if (ext_needed)
+ break;
+end_loop
+
+return ext_needed
+
+REFERENCES:
+
+"Effective Sign Extension Elimination", Kawahito, Komatsu, Nakatani.
+IBM Tokyo Research Laboratory.
+
+"New sign/zero extension elimination pass", deVries.
+http://gcc.gnu.org/ml/gcc-patches/2010-10/msg01529.html
+*/
+
+/*
+Iteration 4: pre-ZERO_EXTEND version, duplicates sign_extend at uses
+Iteration 5: begin supporting ZERO_EXTEND, crashes on Coremark.
+Iteration 6: revert to 4, support SI:HI sign_extensions.
+Iteration 7: Add support for zero extend. This version deletes
+ "inserted" duplicate extensions when redundant and propagates
+ the copied value. This propagate fails in other_tests/test2.sh.
+ I am reverting back to replacing the "inserted" extension to a copy.
+ Copy propagation should always be able to eliminate this copy.
+ Coremark was stable, however.
+Iteration 8: Revert to change extensions to copy, regardless of whether
+ the extension was duplicated or not.
+ Refactor setting of dest,src in analyze_ext_use, analyze_ext_def, now
+ handled with a single function.
+Iteration 9:
+ Inserted redundant extensions at function return points.
+ Sorted the order that extensions are processed by loop depth.
+ Additional cases in upper_bits_do_not_affect_dest
+Iteration 10:
+ Fixes for test failures. A major problem was uncovered where
+ the "visited" flag was not properly cleared. This meant that
+ each time a new extension was processed, it appeared that some
+ extensions were visited already when they were not. The result
+ was false removals. This fix significantly affects the benchmark.
+ Another change was to comment out the duplicate_exts_at_uses. This
+ seemed to have little effect now that the visited flag issue is
+ fixed.
+Iteration 11:
+ Cleanup warnings during build.
+Iteration 12:
+ QImode support started.
+Iteration 13:
+ Redesign and refactor analyze_ext_use, analyze_ext_def
+Iteration 14:
+ Continue redesign and refactor of analyze_ext_use, analyze_ext_def
+ Debugging paper_example.c
+Iteration 15:
+ cond_c fix
+Iteration 16: (not tested)
+ Refactor check_compare code
+ Refactor action decision in PARALLEL
+ Allow pass-thru on insns that are marked for replace copy
+ instead of stopping recursion if we see a marked insn.
+ Examining lshiftrt.c program (signed and unsigned).
+Iteration 17:
+ Refactor mostly complete. Passed all local testing including
+ nas and perfect. Best coremark results so far.
+Iteration 18:
+ Oops. analyze_ext_def was disabled. Enabling it improves
+ Coremark. Passed coremark, perfect.
+Iteration 19:
+ Local tests are passing. Tested with glibc.
+ Added statistics.
+ Fixed elimination from CALL output in operand_is_extended.
+ This impacted Coremark went from 6300 to 6170. But is necessary.
+ More safety for used regs in analyze_ext_def.
+ More safety for the types of extensions.
+Iteration 20:
+ Fixes for various tests.
+Iteration 21:
+ pr43017 -funroll_loops fix.
+Iteration 22:
+ Fixes for AND immediate in operand_is_extended.
+ Cosmetic cleanup.
+Iteration 23:
+ Fixes for consumer-2,spec2k,spec2k6. Handle
+ SUBREG_PROMOTED_VAR_P flags on operands whose
+ dependent extension has been eliminated.
+Iteration 24:
+ Fixed problem in native build during bootstrapping.
+ Extelim was considering debug_insns and should have
+ ignored them. This resulted in a compare fail between
+ stage2 and stage3.
+Iteration 25:
+ - Post-release 4.6.1 development
+ - Full duplication of extensions at uses turned on.
+ - Recursion into original extension no longer kills optimization (analyze_ext_def only)
+ - Allow some duplication into the same block if it enables insn selection
+ - Allow CCmode and CCUNSmode into mode_supported_p
+Iteration 26:
+ - Solve ICEs due to null df-ref.
+Iteration 27:
+ - Fixed issue with duplication of extension at a self-assign.
+ - Some fixes for copying flags during duplication
+ - Some fixes for counting register uses.
+Iteration 28:
+ - Fixed issue with duplication of extension when use has multiple
+ reaching definitions.
+Iteration 29:
+ - Release candidate for Q42011 release iteration.
+Iteration 30:
+ - Turn off extension duplication - minimally effective
+
+*/
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "flags.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "basic-block.h"
+#include "insn-config.h"
+#include "function.h"
+#include "expr.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "toplev.h"
+#include "target.h"
+#include "timevar.h"
+#include "optabs.h"
+#include "insn-codes.h"
+#include "rtlhooks-def.h"
+#include "output.h"
+#include "params.h"
+#include "timevar.h"
+#include "tree-pass.h"
+#include "cgraph.h"
+#include "df.h"
+#include "vec.h"
+
+/* Feature flags */
+/* Duplicate extensions at each immediate use */
+#define EXTELIM_DUPLICATE_EXTS_AT_USES 0
+/* Dump DF information also in dump */
+#define EXTELIM_DF_DUMP 0
+
+
+/* Typedefs */
+typedef unsigned int insn_flag_t; /* Insn flags type */
+typedef int extelim_uid_t; /* UID type */
+DEF_VEC_I (insn_flag_t); /* Define vector type and allocation type */
+DEF_VEC_ALLOC_I (insn_flag_t, heap);
+
+typedef struct GTY (()) ext_record
+{
+ rtx ext; /* The extension insn */
+ VEC (rtx, heap) * ext_uses; /* List of use records for this extension. For
+ some extensions, we will duplicate the extension
+ at these use points. */
+ VEC (rtx, heap) * ext_updates;/* List of rtx that need to be updated if the extension
+ is to be eliminated. For example, SUBREG_PROMOTED flags
+ on SUBREG uses defined by this extension should
+ be reset since the extension is eliminated. The PROMOTED
+ flag is no longer valid. */
+} *ext_record_t;
+
+typedef struct regspec_cb_data
+{
+ unsigned int regno; /* Register number to match; num_reg_operands reuses it as a counter */
+ rtx exp; /* Output: the register rtx recorded by a matching callback */
+} regspec_cb_data_t;
+
+/* Static variables.  All pass state is private to this file.  */
+DEF_VEC_P (ext_record_t);
+DEF_VEC_ALLOC_P (ext_record_t, heap);
+static VEC (ext_record_t, heap) *extensions; /* Vector holding all extension records */
+static VEC (insn_flag_t, heap) *insn_flags; /* Vector holding flags for all insns */
+static VEC (rtx, heap) *returns; /* Vector holding return insns for this function */
+
+static extelim_uid_t max_uid; /* Max UID insn value for insn_flags allocation */
+static ext_record_t current_ext_record; /* Current extension record being processed */
+
+/* Statistics */
+ static int num_cand; /* Number of extensions detected */
+ static int num_cand_ignored; /* Number of extensions ignored */
+ static int num_cand_transformed; /* Number of extensions transformed to copy */
+
+/* Basic information about the extension being processed */
+ enum machine_mode ext_to_mode; /* Mode extended to */
+ enum machine_mode ext_from_mode; /* Mode extended from */
+ enum rtx_code ext_code; /* Sign or zero extend */
+
+/* Insn use analysis possible results (cf. analyze_use in the header pseudocode) */
+ enum insn_use_results
+ {
+ EXTELIM_ANALYSIS_RESULT_LOWBITS_NOT_AFFECTED, /* presumably: this use does not need the extension */
+ EXTELIM_ANALYSIS_RESULT_LOWBITS_AFFECTED, /* presumably: this use needs the extension */
+ EXTELIM_ANALYSIS_RESULT_CONTINUE_RECURSION /* presumably: undecided, examine uses of the use's dest */
+ };
+
+/* Insn def analysis possible results (cf. analyze_def in the header pseudocode) */
+ enum insn_def_results
+ {
+ EXTELIM_ANALYSIS_RESULT_DEF_EXTENDED, /* presumably: source operand is already extended */
+ EXTELIM_ANALYSIS_RESULT_DEF_STOP_RECURSION, /* presumably: give up, extension is needed */
+ EXTELIM_ANALYSIS_RESULT_DEF_CONTINUE_RECURSION /* presumably: undecided, examine reaching defs */
+ };
+
+/* Insn flags for this pass; stored per insn uid in the insn_flags vector */
+#define EXTELIM_NONE 0 /* No flag set; the value every insn's flags are init'd to */
+#define EXTELIM_SEEN (1<<0) /* Mark insn as visited during DF traversal */
+#define EXTELIM_REPLACE_COPY (1<<1) /* Mark ext insn as replace with copy */
+#define EXTELIM_INSERTED (1<<2) /* Mark ext insn as algorithmically inserted */
+#define EXTELIM_INSERTED_FOR (1<<3) /* Mark use insn for which ext has been inserted */
+
+
+/* Return true when every bit of SET_P is set in the flags of insn UID.  */
+
+static bool
+insn_flag_p (insn_flag_t set_p, extelim_uid_t uid)
+{
+  insn_flag_t current = VEC_index (insn_flag_t, insn_flags, uid);
+  insn_flag_t missing = set_p & ~current;
+
+  /* All requested bits are present exactly when none is missing.  */
+  return missing == 0;
+}
+
+/* OR the bits in FLAGS into the flag word kept for insn UID.  */
+
+static void
+insn_flag_set (insn_flag_t flags, extelim_uid_t uid)
+{
+  insn_flag_t updated = VEC_index (insn_flag_t, insn_flags, uid) | flags;
+
+  /* Write the merged word back into the per-insn flags vector.  */
+  VEC_replace (insn_flag_t, insn_flags, uid, updated);
+}
+
+/* Turn off the bits in FLAGS in the flag word kept for insn UID.  */
+
+static void
+insn_flag_clear (insn_flag_t flags, extelim_uid_t uid)
+{
+  insn_flag_t kept = VEC_index (insn_flag_t, insn_flags, uid) & ~flags;
+
+  /* Store the word back with the requested bits removed.  */
+  VEC_replace (insn_flag_t, insn_flags, uid, kept);
+}
+
+/* Record in the static max_uid one more than the largest uid of any
+   INSN_P insn found in the basic blocks.  That value is the length
+   required of the insn flags vector, which is indexed by uid.  */
+
+static void
+set_max_uid (void)
+{
+  basic_block block;
+  rtx cur;
+  extelim_uid_t highest = 0;
+
+  FOR_EACH_BB (block)
+    {
+      FOR_BB_INSNS (block, cur)
+        {
+          if (INSN_P (cur) && INSN_UID (cur) > highest)
+            highest = INSN_UID (cur);
+        }
+    }
+  max_uid = highest + 1;
+}
+
+/* Clear FLAGS_TO_BE_RESET on every insn, after refreshing max_uid.  */
+
+static void
+reinit_insn_flags (insn_flag_t flags_to_be_reset)
+{
+  extelim_uid_t uid;
+
+  /* Insns may have been added since the last scan; recompute the
+     upper bound before walking the flags vector.  */
+  set_max_uid ();
+
+  uid = 0;
+  while (uid < max_uid)
+    insn_flag_clear (flags_to_be_reset, uid++);
+}
+
+/* Create the insn flags vector.  It gets one element per insn uid
+   (max_uid elements in all) and every element starts out as
+   EXTELIM_NONE.  */
+
+static void
+init_flags_vector (void)
+{
+  extelim_uid_t remaining;
+
+  /* The vector is indexed by insn uid, so first find out how many
+     slots are needed.  set_max_uid stores that bound in max_uid.  */
+  max_uid = 0;
+  set_max_uid ();
+
+  /* Reserve all the space up front; the quick push below relies on
+     the room already being there.  */
+  insn_flags = VEC_alloc (insn_flag_t, heap, max_uid);
+
+  /* Append max_uid flag words, each with no flag set.  */
+  for (remaining = max_uid; remaining > 0; remaining--)
+    {
+      VEC_quick_push (insn_flag_t, insn_flags, EXTELIM_NONE);
+    }
+}
+
+/* Initialize this pass: allocate the per-insn flags and run the DF chain analysis */
+
+static void
+init_pass (void)
+{
+ /* Init insn flags vector (one EXTELIM_NONE entry per insn uid) */
+ init_flags_vector ();
+
+ /* This pass requires both def-use and use-def chain information */
+ df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
+ df_analyze ();
+}
+
+/* Free every extension record (its use/update vectors and the record
+   itself, which find_extensions xmalloc'd), then the vector of records.
+   VEC_free accepts a NULL vector, so no emptiness guard is needed.  */
+static void
+free_extensions (void)
+{
+  ext_record_t ext_record;
+  unsigned i;
+  FOR_EACH_VEC_ELT (ext_record_t, extensions, i, ext_record)
+    {
+      VEC_free (rtx, heap, ext_record->ext_uses);
+      VEC_free (rtx, heap, ext_record->ext_updates);
+      free (ext_record);
+    }
+  VEC_free (ext_record_t, heap, extensions);
+}
+
+/* Clean up this pass: release the extension records and the pass's heap vectors */
+
+static void
+finish_pass (void)
+{
+ free_extensions ();
+ VEC_free (insn_flag_t, heap, insn_flags); /* per-insn flags */
+ VEC_free (rtx, heap, returns); /* return insns */
+}
+
+static void
+update_uid_vectors (extelim_uid_t uid)
+{ /* Grow insn_flags to uid + 1 elements so that UID is a valid index; added elements are cleared */
+ VEC_safe_grow_cleared (insn_flag_t, heap, insn_flags, uid + 1);
+}
+
+/* Emit an insn before a given insn, update vector lengths
+ of those vectors that are indexed by uid. Return uid
+ of the inserted insn. */
+
+static extelim_uid_t
+extelim_emit_before (rtx new_insn, rtx before_insn)
+{
+ rtx seq;
+ extelim_uid_t new_uid;
+
+ start_sequence ();
+ emit_insn (new_insn);
+ seq = get_insns ();
+ end_sequence ();
+ new_insn = emit_insn_before (seq, before_insn);
+
+ /* Expand the flags vector to hold the new insn. No flag is set
+ on the new insn here; its flags start out cleared. */
+ new_uid = INSN_UID (new_insn);
+ update_uid_vectors (new_uid);
+ return new_uid;
+}
+
+/* Map EXP to the register it names: EXP itself when it is a REG, the
+   SUBREG_REG operand when it is a SUBREG, and NULL for anything else.  */
+
+static rtx
+register_exp (rtx exp)
+{
+  switch (GET_CODE (exp))
+    {
+    case REG:
+      return exp;
+    case SUBREG:
+      return SUBREG_REG (exp);
+    default:
+      break;
+    }
+  return NULL;
+}
+
+/* Check whether INSN is a sign or zero extension of a register.
+   Return true when INSN is a single SET of a REG whose source is a
+   SIGN_EXTEND or ZERO_EXTEND of a REG or of a SUBREG starting at the
+   least significant bit.  On success *DEST is the SET destination,
+   *INNER the register being extended (SUBREG_REG for a SUBREG) and
+   *PRESERVED_SIZE the bit size of the mode extended from.  *DEST and
+   *PRESERVED_SIZE may also be written when false is returned.  */
+
+static bool
+extension_p (rtx insn, rtx * dest, rtx * inner, int *preserved_size)
+{
+  rtx src, op0;
+
+  /* Detect set of reg.  */
+  if (GET_CODE (PATTERN (insn)) != SET)
+    return false;
+
+  src = SET_SRC (PATTERN (insn));
+  *dest = SET_DEST (PATTERN (insn));
+
+  if (!REG_P (*dest))
+    return false;
+
+  if (GET_CODE (src) != SIGN_EXTEND && GET_CODE (src) != ZERO_EXTEND)
+    return false;
+
+  op0 = XEXP (src, 0);
+
+  /* The least significant bits preserved by an extension are those of
+     its operand's mode.  (The former AND-mask case here was dead code:
+     SRC is known to be SIGN_EXTEND or ZERO_EXTEND at this point.)  */
+  *preserved_size = GET_MODE_BITSIZE (GET_MODE (op0));
+
+  if (REG_P (op0))
+    {
+      *inner = op0;
+      return true;
+    }
+  /* Otherwise only a SUBREG at bit offset 0 qualifies.  */
+  if (GET_CODE (op0) != SUBREG || subreg_lsb (op0) != 0)
+    return false;
+  *inner = SUBREG_REG (op0);
+  return true;
+}
+
+/* Return true if INSN carries a REG_DEAD note naming a register with
+   the same number as REG_EXPR, false otherwise.  */
+
+static bool
+reg_is_dead_p (rtx insn, rtx reg_expr)
+{
+  rtx note;
+
+  gcc_assert (REG_P (reg_expr));
+
+  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
+    {
+      if (REG_NOTE_KIND (note) != REG_DEAD || !REG_P (XEXP (note, 0)))
+        continue;
+      if (REGNO (XEXP (note, 0)) == REGNO (reg_expr))
+        return true;
+    }
+  return false;
+}
+
+/* Return true if we don't want to place this extension in the
+   candidate extensions list because of the previous insn, false
+   otherwise.  That is the case when PREV_INSN loads from memory into
+   the register that EXT_INSN extends and that register dies in
+   EXT_INSN.  EXT_INSN must be an insn accepted by extension_p;
+   PREV_INSN may be any element of the insn chain.  */
+
+static bool
+ignore_extension_prev_p (rtx ext_insn, rtx prev_insn)
+{
+  rtx prev, prev_src, prev_dest;
+  rtx ext_src, ext = PATTERN (ext_insn);
+
+  /* It's OK to allow extension with no accompanying prev real insn.
+     Test this before touching PATTERN (prev_insn): PREV_INSN may be a
+     note, label or barrier, for which PATTERN is not a valid access.  */
+  if (!NONDEBUG_INSN_P (prev_insn))
+    return false;
+
+  prev = PATTERN (prev_insn);
+  if (GET_CODE (prev) != SET)
+    return false;
+
+  if (GET_CODE (ext) != SET)
+    return false;
+
+  /* Previous insn must be a load...  */
+  prev_src = SET_SRC (prev);
+  if (!MEM_P (prev_src))
+    return false;
+
+  /* ...whose dest is the extension's source...  */
+  prev_dest = register_exp (SET_DEST (prev));
+  ext_src = register_exp (XEXP (SET_SRC (ext), 0));
+  if (prev_dest == NULL_RTX || ext_src == NULL_RTX)
+    return false;
+  if (REGNO (prev_dest) != REGNO (ext_src))
+    return false;
+
+  /* ...and that register is dead after the extension.  */
+  return reg_is_dead_p (ext_insn, ext_src);
+}
+
+/* Return true if we don't want to place this extension in the
+   candidate extensions list because of the next insn, false otherwise.
+   The one sequence recognized is a ZERO_EXTEND in EXT_INSN followed
+   by a NEXT_INSN that sets something to a left shift by constant 1.  */
+
+static bool
+ignore_extension_next_p (rtx ext_insn, rtx next_insn)
+{
+  rtx next, next_src;
+  rtx ext = PATTERN (ext_insn);
+
+  if (GET_CODE (ext) != SET)
+    return false;
+
+  /* Only a real insn has a pattern; check that before reading
+     PATTERN (next_insn), which is invalid on notes, labels, etc.  */
+  if (!NONDEBUG_INSN_P (next_insn))
+    return false;
+
+  next = PATTERN (next_insn);
+  if (GET_CODE (next) != SET)
+    return false;
+
+  /* zero-extend followed by left shift by 1 -- this sequence will be
+     detected by the insn selection.  */
+  if (GET_CODE (SET_SRC (ext)) != ZERO_EXTEND)
+    return false;
+
+  next_src = SET_SRC (next);
+  return (GET_CODE (next_src) == ASHIFT && CONST_INT_P (XEXP (next_src, 1))
+          && UINTVAL (XEXP (next_src, 1)) == 0x1);
+}
+
+/* Find extensions and store them in the extensions vector. Return true if that vector is non-empty afterwards. */
+
+static bool
+find_extensions (void)
+{
+ basic_block bb;
+ rtx insn, dest, inner;
+ int preserved_size;
+ ext_record_t extrec;
+
+ /* Walk every non-debug insn of every basic block and record each extension candidate. */
+ FOR_EACH_BB (bb)
+ {
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ if (!extension_p (insn, &dest, &inner, &preserved_size))
+ {
+ continue;
+ }
+
+ /* We do not consider extensions that follow a load for
+ this target, as the code selector optimizes the sequence
+ to a load with sign extend or load with zero extend. */
+ if (PREV_INSN (insn)
+ && ignore_extension_prev_p (insn, PREV_INSN (insn)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "extension at uid=%d ignored\n",
+ INSN_UID (insn));
+ num_cand_ignored++;
+ continue;
+ }
+ /* We don't consider certain sequences that are picked up by
+ insn selection. */
+ if (NEXT_INSN (insn)
+ && ignore_extension_next_p (insn, NEXT_INSN (insn)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "extension at uid=%d ignored\n",
+ INSN_UID (insn));
+ num_cand_ignored++;
+ continue;
+ }
+
+ /* Only keep extensions whose source width (preserved_size) is that of SImode, HImode, or QImode */
+ if (GET_MODE_BITSIZE (SImode) != preserved_size
+ && GET_MODE_BITSIZE (HImode) != preserved_size
+ && GET_MODE_BITSIZE (QImode) != preserved_size)
+ continue;
+
+ extrec = (ext_record_t) xmalloc (sizeof (struct ext_record));
+ extrec->ext = insn;
+ extrec->ext_uses = NULL;
+ extrec->ext_updates = NULL;
+ VEC_safe_push (ext_record_t, heap, extensions, extrec);
+ num_cand++;
+ }
+ }
+
+ if (dump_file)
+ {
+ if (!VEC_empty (ext_record_t, extensions))
+ fprintf (dump_file, "\n");
+ else
+ fprintf (dump_file, "no extensions found.\n");
+ }
+
+ return !VEC_empty (ext_record_t, extensions);
+}
+
+/* Return true if EXP's mode is one this optimization handles: the
+   integer modes QImode, HImode, SImode and DImode and the
+   condition-code modes CCmode and CCUNSmode.  False otherwise.  */
+
+static bool
+mode_supported_p (rtx exp)
+{
+  switch (GET_MODE (exp))
+    {
+    case QImode: case HImode: case SImode: case DImode:
+    case CCmode: case CCUNSmode:
+      return true;
+    default:
+      return false;
+    }
+}
+
+/* Return true if DEST is a REG or PARALLEL flagged as holding a
+   function's return value, false otherwise.  */
+
+static bool
+return_val_p (rtx dest)
+{
+  if (!REG_P (dest) && GET_CODE (dest) != PARALLEL)
+    return false;
+
+  /* Only those two codes are queried for the return-value flag.  */
+  return REG_FUNCTION_VALUE_P (dest) != 0;
+}
+
+
+/* A 'for_each_rtx' callback that stops the traversal (returns 1) at
+   the first REG or SUBREG rtx it meets.  The REG itself, or the
+   SUBREG_REG of the SUBREG, is stored in the callback data.  */
+
+static int
+reg_or_subreg_rtx (rtx * x, void *data)
+{
+  regspec_cb_data_t *cb = (regspec_cb_data_t *) data;
+  rtx found;
+  switch (GET_CODE (*x))
+    {
+    case REG:
+      found = *x;
+      break;
+    case SUBREG:
+      found = SUBREG_REG (*x);
+      break;
+    default:
+      return 0;
+    }
+  cb->exp = found;
+  return 1;
+}
+
+/* A 'for_each_rtx' callback returning 1 if the rtx is a REG, or a
+   SUBREG of a REG, whose register number is the regno passed in the
+   data parameter.  On a match the data parameter's exp is set to the
+   REG itself (the inner REG for a SUBREG); otherwise 0 is returned.  */
+
+static int
+reg_or_subreg_rtx_regno (rtx * x, void *data)
+{
+  regspec_cb_data_t *ldata = (regspec_cb_data_t *) data;
+  rtx reg = *x;
+
+  if (GET_CODE (reg) == SUBREG)
+    reg = SUBREG_REG (reg);
+
+  /* REGNO is only meaningful on a REG.  The operand of a SUBREG is
+     not guaranteed to be one, so verify the code before comparing.  */
+  if (!REG_P (reg) || REGNO (reg) != ldata->regno)
+    return 0;
+
+  ldata->exp = reg;
+  return 1;
+}
+
+/* 'for_each_rtx' callback: bump DATA->regno for each REG or SUBREG
+ visited. Always returns 0 so that all rtxs are traversed.
+ NOTE(review): a SUBREG and its inner REG presumably both count -- confirm. */
+
+static int
+count_reg_operands (rtx * x, void *data)
+{
+ regspec_cb_data_t *ldata = (regspec_cb_data_t *) data;
+
+ if (register_exp (*x) != NULL)
+ {
+ ldata->regno++;
+ }
+ return 0;
+}
+
+/* Return the number of register operands found in expression X.
+   The regno field of a scratch regspec_cb_data_t is used as the
+   counter.  If the traversal is cut short, 0 is returned.  */
+
+static int
+num_reg_operands (rtx x)
+{
+  regspec_cb_data_t tally;
+
+  tally.regno = 0;
+  tally.exp = NULL_RTX;
+
+  if (for_each_rtx (&x, count_reg_operands, (void *) &tally) != 0)
+    return 0;
+
+  return tally.regno;	/* contains the count */
+}
+
+/* Search X for register number REGNO, appearing as a REG or wrapped
+   in a SUBREG.  Return the rtx recorded, or NULL_RTX if not found.  */
+
+static rtx
+find_regspec_regno (unsigned int regno, rtx x)
+{
+  regspec_cb_data_t query;
+
+  query.regno = regno;
+  query.exp = NULL_RTX;
+
+  /* A zero result means the callback never matched.  */
+  if (for_each_rtx (&x, reg_or_subreg_rtx_regno, (void *) &query) == 0)
+    return NULL_RTX;
+  return query.exp;
+}
+
+/* Walk expression X and return the first register met: a REG itself,
+   or the SUBREG_REG of a SUBREG (never the SUBREG).  Return NULL_RTX
+   when X contains neither a REG nor a SUBREG.  */
+
+static rtx
+find_regspec (rtx x)
+{
+  regspec_cb_data_t result;
+
+  result.regno = -1;	/* not used */
+  result.exp = NULL_RTX;
+
+  if (for_each_rtx (&x, reg_or_subreg_rtx, (void *) &result) == 0)
+    return NULL_RTX;
+
+  return result.exp;
+}
+
+/* Return true if expression INSN is a SET whose destination
+   contains register REGNO, as a REG or within a SUBREG.  */
+
+static bool
+expr_defines_regno_p (rtx insn, unsigned int regno)
+{
+  rtx dest;
+
+  if (GET_CODE (insn) != SET)
+    return false;
+
+  dest = SET_DEST (insn);
+  return find_regspec_regno (regno, dest) != NULL_RTX;
+}
+
+/* Return true if insn INSN_INSN defines exactly one register and that
+   register is REGNO, false otherwise.  INDENT sets dump indentation.  */
+
+static bool
+defines_regno_p (rtx insn_insn, unsigned int regno, int indent)
+{
+  extelim_uid_t uid = INSN_UID (insn_insn);
+  df_ref *p_def = DF_INSN_UID_DEFS (uid);
+
+  /* The def vector is examined as a NULL-terminated array below.  An
+     insn with no defs therefore has a NULL first entry: reject it
+     before reading element 1 or dereferencing element 0.  */
+  if (!p_def || *p_def == NULL)
+    return false;
+
+  if (*(p_def + 1) != NULL)
+    {
+      if (dump_file)
+        fprintf (dump_file, "%*suid=%d defines multiple registers\n",
+                 indent, " ", uid);
+      return false;
+    }
+
+  if (DF_REF_REGNO (*p_def) != regno)
+    {
+      if (dump_file)
+        fprintf (dump_file, "%*suid=%d defines does not define %d\n",
+                 indent, " ", uid, regno);
+      return false;
+    }
+
+  return true;
+}
+
+/* Return true if the SET of DEST from SRCEXP leaves DEST already
+   extended in a way compatible (type and size) with the originating
+   extension described by ext_code, ext_from_mode and ext_to_mode;
+   return false otherwise.  Cases recognized: the output of a CALL,
+   a CONST_INT load, an extension of the same kind as the originating
+   one, and an AND with a constant mask (e.g. zero_extend:HI meets an
+   AND r,#0xffff).  INDENT is the dump-file indentation.  */
+
+static bool
+operand_is_extended (rtx dest, rtx srcexp, int indent)
+{
+  /* Output of a CALL is already extended.
+     To ensure that the return value is not modified by the extend,
+     the extend from mode size must be at least the size of the CALL output.
+     Example - this is redundant since output of CALL is extended.
+     X:SI = CALL ...
+     Y:DI = sign_extend:DI (X:SI) */
+  if (GET_CODE (srcexp) == CALL
+      && (GET_MODE_BITSIZE (ext_from_mode)) >=
+      GET_MODE_BITSIZE (GET_MODE (dest)))
+    {
+      if (dump_file)
+        fprintf (dump_file,
+                 "%*s...is extended already (CALL insn output)\n", indent,
+                 " ");
+      return true;
+    }
+
+  /* Output is load immediate or load constant.  The constant is zero
+     extended when it fits in the extend-from bits; it is sign
+     extended only if the extend-from sign bit is clear as well.  */
+  if (CONST_INT_P (srcexp))
+    {
+      int sign = (ext_code == SIGN_EXTEND) ? 1 : 0;
+      bool is_extended = false;
+
+      if (ext_from_mode == QImode)
+        is_extended = UINTVAL (srcexp) <= (0xffU >> sign);
+      else if (ext_from_mode == HImode)
+        is_extended = UINTVAL (srcexp) <= (0xffffU >> sign);
+      else if (ext_from_mode == SImode)
+        is_extended = UINTVAL (srcexp) <= (0xffffffffU >> sign);
+      if (is_extended)
+        {
+          if (dump_file)
+            fprintf (dump_file,
+                     "%*s... is extended already (CONST_INT load)\n", indent,
+                     " ");
+          return true;
+        }
+    }
+
+  /* Sign extension of the same type as the originating extension.
+     Here the candidate defines the register used in the originating extension.
+     The originating extension will be replaced by a copy if it is found to be
+     redundant with respect to the candidate extension.
+     The candidate (this extension dest,src) must write at least the same bits as the
+     originating extension in order to be redundant. So, we follow these rules:
+
+     cand_to_mode == machine mode of the destination for this candidate extension
+     cand_from_mode == machine mode of the source for this candidate extension
+     ext_to_mode == machine mode of the originating extension output
+     ext_from_mode == machine mode of the originating extension input
+
+     SIZE(cand_to_mode) >= SIZE(ext_to_mode) && SIZE(cand_from_mode) <= SIZE(ext_from_mode)
+
+     Example 1:
+     Candidate (HI->SI extension)
+     DI SI HI QI 0
+     | |<---| | |
+
+     Originating (SI->DI)
+     DI SI HI QI 0
+     |<-------| | | |
+
+     Not redundant, candidate does not cover the original bits:
+     SIZE(cand_to_mode)[SI] !>= SIZE(ext_to_mode)[DI]
+
+     Example 2:
+     Candidate (QI->DI extension)
+     DI SI HI QI 0
+     |<-------|----|--| |
+
+     Originating (HI->SI)
+     DI SI HI QI 0
+     | |<---| | |
+
+     Redundant, candidate covers the original bits:
+     SIZE(cand_to_mode) [DI] >= SIZE(ext_to_mode) [SI]
+     AND
+     SIZE(cand_from_mode) [QI] <= SIZE(ext_from_mode) [HI]
+   */
+  if (GET_CODE (srcexp) == ext_code)
+    {
+      enum machine_mode cand_from_mode = GET_MODE (XEXP (srcexp, 0));
+      enum machine_mode cand_to_mode = GET_MODE (dest);
+      if ((GET_MODE_BITSIZE (cand_to_mode) >= GET_MODE_BITSIZE (ext_to_mode))
+          && (GET_MODE_BITSIZE (cand_from_mode) <=
+              GET_MODE_BITSIZE (ext_from_mode)))
+        {
+          if (dump_file)
+            fprintf (dump_file,
+                     "%*s...is already extended (redundant extension)\n",
+                     indent, " ");
+          return true;
+        }
+    }
+
+  /* Encountered an insn with the same effect as extension, e.g.
+     AND (regspec) (const_int). E.g. AND (reg:SI) (0x7fff) is equivalent
+     to ZERO_EXTEND:DI (reg:HI) or SIGN_EXTEND:DI (reg:HI). The code selection
+     for AND zero extends the entire register, so we don't have to
+     check that srcexp extends to at least ext_to_mode size. */
+  if ((GET_CODE (srcexp) == AND) && CONST_INT_P (XEXP (srcexp, 1)))
+    {
+      if (ext_from_mode == QImode && (UINTVAL (XEXP (srcexp, 1)) <= 0x7f))
+        return true;
+      else if (ext_from_mode == HImode
+               && (UINTVAL (XEXP (srcexp, 1)) <= 0x7fff))
+        return true;
+      else if (ext_from_mode == SImode
+               && (UINTVAL (XEXP (srcexp, 1)) <= 0x7fffffff))
+        return true;
+    }
+
+  return false;
+}
+
+/* Determine if the operation allows the propagation to continue.
+   Propagation is killed for every operation except a copy, which
+   ensures that an extended operand found further up the chain is not
+   modified by insns along the def-use chain.  Harsh, but safest.
+   Only SRCEXP is examined: it must be a register expression as
+   judged by register_exp.  The other parameters are unused.  */
+
+static bool
+continue_def_propagation (rtx dest, rtx srcexp, rtx src_operand, int indent)
+{
+  bool is_copy = register_exp (srcexp) != NULL;
+
+  /* Continue for a copy only; anything else ends the search for an
+     extended operand.  */
+  return is_copy;
+}
+
+/* Helper for insn_def_analysis_result.  INSN is one expression: an
+   insn pattern or one element of a PARALLEL.  *SRC is set to the
+   register operand of the source once it has been identified; only a
+   source with exactly one register operand can be followed.
+   We return one of 3 states:
+   1) The operand is determined to be extended: ...DEF_EXTENDED.
+   2) The propagation can continue: ...DEF_CONTINUE_RECURSION.
+   3) Otherwise, ...DEF_STOP_RECURSION is returned.  */
+static enum insn_def_results
+insn_def_analysis_result_1 (rtx insn, bool treat_as_copy,
+ unsigned int regno_def ATTRIBUTE_UNUSED,
+ rtx * src, int indent)
+{
+ rtx dest, srcexp;
+ int num_operands;
+
+ /* Insn has to be an expression we can analyze, i.e. a SET */
+ if (GET_CODE (insn) != SET)
+ {
+ if (dump_file)
+ fprintf (dump_file, "%*s...is not a SET expression\n", indent, " ");
+ return EXTELIM_ANALYSIS_RESULT_DEF_STOP_RECURSION;
+ }
+ dest = SET_DEST (insn);
+ srcexp = SET_SRC (insn);
+
+ /* Dest must be a reg, not expression */
+ if (!REG_P (dest))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "%*s...dest is not a simple register\n", indent, " ");
+ return EXTELIM_ANALYSIS_RESULT_DEF_STOP_RECURSION;
+ }
+
+ /* First check whether the operand is extended already. If so, and the
+ insn is not to be treated as a copy, we can leave successfully. */
+ if (operand_is_extended (dest, srcexp, indent) && !treat_as_copy)
+ return (EXTELIM_ANALYSIS_RESULT_DEF_EXTENDED);
+
+
+ /* Failing to determine that the operand is already extended,
+ we have to validate that we have register operands to propagate. */
+ num_operands = num_reg_operands (srcexp);
+
+ /* At least one register operand required for propagation. */
+ if (num_operands == 0)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "%*s...no register operands in RHS\n", indent, " ");
+ return EXTELIM_ANALYSIS_RESULT_DEF_STOP_RECURSION;
+ }
+
+ /* Only one register operand is allowed in the RHS since we
+ can't propagate more than one register. */
+ if (num_operands > 1)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "%*s...found multiple register operands in RHS\n", indent,
+ " ");
+ return EXTELIM_ANALYSIS_RESULT_DEF_STOP_RECURSION;
+ }
+
+ /* Find the used operand in the src expression; the dump prints regno_def */
+ *src = find_regspec (srcexp);
+ if (*src == NULL_RTX || !mode_supported_p (*src))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "%*s...src operand reg=%d cannot be found or is unsupported mode\n",
+ indent, " ", regno_def);
+ return EXTELIM_ANALYSIS_RESULT_DEF_STOP_RECURSION;
+ }
+
+ /* This is an extension, but it was previously marked to be transformed to a
+ copy. We just treat it as a copy even though it hasn't been transformed
+ yet, so continue the propagation. */
+ if (treat_as_copy)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "%*s...%s is treated as a copy (marked for replace)\n",
+ indent, " ", GET_RTX_NAME (GET_CODE (srcexp)));
+ return (EXTELIM_ANALYSIS_RESULT_DEF_CONTINUE_RECURSION);
+ }
+
+ /* Validate that it's ok to continue propagation with this operand. */
+ if (continue_def_propagation (dest, srcexp, *src, indent))
+ return (EXTELIM_ANALYSIS_RESULT_DEF_CONTINUE_RECURSION);
+
+ /* Else we default to halting the search for a redundant extension */
+ return (EXTELIM_ANALYSIS_RESULT_DEF_STOP_RECURSION);
+}
+
+/* Determine whether insn INSN_INSN, which is expected to define only
+   REGNO_DEF, extends its destination register in such a manner
+   that the original extension is redundant.  *SRC and the result
+   are produced by insn_def_analysis_result_1 on the insn's
+   pattern.  */
+
+static enum insn_def_results
+insn_def_analysis_result (rtx insn_insn, unsigned int regno_def, rtx * src,
+                          int indent)
+{
+  bool marked_copy;
+
+  /* Insn must define REGNO_DEF and nothing else.  */
+  if (!defines_regno_p (insn_insn, regno_def, indent))
+    {
+      if (dump_file)
+        fprintf (dump_file,
+                 "%*s...defines more than 1 output\n", indent, " ");
+      return EXTELIM_ANALYSIS_RESULT_DEF_STOP_RECURSION;
+    }
+
+  /* An extension already marked for replacement is analyzed as a
+     copy; otherwise it would be detected again as redundant.  */
+  marked_copy = (insn_flag_p (EXTELIM_REPLACE_COPY, INSN_UID (insn_insn))
+                 ? true : false);
+  if (marked_copy && dump_file)
+    fprintf (dump_file,
+             "%*suse at uid=%d is marked to transform to copy\n", indent,
+             " ", INSN_UID (insn_insn));
+
+  return insn_def_analysis_result_1 (PATTERN (insn_insn), marked_copy,
+                                     regno_def, src, indent);
+}
+
+/* Analyze a PARALLEL insn INSN_DEF.  Every element that defines
+   REGNO_DEF is analyzed on its own and the most conservative state
+   wins: any STOP_RECURSION is returned at once, CONTINUE_RECURSION
+   overrides DEF_EXTENDED, and DEF_EXTENDED is returned only when
+   every defining element produced it.  STOP_RECURSION is also the
+   result when no element defines REGNO_DEF.  */
+
+static enum insn_def_results
+insn_def_analysis_2 (rtx insn_def, unsigned int regno_def, rtx * src,
+                     int indent)
+{
+  rtx pat = PATTERN (insn_def);
+  enum insn_def_results verdict = EXTELIM_ANALYSIS_RESULT_DEF_STOP_RECURSION;
+  int idx;
+
+  gcc_assert (GET_CODE (pat) == PARALLEL);
+
+  for (idx = XVECLEN (pat, 0) - 1; idx >= 0; idx--)
+    {
+      rtx elt = XVECEXP (pat, 0, idx);
+      enum insn_def_results action;
+
+      /* Only act on the expressions that define regno_def.  */
+      if (!expr_defines_regno_p (elt, regno_def))
+        continue;
+
+      action = insn_def_analysis_result_1 (elt, false /* treat_as_copy */ ,
+                                           regno_def, src, indent);
+
+      /* One element that halts the search halts it for the insn.  */
+      if (action == EXTELIM_ANALYSIS_RESULT_DEF_STOP_RECURSION)
+        return action;
+
+      /* CONTINUE_RECURSION is sticky: once recorded, a later
+         DEF_EXTENDED must not replace it.  */
+      if (verdict != EXTELIM_ANALYSIS_RESULT_DEF_CONTINUE_RECURSION)
+        verdict = action;
+    }
+
+  return verdict;
+}
+
+/* Helper 1 for insn_def_analysis.  Dispatch on the code of the
+   pattern of INSN_DEF: a PARALLEL goes to insn_def_analysis_2,
+   anything else to insn_def_analysis_result.  The arguments are
+   passed through unchanged.  */
+
+static enum insn_def_results
+insn_def_analysis_1 (rtx insn_def, unsigned int regno_def, rtx * src,
+                     int indent)
+{
+  rtx pat = PATTERN (insn_def);
+  bool is_parallel = GET_CODE (pat) == PARALLEL;
+
+  /* A PARALLEL carries several expressions; each one defining
+     REGNO_DEF is examined in turn.  */
+  if (is_parallel)
+    return insn_def_analysis_2 (insn_def, regno_def, src, indent);
+
+  /* Every other pattern is analyzed as a single expression.  */
+  return insn_def_analysis_result (insn_def, regno_def, src, indent);
+}
+
+/* Examine INSN_DEF, an insn defining register REGNO_DEF.  REGNO_DEF is
+   either the source register of the sign or zero extension being
+   studied, or a register reached from it while analyze_ext_def walks
+   up the definition chain.  The objective is to decide, from the
+   operation and its operands, whether the register is already
+   sign/zero extended; if so, the original extension is redundant.
+
+   One of 3 states is returned:
+
+   1. EXTELIM_ANALYSIS_RESULT_DEF_EXTENDED - the insn does extend the
+      register compatibly with the original extension.  analyze_ext_def
+      returns false in response, ending the recursion.
+
+   2. EXTELIM_ANALYSIS_RESULT_DEF_STOP_RECURSION - the insn cannot be
+      looked through.  Causes include: the expression is not a SET,
+      its destination is not a plain register, the insn defines more
+      than one output, the source has no register operand or several
+      of them, the operand's mode is unsupported, or the operation is
+      something other than a copy.
+
+   3. EXTELIM_ANALYSIS_RESULT_DEF_CONTINUE_RECURSION - the insn copies
+      a single register, which is handed back in *SRC; the caller goes
+      on to that register's definitions.  An extension previously
+      marked for replacement by a copy is treated as a copy as well,
+      since the copy preserves the extension.  */
+
+static enum insn_def_results
+insn_def_analysis (rtx insn_def, unsigned int regno_def, rtx * src,
+                   int indent)
+{
+  enum insn_def_results verdict;
+
+  verdict = insn_def_analysis_1 (insn_def, regno_def, src, indent);
+
+  return verdict;
+}
+
+/* Analyze INSN_DEF, an insn defining REGNO_DEF, the source of the
+   extension (or a register reached from it by recursion).  Return
+   false if the definition is determined to be already extended;
+   return true if the extension is needed.  INDENT formats the dump.  */
+
+static bool
+analyze_ext_def (rtx insn_def, unsigned int regno_def, int indent)
+{
+ extelim_uid_t uid;
+ rtx def = PATTERN (insn_def);
+ rtx src;
+ df_ref df_def, *p_use;
+ bool ext_needed, indent_once;
+ struct df_link *link;
+ enum insn_def_results analysis_result;
+
+ gcc_assert (def != NULL);
+
+ uid = INSN_UID (insn_def);
+
+ /* If we see the originating extension again, return false (ext not needed) */
+ if (current_ext_record->ext == insn_def)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "%*sdef at uid=%d is original extension\n", indent, " ", uid);
+ return false;
+ }
+
+ /* The recursion has to definitively end with an operand being
+ extended (and compatible with the originating extension). If
+ we see the insn again, this could return a faulty positive (false),
+ so we return true here instead of false. See pr43017 (-funroll-loops)
+ as an example. */
+ if (insn_flag_p (EXTELIM_SEEN, uid))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "%*sdef at uid=%d is visited already\n", indent, " ", uid);
+ return true;
+ }
+
+ /* Mark this insn as seen */
+ insn_flag_set (EXTELIM_SEEN, uid);
+
+ analysis_result = insn_def_analysis (insn_def, regno_def, &src, indent);
+ switch (analysis_result)
+ {
+ /* We know conclusively that the register defined in this expression
+ is already extended. */
+ case EXTELIM_ANALYSIS_RESULT_DEF_EXTENDED:
+ if (dump_file)
+ fprintf (dump_file, "%*sdef at uid=%d is extended\n", indent, " ",
+ uid);
+ return false;
+ break;
+ /* We know conclusively that we cannot continue the recursion. Perhaps
+ the expression defines multiple registers, etc. */
+ case EXTELIM_ANALYSIS_RESULT_DEF_STOP_RECURSION:
+ if (dump_file)
+ fprintf (dump_file, "%*sdef at uid=%d cannot be propagated\n", indent,
+ " ", uid);
+ return true;
+ break;
+ /* Continue to look at the operands of this expression. They may be extended
+ already. */
+ case EXTELIM_ANALYSIS_RESULT_DEF_CONTINUE_RECURSION:
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* This is the operand for which we want to find definitions. There should
+ only be one operand, as the analysis above only continues for a source
+ with exactly one register operand. */
+ p_use = DF_INSN_UID_USES (uid);
+ gcc_assert (p_use != NULL);
+
+ /* Only the first use recorded for the insn is examined: it must be the
+ register returned in src, otherwise we simply stop the propagation.
+ Note the DF_INSN_UID_USES works at the insn level, so a PARALLEL pattern
+ may return many uses, hence the need to validate the use here. */
+ if ((*p_use == NULL) || (DF_REF_REGNO (*p_use) != REGNO (src)))
+ return true;
+
+ ext_needed = true;
+ indent_once = true;
+ for (link = DF_REF_CHAIN (*p_use); link; link = link->next)
+ {
+ rtx insn_def; /* NOTE(review): shadows the parameter of the same name */
+ df_def = link->ref;
+ if (!df_def)
+ continue;
+ /* Link must be to a definition of the use */
+ if (!DF_REF_REG_DEF_P (df_def))
+ continue;
+ /* Ignore ARTIFICIAL defs */
+ if (DF_REF_IS_ARTIFICIAL (df_def))
+ continue;
+ insn_def = DF_REF_INSN (df_def);
+ /* Don't consider debug_insns */
+ if (!NONDEBUG_INSN_P (insn_def))
+ continue;
+ if (dump_file)
+ fprintf (dump_file,
+ "%*sdef of reg=%d at uid=%d\n", indent, " ",
+ DF_REF_REGNO (df_def), INSN_UID (insn_def));
+ /* Set indent for dump formatting */
+ if (indent_once)
+ {
+ ++indent;
+ indent_once = false;
+ }
+ ext_needed = analyze_ext_def (insn_def, DF_REF_REGNO (df_def), indent);
+ if (ext_needed)
+ break;
+ }
+
+ if (dump_file)
+ fprintf (dump_file,
+ "%*sext %s needed\n", indent, " ", ext_needed ? "" : "not");
+
+ return ext_needed;
+}
+
+/* Return true if EXP must be saved for the current extension because
+   it will have to be updated should the extension be eliminated.
+   That is the case for a SUBREG with SUBREG_PROMOTED_VAR_P set.  */
+
+static bool
+exp_needs_update_p (rtx exp)
+{
+  /* Check the code first so that the promoted flag is only read
+     from a SUBREG.  */
+  if (GET_CODE (exp) != SUBREG)
+    return false;
+
+  return SUBREG_PROMOTED_VAR_P (exp) ? true : false;
+}
+
+/* Save EXP in the ext_updates vector of EXTREC.  Some expressions need
+   to be updated if the originating extension is eliminated; e.g. the
+   SUBREG_PROMOTED flags on uses are no longer valid in that case.  */
+
+static void
+save_ext_update (ext_record_t extrec, rtx exp)
+{
+ /* Push onto the vector of expressions to update on elimination. */
+ VEC_safe_push (rtx, heap, extrec->ext_updates, exp);
+}
+
+/* Check a compare operation (DEST set from SRC) to determine whether
+   the operands of the compare use the upper bits of the extension.
+   Return true if the upper bits are not relevant in the compare,
+   false otherwise.  */
+
+static bool
+check_compare (rtx dest, rtx src)
+{
+  /* Detect
+     (set (reg:CC r0) (compare:CC (REGSPEC) (REGSPEC)))
+     or
+     (set (reg:CC r0) (compare:CC (REGSPEC) (CONST)))
+     where REGSPEC is (reg:mm r) or (subreg:mm (reg:MM r) n)
+     CONST is a constant integer.
+     The mode size of the compare ops must not exceed the
+     extended-from mode of the original extension for the upper
+     bits to be irrelevant.
+     An exception is made for mode sizes less than a word size.
+     For our targets, there is no 'cmph' insn, so we bail out
+     if we see a comparison of sizes less than a word (SI). */
+  if (REG_P (dest)
+      && (GET_MODE (dest) == CCmode || GET_MODE (dest) == CCUNSmode)
+      && GET_CODE (src) == COMPARE
+      && (GET_MODE (src) == CCmode || GET_MODE (src) == CCUNSmode))
+    {
+      rtx compare_op0 = XEXP (src, 0);
+      rtx compare_op1 = XEXP (src, 1);
+
+      /* Check the first operand, op0, size. */
+      if ((REG_P (compare_op0) || GET_CODE (compare_op0) == SUBREG)
+          && (GET_MODE_BITSIZE (GET_MODE (compare_op0)) <=
+              GET_MODE_BITSIZE (ext_from_mode)))
+        {
+          /* Sub-word compares are done as word compares, so upper bits are used; compare bit sizes, not a mode. */
+          if (GET_MODE_BITSIZE (GET_MODE (compare_op0)) < GET_MODE_BITSIZE (SImode))
+            return false;
+
+          /* Now check the other operand, op1. */
+          if ((REG_P (compare_op1) || GET_CODE (compare_op1) == SUBREG)
+              && (GET_MODE_BITSIZE (GET_MODE (compare_op1)) <=
+                  GET_MODE_BITSIZE (ext_from_mode)))
+            return true;
+
+          /* Compare to constant, we know op0 already meets size constraints. */
+          if (CONST_INT_P (compare_op1))
+            return true;
+        }
+    }
+  return false;
+}
+
+/* Determine condition a: whether the operation SRC, whose result is
+   stored in DEST, makes use of the bits of register REGNO_USE above
+   the extended-from size.  Return false if the upper bits are proven
+   irrelevant to the operation, true otherwise.  */
+
+static bool
+operation_uses_upper_bits (rtx dest, rtx src, unsigned int regno_use,
+                           int indent ATTRIBUTE_UNUSED)
+{
+  rtx used_reg = find_regspec_regno (regno_use, src);
+  unsigned int from_bits = GET_MODE_BITSIZE (ext_from_mode);
+
+  /* A compare for which the upper bits are not relevant.  */
+  if (check_compare (dest, src))
+    return false;
+
+  /* Both remaining tests need REGNO_USE to appear in SRC; for a store
+     this rules out the register being used only as, say, a pointer.  */
+  if (used_reg == NULL_RTX)
+    return true;
+
+  /* Store to mem of the same or a smaller size than extended from.  */
+  if (MEM_P (dest) && GET_MODE_BITSIZE (GET_MODE (dest)) <= from_bits)
+    return false;
+
+  /* Operand size is the same or smaller than the extended from size.  */
+  if (GET_MODE_BITSIZE (GET_MODE (used_reg)) <= from_bits)
+    return false;
+
+  /* Default to the safest result.  */
+  return true;
+}
+
+/* Determine if SRC is itself an extension (or an equivalent AND mask) that makes
+   the originating extension redundant for this use; see the size rules below.
+   Returns true if so. NOTE(review): the extension test bounds cand_to_mode from above. */
+
+static bool
+operation_extends_to_upper_bits_size (rtx src, int indent ATTRIBUTE_UNUSED)
+{
+ /* Sign extension of the same type as the originating extension.
+ Here the candidate uses the register defined by the originating extension.
+ If the candidate is found to be redundant, the originating extension is
+ replaced with a copy.
+
+ We follow these rules:
+
+ cand_to_mode == machine mode of the destination for this candidate extension
+ (it's the same mode as the src, e,g, reg:DI = sign_extend:DI ...)
+ cand_from_mode == machine mode of the source for this candidate extension
+ (the mode of the used register, SI in this case, e.g. reg:DI = sign_extend:DI (subreg:SI (reg:DI))
+ ext_to_mode == machine mode of the originating extension output
+ ext_from_mode == machine mode of the originating extension input
+
+ SIZE(cand_from_mode) >= SIZE(ext_from_mode) && SIZE(cand_to_mode) <= SIZE(ext_to_mode)
+
+ Example 1:
+ Originating (SI->DI)
+ DI SI HI QI 0
+ |<-------| | | |
+
+ Candidate (HI->SI extension)
+ DI SI HI QI 0
+ | |<---| | |
+
+ Not redundant, the candidate starts below the original from size:
+ SIZE(cand_from_mode)[HI] !>= SIZE(ext_from_mode)[SI]
+
+ Example 2:
+ Originating (HI->SI)
+ DI SI HI QI 0
+ | |<---| | |
+
+ Candidate (QI->DI extension)
+ DI SI HI QI 0
+ |<-------|----|--| |
+
+ NOTE(review): the test below rejects this case as well, since
+ SIZE(cand_from_mode) [QI] !>= SIZE(ext_from_mode) [HI]
+ AND
+ SIZE(cand_to_mode) [DI] !<= SIZE(ext_to_mode) [SI] -- confirm the intent. */
+ if (GET_CODE (src) == ext_code)
+ {
+ /* Extend is redundant if we don't overwrite the source of the
+ previous extension and it stays within the extent of the original. */
+ enum machine_mode cand_from_mode = GET_MODE (XEXP (src, 0));
+ enum machine_mode cand_to_mode = GET_MODE (src);
+ if (GET_MODE_BITSIZE (cand_from_mode) >=
+ GET_MODE_BITSIZE (ext_from_mode)
+ && (GET_MODE_BITSIZE (cand_to_mode) <=
+ GET_MODE_BITSIZE (ext_to_mode)))
+ return true;
+ }
+
+ /* Encountered an insn with the same effect as extension, e.g.
+ AND (regspec) (const_int). E.g. AND (reg:SI) (0xffff) is equivalent
+ to ZERO_EXTEND:DI (reg:HI) */
+ if ((GET_CODE (src) == AND) && CONST_INT_P (XEXP (src, 1)))
+ {
+ /* Extends to at least the original extension size */
+ if (GET_MODE_BITSIZE (GET_MODE (src)) >= GET_MODE_BITSIZE (ext_to_mode))
+ {
+ if (ext_from_mode == QImode && (UINTVAL (XEXP (src, 1)) <= 0xff))
+ return true;
+ else if (ext_from_mode == HImode
+ && (UINTVAL (XEXP (src, 1)) <= 0xffff))
+ return true;
+ else if (ext_from_mode == SImode
+ && (UINTVAL (XEXP (src, 1)) <= 0xffffffff))
+ return true;
+ else
+ return false;
+ }
+ }
+ return false;
+}
+
+/* Condition B of the use analysis.  Return true if the upper bits
+   of register REGNO_USE can influence the low bits of the value SRC
+   computes, or if DEST is a function return value (which has to be
+   properly extended regardless).  Return false for operations whose
+   low result bits depend only on the low operand bits.  */
+
+static bool
+operation_implicitly_affects_lowbits (rtx dest, rtx src,
+                                      unsigned int regno_use, int indent)
+{
+  rtx operand = find_regspec_regno (regno_use, src);
+  unsigned int operand_bits;
+
+  /* A return expression must be assumed to affect the low bits, as
+     the return value must be extended properly.  */
+  if (return_val_p (dest))
+    {
+      if (dump_file)
+        fprintf (dump_file, "%*sDestination is a return value\n", indent,
+                 " ");
+      return true;
+    }
+
+  switch (GET_CODE (src))
+    {
+    case MULT:
+    case DIV:
+    case UDIV:
+    case UMOD:
+    case MOD:
+      /* Normally these do affect the low bits.  The exception is an
+         operand no wider than the "low bits" size combined with an
+         operation performed in the operand's own size: then only
+         the "low bits" take part and the "upper bits" contribute
+         nothing to the output.  */
+      if (operand == NULL_RTX)
+        return true;
+      operand_bits = GET_MODE_BITSIZE (GET_MODE (operand));
+      if (operand_bits > GET_MODE_BITSIZE (ext_from_mode))
+        return true;
+
+      return GET_MODE_BITSIZE (GET_MODE (src)) != operand_bits;
+
+    case LSHIFTRT:
+    case ASHIFTRT:
+      /* Right shifts normally affect the low bits.  Special cases
+         exist where they do not, such as an operand narrower than
+         the extended-from size, e.g.
+           set (reg:SI Y) (zero_extend:SI (subreg:HI (reg:SI X)))
+           set (reg:QI Z) (lshiftrt (subreg:QI (reg:SI Y))
+         These seem rare and are deliberately not checked for.  */
+      return true;
+
+    default:
+      /* Other operations are known not to impact the low bits.  */
+      return false;
+    }
+}
+
+/* Return true if operation SRC directly defines a propagatable
+   output in DEST.  Several operations do not define such an output:
+   e.g. MEM (loads) do not define an output based on the operation,
+   and USE isn't a real operation.  Codes not listed below are
+   assumed not to generate a normal output.  */
+
+static bool
+operation_directly_defines_an_output (rtx dest, rtx src,
+                                      int indent ATTRIBUTE_UNUSED)
+{
+  switch (GET_CODE (src))
+    {
+    case IF_THEN_ELSE:
+      /* OK to propagate only if the output of IF_THEN_ELSE is a
+         register.  */
+      return REG_P (dest) ? true : false;
+
+    case REG:
+    case SUBREG:
+    case PLUS:
+    case MINUS:
+    case NEG:
+    case MULT:
+    case DIV:
+    case MOD:
+    case UDIV:
+    case UMOD:
+    case AND:
+    case IOR:
+    case XOR:
+    case NOT:
+    case ASHIFT:
+    case ROTATE:
+    case ASHIFTRT:
+    case LSHIFTRT:
+    case ROTATERT:
+    case SIGN_EXTEND:
+    case ZERO_EXTEND:
+    case TRUNCATE:
+      return true;
+
+    default:
+      /* All others are assumed not to generate a normal output.  */
+      return false;
+    }
+}
+
+/* Helper for insn_use_analysis_result: classify how expression INSN uses REGNO_USE; *DEST receives its SET_DEST. */
+
+static enum insn_use_results
+insn_use_analysis_result_1 (rtx insn, bool treat_as_copy,
+ unsigned int regno_use, rtx * dest, int indent)
+{
+ rtx src;
+ bool cond_a, cond_b, cond_c, cond_d;
+
+ if (GET_CODE (insn) != SET)
+ return EXTELIM_ANALYSIS_RESULT_LOWBITS_AFFECTED;
+
+ *dest = SET_DEST (insn);
+ src = SET_SRC (insn);
+
+ /* Bail out on inline assembly also */
+ if (GET_CODE (src) == ASM_INPUT || GET_CODE (src) == ASM_OPERANDS)
+ return EXTELIM_ANALYSIS_RESULT_LOWBITS_AFFECTED;
+
+ /* Bail out on non supported types */
+ if (!mode_supported_p (*dest))
+ return EXTELIM_ANALYSIS_RESULT_LOWBITS_AFFECTED;
+
+ /* First, we determine cond_c (is a redundant extension) because it gates the
+ other conditions; an insn marked for replacement is followed as a copy. */
+ if ((cond_c = operation_extends_to_upper_bits_size (src, indent)))
+ {
+ if (treat_as_copy)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "%*s...%s is treated as a copy (marked for replace)\n",
+ indent, " ", GET_RTX_NAME (GET_CODE (src)));
+ return EXTELIM_ANALYSIS_RESULT_CONTINUE_RECURSION;
+ }
+
+ if (dump_file)
+ fprintf (dump_file,
+ "%*s...%s is a redundant extension\n",
+ indent, " ", GET_RTX_NAME (GET_CODE (src)));
+ return EXTELIM_ANALYSIS_RESULT_LOWBITS_NOT_AFFECTED;
+ }
+
+ cond_a = operation_uses_upper_bits (*dest, src, regno_use, indent);
+
+ cond_b =
+ operation_implicitly_affects_lowbits (*dest, src, regno_use, indent);
+
+ cond_d = operation_directly_defines_an_output (*dest, src, indent);
+
+ /* Operation implicitly affects low bits (cond_b); checked first */
+ if (cond_b)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "%*s...%s implicitly affects low bits\n",
+ indent, " ", GET_RTX_NAME (GET_CODE (src)));
+ return EXTELIM_ANALYSIS_RESULT_LOWBITS_AFFECTED;
+ }
+
+ /* cond_b is false here; if the upper bits are not used either, we are done */
+ if (!cond_a)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "%*s...%s does not use upper bits\n",
+ indent, " ", GET_RTX_NAME (GET_CODE (src)));
+ return EXTELIM_ANALYSIS_RESULT_LOWBITS_NOT_AFFECTED;
+ }
+
+ /* To continue recursion, the operation must define a
+ meaningful output (cond_d). */
+ if (!cond_d)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "%*s...%s does not define a propagatable output\n",
+ indent, " ", GET_RTX_NAME (GET_CODE (src)));
+ return EXTELIM_ANALYSIS_RESULT_LOWBITS_AFFECTED;
+ }
+
+ /* This leaves cond_a, meaning we need to continue down the chain
+ to see if the low bits are ultimately affected by the upper bits. */
+ return EXTELIM_ANALYSIS_RESULT_CONTINUE_RECURSION;
+}
+
+/* Determine the action for the use of REGNO_USE in INSN_INSN from the
+   conditions evaluated on its pattern.  An insn previously marked for
+   replacement by a copy is flagged so that it is handled as a copy
+   and not detected as redundant for this use.  */
+
+static enum insn_use_results
+insn_use_analysis_result (rtx insn_insn, unsigned int regno_use, rtx * dest,
+                          int indent)
+{
+  int uid = INSN_UID (insn_insn);
+  bool as_copy = insn_flag_p (EXTELIM_REPLACE_COPY, uid) ? true : false;
+
+  if (as_copy && dump_file)
+    fprintf (dump_file,
+             "%*suse at uid=%d is marked to transform to copy\n", indent,
+             " ", uid);
+
+  return insn_use_analysis_result_1 (PATTERN (insn_insn), as_copy,
+                                     regno_use, dest, indent);
+}
+
+/* Fold the action CUR_ACTION of one expression of a PARALLEL into
+   PREV_ACTION, the action accumulated from the expressions analyzed
+   before it, and return the action for the series so far.
+   LOWBITS_AFFECTED is returned as soon as it is seen.  Of the two
+   remaining actions CONTINUE_RECURSION wins, so LOWBITS_NOT_AFFECTED
+   results only when no CONTINUE_RECURSION occurred.  Whenever the
+   outcome is CONTINUE_RECURSION, *DEST must be the same rtx as
+   *PREV_DEST (if one was recorded), because only one use, i.e. one
+   definition of dest, can be propagated; otherwise the outcome
+   becomes LOWBITS_AFFECTED.  *PREV_DEST is then updated to *DEST.  */
+
+static enum insn_use_results
+analyze_action (enum insn_use_results cur_action,
+                enum insn_use_results prev_action,
+                rtx * dest, rtx * prev_dest)
+{
+  enum insn_use_results merged = cur_action;
+
+  /* LOWBITS_AFFECTED is final.  Note that *PREV_DEST is left as it
+     was on this path.  */
+  if (cur_action == EXTELIM_ANALYSIS_RESULT_LOWBITS_AFFECTED)
+    return cur_action;
+
+  /* A CONTINUE_RECURSION recorded earlier outranks a current
+     LOWBITS_NOT_AFFECTED.  */
+  if (prev_action == EXTELIM_ANALYSIS_RESULT_CONTINUE_RECURSION)
+    merged = EXTELIM_ANALYSIS_RESULT_CONTINUE_RECURSION;
+
+  /* All bets are off if the series defines multiple outputs while
+     recursion is to continue.  */
+  if (merged == EXTELIM_ANALYSIS_RESULT_CONTINUE_RECURSION
+      && *prev_dest != NULL_RTX
+      && *prev_dest != *dest)
+    merged = EXTELIM_ANALYSIS_RESULT_LOWBITS_AFFECTED;
+
+  /* Record the destination for the next expression.  */
+  *prev_dest = *dest;
+
+  return merged;
+}
+
+/* Helper 2 for insn_use_analysis.  Handle an insn whose pattern is a
+   PARALLEL: combine the results of every element that uses REGNO_USE
+   into one action for the whole insn.  */
+
+static enum insn_use_results
+insn_use_analysis_2 (rtx insn_use, unsigned int regno_use, rtx * dest,
+                     int indent)
+{
+  rtx series = PATTERN (insn_use);
+  rtx last_dest = NULL_RTX;
+  enum insn_use_results combined
+    = EXTELIM_ANALYSIS_RESULT_LOWBITS_NOT_AFFECTED;
+  int elt;
+
+  gcc_assert (GET_CODE (series) == PARALLEL);
+
+  /* A use that reaches a call is assumed to be an outgoing
+     parameter, which the ABI requires to be extended.  So calls are
+     decided right away.  */
+  if (CALL_P (insn_use))
+    {
+      if (dump_file)
+        fprintf (dump_file, "%*s...is a call parameter\n", indent, " ");
+      return EXTELIM_ANALYSIS_RESULT_LOWBITS_AFFECTED;
+    }
+
+  /* Elements are visited from the last one to the first.  */
+  elt = XVECLEN (series, 0);
+  while (--elt >= 0)
+    {
+      enum insn_use_results one;
+      rtx body = XVECEXP (series, 0, elt);
+
+      /* Elements that do not mention REGNO_USE have no say.  */
+      if (regno_use_in (regno_use, body) == NULL_RTX)
+        continue;
+
+      /* Elements of a PARALLEL are never treated as copies.  */
+      one = insn_use_analysis_result_1 (body, false, regno_use, dest, indent);
+
+      /* Fold this element's action into the running result; see
+         analyze_action for how differing actions are reconciled.  */
+      combined = analyze_action (one, combined, dest, &last_dest);
+
+      /* "Low bits affected" is final, no need to look further.  */
+      if (combined == EXTELIM_ANALYSIS_RESULT_LOWBITS_AFFECTED)
+        break;
+    }
+  return combined;
+}
+
+/* Helper 1 for insn_use_analysis.  Dispatch on the shape of
+   INSN_USE's pattern: a PARALLEL is analyzed element by element by
+   insn_use_analysis_2, any other pattern is analyzed as a whole by
+   insn_use_analysis_result.  The remaining arguments are passed
+   through unchanged and the result of the chosen helper is
+   returned.  */
+
+static enum insn_use_results
+insn_use_analysis_1 (rtx insn_use, unsigned int regno_use, rtx * dest,
+                     int indent)
+{
+  if (GET_CODE (PATTERN (insn_use)) == PARALLEL)
+    {
+      /* Several expressions in parallel.  */
+      return insn_use_analysis_2 (insn_use, regno_use, dest, indent);
+    }
+
+  /* A single expression.  */
+  return insn_use_analysis_result (insn_use, regno_use, dest, indent);
+}
+
+/* Analyze the insn and determine the next course of action in the
+ use analysis loop.
+ There are several conditions to consider:
+
+ 1. The "extended from" mode. This is an enum machine_mode value
+ that determines what is the size extended. It is derived from the
+ source of the original extension. It is the "low bits" value.
+ It is this range of bits that cannot be affected by the operation's
+ "upper bits" in order to determine whether the extend is useful or not.
+ Examples:
+ (1) set (reg:DI Y (zero_extend:DI (subreg:QI (reg:DI X))) ==> low bits = QI
+ (2) set (reg:SI Y (sign_extend:SI (reg:HI X) ==> low bits = HI
+
+ 2. The "extend to" mode. This is the size extended to in the original
+ extension. It is the "upper bits" value. The entire extended to size may
+ be used subsequently or it may be subreg'd to a smaller or larger sizes
+ later in the propagation.
+ For example (1) above, "upper bits" is DI, and (2) "upper bits" is SI.
+
+ 3. The code, ext_code, of the original extension, either ZERO_EXTEND or SIGN_EXTEND.
+
+ 4. Operation code. For an insn, the actual operation code corresponding to
+ a machine instruction. For certain codes, we know that the "low bits" of the
+ result are modified by the insn because of the values in the "upper bits" of the
+ input operand. We say the operation implicitly uses the "upper bits" to modify the
+ "low bits". For other codes, the "upper bits" do not affect the output result
+ in the "low bits".
+
+ If the operation does implicitly use the "upper bits" to modify
+ the "low bits", it is instantly a deal killer. The original extension must be
+ preserved.
+
+ If the operation does not implicitly use "upper bits" to modify the "low bits",
+ then the action to take depends on the operation operand size relative to
+ "low bits" size.
+
+ We only want to deal with codes that map to real instructions,
+ like ADD, SUB, MULT, LSHIFTRT, etc. Codes such as PARALLEL, etc. do not map to
+ instruction and must be dissected to extract the real instructions.
+
+ Furthermore, for recursion to continue, the operation and operand must define
+ an output related to the input operand (the use register). This doesn't happen
+ for operations such as "mem" where the output is indirectly related to the
+ input operand.
+
+ 5. Operation mode. The operation mode of the operation code. This sometimes impacts
+ the effect of the operation. For example MULT:SI and MULT:DI map to two different
+ machine instructions and both may have operands of SI mode. However, the MULT:SI
+ results will be oblivious to the upper bits of the DI register whereas, SI part of
+ MULT:DI result will be affected by the upper bits of the DI register.
+
+ Several conditions determine the action to take based on the various inputs.
+
+ The truth table inputs are A, B, C, and D. The truth table output is the action to take.
+
+ A. True if the used operand mode size is greater than the extended_from ("low bits") mode size.
+ B. True if the operation implicitly uses upper bits to define the low bits
+ C. True if the operation also extends the output to upper bits size
+ D. True if the operation and input operand directly define an output operand.
+
+ Condition A. means the upper bits are in use in the operation. The extend _may_ be needed,
+ all things being equal, so the action would be to continue recursion to the use of the
+ defined operand, i.e. return CONTINUE_RECURSION.
+
+ Condition B. means the "low bits" are modified by the extended portion of the register
+ by virtue of the operation. For example, logical shift right, where the extended
+ portion is shifted into the "low bits". Another example, multiply, where the machine
+ uses the extended portion implicitly to calculate the results, some of which are
+ reflected in the "low bits" of the result. The extension is definitely needed in these
+ cases for this use, so return LOWBITS_AFFECTED. Recursion is stopped and analysis of
+ this extension is halted.
+
+ Condition C. means the operation and its operands perform the same extension as
+ the originating extension. The operation must extend to the same size _or higher_ of
+ the original extension. In this case, the original extension is truly redundant and
+ we return LOWBITS_NOT_AFFECTED for this use.
+
+ Condition D. means the operation and operand directly define an output operand. For most
+ arithmetic and unary operations this is true. For mem and other internal operations,
+ e.g. USE, this is false.
+
+ Condition Action Comments
+ ==================================================================
+ A. B. C. D.
+ ------------------------------------------------------------------
+ X X true true LOW_BITS_NOT_AFFECTED extend is redundant
+ ------------------------------------------------------------------
+ false false false X LOW_BITS_NOT_AFFECTED used operand is smaller than "low bits"
+ ------------------------------------------------------------------
+ false true false true LOW_BITS_AFFECTED "low bits" modified implicitly by operation
+ ------------------------------------------------------------------
+ true false false true CONTINUE_RECURSION "low bits" _may_ be impacted by next uses
+ ------------------------------------------------------------------
+ true true false true LOW_BITS_AFFECTED "low bits" modified implicitly by operation */
+
+static enum insn_use_results
+insn_use_analysis (rtx insn_use, unsigned int regno_use, rtx *dest, int indent)
+{
+  /* Thin entry point: all the work happens in insn_use_analysis_1.  */
+  return insn_use_analysis_1 (insn_use, regno_use, dest, indent);
+}
+
+/* Analyze the use of register REGNO_USE in insn INSN_USE. If this insn
+ alone is inconclusive, the uses of the register that INSN_USE defines
+ are analyzed recursively. INDENT is the width used to indent dump
+ output. Returns true if the extension is needed, false otherwise;
+ an insn already flagged EXTELIM_SEEN also returns false. */
+
+static bool
+analyze_ext_use (rtx insn_use, unsigned int regno_use, int indent)
+{
+ bool ext_needed, indent_once;
+ unsigned int dest_target_regno;
+ extelim_uid_t uid;
+ rtx use = PATTERN (insn_use), dest;
+ df_ref df_use, *p_def;
+ struct df_link *link;
+ enum insn_use_results analysis_result;
+
+ gcc_assert (use != NULL);
+
+ uid = INSN_UID (insn_use);
+
+ if (insn_flag_p (EXTELIM_SEEN, uid))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "%*suse at uid=%d is visited already\n", indent, " ", uid);
+ return false;
+ }
+
+ /* Mark this insn as seen; a later visit takes the early return above */
+ insn_flag_set (EXTELIM_SEEN, uid);
+
+ analysis_result = insn_use_analysis (insn_use, regno_use, &dest, indent);
+ switch (analysis_result)
+ {
+ /* We know conclusively that the "upper bits" of the extended
+ entity do not impact the "low bits" of the output of the operation. */
+ case EXTELIM_ANALYSIS_RESULT_LOWBITS_NOT_AFFECTED:
+ if (dump_file)
+ fprintf (dump_file, "%*suse at uid=%d is not affected\n", indent, " ",
+ uid);
+ return false;
+ break;
+ /* We know conclusively that the "upper bits" of the extended
+ entity _do_ impact the "low bits" of the output of the operation. */
+ case EXTELIM_ANALYSIS_RESULT_LOWBITS_AFFECTED:
+ if (dump_file)
+ fprintf (dump_file, "%*suse at uid=%d is affected\n", indent, " ",
+ uid);
+ return true;
+ break;
+ /* Continue to look at the uses of the result to determine the impact
+ of the "upper bits" */
+ case EXTELIM_ANALYSIS_RESULT_CONTINUE_RECURSION:
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* We reach here because the action taken is CONTINUE_RECURSION.
+ Continue to look at the uses of the destination register recursively.
+ If the propagation ultimately ends where the upper bits are not significant
+ to the final output, then the extension can be removed. */
+ if (!REG_P (dest))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "%*sdest of uid=%d (SET) is not a register\n", indent, " ",
+ uid);
+ return true;
+ }
+
+ dest_target_regno = REGNO (dest);
+
+ /* What this insn defines */
+ p_def = DF_INSN_UID_DEFS (uid);
+
+ /* Ref must be valid and there must be only one definition and it must be the
+ destination; otherwise the extension is conservatively kept */
+ if ((*p_def == NULL) || (*(p_def + 1) != NULL))
+ return true;
+
+ gcc_assert (DF_REF_REGNO (*p_def) == dest_target_regno);
+
+ ext_needed = true; /* stays true if the loop below analyzes no use */
+ indent_once = true;
+ for (link = DF_REF_CHAIN (*p_def); link; link = link->next)
+ {
+ rtx insn_use, use_exp; /* NOTE: this insn_use shadows the parameter */
+ df_use = link->ref;
+ if (!df_use)
+ continue;
+ /* Link must be a USE of the DEF */
+ if (!DF_REF_REG_USE_P (df_use))
+ continue;
+ /* Ignore ARTIFICIAL USES */
+ if (DF_REF_IS_ARTIFICIAL (df_use))
+ continue;
+ insn_use = DF_REF_INSN (df_use);
+ /* Don't consider debug_insns */
+ if (!NONDEBUG_INSN_P (insn_use))
+ continue;
+ use_exp = DF_REF_REG (df_use);
+
+ if (exp_needs_update_p (use_exp))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "%*ssaved reg=%d expression for update\n", indent, " ", DF_REF_REGNO (df_use));
+ save_ext_update (current_ext_record, use_exp);
+ }
+
+ if (dump_file)
+ fprintf (dump_file,
+ "%*suse at uid=%d of reg=%d\n", indent, " ",
+ INSN_UID (insn_use), DF_REF_REGNO (df_use));
+ /* Set indent for dump formatting; bumped once per call, not per use */
+ if (indent_once)
+ {
+ ++indent;
+ indent_once = false;
+ }
+ ext_needed = analyze_ext_use (insn_use, DF_REF_REGNO (df_use), indent);
+ if (ext_needed) /* one use that needs the extension settles it */
+ break;
+ }
+
+ if (dump_file)
+ fprintf (dump_file,
+ "%*sext %s needed\n", indent, " ", ext_needed ? "" : "not");
+
+ return ext_needed;
+}
+
+/* Flag the extension insn EXT with EXTELIM_REPLACE_COPY so that a
+   later phase knows it is to be replaced by a copy insn (or
+   deleted).  */
+
+static void
+mark_replace_with_copy (rtx ext)
+{
+  /* The per-insn flags are looked up by insn uid.  */
+  insn_flag_set (EXTELIM_REPLACE_COPY, INSN_UID (ext));
+}
+
+/* Return the mode being extended from, i.e. the mode of the operand
+   of the ZERO_EXTEND or SIGN_EXTEND expression SRC.  */
+
+static enum machine_mode
+get_ext_from_mode (rtx src)
+{
+  enum rtx_code code = GET_CODE (src);
+
+  gcc_assert (code == ZERO_EXTEND || code == SIGN_EXTEND);
+  /* The operand is the SUBREG or REG that is being widened.  */
+  return GET_MODE (XEXP (src, 0));
+}
+
+/* Apply the pending update to EXP.  The only update performed is
+   clearing the promoted flag of a SUBREG that has it set.  Return
+   true if EXP was modified, false if there was nothing to do.  */
+
+static bool
+process_ext_update (rtx exp)
+{
+  /* Anything other than a SUBREG has no state to reset.  */
+  if (GET_CODE (exp) != SUBREG)
+    return false;
+  if (!SUBREG_PROMOTED_VAR_P (exp))
+    return false;
+
+  SUBREG_PROMOTED_VAR_P (exp) = 0;
+  return true;
+}
+
+/* Walk the expressions saved in EXTREC's ext_updates list and apply
+   process_ext_update to each one.  This is done once the extension
+   is going to be replaced by a copy.  */
+
+static void
+process_ext_updates (ext_record_t extrec)
+{
+  bool any_changed = false;
+  unsigned ix;
+  rtx update_exp;
+
+  /* Every saved expression is visited, even after one has changed.  */
+  FOR_EACH_VEC_ELT (rtx, extrec->ext_updates, ix, update_exp)
+    if (process_ext_update (update_exp))
+      any_changed = true;
+
+  if (!dump_file || !any_changed)
+    return;
+  fprintf (dump_file, " updates processed for extension at uid=%d\n",
+           INSN_UID (extrec->ext));
+}
+
+/* Try to eliminate the extension insn EXT. The reaching definitions of
+ the extension source are analyzed first; if that is inconclusive, the
+ uses of the extension destination. An unneeded EXT is marked for copy. */
+
+static void
+eliminate_one_extend (rtx ext)
+{
+ rtx src, dest, regexp;
+ df_ref df_use, df_def, *ext_use, *ext_def;
+ unsigned int ext_dest_regno, ext_src_regno, def_use_count = 1;
+ bool ext_needed = true;
+ extelim_uid_t uid = INSN_UID (ext);
+ struct df_link *link;
+ const char *inserted =
+ insn_flag_p (EXTELIM_INSERTED, uid) ? "inserted" : "";
+
+ /* Reset desired per insn flags for each extension analyzed */
+ reinit_insn_flags (EXTELIM_SEEN);
+
+ gcc_assert (GET_CODE (PATTERN (ext)) == SET);
+ src = SET_SRC (PATTERN (ext));
+ dest = SET_DEST (PATTERN (ext));
+
+ /* Save the basic information about the extension in a file global */
+ ext_to_mode = GET_MODE (dest);
+ ext_from_mode = get_ext_from_mode (src);
+ ext_code = GET_CODE (src);
+
+ /* Also mark this original extension as "SEEN" so we don't recurse into it. */
+ insn_flag_set (EXTELIM_SEEN, INSN_UID (ext));
+
+ /* Find the target of the extension */
+ if (!REG_P (dest))
+ return;
+ ext_dest_regno = REGNO (dest);
+
+ /* Find the source of the extension: set (REG:MODE (sign_extend (REG|SUBREG:MODE ... */
+ if ((regexp = register_exp (XEXP (src, 0))) == NULL)
+ return;
+ ext_src_regno = REGNO (regexp);
+
+ /* Iterate through the reaching definitions of the source of the extension
+ recursively. If the source is already sign extended, mark the
+ extension for replacement with a copy or deletion (deletion if it was
+ inserted in the duplication pass). */
+ ext_use = DF_INSN_UID_USES (uid);
+ /* There is only one use in a sign/zero extension insn and it must be the
+ source register */
+ gcc_assert (*(ext_use + 1) == NULL);
+ gcc_assert (DF_REF_REGNO (*ext_use) == ext_src_regno);
+
+ /* Now look at all the reaching definitions of this use */
+ for (link = DF_REF_CHAIN (*ext_use); link; link = link->next)
+ {
+ rtx insn_def;
+ df_def = link->ref;
+ if (!df_def)
+ continue;
+ /* Link must be to a definition of the use */
+ if (!DF_REF_REG_DEF_P (df_def))
+ continue;
+ /* Ignore ARTIFICIAL defs */
+ if (DF_REF_IS_ARTIFICIAL (df_def))
+ continue;
+ insn_def = DF_REF_INSN (df_def);
+ /* Don't consider debug_insns */
+ if (!NONDEBUG_INSN_P (insn_def))
+ continue;
+ if (dump_file)
+ fprintf (dump_file,
+ " analyze def #%d of reg=%d at uid=%u\n",
+ def_use_count, DF_REF_REGNO (*ext_use), INSN_UID (insn_def));
+ ext_needed = analyze_ext_def (insn_def, DF_REF_REGNO (*ext_use), 2);
+ if (ext_needed) /* one definition that needs the extension settles it */
+ break;
+ def_use_count++;
+ }
+
+ /* Try the def-use chains if the extension wasn't marked by the
+ previous pass. */
+ if (ext_needed)
+ {
+ /* Defs of the sign extension */
+ ext_def = DF_INSN_UID_DEFS (uid);
+ /* There is only one def in a sign extension insn and it must be the
+ destination */
+ gcc_assert (*(ext_def + 1) == NULL);
+ gcc_assert (DF_REF_REGNO (*ext_def) == ext_dest_regno);
+
+ /* Counter for debug dump */
+ def_use_count = 1;
+ /* Reset desired per insn flags for each extension analyzed */
+ reinit_insn_flags (EXTELIM_SEEN);
+ /* Also mark this original extension as "SEEN" so we don't recurse into it. */
+ insn_flag_set (EXTELIM_SEEN, INSN_UID (ext));
+
+ /* Iterate over the reached uses of extension destination register recursively.
+ If the destination register's upper bits are ultimately not
+ relevant, the extension can be marked for replacement with a
+ copy. */
+ for (link = DF_REF_CHAIN (*ext_def); link; link = link->next)
+ {
+ rtx insn_use, use_exp;
+ df_use = link->ref;
+ if (!df_use)
+ continue;
+ /* Link must be a USE of the DEF */
+ if (!DF_REF_REG_USE_P (df_use))
+ continue;
+ /* Ignore ARTIFICIAL USES */
+ if (DF_REF_IS_ARTIFICIAL (df_use))
+ continue;
+ insn_use = DF_REF_INSN (df_use);
+ /* Don't consider debug_insns */
+ if (!NONDEBUG_INSN_P (insn_use))
+ continue;
+ use_exp = DF_REF_REG (df_use);
+
+ if (exp_needs_update_p (use_exp))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ " saved reg=%d expression for update\n", DF_REF_REGNO (df_use));
+ save_ext_update (current_ext_record, use_exp);
+ }
+
+ if (dump_file)
+ fprintf (dump_file,
+ " analyze use #%d at uid=%u of reg=%d\n",
+ def_use_count, INSN_UID (insn_use),
+ DF_REF_REGNO (*ext_def));
+ ext_needed = analyze_ext_use (insn_use, DF_REF_REGNO (*ext_def), 2);
+ if (ext_needed) /* one use that needs the extension settles it */
+ break;
+ def_use_count++;
+ }
+ }
+
+ /* The extension is not needed. The saved expression updates are applied
+ and the rtl for the extension is marked for replace by copy. */
+ if (!ext_needed)
+ {
+ process_ext_updates (current_ext_record);
+
+ if (dump_file)
+ fprintf (dump_file,
+ ":) mark %s extension insn uid=%d for copy replacement\n",
+ inserted, INSN_UID (ext));
+ mark_replace_with_copy (ext);
+ num_cand_transformed++;
+ }
+ else
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ ":( %s extension insn uid=%d is needed\n", inserted,
+ INSN_UID (ext));
+ }
+}
+
+/* Replace the extension insn EXT with a clobber of its destination
+   followed by a move of the extension operand into the low part of
+   that destination, then delete EXT.  The operand is asserted to
+   have mode SImode, HImode or QImode.
+
+   example 1:
+   from:
+     (set (reg:DI destreg) (sign_extend:DI (reg:SI srcreg)))
+   to:
+     (clobber (reg:DI destreg))
+     (set (subreg:SI (reg:DI destreg) 4) (reg:SI srcreg))
+
+   or
+
+   example 2:
+   from:
+     (set (reg:DI destreg) (sign_extend:DI (subreg:SI (reg:DI srcreg) 4)))
+   to:
+     (clobber (reg:DI destreg))
+     (set (subreg:SI (reg:DI destreg) 4) (subreg:SI (reg:DI srcreg) 4))
+
+   or
+
+   example 3:
+   from:
+     (set (reg:SI destreg) (sign_extend:SI (subreg:HI (reg:SI srcreg) 2)))
+   to:
+     (clobber (reg:SI destreg))
+     (set (subreg:HI (reg:SI destreg) 2) (subreg:HI (reg:SI srcreg) 2))  */
+
+static void
+replace_with_copy (rtx ext)
+{
+  rtx pattern = PATTERN (ext);
+  rtx wide_dest, ext_src, operand, low_dest, seq;
+  enum machine_mode narrow_mode;
+
+  gcc_assert (GET_CODE (pattern) == SET);
+  wide_dest = SET_DEST (pattern);
+  ext_src = SET_SRC (pattern);
+
+  /* The operand of the extension, and the mode being extended from.  */
+  operand = XEXP (ext_src, 0);
+  narrow_mode = GET_MODE (operand);
+  gcc_assert (narrow_mode == SImode || narrow_mode == HImode
+              || narrow_mode == QImode);
+
+  /* The copy writes only the low NARROW_MODE part of the destination.  */
+  low_dest = gen_lowpart_SUBREG (narrow_mode, wide_dest);
+
+  /* Build clobber + move as one sequence.  The original author notes
+     that the clobber is needed for rtl consistency, without knowing
+     why.  */
+  start_sequence ();
+  emit_clobber (wide_dest);
+  emit_move_insn (low_dest, operand);
+  seq = get_insns ();
+  end_sequence ();
+
+  /* Put the sequence where the extension was, then drop the extension.  */
+  emit_insn_before (seq, ext);
+
+  delete_insn (ext);
+}
+
+/* Walk the recorded extensions and turn each one that carries the
+   EXTELIM_REPLACE_COPY flag into a copy insn.  */
+
+static void
+replace_ext_with_copy (void)
+{
+  unsigned ix;
+  ext_record_t rec;
+
+  FOR_EACH_VEC_ELT (ext_record_t, extensions, ix, rec)
+    {
+      rtx ext_insn = rec->ext;
+      int uid = INSN_UID (ext_insn);
+      /* Only used to decorate the dump output.  */
+      const char *inserted
+        = insn_flag_p (EXTELIM_INSERTED, uid) ? "inserted" : "";
+      if (!insn_flag_p (EXTELIM_REPLACE_COPY, uid))
+        continue;
+      if (dump_file)
+        fprintf (dump_file, " replace %s extension uid=%d with a copy\n",
+                 inserted, uid);
+      replace_with_copy (ext_insn);
+    }
+}
+
+
+/* Copy the RTX flags from OLDRTX to NEWRTX.  Only flags that are set
+   on OLDRTX are transferred; a flag that is already set on NEWRTX is
+   never cleared.  */
+
+static void
+copy_flags (rtx oldrtx, rtx newrtx)
+{
+/* Turn FLAG on in NEWRTX when it is on in OLDRTX.  */
+#define EXTELIM_COPY_FLAG(FLAG) \
+  do \
+    { \
+      if (RTX_FLAG (oldrtx, FLAG)) \
+        RTX_FLAG (newrtx, FLAG) = true; \
+    } \
+  while (0)
+
+  EXTELIM_COPY_FLAG (in_struct);
+  EXTELIM_COPY_FLAG (volatil);
+  EXTELIM_COPY_FLAG (unchanging);
+  EXTELIM_COPY_FLAG (frame_related);
+  EXTELIM_COPY_FLAG (jump);
+  EXTELIM_COPY_FLAG (call);
+  EXTELIM_COPY_FLAG (return_val);
+
+#undef EXTELIM_COPY_FLAG
+}
+
+/* Scan the notes attached to INSN for notes of type KIND whose
+   operand is REG and make them refer to NEW_REG instead.  Only
+   REG_DEAD notes are rewritten; any other KIND changes nothing.  */
+
+static void
+update_notes (enum reg_note kind, rtx insn, rtx reg, rtx new_reg)
+{
+  rtx note;
+  for (note = REG_NOTES (insn); note != NULL_RTX; note = XEXP (note, 1))
+    {
+      if (REG_NOTE_KIND (note) != kind || kind != REG_DEAD)
+        continue;
+      if (REG_P (XEXP (note, 0)) && XEXP (note, 0) == reg)
+        XEXP (note, 0) = new_reg;
+    }
+}
+
+
+
+#if EXTELIM_DUPLICATE_EXTS_AT_USES
+/* Insert a duplicate of the extension EXT_INSN in front of USE_INSN.
+   The duplicate extends the low part of the original destination
+   into a new pseudo and USE_INSN is rewritten to use that pseudo:
+     set (reg:MM new) (sign_extend:MM (subreg:mm (reg:MM X)))
+   (or zero_extend, matching EXT_INSN).  The new insn is flagged
+   EXTELIM_INSERTED to record that it was added by this pass.  */
+
+static void
+insert_duplicate_ext_at_use (rtx ext_insn, rtx use_insn)
+{
+  rtx ext = PATTERN (ext_insn), ext_src, ext_dest, ext_op;
+  rtx new_ext_src_inner, new_ext_src_outer, new_ext_part;
+  rtx new_ext_dest, new_ext_insn;
+  extelim_uid_t new_uid;
+  df_ref *p_df_uses;
+  unsigned int ext_dest_regno;
+  enum machine_mode inner_mode;
+  bool use_changed = false;
+  bool sign_extend_p =
+    GET_CODE (SET_SRC (PATTERN (ext_insn))) == SIGN_EXTEND ? true : false;
+
+  ext_dest = SET_DEST (ext);
+  ext_src = SET_SRC (ext);
+  ext_op = XEXP (ext_src, 0);
+
+  gcc_assert (REG_P (register_exp (ext_dest)));
+
+  /* A copy of the extend destination register to a new virtual register */
+  new_ext_dest = gen_reg_rtx (GET_MODE (ext_dest));
+  /* A copy of the extend source (same reg as dest), REG_P */
+  new_ext_src_inner = copy_rtx (ext_dest);
+  /* Get inner mode, either mm for SUBREG:mm (REG:MM) or MM for (REG:MM).
+     Any other kind of extension operand is not expected here.  */
+  if (GET_CODE (ext_op) == SUBREG || REG_P (ext_op))
+    inner_mode = GET_MODE (ext_op);
+  else
+    gcc_unreachable ();
+
+  /* Make a subreg rtx */
+  new_ext_src_outer = gen_lowpart_SUBREG (inner_mode, new_ext_src_inner);
+  /* Make a sign/zero extend of it, matching the original extension */
+  new_ext_part = sign_extend_p
+    ? gen_rtx_SIGN_EXTEND (GET_MODE (ext_dest), new_ext_src_outer)
+    : gen_rtx_ZERO_EXTEND (GET_MODE (ext_dest), new_ext_src_outer);
+  /* (set (new:MM (sign_extend:MM (subreg:mm (reg:MM ext_dest))))) */
+  new_ext_insn = gen_rtx_SET (VOIDmode, new_ext_dest, new_ext_part);
+
+  /* Now update the use: every operand of USE_INSN that refers to the
+     extension destination is redirected to the new register.  */
+  ext_dest_regno = REGNO (register_exp (ext_dest));
+  for (p_df_uses = DF_INSN_UID_USES (INSN_UID (use_insn)); *p_df_uses;
+       p_df_uses++)
+    {
+      if (DF_REF_REGNO (*p_df_uses) == ext_dest_regno)
+        {
+          rtx use_reg = DF_REF_REG (*p_df_uses);
+
+          /* Replace the register use in use_insn with the new register.  If
+             the use is a subreg pattern, replace the innermost reg.  */
+          replace_rtx (PATTERN (use_insn), register_exp (use_reg),
+                       new_ext_dest);
+          /* Update flags on new dest reg */
+          copy_flags (register_exp (use_reg), new_ext_dest);
+          /* Update any notes associated with use reg and use_insn */
+          update_notes (REG_DEAD, use_insn, register_exp (use_reg), new_ext_dest);
+          use_changed = true;
+        }
+    }
+
+  /* DF info must be updated since the existing insn is changed.  The
+     rescan is done only after the walk above has finished, because
+     rescanning may rebuild the use vector that the loop iterates.  */
+  if (use_changed)
+    df_insn_rescan (use_insn);
+
+  new_uid = extelim_emit_before (new_ext_insn, use_insn);
+  insn_flag_set (EXTELIM_INSERTED, new_uid);
+}
+
+/* Return true if the extension EXT_INSN may be duplicated at USE_INSN
+   even though both are in the same block: USE_INSN's source must be
+   an ASHIFT and EXT_INSN a ZERO_EXTEND.  Both patterns are read as SETs.  */
+
+static bool
+allow_same_block_duplication_p (rtx ext_insn, rtx use_insn)
+{
+  rtx ext_pat = PATTERN (ext_insn);
+  rtx use_src = SET_SRC (PATTERN (use_insn));
+
+  return (GET_CODE (use_src) == ASHIFT
+          && GET_CODE (SET_SRC (ext_pat)) == ZERO_EXTEND);
+}
+
+/* Determine if the extension in EXTREC should be duplicated at USE_INSN.
+ Return true if yes, false otherwise; the reason goes to the dump file. */
+
+static bool
+save_ext_use_p (ext_record_t extrec, rtx use_insn)
+{
+ rtx ext_insn, ext, ext_dest, use = PATTERN (use_insn), use_src;
+ df_ref df_use;
+
+ ext_insn = extrec->ext;
+ ext = PATTERN (ext_insn);
+ ext_dest = SET_DEST (ext);
+
+ if (GET_CODE (use) != SET)
+ {
+ if (dump_file)
+ fprintf (dump_file, " no -- use is not a SET code\n");
+ return false;
+ }
+
+ /* Check for obviousness */
+ /* 1. The use is only reached by a single definition, the extension.
+ Otherwise, it wouldn't be legal to insert a duplicate extension
+ as other defs reaching this use may not need it. Certainly not all
+ other defs may reach here, but this is the conservative approximation.
+ Found in nof/muldf3.c */
+ df_use = df_find_use (use_insn, ext_dest);
+ if ( df_use && DF_REF_CHAIN (df_use)->next) /* NOTE(review): assumes the chain is not empty */
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ " no -- there are multiple definitions of reg=%d reaching this use\n",
+ (REGNO (register_exp (ext_dest))));
+ return false;
+ }
+
+ /* 2. The extension and use are in the same block. Since
+ this is a reached use, it's obvious we don't need another
+ extension. The exception is this -- we are trying to set
+ up a specific extension,insn pattern that will be recognized
+ by the insn selector. This pattern will also be ignored when
+ the next extension candidate list is created in the next pass. */
+ if (INSN_P (ext_insn) && INSN_P (use_insn))
+ {
+ if (BLOCK_FOR_INSN (ext_insn) == BLOCK_FOR_INSN (use_insn))
+ {
+ if (allow_same_block_duplication_p (ext_insn, use_insn))
+ ;
+ else
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ " no -- ext and use are in the same block\n");
+ return false;
+ }
+ }
+ }
+
+ /* 3. The use is a sign extension of the extension destination reg */
+ use_src = SET_SRC (use);
+ if (GET_CODE (use_src) == SIGN_EXTEND
+ && REG_P (register_exp (XEXP (use_src, 0)))
+ && REG_P (register_exp (ext_dest)))
+ if (GET_MODE (use_src) == GET_MODE (ext_dest)
+ && REGNO (register_exp (XEXP (use_src, 0))) ==
+ REGNO (register_exp (ext_dest)))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ " no -- the use is a sign extension of reg=%d\n",
+ REGNO (register_exp (XEXP (use_src, 0))));
+ return false;
+ }
+
+ /* 4. The use already has an extension inserted and one of the use's operands
+ is a register matching the reaching definition. So don't reinsert the same
+ extension. */
+ if (insn_flag_p (EXTELIM_INSERTED_FOR, INSN_UID (use_insn)))
+ {
+ df_ref *p_df_uses;
+ /* Operands used by the use_insn */
+ for (p_df_uses = DF_INSN_UID_USES (INSN_UID (use_insn)); *p_df_uses;
+ p_df_uses++)
+ {
+ if (REG_P (register_exp (ext_dest)) &&
+ DF_REF_REGNO (*p_df_uses) == REGNO (register_exp (ext_dest)))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ " no -- this use is marked for sign extension insertion already\n");
+ return false;
+ }
+ }
+ }
+
+ /* 5. There is also a definition of the ext dest register at this use (as can occur in self assignment). */
+ if (register_exp (SET_DEST (use)) && REG_P (ext_dest)
+ && REGNO (register_exp (SET_DEST (use))) == REGNO (ext_dest))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ " no -- this use also assigns the used register\n");
+ return false;
+ }
+
+
+ if (dump_file)
+ fprintf (dump_file, " yes\n");
+ return true;
+}
+
+/* Record USE_INSN in EXTREC's list of uses.  The extension is
+   duplicated in front of each recorded use in the next phase.  */
+
+static void
+save_ext_use (ext_record_t extrec, rtx use_insn)
+{
+  extelim_uid_t use_uid = INSN_UID (use_insn);
+
+  /* Flag the use so the same extension is not inserted for it twice.  */
+  insn_flag_set (EXTELIM_INSERTED_FOR, use_uid);
+  VEC_safe_push (rtx, heap, extrec->ext_uses, use_insn);
+}
+
+
+/* Walk the uses reached by the extension in EXTREC and save in
+   EXTREC every use at which save_ext_use_p says a duplicate of the
+   extension should be inserted.  Nothing is inserted here; that is
+   left to a later step.  */
+
+static void
+gather_ext_uses_info (ext_record_t extrec)
+{
+  struct df_link *chain;
+  df_ref *defs;
+  unsigned int use_number = 1;
+
+  gcc_assert (extrec != NULL);
+
+  /* The extension insn is expected to have a single def; its chain
+     lists the uses that def reaches.  */
+  defs = DF_INSN_UID_DEFS (INSN_UID (extrec->ext));
+  gcc_assert (defs[1] == NULL);
+
+  for (chain = DF_REF_CHAIN (defs[0]); chain != NULL; chain = chain->next)
+    {
+      df_ref ref = chain->ref;
+      rtx user;
+
+      /* Skip links that are empty, that are not register uses, or
+         that are artificial uses.  */
+      if (!ref
+          || !DF_REF_REG_USE_P (ref)
+          || DF_REF_IS_ARTIFICIAL (ref))
+        continue;
+
+      /* Debug insns are not considered.  */
+      user = DF_REF_INSN (ref);
+      if (!NONDEBUG_INSN_P (user))
+        continue;
+
+      if (dump_file)
+        fprintf (dump_file,
+                 " use #%d duplicate ext of reg=%d at uid=%u?\n",
+                 use_number, DF_REF_REGNO (defs[0]), INSN_UID (user));
+
+      /* Record the use if it qualifies.  */
+      if (save_ext_use_p (extrec, user))
+        save_ext_use (extrec, user);
+
+      /* The dump numbering counts every candidate, saved or not.  */
+      use_number++;
+    }
+}
+
+/* Duplicate every recorded extension in front of the uses that
+   qualify for it.  The uses are gathered for all extensions first
+   and the new insns are inserted only afterwards; doing this in two
+   passes avoids confusing the dataflow information.  */
+
+static void
+duplicate_exts_at_uses (void)
+{
+  ext_record_t rec;
+  rtx user;
+  unsigned ext_ix, use_ix;
+
+  /* Pass 1: decide, per extension, which uses get a duplicate.  */
+  FOR_EACH_VEC_ELT (ext_record_t, extensions, ext_ix, rec)
+    {
+      if (dump_file)
+        fprintf (dump_file, "gathering extension uid=%u use information\n",
+                 INSN_UID (rec->ext));
+      gather_ext_uses_info (rec);
+    }
+
+  /* Pass 2: emit the duplicates at the uses saved by pass 1.  */
+  FOR_EACH_VEC_ELT (ext_record_t, extensions, ext_ix, rec)
+    {
+      if (dump_file)
+        fprintf (dump_file, "extension uid=%u\n", INSN_UID (rec->ext));
+
+      FOR_EACH_VEC_ELT (rtx, rec->ext_uses, use_ix, user)
+        {
+          if (dump_file)
+            fprintf (dump_file, " duplicated at use uid=%u\n",
+                     INSN_UID (user));
+          insert_duplicate_ext_at_use (rec->ext, user);
+        }
+    }
+}
+#endif /* EXTELIM_DUPLICATE_EXTS_AT_USES */
+
+/* If RTN_INSN is a SET of the function return value, return the rtx
+   that sets it; otherwise return NULL.  For a REG destination the
+   insn itself is returned.  */
+
+static rtx
+return_p (rtx rtn_insn)
+{
+  rtx rtn = PATTERN (rtn_insn), dest;
+  int i;
+
+  /* This function returns an rtx, so "no" is NULL, not false.  */
+  if (GET_CODE (rtn) != SET)
+    return NULL;
+
+  dest = SET_DEST (rtn);
+  if (!REG_P (dest) && GET_CODE (dest) != PARALLEL)
+    return NULL;
+  if (!REG_FUNCTION_VALUE_P (dest))
+    return NULL;
+
+  /* Simple SET, return the insn */
+  if (REG_P (dest))
+    return rtn_insn;
+
+  /* PARALLEL destination: look through its elements.  The vector
+     belongs to DEST; RTN_INSN is an insn and has no vector operand.
+     NOTE(review): confirm that the elements can really be SETs.  */
+  for (i = XVECLEN (dest, 0) - 1; i >= 0; i--)
+    {
+      rtx body = XVECEXP (dest, 0, i);
+      if (GET_CODE (body) == SET
+          && REG_FUNCTION_VALUE_P (SET_DEST (body)))
+        return body;
+    }
+  return NULL;
+}
+
+/* Walk every insn of every basic block and push each insn that
+   return_p accepts onto the `returns' vector.  Only insns satisfying
+   NONDEBUG_INSN_P are examined.  Returns true if any was pushed.  */
+
+static bool
+find_returns (void)
+{
+  basic_block blk;
+  rtx cur;
+  bool any_found = false;
+
+  FOR_EACH_BB (blk)
+    {
+      FOR_BB_INSNS (blk, cur)
+        {
+          rtx hit;
+
+          /* Skip whatever fails NONDEBUG_INSN_P or is not recognized
+             by return_p; return_p is only called for the former.  */
+          if (!NONDEBUG_INSN_P (cur) || (hit = return_p (cur)) == NULL)
+            continue;
+
+          if (dump_file)
+            fprintf (dump_file, " found return at uid=%u\n",
+                     INSN_UID (hit));
+          VEC_safe_push (rtx, heap, returns, hit);
+          any_found = true;
+        }
+    }
+
+  return any_found;
+}
+
+/* Store the signedness and mode of the current function's DECL_RESULT
+   in *SIGNED_P and *RETURN_MODE; return false if it has none.  */
+
+static bool
+get_return_info (bool * signed_p, enum machine_mode *return_mode)
+{
+  tree result_decl = DECL_RESULT (current_function_decl);
+
+  if (result_decl == NULL)
+    return false;
+
+  *signed_p = !TYPE_UNSIGNED (TREE_TYPE (result_decl));
+  *return_mode = DECL_MODE (result_decl);
+  return true;
+}
+
+/* If the dest mode of the return is larger than the function return
+   mode, subreg the returned register to the return mode and extend
+   it back to the destination mode in a new pseudo.
+   E.g. unsigned, return mode: HImode
+     set (reg/i:DI Y) (reg:DI X)
+   becomes
+     set (reg:DI new) (zero_extend:DI (subreg:HI (reg:DI X)))
+     set (reg/i:DI Y) (reg:DI new)
+   FUN_SIGNED_P selects sign vs. zero extension, FUN_MODE is the
+   function return mode.  RTN_INSN is left alone on any failed check.  */
+
+static void
+make_ext_at_rtn (rtx rtn_insn, bool fun_signed_p, enum machine_mode fun_mode)
+{
+  rtx rtn = PATTERN (rtn_insn);
+  rtx dest, src, new_ext_dest, new_ext_src, new_ext_outer, new_ext_part,
+    new_ext_insn;
+  gcc_assert (GET_CODE (rtn) == SET);
+
+  dest = SET_DEST (rtn);
+  src = SET_SRC (rtn);
+
+  /* Deal with scalar rtn values only */
+  if (fun_mode != DImode
+      && fun_mode != SImode && fun_mode != HImode && fun_mode != QImode)
+    {
+      if (dump_file)
+        fprintf (dump_file, "failed-- not scalar return mode\n");
+      return;
+    }
+
+  /* Dest and src have to have the same mode.  This should always be
+     true for well formed rtl, but we check anyway.  */
+  if (GET_MODE (dest) != GET_MODE (src))
+    {
+      if (dump_file)
+        fprintf (dump_file, "failed-- dest and src modes differ\n");
+      return;
+    }
+
+  /* Also check that we are dealing with simple regs here.  */
+  if (!REG_P (dest) || !REG_P (src))
+    {
+      if (dump_file)
+        fprintf (dump_file, "failed-- dest or src is not REG_P\n");
+      return;
+    }
+
+  /* The return reg mode should never be smaller than fun return mode.  If the
+     same size, however, we can't subreg either, so return */
+  if (GET_MODE_BITSIZE (GET_MODE (dest)) <= GET_MODE_BITSIZE (fun_mode))
+    {
+      if (dump_file)
+        fprintf (dump_file,
+                 "failed-- dest size mode is smaller or equal to function mode size\n");
+      return;
+    }
+
+  /* From here we should be able to build a subreg since the function return mode
+     size is smaller than the return register mode size */
+  new_ext_dest = gen_reg_rtx (GET_MODE (src));	/* set (reg:MM new) */
+  new_ext_src = copy_rtx (src);	/* copy of X, copyX */
+  new_ext_outer = gen_lowpart_SUBREG (fun_mode, new_ext_src);	/* subreg:mm (reg:MM copyX) */
+  new_ext_part = fun_signed_p	/* extend:MM (subreg:mm (reg:MM copyX)) */
+    ? gen_rtx_SIGN_EXTEND (GET_MODE (src), new_ext_outer)
+    : gen_rtx_ZERO_EXTEND (GET_MODE (src), new_ext_outer);
+  /* Put it together */
+  new_ext_insn = gen_rtx_SET (VOIDmode, new_ext_dest, new_ext_part);
+
+  /* Modify src of return insn to use new pseudo */
+  replace_rtx (PATTERN (rtn_insn), src, new_ext_dest);
+  /* Update flags on new dest reg and notes for the replaced register */
+  copy_flags (src, new_ext_dest);
+  update_notes (REG_DEAD, rtn_insn, src, new_ext_dest);
+  /* Rescan the modified insn */
+  df_insn_rescan (rtn_insn);
+  /* Insert the new insn; its return value is not needed here */
+  extelim_emit_before (new_ext_insn, rtn_insn);
+
+  if (dump_file)
+    fprintf (dump_file, "success\n");
+}
+
+/* Insert extensions at the function's return points.  The insns
+   that set the return value are collected first; each of them is
+   then handed to make_ext_at_rtn together with the signedness and
+   mode of the function result.  */
+
+static void
+insert_ext_at_returns (void)
+{
+  enum machine_mode result_mode;
+  bool result_signed;
+  rtx ret;
+  int idx;
+
+  if (dump_file)
+    fprintf (dump_file, "gathering return insns...\n");
+
+  /* Nothing to do without any return insn, or when the function
+     result cannot be described.  The second test only runs if the
+     first one found something.  */
+  if (!find_returns ()
+      || !get_return_info (&result_signed, &result_mode))
+    return;
+
+  /* Try every collected return insn; make_ext_at_rtn decides for
+     itself whether an extension can be generated and prints the
+     outcome after the message started here.  */
+  FOR_EACH_VEC_ELT (rtx, returns, idx, ret)
+    {
+      if (dump_file)
+        fprintf (dump_file, " making extension at return uid=%u...",
+                 INSN_UID (ret));
+      make_ext_at_rtn (ret, result_signed, result_mode);
+    }
+}
+
+/* VEC_qsort callback: order extension records by the loop depth of
+   the basic block containing the extension insn, deepest first, so
+   that extensions in innermost loops are processed first.  P_ER1
+   and P_ER2 point at the two vector elements being compared.  */
+
+static int
+ext_record_compare (const void *p_er1, const void *p_er2)
+{
+  const ext_record_t lhs = *(const ext_record_t *) p_er1;
+  const ext_record_t rhs = *(const ext_record_t *) p_er2;
+  basic_block lhs_bb, rhs_bb;
+
+  /* Identical records need no block lookup.  */
+  if (lhs == rhs)
+    return 0;
+
+  lhs_bb = BLOCK_FOR_INSN (lhs->ext);
+  rhs_bb = BLOCK_FOR_INSN (rhs->ext);
+
+  /* Sort high to low: the result is positive when RHS sits at the
+     greater loop depth, which orders it ahead of LHS, and negative
+     in the opposite case.  */
+  return rhs_bb->loop_depth - lhs_bb->loop_depth;
+}
+
+/* The main interface to this optimization: add extensions at return
+   points (and optionally at uses), rebuild the DF information, then
+   analyze each extension and call replace_ext_with_copy.  */
+
+static void
+extension_elimination (void)
+{
+  ext_record_t ext;
+  unsigned i;
+
+  init_pass ();
+
+  /* Find initial sign extension candidates */
+  if (!find_extensions ())
+    {
+      finish_pass ();
+      return;
+    }
+
+  /* Insert sign extension at return points in the function.  */
+  insert_ext_at_returns ();
+
+  /* Duplicate the sign extensions at their use
+     points unless the use is already obviously sign
+     extended or extension is already added.  */
+#if EXTELIM_DUPLICATE_EXTS_AT_USES
+  duplicate_exts_at_uses ();
+#endif
+
+  /* Update DF information since now have new insns.  */
+  df_finish_pass (true);
+  df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
+  df_analyze ();
+
+#if EXTELIM_DF_DUMP
+  if (dump_file)
+    df_dump (dump_file);
+#endif
+
+  /* Init statistics */
+  num_cand = 0;
+  num_cand_ignored = 0;
+  num_cand_transformed = 0;
+
+  /* Free old extensions list, generate new one that includes
+     the new extensions.  */
+  free_extensions ();
+
+  if (!find_extensions ())
+    {
+      finish_pass ();
+      return;
+    }
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "\nRTL After Extension Duplication\n");
+      print_rtl (dump_file, get_insns ());
+      fprintf (dump_file, "Begin extension elimination analysis\n");
+    }
+
+  /* Sort the extensions by loop depth.  We want to try to eliminate
+     those in innermost loops (highest loop depth) first.  */
+  VEC_qsort (ext_record_t, extensions, ext_record_compare);
+
+  /* Iterate through extension worklist */
+  FOR_EACH_VEC_ELT (ext_record_t, extensions, i, ext)
+    {
+      rtx ext_insn = ext->ext;
+      rtx ext_src = SET_SRC (PATTERN (ext_insn));
+      const char *ext_name =
+        GET_CODE (ext_src) == SIGN_EXTEND ? "sign" : "zero";
+      const char *inserted =
+        insn_flag_p (EXTELIM_INSERTED, INSN_UID (ext_insn)) ? "inserted" : "";
+
+      /* The format needs one conversion for each of the four arguments.  */
+      if (dump_file)
+        fprintf (dump_file,
+                 "analyzing %s %s extension uid=%u (loop_depth=%d)\n",
+                 inserted, ext_name, INSN_UID (ext_insn),
+                 BLOCK_FOR_INSN (ext_insn)->loop_depth);
+
+      current_ext_record = ext;
+      eliminate_one_extend (ext->ext);
+    }
+
+  if (dump_file)
+    fprintf (dump_file, "Begin extension elimination transformations\n");
+
+  replace_ext_with_copy ();
+
+  if (dump_file)
+    fprintf (dump_file, "\nRTL After Extension Elimination\n");
+
+  finish_pass ();
+
+  /* Print statistics; percentages read 0 when there is no candidate.  */
+  if (dump_file)
+    {
+      float total = (float) num_cand;
+      float detected = num_cand ? (num_cand - num_cand_ignored) / total : 0;
+      float converted = num_cand ? num_cand_transformed / total : 0;
+
+      fprintf (dump_file,
+               "Number of extensions ignored: %d (of %d candidiates)\nDETECTION EFFECTIVENESS: %f%%\n",
+               num_cand_ignored, num_cand, detected * 100);
+      fprintf (dump_file,
+               "Number of extensions converted to copy: %d (of %d candidiates)\nCONVERSION EFFECTIVENESS: %f%%\n",
+               num_cand_transformed, num_cand, converted * 100);
+    }
+}
+
+/* Remove redundant extensions.  Installed as the execute hook of
+   pass_rtl_extelim; runs extension_elimination and returns 0.  */
+static unsigned int
+rest_of_handle_extelim (void)
+{
+ extension_elimination ();
+ return 0;
+}
+
+/* Pass gate: enabled only if optimize > 0 and flag_extelim is set.  */
+
+static bool
+gate_handle_extelim (void)
+{
+  return optimize > 0 ? flag_extelim != 0 : false;
+}
+
+struct rtl_opt_pass pass_rtl_extelim = { /* descriptor of the extelim RTL pass; declared extern in tree-pass.h */
+ {
+ RTL_PASS,
+ "extelim", /* name */
+ gate_handle_extelim, /* gate: optimize > 0 && flag_extelim */
+ rest_of_handle_extelim, /* execute: runs extension_elimination */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_EXTELIM, /* tv_id: "extension elimination" in timevar.def */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_ggc_collect | TODO_dump_func | TODO_df_finish | TODO_verify_rtl_sharing, /* todo_flags_finish */
+ }
+};