fstrict-aliasing

本文轉載自查看原文 2013-07-19 22:51 3841 C_language

誠如“optimization blocks”文中所述，由於相同的指針可能指向相關的內存區，因此編譯器將不做過分的優化……

特意搜了下編譯器在不同的優化等級下都有哪些默認優化，因此有了此記錄（比較長，盡管只講述了fstrict-aliasing ……）

下述內容引述自：http://dbp-consulting.com/tutorials/StrictAliasing.html

http://cellperformance.beyond3d.com/articles/2006/06/understanding-strict-aliasing.html

http://stackoverflow.com/questions/98650/what-is-the-strict-aliasing-rule

gcc -v
        gcc version 4.4.5 (Ubuntu/Linaro 4.4.4-14ubuntu5)
arm-none-linux-gnueabi-gcc -v
        gcc version 4.3.2 (Sourcery G++ Lite 2008q3-72)

首先看下在不同優化等級下gcc的默認優化參數(此處重點關注fstrict-aliasing  在O0（默認優化等級），O2（一般使用時的優化等級）
下的開啟情況）
gcc -Q -O0 --help=optimizers
    -fstrict-aliasing                   [disabled]
arm-gcc -Q -O0 --help=optimizers
    -fstrict-aliasing                   [disabled]
    
gcc -Q -O2 --help=optimizers
    -fstrict-aliasing                   [enabled]
    
arm-gcc -Q -O2 --help=optimizers
    -fstrict-aliasing                   [enabled]

在O0等級下，無論是否顯式的添加fstrict-aliasing，fstrict-aliasing都被禁用
在O2等級下，默認開啟fstrict-aliasing，可以顯式的添加fno-strict-aliasing以禁用fstrict-aliasing

先看下程序和運行結果（分arm與x86）

示例file1.c、
/*
 * Swap the two 16-bit halves of arg by viewing its storage through an
 * unsigned short pointer.
 *
 * This is the article's deliberately non-conforming example: arg is an
 * unsigned int, yet it is read and written through unsigned short lvalues,
 * which is not one of the access types permitted by the aliasing rules quoted
 * later in the article. The article records 56781234 for an unoptimised build
 * and 12345678 for -O2 (strict aliasing enabled).
 *
 * The indexing assumes unsigned int is exactly two unsigned shorts wide, as
 * on the x86 (-m32) and ARM targets used for the listings.
 */
unsigned int swap_words(unsigned int arg)
{
    /* The cast that creates the illegal alias of arg. */
    unsigned short int* const sp = (unsigned short int*)&arg;
    unsigned short int lo = sp[0];
    unsigned short int hi = sp[1];

    /* Stores go through sp; the compiler may assume they do not modify arg. */
    sp[0] = hi;
    sp[1] = lo;
    return arg;
}

/*
 * Driver for file1.c: swaps the halves of 0x12345678 and prints the result
 * as eight hexadecimal digits.
 *
 * x is an int while swap_words takes and returns unsigned int, so the value
 * is converted implicitly in both directions.
 * NOTE(review): printf is used but no #include <stdio.h> is visible in this
 * snippet.
 */
int main(void)
{
    int x = 0x12345678;
    x = swap_words(x);      /* intended result: 0x56781234 */
    printf("%08x\n", x);
    return 0;
}
gcc -m32 file1.c -o mytest : 56781234
swap_words:
    pushl   %ebp
    movl    %esp,      %ebp
    subl    $16,       %esp
    leal    8(%ebp),   %eax
    movl    %eax,      -8(%ebp)   ;sp
    movl    -8(%ebp),  %eax
    movzwl  (%eax),    %eax       ;arg    
    movw    %ax,       -2(%ebp)   ;arg(l) 
    movl    -8(%ebp),  %eax
    addl    $2,        %eax
    movzwl  (%eax),    %eax
    movw    %ax,       -4(%ebp)   ;arg(h) 
    movl    -8(%ebp),  %eax
    movzwl  -4(%ebp),  %edx
    movw    %dx,       (%eax)     ;arg: arg(h)arg(h)
    movl    -8(%ebp),  %eax
    leal    2(%eax),   %edx
    movzwl  -2(%ebp),  %eax      
    movw    %ax,       (%edx)     ;arg: arg(l)arg(h)
    movl    8(%ebp),   %eax
    leave   
    ret

gcc -m32 -O2 file1.c -o mytest: 12345678
swap_words:
    pushl   %ebp
    movl    %esp,      %ebp
    movzwl  8(%ebp),   %eax      ;arg    
    movzwl  10(%ebp),  %edx      ;arg(h) 
    movw    %ax,       10(%ebp)  ;arg(l)arg(l)
    movl    8(%ebp),   %eax      ;arg(l)arg(l)
    movw    %dx,       8(%ebp)   ;arg(l)arg(h)
    popl    %ebp    
    ret
盡管在swap_words中正確修改了調用者的棧上的值，但是在main中根本就不調用swap_words，
eax的值與我們的原意也不同

gcc -m32 -fno-strict-aliasing -O2 file1.c -o mytest :56781234
swap_words:
    pushl   %ebp
    movl    %esp,      %ebp
    movzwl  8(%ebp),   %eax
    movzwl  10(%ebp),  %edx
    movw    %ax,       10(%ebp)
    movw    %dx,       8(%ebp) 
    movl    8(%ebp),   %eax
    popl    %ebp    
    ret
正確計算了返回值

arm-gcc file1.c -o mytest: 56781234
; swap_words from file1.c, ARM build without optimisation: each C statement is
; carried out literally against the stack copy of arg at [fp, #-16].
; Annotations of the form "arg:X Y" give the contents of arg, listing the
; halfword at offset 2 first and the halfword at offset 0 second.
swap_words:
    str     fp, [sp, #-4]!
    add     fp, sp, #0
    sub     sp, sp, #20 
    str     r0, [fp, #-16]      ;arg
    sub     r3, fp, #16        
    str     r3, [fp, #-12]      ;sp = &arg
    ldr     r3, [fp, #-12]
    ldrh    r3, [r3, #0]        
    strh    r3, [fp, #-8]       ;lo = arg(l)
    ldr     r3, [fp, #-12]
    add     r3, r3, #2
    ldrh    r3, [r3, #0]     
    strh    r3, [fp, #-6]       ;hi = arg(h)
    ldr     r2, [fp, #-12]
    ldrh    r3, [fp, #-6]    
    strh    r3, [r2, #0]        ;sp[0] = hi -> arg:arg(h)arg(h)
    ldr     r3, [fp, #-12]
    add     r2, r3, #2
    ldrh    r3, [fp, #-8]    
    strh    r3, [r2, #0]        ;sp[1] = lo -> arg:arg(l)arg(h)
    ldr     r3, [fp, #-16]      ;reload arg from memory after both stores
    mov     r0, r3  
    add     sp, fp, #0
    ldmfd   sp!, {fp}
    bx      lr 

arm-gcc file1.c -O2 -o mytest: 12345678
swap_words:
    sub sp, sp, #8
    str r0, [sp, #4]
    add sp, sp, #8
    bx  lr  
因為實施了fstrict-aliasing優化，因此，swap_words中做了非常大的優化，以至於不做任何計算，直接返回r0

arm-gcc file1.c -O2 -fno-strict-aliasing -o mytest: 56781234
swap_words:
    sub sp, sp, #8
    str r0, [sp, #4]
    ldrh    r3, [sp, #4]
    ldrh    r2, [sp, #6]
    strh    r3, [sp, #6]    @ movhi   
    ldr r0, [sp, #4]
    add sp, sp, #8
    bx  lr  
當我們加上fno-strict-aliasing參數時，swap_words正確計算了返回值，盡管file1.c違背了strict-aliasing規則

先以如上實例闡述一些概念

通過以上的對比分析，我們看到當fstrict-aliasing生效時，編譯器做了大量的優化；由於我們的程序違背了strict-aliasing規則，優化后的結果與原意不符，但這是由我們程序員自身造成的，與編譯器本身的優化無關。那么，如果我們既想讓編譯器做出大量的優化，同時又希望它對違背strict-aliasing規則的程序發出警告，該怎么辦呢？加入-Wstrict-aliasing編譯參數。
例如：
arm-none-linux-gnueabi-gcc -O2 -Wstrict-aliasing file1.c -o mytest，會有如下輸出：
warning: likely type-punning may break strict-aliasing rules
當我們遇到這類輸出時一定要注意（要么我們就使用fno-strict-aliasing參數，例如內核,使用fno-strict-aliasing后只是在某些部分沒有做優化，但不會導致編譯出的程序與我們的原意相反）

關於c/c++中strict aliasing記錄（重點是c）
aliasing is when more than one lvalue refers to the same memory location.

strict aliasing is an assumption, made by the C (or C++) compiler, that dereferencing pointers to objects of different types will never refer to the same memory location. Enable this option in GCC with the -fstrict-aliasing flag. Be sure that all code can safely run with this rule enabled. Enable strict aliasing related warnings with -Wstrict-aliasing, but do not expect to be warned in all cases. In order to discover aliasing problems as quickly as possible, -fstrict-aliasing should always be included in the compilation flags for GCC. Otherwise problems may only be visible at the highest optimization levels where it is the most difficult to debug（如果我們使用參數-fno-strict-aliasing，有些優化將不被實施，而且對於一些潛在的轉換錯誤，編譯器不會報出）

The compiler writers know what the strict aliasing rules are for. They are written to let compiler writers know when they can safely assume that a change made through one variable won't affect the value of another variable.
Conversely, the compiler must act very conservatively to access the memory when the compiler cannot assume that two object are not aliased.

在編譯選項中加入-fstrict-aliasing的優勢在於向編譯器說明不同類型的lvalue將指向不相關的內存區域，編譯器可以做大量的優化。在編譯內核的編譯選項CFLAGS中，加入了-fno-strict-aliasing，向編譯器表明不同類型的lvalue可能指向相關的內存區域，因此編譯器不會做出一些極端的優化而造成不安全（內核編譯中優化選項為-O2, -O2優化時默認是-fstrict-aliasing，因此需要顯
式的指出編譯參數是-fno-strict-aliasing）

1、編譯器設計者知道一些左值量會指向相關的內存區域，因此不會做出一些不安全的優化（即使編譯選項顯式的指出為-fstrict-aliasing,也不會做極端的優化，因為這些類型的左值量有可能指向相關的內存的區域）；
2、當我們給編譯器加入-fstrict-aliasing選項時，就意味着使用編譯器的程序員清晰地向編譯器表明：不同類型的左值量不會指向相關的內存區域，編譯器可以做出大量的優化，由此產生的不良后果由程序員負責，而非編譯器設計者負責;
   當使用-fstrict-aliasing時，有如下基本示例表明不同類型的左值將不會指向相關的內存區域：
   (1)pointers to different built in types do not alias
   (2)pointers to aggregate or union types with differing tags do not alias
   (3)pointers to aggregate or union types which differ only by name may alias

在 1 中提到:一些左值量可能指向相關的內存區域，那么都有哪些可能呢？如下：
    An object shall have its stored value accessed only by an lvalue expression that has one of the following types:
    (1)— a type compatible with the effective type of the object,
    (2)— a qualified version of a type compatible with the effective type of the object,
    (3)— a type that is the signed or unsigned type corresponding to the effective type of the object,
    (4)— a type that is the signed or unsigned type corresponding to a qualified version of the effective type of the object,
    (5)— an aggregate or union type that includes one of the aforementioned types among its members (including, recursively, a member of a subaggregate or contained union), or
    (6)— a character type.
即：
    (1)Things that are compatible types or differ only by the addition of any combination of signed, unsigned, or volatile. For most purposes compatible type just means the same type. If you want more details you can read the specs. (Example: If you get a pointer to long, and a pointer to const unsigned long they could point to the same thing.)
    (2)An aggregate (struct or class) or union type can alias types contained inside them. (Example: If a function gets passed a pointer to an int, and a pointer to a struct or union containing an int, or possibly containing another struct or union containing an int, or containing...ad infinitum, it's possible that the int* points to an int contained inside the struct or union pointed at by the other pointer.)
    (3)A character type. A char*, signed char*, or unsigned char* is specifically allowed by the specs to point to anything. That means it can alias anything in memory.
    (4)For C++ only, a possibly CV (const and/or volatile) qualified base class type of a dynamic type can alias the child type. (Example: if class dog has class animal for a base class, pointers or references to class dog and class animal can alias.)
    (5)Any lvalue has to be assumed to possibly alias to another lvalue if these rules say that they can alias. An aliasing issue is just as likely to come up with values passed by reference as it is with values passed as pointer to values. Additionally any combination of pointers and references have a possibility of aliasing.

看完上面的內容，我們來測試下：

示例file2.c, 此實例驗證了上述的第二點（2）

/* Three consecutive unsigned ints; test() reads only the b member. */
typedef struct
{
    unsigned int a;
    unsigned int b;
    unsigned int c;
}Sample;

/*
 * Add uniform->b to each of the first count elements of values.
 *
 *   values   array of at least count unsigned ints, updated in place
 *   uniform  structure whose b member is added to every element
 *   count    number of elements to update; 0 leaves values untouched
 *
 * values is an unsigned int* and Sample contains unsigned int members, so a
 * store to values[i] is allowed to modify uniform->b. The compiler therefore
 * has to re-read uniform->b on every iteration, even with strict aliasing
 * enabled.
 */
void test(unsigned int* values, 
          Sample* uniform, 
          unsigned int count)
{
    unsigned int i = 0;
    for(i = 0; i < count; i++)
    {
        /*
         * "+=" rather than "=": the compiler listings that accompany this
         * snippet all load values[i], add uniform->b and store the sum back.
         */
        values[i] += (unsigned int)uniform->b;
    }
}

gcc file2.c -o mytest

; test() from file2.c, x86 build without optimisation.
; Stack arguments: 8(%ebp) = values, 12(%ebp) = uniform, 16(%ebp) = count.
; The loop counter i lives at -4(%ebp).
test:
    pushl   %ebp
    movl    %esp,         %ebp
    subl    $16,          %esp
    movl    $0,           -4(%ebp)       ;i = 0 (initialiser)
    movl    $0,           -4(%ebp)       ;i = 0 (for-loop init)
    jmp .L2 
.L3:
    movl    -4(%ebp),     %eax     ;i
    sall    $2,           %eax
    addl    8(%ebp),      %eax     ;eax = values + 4 * i (store address)
    movl    -4(%ebp),     %edx     ;i
    sall    $2,           %edx
    addl    8(%ebp),      %edx     ;edx = values + 4 * i (load address)
    movl    (%edx),       %ecx     ;ecx = values[i]
    movl    12(%ebp),     %edx
    movl    4(%edx),      %edx     ;uniform->b is re-read on every iteration
    leal    (%ecx,%edx),  %edx     ;edx = values[i] + uniform->b
    movl    %edx,         (%eax)   ;values[i] = sum
    addl    $1,           -4(%ebp)
.L2:
    movl    -4(%ebp),     %eax     ;i      
    cmpl    16(%ebp),     %eax     ;i - count
    jb  .L3                        ;unsigned compare: loop while i < count
    leave   
    ret


gcc file2.c -O2 -o mytest
; test() from file2.c, x86 build with -O2.
; edx = values, esi = uniform, ebx = count, eax = i.
; uniform->b is still loaded inside the loop rather than hoisted out of it.
test:
    pushl   %ebp
    movl    %esp,        %ebp
    pushl   %esi
    movl    8(%ebp),     %edx
    pushl   %ebx
    movl    16(%ebp),    %ebx
    movl    12(%ebp),    %esi
    testl   %ebx,        %ebx
    je  .L4                     ;count == 0: skip the loop
    xorl    %eax,        %eax
.L3:
    movl    4(%esi),     %ecx   ;uniform->b is re-read on every iteration
    addl    %ecx, (%edx,%eax,4) ;values[i] += uniform->b
    addl    $1,          %eax
    cmpl    %eax,        %ebx
    ja  .L3                     ;unsigned compare: loop while count > i
.L4:
    popl    %ebx    
    popl    %esi    
    popl    %ebp    
    ret


arm-gcc file2.c -O2 -o mytest:
; test() from file2.c, ARM build with -O2.
; r0 = values, r4 = uniform (copied from r1), r5 = count (copied from r2),
; r1 = i, ip = byte offset of values[i].
; uniform->b is still loaded inside the loop rather than hoisted out of it.
test:
    stmfd   sp!, {r4, r5}  
    subs    r5, r2, #0
    mov     r4, r1
    beq     .L4                ;count == 0: skip the loop
    mov     r1, #0
    mov     ip, r1
.L3:
    ldr     r3, [r0, ip]       ;r3 = values[i]
    ldr     r2, [r4, #4]   ;uniform->b is re-read on every iteration
    add     r1, r1, #1
    add     r3, r3, r2         ;r3 = values[i] + uniform->b
    cmp     r5, r1
    str     r3, [r0, ip]       ;values[i] = sum
    add     ip, ip, #4
    bhi     .L3                ;unsigned compare: loop while count > i
.L4:
    ldmfd   sp!, {r4, r5}
    bx      lr

=========================================================================

The most commonly accepted method of converting one type of object to another is by
using a union type;
實例file3.c, 
/*
 * Overlay of one unsigned int with an array of two unsigned shorts, so the
 * same storage can be accessed as a whole word or as its two halves.
 */
typedef union
{
        unsigned int u32;
        unsigned short int u16[2]; 
}U32;

/*
 * Swap the two 16-bit halves of arg by copying it into a local U32 union and
 * exchanging the u16 members.
 *
 * All accesses go through the union object itself, which is the conversion
 * method the article presents as the commonly accepted one. The article's
 * -O2 listings show this reduced to a single 16-bit rotate on both x86 and
 * ARM.
 *
 * Returns the value of arg with u16[0] and u16[1] exchanged.
 */
unsigned int swap_words(unsigned int arg)
{
    U32 in; 
    unsigned short int lo; 
    unsigned short int hi; 

    /* Write the whole word, then read it back as two halves. */
    in.u32 = arg;
    lo = in.u16[0];
    hi = in.u16[1];

    in.u16[0] = hi;
    in.u16[1] = lo;

    return in.u32;
}

gcc file3.c -S -o file3.s
swap_words:
pushl   %ebp
movl    %esp,      %ebp
subl    $16,       %esp
movl    8(%ebp),   %eax       ;arg
movl    %eax,      -8(%ebp)
movzwl  -8(%ebp),  %eax       ;arg
movw    %ax,       -2(%ebp)   ;arg(l) 
movzwl  -6(%ebp),  %eax       ;arg(h) 
movw    %ax,       -4(%ebp)   ;arg(h) 
movzwl  -4(%ebp),  %eax
movw    %ax,       -8(%ebp)   ;arg(h)arg(h)
movzwl  -2(%ebp),  %eax
movw    %ax,       -6(%ebp)   ;arg(l)arg(h)
movl    -8(%ebp),  %eax
leave   
ret


gcc file3.c -O2 -S -o file3.s
既做了優化，又沒有錯誤，驗證上述第二點
swap_words:
    pushl   %ebp
    movl    %esp,    %ebp
    movl    8(%ebp), %eax
    popl    %ebp    
    roll    $16,     %eax
    ret

arm-gcc file3.c -S -o file3.s
swap_words:
    str     fp, [sp, #-4]!
    add     fp, sp, #0
    sub     sp, sp, #20 
    str     r0, [fp, #-16]      ;arg
    ldr     r3, [fp, #-16]      ;arg
    str     r3, [fp, #-12]      ;arg
    ldrh    r3, [fp, #-12]      ;arg(l)
    strh    r3, [fp, #-8]       ;arg(l)
    ldrh    r3, [fp, #-10]      ;arg(h)
    strh    r3, [fp, #-6]       ;arg(h)
    ldrh    r3, [fp, #-6]       ;arg(h)
    strh    r3, [fp, #-12]      ;arg(h)arg(h)
    ldrh    r3, [fp, #-8]       ;arg(l)
    strh    r3, [fp, #-10]      ;arg(l)arg(h)
    ldr     r3, [fp, #-12]      ;arg(l)arg(h)
    mov     r0, r3
    add     sp, fp, #0
    ldmfd   sp!, {fp}
    bx      lr  

arm-gcc file3.c -O2 -S -o file3.s
//直接用了循環右移來實現，優化啊！！！
swap_words:
    mov r0, r0, ror #16 
    bx  lr

=============================================================================

Casting proper may be done between a pointer to a type and a pointer to an aggregate or union type which contains a member of a compatible type;


實例file4.c
/*
 * Swap the two 16-bit halves of arg by viewing the parameter's own storage
 * through a pointer to the U32 union (the union declared for file3.c; it is
 * not repeated in this snippet).
 *
 * Unlike file3.c no copy is made: the halves are exchanged in place inside
 * arg and the result is read back through the union's u32 member.
 * NOTE(review): the article presents this pointer-to-union cast as a proper
 * conversion; confirm against the compiler's documentation before relying on
 * it in new code.
 */
unsigned int swap_words(unsigned int arg)
{
    U32* in = (U32*)&arg;
    unsigned short int lo = in->u16[0];
    unsigned short int hi = in->u16[1];

    in->u16[0] = hi;
    in->u16[1] = lo;
    /* Read the result through the same union pointer used for the stores. */
    return (in->u32);
}

gcc file4.c -S -o file4.s
swap_words:
    pushl   %ebp
    movl    %esp,     %ebp    
    subl    $16,      %esp    
    leal    8(%ebp),  %eax       
    movl    %eax,     -8(%ebp)   ;in
    movl    -8(%ebp), %eax
    movzwl  (%eax),   %eax       ;arg    
    movw    %ax,      -2(%ebp)   ;arg(l)
    movl    -8(%ebp), %eax
    movzwl  2(%eax),  %eax       ;arg(h) 
    movw    %ax,      -4(%ebp)   ;arg(h) 
    movl    -8(%ebp), %eax       ;in     
    movzwl  -4(%ebp), %edx       ;arg(h) 
    movw    %dx,      (%eax)     ;arg(h)arg(h)
    movl    -8(%ebp), %eax       ;in     
    movzwl  -2(%ebp), %edx       ;arg(l) 
    movw    %dx,      2(%eax)    ;arg(l)arg(h)
    movl    -8(%ebp), %eax
    movl    (%eax),   %eax       ;arg(l)arg(h)
    leave   
    ret

gcc file4.c -O2 -S -o file4.s
swap_words:
    pushl   %ebp
    movl    %esp,     %ebp    
    movzwl  8(%ebp),  %eax
    movzwl  10(%ebp), %edx
    movw    %ax,      10(%ebp)
    movw    %dx,      8(%ebp)
    movl    8(%ebp),  %eax
    popl    %ebp    
    ret


arm-gcc file4.c -S -o file4.s
swap_words:
    str     fp, [sp, #-4]!
    add     fp, sp, #0
    sub     sp, sp, #20 
    str     r0, [fp, #-16]  ;arg
    sub     r3, fp, #16 
    str     r3, [fp, #-12]  ;in
    ldr     r3, [fp, #-12]
    ldrh    r3, [r3, #0]    ;arg(l)
    strh    r3, [fp, #-8]   ;arg(l)
    ldr     r3, [fp, #-12]   
    ldrh    r3, [r3, #2]    ;arg(h)
    strh    r3, [fp, #-6]   ;arg(h)
    ldr     r2, [fp, #-12]
    ldrh    r3, [fp, #-6]   ;arg(h)
    strh    r3, [r2, #0]    ;arg(h)arg(h)
    ldr     r2, [fp, #-12]
    ldrh    r3, [fp, #-8]   ;arg(l)
    strh    r3, [r2, #2]    ;arg(l)arg(h)
    ldr     r3, [fp, #-12]
    ldr     r3, [r3, #0]    ;arg(l)arg(h)
    mov     r0, r3  
    add     sp, fp, #0
    ldmfd   sp!, {fp}
    bx      lr      

arm-gcc file4.c -O2 -S -o file4.s
swap_words:
    sub     sp, sp, #8
    add     r3, sp, #8
    str     r0, [r3, #-4]!  ;arg
    ldrh    r2, [sp, #4]    ;arg(l)
    ldrh    r1, [r3, #2]    ;arg(h)
    strh    r1, [sp, #4]    ;arg(h)arg(h)
    strh    r2, [r3, #2]    ;arg(l)arg(h)
    ldr     r0, [sp, #4]    ;arg(l)arg(h)
    add     sp, sp, #8
    bx      lr

============================================================================

實例file5.c（因為對於部分結構來說空間較大，用指針傳遞）
/*
 * Exchange the upper and lower 16 bits of *arg in place.
 *
 * The word is read and written through a U32 pointer (the union declared for
 * file3.c), but only its u32 member is used; the exchange itself is done with
 * shifts. The article's -O2 listings show this compiled to a 16-bit rotate of
 * the word in memory.
 *
 *   arg  pointer to the word to modify; it is dereferenced unconditionally
 *
 * The shifts by 16 exchange the halves exactly when unsigned int is 32 bits
 * wide, as on the targets used for the listings.
 */
void swap_words(unsigned int *arg)
{
    U32* combined = (U32*)arg;
    unsigned int start = combined->u32;
    /* Named after the position each half occupies in the result. */
    unsigned int lo = start >> 16;
    unsigned int hi = start << 16;

    unsigned int final = lo | hi;

    combined->u32 = final;
}

gcc file5.c -S -o file5.s
swap_words:
    pushl   %ebp
    movl    %esp,       %ebp    
    subl    $32,        %esp    
    movl    8(%ebp),    %eax    
    movl    %eax,       -4(%ebp)  ;combined 
    movl    -4(%ebp),   %eax
    movl    (%eax),     %eax    
    movl    %eax,       -8(%ebp)  ;start  
    movl    -8(%ebp),   %eax
    shrl    $16,        %eax    
    movl    %eax,       -12(%ebp) ;lo
    movl    -8(%ebp),   %eax
    sall    $16,        %eax    
    movl    %eax,       -16(%ebp) ;hi
    movl    -16(%ebp),  %eax      ;hi     
    movl    -12(%ebp),  %edx      ;lo     
    orl     %edx,       %eax      ;hi | lo
    movl    %eax,       -20(%ebp) ;final
    movl    -4(%ebp),   %eax
    movl    -20(%ebp),  %edx
    movl    %edx,       (%eax)    ;combined->u32 = final
    leave   
    ret

gcc file5.c -O2 -S -o file5.s  
swap_words:
    pushl   %ebp
    movl    %esp,    %ebp
    movl    8(%ebp), %eax
    roll    $16,     (%eax)
    popl    %ebp    
    ret

arm-gcc file5.c -S -o file5.s 
swap_words:
    str fp, [sp, #-4]!
    add fp, sp, #0
    sub sp, sp, #36 
    str r0, [fp, #-32]
    ldr r3, [fp, #-32]   
    str r3, [fp, #-24]   ;combined
    ldr r3, [fp, #-24]
    ldr r3, [r3, #0]     
    str r3, [fp, #-20]   ;start
    ldr r3, [fp, #-20]
    mov r3, r3, lsr #16 
    str r3, [fp, #-16]   ;lo
    ldr r3, [fp, #-20]
    mov r3, r3, asl #16 
    str r3, [fp, #-12]   ;hi
    ldr r2, [fp, #-16]
    ldr r3, [fp, #-12]
    orr r3, r2, r3
    str r3, [fp, #-8]    ;final
    ldr r2, [fp, #-24]
    ldr r3, [fp, #-8]
    str r3, [r2, #0]     ;combined->u32 = final
    add sp, fp, #0
    ldmfd   sp!, {fp}
    bx  lr  

arm-gcc file5.c -O2 -S -o file5.s
//注意與file4.c中使用同樣編譯選項的編譯結果的區別(file5.c比較高效,
對於小結構體而言，file3.c同樣的編譯選項更高效)  
swap_words:
    ldr r3, [r0, #0]
    mov r3, r3, ror #16
    str r3, [r0, #0]
    bx  lr

==============================================================================

實例file6.c
Occasionally a programmer may encounter the following INVALID method for creating an alias with 
a pointer of a different type: (注意上述（2）的適用范圍不包括如下情形）
/*
 * Union of two pointer types. It overlays the pointers themselves, not the
 * objects they point to.
 */
typedef union
{
    unsigned short int* sp;
    unsigned int* wp;
}U32P;

/*
 * The article's INVALID variant of swap_words: the address of arg is stored
 * through the wp member and then used through the sp member, so arg (an
 * unsigned int) ends up being accessed through unsigned short lvalues.
 *
 * The union says nothing about the relationship between the pointed-to
 * values, so this is the same aliasing violation as file1.c. The article
 * records y as 56781234 for an unoptimised build and 12345678 for -O2.
 */
unsigned int swap_words(unsigned int arg)
{
    U32P in = {.wp = &arg};
    /* Each half is read as unsigned short and widened to unsigned int. */
    const unsigned int hi = in.sp[1];
    const unsigned int lo = in.sp[0];

    /* Stores through unsigned short lvalues narrow the values again. */
    in.sp[0] = hi;
    in.sp[1] = lo;

    return arg;
}

/*
 * Driver for file6.c: prints the original value x and the value y returned
 * by swap_words, each as eight hexadecimal digits.
 *
 * x and y are int while swap_words takes and returns unsigned int, so the
 * values are converted implicitly.
 * NOTE(review): printf is used but no #include <stdio.h> is visible in this
 * snippet.
 */
int main(void)
{
    int x = 0x12345678;
    int y = swap_words(x);  /* intended result: 0x56781234 */

    printf("x:%08x  y:%08x\n", x, y);
    return 0;
}

gcc file6.c -o mytest: 12345678   56781234
swap_words:
    pushl   %ebp
    movl    %esp,      %ebp    
    subl    $16,       %esp    
    movl    $0,        -4(%ebp)
    leal    8(%ebp),   %eax
    movl    %eax,      -4(%ebp)  ;wp
    movl    -4(%ebp),  %eax      ;sp     
    addl    $2,        %eax      ;sp + 2  
    movzwl  (%eax),    %eax      
    movzwl  %ax,       %eax    
    movl    %eax,      -8(%ebp)  ;hi
    movl    -4(%ebp),  %eax
    movzwl  (%eax),    %eax    
    movzwl  %ax,       %eax    
    movl    %eax,      -12(%ebp) ;lo
    movl    -4(%ebp),  %eax
    movl    -8(%ebp),  %edx      ;hi     
    movw    %dx,       (%eax)    ;sp[0]  = hi
    movl    -4(%ebp),  %eax
    leal    2(%eax),   %edx
    movl    -12(%ebp), %eax
    movw    %ax,       (%edx)    ;sp[1] = lo
    movl    8(%ebp),   %eax      ;lo:hi
    leave   
    ret

gcc file6.c -O2 -o mytest: 12345678 12345678
swap_words:
    pushl   %ebp
    movl    %esp,     %ebp    
    movzwl  8(%ebp),  %eax      ;arg(l) 
    movzwl  10(%ebp), %edx      ;arg(h) 
    movw    %ax,      10(%ebp)  ;arg(l)arg(l)
    movl    8(%ebp),  %eax      ;arg(l)arg(l)
    movw    %dx,      8(%ebp)   ;arg(l)arg(h)
    popl    %ebp    
    ret
盡管在swap_words中正確修改了調用者的棧上的值，但是在main中根本就不調用swap_words，
eax的值與我們的原意也不同

arm-gcc file6.c -o mytest : 12345678 56781234
swap_words:
    str fp, [sp, #-4]!     
    add fp, sp, #0
    sub sp, sp, #28 
    str r0, [fp, #-24]     ;arg  
    mov r3, #0
    str r3, [fp, #-16]
    sub r3, fp, #24 
    str r3, [fp, #-16]    ;wp
    ldr r3, [fp, #-16]    ;sp
    add r3, r3, #2
    ldrh    r3, [r3, #0]  
    str r3, [fp, #-12]    ;hi
    ldr r3, [fp, #-16]    ;sp
    ldrh    r3, [r3, #0]
    str r3, [fp, #-8]     ;lo
    ldr r2, [fp, #-16]    ;sp
    ldr r3, [fp, #-12]    ;hi
    mov r3, r3, asl #16 
    mov r3, r3, lsr #16 
    strh    r3, [r2, #0]  ;arg(h)arg(h)
    ldr r3, [fp, #-16]
    add r2, r3, #2
    ldr r3, [fp, #-8]
    mov r3, r3, asl #16 
    mov r3, r3, lsr #16 
    strh    r3, [r2, #0]  ;arg(l)arg(h)
    ldr r3, [fp, #-24]    ;arg(l)arg(h)
    mov r0, r3
    add sp, fp, #0
    ldmfd   sp!, {fp}
    bx  lr  

arm-gcc file6.c -O2 -o mytest : 12345678 12345678
swap_words:
    sub sp, sp, #8
    str r0, [sp, #4]
    add sp, sp, #8
    bx  lr
因為實施了fstrict-aliasing優化，因此，swap_words中做了非常大的優化，以至於不做任何計算，直接返回r0  

The problem with this method is although U32P does in fact say that sp is an alias for wp, 
it does not say anything about the relationship between the values pointed to by sp and wp. 
This differs in a critical way from the experiment in "file3.c & file4.c"  which both define 
aliases for the values being pointed to, not the pointers themselves.

=========================================================================

對於上述情形（3）的說明：
It is always presumed that a char* may refer to an alias of any object. It is therefore quite safe, if perhaps a bit unoptimal (for architecture with wide loads and stores) to cast any pointer of any type to a char* type. (即由於char*可能與任何類型的左值相同，因此編譯器不會做過分的優化，無論是否指定fstrict-aliasing參數選項都在安全范圍內做優化，使用char*帶來的問題是可能存在多次load/store，可能會降低性能)

如下代碼，使用-O0, 與-O2編譯出的程序執行效果相同，都可以交換字內的字節序(注意情形（3））
/*
 * Swap the two 16-bit halves of arg by moving its bytes through a char*.
 *
 * A character pointer may alias an object of any type, so the compiler must
 * assume the stores through cp modify arg; the result is the same with and
 * without strict aliasing.
 *
 * Bytes 0-1 are exchanged with bytes 2-3, so the function assumes unsigned
 * int is four bytes wide. The result does not depend on byte order.
 *
 * Returns arg with its upper and lower 16 bits exchanged.
 */
unsigned int swap_words(unsigned int arg)
{
    /*
     * Point at arg's own storage. Casting the value of arg to a pointer
     * would dereference an arbitrary address instead.
     */
    char* const cp = (char*)&arg;
    const char c0 = cp[0];
    const char c1 = cp[1];
    const char c2 = cp[2];
    const char c3 = cp[3];

    cp[0] = c2;
    cp[1] = c3;
    cp[2] = c0;
    cp[3] = c1;

    return arg;
}

對於情形（3），相反的情況則不一定成立，即：
The converse is not true. Casting a char* to a pointer of any type other than a char* and dereferencing it is usually in violation of the strict aliasing rule.

大叔來了，回去洗澡，有點亂，再看看

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 C/C++ strict-aliasing 編譯c時提示“dereferencing type-punned pointer will break strict-aliasing rules”如何處理？