开发者

check if carry flag is set

Using inline assembler [gcc, intel, c], how to check if the carry flag is set after an operation?


sbb %eax,%eax will store -1 in eax if the carry flag is set, 0 if it is clear. There's no need to pre-clear eax to 0; subtracting eax from itself does that for you. This technique can be very powerful since you can use the result as a bitmask to modify the results of computations in place of using conditional jumps.

You should be aware that it is only valid to test the carry flag if it was set by arithmetic performed INSIDE the inline asm block. You can't test carry of a computation that was performed in C code because there are all sorts of ways the compiler could optimize/reorder things that would clobber the carry flag.


With conditional jumps jc (jump if carry) or jnc (jump if not carry).

Or you can store the carry flag,

;; Intel syntax
mov eax, 0
adc eax, 0 ; add with carry


However, the x86 instruction set has dedicated, fast flag-test instructions named SETcc, where cc is the condition (flag) to test. So you can write:

setc    AL                           //will set AL register to 1 or clear to 0 depend on carry flag

or

setc    byte ptr [edx]               //will set memory byte on location edx depend on carry flag

or even

setc    byte ptr [CarryFlagTestByte]  //will set memory variable on location CarryFlagTestByte depend on carry flag

With the SETcc instructions you can test flags such as carry, zero, sign, overflow or parity; some SETcc variants test a combination of two flags at once (for example, SETBE tests both CF and ZF).

EDIT: Added a simple test written in Delphi to dispel any doubt about the term "fast".

{ Micro-benchmark: times 1,000,000 iterations of four "mov al,0 / adc al,0"
  pairs against 1,000,000 iterations of four "setc al" instructions, and
  shows both elapsed time-stamp-counter deltas in the form caption
  (adc timing first, setc timing second). }
procedure TfrmTest.ButtonTestClick(Sender: TObject);
  { Returns the CPU time-stamp counter; RDTSC leaves the 64-bit value in
    EDX:EAX, which is used here directly as the Int64 function result. }
  function GetCPUTimeStamp: int64;
  asm
    rdtsc
  end;
var
 ii, i: int64;
begin
  // --- Variant 1: materialise the carry flag with MOV + ADC ---
  i := GetCPUTimeStamp;
  asm
    mov   ecx, 1000000
@repeat:
    // MOV does not modify flags, so ADC adds the current CF to the zeroed AL.
    mov   al, 0
    adc   al, 0
    mov   al, 0
    adc   al, 0
    mov   al, 0
    adc   al, 0
    mov   al, 0
    adc   al, 0
    // LOOP decrements ECX and jumps back while ECX <> 0.
    loop  @repeat
  end;
  i := GetCPUTimeStamp - i;

  // --- Variant 2: materialise the carry flag with SETC ---
  ii := GetCPUTimeStamp;
  asm
    mov   ecx, 1000000
@repeat:
    setc  al
    setc  al
    setc  al
    setc  al
    loop  @repeat
  end;
  ii := GetCPUTimeStamp - ii;
  // Fixed: the original line had an unbalanced extra ')' and did not compile.
  caption := IntToStr(i) + '  ' +  IntToStr(ii);
end;

The loop (1M iterations) which uses the setc instruction is more than 5 times faster than the loop with the adc instruction.

EDIT: Added a second test which accumulates the result stored in register AL into register CL, to be a more realistic case.

{ Micro-benchmark: shifts the pattern $AAAAAAAA left eight times, each time
  converting the carry flag produced by SHL into 0/1 in AL and adding it to
  CL. The sequence is timed once using MOV+ADC and once using SETC; both
  time-stamp-counter deltas are shown in the form caption (adc first). }
procedure TfrmTestOtlContainers.Button1Click(Sender: TObject);
  { Returns the CPU time-stamp counter; RDTSC leaves the 64-bit value in
    EDX:EAX, which is used here directly as the Int64 function result. }
  function GetCPUTimeStamp: int64;
  asm
    rdtsc
  end;

var
 ii, i: int64;
begin
  // --- Variant 1: CF -> AL via "mov al,0 / adc al,0" ---
  i := GetCPUTimeStamp;
  asm
    // ECX (and so CL) is the accumulator; EDX holds the bit pattern to shift.
    xor   ecx, ecx
    mov   edx, $AAAAAAAA

    // SHL moves the top bit of EDX into CF; MOV does not touch flags,
    // so ADC leaves AL = CF, which is then accumulated into CL.
    shl   edx, 1
    mov   al, 0
    adc   al, 0
    add   cl, al

    shl   edx, 1
    mov   al, 0
    adc   al, 0
    add   cl, al

    shl   edx, 1
    mov   al, 0
    adc   al, 0
    add   cl, al

    shl   edx, 1
    mov   al, 0
    adc   al, 0
    add   cl, al

    shl   edx, 1
    mov   al, 0
    adc   al, 0
    add   cl, al

    shl   edx, 1
    mov   al, 0
    adc   al, 0
    add   cl, al

    shl   edx, 1
    mov   al, 0
    adc   al, 0
    add   cl, al

    shl   edx, 1
    mov   al, 0
    adc   al, 0
    add   cl, al

  end;
  i := GetCPUTimeStamp - i;

  // --- Variant 2: CF -> AL via "setc al" ---
  ii := GetCPUTimeStamp;
  asm
    xor   ecx, ecx
    mov   edx, $AAAAAAAA

    // Same eight steps as above, with SETC replacing the MOV+ADC pair.
    shl   edx, 1
    setc  al
    add   cl, al

    shl   edx, 1
    setc  al
    add   cl, al

    shl   edx, 1
    setc  al
    add   cl, al

    shl   edx, 1
    setc  al
    add   cl, al

    shl   edx, 1
    setc  al
    add   cl, al

    shl   edx, 1
    setc  al
    add   cl, al

    shl   edx, 1
    setc  al
    add   cl, al

    shl   edx, 1
    setc  al
    add   cl, al

  end;
  ii := GetCPUTimeStamp - ii;
  // Display "<adc ticks>  <setc ticks>".
  caption := IntToStr(i) + '  ' +  IntToStr(ii);
end;

The routine using the SETcc instruction is still faster, by about 20%.


The first function performs unsigned addition and then tests for overflow using the carry flag (CF). The volatile's must remain. Otherwise the optimizer will rearrange instructions, which pretty much ensures an incorrect result. I've seen the optimizer change the jnc to a jae (which is also based on CF).

/*
 * Performs r = a + b (unsigned, wrapping modulo 2^32).
 *
 * Parameters:
 *   a, b - operands.
 *   r    - receives the (possibly wrapped) sum; may be NULL, in which case
 *          only the status is reported.
 *
 * Returns 1 if the result is safe (no carry out of bit 31), 0 otherwise.
 *
 * The addition is performed INSIDE the asm statement. Testing CF in a
 * separate asm statement after a C-level "a + b" is unreliable: the compiler
 * is free to schedule other flag-modifying instructions in between, or to
 * compute the sum with an instruction that does not set CF at all.
 */
int add_u32(uint32_t a, uint32_t b, uint32_t* r)
{
    uint32_t result = a;   /* read-write operand: holds a on entry, a + b on exit */
    unsigned char carry;   /* receives CF (0 or 1) via SETC */

    __asm__
    (
     "addl %[rhs], %[sum]\n\t"   /* sum += rhs, sets CF on unsigned wrap */
     "setc %[cf]"                /* cf = CF */
     : [sum] "+r" (result), [cf] "=q" (carry)  /* "q": byte-addressable register */
     : [rhs] "rm" (b)
     : "cc"                                    /* the flags are modified */
     );

    if(r)
        *r = result;

    return !carry;
}

The next function is for the signed ints. Same use of volatile applies. Note that signed integer math jumps on OF flag via jno. I've seen the optimizer change this to a jnb (which is also based on OF).

/*
 * Performs r = a + b (signed 32-bit, two's-complement wrapping).
 *
 * Parameters:
 *   a, b - operands.
 *   r    - receives the (possibly wrapped) sum; may be NULL, in which case
 *          only the status is reported.
 *
 * Returns 1 if the result is safe (no signed overflow), 0 otherwise.
 *
 * The addition is performed INSIDE the asm statement, for two reasons:
 *   - a C-level signed "a + b" that overflows is undefined behaviour, so the
 *     compiler may assume it never happens;
 *   - OF tested in a separate asm statement is not guaranteed to come from
 *     that addition, because the compiler may reorder or replace instructions.
 */
int add_i32(int32_t a, int32_t b, int32_t* r)
{
    int32_t result = a;      /* read-write operand: holds a on entry, a + b on exit */
    unsigned char overflow;  /* receives OF (0 or 1) via SETO */

    __asm__
    (
     "addl %[rhs], %[sum]\n\t"   /* sum += rhs, sets OF on signed overflow */
     "seto %[of]"                /* of = OF */
     : [sum] "+r" (result), [of] "=q" (overflow)  /* "q": byte-addressable register */
     : [rhs] "rm" (b)
     : "cc"                                       /* the flags are modified */
     );

    if(r)
        *r = result;

    return !overflow;
}

In the big picture, you might use the functions as follows. In the same big picture, many folks will probably reject the extra work and aesthetic non-beauty until pwn'd by an overflow/wrap/underflow

int r, a, b;
...

if(!add_i32(a, b, &r))
    abort(); // Integer overflow!!!

...

The inline GCC assembly is available in GCC 3.1 and above. See Assembler Instructions with C Expression Operands, or search for 'GCC Extended Assembly'.

Finally, the same in Visual Studio would be as follows (not much difference in code generation), but syntax is much easier since MASM allows you to jump to a C label:

/*
 * Performs r = a + b (signed 32-bit, two's-complement wrapping).
 * MSVC x86 inline-assembler version.
 *
 * Parameters:
 *   a, b - operands.
 *   r    - receives the (possibly wrapped) sum; may be NULL, in which case
 *          only the status is reported.
 *
 * Returns 1 if the result is safe (no signed overflow), 0 otherwise.
 *
 * The addition is done inside the __asm block so that the OF tested by JNO
 * is guaranteed to come from this ADD; flags left behind by compiler-generated
 * code for a C-level "a + b" cannot be relied upon.
 */
int add_i32(__int32 a, __int32 b, __int32* r)
{
    int no_overflow = 1;
    __int32 result;

    __asm
    {
        mov eax, a
        add eax, b            ; sets OF on signed overflow
        mov result, eax       ; MOV does not modify flags, OF is still valid
        jno NO_OVERFLOW
        mov no_overflow, 0
    NO_OVERFLOW:
    }

    if(r)
        *r = result;

    return no_overflow;
}

On the downside, the above MASM code is only applicable to x86. For x64 there is no inline assembly, so you will have to code it up in assembly (in a separate file) and use MASM64 (ml64) to assemble it.


This may give an idea or solution if it's correct. I struggled with testing for wrap around until I found out about in-line assembly. I tried to test with various edge values and seems to work correctly. Program takes input from cmdln and converts it to integer and outputs hex and binary values.

gcc version 11.2.1

$> gcc -Wall -std=c99 -O2 -o uilt uilt.c

snippet:

size_t i = 0;
int mul = 10;
uint128_t sum = 0;
int int_array[48] = {0};

// fill arr. with ea. str val in argv[1] str. converted to int vals.
while (i < strlen(argv[1])) {
  // chk they are digit chars, if not, skip iter
  if (isdigit(argv[1][i]) == 0) {
    i++;
    continue;
  }
  int_array[i] = (argv[1][i] - 48);
  sum = int_array[i] + (sum * mul);

  /* check carry flag */
  __asm__ goto("jc %l0"
               : /* no outputs  */
               : /* no inputs   */
               : /* no clobbers */
               : carry);

  /* no carry */
  goto its_good;

 carry:
  system("clear");
  printf("\n\n\tERROR!!!\
        \n\n\t!!!!!!! uilt has ABORTED !!!!!!\
        \n\tCmdln arg exceeds 2^127 bit limit\
        \n\twhen converted from string to 127\
        \n\tbit unsigned __int128.\n\n");
  exit(1);

 its_good:
  i++;
 }

some output:

[jim@nitroII uiltDev]$ ./uilt 1

Dec: 1

Hex: 0x0001

Bin: 0x0001

[jim@nitroII uiltDev]$ ./uilt 255

Dec: 255

Hex: 0x00ff

Bin: 0x0000 1111 1111

[jim@nitroII uiltDev]$ ./uilt 18446744073709551616

Dec: 18446744073709551616

Hex: 0x0001 0000 0000 0000 0000

Bin: 0x0001 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000

[jim@nitroII uiltDev]$ ./uilt 340282366920938463463374607431768211455

Dec: 340282366920938463463374607431768211455

Hex: 0x0000 ffff ffff ffff ffff ffff ffff ffff ffff

Bin: 0x0000 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111 1111

Dec: 340282366920938463463374607431768211456

        ERROR!!!            

        !!!!!!! uilt has ABORTED !!!!!!            
        Cmdln arg exceeds 2^127 bit limit            
        when converted from string to 127            
        bit unsigned __int128.
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜