How to specify an individual register as constraint in ARM GCC inline assembly?
in x86 inline assembly i can write something like this:
asm ("cpuid"
: "=a" (_eax),
"=b" (_ebx),
"=c" (_ecx),
"=d" (_edx)
: "a" (op));
so in the matchin constraints instead of just writing "=r" and let the compiler chose the register, I can say which particular register i want to use (=a for example to use %eax)
how can I do this for ARM assembly? the ARM GCC assembly cookbook http://www.ethernut.de/en/documents/arm-inline-asm.html states that i can for example use the constraints "r" for one of the general purpose registers R0-R15 "w" for one of the VFP floating point registers S0-S31
but how can I constraint an operan开发者_高级运维d for example exactly to s1? or to a particular general purpose register?
I don't think gcc for ARM allows you to use constraints to specify exactly which register to use. However, you can use explicit register variables to specify a register to store a variable in:
register int my_variable asm("r0");
Explicit register variables minimal runnable example
Here is an ARMv8 Linux C freestanding hello world exemplifying https://stackoverflow.com/a/3936064/9160762 with some disassembly analysis:
main.c
#include <inttypes.h>
void _start(void) {
uint64_t exit_status;
/* write */
{
char msg[] = "hello syscall v8\n";
uint64_t syscall_return;
register uint64_t x0 __asm__ ("x0") = 1; /* stdout */
register char *x1 __asm__ ("x1") = msg;
register uint64_t x2 __asm__ ("x2") = sizeof(msg);
register uint64_t x8 __asm__ ("x8") = 64; /* syscall number */
__asm__ __volatile__ (
"svc 0;"
: "+r" (x0)
: "r" (x1), "r" (x2), "r" (x8)
: "memory"
);
syscall_return = x0;
exit_status = (syscall_return != sizeof(msg));
}
/* exit */
{
register uint64_t x0 __asm__ ("x0") = exit_status;
register uint64_t x8 __asm__ ("x8") = 93;
__asm__ __volatile__ (
"svc 0;"
: "+r" (x0)
: "r" (x8)
:
);
}
}
GitHub upstream.
Compile and run:
sudo apt-get install qemu-user gcc-aarch64-linux-gnu
aarch64-linux-gnu-gcc -O3 -std=c99 -ggdb3 -march=armv8-a -pedantic -Wall -Wextra \
-ffreestanding -nostdlib -static -o main.out main.c
qemu-aarch64 main.out
Output:
hello syscall v8
Disassembly:
aarch64-linux-gnu-objdump -S main.out
Output:
main.out: file format elf64-littleaarch64
Disassembly of section .text:
0000000000400110 <_start>:
void _start(void) {
uint64_t exit_status;
/* write */
{
char msg[] = "hello syscall v8\n";
400110: 90000003 adrp x3, 400000 <_start-0x110>
400114: 91056063 add x3, x3, #0x158
void _start(void) {
400118: d10083ff sub sp, sp, #0x20
uint64_t syscall_return;
register uint64_t x0 __asm__ ("x0") = 1; /* stdout */
40011c: d2800020 mov x0, #0x1 // #1
register char *x1 __asm__ ("x1") = msg;
400120: 910023e1 add x1, sp, #0x8
register uint64_t x2 __asm__ ("x2") = sizeof(msg);
400124: d2800242 mov x2, #0x12 // #18
char msg[] = "hello syscall v8\n";
400128: a9401464 ldp x4, x5, [x3]
register uint64_t x8 __asm__ ("x8") = 64; /* syscall number */
40012c: d2800808 mov x8, #0x40 // #64
char msg[] = "hello syscall v8\n";
400130: 79402063 ldrh w3, [x3, #16]
400134: a90097e4 stp x4, x5, [sp, #8]
400138: 790033e3 strh w3, [sp, #24]
__asm__ __volatile__ (
40013c: d4000001 svc #0x0
: "+r" (x0)
: "r" (x1), "r" (x2), "r" (x8)
: "memory"
);
syscall_return = x0;
exit_status = (syscall_return != sizeof(msg));
400140: eb02001f cmp x0, x2
}
/* exit */
{
register uint64_t x0 __asm__ ("x0") = exit_status;
register uint64_t x8 __asm__ ("x8") = 93;
400144: d2800ba8 mov x8, #0x5d // #93
register uint64_t x0 __asm__ ("x0") = exit_status;
400148: 9a9f07e0 cset x0, ne // ne = any
__asm__ __volatile__ (
40014c: d4000001 svc #0x0
: "+r" (x0)
: "r" (x8)
:
);
}
}
400150: 910083ff add sp, sp, #0x20
400154: d65f03c0 ret
Attempt without explicit register variables
Mostly for fun, I tried to reach the same result without using register variables, but I was not able to do it.
In any case, the code would be more complicated, so you are better off just using register variables.
Here is my best attempt:
main.c
#include <inttypes.h>
void _start(void) {
uint64_t exit_status;
/* write */
{
char msg[] = "hello syscall v8\n";
uint64_t syscall_return;
__asm__ (
"mov x0, 1;" /* stdout */
"mov x1, %[msg];"
"mov x2, %[len];"
"mov x8, 64;" /* syscall number */
"svc 0;"
"mov %[syscall_return], x0;"
: [syscall_return] "=r" (syscall_return)
: [msg] "p" (msg),
[len] "i" (sizeof(msg))
: "x0", "x1", "x2", "x8", "memory"
);
exit_status = (syscall_return != sizeof(msg));
}
/* exit */
__asm__ (
"mov x0, %[exit_status];"
"mov x8, 93;" /* syscall number */
"svc 0;"
:
: [exit_status] "r" (exit_status)
: "x0", "x8"
);
}
GitHub upstream.
Disassembly:
main.out: file format elf64-littleaarch64
Disassembly of section .text:
0000000000400110 <_start>:
void _start(void) {
uint64_t exit_status;
/* write */
{
char msg[] = "hello syscall v8\n";
400110: 90000000 adrp x0, 400000 <_start-0x110>
400114: 9105a000 add x0, x0, #0x168
void _start(void) {
400118: d10083ff sub sp, sp, #0x20
char msg[] = "hello syscall v8\n";
40011c: a9400c02 ldp x2, x3, [x0]
400120: a9008fe2 stp x2, x3, [sp, #8]
400124: 79402000 ldrh w0, [x0, #16]
uint64_t syscall_return;
__asm__ (
400128: 910023e3 add x3, sp, #0x8
char msg[] = "hello syscall v8\n";
40012c: 790033e0 strh w0, [sp, #24]
__asm__ (
400130: d2800020 mov x0, #0x1 // #1
400134: aa0303e1 mov x1, x3
400138: d2800242 mov x2, #0x12 // #18
40013c: d2800808 mov x8, #0x40 // #64
400140: d4000001 svc #0x0
400144: aa0003e3 mov x3, x0
: [syscall_return] "=r" (syscall_return)
: [msg] "p" (msg),
[len] "i" (sizeof(msg))
: "x0", "x1", "x2", "x8", "memory"
);
exit_status = (syscall_return != sizeof(msg));
400148: f100487f cmp x3, #0x12
40014c: 9a9f07e1 cset x1, ne // ne = any
}
/* exit */
__asm__ (
400150: aa0103e0 mov x0, x1
400154: d2800ba8 mov x8, #0x5d // #93
400158: d4000001 svc #0x0
"svc 0;"
:
: [exit_status] "r" (exit_status)
: "x0", "x8"
);
}
40015c: 910083ff add sp, sp, #0x20
400160: d65f03c0 ret
This was less efficient for the following reasons:
writeconstraintpneed to use an intermediate registerx3for theaddtospI don't know how to get the syscall return status without an extra
movto an output registerexitstatus gets moved one extra time throughx1. With register variables is just calculated directly intox0.
Tested in Ubuntu 18.10, GCC 8.2.0, QEMU 2.12.
加载中,请稍侯......
精彩评论