char* str="..." vs char str[]="..." strange behaviour [duplicate]
Possible Duplicate:
Program crashes when trying to set a character of a char array
I have a sample code which works as expected:
/* strtok example */
/*
 * Working variant: the string to tokenise lives in a local array, so
 * strtok() is free to overwrite the delimiters in place.
 */
#include <stdio.h>
#include <string.h>
int main ()
{
/* Array initialised from the literal: str is a private, writable copy. */
char str[] ="- This, a sample string.";
/* Receives the token pointer returned by strtok(). */
char * pch;
printf ("Splitting string \"%s\" into tokens:\n",str);
/* First call: strtok() writes a null byte into str to terminate the token. */
pch = strtok (str," ,.-");
/* The token-printing loop below is disabled; only the first strtok() call runs. */
/*
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, " ,.-");
}
*/
return 0;
}
... unless I change char str[] to char * str, which I assumed would not make any difference in semantics:
/* strtok example */
/*
 * Crashing variant, kept on purpose to reproduce the reported fault:
 * str is only a pointer to the string literal itself, and strtok()
 * writes into the buffer it is given. Modifying a string literal is
 * undefined behaviour; here it ends in a segmentation fault.
 */
#include <stdio.h>
#include <string.h>
int main ()
{
/* Pointer to the literal, not a copy of it: the pointed-to bytes must not be written. */
char * str ="- This, a sample string.";
/* Receives the token pointer returned by strtok(). */
char * pch;
printf ("Splitting string \"%s\" into tokens:\n",str);
/* strtok() tries to overwrite a delimiter inside the literal: this is the faulting call. */
pch = strtok (str," ,.-");
/* The token-printing loop below is disabled; only the first strtok() call runs. */
/*
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, " ,.-");
}
*/
return 0;
}
This is the unexpected result:
Splitting string "- This, a sample string." into tokens:
Segmentation fault
I compiled both examples with:
gcc -O0 main.c
gcc -O3 main.c
g++ -O0 main.c
g++ -O3 main.c
and even looked at the assembly ... But I can't figure out what's wrong with the second version.
Here is the working -O1 assembly:
# GCC 4.4.5 output (GAS, Intel syntax) for the variant that declares
# char str[] = "- This, a sample string."
# The string is rebuilt in a stack buffer, so strtok gets writable memory.
.file "main.c"
.intel_syntax noprefix
# Read-only, mergeable string constants: the printf format and the delimiter set.
.section .rodata.str1.8,"aMS",@progbits,1
.align 8
.LC0:
.string "Splitting string \"%s\" into tokens:\n"
.section .rodata.str1.1,"aMS",@progbits,1
.LC1:
.string " ,.-"
.text
.globl main
.type main, @function
main:
.LFB58:
.cfi_startproc
# Prologue: push (8 bytes) + 48 bytes of locals keeps rsp 16-byte aligned at the calls.
push rbx
.cfi_def_cfa_offset 16
sub rsp, 48
.cfi_def_cfa_offset 64
# Stack protector: copy the guard value from fs:40 into the slot at [rsp+40].
mov rax, QWORD PTR fs:40
mov QWORD PTR [rsp+40], rax
# eax = 0; nothing writes it again before the variadic __printf_chk call.
xor eax, eax
# Initialise str in the 25-byte stack buffer at [rsp], four little-endian bytes per store.
mov DWORD PTR [rsp], 1750343725         # "- Th"
mov DWORD PTR [rsp+4], 539784041        # "is, "
mov DWORD PTR [rsp+8], 1634934881       # "a sa"
mov DWORD PTR [rsp+12], 1701605485      # "mple"
mov DWORD PTR [rsp+16], 1920234272      # " str"
mov DWORD PTR [rsp+20], 778530409       # "ing."
mov BYTE PTR [rsp+24], 0                # terminating NUL
# __printf_chk(1, .LC0, str): rdx = str on the stack, rsi = format, edi = 1.
mov rdx, rsp
mov esi, OFFSET FLAT:.LC0
mov edi, 1
.cfi_offset 3, -16
call __printf_chk
# strtok(str, " ,.-"): rdi points at the writable stack copy, so the in-place write succeeds.
mov esi, OFFSET FLAT:.LC1
mov rdi, rsp
call strtok
# Return value of main = 0 (the strtok result is not used).
mov eax, 0
# Stack protector check: compare the saved guard with fs:40, abort on mismatch.
mov rdx, QWORD PTR [rsp+40]
xor rdx, QWORD PTR fs:40
je .L3
call __stack_chk_fail
.L3:
# Epilogue: release the locals and restore rbx.
add rsp, 48
pop rbx
.p2align 4,,1
ret
.cfi_endproc
.LFE58:
.size main, .-main
.ident "GCC: (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5"
.section .note.GNU-stack,"",@progbits
and the broken one:
# GCC 4.4.5 output (GAS, Intel syntax) for the variant that declares
# char * str = "- This, a sample string."
# No copy is made: the address of the literal in .rodata is passed to strtok.
.file "main.c"
.intel_syntax noprefix
# The literal that str points to. Section flags are "aMS" (no "w"): not writable.
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "- This, a sample string."
.section .rodata.str1.8,"aMS",@progbits,1
.align 8
.LC1:
.string "Splitting string \"%s\" into tokens:\n"
.section .rodata.str1.1
.LC2:
.string " ,.-"
.text
.globl main
.type main, @function
main:
.LFB58:
.cfi_startproc
# Prologue: 8 bytes of padding keeps rsp 16-byte aligned at the calls.
sub rsp, 8
.cfi_def_cfa_offset 16
# __printf_chk(1, .LC1, str): edx = address of the literal .LC0, esi = format, edi = 1.
mov edx, OFFSET FLAT:.LC0
mov esi, OFFSET FLAT:.LC1
mov edi, 1
# eax = 0 at the variadic call.
mov eax, 0
call __printf_chk
# strtok(.LC0, " ,.-"): edi is the address of the read-only literal itself.
# strtok writes into its first argument, so this is the call behind the reported segfault.
mov esi, OFFSET FLAT:.LC2
mov edi, OFFSET FLAT:.LC0
call strtok
# Return value of main = 0 (the strtok result is not used).
mov eax, 0
add rsp, 8
ret
.cfi_endproc
.LFE58:
.size main, .-main
.ident "GCC: (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5"
.section .note.GNU-stack,"",@progbits
The only obvious difference I can see is that in the working version GCC replaces the string constant with MOVs directly in the code.
Any help is much appreciated.
edit gcc (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5,
All the best, Thomas
In the second case, you're pointing str at a static object somewhere in memory that can't be changed. The strtok man page warns that it modifies its first argument and cannot be used on a constant string. Hence the error.
strtok() requires a modifiable buffer, because it replaces each delimiter it finds with a null byte. So you cannot say char * str = "- This, a sample string."; because that should really have been const char * str = "- This, a sample string."; and it points to read-only memory. Instead, you have several options:
/* The two options below are alternatives; use one or the other, not both in the same scope. */
/* Option 1: initialise a local array from the literal, giving strtok() a writable copy. */
char str[] = "- This, a sample string."; // local array
char * pch = strtok (str," ,.-");
/* Option 2: duplicate the literal on the heap; the copy is writable and must be freed. */
char * str = strdup("- This, a sample string."); // malloc()ed
char * pch = strtok (str," ,.-");
/* ... */
/* Release the strdup() copy once tokenising is finished. */
free(str);
char * str allocates room for a pointer to a string that happens to be a constant literal (i.e., not writable).
char str[] allocates room for an array whose size is specified by the assigned literal. The array is writable.
strtok() modifies the string it works on. This is allowed with str[] but not with *str.
When you use char p[] = "literal", many compilers will allocate a character array of the appropriate length and then copy the string from wherever string constants are kept into the array, so you end up with a modifiable copy of the string.
When you use char* p = "literal", you have a pointer that points to the unmodifiable string constant itself. When you attempt to modify it, the behavior is undefined. In fact, at some point g++ started issuing a warning when you write char *p = "literal", because the correct way to specify it is const char* p = "literal", since it is a pointer to a constant string.
精彩评论