开发者

char* str="..." vs char str[]="..." strange behaviour [duplicate]

This question already has answers here: Closed 11 years ago.

Possible Duplicate:

Program crashes when trying to set a character of a char array

I have a sample code which works as expected:

/* strtok example */
#include <stdio.h>
#include <string.h>

/*
 * Working variant of the example: str is a local char array initialised
 * from the string literal, so strtok() is handed a writable buffer.
 */
int main ()
{
  char str[] ="- This, a sample string.";   /* array sized by the literal; holds its own copy of the text */
  char * pch;                               /* first token; only read by the disabled loop below */
  printf ("Splitting string \"%s\" into tokens:\n",str);
  pch = strtok (str," ,.-");                /* strtok modifies its first argument (a delimiter becomes a null byte) */
/*
  while (pch != NULL)
  {
    printf ("%s\n",pch);
    pch = strtok (NULL, " ,.-");
  }
*/
  return 0;
}

... unless I change char str[] to char* str, which shouldn't make any difference in semantics:

/* strtok example */
#include <stdio.h>
#include <string.h>

/*
 * Failing variant of the example (kept on purpose: it is the subject of
 * the question). str is a pointer to the string literal itself, not a
 * copy of it, so strtok() is asked to write into the literal's storage.
 * The program prints the first line and then dies with a segmentation
 * fault.
 */
int main ()
{
  char * str ="- This, a sample string.";   /* pointer to the literal; no writable copy is made */
  char * pch;                               /* first token; only read by the disabled loop below */
  printf ("Splitting string \"%s\" into tokens:\n",str);
  pch = strtok (str," ,.-");                /* strtok modifies its first argument: this is the call that crashes */
/*
  while (pch != NULL)
  {
    printf ("%s\n",pch);
    pch = strtok (NULL, " ,.-");
  }
*/
  return 0;
}

This is the unexpected result:

Splitting string "- This, a sample string." into tokens:
Segmentation fault

I compiled both examples with:

gcc -O0 main.c
gcc -O3 main.c
g++ -O0 main.c
g++ -O3 main.c

and even looked at the assembly ... But I can't figure out what's wrong with the second version.

Here is the working -O1 assembly:

    # GCC output for the working variant (char str[] = "...").
    # GAS with Intel operand order (dst, src). Only the printf format and
    # the delimiter set live in .rodata; the text of str is built on the
    # stack by immediate stores, so strtok receives a stack address.
    .file   "main.c"
    .intel_syntax noprefix
    .section    .rodata.str1.8,"aMS",@progbits,1
    .align 8
.LC0:
    .string "Splitting string \"%s\" into tokens:\n"    # printf format
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC1:
    .string " ,.-"    # strtok delimiter set
    .text
.globl main
    .type   main, @function
main:
.LFB58:
    .cfi_startproc
    push    rbx    # saved and restored, although the visible code never uses rbx
    .cfi_def_cfa_offset 16
    sub rsp, 48    # frame: [rsp .. rsp+24] holds str, [rsp+40] holds the guard copy
    .cfi_def_cfa_offset 64
    mov rax, QWORD PTR fs:40    # load the guard value that is re-checked before returning
    mov QWORD PTR [rsp+40], rax    # keep a copy above the array
    xor eax, eax    # eax = 0 and stays 0 until the call (presumably the al vector-arg count for a variadic callee)
    mov DWORD PTR [rsp], 1750343725    # 0x6854202D = "- Th" (little-endian)
    mov DWORD PTR [rsp+4], 539784041    # 0x202C7369 = "is, "
    mov DWORD PTR [rsp+8], 1634934881    # 0x61732061 = "a sa"
    mov DWORD PTR [rsp+12], 1701605485    # 0x656C706D = "mple"
    mov DWORD PTR [rsp+16], 1920234272    # 0x72747320 = " str"
    mov DWORD PTR [rsp+20], 778530409    # 0x2E676E69 = "ing."
    mov BYTE PTR [rsp+24], 0    # terminating NUL of str
    mov rdx, rsp    # 3rd argument: address of str on the stack
    mov esi, OFFSET FLAT:.LC0    # 2nd argument: format string
    mov edi, 1    # 1st argument: the constant 1
    .cfi_offset 3, -16
    call    __printf_chk
    mov esi, OFFSET FLAT:.LC1    # 2nd argument: delimiters
    mov rdi, rsp    # 1st argument: str on the stack, so strtok writes to stack memory
    call    strtok
    mov eax, 0    # return value of main
    mov rdx, QWORD PTR [rsp+40]    # reload the saved guard copy
    xor rdx, QWORD PTR fs:40    # zero only if it still matches the value at fs:40
    je  .L3    # unchanged: normal return path
    call    __stack_chk_fail    # guard was overwritten
.L3:
    add rsp, 48
    pop rbx
    .p2align 4,,1
    ret
    .cfi_endproc
.LFE58:
    .size   main, .-main
    .ident  "GCC: (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5"
    .section    .note.GNU-stack,"",@progbits

and the broken one:

    # GCC output for the failing variant (char * str = "...").
    # GAS with Intel operand order (dst, src). The text of str exists only
    # as .LC0 inside a .rodata section, and that same address is passed to
    # both printf and strtok. No stack copy is made.
    .file   "main.c"
    .intel_syntax noprefix
    .section    .rodata.str1.1,"aMS",@progbits,1    # flags "aMS" contain no "w" (writable) flag
.LC0:
    .string "- This, a sample string."    # the literal that str points to
    .section    .rodata.str1.8,"aMS",@progbits,1
    .align 8
.LC1:
    .string "Splitting string \"%s\" into tokens:\n"    # printf format
    .section    .rodata.str1.1
.LC2:
    .string " ,.-"    # strtok delimiter set
    .text
.globl main
    .type   main, @function
main:
.LFB58:
    .cfi_startproc
    sub rsp, 8    # no locals; the 8 bytes bring the CFA offset to 16 (see directive below)
    .cfi_def_cfa_offset 16
    mov edx, OFFSET FLAT:.LC0    # 3rd argument: address of the literal
    mov esi, OFFSET FLAT:.LC1    # 2nd argument: format string
    mov edi, 1    # 1st argument: the constant 1
    mov eax, 0    # eax = 0 before the call (presumably the al vector-arg count for a variadic callee)
    call    __printf_chk
    mov esi, OFFSET FLAT:.LC2    # 2nd argument: delimiters
    mov edi, OFFSET FLAT:.LC0    # 1st argument: the literal in .rodata, which strtok will try to modify
    call    strtok    # the reported segmentation fault follows the printf output, i.e. it happens here
    mov eax, 0    # return value of main
    add rsp, 8
    ret
    .cfi_endproc
.LFE58:
    .size   main, .-main
    .ident  "GCC: (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5"
    .section    .note.GNU-stack,"",@progbits

The only obvious difference I can see is that in the working version GCC replaces the string constant with MOVs directly in the code.

Help is much appreciated.

Edit: the compiler is gcc (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5.

All the best, Thomas


In the second case, you're pointing str at a static object somewhere in memory that can't be changed. The strtok man page warns that it changes its first argument and can't be used on a constant string. Hence the error.


strtok() requires a modifiable buffer, because it replaces each delimiter with a null byte. So you cannot write char * str = "- This, a sample string.";, because that should really have been const char * str = "- This, a sample string."; and it points to read-only memory. Instead, you have several options:

char str[] = "- This, a sample string.";  // local array
char * pch = strtok (str," ,.-");


char * str = strdup("- This, a sample string.");  // malloc()ed
char * pch = strtok (str," ,.-");
/* ... */
free(str);


char * str allocates room for a pointer to a string that happens to be a constant literal (i.e., not writable).

char str[] allocates room for an array whose size is specified by the assigned literal. The array is writable.

strtok() modifies the string it works on. This is allowed with str[] but not with *str.


When you use char p[] = "literal", many a compiler will allocate a character array of the appropriate length and then copy the string from wherever string constants are kept into the array, so you end up with a modifiable copy of the string.

When you use char* p = "literal", you have a pointer that points to the unmodifiable copy of the string. When you attempt to modify it, the behavior is undefined. In fact, at some point g++ started issuing a warning when you do char *p = "literal", because the correct way to specify it is const char* p="literal" since it is a pointer to a constant string.

0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜