Copy one file to another and return number of bytes copied
At the outset this looks pretty simple; however, this was an interview question, and the trick is as follows:
I wrote simple code that copies byte by byte from one file to another and returns a count incremented inside the while(!feof) loop. However, my interviewer said that executing this loop to copy a 1 GB file would take an hour because it copies byte by byte, and that this is not what happens in real life. Could someone tell me how huge files are actually copied on computers, and what the underlying algorithm is? Also, remember that I need to return the number of bytes copied.
He's probably just plain wrong.
Unless you wrote the code in something like assembly language, reading/writing one character at a time will almost certainly have only a fairly minimal effect on overall speed. The reason is fairly simple: almost anything higher level than assembly language will do (at least some) buffering for you when you do character-oriented I/O.
Just for example, consider code in C like this:
#include <stdio.h>

int main(int argc, char **argv) {
    FILE *infile = fopen(argv[1], "rb");
    FILE *outfile = fopen(argv[2], "wb");
    unsigned long count = 0;
    int ch;

    while (EOF != (ch = getc(infile))) {
        ++count;
        putc(ch, outfile);
    }
    printf("%lu bytes copied\n", count);
    return 0;
}
The reality is that this will probably run a little slower than a typical file copy, but only a little. The reason is fairly simple: at least assuming a halfway decent implementation of C, getc and putc (along with most of the rest of the standard I/O) will do buffering for you behind the scenes. In fact, getc and putc will often be implemented as macros, so most of the code will be expanded inline as well. Though it varies from one compiler to another, typical code will look something like this:
#define getc(f)     (f->__pos < f->__len ? f->__buf[f->__pos++] : __filbuf(f))
#define putc(ch, f) (f->__pos < f->__len ? f->__buf[f->__pos++] = (ch) : __flshbuf(f, ch))
This will be accompanied by code something like this:
#define BUF_SIZE 4096

typedef struct {
    char   __buf[BUF_SIZE];
    size_t __pos;
    size_t __len;           /* set to BUF_SIZE when the stream's buffer is allocated */
    int    __file_number;
} FILE;
Now, it's certainly true that you can improve on this:
- Since you know you're going to use the whole file sequentially, you can use a bigger buffer to reduce the number of round-trips to/from kernel mode.
- Since you know you're going to write the data exactly as it was read, you can read into a buffer, then use exactly the same buffer for writing, instead of copying the data from one buffer to another.
- Since you know you're copying files, and chances are most of that data won't be used again soon, you can probably tell your OS that it shouldn't be cached (see the sketch below).
- If the source and destination are on physically separate disks, asynchronous I/O may help by allowing the reading and writing to happen at the same time.
Note, however, that chances are these will increase development time quite a bit, and even at best you shouldn't plan on seeing anything like the speed difference suggested by your interviewer. Even a 10x improvement is unlikely, let alone the ~1000x he suggested.
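For the caching point in particular, here is a minimal sketch (not part of the answer's original code) of how a POSIX program might pass such hints with posix_fadvise(); the helper name is hypothetical, and the kernel is free to treat the advice as purely advisory:

#include <fcntl.h>

/* Hypothetical helper: advise the kernel about a source file we are about to
 * copy once, front to back. An offset/length of 0, 0 covers the whole file. */
static void advise_sequential_copy(int fd) {
    (void)posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL); /* we read it in order      */
    (void)posix_fadvise(fd, 0, 0, POSIX_FADV_NOREUSE);    /* and only expect one pass */
}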
The algorithm is essentially the same, just using buffers larger than one byte. Basically, you do something like this:
- read up to 1MB from file (read call tells you how much it actually read)
- add that amount to your total copied so far
- write that amount of data to the other file
- repeat until read tells you eof
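A minimal sketch of that loop in C, using POSIX read()/write() with a 1 MB buffer (the buffer size and the helper name copy_file_simple are arbitrary choices for illustration, and EINTR handling is omitted):

#include <stdlib.h>
#include <unistd.h>

#define COPY_BUF_SIZE (1024 * 1024)   /* 1 MB; any reasonably large size works */

/* Hypothetical helper: copy everything from in_fd to out_fd.
 * Returns the number of bytes copied, or -1 on error. */
long long copy_file_simple(int in_fd, int out_fd) {
    char *buf = malloc(COPY_BUF_SIZE);
    long long total = 0;
    ssize_t got;

    if (!buf)
        return -1;
    /* read() reports how much it actually read; 0 means end of file. */
    while ((got = read(in_fd, buf, COPY_BUF_SIZE)) > 0) {
        ssize_t done = 0;
        while (done < got) {                    /* write() may be partial */
            ssize_t w = write(out_fd, buf + done, (size_t)(got - done));
            if (w < 0) {
                free(buf);
                return -1;
            }
            done += w;
        }
        total += got;                           /* running count of bytes copied */
    }
    free(buf);
    return (got < 0) ? -1 : total;
}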
That's the simple way. There are sometimes faster ways that are much harder (e.g., using POSIX asynchronous I/O). For example, here is what it looks like (from a program I wrote) using POSIX AIO. Beware of bugs; I wrote this just for fun:
int copy_file(int input, int output) {
    struct stat statbuf;
    off_t input_size, input_pos, output_pos, last_block;
    struct aiocb read_cb, write_cb;
    const struct aiocb *suspend_list[2];
    char *bufs[NR_BUFFERS];
    ssize_t bufs_size[NR_BUFFERS];
    int i, ex_code, reading, writing, read_depleted;
    uint64_t read_buf, write_buf;

    if (-1 == fstat(input, &statbuf)) {
        perror("fstat(input)");
        return EXIT_FAILURE;
    }
    input_size = statbuf.st_size;
    ex_code = 0;

    for (i = 0; i < NR_BUFFERS; ++i)
        bufs[i] = 0;
    for (i = 0; i < NR_BUFFERS; ++i) {
        if (!(bufs[i] = malloc(BUFFER_SIZE))) {
            perror("malloc");
            ex_code = EXIT_FAILURE;
            break;
        }
    }

    memset(&read_cb, 0, sizeof(read_cb));
    memset(&write_cb, 0, sizeof(write_cb));
    output_pos = input_pos = last_block = 0;
    read_buf = write_buf = 0;
    read_depleted = reading = writing = 0;

    /* Keep going until the reader has hit EOF and the writer has caught up. */
    while (!ex_code && (!read_depleted || write_buf != read_buf)) {
        /* Start a new read if none is in flight and a free buffer is available. */
        if (!read_depleted && !reading && ((read_buf - write_buf) < NR_BUFFERS)) {
            read_cb.aio_fildes = input;
            read_cb.aio_offset = input_pos;
            read_cb.aio_buf = bufs[read_buf % NR_BUFFERS];
            read_cb.aio_nbytes = BUFFER_SIZE;
            read_cb.aio_sigevent.sigev_notify = SIGEV_NONE;
            if (-1 == aio_read(&read_cb)) {
                perror("aio_read");
                ex_code = EXIT_FAILURE;
                break;
            }
            reading = 1;
        }
        /* Start a new write if none is in flight and a filled buffer is waiting. */
        if (!writing && (read_buf > write_buf)) {
            write_cb.aio_fildes = output;
            write_cb.aio_offset = output_pos;
            write_cb.aio_buf = bufs[write_buf % NR_BUFFERS];
            write_cb.aio_nbytes = bufs_size[write_buf % NR_BUFFERS];
            write_cb.aio_sigevent.sigev_notify = SIGEV_NONE;
            if (-1 == aio_write(&write_cb)) {
                perror("aio_write");
                ex_code = EXIT_FAILURE;
                break;
            }
            writing = 1;
        }

        /* Block until at least one of the outstanding operations completes. */
        suspend_list[0] = reading ? &read_cb : NULL;
        suspend_list[1] = writing ? &write_cb : NULL;
        if (-1 == aio_suspend(suspend_list, 2, NULL)) {
            if (EINTR != errno && EAGAIN != errno) {
                perror("aio_suspend");
                ex_code = EXIT_FAILURE;
                break;
            }
        } else {
            int err;

            if (reading && EINPROGRESS != (err = aio_error(&read_cb))) {
                if (err) {
                    fprintf(stderr, "read error: %s\n", strerror(err));
                    ex_code = EXIT_FAILURE;
                    break;
                }
                bufs_size[read_buf % NR_BUFFERS] = aio_return(&read_cb);
                input_pos += bufs_size[read_buf % NR_BUFFERS];
                if (0 == bufs_size[read_buf % NR_BUFFERS])
                    read_depleted = 1;
                ++read_buf;
                reading = 0;
            }
            if (writing && EINPROGRESS != (err = aio_error(&write_cb))) {
                if (err) {
                    fprintf(stderr, "write error: %s\n", strerror(err));
                    ex_code = EXIT_FAILURE;
                    break;
                }
                if (bufs_size[write_buf % NR_BUFFERS] != aio_return(&write_cb)) {
                    fprintf(stderr, "partial write, can't handle\n");
                    ex_code = EXIT_FAILURE;
                    break;
                }
                output_pos += bufs_size[write_buf % NR_BUFFERS];
                ++write_buf;
                writing = 0;
            }
            fprintf(stderr,
                "\xd%5.1f%% (%llu of %llu; r: %i, w: %i, r-w: %llu)",
                100 * ((double)output_pos) / input_size,
                (unsigned long long)output_pos, (unsigned long long)input_size,
                reading, writing,
                (unsigned long long)(read_buf - write_buf)
            );
        }
    }
    fprintf(stderr, "\n");

    for (i = 0; i < NR_BUFFERS; ++i)
        if (bufs[i])
            free(bufs[i]);
    return ex_code;
}
You don't copy bytewise. Instead, you keep a memory buffer of some reasonable size (see the "bs" option in dd, for example) and read and write at the granularity of that buffer. I thought that would be obvious.