editing a binary file

Markiyan Kushnir mkushnir at lohika.com
Fri Dec 18 15:31:49 UTC 2009


If you don't mind, I would add fixed-buffer processing to your program.
For really huge files (or any other type of stream) that have really
huge records, reading entire records into memory could bring the box
down.

Markiyan

/* convert.c: remove record length fields from Fortran output file. */
/* Nick Barnes, Ravenbrook Limited, 2009-12-18 */

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Size in bytes of the fixed copy buffer (1 MiB).  The expansion is
 * parenthesized so the macro acts as a single value inside any larger
 * expression (e.g. `x / BUFLEN`). */
#define BUFLEN (1024*1024)
/* Fixed-size staging buffer; lives in static storage rather than on the
 * stack because of its size. */
static char buf[BUFLEN];


/*
 * Read up to `len` bytes from `fd` into `dst`, looping over short reads.
 * Returns the number of bytes actually read (less than `len` only when
 * end-of-file was reached first), or -1 if read() reported an error.
 */
static ssize_t read_full(int fd, void *dst, size_t len)
{
        char *p = dst;
        size_t got = 0;

        while (got < len) {
                ssize_t n = read(fd, p + got, len - got);
                if (n < 0) {
                        return -1;
                }
                if (n == 0) {
                        break;          /* end of file */
                }
                got += (size_t)n;
        }
        return (ssize_t)got;
}

/*
 * Write all `len` bytes of `src` to `fd`, looping over partial writes.
 * Returns 0 on success, -1 if write() failed or made no progress.
 */
static int write_full(int fd, const void *src, size_t len)
{
        const char *p = src;

        while (len > 0) {
                ssize_t n = write(fd, p, len);
                if (n <= 0) {
                        return -1;
                }
                p += n;
                len -= (size_t)n;
        }
        return 0;
}

/*
 * Filter: copy stdin to stdout, dropping the 4-byte record-length field
 * that precedes and follows every record.  The two length fields of a
 * record must be equal and positive.  They are read in host byte order.
 *
 * Returns 0 when end-of-file is reached exactly on a record boundary;
 * otherwise prints a diagnostic to stderr and exits with status 1.
 */
int main(void)
{
        /* int32_t rather than long: the on-disk fields are 4 bytes wide,
         * while long is 8 bytes on LP64 systems. */
        int32_t size, size_out, remaining;
        ssize_t bytes;

        for (;;) {
                /* Leading record-length field. */
                bytes = read_full(0, &size, sizeof size);
                if (bytes == 0) {
                        break;          /* clean EOF between records */
                }
                if (bytes < 0) {
                        fprintf(stderr, "read() returned %ld\n", (long)bytes);
                        exit(1);
                }
                if ((size_t)bytes < sizeof size) {
                        fprintf(stderr, "premature end of file: "
                                "truncated record length\n");
                        exit(1);
                }
                if (size <= 0) {
                        fprintf(stderr, "Read bad record "
                                "length %ld\n", (long)size);
                        exit(1);
                }

                /* Record payload, copied through the fixed buffer so a huge
                 * record never has to fit in memory at once. */
                remaining = size;
                while (remaining > 0) {
                        size_t want = (size_t)remaining < sizeof buf
                                ? (size_t)remaining : sizeof buf;

                        bytes = read(0, buf, want);
                        if (bytes < 0) {
                                fprintf(stderr, "read() returned %ld\n",
                                        (long)bytes);
                                exit(1);
                        }
                        if (bytes == 0) {
                                fprintf(stderr, "premature end of "
                                        "file: %ld bytes unread.\n",
                                        (long)remaining);
                                exit(1);
                        }
                        if (write_full(1, buf, (size_t)bytes) < 0) {
                                fprintf(stderr, "write() failed\n");
                                exit(1);
                        }
                        /* bytes <= want <= remaining, so this cannot go
                         * negative. */
                        remaining -= (int32_t)bytes;
                }

                /* Trailing record-length field; must match the leading one. */
                bytes = read_full(0, &size_out, sizeof size_out);
                if (bytes < 0) {
                        fprintf(stderr, "read() returned %ld\n", (long)bytes);
                        exit(1);
                }
                if ((size_t)bytes < sizeof size_out) {
                        fprintf(stderr, "premature end of file: "
                                "missing trailing record length\n");
                        exit(1);
                }
                if (size_out != size) {
                        fprintf(stderr,
                                "Mismatched record lengths: "
                                "%ld, %ld\n", (long)size, (long)size_out);
                        exit(1);
                }
        }
        return 0;
}





Nick Barnes wrote:
> If your Fortran file has the same word size and endianness as your
> C, this simple program convert.c will strip all the record length
> fields.  I just knocked it up now, no warranty, etc, but it works for
> me.  Use as a pipe:
> 
> $ ls
> convert.c       test.f
> $ gcc -Wall -Werror -ansi -pedantic convert.c -o convert
> $ gfortran44 test.f -o test
> $ ./test
> $ ls -l test-output
> -rw-r--r--  1 nb  nb  2460 Dec 18 11:17 test-output
> $ ./convert < test-output > test-converted
> $ ls -l test-converted
> -rw-r--r--  1 nb  nb  2420 Dec 18 11:18 test-converted
> $
> 
> The code does a fair amount of checking; if you get one of the error
> messages, let us know.  The most obvious unchecked problem is a short
> read, which will complain about mismatched lengths.
> 
> If your Fortran has different word sizes or endianness (e.g. most of
> the Fortran output files I use on the Clear Climate Code project
> <http://clearclimatecode.org/> are generated on big-endian machines),
> you will need to add code to tweak the 'size' value after reading it,
> and when checking the record-end marker.
> 
> Nick B
> 
> /* convert.c: remove record length fields from Fortran output file. */
> /* Nick Barnes, Ravenbrook Limited, 2009-12-18 */
> 
> #include <stdlib.h>
> #include <stdio.h>
> #include <unistd.h>
> #include <assert.h>
> 
> int main(void)
> {
>         long size;
>         char *buf;
>         ssize_t bytes;
>         assert(sizeof(size) == 4);
>         while(bytes = read(0, (void*)&size, sizeof(size))) {
>                 if (bytes < 0) {
>                         fprintf(stderr, "read() returned %ld\n", bytes);
>                         exit(1);
>                 }
>                 if (size <= 0) {
>                         fprintf(stderr, "Read bad record length %ld\n", size);
>                         exit(1);
>                 }
>                 buf = (char*)malloc(size + sizeof(size));
>                 if (!buf) {
>                         fprintf(stderr, "Couldn't allocate buffer of %ld bytes\n",
>                                 size + sizeof(size));
>                         exit(1);
>                 }
>                 bytes = read(0, buf, size + sizeof(size));
>                 if (bytes <= 0) {
>                         fprintf(stderr, "read() returned %ld\n", bytes);
>                         exit(1);
>                 }
>                 if ((*(long*)(buf+size)) != size) {
>                         fprintf(stderr, "Mismatched record lengths: %ld, %ld\n",
>                                 size, *(long*)(buf+size));
>                         exit(1);
>                 }
>                 write(1, buf, size);
>                 free(buf);
>         }
>         return 0;
> }
> 
> 
> 
> _______________________________________________
> freebsd-questions at freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-questions
> To unsubscribe, send any mail to "freebsd-questions-unsubscribe at freebsd.org"


More information about the freebsd-questions mailing list