C text vs binary files fread fwrite comparison guide
|

C Text vs Binary Files: When to Use Which (Explained Simply)

You opened a file with "r" and it worked perfectly. Then you tried reading an image file the same way and got garbage. What happened? The answer lies in the fundamental difference between text mode and binary mode in C — a distinction that trips up even experienced programmers.

In the previous lesson on C File I/O Basics, we used text-mode functions like fprintf() and fgets(). Now we’ll explore the binary side of C file I/O — and understand why the distinction matters.

Text Mode vs Binary Mode

When you open a file with fopen(), the mode string determines whether the file is treated as text or binary:

// Same call, different mode string: the 'b' is the only difference.
FILE *text_file   = fopen("data.txt", "r");    // text mode: the library may translate line endings
FILE *binary_file = fopen("data.bin", "rb");   // binary mode: bytes pass through unchanged

// The 'b' flag switches to binary mode and combines with every base mode:
// "rb", "wb", "ab", "rb+", "wb+", "ab+"   ("r+b", "w+b", "a+b" are equivalent spellings)
// fopen() returns NULL on failure, so check the result before using either stream.

In text mode, the C library may perform translations on the data as it’s read or written. The most important translation involves newline characters. In binary mode, no translations occur — every byte is read and written exactly as-is.

On Linux and macOS, there’s actually no difference between text and binary modes because these systems use \n (LF) for line endings natively. The distinction matters primarily on Windows, which uses \r\n (CR+LF). But writing portable code means you should always use the correct mode, regardless of your current platform.

The Newline Translation Problem

Here’s what happens on Windows in text mode:

// Here fp stands for a stream opened WITHOUT the 'b' flag (e.g. "w" or "r").

// Writing in text mode on Windows:
fprintf(fp, "Hello\n");
// Actually writes: H e l l o \r \n  (7 bytes, not 6)
// The program passed 6 characters; the library added the \r on the way out.

// Reading in text mode on Windows:
fgets(buf, sizeof(buf), fp);
// Reads \r\n from disk, converts to \n in memory
// so buf never contains the \r that is stored in the file.

This translation is invisible when working with text — but it corrupts binary data. Imagine writing a struct that contains the byte value 0x0A (which is '\n'). In text mode on Windows, the library inserts an extra 0x0D ('\r') before it, corrupting your data structure.

// Binary data corruption example
int32_t value = 0x0A0B0C0D;

// Text mode write — WRONG for binary data
FILE *fp = fopen("data.bin", "w");  // missing 'b'!
fwrite(&value, sizeof(value), 1, fp);
// On Windows: bytes 0D 0A get expanded to 0D 0D 0A
// File is now 5 bytes instead of 4!

// Binary mode write — CORRECT
FILE *fp = fopen("data.bin", "wb");
fwrite(&value, sizeof(value), 1, fp);
// Exact 4 bytes written: 0D 0C 0B 0A

fread() and fwrite(): Binary I/O Workhorses

While fprintf() and fgets() are designed for human-readable text, fread() and fwrite() handle raw binary data:

// ptr    — buffer to write from (fwrite) or read into (fread)
// size   — size in bytes of ONE item
// count  — number of items to transfer
// stream — an open FILE *
size_t fwrite(const void *ptr, size_t size, size_t count, FILE *stream);
size_t fread(void *ptr, size_t size, size_t count, FILE *stream);

// Both return the number of items (not bytes) successfully read/written
// A result smaller than count means an error occurred or, for fread(), end of file was reached.
// Write an array of integers to binary file
int scores[] = {95, 87, 73, 100, 62};
// Derive the element count from the array so the two can never disagree
size_t count = sizeof scores / sizeof scores[0];

FILE *fp = fopen("scores.bin", "wb");
if (fp) {
    // Writes 5 × 4 = 20 bytes (assuming 4-byte int)
    if (fwrite(scores, sizeof scores[0], count, fp) != count) {
        fprintf(stderr, "Short write to scores.bin\n");
    }
    // fclose() flushes buffered data, so it can fail even when fwrite() succeeded
    if (fclose(fp) != 0) {
        perror("scores.bin");
    }
}

// Read them back
int loaded[5];
size_t capacity = sizeof loaded / sizeof loaded[0];
FILE *fp2 = fopen("scores.bin", "rb");
if (fp2) {
    size_t items_read = fread(loaded, sizeof loaded[0], capacity, fp2);
    printf("Read %zu items\n", items_read);  // 5
    // Print only what was actually read: after a short read the remaining
    // elements of loaded[] are uninitialised and must not be touched.
    for (size_t i = 0; i < items_read; i++) {
        printf("Score %zu: %d\n", i, loaded[i]);
    }
    fclose(fp2);
}

Binary I/O is dramatically faster than text I/O because there’s no parsing or formatting overhead. Writing 1 million integers with fwrite() is roughly 10-50x faster than writing them with fprintf(), because fwrite() just copies raw bytes while fprintf() must convert each integer to ASCII characters.

Writing Structs to Binary Files

One of the most powerful uses of binary I/O is serializing C structs directly to disk:

#include <stdio.h>
#include <string.h>

// One student record. save_students()/load_students() transfer it as raw
// bytes (sizeof(Student) per record), so this layout IS the file format:
// changing a field changes what existing files mean.
typedef struct {
    char name[50];   // name stored inline in the record (an array, not a pointer)
    int age;
    float gpa;
} Student;

/*
 * Save an array of Student records to a binary file.
 *
 * File layout: one int holding the record count, followed by `count`
 * raw Student structs.
 *
 * filename — path of the file to create or truncate
 * students — array of at least `count` records (may be NULL only if count is 0)
 * count    — number of records to write; must not be negative
 *
 * Returns 0 when the header and every record were written and the file
 * was closed cleanly, -1 on any failure (bad arguments, fopen, short
 * write, or an error reported by fclose).
 */
int save_students(const char *filename, Student *students, int count) {
    // A negative count would wrap to a huge size_t inside fwrite()
    if (!filename || count < 0 || (count > 0 && !students)) return -1;

    FILE *fp = fopen(filename, "wb");
    if (!fp) return -1;

    // Write the count first so we know how many to read back
    int ok = fwrite(&count, sizeof count, 1, fp) == 1;

    // Write all students in one call
    if (ok && count > 0) {
        ok = fwrite(students, sizeof *students, (size_t)count, fp) == (size_t)count;
    }

    // fclose() flushes buffered data; a failure here means the file is incomplete
    if (fclose(fp) != 0) ok = 0;

    return ok ? 0 : -1;
}

Reading Structs from Binary Files

/*
 * Load Student records from a file written as: one int record count
 * followed by that many raw Student structs.
 *
 * filename  — path of the file to read
 * students  — destination array with room for at least `max_count` records
 * max_count — capacity of `students`; extra records in the file are ignored
 *
 * Returns the number of records actually read (which may be smaller than
 * the stored count if the file is truncated), or -1 if the arguments are
 * invalid, the file cannot be opened, or the count header is missing or
 * negative.
 */
int load_students(const char *filename, Student *students, int max_count) {
    if (!filename || max_count < 0 || (max_count > 0 && !students)) return -1;

    FILE *fp = fopen(filename, "rb");
    if (!fp) return -1;

    // Never trust the header: a failed read would leave count uninitialised,
    // and a negative value would become a huge size_t in the fread() below.
    int count = 0;
    if (fread(&count, sizeof count, 1, fp) != 1 || count < 0) {
        fclose(fp);
        return -1;
    }

    if (count > max_count) count = max_count;

    size_t items_read = fread(students, sizeof *students, (size_t)count, fp);

    fclose(fp);
    return (int)items_read;
}

This pattern — writing a count header followed by an array of structs — is the foundation of simple binary file formats. It’s how many games save state, how databases store records, and how countless applications persist structured data.

Complete Example: Mini Student Database

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DB_FILE "students.db"
#define MAX_STUDENTS 100

// One database record. db_save()/db_load() transfer it as raw bytes
// (sizeof(Student) per record), so this layout is the on-disk format.
typedef struct {
    char name[50];   // stored inline; db_add() keeps at most 49 characters plus the '\0'
    int age;
    float gpa;
    int active;  // 1 = active, 0 = deleted
} Student;

// In-memory image of the whole database: a fixed-capacity record array
// plus the number of slots currently in use.
typedef struct {
    int count;                      // number of used slots; db_add() appends at records[count]
    Student records[MAX_STUDENTS];  // only the first `count` entries are saved and listed
} Database;

void db_save(const Database *db) {
    FILE *fp = fopen(DB_FILE, "wb");
    if (!fp) { perror("db_save"); return; }
    fwrite(&db->count, sizeof(int), 1, fp);
    fwrite(db->records, sizeof(Student), db->count, fp);
    fclose(fp);
}

/*
 * Load the database from DB_FILE into *db.
 *
 * A missing file, or one whose count header is unreadable or not
 * positive, yields an empty database. db->count is set to the number of
 * records actually read, which may be smaller than the stored count if
 * the file is truncated, and is capped at MAX_STUDENTS.
 *
 * Returns db->count.
 */
int db_load(Database *db) {
    db->count = 0;  // leave the database in a defined state on every failure path

    FILE *fp = fopen(DB_FILE, "rb");
    if (!fp) {
        return 0;  // empty database is OK
    }

    int stored = 0;
    if (fread(&stored, sizeof stored, 1, fp) == 1 && stored > 0) {
        if (stored > MAX_STUDENTS) stored = MAX_STUDENTS;
        // Trust the number of records actually read, not the header
        db->count = (int)fread(db->records, sizeof db->records[0],
                               (size_t)stored, fp);
    }

    fclose(fp);
    return db->count;
}

/*
 * Append a new active student to the database and save it to disk.
 *
 * If every slot is already in use, prints a message to stderr and leaves
 * the database unchanged. Names longer than the name field are truncated.
 */
void db_add(Database *db, const char *name, int age, float gpa) {
    const int slot = db->count;
    if (slot >= MAX_STUDENTS) {
        fputs("Database full!\n", stderr);
        return;
    }

    Student *rec = db->records + slot;

    // strncpy() zero-fills the rest of the field for short names but does
    // not terminate long ones, hence the explicit '\0' in the last byte.
    const size_t last = sizeof rec->name - 1;
    strncpy(rec->name, name, last);
    rec->name[last] = '\0';

    rec->active = 1;
    rec->gpa = gpa;
    rec->age = age;

    db->count = slot + 1;
    db_save(db);
    printf("Added: %s\n", name);
}

void db_list(const Database *db) {
    printf("\n%-5s %-20s %-5s %-5s\n", "ID", "Name", "Age", "GPA");
    printf("%-5s %-20s %-5s %-5s\n", "---", "----", "---", "---");
    for (int i = 0; i < db->count; i++) {
        if (db->records[i].active) {
            printf("%-5d %-20s %-5d %-5.2f\n",
                   i, db->records[i].name,
                   db->records[i].age, db->records[i].gpa);
        }
    }
    printf("\n");
}

/*
 * Demo driver: load the database, seed it with three sample students the
 * first time it is found empty, then print the table.
 */
int main(void) {
    // Zero-initialise so db.count is defined even when db_load() cannot
    // read a count from the file.
    Database db = {0};
    db_load(&db);
    printf("Loaded %d records.\n", db.count);

    // Add some test data
    if (db.count == 0) {
        // 'f' suffix: the gpa parameter is a float, so pass float literals
        db_add(&db, "Alice Johnson", 20, 3.85f);
        db_add(&db, "Bob Smith", 22, 3.42f);
        db_add(&db, "Carol Davis", 19, 3.95f);
    }

    db_list(&db);
    return EXIT_SUCCESS;
}

When to Use Text vs Binary

Use text mode when:

  • Data needs to be human-readable (config files, logs, CSV)
  • You need to edit the file with a text editor
  • Interoperability matters more than performance
  • Data is naturally string-based

Use binary mode when:

  • Storing structs or numeric arrays directly
  • Performance matters (binary is 10-50x faster for bulk data)
  • File size matters (binary is more compact)
  • Data isn’t meant to be human-readable (images, audio, game saves)
  • You need exact byte-level control

Portability Concerns

Binary files written on one machine may not be readable on another due to:

// 1. Endianness — byte order differs between architectures
int x = 0x01020304;
// Little-endian (x86): stored as 04 03 02 01
// Big-endian (some ARM): stored as 01 02 03 04
// fwrite(&x, ...) copies whichever order the writing machine uses.

// 2. Struct padding — compilers add padding bytes
typedef struct {
    char c;    // 1 byte
    // 3 bytes padding (on most systems)
    int x;     // 4 bytes
} Padded;
// sizeof(Padded) is typically 8, not 5
// Writing sizeof(Padded) bytes puts the padding in the file too, and the
// amount of padding can differ between compilers and targets.

// 3. Type sizes — int might be 2 or 4 bytes
// Use fixed-width types from <stdint.h> for portable binary formats:
#include <stdint.h>
int32_t portable_int;    // always 4 bytes
int64_t portable_long;   // always 8 bytes
// Fixed-width types pin the size only; the byte order still follows the machine.

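For truly portable binary formats, you need to pick one byte order and convert to it on every read and write, use fixed-width types, and write fields individually instead of dumping whole structs (padding differs between compilers). For files that are written and read back by the same build of the program on the same machine (like game saves or caches), this usually isn’t a concern.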

Common Mistakes

1. Writing Pointers to Files

// Deliberately broken example: the struct holds a pointer, not the text.
typedef struct {
    char *name;  // this is a POINTER
    int age;
} Person;

Person p = {"Alice", 30};
fwrite(&p, sizeof(Person), 1, fp);
// WRONG! You wrote the pointer ADDRESS, not the string!
// When you read it back, the pointer is meaningless
// Fix: store the characters inside the struct (e.g. char name[50]), or
// write the string's length followed by its bytes as a separate step.

2. Forgetting Binary Mode

// The two declarations below are alternatives; use one or the other.

// WRONG — may corrupt data on Windows
// (text mode translates line-ending bytes inside the image data)
FILE *fp = fopen("image.png", "r");

// CORRECT
// ('b' makes every byte arrive exactly as stored, on every platform)
FILE *fp = fopen("image.png", "rb");

3. Not Checking fread() Return Value

// WRONG
fread(&data, sizeof(data), 1, fp);
// What if the file was truncated?
// After a failed or partial read, data does not hold a valid value.

// CORRECT
// One item was requested, so any result other than 1 means it was not fully read.
if (fread(&data, sizeof(data), 1, fp) != 1) {
    // feof(fp) / ferror(fp) tell end-of-file apart from an I/O error
    fprintf(stderr, "Failed to read data\n");
}

Summary

Text mode performs newline translation and works with human-readable data through fprintf()/fgets(). Binary mode preserves exact bytes and uses fread()/fwrite() for raw data. Use text for config files and logs; use binary for structs, arrays, and performance-critical storage. Always add 'b' to your mode string when working with non-text data. In the next lesson, we’ll learn about file positioning with fseek() and ftell() — essential for random access in binary files.

Similar Posts

Leave a Reply

Your email address will not be published. Required fields are marked *