zlib gzip与zip

 分类: 致知


先来看一下zlib,它的官方网站是www.zlib.net,目前的最新版本是1.2.11。官方介绍它是一个免费的数据压缩库(A Massively Spiffy Yet Delicately Unobtrusive Compression Library),zlib由两位主要人员开发,Jean-loup Gailly负责压缩功能,Mark Adler负责解压功能。由于代码是开源的,我们不妨把源码下载下来,编译一下,编译之后我们在zlib的安装目录看到了下列文件:

[root@iZrj9hu97fjb3e1xlfktg8Z zlib]# tree
├── include
│   ├── zconf.h
│   └── zlib.h
├── lib
│   ├── libz.a
│   ├── libz.so -> libz.so.1.2.11
│   ├── libz.so.1 -> libz.so.1.2.11
│   ├── libz.so.1.2.11
│   └── pkgconfig
│       └── zlib.pc
└── share
    └── man
        └── man3
            └── zlib.3

6 directories, 8 files


int ZEXPORT compress2 (dest, destLen, source, sourceLen, level)
    Bytef *dest;
    uLongf *destLen;
    const Bytef *source;
    uLong sourceLen;
    int level;
    z_stream stream;
    int err;
    const uInt max = (uInt)-1;
    uLong left;

    left = *destLen;
    *destLen = 0;

    stream.zalloc = (alloc_func)0;
    stream.zfree = (free_func)0;
    stream.opaque = (voidpf)0;

    err = deflateInit(&stream, level);
    if (err != Z_OK) return err;

    stream.next_out = dest;
    stream.avail_out = 0;
    stream.next_in = (z_const Bytef *)source;
    stream.avail_in = 0;

    do {
        if (stream.avail_out == 0) {
            stream.avail_out = left > (uLong)max ? max : (uInt)left;
            left -= stream.avail_out;
        if (stream.avail_in == 0) {
            stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen;
            sourceLen -= stream.avail_in;
        err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH);
    } while (err == Z_OK);

    *destLen = stream.total_out;
    return err == Z_STREAM_END ? Z_OK : err;

int ZEXPORT compress (dest, destLen, source, sourceLen)
    Bytef *dest;
    uLongf *destLen;
    const Bytef *source;
    uLong sourceLen;
    return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION);


#include <stdio.h>
#include <string.h>
#include <zlib.h>
#include <stdlib.h>
int my_write(char* fname,const char * buffer);
int main(int argc,char** argv){
    if(argc == 1){
        printf("Please Input A String You want to Compress\n");
        return -1;
    char * str = argv[1];
    printf("The String You Want to Compress is: %s\n",str);
    uLong sLen = strlen(str);
    uLong tLen;
    //通过zlib API计算tlen的长度,以便接下来给压缩之后的数据分配空间
    tLen = compressBound(sLen);
    char * cspace;
    cspace = malloc(tLen);
    if(cspace == NULL){
        printf("Not enough memory!\n");
        return -1;
    int result;
    result = compress(cspace,&tLen,str,sLen);
    if(result == Z_OK){
        printf("Compress Sucess!\n");    
        int j = my_write("compressdata.bin",cspace);
            printf("\t-Sucess to write into disk!\n");
            printf("\t-Failure to write into disk!\n");
    result = uncompress(str,&sLen,cspace,tLen);
    if(result == Z_OK){
        printf("Original String is: %s\n",str);
        printf("uncompress failure!\n");
    return 0;

/*
 * Write the NUL-terminated string `buffer` to the file `fname`, replacing
 * any existing content (the file is opened in "wb" mode).
 *
 * The length is taken with strlen(), so only the bytes before the first
 * zero byte are written.
 *
 * Returns 0 when a non-empty buffer was written and the file was closed
 * cleanly; returns 1 if the file cannot be opened, the buffer is empty,
 * or the write or close fails.
 */
int my_write(char* fname,const char * buffer)
{
    size_t len = strlen(buffer);
    FILE *pFile = fopen(fname,"wb");
    if(pFile == NULL){
        /* Passing a NULL stream to fwrite() would be undefined behaviour. */
        return 1;
    }
    /* A single item of `len` bytes: fwrite() returns 1 only if all of it was written. */
    size_t writesize = fwrite(buffer,len,1,pFile);
    /* fclose() flushes buffered data, so a failure here means the write was lost. */
    if(fclose(pFile) != 0){
        return 1;
    }
    if(len>0 && 1 == writesize){
        return 0;
    }
    return 1;
}


[root@iZrj9hu97fjb3e1xlfktg8Z ~]# gcc zcompress.c  -I /tmp/zlib/include/ -lz -L /tmp/zlib/lib/ -o z
[root@iZrj9hu97fjb3e1xlfktg8Z ~]# ./z "This is the string"
The String You Want to Compress is: This is the string
Compress Sucess!
	-Sucess to write into disk!
Original String is: This is the string


The deflation algorithm used by zip and gzip is a variation of LZ77
(Lempel-Ziv 1977, see reference below). It finds duplicated strings in
the input data. The second occurrence of a string is replaced by a
pointer to the previous string, in the form of a pair (distance,
length). Distances are limited to 32K bytes, and lengths are limited
to 258 bytes. When a string does not occur anywhere in the previous
32K bytes, it is emitted as a sequence of literal bytes. (In this
description, ‘string’ must be taken as an arbitrary sequence of bytes,
and is not restricted to printable characters.)

2. gzip file format

The pkzip format imposes a lot of overhead in various headers, which
are useful for an archiver but not necessary when only one file is
compressed. gzip uses a much simpler structure. Numbers are in little
endian format, and bit 0 is the least significant bit.
A gzip file is a sequence of compressed members. Each member has the
following structure:

2 bytes magic header 0x1f, 0x8b (\037 \213)
1 byte compression method (0..7 reserved, 8 = deflate)
1 byte flags
bit 0 set: file probably ascii text
bit 1 set: continuation of multi-part gzip file
bit 2 set: extra field present
bit 3 set: original file name present
bit 4 set: file comment present
bit 5 set: file is encrypted
bit 6,7: reserved
4 bytes file modification time in Unix format
1 byte extra flags (depend on compression method)
1 byte operating system on which compression took place

2 bytes optional part number (second part=1)
2 bytes optional extra field length
? bytes optional extra field
? bytes optional original file name, zero terminated
? bytes optional file comment, zero terminated
12 bytes optional encryption header
? bytes compressed data
4 bytes crc32
4 bytes uncompressed input size modulo 2^32


1,zlib提供一系列库函数,可以采用deflation算法对数据进行压缩,并定义了跨平台的zlib数据格式(ZLIB Data Format)。


  1. Zlib
  2. http://www.zlib.net/zlib_faq.html
