Python之zlib模塊的使用


zlib模塊作用:
  對數據進行壓縮與解壓,以減少數據在硬盤、內存等設備上佔用的空間

1、內存中數據的壓縮與解壓

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import zlib
import binascii

# Sample payload that lives entirely in memory.
original_data = b'This is the original text.'
original_size = len(original_data)
print('源始數據:長度 : {},內容 : {}'.format(original_size, original_data))

# Compress the payload. binascii.hexlify renders the compressed bytes as
# hexadecimal digits so that they can be printed readably.
compressed_data = zlib.compress(original_data)
compressed_hex = binascii.hexlify(compressed_data)
print('壓縮的數據:長度 : {},內容 : {}'.format(len(compressed_data), compressed_hex))

# Decompress again to get the original bytes back.
# NOTE(review): the label below still reads "compressed data" although the
# value printed is the decompressed payload; the text is left untouched so the
# program output does not change.
decompress_data = zlib.decompress(compressed_data)
decompressed_size = len(decompress_data)
print('壓縮的數據:長度 : {},內容 : {}'.format(decompressed_size, decompress_data))
zlib_memory.py

運行效果

[root@ mnt]# python3 zlib_memory.py 
源始數據:長度 : 26,內容 : b'This is the original text.'
壓縮的數據:長度 : 32,內容 : b'789c0bc9c82c5600a2928c5485fca2ccf4ccbcc41c8592d48a123d007f2f097e' #小文件壓縮未必減少文件或內存的大小
壓縮的數據:長度 : 26,內容 : b'This is the original text.'

 2、計算出大小達到多少時進行壓縮才有用的示例

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import zlib
import binascii

# Base payload; it is repeated below to produce inputs of growing size.
original_data = b'This is the original text.'

# Two right-aligned columns, each at least 15 characters wide.
template = '{:>15}  {:>15}'
divider = '-' * 25
print(template.format('原始長度', '壓縮長度'))
print(template.format(divider, divider))

for repeat in range(5):
    # Input for this row: the base payload repeated `repeat` times.
    data = original_data * repeat
    compressed = zlib.compress(data)
    # Mark the rows where the compressed form is larger than the input.
    if len(data) < len(compressed):
        highlight = '*'
    else:
        highlight = ''
    print(template.format(len(data), len(compressed)), highlight)
zlib_lengths.py

 運行效果

[root@ mnt]# python3 zlib_lengths.py 
           原始長度             壓縮長度
-------------------------  -------------------------
              0                8 *
             26               32 *
             52               35  #從這里開始,壓縮變得有優勢
             78               35 
            104               36 

  3、設置壓縮級別來進行壓縮數據的示例

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import zlib
import binascii

# A highly repetitive payload: the base sentence repeated 1024 times.
original_data = b'This is the original text.' * 1024

# Two right-aligned columns, each at least 15 characters wide.
template = '{:>15}  {:>15}'
rule = '-' * 25
print(template.format('壓縮級別', '壓縮大小'))
print(template.format(rule, rule))

# Compress the same payload once per level, 0 through 9, and report the size
# of each result.
for level in range(10):
    packed_size = len(zlib.compress(original_data, level))
    print(template.format(level, packed_size))
zlib_compresslevel.py

 運行效果

[root@python-mysql mnt]# python3 zlib_compresslevel.py 
           壓縮級別             壓縮大小
-------------------------  -------------------------
              0            26635
              1              215
              2              215
              3              215
              4              118
              5              118 <==推薦
              6              118 <==推薦
              7              118
              8              118
              9              118

   4、zlib增量壓縮與解壓

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import zlib
import binascii

def compress_in_blocks(stream, level=1, block_size=64):
    """Compress a binary stream block by block and return the whole result.

    Args:
        stream: object with a ``read(size)`` method returning bytes.
        level: compression level handed to ``zlib.compressobj``.
        block_size: number of bytes requested from ``stream`` per read.

    Returns:
        The bytes returned by every ``compress()`` call followed by the bytes
        returned by ``flush()``, joined in order.

    Every chunk has to be kept. ``compress()`` may hand back part of the
    stream early (for example the two header bytes) and return empty bytes
    while it is still buffering, so ``flush()`` alone is not a complete
    stream once the input outgrows the compressor's internal buffer.
    """
    compressor = zlib.compressobj(level)
    pieces = []
    while True:
        block = stream.read(block_size)  # read at most block_size bytes
        if not block:
            break
        compressed = compressor.compress(block)
        if compressed:
            print('壓縮數據: {}'.format(
                binascii.hexlify(compressed)))
            pieces.append(compressed)
        else:
            print('數據緩存中...')
    remaining = compressor.flush()  # emit whatever is still buffered
    print('Flushed: {}'.format(binascii.hexlify(remaining)))
    pieces.append(remaining)
    return b''.join(pieces)


if __name__ == '__main__':
    with open('content.txt', 'rb') as source:
        compressed_stream = compress_in_blocks(source)

    # Decompress in one go. The collected chunks already start with the
    # header the compressor produced, so no header is prepended by hand.
    decompress_data = zlib.decompress(compressed_stream)
    print(decompress_data)
zlib_incremental.py

 運行效果

[root@ mnt]# python3 zlib_incremental.py 
壓縮數據: b'7801'
數據緩存中...
數據緩存中...
數據緩存中...
數據緩存中...
數據緩存中...
Flushed: b'55904b6ac4400c44f73e451da0f129b20c2110c85e696b8c40ddedd167ce1f7915025a087daa9ef4be8c07e4f21c38962e834b800647435fd3b90747b2810eb9c4bbcc13ac123bded6e4bef1c91ee40d3c6580e3ff52aad2e8cb2eb6062dad74a89ca904cbb0f2545e0db4b1f2e01955b8c511cb2ac08967d228af1447c8ec72e40c4c714116e60cdef171bb6c0feaa255dff1c507c2c4439ec9605b7e0ba9fc54bae39355cb89fd6ebe5841d673c7b7bc68a46f575a312eebd220d4b32441bdc1b36ebf0aedef3d57ea4b26dd986dd39af57dfb05d32279de'

#解壓的數據
b'Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec\negestas, enim et consectetuer ullamcorper, lectus ligula rutrum leo, a\nelementum elit tortor eu quam. Duis tincidunt nisi ut ante. Nulla\nfacilisi. Sed tristique eros eu libero. Pellentesque vel arcu. Vivamus\npurus orci, iaculis ac, suscipit sit amet, pulvinar eu,\nlacus.\n'

 5、壓縮與未壓縮數據混合在一起的解壓示例

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import zlib

# Read the sample data as bytes; the with-block closes the file handle
# instead of leaving it open.
with open('zlib_mixed.py', 'rb') as source_file:
    lorem = source_file.read()
compressed = zlib.compress(lorem)

# Append the uncompressed bytes directly after the compressed stream.
combined = compressed + lorem

# Create a decompression object and feed it the combined bytes; the result is
# compared with the original data below.
decompressor = zlib.decompressobj()
decompressed = decompressor.decompress(combined)

decompressed_matches = decompressed == lorem
print('解壓數據的匹配:', decompressed_matches)

# unused_data is compared against the uncompressed bytes that were appended.
unused_matches = decompressor.unused_data == lorem
print('使用不解壓數據的匹配 :', unused_matches)
zlib_mixed.py

 運行效果

[root@ mnt]# python3 zlib_mixed.py 
解壓數據的匹配: True
使用不解壓數據的匹配 : True

 6、校驗數據的完整性CRC32和adler32算法

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import zlib

# Read the whole file as bytes; the with-block closes the file handle instead
# of leaving it open.
with open('test.py', 'rb') as source_file:
    data = source_file.read()

# Adler-32 of the data, then the same call again with the first result passed
# as the second argument.
cksum = zlib.adler32(data)
print('Adler32: {:12d}'.format(cksum))
print('       : {:12d}'.format(zlib.adler32(data, cksum)))

# The same two steps using CRC-32.
cksum = zlib.crc32(data)
print('CRC-32 : {:12d}'.format(cksum))
print('       : {:12d}'.format(zlib.crc32(data, cksum)))
zlib_checksums.py

 運行效果

[root@ mnt]# python3 zlib_checksums.py 
Adler32:   4272063592
       :    539822302
CRC-32 :   2072120480
       :   1894987964

  7、zlib網絡傳輸壓縮與解壓數據的示例(示例最終會讀取文件跟服務端傳過來文件比較是否相等)

Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec
egestas, enim et consectetuer ullamcorper, lectus ligula rutrum leo, a
elementum elit tortor eu quam. Duis tincidunt nisi ut ante. Nulla
facilisi. Sed tristique eros eu libero. Pellentesque vel arcu. Vivamus
purus orci, iaculis ac, suscipit sit amet, pulvinar eu,
lacus.
content.txt
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import socket
import logging
from io import BytesIO
import binascii

# 每次讀取的塊大小
import zlib

# Number of bytes requested from the socket per recv() call.
BLOCK_SIZE = 64

if __name__ == '__main__':
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(name)s : %(message)s'
    )

    logger = logging.getLogger('Client')

    ip_port = ('127.0.0.1', 8000)
    # The two messages sent through logging.info()/logging.debug() use the
    # module-level functions rather than `logger`.
    logging.info('開始連接服務器:{}'.format(ip_port[0] + ':' + str(ip_port[1])))
    # Create the TCP socket.
    sk = socket.socket(family=socket.AF_INET, type=socket.SOCK_STREAM)

    # try/finally guarantees the socket is closed even if connecting,
    # receiving or decompressing raises.
    try:
        # Connect to the server.
        sk.connect(ip_port)

        # Name of the file the server is asked to read.
        request_file = 'content.txt'
        logging.debug('發送文件名:{}'.format(request_file))
        # sendall() keeps writing until the whole request has been sent.
        sk.sendall(request_file.encode('utf-8'))

        # Collects the decompressed bytes received from the server.
        buffer = BytesIO()

        # Decompression object fed incrementally with the received data.
        decompressor = zlib.decompressobj()

        while True:
            response = sk.recv(BLOCK_SIZE)
            if not response:
                # An empty read ends the receive loop.
                break
            logger.debug('從服務端讀取數據:{}'.format(binascii.hexlify(response)))
            # Put any input the decompressor has not consumed yet in front of
            # the newly received bytes.
            to_decompress = decompressor.unconsumed_tail + response
            while to_decompress:
                decompressed = decompressor.decompress(to_decompress)
                if decompressed:
                    logger.debug('解壓數據:{}'.format(decompressed))
                    buffer.write(decompressed)
                    to_decompress = decompressor.unconsumed_tail
                else:
                    # No output yet; wait for more input from the socket.
                    logger.debug('緩存中...')
                    to_decompress = None

        # Collect anything the decompressor still holds.
        remainder = decompressor.flush()
        if remainder:
            logger.debug('刷新數據 {}'.format(remainder))
            buffer.write(remainder)
    finally:
        sk.close()

    # Compare everything that was decompressed with the local copy of the
    # requested file; the with-block closes the file handle.
    full_reponse = buffer.getvalue()
    with open(request_file, 'rb') as local_file:
        read_file = local_file.read()
    logger.debug('服務器傳過來的文件與客戶端讀取的文件是否相等 : {}'.format(full_reponse == read_file))
zlib_client.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import zlib
import socketserver
import logging
import binascii

# Size, in bytes, of each block read from the file and of each flushed chunk
# that is sent.
BLOCK_SIZE = 64


class ZlibRquestHandler(socketserver.BaseRequestHandler):
    """Send a client-named file back to the client, zlib-compressed.

    The handler receives a file name from the connection, reads that file in
    BLOCK_SIZE blocks, compresses the blocks incrementally and writes the
    compressed bytes to the connection.
    """

    logger = logging.getLogger('Server')

    def handle(self):
        """Serve one request: receive a file name, send the compressed file."""
        # Compression object using level 1.
        compressor = zlib.compressobj(1)
        # File name sent by the client (raw bytes).
        filename = self.request.recv(1024)

        self.logger.debug('接收客戶端數據,文件名 {}'.format(filename))

        if not filename:
            # Nothing was received, so there is no file to open.
            return

        # NOTE(security): the path comes straight from the client and is
        # opened unchecked; restrict it to a known directory before using
        # this handler outside a demo.
        with open(filename, 'rb') as rf:
            while True:
                block = rf.read(BLOCK_SIZE)
                if not block:
                    break
                self.logger.debug('讀取文件內容:{}'.format(block))

                # Compress the block; the result may be empty while the
                # compressor is still buffering.
                compressed = compressor.compress(block)
                if compressed:
                    self.logger.debug('發送的十六進制:{}'.format(binascii.hexlify(compressed)))
                    # sendall() keeps writing until the whole chunk is sent,
                    # whereas send() may transmit only part of it.
                    self.request.sendall(compressed)
                else:
                    self.logger.debug('緩存中...')

            # Fetch whatever the compressor still holds.
            remaining = compressor.flush()

            # Send the flushed bytes in BLOCK_SIZE slices until none are left.
            while remaining:
                to_send = remaining[:BLOCK_SIZE]
                remaining = remaining[BLOCK_SIZE:]
                self.logger.debug('刷新緩存數據:{}'.format(binascii.hexlify(to_send)))
                self.request.sendall(to_send)

if __name__ == '__main__':
    # Show DEBUG and above, prefixing each message with the logger name.
    logging.basicConfig(format='%(name)s : %(message)s', level=logging.DEBUG)

    # Set address reuse on the TCPServer class before the server is created.
    socketserver.TCPServer.allow_reuse_address = True

    # Listen on the local loopback address, port 8000, and handle requests
    # with ZlibRquestHandler until the process is stopped.
    listen_address = ('127.0.0.1', 8000)
    server = socketserver.TCPServer(listen_address, ZlibRquestHandler)
    server.serve_forever()
zlib_server.py

 運行效果

[root@ mnt]# python3 zlib_server.py 
Server : 接收客戶端數據,文件名 b'content.txt'
Server : 讀取文件內容:b'Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec\n'
Server : 發送的十六進制:b'7801'
Server : 讀取文件內容:b'egestas, enim et consectetuer ullamcorper, lectus ligula rutrum '
Server : 緩存中...
Server : 讀取文件內容:b'leo, a\nelementum elit tortor eu quam. Duis tincidunt nisi ut ant'
Server : 緩存中...
Server : 讀取文件內容:b'e. Nulla\nfacilisi. Sed tristique eros eu libero. Pellentesque ve'
Server : 緩存中...
Server : 讀取文件內容:b'l arcu. Vivamus\npurus orci, iaculis ac, suscipit sit amet, pulvi'
Server : 緩存中...
Server : 讀取文件內容:b'nar eu,\nlacus.\n\n'
Server : 緩存中...
Server : 刷新緩存數據:b'55904b4a05410c45e7b58abb80a257e15044109cc7eaf808a4aada7cdefa4d8f44c820e473ef495eb7f1845c9e13e7d66d7009d0e4e8187b398fe04836d02997'
Server : 刷新緩存數據:b'f890f500abc48197bd78347eb00779072f99e0f8bf94aa34c7b68bad434b2b1d2a8f54826558792aef0e6aac3c7945156e71c4b60a70e2276996578a23640d39'
Server : 刷新緩存數據:b'730596b8200b73051f78bb5dda370dd1aa1ff8e01361e2213fc960db7e0ba97c557ae09d55cb89fd6e3e594136f2c0a73c69a6b72bad18b70de9101a5992a0d1'
Server : 刷新緩存數據:b'e159b75f85f6f79e2bf5298b6eccdeb466fd68ed174d1979e8'
[root@p mnt]# python zlib_client.py 
root : 開始連接服務器:127.0.0.1:8000
root : 發送文件名:content.txt
Client : 從服務端讀取數據:780155904b4a05410c45e7b58abb80a257e15044109cc7eaf808a4aada7cdefa4d8f44c820e473ef495eb7f1845c9e13e7d66d7009d0e4e8187b398fe04836d0
Client : 解壓數據:Lorem ipsum dolor sit amet, consectetuer a
Client : 從服務端讀取數據:2997f890f500abc48197bd78347eb00779072f99e0f8bf94aa34c7b68bad434b2b1d2a8f54826558792aef0e6aac3c7945156e71c4b60a70e2276996578a2364
Client : 解壓數據:dipiscing elit. Donec
egestas, enim et consectetuer ullamcorper, lectus ligula rutrum leo, a
elementum elit tortor eu quam. Duis ti
Client : 從服務端讀取數據:0d39730596b8200b73051f78bb5dda370dd1aa1ff8e01361e2213fc960db7e0ba97c557ae09d55cb89fd6e3e594136f2c0a73c69a6b72bad18b70de9101a5992
Client : 解壓數據:ncidunt nisi ut ante. Nulla
facilisi. Sed tristique eros eu libero. Pellentesque vel arcu. Vivamus
purus orci, iaculis
Client : 從服務端讀取數據:a0d1e159b75f85f6f79e2bf5298b6eccdeb466fd68ed174d1979e8
Client : 解壓數據: ac, suscipit sit amet, pulvinar eu,
lacus.


Client : 服務器傳過來的文件與客戶端讀取的文件是否相等 : True


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM