Python之bz2模塊的使用


bz2模塊的作用:
  用於壓縮數據以便存儲或傳輸。
bz2有三種壓縮/解壓方式:
  1、一次性壓縮/解壓數據。
  2、迭代式(增量)壓縮/解壓數據。
  3、類似文件的類方式的壓縮/解壓數據。

注意:
  如果安裝python的時候,沒有安裝bzip2-devel,導入bz2模塊時,會報如下錯誤:
ModuleNotFoundError: No module named '_bz2'
解決方法:
  yum install bzip2-devel -y
重新編譯安裝python的源碼即可

1、bz2內存壓縮與解壓示例

#!/usr/bin/env python3
# encoding: utf-8

import bz2
import binascii

# Source data
original_data = b'This is the original text.'
print('數據源長度: {} bytes'.format(len(original_data)))
print('數據源: ', original_data)

# One-shot, in-memory compression of the whole payload.
compressed = bz2.compress(original_data)
print('壓縮后的數據源長度 : {} bytes'.format(len(compressed)))
# hexlify() turns the binary result into printable hexadecimal digits.
hex_version = binascii.hexlify(compressed)
print(hex_version)


def split_chunks(data, width=40):
    """Cut ``data`` into consecutive slices of at most ``width`` items.

    Args:
        data: Any sliceable sequence (here, the hex-encoded bytes).
        width: Maximum length of each slice.

    Returns:
        A list of slices in order. Stepping the range by ``width`` means no
        trailing empty slice is produced when ``len(data)`` is an exact
        multiple of ``width``, and an empty input yields an empty list.
    """
    return [data[start:start + width] for start in range(0, len(data), width)]


# The main point of this example: splitting the data into fixed-width pieces.
for chunk in split_chunks(hex_version):
    print(chunk)
bz2_memory.py

運行效果

[root@ mnt]# python3 bz2_memory.py 
數據源長度: 26 bytes
數據源:  b'This is the original text.'
壓縮后的數據源長度 : 62 bytes
b'425a683931415926535916be35a600000293804001040022e59c402000314c000111e93d434da223028cf9e73148cae0a0d6ed7f17724538509016be35a6'
b'425a683931415926535916be35a6000002938040'
b'01040022e59c402000314c000111e93d434da223'
b'028cf9e73148cae0a0d6ed7f17724538509016be'
b'35a6'

 2、根據數據源的長度,判斷數據多長時壓縮才有效益的示例

#!/usr/bin/env python3
# encoding: utf-8

import bz2

# Sample payload; it is repeated to build progressively longer inputs.
original_data = b'This is the original text.'

# Two right-aligned columns, each 15 characters wide.
fmt = '{:>15} {:>15}'
print(fmt.format('len(data)', 'len(compressed)'))
print(fmt.format('-' * 15, '-' * 15))

for repeat in range(5):
    data = original_data * repeat
    compressed = bz2.compress(data)
    # Mark the rows where the compressed form is larger than the input.
    if len(data) < len(compressed):
        marker = '*'
    else:
        marker = ''
    # Row and marker share one output line, terminated by a single newline.
    print(fmt.format(len(data), len(compressed)) + marker)
bz2_lengths.py

運行效果

[root@ mnt]# python3 bz2_lengths.py 
      len(data) len(compressed)
--------------- ---------------
              0              14*
             26              62*
             52              68*
             78              70 #長度達到78字節時,壓縮後的數據才開始比原數據小
            104              72

 3、bz2增量壓縮文件

#!/usr/bin/env python3
# encoding: utf-8

import bz2
import binascii
import io

# Incremental compressor: it is fed the data piece by piece.
compressor = bz2.BZ2Compressor()

# Read the file as raw bytes, 64 bytes per step, until read() returns b''.
with open('content.txt', 'rb') as source:
    for block in iter(lambda: source.read(64), b''):
        compressed = compressor.compress(block)
        if not compressed:
            # Nothing was returned for this block; report it as buffered.
            print('緩存中...')
        else:
            print('壓縮: {}'.format(binascii.hexlify(compressed)))
    # flush() returns whatever compressed data has not been handed out yet.
    remaining = compressor.flush()
    print('刷新所有的壓縮數據:{}'.format(binascii.hexlify(remaining)))
bz2_incremental.py

運行效果

[root@ mnt]# python3 bz2_incremental.py 
緩存中...
緩存中...
緩存中...
緩存中...
緩存中...
緩存中...
刷新所有的壓縮數據:b'425a683931415926535920530808000022578000104005040549003fa7ff003000f2dac453c29b53d4cd26869a68d0353d0129b469a8343d268254d94694c9ea610d0029d6c1f2e28899e69c327b1c2fa5990ca4fe692cba43b3a70df3b57312f0b764b5666b818341b83f5d55d107479368d3bef097122f14870428b1066608de6e2c177818cc6218c66f6250302cb59ef4a5460e6233b64f88973aebc9ed99567de8afa2cea952ba544478546d7028a318aba8776a5b7ce062da72cd235a3ec5316bec50143550812f26590a528dc458b8b768b56cc4464cc12f08bdc99548fd4c14e93e6792cb94b2269053d8abf8bb9229c2848102984040'

 4、bz2.BZ2Decompressor()的unused_data屬性的使用示例

#!/usr/bin/env python3
# encoding: utf-8

import bz2

# Read the sample text and encode it to bytes. The with-block closes the file
# handle as soon as the read is done, and the explicit encoding keeps the
# result independent of the platform's default locale.
with open('content.txt', 'rt', encoding='utf-8') as source:
    content = source.read().encode('utf-8')
compressed = bz2.compress(content)  # compressed payload
combined = compressed + content  # compressed payload followed by raw data

# Create a decompressor object
decompressor = bz2.BZ2Decompressor()

# Decompress the mixed buffer
decompressed = decompressor.decompress(combined)

# Does the decompressed output equal the original content?
decompressed_matches = decompressed == content
print('解壓數據與未解壓數據是否相等', decompressed_matches)

# Does unused_data (the bytes left over after the compressed stream ended)
# equal the raw content that was appended?
unused_matches = decompressor.unused_data == content
print('解壓對象的unused_data(即包含所有未使用的數據)與未解壓數據是否相等', unused_matches)
bz2_mixed.py

運行效果

[root@ mnt]# python3 bz2_mixed.py 
解壓數據與未解壓數據是否相等 True
解壓對象的unused_data(即包含所有未使用的數據)與未解壓數據是否相等 True

 5、內容壓縮成bz2格式文件的示例

#!/usr/bin/env python3
# encoding: utf-8

import bz2
import io
import os

# Text that ends up inside the compressed file.
data = "Content of the example  file go here.\n"

# BZ2File is opened in binary mode ('wb'); the TextIOWrapper stacked on top
# encodes the text as UTF-8 before it reaches the compressor.
with bz2.BZ2File('example.bz2', 'wb') as output, \
        io.TextIOWrapper(output, encoding='utf-8') as enc:
    enc.write(data)

# Let the external `file` utility identify what was written.
identify_command = 'file example.bz2'
os.system(identify_command)
bz2_file_write.py

運行效果

[root@ mnt]# python3 bz2_file_write.py 
example.bz2: bzip2 compressed data, block size = 900k

[root@ mnt]# ll
total 12
-rw-r--r-- 1 root root 243 Jan  5 23:21 bz2_file_write.py
-rw-r--r-- 1 root root  74 Jan  5 23:21 example.bz2

 6、bz2設置壓縮等級進行壓縮

#!/usr/bin/env python3
# encoding: utf-8

import bz2
import io
import os

# Build a large input by repeating the sample text 1024 times. The with-block
# closes the file handle right after reading instead of leaking it.
with open('content.txt', 'r', encoding='utf-8') as source:
    data = source.read() * 1024
print('輸入 {} 字節'.format(len(data.encode('utf-8'))))

# Write the same input once per compression level, 1 through 9.
for i in range(1, 10):
    filename = 'compress-level-{}.bz2'.format(i)
    with bz2.BZ2File(filename, 'wb', compresslevel=i) as output:
        # BZ2File is opened in binary mode; the wrapper encodes text as UTF-8.
        with io.TextIOWrapper(output, encoding='utf-8') as enc:
            enc.write(data)
    # cksum prints the checksum and byte count of the file just written.
    os.system('cksum {}'.format(filename))
bz2_file_compresslevel.py

運行效果

[root@ mnt]# python3 bz2_file_compresslevel.py 
輸入 344064 字節
3563292108 2598 compress-level-1.bz2
2570483258 1426 compress-level-2.bz2
3725315122 1275 compress-level-3.bz2
766048418 617 compress-level-4.bz2
1786876353 617 compress-level-5.bz2
2751081060 617 compress-level-6.bz2
3839050503 617 compress-level-7.bz2
84226287 617 compress-level-8.bz2
1110202764 617 compress-level-9.bz2
[root@  mnt]# ll
total 48
-rw-r--r-- 1 root root  471 Jan  5 23:37 bz2_file_compresslevel.py
-rw-r--r-- 1 root root 2598 Jan  5 23:37 compress-level-1.bz2
-rw-r--r-- 1 root root 1426 Jan  5 23:37 compress-level-2.bz2
-rw-r--r-- 1 root root 1275 Jan  5 23:37 compress-level-3.bz2
-rw-r--r-- 1 root root  617 Jan  5 23:37 compress-level-4.bz2 #從等級4開始,壓縮後的大小不再變化
-rw-r--r-- 1 root root  617 Jan  5 23:37 compress-level-5.bz2
-rw-r--r-- 1 root root  617 Jan  5 23:37 compress-level-6.bz2
-rw-r--r-- 1 root root  617 Jan  5 23:37 compress-level-7.bz2
-rw-r--r-- 1 root root  617 Jan  5 23:37 compress-level-8.bz2
-rw-r--r-- 1 root root  617 Jan  5 23:37 compress-level-9.bz2

 7、bz2多行寫入壓縮數據

#!/usr/bin/env python3
# encoding: utf-8

import bz2
import io
import os
import itertools

# One line of text; it is written ten times in a row.
data = 'The same line,over and over.\n'
repeated_lines = itertools.repeat(data, 10)

# The binary BZ2File is wrapped so that text can be written as UTF-8.
with bz2.BZ2File('lines.bz2', 'wb') as output, \
        io.TextIOWrapper(output, encoding='utf-8') as enc:
    # writelines() consumes the iterator and writes each item as-is.
    enc.writelines(repeated_lines)

# Show the decompressed content with the external bzcat tool.
show_command = 'bzcat lines.bz2'
os.system(show_command)
bz2_writelines.py

運行效果

[root@ mnt]# python3 bz2_writelines.py 
The same line,over and over.
The same line,over and over.
The same line,over and over.
The same line,over and over.
The same line,over and over.
The same line,over and over.
The same line,over and over.
The same line,over and over.
The same line,over and over.
The same line,over and over.

[root@ mnt]# ll
-rw-r--r-- 1 root root 319 Jan  5 23:42 bz2_writelines.py
-rw-r--r-- 1 root root  88 Jan  5 23:43 lines.bz2

8、讀取壓縮文件 

#!/usr/bin/env python3
# encoding: utf-8

import bz2
import io

# Open the archive in binary mode and stack a UTF-8 text wrapper on top, so
# that read() returns str rather than bytes.
with bz2.BZ2File('example.bz2', 'rb') as archive, \
        io.TextIOWrapper(archive, encoding='utf-8') as reader:
    text = reader.read()
    print(text)
bz2_read.py

運行效果

[root@ mnt]# python3 bz2_read.py 
Content of the example  file go here.

 9、bz2 seek()移位取值的示例

#!/usr/bin/env python3
# encoding: utf-8

import bz2

with bz2.BZ2File('example.bz2', 'rb') as archive:
    print('讀取壓縮文件內容')
    all_data = archive.read()
    print('內容:', all_data)

    # Position and size of the window that is read in two different ways.
    offset, length = 5, 10

    # Reference value: slice the fully read data.
    expected = all_data[offset:offset + length]

    # Rewind to the beginning ...
    archive.seek(0)

    # ... then move forward to the window's start.
    archive.seek(offset)
    print('開始相對下標獲取5到10個字節')
    partial = archive.read(length)
    print('移動指針后獲取的值', partial)
    print('一次性取值切片獲取的值', expected)
bz2_file_seek.py

運行效果

[root@ mnt]# python3 bz2_file_seek.py 
讀取壓縮文件內容
內容: b'Content of the example  file go here.\n'
開始相對下標獲取5到10個字節
移動指針后獲取的值 b'nt of the '
一次性取值切片獲取的值 b'nt of the '

 10、bz2 seek() Unicode編碼的注意示例

#!/usr/bin/env python3
# encoding: utf-8

import bz2

# 'å' is a single character, but the demo below treats offsets 18 and 19 as
# two different positions inside it once the text is stored as UTF-8.
data = 'Character with an åccent.'

# Text mode ('wt'): bz2.open encodes the str as UTF-8 before compressing.
with bz2.open('example.bz2', 'wt', encoding='utf-8') as output:
    output.write(data)

with bz2.open('example.bz2', 'rt', encoding='utf-8') as reader:
    print('壓縮文件所有的內容:{}'.format(reader.read()))

# NOTE(review): the io documentation only defines text-mode seek() for
# offsets obtained from tell() (or zero); raw offsets are used here on
# purpose to show what happens -- confirm before relying on this elsewhere.
with bz2.open('example.bz2', 'rt', encoding='utf-8') as reader:
    # Offset 18 is where 'å' starts, so one character can be decoded.
    reader.seek(18)
    print('讀取第一個字符{}'.format(reader.read(1)))

with bz2.open('example.bz2', 'rt', encoding='utf-8') as reader:
    # Offset 19 is one position further; the sample run shows decoding
    # fails from here.
    reader.seek(19)
    try:
        # Use .format() like the read above, so the placeholder is filled in
        # rather than printed literally if the read ever succeeds.
        print('讀取第一個字符{}'.format(reader.read(1)))
    except UnicodeDecodeError:
        print('錯誤:解碼失敗,因為文件指針是針對字節移動,而不是按字符移動,unicode編碼占2個字節')
bz2_unicode.py

運行效果

[root@ mnt]# python3 bz2_unicode.py 
壓縮文件所有的內容:Character with an åccent.
讀取第一個字符å
錯誤:解碼失敗,因為文件指針是針對字節移動,而不是按字符移動,unicode編碼占2個字節

11、bz2網絡傳輸壓縮與解壓數據的示例(示例最終會讀取文件跟服務端傳過來文件比較是否相等)

Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec
egestas, enim et consectetuer ullamcorper, lectus ligula rutrum leo, a
elementum elit tortor eu quam. Duis tincidunt nisi ut ante. Nulla
facilisi. Sed tristique eros eu libero. Pellentesque vel arcu. Vivamus
purus orci, iaculis ac, suscipit sit amet, pulvinar eu,
lacus.
content.txt
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import bz2
import socketserver
import logging
import binascii

# Number of bytes read from the file per step, and the maximum size of each
# piece sent while flushing.
BLOCK_SIZE = 32


class Bz2RequestHandler(socketserver.BaseRequestHandler):
    """Stream a file back to the client as bz2-compressed data.

    The client first sends a file name; the handler reads that file in
    ``BLOCK_SIZE`` pieces, compresses them incrementally and writes the
    compressed bytes to the connection.
    """

    logger = logging.getLogger('Server')

    def handle(self):
        """Serve one request: receive a file name, send the compressed file."""
        # Create an incremental compressor
        compressor = bz2.BZ2Compressor()
        # NOTE(security): the name comes straight from the client and is
        # opened without validation, so any readable path can be requested.
        # Restrict it to a known directory before using this outside a demo.
        file_name = self.request.recv(1024).decode('utf-8')
        self.logger.debug('接收到客戶端文件名:%s', file_name)
        with open(file_name, 'rb') as source:
            while True:
                block = source.read(BLOCK_SIZE)
                if not block:
                    break
                self.logger.debug('讀取到數據:%r', block)
                # Compress this piece
                compressed = compressor.compress(block)
                if compressed:
                    self.logger.debug('發送中 %r', binascii.hexlify(compressed))
                    # sendall() keeps writing until every byte is sent;
                    # send() may write only part of the buffer and silently
                    # drop the rest of the compressed stream.
                    self.request.sendall(compressed)
                else:
                    self.logger.debug('緩存中...')
        # Fetch everything the compressor is still holding
        remaining = compressor.flush()
        while remaining:
            to_send = remaining[:BLOCK_SIZE]
            remaining = remaining[BLOCK_SIZE:]
            self.logger.debug('FLUSHING %r', binascii.hexlify(to_send))
            self.request.sendall(to_send)


if __name__ == '__main__':
    # Configure logging so the handler's DEBUG messages are actually shown;
    # with no configuration, records below WARNING are discarded.
    logging.basicConfig(level=logging.DEBUG, format='%(name)s : %(message)s')
    address = ('localhost', 8888)
    # The context manager closes the listening socket when the loop ends.
    with socketserver.TCPServer(address, Bz2RequestHandler) as server:
        server.serve_forever()
bz2_tcp_server.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import socket
import sys
from io import StringIO
import logging
import bz2
import binascii

def requested_file_from_argv(argv, default='content.txt'):
    """Return the file name to request from the server.

    Args:
        argv: Command-line vector; ``argv[0]`` is the script name.
        default: Name used when no argument was given.

    Returns:
        ``argv[1]`` when present, otherwise ``default``.
    """
    return argv[1] if len(argv) > 1 else default


def receive_decompressed(sock, block_size):
    """Read a bz2 stream from ``sock`` until it closes and decompress it.

    Args:
        sock: Object with a ``recv(size)`` method that returns ``b''`` once
            the peer has finished sending.
        block_size: Maximum number of bytes requested per ``recv`` call.

    Returns:
        The decompressed payload as ``bytes``. The pieces are collected as
        bytes and never decoded one by one, so a multi-byte character that
        straddles two pieces cannot cause a decode error.
    """
    decompressor = bz2.BZ2Decompressor()  # incremental decompressor
    received = bytearray()
    while True:
        response = sock.recv(block_size)
        if not response:
            break
        logging.debug('讀取到 %r', binascii.hexlify(response))

        decompressed = decompressor.decompress(response)
        print(decompressed)
        if decompressed:
            logging.debug('解壓數據 %r', decompressed)
            received.extend(decompressed)
        else:
            logging.debug('緩存中...')
    return bytes(received)


if __name__ == '__main__':
    BLOCK_SIZE = 64

    # Messages are emitted through the root logger, hence the 'root' prefix
    # in the sample output.
    logging.basicConfig(level=logging.DEBUG, format='%(name)s : %(message)s')
    server_address = ('localhost', 8888)
    logging.debug('連接服務器: %s:%s', server_address[0], server_address[1])

    # File name argument: the first real argument, not the script name.
    requested_file = requested_file_from_argv(sys.argv)

    # The with-block closes the socket even if an error interrupts the run.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sk:
        sk.connect(server_address)
        # sendall() makes sure the whole file name is written.
        sk.sendall(requested_file.encode('utf-8'))
        full_response = receive_decompressed(sk, BLOCK_SIZE)

    # Compare raw bytes with the local copy; the server reads it in binary
    # mode too, so no encoding or newline translation is involved.
    with open(requested_file, 'rb') as local_copy:
        content = local_copy.read()
    logging.debug('讀取文件內容與服務器響應數據的匹配: %s', full_response == content)
bz2_tcp_client.py

運行效果

server端

[root@ mnt]# python3 bz2_tcp_server.py 
content.txt 

client端

[root@ mnt]# python3 bz2_tcp_client.py 
root : 連接服務器: localhost:8888
root : 讀取到 b'425a683931415926535920530808000022578000104005040549003fa7ff003000f2dac453c29b53d4cd26869a68d0353d0129b469a8343d268254d94694c9ea'
b''
root : 緩存中...
root : 讀取到 b'610d0029d6c1f2e28899e69c327b1c2fa5990ca4fe692cba43b3a70df3b57312f0b764b5666b818341b83f5d55d107479368d3bef097122f14870428b1066608'
b''
root : 緩存中...
root : 讀取到 b'de6e2c177818cc6218c66f6250302cb59ef4a5460e6233b64f88973aebc9ed99567de8afa2cea952ba544478546d7028a318aba8776a5b7ce062da72cd235a3e'
b''
root : 緩存中...
root : 讀取到 b'c5316bec50143550812f26590a528dc458b8b768b56cc4464cc12f08bdc99548fd4c14e93e6792cb94b2269053d8abf8bb9229c2848102984040'
b'Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec\negestas, enim et consectetuer ullamcorper, lectus ligula rutrum leo, a\nelementum elit tortor eu quam. Duis tincidunt nisi ut ante. Nulla\nfacilisi. Sed tristique eros eu libero. Pellentesque vel arcu. Vivamus\npurus orci, iaculis ac, suscipit sit amet, pulvinar eu,\nlacus.\n\n'
root : 解壓數據 b'Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec\negestas, enim et consectetuer ullamcorper, lectus ligula rutrum leo, a\nelementum elit tortor eu quam. Duis tincidunt nisi ut ante. Nulla\nfacilisi. Sed tristique eros eu libero. Pellentesque vel arcu. Vivamus\npurus orci, iaculis ac, suscipit sit amet, pulvinar eu,\nlacus.\n\n'
root : 讀取文件內容與服務器響應數據的匹配: True


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM