tensorflow 模型保存與加載 和TensorFlow serving + grpc + docker項目部署


TensorFlow 模型保存與加載

TensorFlow中總共有兩種保存和加載模型的方法。第一種是利用 tf.train.Saver() 來保存,第二種就是利用 SavedModel 來保存模型,接下來以自己項目中的代碼為例。

項目中模型的代碼:

class TensorFlowDKT(object):
    """Deep Knowledge Tracing (DKT) model used throughout this article.

    A stacked LSTM (TF1.x graph mode) that reads one-hot encoded exercise
    interactions and emits, for every time step, a sigmoid probability of
    mastery for each skill.  All run-time inputs are placeholders so the
    graph can later be exported with tf.train.Saver or SavedModel.
    """

    def __init__(self, config, batch_size):
        # Hyper-parameters taken from the config object.
        # NOTE(review): the `batch_size` constructor argument is never used —
        # the batch size is fed through the `batch_size` placeholder below.
        self.hiddens = hiddens = config.modelConfig.hidden_layers
        self.num_skills = num_skills = config.num_skills
        self.input_size = input_size = config.input_size
        self.keep_prob_value = config.modelConfig.dropout_keep_prob

        # Placeholders fed at run time; their `name=` is what the loading
        # code and the serving signature look up later.
        self.max_steps = tf.placeholder(tf.int32, name="max_steps")  # longest sequence length in the current batch
        self.input_data = tf.placeholder(tf.float32, [None, None, input_size], name="input_x")

        self.sequence_len = tf.placeholder(tf.int32, [None], name="sequence_len")
        self.keep_prob = tf.placeholder(tf.float32, name="keep_prob")  # dropout keep probability

        self.target_id = tf.placeholder(tf.int32, [None, None], name="target_id")
        self.target_correctness = tf.placeholder(tf.float32, [None, None], name="target_correctness")
        self.flat_target_correctness = None
        self.batch_size = tf.placeholder(tf.int32, name="batch_size")

        # Build the stacked LSTM; every layer is wrapped with dropout.
        hidden_layers = []
        for idx, hidden_size in enumerate(hiddens):
            lstm_layer = tf.nn.rnn_cell.LSTMCell(num_units=hidden_size, state_is_tuple=True)
            hidden_layer = tf.nn.rnn_cell.DropoutWrapper(cell=lstm_layer,
                                                         output_keep_prob=self.keep_prob)
            hidden_layers.append(hidden_layer)
        self.hidden_cell = tf.nn.rnn_cell.MultiRNNCell(cells=hidden_layers, state_is_tuple=True)

        # Dynamic RNN so each batch may carry sequences of differing length.
        outputs, self.current_state = tf.nn.dynamic_rnn(cell=self.hidden_cell,
                                                        inputs=self.input_data,
                                                        sequence_length=self.sequence_len,
                                                        dtype=tf.float32)

        # Hidden-to-output projection: [last hidden layer size, num_skills].
        output_w = tf.get_variable("W", [hiddens[-1], num_skills])
        output_b = tf.get_variable("b", [num_skills])

        self.output = tf.reshape(outputs, [-1, hiddens[-1]])
        # Weights are shared across time steps; the bias broadcasts over every
        # row of the [batch_size * max_steps, num_skills] matrix.
        self.logits = tf.matmul(self.output, output_w) + output_b

        self.mat_logits = tf.reshape(self.logits, [-1, self.max_steps, num_skills])

        # Sigmoid over every logit: each value is the predicted mastery of one
        # skill at one time step (every step predicts all skills at once).
        self.pred_all = tf.sigmoid(self.mat_logits, name="pred_all")

        # ---- loss computation ----
        flat_logits = tf.reshape(self.logits, [-1])

        flat_target_correctness = tf.reshape(self.target_correctness, [-1])
        self.flat_target_correctness = flat_target_correctness

        # Base offset of each (batch, step) row inside the flattened logits.
        flat_base_target_index = tf.range(self.batch_size * self.max_steps) * num_skills

        # flat_logits has length batch_size * num_steps * num_skills; use the
        # per-step target_id to select one logit per step, shrinking it to
        # length batch_size * num_steps.
        flat_base_target_id = tf.reshape(self.target_id, [-1])

        flat_target_id = flat_base_target_id + flat_base_target_index
        # tf.gather slices the targeted logits out of the flat tensor.
        flat_target_logits = tf.gather(flat_logits, flat_target_id)

        # Sigmoid on the gathered logits: per-step probability of answering
        # the targeted skill correctly.
        self.pred = tf.sigmoid(tf.reshape(flat_target_logits, [-1, self.max_steps]), name="pred")
        # Binarize at 0.5 into hard 0/1 predictions.
        self.binary_pred = tf.cast(tf.greater_equal(self.pred, 0.5), tf.float32, name="binary_pred")

        # Sigmoid cross-entropy between the gathered logits and 0/1 labels.
        with tf.name_scope("loss"):
            # (earlier log-softmax formulation, kept for reference)
            # flat_target_logits_sigmoid = tf.nn.log_softmax(flat_target_logits)
            # self.loss = -tf.reduce_mean(flat_target_correctness * flat_target_logits_sigmoid)
            self.loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=flat_target_correctness,
                                                                               logits=flat_target_logits))

在之后的預測時,我需要輸入的參數有 input_data,max_steps,sequence_len,keep_prob,target_id,batch_size。輸出的值有pred_all。

首先來看第一種模型保存的方法:

# Training loop that checkpoints with tf.train.Saver.
# Fixes vs. the original snippet: the initializer call and the print were
# fused onto one line (a syntax error), the "......" placeholder was not
# valid Python, and the evaluation loop reused the name `params`, shadowing
# the training batch.
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    session_conf = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        gpu_options=gpu_options
    )
    sess = tf.Session(config=session_conf)
    # ... model construction / train_op definition omitted ...

    # Saving with tf.train.Saver only needs two statements: create the
    # Saver here, and call saver.save() at checkpoint time below.
    saver = tf.train.Saver(tf.global_variables())
    sess.run(tf.global_variables_initializer())
    print("初始化完畢,開始訓練")
    for i in range(config.trainConfig.epochs):
        np.random.shuffle(train_seqs)
        for params in dataGen.next_batch(train_seqs):
            # Train on one batch.
            self.train_step(params, train_op)

            current_step = tf.train.global_step(sess, global_step)
            # Periodically evaluate on the held-out test sequences.
            if current_step % config.trainConfig.evaluate_every == 0:
                print("\nEvaluation:")
                losses = []
                accuracys = []
                aucs = []
                for dev_params in dataGen.next_batch(test_seqs):
                    loss, accuracy, auc = self.dev_step(dev_params)
                    losses.append(loss)
                    accuracys.append(accuracy)
                    aucs.append(auc)

                time_str = datetime.datetime.now().isoformat()
                print("dev: {}, step: {}, loss: {}, acc: {}, auc: {}".
                      format(time_str, current_step, mean(losses), mean(accuracys), mean(aucs)))

            # Periodically write a checkpoint to model/.
            if current_step % config.trainConfig.checkpoint_every == 0:
                path = saver.save(sess, "model/my-model", global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))
利用 tf.train.Saver() 保存模型非常簡單,只要上述代碼中 `saver = tf.train.Saver(tf.global_variables())` 和 `saver.save(sess, "model/my-model", global_step=current_step)` 這兩句就行了。

模型加載的代碼:

# Restore a tf.train.Saver checkpoint and run batched predictions.
# Fixes vs. the original snippet: the indentation was mangled (the
# prediction loop had fallen outside the session scope) and `step` plus the
# accumulator lists were used without ever being defined.
graph = tf.Graph()
with graph.as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False, gpu_options=gpu_options)
    sess = tf.Session(config=session_conf)

    with sess.as_default():
        # Import the graph from the .meta file of the latest checkpoint,
        # then restore the variable values into this session.
        checkpoint_file = tf.train.latest_checkpoint("model/")
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)

        # Look up, by full graph name, every placeholder that the output
        # depends on ("test/dkt/..." is the scope used at build time).
        input_x = graph.get_operation_by_name("test/dkt/input_x").outputs[0]
        target_id = graph.get_operation_by_name("test/dkt/target_id").outputs[0]
        keep_prob = graph.get_operation_by_name("test/dkt/keep_prob").outputs[0]
        max_steps = graph.get_operation_by_name("test/dkt/max_steps").outputs[0]
        sequence_len = graph.get_operation_by_name("test/dkt/sequence_len").outputs[0]
        batch_size = graph.get_operation_by_name("test/dkt/batch_size").outputs[0]

        # The prediction tensor.
        pred_all = graph.get_tensor_by_name("test/dkt/pred_all:0")

        # Accumulators for the prediction loop.
        step = 0
        sequence_lens = []
        studentSkillMasterProbs = []
        studentTargetId = []
        studentTargetCorrectness = []

        for params in dataGen.next_batch(train_seqs):
            print("step: {}".format(step))

            target_correctness = params['target_correctness']

            # keep_prob is 1.0 at inference time (dropout disabled).
            pred_all_1 = sess.run([pred_all], feed_dict={input_x: params["input_x"],
                                                         target_id: params["target_id"],
                                                         keep_prob: 1.0,
                                                         max_steps: params["max_len"],
                                                         sequence_len: params["seq_len"],
                                                         batch_size: len(params["seq_len"])})
            print(params["seq_len"])
            sequence_lens.append(params["seq_len"])
            studentSkillMasterProbs.append(pred_all_1)
            studentTargetId.append(params["target_id"])
            studentTargetCorrectness.append(params["target_correctness"])
            step += 1

加載模型時要把我們需要的輸入參數和輸出結果的 tensor 讀出來,利用get_tensor_by_name() 的方法,方法中需要傳入tensor 的名稱,所以在定義模型類時需要為這些 tensor 指定 name 參數。之后就是直接 sess.run() 去運行模型進行預測。

這種方法會保存四個文件:checkpoint,xxx.index,xxx.meta 和 xxx.data-00000-of-00001

第二種模型保存的方法:

# Train, then export the graph as a SavedModel with an explicit "predict"
# signature.  Fixes vs. the original snippet: the "......" placeholder
# lines were not valid Python and the dict-continuation indentation drifted.
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    session_conf = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        gpu_options=gpu_options
    )
    sess = tf.Session(config=session_conf)
    self.sess = sess

    with sess.as_default():
        # ... instantiate the DKT model object (omitted) ...

        builder = tf.saved_model.builder.SavedModelBuilder("./builder")

        sess.run(tf.global_variables_initializer())

        print("初始化完畢,開始訓練")
        for i in range(config.trainConfig.epochs):
            pass  # ... training loop (omitted) ...

        # Map the signature's input names onto the placeholders the
        # prediction depends on; these names are what clients use.
        inputs = {"input_x": tf.saved_model.utils.build_tensor_info(self.train_dkt.input_data),
                  "target_id": tf.saved_model.utils.build_tensor_info(self.train_dkt.target_id),
                  "max_steps": tf.saved_model.utils.build_tensor_info(self.train_dkt.max_steps),
                  "sequence_len": tf.saved_model.utils.build_tensor_info(self.train_dkt.sequence_len),
                  "keep_prob": tf.saved_model.utils.build_tensor_info(self.train_dkt.keep_prob),
                  "batch_size": tf.saved_model.utils.build_tensor_info(self.train_dkt.batch_size)}

        outputs = {"pred_all": tf.saved_model.utils.build_tensor_info(self.train_dkt.pred_all)}

        prediction_signature = tf.saved_model.signature_def_utils.build_signature_def(
            inputs=inputs, outputs=outputs,
            method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)

        # NOTE(review): legacy_init_op was deprecated in later TF1 releases
        # in favour of main_op — fine for the TF version this article targets.
        legacy_init_op = tf.group(tf.tables_initializer(), name="legacy_init_op")
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map={"predict": prediction_signature},
            legacy_init_op=legacy_init_op)

        builder.save()

利用 saved_model 也可以把整個模型中的變量全部保存起來,但更一般的形式是指定 輸入參數和輸出結果來進行保存,而且除了可以保存predict之外,還可以保存training,classify等等。

模型加載的代碼:

   graph = tf.Graph()
    with graph.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False, gpu_options=gpu_options)
        sess = tf.Session(config=session_conf)
        
        signature_key = "predict"  # 這里的值是要和保存模型中的 builder.add_meta_graph_and_variables()方法里面的 signature_def_map={"predict": prediction_signature} 對應上的。
   with tf.Session(graph=graph) as sess:
            meta_graph_def = tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], "./builder")
            signature = meta_graph_def.signature_def
            input_x = sess.graph.get_tensor_by_name(signature[signature_key].inputs["input_x"].name)
            keep_prob = sess.graph.get_tensor_by_name(signature[signature_key].inputs["keep_prob"].name)
            target_id = sess.graph.get_tensor_by_name(signature[signature_key].inputs["target_id"].name)
            max_steps = sess.graph.get_tensor_by_name(signature[signature_key].inputs["max_steps"].name)
            sequence_len = sess.graph.get_tensor_by_name(signature[signature_key].inputs["sequence_len"].name)
            batch_size = sess.graph.get_tensor_by_name(signature[signature_key].inputs["batch_size"].name)

            pred_all = sess.graph.get_tensor_by_name(signature[signature_key].outputs["pred_all"].name)
            for params in dataGen.next_batch(train_seqs):
                print("step: {}".format(step))

                target_correctness = params['target_correctness']
                
                pred_all_1 = sess.run([pred_all], feed_dict={input_x: params["input_x"],
                                                                  target_id: params["target_id"],
                                                                  keep_prob: 1.0,
                                                                  max_steps: params["max_len"],
                                                                  sequence_len: params["seq_len"],
                                                                  batch_size: len(params["seq_len"])})
                print(params["seq_len"])
                sequence_lens.append(params["seq_len"])
                studentSkillMasterProbs.append(pred_all_1)
                studentTargetId.append(params["target_id"])
                studentTargetCorrectness.append(params["target_correctness"])

                step += 1

模型加載和 tf.train.Saver() 保存的模型差不多,不過不需要指定在模型圖中的全名。

這種方法會保存一個 saved_model.pb 文件和一個 variables 文件夾,variables 中有兩個文件 variables.data-00000-of-00001(分片編號可能有所不同)和 variables.index。

 

TensorFlow serving 服務布署

 利用上面第二種保存的模型可以構建TensorFlow serving 服務,具體的是利用docker來構建TensorFlow serving 的服務端。然后在客戶端通過grpc來連接。整個步驟如下:

服務器系統:Ubuntu16.04

1,安裝docker-ce

  具體的安裝流程見官網

2,拉取官方的TensorFlow serving 倉庫,可以直接拉取最新版的,也可以自己選擇版本

  docker pull tensorflow/serving:latest-devel

3,啟動容器,選擇grpc的端口

  docker run -p 8500:8500 --name grpc -it tensorflow/serving:latest-devel

  8500端口:grpc的端口

  8501端口:restful api的端口

  除了grpc調用服務,也可以用restful api調用服務

4,將自己的model文件復制到容器中

  本地模型路徑:~/builder/00000123 。模型名稱為builder,00000123為版本號(必須需要),00000123文件夾下面就是xxx.pb 文件和variables文件夾

  docker cp ~/builder grpc:/online_model/builder 。grpc 是我們的容器名稱,將本地的 builder 模型目錄(即上面的 ~/builder) copy 到容器中根路徑下的 online_model 文件夾中

5,在docker中啟動TensorFlow serving服務

  進入到容器中:docker exec -it grpc bash

  啟動TensorFlow serving服務:tensorflow_model_server --port=8500 --model_name=builder --model_base_path=/online_model/builder/

  這樣服務端的TensorFlow serving就啟動了

6,客戶端代碼

import grpc
import numpy as np
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc

# TensorFlow Serving gRPC client.
# Fix vs. the original snippet: it used the grpc.beta API
# (implementations.insecure_channel / beta_create_PredictionService_stub),
# which was removed from the grpc package — use the stable
# grpc.insecure_channel and the generated PredictionServiceStub instead.

# NOTE(review): DataGenerator / fileName / config come from the project's
# data-preprocessing code — confirm they are importable where this runs.
dataGen = DataGenerator(fileName, config)
dataGen.gen_attr()
test_seqs = dataGen.test_seqs
params = dataGen.format_data(test_seqs)
input_x = params["input_x"][:1]
max_steps = params["max_len"]
batch_size = 1
keep_prob = 1.0
target_id = params["target_id"][:1]
sequence_len = params["seq_len"][:1]

# Connect to the serving container's gRPC port.
channel = grpc.insecure_channel("192.168.39.39:8500")
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
request = predict_pb2.PredictRequest()
# Served model name (matches --model_name on the server).
request.model_spec.name = "builder"
# Signature name: matches signature_def_map={"predict": ...} at export time.
request.model_spec.signature_name = "predict"

# One example per request; dtypes and shapes must match the exported
# placeholders exactly.
request.inputs['input_x'].CopyFrom(tf.contrib.util.make_tensor_proto(input_x, dtype=tf.float32, shape=[input_x.shape[0], input_x.shape[1], input_x.shape[2]]))
request.inputs['target_id'].CopyFrom(tf.contrib.util.make_tensor_proto(target_id, dtype=tf.int32, shape=[target_id.shape[0], target_id.shape[1]]))
request.inputs['max_steps'].CopyFrom(tf.contrib.util.make_tensor_proto(max_steps, dtype=tf.int32))
request.inputs['keep_prob'].CopyFrom(tf.contrib.util.make_tensor_proto(keep_prob, dtype=tf.float32))
request.inputs["sequence_len"].CopyFrom(tf.contrib.util.make_tensor_proto(sequence_len, dtype=tf.int32, shape=[1]))
request.inputs["batch_size"].CopyFrom(tf.contrib.util.make_tensor_proto(batch_size, dtype=tf.int32))

# Blocking call with a 10-second deadline; the response is a protobuf.
response = stub.Predict(request, 10.0)

# For this many-to-many LSTM the output carries one value per (step, skill);
# float_val exposes them as a flat repeated field (read it as a list).
res_list = response.outputs["pred_all"].float_val

利用TensorFlow serving搭建服務的具體流程就這么些,另外還支持gpu版的TensorFlow serving。但是需要安裝nvidia-docker ,還需要啟動nvidia-container-runtime等,總之很復雜,對於NLP中的任務,預測時用CPU也是完全可以的。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM