# Use the train_test_split function from the sklearn library
from sklearn.model_selection import train_test_split
# Hold out 20% of `data` as the test set; the fixed random_state makes the
# shuffled split repeatable from run to run.
train, test = train_test_split(
    data,
    train_size=0.8,
    random_state=2021,
)
# Write the split yourself with numpy
import numpy as np
# Randomly pick x distinct integers from the range 0 .. n-1.
def getRandomIdx(n, x, seed=None):
    """Return ``x`` distinct indices drawn at random from ``range(n)``.

    Args:
        n: Size of the index pool; the candidates are ``0, 1, ..., n - 1``.
        x: Number of indices to draw. Sampling is without replacement, so
            no index appears twice.
        seed: Optional seed for a reproducible draw. When ``None`` (the
            default) NumPy's global random state is used, so a prior
            ``np.random.seed(...)`` call by the caller is still honoured.

    Returns:
        A 1-D ``numpy.ndarray`` holding ``x`` unique integers in ``[0, n)``,
        in random order.

    Raises:
        ValueError: Raised by NumPy when ``x`` is larger than ``n``, since a
            sample without replacement cannot exceed the population.
    """
    if seed is None:
        # Keep the legacy global-state path so existing callers see no change.
        return np.random.choice(np.arange(n), size=x, replace=False)
    # A dedicated generator keeps a seeded draw independent of global state.
    rng = np.random.default_rng(seed)
    return rng.choice(np.arange(n), size=x, replace=False)
# Split the row indices into a random 80% training part and the remaining
# test part.
total_cnt = data.shape[0]
train_cnt = int(total_cnt * 0.8)  # int() truncates, so any leftover row goes to the test part
train_idx = getRandomIdx(total_cnt, train_cnt)
# Every index that was not drawn for training, in ascending order.
test_idx = np.setdiff1d(np.arange(total_cnt), train_idx)
# Build the training set and the test set from the two index arrays.
# NOTE(review): indexing with an integer array like this assumes `data` is a
# NumPy array; a pandas DataFrame would need `.iloc` — confirm against caller.
data_train = data[train_idx]
data_test = data[test_idx]