一、環境參數
1、操作系統
centos7.x_64bit
[root@yyljxl tools]# cat /etc/redhat-release
CentOS Linux release 7.3.1611 (Core)
[root@yyljxl tools]# uname -r
3.10.0-514.el7.x86_64
2、需要python3.6環境
setuptools-19.6.tar.gz
pip-9.0.1.tar.gz
Python-3.6.7.tar.xz
#安裝方法參考:
http://172.28.5.37/huzhihua/install_python3
3、檢查環境
#確認nouveau驅動已去除
lsmod | grep nouveau #執行上述命令,什麼都不顯示說明已經成功去除了nouveau驅動
#檢查GPU是否識別
[root@yyljxl pip]# lspci | grep NV
00:0c.0 3D controller: NVIDIA Corporation GP100GL (rev a1)
#查看顯卡信息
[root@yyljxl ~]# nvidia-smi
Sat May 18 11:38:54 2019
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.81 Driver Version: 384.81 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 Tesla P100-PCIE... Off | 00000000:00:0C.0 Off | 0 |
| N/A 59C P0 160W / 250W | 8615MiB / 16276MiB | 67% Default |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| 0 2238 C python3 8601MiB |
+-----------------------------------------------------------------------------+
二、軟件安裝
2.1、所需軟件版本(備注:軟件要按順序安裝)
[root@yyljxl tools]# ll
total 3680468
nvidia-diag-driver-local-repo-rhel7-418.67-1.0-1.x86_64.rpm
cuda_9.0.176_384.81_linux.run
cudnn-9.0-linux-x64-v7.tgz
tensorflow-gpu==1.8.0
2.2、安裝顯卡驅動
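#nvidia-diag-driver-local-repo-rhel7-418.67-1.0-1.x86_64.rpm
#注意:此rpm只會在本機建立一個本地yum倉庫,驅動本身還需要再用yum安裝
rpm -ivh nvidia-diag-driver-local-repo-rhel7-418.67-1.0-1.x86_64.rpm
yum clean all
yum install -y cuda-drivers
#安裝完成後重啟系統,再執行nvidia-smi確認驅動已經加載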
2.3、安裝cuda
#cuda_9.0.176_384.81_linux.run
sh cuda_9.0.176_384.81_linux.run
#安裝過程中會有提示:
Install NVIDIA Accelerated Graphics Driver for Linux-x86_64 384.81? (y)es/(n)o/(q)uit:n
注意:此步選擇n(顯卡驅動已在2.2中單獨安裝,不需要再安裝runfile自帶的384.81驅動),其余選y或者default即可。
2.4、配置環境變量
vi /etc/profile
export PATH=/usr/local/cuda-9.0/bin:$PATH
vi /etc/ld.so.conf
/usr/local/cuda-9.0/lib64
#使環境變量生效
source /etc/profile
#執行ldconfig刷新動態庫緩存,並查看輸出中是否包含/usr/local/cuda-9.0/lib64下的庫:
[root@yyljxl ~]# ldconfig -v
ldconfig: Can't stat /libx32: No such file or directory
ldconfig: Path `/usr/lib' given more than once
ldconfig: Path `/usr/lib64' given more than once
ldconfig: Can't stat /usr/libx32: No such file or directory
/usr/lib64/iscsi:
libiscsi.so.2 -> libiscsi.so.2.0.10900
/usr/lib64/mysql:
libmysqlclient.so.18 -> libmysqlclient.so.18.0.0
/usr/local/cuda-9.0/lib64:
libnppisu.so.9.0 -> libnppisu.so.9.0.176
libcufft.so.9.0 -> libcufft.so.9.0.176
libnvblas.so.9.0 -> libnvblas.so.9.0.176
libcudnn.so.7 -> libcudnn.so.7.0.5
libcufftw.so.9.0 -> libcufftw.so.9.0.176
libnppial.so.9.0 -> libnppial.so.9.0.176
libOpenCL.so.1 -> libOpenCL.so.1.0.0
libnvToolsExt.so.1 -> libnvToolsExt.so.1.0.0
省略部分.....
2.5、安裝cudnn
tar xf cudnn-9.0-linux-x64-v7.tgz
cp cuda/include/cudnn.h /usr/local/cuda/include/
cp cuda/lib64/libcudnn* /usr/local/cuda/lib64/ -d
chmod a+r /usr/local/cuda/include/cudnn.h
chmod a+r /usr/local/cuda/lib64/libcudnn*
#拷貝完成後重新執行ldconfig,使libcudnn加入動態庫緩存
ldconfig
2.6、安裝tensorflow-gpu==1.8.0
pip install tensorflow-gpu==1.8.0
#查看tensorflow的版本
python3
import tensorflow as tf
tf.__version__
示例:
[root@yyljxl tools]# python3
Python 3.6.7 (default, May 18 2019, 09:18:03)
[GCC 4.8.5 20150623 (Red Hat 4.8.5-36)] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import tensorflow as tf
>>> tf.__version__
'1.8.0'
>>> exit()