目前 windows 下 GPU 的版本來到 tensorflow_gpu-1.12.0 使用 Bazel
但是發現目前的 Bazel 產生的 library 不能在 Visual Studio 中使用
退到 tensorflow_gpu-1.11 使用 Cmake 建立 library
另外 CMake 只能建立 Release 版本,原本想以 RelWithDebInfo 版本取代 Debug 版本,
但實際上只有 Release 能建立成功
library 建立起來後,程式可以編譯,可以執行,但結果是錯的
開啟 Anaconda Prompt
(base) D:\>conda create -n tensorflow-1.11 pip python=3.6
(base) D:\>activate tensorflow-1.11
(tensorflow-1.11) D:\>pip install six numpy wheel
(tensorflow-1.11) D:\>pip install keras_applications==1.0.5 --no-deps
(tensorflow-1.11) D:\>pip install keras_preprocessing==1.0.3 --no-deps
http://www.msys2.org/
下載 msys2-x86_64-20180531.exe
開啟 msys2/MinGW 64-bit
$ pacman -Syu
$ pacman -Su
$ pacman -S git patch unzip
https://github.com/bazelbuild/bazel/releases
下載 bazel-0.18.1-windows-x86_64.exe
rename bazel-0.18.1-windows-x86_64.exe bazel.exe
move bazel.exe D:\msys64\usr\bin
add PATH D:\msys64\usr\bin
安裝 JDK 8
下載 jdk-8u191-windows-x64.exe
add JAVA_HOME C:\Program Files\Java\jdk1.8.0_191
copy cudnn-9.0-windows10-x64-v7\cuda\* to
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0
下載 swigwin-3.0.12.zip
解壓縮於 D:\TensorFlowB\swigwin-3.0.12
開啟 VS2015 x64 Native Tools Command Prompt
D:\TensorFlowB>git clone https://github.com/tensorflow/tensorflow.git tensorflow-1.11
D:\TensorFlowB>cd tensorflow-1.11
D:\TensorFlowB\tensorflow-1.11>git checkout r1.11
D:\TensorFlowB\tensorflow-1.11>python ./configure.py
Please specify the location of python. [Default is D:\Anaconda3\python.exe]:
Please input the desired Python library path to use. Default is [D:\Anaconda3\lib\site-packages]
Do you wish to build TensorFlow with nGraph support? [y/N]:
Do you wish to build TensorFlow with CUDA support? [y/N]: y
Please specify the CUDA SDK version you want to use. [Leave empty to default to CUDA 9.0]:
Please specify the location where CUDA 9.0 toolkit is installed. Refer to README.md for more details. [Default is C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0]:
Please specify the cuDNN version you want to use. [Leave empty to default to cuDNN 7.0]:
Please specify the location where cuDNN 7 library is installed. Refer to README.md for more details. [Default is C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0]:
Please note that each additional compute capability significantly increases your build time and binary size. [Default is: 3.5,7.0]:
Please specify optimization flags to use during compilation when bazel option "--config=opt" is specified [Default is /arch:AVX]: /arch:AVX2
Would you like to override eigen strong inline for some C++ compilation to reduce the compilation time? [Y/n]:
修改 D:/TensorFlowB/tensorflow-1.11/tensorflow/contrib/cmake/CMakeLists.txt 增加 AVX2 功能
if (tensorflow_OPTIMIZE_FOR_NATIVE_ARCH)
include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG("-march=native" COMPILER_OPT_ARCH_NATIVE_SUPPORTED)
if (COMPILER_OPT_ARCH_NATIVE_SUPPORTED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
else()
CHECK_CXX_COMPILER_FLAG("/arch:AVX2" COMPILER_OPT_ARCH_AVX_SUPPORTED)
if(COMPILER_OPT_ARCH_AVX_SUPPORTED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
add_definitions(-D__AVX2__)
endif()
endif()
endif()
參考 Add abseil_cpp cmake dependence. 修改
D:/TensorFlowB/tensorflow-1.11/tensorflow/contrib/cmake/CMakeLists.txt
增加 tensorflow/contrib/cmake/external/abseil_cpp.cmake
增加 tensorflow/contrib/cmake/modules/FindAbseilCpp.cmake
以免出現找不到 absl/strings/string_view.h 錯誤
add_definitions(-DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0)
修改 D:\TensorFlowB\tensorflow-1.11\tensorflow\contrib\cmake\external\eigen.cmake
set(eigen_PATCH_FILE "D:/TensorFlowB/eigen_half.patch")
修改 D:\TensorFlowB\tensorflow-1.11\bazel-tensorflow\tensorflow\workspace.bzl
tf_http_archive(
name = "eigen_archive",
build_file = clean_dep("//third_party:eigen.BUILD"),
patch_file = clean_dep("//third_party:eigen_half.patch"),
)
下載 https://github.com/amsokol/tensorflow-windows-build-tutorial/blob/master/eigen_half.patch
置於 D:/TensorFlowB/eigen_half.patch
之後會修改
D:\TensorFlowB\build-1.11\eigen\src\eigen\Eigen\src\Core\arch\CUDA\Half.h
D:\TensorFlowB\build-1.11\external\eigen_archive\Eigen\src\Core\arch\CUDA\Half.h
避免下列錯誤
Error more than one instance of overloaded function "__hadd" matches the argument list: tf_core_gpu_kernels d:\tensorflowb\build-1.11\external\eigen_archive\eigen\src\Core\arch\CUDA\Half.h 212
source: D:/TensorFlowB/tensorflow-1.11/tensorflow/contrib/cmake
build: D:/TensorFlowB/build-1.11
Configure
Visual Studio 14 2015 Win64
Optional toolset: host=x64
SWIG_EXECUTABLE=D:/TensorFlowB/swigwin-3.0.12/swig.exe
tensorflow_BUILD_SHARED_LIB=v
tensorflow_ENABLE_GPU=v
eigen_PATCH_FILE=v
以 Administrator 開啟 Visual Studio 2015
開啟 D:\TensorFlowB\build-1.11\tensorflow.sln
換成 Release 版本
開啟下列專案的屬性設定
_beam_search_ops, _gru_ops, _lstm_ops, _nearest_neighbor_ops, _periodic_resample_op
Property Pages/Configuration Properties/Linker/Input/Additional Dependencies
\pywrap_tensorflow_internal.lib 改為 Release\pywrap_tensorflow_internal.lib
出現 cuda_kernel_helper.h 找不到 cuda_fp16.h
Severity Code Description Project File Line Suppression State
Error C1083 Cannot open include file: 'cuda/include/cuda_fp16.h': No such file or directory _beam_search_ops D:\TensorFlowB\tensorflow-1.11\tensorflow\core\util\cuda_kernel_helper.h 24
開啟 D:\TensorFlowB\tensorflow-1.11\tensorflow\core\util\cuda_kernel_helper.h
//#include "cuda/include/cuda_fp16.h"
#include "cuda_fp16.h"
出現錯誤
Severity Code Description Project File Line Suppression State
Error LNK2019 unresolved external symbol "class absl::uint128 __cdecl absl::operator%(class absl::uint128,class absl::uint128)" (??Labsl@@YA?AVuint128@0@V10@0@Z) referenced in function "private: void __cdecl absl::str_format_internal::`anonymous namespace'::ConvertedIntInfo::UnsignedToStringRight(class absl::uint128,struct absl::str_format_internal::ConversionChar)" (??$UnsignedToStringRight@Vuint128@absl@@@ConvertedIntInfo@?A0x0d227ec7@str_format_internal@absl@@AEAAXVuint128@3@UConversionChar@23@@Z) tf_tutorials_example_trainer D:\TensorFlowB\build-1.11\arg.obj 1
Linker/Input/Additional Dependencies 加入
abseil_cpp\src\abseil_cpp_build\absl\numeric\Release\absl_int128.lib
最後是 estimator_python_api 和 tf_python_api 失敗
需要開啟 VS2015 的 Tools/Options/Projects and Solutions/Build and Run
MSBuild project build output verbosity: Normal 才能看到訊息
修改 D:\TensorFlowB\build-1.11\tf_python_api.vcxproj,移除 PYTHONPATH 設定之後多餘的空字串參數 ""
from "C:\Program Files\CMake\bin\cmake.exe" -E env PYTHONPATH=D:/TensorFlowB/build-1.11/tf_python "" D:/Anaconda3/python.exe D:/TensorFlowB/build-1.11/tf_python/tensorflow/python/tools/api/generator/create_python_api.py --root_init_template=D:/TensorFlowB/build-1.11/tf_python/tensorflow/api_template.__init__.py --apidir=D:/TensorFlowB/build-1.11/tf_python/tensorflow --package=tensorflow.python --apiname=tensorflow D:/TensorFlowB/tensorflow-1.11/api_init_files_list.txt
to "C:\Program Files\CMake\bin\cmake.exe" -E env PYTHONPATH=D:/TensorFlowB/build-1.11/tf_python D:/Anaconda3/python.exe D:/TensorFlowB/build-1.11/tf_python/tensorflow/python/tools/api/generator/create_python_api.py --root_init_template=D:/TensorFlowB/build-1.11/tf_python/tensorflow/api_template.__init__.py --apidir=D:/TensorFlowB/build-1.11/tf_python/tensorflow --package=tensorflow.python --apiname=tensorflow D:/TensorFlowB/tensorflow-1.11/api_init_files_list.txt
copy D:\TensorFlowB\tensorflow-1.11\tensorflow\tools\docs
to D:\TensorFlowB\build-1.11\tf_python\tensorflow\tools\docs
copy D:\TensorFlowB\tensorflow-1.11\tensorflow\python\distribute
to D:\TensorFlowB\build-1.11\tf_python\tensorflow\python\distribute
產生 D:\TensorFlowB\build-1.11\tf_python\dist\tensorflow_gpu-1.11.0-cp36-cp36m-win_amd64.whl
(tensorflow-1.11) D:\TensorFlowB\build-1.11>pip install tf_python\dist\tensorflow_gpu-1.11.0-cp36-cp36m-win_amd64.whl
使用 bazel
D:\TensorFlowB\tensorflow-1.11>bazel build --config=opt --config=cuda //tensorflow/tools/pip_package:build_pip_package
執行很久後可以發現
D:\TensorFlowB\tensorflow-1.11\bazel-out\x64_windows-opt\bin\tensorflow\tools\pip_package\simple_console_for_windows.zip
產生失敗,size=0
D:\TensorFlowB\tensorflow-1.11>cd bazel-out/x64_windows-opt/bin/tensorflow/tools/pip_package
edit simple_console_for_windows.zip-0.params
刪除有 .zip 的每一行
執行
D:\TensorFlowB\tensorflow-1.11\bazel-tensorflow>external\bazel_tools\tools\zip\zipper\zipper.exe vcC bazel-out/x64_windows-opt/bin/tensorflow/tools/pip_package/simple_console_for_windows.zip @bazel-out/x64_windows-opt/bin/tensorflow/tools/pip_package/simple_console_for_windows.zip-0.params
D:\TensorFlowB\tensorflow-1.11\bazel-tensorflow>cd ..
D:\TensorFlowB\tensorflow-1.11>bazel-bin\tensorflow\tools\pip_package\build_pip_package ..\tensorflow_pkg
安裝
(tensorflow-1.11) D:\TensorFlowB>pip install tensorflow_pkg\tensorflow-1.11.0-cp36-cp36m-win_amd64.whl