{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# DNN Benchmarks\n", "\n", "This notebook defines and trains deep neural networks using Keras. The goal is to benchmark the performance in both CPU and GPU environments and across each of the available _backends_." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2019-01-23T15:16:30.881306Z", "start_time": "2019-01-23T15:16:30.741074Z" } }, "outputs": [], "source": [ "# standard libraries\n", "import os\n", "import platform\n", "import psutil\n", "from typing import Tuple" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2019-01-23T15:16:58.456953Z", "start_time": "2019-01-23T15:16:31.741940Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using TensorFlow backend.\n" ] } ], "source": [ "# machine learning libraries\n", "import keras" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2019-01-23T15:21:23.661708Z", "start_time": "2019-01-23T15:21:23.657110Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "device: XLA_CPU device\n", "device: XLA_GPU device\n", "device: 0, name: Tesla P100-PCIE-12GB, pci bus id: 0000:3b:00.0, compute capability: 6.0\n" ] } ], "source": [ "import tensorflow as tf\n", "from tensorflow.python.client import device_lib\n", "\n", "for device in device_lib.list_local_devices():\n", " print(device.physical_device_desc)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Platform Information" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2019-01-23T15:21:32.813342Z", "start_time": "2019-01-23T15:21:32.808675Z" } }, "outputs": [], "source": [ "(OS, OSVER, OSTYPE), ARCH = platform.linux_distribution(), platform.processor()\n", "PYIMP, PYVER = platform.python_implementation(), platform.python_version()\n", "CPUS, MEMTOT = psutil.cpu_count(), round(psutil.virtual_memory().total / 1024**3)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2019-01-23T15:21:33.083468Z", "start_time": "2019-01-23T15:21:33.040353Z" } }, "outputs": [], "source": [ "HOSTNAME, = !hostname\n", "CLUSTER = HOSTNAME.split('-')[0]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2019-01-23T15:21:33.263856Z", "start_time": "2019-01-23T15:21:33.261050Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[34m System:\u001b[0m Gilbreth (CentOS Linux 7.6.1810 Core x86_64)\n", " 16 cores, 187GB memory\n", "\n", "\u001b[34m Python:\u001b[0m CPython 3.6.4\n", "\u001b[34m keras:\u001b[0m 2.2.4\n", "\u001b[34m tensorflow:\u001b[0m 1.12.0\n", "\n" ] } ], "source": [ "print(f\"\"\"\\\n", "\\033[34m System:\\033[0m {CLUSTER.capitalize()} ({OS} {OSVER} {OSTYPE} {ARCH})\n", " {CPUS} cores, {MEMTOT}GB memory\n", "\n", "\\033[34m Python:\\033[0m {PYIMP} {PYVER}\n", "\\033[34m keras:\\033[0m {keras.__version__}\n", "\\033[34m tensorflow:\\033[0m {keras.backend.tf.__version__}\n", "\"\"\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Models" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "### MNIST: MLP" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2019-01-23T15:22:58.730701Z", "start_time": "2019-01-23T15:22:58.721321Z" }, "code_folding": [ 0 ] }, "outputs": [], "source": [ "def mnist_mlp(batch_size: int=128, epochs: int=10, verbose: bool=True) -> keras.models.Model: \n", " \"\"\"MNIST model using a simple MLP network.\"\"\"\n", " \n", " # libraries\n", " from keras.datasets import mnist\n", " from keras.models import Sequential\n", " from keras.layers import Dense, Dropout\n", " from keras.optimizers import RMSprop\n", "\n", " # 0-9\n", " num_classes = 10\n", "\n", " # the data, split between train and test sets\n", " (x_train, y_train), (x_test, y_test) = mnist.load_data()\n", " x_train = x_train.reshape(60000, 784).astype('float32')/255\n", " x_test = x_test.reshape(10000, 784).astype('float32')/255\n", " \n", "\n", "\n", " # convert class vectors to binary class matrices\n", " y_train = keras.utils.to_categorical(y_train, num_classes)\n", " y_test = keras.utils.to_categorical(y_test, num_classes)\n", "\n", " model = Sequential()\n", " model.add(Dense(512, activation='relu', input_shape=(784,)))\n", " model.add(Dropout(0.2))\n", " model.add(Dense(512, activation='relu'))\n", " model.add(Dropout(0.2))\n", " model.add(Dense(num_classes, activation='softmax'))\n", " \n", " if verbose:\n", " print(x_train.shape[0], 'train samples')\n", " print(x_test.shape[0], 'test samples')\n", " model.summary()\n", "\n", " model.compile(loss='categorical_crossentropy',\n", " optimizer=RMSprop(),\n", " metrics=['accuracy'])\n", "\n", " history = model.fit(x_train, y_train,\n", " batch_size=batch_size,\n", " epochs=epochs,\n", " verbose=1,\n", " validation_data=(x_test, y_test))\n", " \n", " score = model.evaluate(x_test, y_test, verbose=0)\n", " \n", " if verbose:\n", " print('Test loss:', score[0])\n", " print('Test accuracy:', score[1])\n", " \n", " return model" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2019-01-23T15:23:06.434288Z", "start_time": "2019-01-23T15:22:59.007605Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "60000 train samples\n", "10000 test samples\n", "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "dense_3 (Dense) (None, 512) 401920 \n", "_________________________________________________________________\n", "dropout_3 (Dropout) (None, 512) 0 \n", "_________________________________________________________________\n", "dense_4 (Dense) (None, 512) 262656 \n", "_________________________________________________________________\n", "dropout_4 (Dropout) (None, 512) 0 \n", "_________________________________________________________________\n", "dense_5 (Dense) (None, 10) 5130 \n", "=================================================================\n", "Total params: 669,706\n", "Trainable params: 669,706\n", "Non-trainable params: 0\n", "_________________________________________________________________\n", "Train on 60000 samples, validate on 10000 samples\n", "Epoch 1/4\n", "60000/60000 [==============================] - 2s 28us/step - loss: 0.2422 - acc: 0.9254 - val_loss: 0.1118 - val_acc: 0.9638\n", "Epoch 2/4\n", "60000/60000 [==============================] - 1s 24us/step - loss: 0.1036 - acc: 0.9687 - val_loss: 0.0888 - val_acc: 0.9728\n", "Epoch 3/4\n", "60000/60000 [==============================] - 1s 24us/step - loss: 0.0764 - acc: 0.9770 - val_loss: 0.0804 - val_acc: 0.9771\n", "Epoch 4/4\n", "60000/60000 [==============================] - 1s 24us/step - loss: 0.0588 - acc: 0.9814 - val_loss: 0.1027 - val_acc: 0.9747\n", "Test loss: 0.10271684303390793\n", "Test accuracy: 0.9747\n" ] } ], "source": [ "model = mnist_mlp(epochs=4)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### MNIST: CNN" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2019-01-23T15:23:54.293115Z", "start_time": "2019-01-23T15:23:54.278921Z" }, "code_folding": [] }, "outputs": [], "source": [ "def mnist_cnn(batch_size: int=128, epochs: int=10, verbose: bool=True) -> keras.models.Model:\n", " \"\"\"MNIST model using a 2D convolutional network.\"\"\"\n", " \n", " # libraries\n", " from keras.datasets import mnist\n", " from keras.models import Sequential\n", " from keras.layers import Dense, Dropout, Flatten\n", " from keras.layers import Conv2D, MaxPooling2D\n", " from keras import backend as K\n", "\n", " # 0-9\n", " num_classes = 10\n", "\n", " # input image dimensions\n", " img_rows, img_cols = 28, 28\n", "\n", " # the data, split between train and test sets\n", " (x_train, y_train), (x_test, y_test) = mnist.load_data()\n", "\n", " if K.image_data_format() == 'channels_first':\n", " x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)\n", " x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)\n", " input_shape = (1, img_rows, img_cols)\n", " else:\n", " x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)\n", " x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)\n", " input_shape = (img_rows, img_cols, 1)\n", "\n", " x_train = x_train.astype('float32') / 255\n", " x_test = x_test.astype('float32') / 255\n", "\n", " # convert class vectors to binary class matrices\n", " y_train = keras.utils.to_categorical(y_train, num_classes)\n", " y_test = keras.utils.to_categorical(y_test, num_classes)\n", "\n", " model = Sequential()\n", " model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))\n", " model.add(Conv2D(64, (3, 3), activation='relu'))\n", " model.add(MaxPooling2D(pool_size=(2, 2)))\n", " model.add(Dropout(0.25))\n", " model.add(Flatten())\n", " model.add(Dense(128, activation='relu'))\n", " model.add(Dropout(0.5))\n", " model.add(Dense(num_classes, activation='softmax'))\n", " \n", " if verbose:\n", " print('x_train shape:', x_train.shape)\n", " print(x_train.shape[0], 'train samples')\n", " print(x_test.shape[0], 'test samples')\n", " model.summary()\n", "\n", " model.compile(loss=keras.losses.categorical_crossentropy,\n", " optimizer=keras.optimizers.Adadelta(),\n", " metrics=['accuracy'])\n", "\n", " model.fit(x_train, y_train,\n", " batch_size=batch_size,\n", " epochs=epochs,\n", " verbose=1,\n", " validation_data=(x_test, y_test))\n", " \n", " score = model.evaluate(x_test, y_test, verbose=0)\n", " \n", " if verbose:\n", " print('Test loss:', score[0])\n", " print('Test accuracy:', score[1])\n", " \n", " return model" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2019-01-23T15:24:13.134666Z", "start_time": "2019-01-23T15:23:55.055559Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "x_train shape: (60000, 28, 28, 1)\n", "60000 train samples\n", "10000 test samples\n", "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "conv2d_3 (Conv2D) (None, 26, 26, 32) 320 \n", "_________________________________________________________________\n", "conv2d_4 (Conv2D) (None, 24, 24, 64) 18496 \n", "_________________________________________________________________\n", "max_pooling2d_2 (MaxPooling2 (None, 12, 12, 64) 0 \n", "_________________________________________________________________\n", "dropout_5 (Dropout) (None, 12, 12, 64) 0 \n", "_________________________________________________________________\n", "flatten_2 (Flatten) (None, 9216) 0 \n", "_________________________________________________________________\n", "dense_6 (Dense) (None, 128) 1179776 \n", "_________________________________________________________________\n", "dropout_6 (Dropout) (None, 128) 0 \n", "_________________________________________________________________\n", "dense_7 (Dense) (None, 10) 1290 \n", "=================================================================\n", "Total params: 1,199,882\n", "Trainable params: 1,199,882\n", "Non-trainable params: 0\n", "_________________________________________________________________\n", "Train on 60000 samples, validate on 10000 samples\n", "Epoch 1/4\n", "60000/60000 [==============================] - 3s 49us/step - loss: 0.2653 - acc: 0.9161 - val_loss: 0.0595 - val_acc: 0.9812\n", "Epoch 2/4\n", "60000/60000 [==============================] - 2s 40us/step - loss: 0.0880 - acc: 0.9734 - val_loss: 0.0399 - val_acc: 0.9870\n", "Epoch 3/4\n", "60000/60000 [==============================] - 2s 40us/step - loss: 0.0674 - acc: 0.9803 - val_loss: 0.0333 - val_acc: 0.9891\n", "Epoch 4/4\n", "60000/60000 [==============================] - 2s 40us/step - loss: 0.0555 - acc: 0.9835 - val_loss: 0.0339 - val_acc: 0.9881\n", "Test loss: 0.033939996995640105\n", "Test accuracy: 0.9881\n" ] } ], "source": [ "model = mnist_cnn(epochs=4)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ResNet50" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2019-01-23T15:28:44.842403Z", "start_time": "2019-01-23T15:28:44.831815Z" }, "code_folding": [] }, "outputs": [], "source": [ "def resnet50(train_N: int=10, test_N: int=3, gpus: int=0,\n", " batch_size: int=1, epochs: int=1, verbose: bool=True) -> keras.models.Model:\n", " \"\"\"\n", " ResNet50 - Deep convolutional neural network.\n", " \n", " This uses arbitrary static (noise) as inputs to ResNet50 for the purpose of running\n", " the benchmarks.\n", " \"\"\"\n", "\n", " from typing import Tuple\n", "\n", " import numpy as np\n", "\n", " from keras.layers import Dense\n", " from keras.models import Model\n", " from keras.optimizers import Adam\n", " from keras.preprocessing import image\n", " from keras.applications.resnet50 import ResNet50\n", " from keras.applications.resnet50 import preprocess_input\n", " from keras.applications.imagenet_utils import decode_predictions\n", " from keras.utils import multi_gpu_model\n", " import tensorflow as tf\n", "\n", " def gen_data(N: int) -> Tuple[np.ndarray, np.ndarray]:\n", " \"\"\"Generates 224x224x3 inputs and 1000-class labels\"\"\"\n", " X = np.round(np.random.rand(N, 224, 224, 3) * 255).astype('float32') / 255\n", " y = np.round(np.random.rand(N) * 999).astype('float32')\n", " y = keras.utils.to_categorical(y, num_classes=1000)\n", " return X, y\n", "\n", " # synthetic data\n", " train_X, train_y = gen_data(train_N)\n", " test_X, test_y = gen_data(test_N)\n", "\n", " with tf.device('/cpu:0'):\n", " model = ResNet50()\n", " \n", " if gpus < 2:\n", " model.compile(loss=keras.losses.categorical_crossentropy,\n", " optimizer=keras.optimizers.Adadelta(),\n", " metrics=['accuracy'])\n", " model.fit(train_X, train_y,\n", " batch_size=batch_size, epochs=epochs, verbose=int(verbose),\n", " validation_data=(test_X, test_y))\n", " score = model.evaluate(test_X, test_y, verbose=0)\n", " \n", " else:\n", " parallel_model = multi_gpu_model(model, gpus=gpus)\n", " parallel_model.compile(loss=keras.losses.categorical_crossentropy,\n", " optimizer=keras.optimizers.Adadelta(),\n", " metrics=['accuracy'])\n", " parallel_model.fit(train_X, train_y,\n", " batch_size=batch_size, epochs=epochs, verbose=int(verbose),\n", " validation_data=(test_X, test_y))\n", " score = parallel_model.evaluate(test_X, test_y, verbose=0)\n", " \n", " print('Test loss:', score[0])\n", " print('Test accuracy:', score[1])\n", " return model" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "ExecuteTime": { "end_time": "2019-01-23T15:31:57.789487Z", "start_time": "2019-01-23T15:29:06.767909Z" }, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train on 100 samples, validate on 20 samples\n", "Epoch 1/4\n", "100/100 [==============================] - 53s 530ms/step - loss: 7.1385 - acc: 0.0000e+00 - val_loss: 6.9643 - val_acc: 0.0000e+00\n", "Epoch 2/4\n", "100/100 [==============================] - 38s 382ms/step - loss: 5.7515 - acc: 0.0000e+00 - val_loss: 16.1181 - val_acc: 0.0000e+00\n", "Epoch 3/4\n", "100/100 [==============================] - 38s 382ms/step - loss: 5.2580 - acc: 0.0000e+00 - val_loss: 12.4957 - val_acc: 0.0000e+00\n", "Epoch 4/4\n", "100/100 [==============================] - 38s 382ms/step - loss: 5.0942 - acc: 0.0100 - val_loss: 6.8978 - val_acc: 0.0000e+00\n", "Test loss: 6.897780418395996\n", "Test accuracy: 0.0\n" ] } ], "source": [ "model = resnet50(train_N=100, test_N=20, gpus=1, epochs=4)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "




" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "




" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "




" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "




" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "




" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "




" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "




" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "




" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "




" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "




" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "




" ] } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python 3.6 - Learning [learning/conda-5.1.0-py36-gpu]", "language": "python", "name": "sys_learning36" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" }, "latex_envs": { "LaTeX_envs_menu_present": false, "autoclose": false, "autocomplete": false, "bibliofile": "biblio.bib", "cite_by": "apalike", "current_citInitial": 1, "eqLabelWithNumbers": true, "eqNumInitial": 1, "hotkeys": { "equation": "Ctrl-E", "itemize": "Ctrl-I" }, "labels_anchors": false, "latex_user_defs": false, "report_style_numbering": false, "user_envs_cfg": false }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": true, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }