{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Hussnain\\Anaconda3\\envs\\tensorflow\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
" from ._conv import register_converters as _register_converters\n",
"Using TensorFlow backend.\n"
]
}
],
"source": [
"#Imports\n",
"from keras.datasets import imdb\n",
"\n",
"from keras import models\n",
"from keras import layers\n",
"from keras import optimizers\n",
"from keras import losses\n",
"from keras import metrics,activations\n",
"\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz\n",
" 1048576/17464789 [>.............................] - ETA: 53:49"
]
}
],
"source": [
"#Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz\n",
"\n",
"(xtrain,ytrain), (xtest, ytest) = imdb.load_data(num_words=10000)"
]
},
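{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sanity check: num_words=10000 caps the vocabulary,\n",
"# so no word index in the data should exceed 9999\n",
"print(max(max(sequence) for sequence in xtrain))"
]
},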
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Exploring the dataset\n",
"\n",
"print('xtrain shape', xtrain.shape)\n",
"print('ytrain shape', ytrain.shape)\n",
"print()\n",
"print('xtest shape', xtest.shape)\n",
"print('ytest shape', ytest.shape)\n",
"print()\n",
"print('xtrain first review as dictionary index', xtrain[1])\n",
"print()\n",
"print()\n",
"print('ytrain label', ytrain[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#index to words mapping\n",
"word_index = imdb.get_word_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"decode_review = ' '.join([reverse_word_index.get(i-3, reverse_word_index.get(i)) for i in xtrain[22]])\n",
"decode_review"
]
},
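{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative reusable sketch of the same decoding logic; 'decode' is a\n",
"# helper name introduced here, not part of the Keras API\n",
"def decode(sequence):\n",
"    return ' '.join(reverse_word_index.get(i - 3, '?') for i in sequence)\n",
"\n",
"decode(xtrain[0])"
]
},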
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"def vectorize_sequences(sequences, dimension=10000):\n",
" results = np.zeros((len(sequences), dimension))\n",
" for i, sequence in enumerate(sequences):\n",
" results[i, sequence] = 1. \n",
" return results\n",
"\n",
"x_train = vectorize_sequences(xtrain)\n",
"x_test = vectorize_sequences(xtest)"
]
},
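{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Toy example (illustrative) of what vectorize_sequences produces: the\n",
"# sequence [1, 4] becomes a row with 1.0 at columns 1 and 4, 0.0 elsewhere\n",
"vectorize_sequences([[1, 4]], dimension=8)"
]
},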
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ytrain = np.asarray(ytrain).astype('float32')\n",
"ytest = np.asarray(ytest).astype('float32')"
]
},
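{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick check (illustrative) that the labels really are binary 0.0/1.0 values\n",
"print(ytrain[:10])\n",
"print(np.unique(ytrain))"
]
},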
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#model\n",
"model = models.Sequential()\n",
"model.add(layers.Dense(16, activation=activations.relu, input_shape=(10000,)))\n",
"model.add(layers.Dense(16, activation=activations.relu))\n",
"model.add(layers.Dense(1, activation=activations.sigmoid))"
]
},
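{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Inspect the architecture; the first Dense layer alone holds\n",
"# 10000 * 16 + 16 = 160,016 parameters\n",
"model.summary()"
]
},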
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model.compile(optimizer=optimizers.RMSprop(lr=0.0001), loss=losses.mse, metrics=['acc'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x_val = x_train[:10000]\n",
"y_val = ytrain[:10000]\n",
"\n",
"x_train_partial = x_train[10000:]\n",
"y_train_partial = ytrain[10000:]"
]
},
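{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Confirm the split: 10,000 validation samples, 15,000 left for training\n",
"print('x_val', x_val.shape, 'x_train_partial', x_train_partial.shape)"
]
},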
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"history = model.fit(x_train_partial, y_train_partial, epochs=4, batch_size=512, validation_data=(x_val,y_val))\n",
"history_dict = history.history\n",
"history_dict.keys()\n",
"print(history.history['acc'][-1])\n",
"print(history.history['val_acc'][-1])"
]
},
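{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Evaluate on the held-out test set (illustrative; returns [loss, accuracy]\n",
"# in the order given to compile)\n",
"results = model.evaluate(x_test, ytest)\n",
"print(results)"
]
},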
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(model.predict(x_train_partial[22:23]))"
]
},
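{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch: turn predicted probabilities into hard 0/1 class\n",
"# labels by thresholding at 0.5, and compare against the true labels\n",
"probs = model.predict(x_test[:5])\n",
"print((probs > 0.5).astype('int32').ravel())\n",
"print(ytest[:5])"
]
},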
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"loss = history_dict['loss']\n",
"val_loss = history_dict['val_loss']\n",
"epochs = range(0, len(loss)+1)\n",
"epochs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib\n",
"acc = history.history['acc']\n",
"val_acc = history.history['val_acc']\n",
"loss = history.history['loss']\n",
"val_loss = history.history['val_loss']\n",
"\n",
"epochs = range(1, len(acc) + 1)\n",
"\n",
"# \"bo\" is for \"blue dot\"\n",
"plt.plot(epochs, loss, 'ro', label='Training loss')\n",
"# b is for \"solid blue line\"\n",
"plt.plot(epochs, val_loss, 'b', label='Validation loss')\n",
"plt.title('Training and validation loss')\n",
"plt.xlabel('Epochs')\n",
"plt.ylabel('Loss')\n",
"plt.legend()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.clf() # clear figure# clear \n",
"acc_values = history_dict['acc']\n",
"val_acc_values = history_dict['val_acc']\n",
"\n",
"plt.plot(epochs, acc, 'bo', label='Training acc')\n",
"plt.plot(epochs, val_acc, 'b', label='Validation acc')\n",
"plt.title('Training and validation accuracy')\n",
"plt.xlabel('Epochs')\n",
"plt.ylabel('Loss')\n",
"plt.legend()\n",
"\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}