# Source code for bigdl.examples.keras.imdb_cnn_lstm
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# IMDB sentiment classification using a recurrent convolutional network on BigDL
# Reference: https://github.com/fchollet/keras/blob/1.2.2/examples/imdb_cnn_lstm.py
# The Keras version we support and test is Keras 1.2.2 with TensorFlow backend.
# See README.md for how to run this example.
from optparse import OptionParser
import sys
def load_imdb(max_features=20000, maxlen=100):
    """
    Load the IMDB dataset through Keras and pad every review to one length.

    Keras is imported inside the function, so it is only needed when the
    function is actually called.

    :param max_features: value forwarded to ``imdb.load_data`` as ``nb_words``
        (the Keras 1.2.2 keyword). Defaults to 20000.
    :param maxlen: value forwarded to ``sequence.pad_sequences`` as ``maxlen``
        for both the training and the test reviews. Defaults to 100.
    :return: the tuple ``(X_train, y_train, X_test, y_test)``. The inputs are
        the padded sequences; the labels are returned exactly as Keras
        provides them. Converting these arrays into an RDD of Sample is left
        to the caller.
    """
    from keras.preprocessing import sequence
    from keras.datasets import imdb

    (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
    # Train and test inputs must share one length so they fit the same model.
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
    return X_train, y_train, X_test, y_test
def build_keras_model(max_features=20000, maxlen=100):
    """
    Define a recurrent convolutional model in Keras 1.2.2.

    The layers are stacked in this order: Embedding, Dropout, Convolution1D,
    MaxPooling1D, LSTM, Dense(1), sigmoid Activation.

    :param max_features: first argument of the ``Embedding`` layer. It should
        agree with the vocabulary cap used when loading the data.
        Defaults to 20000.
    :param maxlen: ``input_length`` of the ``Embedding`` layer. It should agree
        with the padded length of the input sequences. Defaults to 100.
    :return: the assembled, uncompiled Keras ``Sequential`` model.
    """
    from keras.models import Sequential
    from keras.layers import Dense, Dropout, Activation
    from keras.layers import Embedding
    from keras.layers import LSTM
    from keras.layers import Convolution1D, MaxPooling1D

    keras_model = Sequential()
    keras_model.add(Embedding(max_features, 128, input_length=maxlen))
    keras_model.add(Dropout(0.25))
    keras_model.add(Convolution1D(nb_filter=64,
                                  filter_length=5,
                                  border_mode='valid',
                                  activation='relu',
                                  subsample_length=1))
    keras_model.add(MaxPooling1D(pool_length=4))
    keras_model.add(LSTM(70))
    # Single sigmoid unit: one value per review for the binary task.
    keras_model.add(Dense(1))
    keras_model.add(Activation('sigmoid'))
    return keras_model
if __name__ == "__main__":
    # Command-line options: batch size, number of epochs, optimizer version.
    parser = OptionParser()
    parser.add_option("-b", "--batchSize", type=int, dest="batchSize", default=32)
    parser.add_option("-m", "--max_epoch", type=int, dest="max_epoch", default=2)
    parser.add_option("--optimizerVersion", dest="optimizerVersion", default="optimizerV1")
    # parse_args() reads sys.argv[1:] by default, so the program name is not
    # treated as a positional argument.
    (options, args) = parser.parse_args()

    # Define the model in Keras and save it as HDF5 so it can be loaded below.
    keras_model = build_keras_model()
    hdf5_path = "/tmp/imdb.h5"
    keras_model.save(hdf5_path)

    # These wildcard imports stay inside the module-level `if`: Python 3 only
    # allows `import *` at module level, so they cannot move into a function.
    from bigdl.util.common import *
    from bigdl.nn.layer import *
    from bigdl.optim.optimizer import *
    from bigdl.nn.criterion import *

    # Load the HDF5 file with weights to a BigDL model
    bigdl_model = Model.load_keras(hdf5_path=hdf5_path)

    sc = get_spark_context(conf=create_spark_conf())
    # NOTE(review): `redire_spark_logs` is the spelling used by this script;
    # presumably it matches the name exported by bigdl.util.common. Confirm
    # before renaming.
    redire_spark_logs()
    show_bigdl_info_logs()
    init_engine()

    set_optimizer_version(options.optimizerVersion)

    # Load the padded IMDB arrays and convert them to RDDs for training.
    X_train, y_train, X_test, y_test = load_imdb()
    train_data = to_sample_rdd(X_train, y_train)
    test_data = to_sample_rdd(X_test, y_test)

    optimizer = Optimizer(
        model=bigdl_model,
        training_rdd=train_data,
        criterion=BCECriterion(),
        optim_method=Adam(),
        end_trigger=MaxEpoch(options.max_epoch),
        batch_size=options.batchSize)
    # Validate on the test set after every epoch.
    optimizer.set_validation(
        batch_size=options.batchSize,
        val_rdd=test_data,
        trigger=EveryEpoch(),
        val_method=[Top1Accuracy()]
    )
    optimizer.optimize()