In [1]:
import tensorflow as tf
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tqdm
import datetime
import json
from sklearn.preprocessing import LabelBinarizer 

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# enable eager execution to guarantee the tensors have discrete instead of symbolic values, 
# so you can do operations on them
tf.enable_eager_execution()
tf.executing_eagerly()

True

In [3]:
# base path of video dataset
BASE_PATH = 'C:\\Users\\thoma\\Documents\\CSU East Bay\\2nd Year\\Fall 2019\\CS 663\\Exercises\\LSTM Exercise\\UCF101\\UCF-101'
VIDEOS_PATH = os.path.join(BASE_PATH, '**', '*.avi')

# sequence length LSTM will process
SEQUENCE_LENGTH = 40
BATCH_SIZE = 16

In [4]:
def frame_generator():
    video_paths = tf.io.gfile.glob(VIDEOS_PATH)
    np.random.shuffle(video_paths)
    for video_path in video_paths:
        frames = []
        cap = cv2.VideoCapture(video_path)
        num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        sample_every_frame = max(1, num_frames // SEQUENCE_LENGTH)
        current_frame = 0
        
        label = os.path.basename(os.path.dirname(video_path))
        
        max_images = SEQUENCE_LENGTH
        while True:
            success, frame = cap.read()
            if not success:
                break
                
            if current_frame % sample_every_frame == 0:
                frame = frame[:, :, ::-1]
                img = tf.image.resize(frame, (224, 224))
                img = tf.keras.applications.mobilenet_v2.preprocess_input(img)
                max_images -= 1
                yield img, video_path
                
            if max_images == 0:
                break
            current_frame += 1

In [5]:
dataset = tf.data.Dataset.from_generator(frame_generator,
                                         output_types=(tf.float32, tf.string),
                                         output_shapes=((224, 224, 3), ()))

dataset = dataset.batch(BATCH_SIZE,drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)

Instructions for updating:
tf.py_func is deprecated in TF V2. Instead, there are two
    options available in V2.
    - tf.py_function takes a python function which manipulates tf eager
    tensors instead of numpy arrays. It's easy to convert a tf eager tensor to
    an ndarray (just call tensor.numpy()) but having access to eager tensors
    means `tf.py_function`s can use accelerators such as GPUs as well as
    being differentiable using a gradient tape.
    - tf.numpy_function maintains the semantics of the deprecated tf.py_func
    (it is not differentiable, and manipulates numpy arrays). It drops the
    stateful argument making all functions stateful.
    


In [6]:
print(dataset)

<DatasetV1Adapter shapes: ((16, 224, 224, 3), (16,)), types: (tf.float32, tf.string)>


In [14]:
# create MobileNetV2 feature extraction model
mobilenet_v2 = tf.keras.applications.mobilenet_v2.MobileNetV2(input_shape=(224,224,3), include_top=False, weights='imagenet')
x = mobilenet_v2.output

pooling_output = tf.keras.layers.GlobalAveragePooling2D()(x)
feature_extraction_model = tf.keras.Model(mobilenet_v2.input,pooling_output)

In [15]:
feature_extraction_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
____________________________________________________________________________________________

block_4_expand_relu (ReLU)      (None, 28, 28, 192)  0           block_4_expand_BN[0][0]          
__________________________________________________________________________________________________
block_4_depthwise (DepthwiseCon (None, 28, 28, 192)  1728        block_4_expand_relu[0][0]        
__________________________________________________________________________________________________
block_4_depthwise_BN (BatchNorm (None, 28, 28, 192)  768         block_4_depthwise[0][0]          
__________________________________________________________________________________________________
block_4_depthwise_relu (ReLU)   (None, 28, 28, 192)  0           block_4_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_4_project (Conv2D)        (None, 28, 28, 32)   6144        block_4_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_4_pr

__________________________________________________________________________________________________
block_8_project (Conv2D)        (None, 14, 14, 64)   24576       block_8_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_8_project_BN (BatchNormal (None, 14, 14, 64)   256         block_8_project[0][0]            
__________________________________________________________________________________________________
block_8_add (Add)               (None, 14, 14, 64)   0           block_7_add[0][0]                
                                                                 block_8_project_BN[0][0]         
__________________________________________________________________________________________________
block_9_expand (Conv2D)         (None, 14, 14, 384)  24576       block_8_add[0][0]                
__________________________________________________________________________________________________
block_9_ex

__________________________________________________________________________________________________
block_13_expand_BN (BatchNormal (None, 14, 14, 576)  2304        block_13_expand[0][0]            
__________________________________________________________________________________________________
block_13_expand_relu (ReLU)     (None, 14, 14, 576)  0           block_13_expand_BN[0][0]         
__________________________________________________________________________________________________
block_13_pad (ZeroPadding2D)    (None, 15, 15, 576)  0           block_13_expand_relu[0][0]       
__________________________________________________________________________________________________
block_13_depthwise (DepthwiseCo (None, 7, 7, 576)    5184        block_13_pad[0][0]               
__________________________________________________________________________________________________
block_13_depthwise_BN (BatchNor (None, 7, 7, 576)    2304        block_13_depthwise[0][0]         
__________

In [10]:
# save CNN model in SavedModel format for converting to tflite
SAVED_MODEL_DIR = 'SavedModelDir\\CNN\\V1\\'
tf.keras.experimental.export_saved_model(feature_extraction_model, SAVED_MODEL_DIR)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['serving_default']
INFO:tensorflow:Signatures INCLUDED in export for Train: None
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Eval: No

In [11]:
# convert the SavedModel to a TFLite model
converter = tf.lite.TFLiteConverter.from_saved_model(SAVED_MODEL_DIR)
tflite_model = converter.convert()

Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from SavedModelDir\CNN\V1\variables\variables
INFO:tensorflow:The given SavedModel MetaGraphDef contains SignatureDefs with the following keys: {'__saved_model_init_op', 'serving_default'}
INFO:tensorflow:input tensors info: 
INFO:tensorflow:Tensor's key in saved_model's tensor_map: input_1
INFO:tensorflow: tensor name: input_1:0, shape: (-1, 224, 224, 3), type: DT_FLOAT
INFO:tensorflow:output tensors info: 
INFO:tensorflow:Tensor's key in saved_model's tensor_map: global_average_pooling2d
INFO:tensorflow: tensor name: global_average_pooling2d/Mean:0, shape: (-1, 1280), type: DT_FLOAT
INFO:tensorflow:Restoring parameter

In [12]:
tflite_model_file = 'MobileNetV2_Model.tflite'
open(tflite_model_file, "wb").write(tflite_model)

8849332

In [None]:
current_path = None
all_features = []

# go through the dataset and use the mobilenet_v2 model to 
# extract the features for each frame
for img, batch_paths in tqdm.tqdm(dataset):
    batch_features = feature_extraction_model(img)
    # reshape the tensor
    batch_features = tf.reshape(batch_features,
                                (batch_features.shape[0], -1))
    
    for features, path in zip(batch_features.numpy(), batch_paths.numpy()):
        if path != current_path and current_path is not None:
            output_path = current_path.decode().replace('.avi','.npy')
            np.save(output_path, all_features)
            all_features = []
            
        current_path = path
        all_features.append(features)

In [11]:
LABELS = ['UnevenBars','ApplyLipstick','TableTennisShot','Fencing','Mixing','SumoWrestling','HulaHoop','PommelHorse','HorseRiding','SkyDiving','BenchPress','GolfSwing','HeadMassage','FrontCrawl','Haircut','HandstandWalking','Skiing','PlayingDaf','PlayingSitar','FrisbeeCatch','CliffDiving','BoxingSpeedBag','Kayaking','Rafting','WritingOnBoard','VolleyballSpiking','Archery','MoppingFloor','JumpRope','Lunges','BasketballDunk','Surfing','SkateBoarding','FloorGymnastics','Billiards','CuttingInKitchen','BlowingCandles','PlayingCello','JugglingBalls','Drumming','ThrowDiscus','BaseballPitch','SoccerPenalty','Hammering','BodyWeightSquats','SoccerJuggling','CricketShot','BandMarching','PlayingPiano','BreastStroke','ApplyEyeMakeup','HighJump','IceDancing','HandstandPushups','RockClimbingIndoor','HammerThrow','WallPushups','RopeClimbing','Basketball','Shotput','Nunchucks','WalkingWithDog','PlayingFlute','PlayingDhol','PullUps','CricketBowling','BabyCrawling','Diving','TaiChi','YoYo','BlowDryHair','PushUps','ShavingBeard','Knitting','HorseRace','TrampolineJumping','Typing','Bowling','CleanAndJerk','MilitaryParade','FieldHockeyPenalty','PlayingViolin','Skijet','PizzaTossing','LongJump','PlayingTabla','PlayingGuitar','BrushingTeeth','PoleVault','Punch','ParallelBars','Biking','BalanceBeam','Swing','JavelinThrow','Rowing','StillRings','SalsaSpin','TennisSwing','JumpingJack','BoxingPunchingBag']
encoder = LabelBinarizer()
encoder.fit(LABELS)

LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)

In [12]:
#setup a keras Sequential model with 1) Masking layer  2) LSTM layer with 512 cells, dropout 0.5, recurrent_dropout of 0.5  
# 3) a fully connected relu activation layer with 256 outputs,  4) a droupout layer 5) a final decision fully connected layer of length of labels
# (which is the number of classes) with softmax activation
model = tf.keras.Sequential([
    tf.keras.layers.Masking(mask_value=0.),
    tf.keras.layers.LSTM(512, dropout=0.5, recurrent_dropout=0.5),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(len(LABELS), activation='softmax')
])

In [13]:
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy', 'top_k_categorical_accuracy'])

In [14]:
test_file = 'C:/Users/thoma/Documents/CSU East Bay/2nd Year/Fall 2019/CS 663/Exercises/LSTM Exercise/testlist01.txt'
train_file = 'C:/Users/thoma/Documents/CSU East Bay/2nd Year/Fall 2019/CS 663/Exercises/LSTM Exercise/trainlist01.txt'

with open(test_file) as f:
    test_list = [row.strip() for row in list(f)]

with open(train_file) as f:
    train_list = [row.strip() for row in list(f)]
    train_list = [row.split(' ')[0] for row in train_list]


def make_generator(file_list):
    def generator():
        np.random.shuffle(file_list)
        for path in file_list:
            full_path = os.path.join(BASE_PATH, path).replace('.avi', '.npy')

            label = os.path.basename(os.path.dirname(path))
            features = np.load(full_path)

            padded_sequence = np.zeros((SEQUENCE_LENGTH, 1280))
            padded_sequence[0:len(features)] = np.array(features)

            transformed_label = encoder.transform([label])
            yield padded_sequence, transformed_label[0]
    return generator

In [None]:
print(test_list)

In [None]:
print(train_list)

In [15]:
# Setup the train_dataset and valid_dataset (validation/testing).  
# Here we setting up training batch sets of 16.  

train_dataset = tf.data.Dataset.from_generator(make_generator(train_list),
                 output_types=(tf.float32, tf.int16),
                 output_shapes=(tf.TensorShape([SEQUENCE_LENGTH, 1280]), tf.TensorShape([len(LABELS)])))
train_dataset = train_dataset.batch(16,drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)


valid_dataset = tf.data.Dataset.from_generator(make_generator(test_list),
                 output_types=(tf.float32, tf.int16),
                 output_shapes=(tf.TensorShape([SEQUENCE_LENGTH, 1280]), tf.TensorShape([len(LABELS)])))
valid_dataset = valid_dataset.batch(16,drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)

In [16]:
print(train_dataset)

<DatasetV1Adapter shapes: ((16, 40, 1280), (16, 101)), types: (tf.float32, tf.int16)>


In [20]:
# fit the model with the training data (can increase the # of epochs & validation_steps if desired)
model.fit(train_dataset, epochs=1, validation_data=valid_dataset, validation_steps=4, verbose=0)

<tensorflow.python.keras.callbacks.History at 0x18e26d44cf8>

In [21]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking (Masking)            multiple                  0         
_________________________________________________________________
lstm (LSTM)                  multiple                  3672064   
_________________________________________________________________
dense (Dense)                multiple                  131328    
_________________________________________________________________
dropout (Dropout)            multiple                  0         
_________________________________________________________________
dense_1 (Dense)              multiple                  25957     
Total params: 3,829,349
Trainable params: 3,829,349
Non-trainable params: 0
_________________________________________________________________


In [22]:
# save model as a HDF5 file
BASE_DIRECTORY = 'C:\\Users\\thoma\\Documents\\CSU East Bay\\2nd Year\\Fall 2019\\CS 663\\Exercises\\LSTM Exercise\\CondaEnv\\';
model_file = os.path.join(BASE_DIRECTORY, 'TF1-14_LSTM_Model_V1.h5')
model.save(model_file)

In [24]:
# save model in SavedModel format for serving from the cloud
SAVED_MODEL_DIR = os.path.join(BASE_DIRECTORY, 'SavedModelDir\\LSTM\\V1\\')
tf.keras.experimental.export_saved_model(model, SAVED_MODEL_DIR)

Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: None
INFO:tensorflow:Signatures INCLUDED in export for Train: ['train']
INFO:tensorflow:Signatures INCLUDED in export for Eval: None
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: None
INFO:tensorflow:Signatures INCLUDED in export for Train: None
INFO:tensorflow:Signatures INCLUDED in export for Eval: ['eval']
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:S

In [15]:
# code to load model saved in SavedModel format
BASE_DIRECTORY = 'C:\\Users\\thoma\\Documents\\CSU East Bay\\2nd Year\\Fall 2019\\CS 663\\Exercises\\LSTM Exercise\\CondaEnv\\';
SAVED_MODEL_DIR = os.path.join(BASE_DIRECTORY, 'SavedModelDir\\V1\\')
model = tf.keras.experimental.load_from_saved_model(SAVED_MODEL_DIR)

In [16]:
# show summary of LSTM model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking (Masking)            multiple                  0         
_________________________________________________________________
lstm (LSTM)                  multiple                  3672064   
_________________________________________________________________
dense (Dense)                multiple                  131328    
_________________________________________________________________
dropout (Dropout)            multiple                  0         
_________________________________________________________________
dense_1 (Dense)              multiple                  25957     
Total params: 3,829,349
Trainable params: 3,829,349
Non-trainable params: 0
_________________________________________________________________


In [27]:
# load the data for one video file (can be any video file converted into .npy file)
data = np.load('v_ApplyEyeMakeup_g01_c01.npy')
print(data.shape)

(40, 1280)


In [28]:
# prepare data for LSTM model prediction
data = np.expand_dims(data, axis=0)
print(data.shape)

(1, 40, 1280)


In [29]:
# perform prediction using video data on LSTM model
results = model.predict(data,batch_size=1,verbose=0)
print(results)

[[2.34586015e-01 9.29067880e-02 5.24801668e-04 2.80175847e-03
  4.56667112e-06 6.23680826e-05 9.34260527e-07 2.09328414e-06
  1.73263186e-06 1.84354882e-04 2.80129984e-06 5.26745680e-06
  1.01812921e-01 7.56075233e-03 5.59206783e-05 4.15038259e-04
  8.85776058e-03 1.08164055e-02 2.28756817e-05 5.40105365e-02
  7.32433764e-05 1.49744883e-04 2.56417775e-06 2.01070197e-05
  5.93994628e-04 1.84378005e-05 7.66004901e-04 2.00768113e-06
  9.79823312e-07 1.58826106e-06 4.16114858e-07 2.61189689e-05
  2.10426060e-06 8.37515816e-02 1.82110398e-05 1.27867621e-03
  8.36316147e-04 1.11594876e-04 5.44527881e-02 3.37613784e-07
  1.32539938e-07 1.26996856e-05 4.36908682e-04 3.81465770e-06
  2.05477349e-07 7.70328939e-03 1.05808112e-05 2.52184225e-04
  5.84760483e-06 1.71517488e-03 1.18683232e-07 3.47286987e-05
  3.25744732e-05 3.12678458e-04 1.50305161e-04 9.57300712e-04
  1.18818070e-06 4.86100838e-03 1.10788962e-04 3.91399162e-03
  3.55030847e-04 1.19448127e-03 1.11013663e-03 2.00040638e-03
  8.3285

In [30]:
# convert the data for one video file to .json file to pass to LSTM model in the cloud
# using gcloud command line tool
a = np.load('v_ApplyEyeMakeup_g01_c01.npy')
b = a.tolist()
json_file = "request.json"

counter = 1
with open(json_file, 'w', encoding='utf-8') as f:
    wrapper = []
    for i in b:
        wrapper.append(i)
    instance_str = "input_1"
    instance = {instance_str: wrapper}
    json.dump(instance, f)
    f.write("\n")