# coding: utf-8

# # This is an example of performing Video Activity Recognition using LSTM
# Modified from "Hands-On Computer Vision with TensorFlow 2" by B. Planche and E. Andres

# In[1]:

# STEP 1: Install packages in the current environment
import sys
get_ipython().system('{sys.executable} -m pip install opencv-python')
get_ipython().system('{sys.executable} -m pip install matplotlib')
get_ipython().system('{sys.executable} -m pip install tqdm')
get_ipython().system('{sys.executable} -m pip install scikit-learn')

# In[3]:

get_ipython().system('{sys.executable} -m pip install tensorflow==1.14')
#!{sys.executable} -m pip install --user tensorflow==2.0

# # Step 2: import modules

# In[8]:

import tensorflow as tf
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tqdm
from sklearn.preprocessing import LabelBinarizer

print("Tensorflow version " + str(tf.version.VERSION))

# # Step 3: set up variables

# In[9]:

# enable eager execution to guarantee the tensors have discrete instead of symbolic values,
# so you can do operations on them
#tf.enable_eager_execution()
#tf.executing_eagerly()
print(tf.version.VERSION)

# In[10]:

# location where YOU have installed the UCF-101 data set
#BASE_PATH = '../data/UCF-101'
# change the base path to the location where YOU installed the UCF-101 dataset
#BASE_PATH = 'C:/Grewe/Classes/CS663/Mat/LSTM/data/UCF-101'
BASE_PATH = 'C:\\Grewe\\Classes\\CS663\\Mat\\LSTM\\data\\UCF-101'
VIDEOS_PATH = os.path.join(BASE_PATH, '**', '*.avi')

# this specifies the sequence length that will be processed by the LSTM
SEQUENCE_LENGTH = 40
print(VIDEOS_PATH)

# ### STEP 4: sample the video -- do not process every frame
# PART 1: define a function frame_generator() that creates SEQUENCE_LENGTH samples by taking every Kth frame, where K = num_frames_in_video / SEQUENCE_LENGTH.
# PART 2: load the DataSet, specify that the output will be frames of size 299x299x3 (RGB), and create batches of 16 at a time. A quick sanity check on the resulting dataset follows the cell below.

# In[11]:

def frame_generator():
    video_paths = tf.io.gfile.glob(VIDEOS_PATH)
    np.random.shuffle(video_paths)
    for video_path in video_paths:
        frames = []
        cap = cv2.VideoCapture(video_path)
        num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        sample_every_frame = max(1, num_frames // SEQUENCE_LENGTH)
        current_frame = 0

        label = os.path.basename(os.path.dirname(video_path))

        max_images = SEQUENCE_LENGTH
        while True:
            success, frame = cap.read()
            if not success:
                break

            if current_frame % sample_every_frame == 0:
                # OPENCV reads in BGR, tensorflow expects RGB so we invert the order
                frame = frame[:, :, ::-1]
                img = tf.image.resize(frame, (299, 299))
                img = tf.keras.applications.inception_v3.preprocess_input(img)
                max_images -= 1
                yield img, video_path

            if max_images == 0:
                break
            current_frame += 1

# `from_generator` might throw a warning, expected to disappear in upcoming versions:
# https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/data/Dataset#for_example_2
dataset = tf.data.Dataset.from_generator(frame_generator,
             output_types=(tf.float32, tf.string),
             output_shapes=((299, 299, 3), ()))

dataset = dataset.batch(16).prefetch(tf.data.experimental.AUTOTUNE)
print(dataset)
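# In[ ]:

# Optional sanity check (a minimal sketch, assuming eager execution / TF 2.x so the
# dataset can be iterated directly): pull one batch from the frame dataset and confirm
# each frame was resized to 299x299x3 and is paired with its source video path.
for imgs, paths in dataset.take(1):
    print("batch of frames:", imgs.shape)          # expected (16, 299, 299, 3)
    print("first source video:", paths[0].numpy())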
# ### STEP 5:
# For Feature Extraction we are going to use an existing CNN model called Inception V3, which is built into TensorFlow

# In[12]:

inception_v3 = tf.keras.applications.InceptionV3(include_top=False, weights='imagenet')

x = inception_v3.output

# We add Average Pooling to transform the feature map from
# 8 x 8 x 2048 to 1 x 2048, as we don't need spatial information
pooling_output = tf.keras.layers.GlobalAveragePooling2D()(x)

feature_extraction_model = tf.keras.Model(inception_v3.input, pooling_output)

# In[13]:

#OPTIONAL: Store the Feature Extractor Model to a SavedModel directory
# save the CNN model in SavedModel format
SAVED_FeatureExtractor_MODEL_DIR = "C:\\Grewe\\Classes\\CS663\\Mat\\LSTM\\data\\SavedModelDir_FeatureExtraction_MobileNetV2_TF_1_14"
print("going to save feature extractor model to " + str(SAVED_FeatureExtractor_MODEL_DIR))
tf.keras.experimental.export_saved_model(feature_extraction_model, SAVED_FeatureExtractor_MODEL_DIR)

# In[ ]:

#OPTIONAL: Convert and store the Feature Extractor model to a tflite file for use in Mobile
#FOR VERSION Tensorflow 1.14
# do it from the previously created SavedModel directory

#summary of the model we will convert
feature_extraction_model.summary()

# convert the SavedModel to a TFLite model
converter = tf.lite.TFLiteConverter.from_saved_model(SAVED_FeatureExtractor_MODEL_DIR)
tflite_feature_extractor_model = converter.convert()

BASE_DATA_PATH = 'C:\\Grewe\\Classes\\CS663\\Mat\\LSTM\\data'
tflite_feature_extractor_file = os.path.join(BASE_DATA_PATH, 'FeatureExtractorModel_MobileNetV2_TF1_14.tflite')
print("want to save tflite_file " + tflite_feature_extractor_file)
open(tflite_feature_extractor_file, "wb").write(tflite_feature_extractor_model)

# ### STEP 6:
# Extract Features using our InceptionV3 CNN model

# In[ ]:

current_path = None
all_features = []

#cycle through the dataset and visit each image; note tqdm.tqdm is an iterable object with an
#updateable progress bar that updates each time a new iteration is called
#each time through the outer for loop we retrieve a batch of images (each 299x299x3 RGB) and
#the video filenames they belong to (including path)
#call the feature_extraction_model above (Inception v3) on the batch to extract the features
#do this for every image of every video
for img, batch_paths in tqdm.tqdm(dataset):
    batch_features = feature_extraction_model(img)
    #reshape the tensor to shape (batch_size x #features) -- see https://www.tensorflow.org/api_docs/python/tf/reshape
    batch_features = tf.reshape(batch_features, (batch_features.shape[0], -1))

    for features, path in zip(batch_features.numpy(), batch_paths.numpy()):
        if path != current_path and current_path is not None:
            output_path = current_path.decode().replace('.avi', '.npy')
            np.save(output_path, all_features)
            all_features = []

        current_path = path
        all_features.append(features)
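# In[ ]:

# Note: the loop above only writes a video's .npy file when the *next* video's frames start
# arriving, so the features of the very last video are never flushed to disk. A minimal
# trailing-save sketch (assuming the loop above has just finished and `current_path` /
# `all_features` still hold the last video's data):
if current_path is not None and len(all_features) > 0:
    output_path = current_path.decode().replace('.avi', '.npy')
    np.save(output_path, all_features)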
# ### STEP 7:
# Setup array of Class Labels

# In[33]:

#NOTE: FYI ONLY --- when there are only 2 classes, the default LabelBinarizer produces a length-1
# vector, not length 2: [0] = class 1 and [1] = class 2. What we want is [X = class 1, Y = class 2],
# so we create a class that does what we want.

#Create a My2ClassLabelBinarizer
class My2ClassLabelBinarizer(LabelBinarizer):
    def transform(self, y):
        Y = super().transform(y)
        if self.y_type_ == 'binary':
            return np.hstack((Y, 1 - Y))
        else:
            return Y

    def inverse_transform(self, Y, threshold=None):
        if self.y_type_ == 'binary':
            return super().inverse_transform(Y[:, 0], threshold)
        else:
            return super().inverse_transform(Y, threshold)

# In[12]:

LABELS_2ClassExample = ['Doors', 'stairs']
encoder = My2ClassLabelBinarizer()
encoder.fit(LABELS_2ClassExample)
print(encoder.classes_)
print(encoder.transform(['Doors', 'stairs']))
t = encoder.transform(['Doors', 'stairs', 'stairs'])
print(t)
print(encoder.inverse_transform(t))
print("length of labels " + str(len(LABELS_2ClassExample)))

# In[35]:

LABELS = ['UnevenBars','ApplyLipstick','TableTennisShot','Fencing','Mixing','SumoWrestling','HulaHoop','PommelHorse','HorseRiding','SkyDiving','BenchPress','GolfSwing','HeadMassage','FrontCrawl','Haircut','HandstandWalking','Skiing','PlayingDaf','PlayingSitar','FrisbeeCatch','CliffDiving','BoxingSpeedBag','Kayaking','Rafting','WritingOnBoard','VolleyballSpiking','Archery','MoppingFloor','JumpRope','Lunges','BasketballDunk','Surfing','SkateBoarding','FloorGymnastics','Billiards','CuttingInKitchen','BlowingCandles','PlayingCello','JugglingBalls','Drumming','ThrowDiscus','BaseballPitch','SoccerPenalty','Hammering','BodyWeightSquats','SoccerJuggling','CricketShot','BandMarching','PlayingPiano','BreastStroke','ApplyEyeMakeup','HighJump','IceDancing','HandstandPushups','RockClimbingIndoor','HammerThrow','WallPushups','RopeClimbing','Basketball','Shotput','Nunchucks','WalkingWithDog','PlayingFlute','PlayingDhol','PullUps','CricketBowling','BabyCrawling','Diving','TaiChi','YoYo','BlowDryHair','PushUps','ShavingBeard','Knitting','HorseRace','TrampolineJumping','Typing','Bowling','CleanAndJerk','MilitaryParade','FieldHockeyPenalty','PlayingViolin','Skijet','PizzaTossing','LongJump','PlayingTabla','PlayingGuitar','BrushingTeeth','PoleVault','Punch','ParallelBars','Biking','BalanceBeam','Swing','JavelinThrow','Rowing','StillRings','SalsaSpin','TennisSwing','JumpingJack','BoxingPunchingBag']
encoder = LabelBinarizer()
encoder.fit(LABELS)
print(encoder.classes_)
print(encoder.transform(LABELS))
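# In[ ]:

# Quick check (a minimal sketch): two equivalent ways to go from a one-hot (or softmax
# probability) vector back to an activity name with this encoder. The same idea is used
# later when interpreting model predictions.
one_hot = encoder.transform(['Surfing'])
print(encoder.inverse_transform(one_hot))        # ['Surfing']
print(encoder.classes_[np.argmax(one_hot[0])])   # 'Surfing'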
# ### STEP 8:
# Create the LSTM model:
# 1) Masking layer - this tells the model that the input is padded with zeros (to get a fixed-length input, here 40 x FeatureVector(1x2048), so all inputs are the same length). The masking tells the model to ignore this zero-padded data (mask_value=0.). See https://www.tensorflow.org/guide/keras/masking_and_padding
# 2) LSTM layer with 512 cells, dropout 0.5, recurrent_dropout of 0.5
# 3) a fully connected relu activation layer with 256 outputs
# 4) a dropout layer of 0.5
# 5) a final decision fully connected layer with output length len(LABELS) (the number of classes) and softmax activation.
#
# #### dropout and recurrent_dropout in LSTM:
# a method where input and recurrent connections to LSTM units are probabilistically excluded from activation and weight updates while training a network. This has the effect of reducing overfitting and improving model performance. See https://machinelearningmastery.com/use-dropout-lstm-networks-time-series-forecasting/ for a better understanding.
#
# #### LSTM layer - output = [1x512] for the entire video (image sequence)
# #### LSTM - the default value of return_sequences (=False) is used, as it is not specified.
# FALSE means return only the last output after processing the full input sequence. If it were set to TRUE, the full sequence of outputs would be returned and the output would be [SEQUENCE_LENGTH x 512] = [40 x 512].

# In[18]:

#setup a keras Sequential model with 1) Masking layer 2) LSTM layer with 512 cells, dropout 0.5, recurrent_dropout of 0.5
# 3) a fully connected relu activation layer with 256 outputs, 4) a dropout layer 5) a final decision fully connected layer of length len(LABELS)
# (which is the number of classes) with softmax activation.
model = tf.keras.Sequential([
    tf.keras.layers.Masking(mask_value=0.),
    tf.keras.layers.LSTM(512, dropout=0.5, recurrent_dropout=0.5),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(len(LABELS), activation='softmax')
])

# ### STEP 9:
# Setup for the model the Loss function, the Optimizer function, and any metrics we want to compute during training

# In[23]:

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy', 'top_k_categorical_accuracy'])

# ### STEP 10:
# Setup the training and test lists, which are lists of the training/testing filenames. Note you will need to change the location of these files to point to your location. Define a function make_generator that returns a generator which randomly shuffles a file list (either training or testing, passed in later) and changes the file extension of the .avi files listed to .npy, which are the features for that .avi video calculated in step 6. A small illustration of the zero-padding this generator performs follows below.

# In[24]:

test_file = 'C:/Grewe/Classes/CS663/Mat/LSTM/data/testlist01_reverse_order.txt'
train_file = 'C:/Grewe/Classes/CS663/Mat/LSTM/data/trainlist01_reverse_order.txt'
#test_file = 'C:/Grewe/Classes/CS663/Mat/LSTM/data/testlist01.txt'
#train_file = 'C:/Grewe/Classes/CS663/Mat/LSTM/data/trainlist01.txt'

with open(test_file) as f:
    test_list = [row.strip() for row in list(f)]

with open(train_file) as f:
    train_list = [row.strip() for row in list(f)]
train_list = [row.split(' ')[0] for row in train_list]

def make_generator(file_list):
    def generator():
        np.random.shuffle(file_list)
        for path in file_list:
            full_path = os.path.join(BASE_PATH, path).replace('.avi', '.npy')

            label = os.path.basename(os.path.dirname(path))
            features = np.load(full_path)

            padded_sequence = np.zeros((SEQUENCE_LENGTH, 2048))
            padded_sequence[0:len(features)] = np.array(features)

            transformed_label = encoder.transform([label])
            yield padded_sequence, transformed_label[0]
    return generator

# In[25]:

print(test_list)
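# In[ ]:

# Illustration (a minimal sketch): how make_generator() zero-pads a clip that yielded fewer
# than SEQUENCE_LENGTH feature vectors -- here a hypothetical short clip with 25 sampled frames.
toy_features = np.random.rand(25, 2048).astype(np.float32)   # hypothetical short clip
padded = np.zeros((SEQUENCE_LENGTH, 2048), dtype=np.float32)
padded[0:len(toy_features)] = toy_features
print(padded.shape)               # (40, 2048)
print((padded[25:] == 0).all())   # True -- rows 25..39 are the zeros the Masking layer ignores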
# ### STEP 11:
# Setup the train_dataset and valid_dataset (validation/testing). Here we set up training batches of 16.

# In[26]:

#for tensorflow 2.*
train_dataset = tf.data.Dataset.from_generator(make_generator(train_list),
                 output_types=(tf.float32, tf.int16),
                 output_shapes=((SEQUENCE_LENGTH, 2048), (len(LABELS))))
train_dataset = train_dataset.batch(16, drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)

valid_dataset = tf.data.Dataset.from_generator(make_generator(test_list),
                 output_types=(tf.float32, tf.int16),
                 output_shapes=((SEQUENCE_LENGTH, 2048), (len(LABELS))))
valid_dataset = valid_dataset.batch(16, drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)

# In[27]:

#for tensorflow 1.14
train_dataset = tf.data.Dataset.from_generator(make_generator(train_list),
                 output_types=(tf.float32, tf.int16),
                 output_shapes=(tf.TensorShape([SEQUENCE_LENGTH, 2048]), tf.TensorShape([len(LABELS)])))
#train_dataset = train_dataset.batch(16, drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)
#reduce batch size because of memory on machine
train_dataset = train_dataset.batch(16, drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)

valid_dataset = tf.data.Dataset.from_generator(make_generator(test_list),
                 output_types=(tf.float32, tf.int16),
                 output_shapes=(tf.TensorShape([SEQUENCE_LENGTH, 2048]), tf.TensorShape([len(LABELS)])))
#valid_dataset = valid_dataset.batch(16, drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)
#reduce batch size because of memory on machine
valid_dataset = valid_dataset.batch(16, drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)

# In[28]:

print(train_dataset)

# ### STEP 12:
# Setup a tensorboard_callback so that TensorBoard information for visualizing the training process is stored in a log directory (change the path for your machine), updated every 1000 samples passed through training. Call model.fit to perform the training on train_dataset, running for 17 epochs with the tensorboard_callback defined and using our "testing data set" valid_dataset for validation.

# In[29]:

BASE_DATA_PATH = 'C:\\Grewe\\Classes\\CS663\\Mat\\LSTM\\data'
mylog_dir = os.path.join(BASE_DATA_PATH, "train_log")
print("Mylog directory = " + mylog_dir)

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=mylog_dir, update_freq=1000)
print(os.path.join(mylog_dir, 'train'))

#tensorboard_callback = tf.keras.callbacks.TensorBoard(os.path.join('tmp'), update_freq=1000)
#model.fit(train_dataset, epochs=17, callbacks=[tensorboard_callback], validation_data=valid_dataset)

#following call works for tensorflow 1.14
model.fit(train_dataset, epochs=17, validation_data=valid_dataset, validation_steps=4, verbose=0)

# In[ ]:

model.summary()
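# In[ ]:

# Optional (a minimal sketch): matplotlib is imported above but never used. If the fit call
# above is changed to capture its return value, e.g. history = model.fit(...), the per-epoch
# accuracy curves can be plotted with the helper below. The key names are an assumption that
# depends on the Keras version: TF 2.x uses 'accuracy'/'val_accuracy', TF 1.14 uses 'acc'/'val_acc'.
def plot_history(history):
    # pick whichever accuracy key this Keras version recorded
    acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
    plt.plot(history.history[acc_key], label='train')
    plt.plot(history.history['val_' + acc_key], label='validation')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()

# plot_history(history)   # uncomment after capturing: history = model.fit(...)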
# ### STEP 13: save the tensorflow model to an h5 file

# In[23]:

BASE_DATA_PATH = 'C:\\Grewe\\Classes\\CS663\\Mat\\LSTM\\data'
model_file = os.path.join(BASE_DATA_PATH, 'my_model.h5')

# Save the entire model to an HDF5 file.
# The '.h5' extension indicates that the model should be saved to HDF5.
model.save(model_file)

# ### STEP 14: try to convert the model to tflite --- support to come in 2019 (when?) -- currently LSTM conversion to TFLite is NOT supported

# In[24]:

# FOR VERSION Tensorflow 2.0+
#from tensorflow import lite

tflite_file = os.path.join(BASE_DATA_PATH, 'my_tflite_model.tflite')
print("want to save tflite_file " + tflite_file)

# Convert the model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

#now save the tflite model to the file
#tflite_model.save(tflite_file)   #Note: this does not seem to work, although it appears in Google documentation
open(tflite_file, "wb").write(tflite_model)

# ### STEP 15: run evaluation on the test data feature extraction

# In[25]:

# evaluate the test data using the model
# Evaluate the model on the test data using `evaluate`
print('\n# Evaluate on test data')
# NOTE: we should have separate test data but we only have validation data
#results = model.evaluate_generator(val_data_gen, verbose=1)
results = model.evaluate(valid_dataset, verbose=1)
print('test loss, test acc:', results)

# ### STEP 16: Run predictions on the test data feature extracted

# In[26]:

# make predictions
# Generate predictions (probabilities -- the output of the last layer)
# on new data using `predict`
print('\n# Generate predictions ')
predictions = model.predict(valid_dataset, verbose=1)

# In[27]:

#print out prediction info for the validation data set (as we do not have a separate test data set)
print('predictions shape:', predictions.shape)
print(predictions)
print(len(predictions))
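# In[ ]:

# Readability helper (a minimal sketch, assuming `predictions` and `encoder` from the cells
# above): show the five most likely activity names for the first validation clip, mirroring
# the 'top_k_categorical_accuracy' metric tracked during training.
top5 = np.argsort(predictions[0])[::-1][:5]
for idx in top5:
    print(encoder.classes_[idx], predictions[0][idx])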
# ## OPTIONAL: reload from h5 file & run some predictions

# In[15]:

#VERSION TF 2.*
BASE_DATA_PATH = 'C:\\Grewe\\Classes\\CS663\\Mat\\LSTM\\data'

#load the previously saved h5 model
# Recreate the exact same model, including its weights and the optimizer
model_file = os.path.join(BASE_DATA_PATH, 'my_model.h5')
model = tf.keras.models.load_model(model_file)

# Show the model architecture
model.summary()

#for some reason the input layer is not set when doing a model reload, so it says to either re-call fit (which is ridiculous)
# or call predict(). To manually set the shapes, call model._set_inputs(inputs).
# see https://github.com/tensorflow/tensorflow/issues/30892
print('\n# Generate predictions ')
predictions = model.predict(valid_dataset, verbose=1)

#print out prediction info for the validation data set (as we do not have a separate test data set)
print('predictions shape:', predictions.shape)
print(predictions)
print(len(predictions))

# In[16]:

model.summary()

# ## OPTIONAL: Save to a SavedModel

# In[33]:

#create directories to save the SavedModel
saved_model_dir1 = os.path.join(BASE_DATA_PATH, 'saved_model')
print(" path exists=" + str(os.path.exists(saved_model_dir1)))
if os.path.exists(saved_model_dir1) == False:
    print(" creating " + str(saved_model_dir1))
    os.mkdir(saved_model_dir1)

saved_model_dir = os.path.join(BASE_DATA_PATH, 'saved_model\\LSTM_SavedModel')
if os.path.exists(saved_model_dir) == False:
    print(" creating " + str(saved_model_dir))
    os.mkdir(saved_model_dir)

# In[34]:

#VERSION TF 2.* - not working
#save model as SavedModel - tensorflow 2.*
model.save(saved_model_dir)

#save SavedModel using a different call
#tf.saved_model.save(model, saved_model_dir)

# In[31]:

#VERSION TF 1.14 - working
#save model as SavedModel - tensorflow 1.14
tf.keras.experimental.export_saved_model(model, saved_model_dir)

# # Optional: Predict on a single video clip (stored in a file)

# In[28]:

#STEP 0: if the LSTM model is not loaded then load it from the SavedModel directory
print("loading from SavedModel in " + str(saved_model_dir))
#tensorflow 1.14 call
model = tf.keras.experimental.load_from_saved_model(saved_model_dir)

# In[17]:

#STEP 1: create a DataSet using a generator function to extract the correct number of frames from a video clip
BASE_DATA_PATH = 'C:\\Grewe\\Classes\\CS663\\Mat\\LSTM\\data'
video_path = os.path.join(BASE_DATA_PATH, "testing_video\\myvideo.avi")
print(video_path)

#function to retrieve the next image in the set of images to extract from a video clip
def single_video_frame_generator():
    BASE_DATA_PATH = 'C:\\Grewe\\Classes\\CS663\\Mat\\LSTM\\data'
    video_path = os.path.join(BASE_DATA_PATH, "testing_video\\myvideo.avi")

    frames = []
    cap = cv2.VideoCapture(video_path)
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    sample_every_frame = max(1, num_frames // SEQUENCE_LENGTH)
    current_frame = 0

    label = os.path.basename(os.path.dirname(video_path))

    max_images = SEQUENCE_LENGTH
    while True:
        success, frame = cap.read()
        if not success:
            break

        if current_frame % sample_every_frame == 0:
            # OPENCV reads in BGR, tensorflow expects RGB so we invert the order
            frame = frame[:, :, ::-1]
            img = tf.image.resize(frame, (299, 299))
            img = tf.keras.applications.inception_v3.preprocess_input(img)
            max_images -= 1
            yield img, video_path

        if max_images == 0:
            break
        current_frame += 1

#create a Dataset using the previous function single_video_frame_generator
# `from_generator` might throw a warning, expected to disappear in upcoming versions:
# https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/data/Dataset#for_example_2
dataset = tf.data.Dataset.from_generator(single_video_frame_generator,
             output_types=(tf.float32, tf.string),
             output_shapes=((299, 299, 3), ()))

#dataset = dataset.batch(16).prefetch(tf.data.experimental.AUTOTUNE)
print(dataset)

# In[18]:

#STEP 2: Setup the Feature Extractor (if not done previously) to process the images in our single video clip
# uses an Inception v3 CNN that exists in tensorflow
inception_v3 = tf.keras.applications.InceptionV3(include_top=False, weights='imagenet')

x = inception_v3.output

# We add Average Pooling to transform the feature map from
# 8 x 8 x 2048 to 1 x 2048, as we don't need spatial information
pooling_output = tf.keras.layers.GlobalAveragePooling2D()(x)

feature_extraction_model = tf.keras.Model(inception_v3.input, pooling_output)
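# In[ ]:

# Quick shape check (a minimal sketch): pass a single blank 299x299 RGB image through the
# extractor and confirm the pooled output is one 2048-long feature vector, matching the
# 1x2048 vectors stored per frame during training.
dummy = tf.zeros((1, 299, 299, 3))
print("pooled feature shape:", feature_extraction_model(dummy).shape)   # expected (1, 2048)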
# In[29]:

#STEP 3: Now process the images in our dataset with the Inception v3 CNN feature extraction model created previously
#
#IMPORTANT: the conversion from a tensor object to a numpy array ONLY WORKS IN TF 2.*

# step 3.1 create a function to go through the specified video and return an array of images of length SEQUENCE_LENGTH;
# each image is resized and preprocessed for input into a FeatureExtractor InceptionV3 CNN
# returns an array of tensors (1 tensor per image)
def grabImagesFromVideo_PreProcess_for_InceptionCNN_FeatureExtractor(video_path):
    print(" going to process " + str(video_path))
    frames = []
    cap = cv2.VideoCapture(video_path)
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    sample_every_frame = max(1, num_frames // SEQUENCE_LENGTH)
    max_images = SEQUENCE_LENGTH

    #cycle through the frames in the video
    for current_frame_index in range(num_frames):
        print(" on frame " + str(current_frame_index))

        #read in the next frame from the video
        success, frame = cap.read()
        if not success:
            break

        #take every kth (sample_every_frame) frame and store it in the frames array
        if current_frame_index % sample_every_frame == 0:
            # OPENCV reads in BGR, tensorflow expects RGB so we invert the order
            frame = frame[:, :, ::-1]
            #appropriately resize and preprocess the image for Feature Extraction with the InceptionV3 CNN
            img = tf.image.resize(frame, (299, 299))
            img = tf.keras.applications.inception_v3.preprocess_input(img)
            print(" going to save image")
            print(img)
            print(img[0])
            print(img[0][0])
            frames.append(img)
            max_images -= 1

        # if we have sampled SEQUENCE_LENGTH number of frames then stop
        if max_images == 0:
            break

    return frames


# step 3.1b create a function to go through the specified video and return an array of features of length SEQUENCE_LENGTH;
# each image is resized and preprocessed for input into a FeatureExtractor InceptionV3 CNN,
# then run through the FeatureExtractor -- the output is a 1x2048 feature vector per image,
# which is appended to the set of features and returned.
# features is an array of SEQUENCE_LENGTH (40) Tensors (each 1x2048 in length)
def grabImagesFromVideo_Process_with_InceptionCNN_FeatureExtractor(video_path, feature_extraction_model):
    print(" going to process " + str(video_path))
    features = []
    cap = cv2.VideoCapture(video_path)
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    sample_every_frame = max(1, num_frames // SEQUENCE_LENGTH)
    max_images = SEQUENCE_LENGTH

    #cycle through the frames in the video
    for current_frame_index in range(num_frames):
        print(" on frame " + str(current_frame_index))

        #read in the next frame from the video
        success, frame = cap.read()
        if not success:
            break

        #take every kth (sample_every_frame) frame and process it
        if current_frame_index % sample_every_frame == 0:
            # OPENCV reads in BGR, tensorflow expects RGB so we invert the order
            frame = frame[:, :, ::-1]
            #appropriately resize and preprocess the image for Feature Extraction with the InceptionV3 CNN
            # CONVERTS TO A TENSOR from an array, with size 299x299
            img = tf.image.resize(frame, (299, 299))
            img = tf.keras.applications.inception_v3.preprocess_input(img)
            print(" img currently is:")
            print(img)
            print(" img shape is " + str(img.shape))
            tensor_input = tf.expand_dims(img, axis=0)
            print(" expanded dimension tensor now is " + str(tensor_input))
            print(" --shape is " + str(tensor_input.shape))

            #Diagnostics: run the "graph" to print out the tensor object (TF 1.x style)
            if False:
                with tf.Session() as sess:
                    #sess.run(init_op)  #execute init_op
                    #print the sampled values
                    print(" content img")
                    print(sess.run(img))
                    print(" _________________")
                    print(" expanded dimension tensor now is " + str(tensor_input))
                    print(" --shape is " + str(tensor_input.shape))
                    print(" content tensor_input")
                    print(sess.run(tensor_input))

            # now process with the feature extraction model (InceptionV3 CNN based)
            current_features = feature_extraction_model(tensor_input)
            #current_features = feature_extraction_model.predict(img, steps=1)

            #reshape the tensor to shape (1 x #features) -- see https://www.tensorflow.org/api_docs/python/tf/reshape
            current_features = tf.reshape(current_features, (current_features.shape[0], -1))

            #convert the tensor current_features to a numpy array -- only works in TF 2.*
            current_features = current_features.numpy()
            features.append(current_features)

            #reduce counter
            max_images -= 1

        # if we have sampled SEQUENCE_LENGTH number of frames then stop
        if max_images == 0:
            break

    #now make sure we have length SEQUENCE_LENGTH and pad with zeros if necessary
    #padded_sequence = np.zeros((SEQUENCE_LENGTH, 2048))
    #padded_sequence[0:len(features)] = np.array(features)

    return features
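# In[ ]:

# Aside (a minimal sketch): the frame[:, :, ::-1] slice used in the helpers above is the
# same channel swap as cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for 3-channel BGR frames.
toy_bgr = np.random.randint(0, 256, (4, 4, 3), dtype=np.uint8)   # toy BGR "frame"
print(np.array_equal(toy_bgr[:, :, ::-1], cv2.cvtColor(toy_bgr, cv2.COLOR_BGR2RGB)))   # True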
#step 3.2 call the function, passing a video_path, and return the array of images of length SEQUENCE_LENGTH
video = "C:\\Grewe\\Classes\\CS663\\Mat\\LSTM\\data\\testing_video\\myvideo.avi"
print("Going to process video = " + video)
#images = grabImagesFromVideo_PreProcess_for_InceptionCNN_FeatureExtractor(video)
#print(" Returned set of images is " + str(images))

#step 3.3 call the function, passing a video_path and the feature extraction model:
# extract SEQUENCE_LENGTH images, process each with the feature extractor,
# and return the array of features of length SEQUENCE_LENGTH
features = grabImagesFromVideo_Process_with_InceptionCNN_FeatureExtractor(video, feature_extraction_model)
print(" Returned set of features is " + str(features))

# In[ ]:

#print out the feature vector for the first image in the sequence
print(features[0])

# In[30]:

# optional - save the array of feature vectors to an npy file (same location, extension .npy)
# ONLY do this for diagnostics
print("what are features " + str(features))
output_path = video.replace('.avi', '.npy')
print(" going to save features to " + output_path)
np.save(output_path, features)

# In[ ]:

#STEP 4: take the array of feature vectors and convert it to a tensor for input into our LSTM model
tensor_input = tf.convert_to_tensor(features, dtype=tf.float32)
print(tensor_input)

#create the tensor needed from the processed video
#the current array of feature vectors representing the images in the video clip is called features
# NOT DONE -- still need to figure out what shape the input tensor should be for our model;
# one possible padding/reshaping approach is sketched below
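# In[ ]:

# One way to build the LSTM input (a minimal sketch, assuming `features` is the list of
# (1, 2048) vectors returned above): pad to SEQUENCE_LENGTH rows exactly as in training,
# then add a batch dimension so the model sees shape (1, SEQUENCE_LENGTH, 2048).
stacked = np.vstack(features)                                   # (num_sampled_frames, 2048)
padded_sequence = np.zeros((SEQUENCE_LENGTH, 2048), dtype=np.float32)
padded_sequence[0:len(stacked)] = stacked
lstm_input = tf.expand_dims(padded_sequence, axis=0)            # (1, 40, 2048)
print(lstm_input.shape)
# clip_prediction = model.predict(lstm_input)   # optional: shape now matches the training input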
predictions" + str(len(prediction))) i=0 for p in prediction: print("for prediction" + str(i)) print("index of max predicition class ") print(np.argmax(p)) i+=1 print("index of max predicition class for feature vector 1") print(np.argmax(prediction[1])) print("index of max predicition class for feature vector 29") print(np.argmax(prediction[29])) print("index of max predicition class for feature vector 39") print(np.argmax(prediction[39]))