CS663

Object Detection API (TF2+): The important Configuration File

The following is an example of a configuration file used by the TensorFlow Object Detection API (TF2+). It is used for training an EfficientDet-D0 model.

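I have highlighted in

YELLOW the configuration values related to the model (the model block),
PURPLE the training configuration values (the train_config and train_input_reader blocks), and
GREEN the evaluation configuration values (the eval_config and eval_input_reader blocks).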

 # SSD with EfficientNet-b0 + BiFPN feature extractor,
 # shared box predictor and focal loss (a.k.a EfficientDet-d0).
 # See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
 # See Lin et al, https://arxiv.org/abs/1708.02002
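 # The base config was trained on COCO and initialized from an EfficientNet-b0 checkpoint;
 # this copy instead fine-tunes from a detection checkpoint on a one-class FLIR dataset
 # (see num_classes, fine_tune_checkpoint and the input_path values below).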
 #
 # Train on TPU-8

# Model definition: SSD meta-architecture with an EfficientNet-b0 + BiFPN
# feature extractor and a weight-shared convolutional box predictor.
model {
  ssd {
    # Batch-norm handling.
    inplace_batchnorm_update: true
    freeze_batchnorm: false

    # Number of object classes; no extra background class is added.
    num_classes: 1
    add_background_class: false

    # Box encoding.
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }

    # Anchor <-> ground-truth matching, based on IoU similarity.
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    encode_background_as_zeros: true

    # Anchors are generated on pyramid levels 3 through 7.
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: [1.0, 2.0, 0.5]
        scales_per_octave: 3
      }
    }

    # Input images are resized with aspect ratio kept, then padded to 512.
    image_resizer {
      keep_aspect_ratio_resizer {
        min_dimension: 512
        max_dimension: 512
        pad_to_max_dimension: true
      }
    }

    # Prediction head.
    box_predictor {
      weight_shared_convolutional_box_predictor {
        depth: 64
        class_prediction_bias_init: -4.6
        conv_hyperparams {
          force_use_bias: true
          activation: SWISH
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            random_normal_initializer {
              stddev: 0.01
              mean: 0.0
            }
          }
          batch_norm {
            scale: true
            decay: 0.99
            epsilon: 0.001
          }
        }
        num_layers_before_predictor: 3
        kernel_size: 3
        use_depthwise: true
      }
    }

    # Backbone + BiFPN; bifpn levels use the same 3..7 range as the anchors.
    feature_extractor {
      type: 'ssd_efficientnet-b0_bifpn_keras'
      bifpn {
        min_level: 3
        max_level: 7
        num_iterations: 3
        num_filters: 64
      }
      conv_hyperparams {
        force_use_bias: true
        activation: SWISH
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          scale: true
          decay: 0.99
          epsilon: 0.001
        }
      }
    }

    # Losses: sigmoid focal loss for classification, smooth L1 for
    # localization, both weighted 1.0.
    loss {
      classification_loss {
        weighted_sigmoid_focal {
          alpha: 0.25
          gamma: 1.5
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    normalize_loc_loss_by_codesize: true

    # Post-processing: batched non-max suppression on sigmoid scores.
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.5
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

# Training settings: fine-tune from an existing detection checkpoint for
# num_steps steps with a momentum optimizer and a warmup + cosine-decay
# learning-rate schedule.
train_config: {
  # Checkpoint to start from, and how it should be restored.
  fine_tune_checkpoint: "/home/farstrider/TensorFlow/FineTunedModels/Retrained_On_FLIR_One_Data_One_Class/checkpoint/ckpt-0"
  # THIS SHOULD WORK TOO -> fine_tune_checkpoint: "/home/farstrider/TensorFlow/SavedTraining/EfficientNet_D0_Retraining_On_FLIR_One_Data/ckpt-41"
  fine_tune_checkpoint_version: V2
  fine_tune_checkpoint_type: "detection"

  batch_size: 16
  sync_replicas: true
  startup_delay_steps: 0
  replicas_to_aggregate: 8
  use_bfloat16: true

  # Total number of training steps. Keep the schedule's total_steps below
  # equal to this value.
  num_steps: 40000

  # Data augmentation applied to training examples.
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_scale_crop_and_pad_to_square {
      output_size: 512
      scale_min: 0.1
      scale_max: 2.0
    }
  }

  optimizer {
    momentum_optimizer: {
      learning_rate: {
        cosine_decay_learning_rate {
          learning_rate_base: 8e-2
          # Was 16000 while num_steps is 40000. The Object Detection API's
          # cosine schedule yields a learning rate of 0 once the global step
          # exceeds total_steps, so the last 24000 steps would not update the
          # weights. total_steps now matches num_steps.
          total_steps: 40000
          warmup_learning_rate: .001
          warmup_steps: 2500
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }

  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
}

# Training data: label map plus the TFRecord file holding the training examples.
train_input_reader: {
  label_map_path: "/home/farstrider/TensorFlow/LabelMaps/FLIR_retraining_map.pbtxt"
  tf_record_input_reader {
    input_path: "/home/farstrider/TensorFlow/TFRecords/FLIR_One_Retraining/Training.tfrecord"
  }
}

# Evaluation settings: COCO detection metrics, one image per batch.
eval_config: {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
  batch_size: 1
}

# Evaluation data: same label map as training, read once and unshuffled
# from the testing TFRecord file.
eval_input_reader: {
  label_map_path: "/home/farstrider/TensorFlow/LabelMaps/FLIR_retraining_map.pbtxt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "/home/farstrider/TensorFlow/TFRecords/FLIR_One_Retraining/Testing.tfrecord"
  }
}

The following is an example of a configuration file used by the TensorFlow Object Detection API (TF2+). It is used for training an EfficientDet-D0 model.

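I have highlighted in

YELLOW the configuration values related to the model (the model block),

PURPLE the training configuration values (the train_config and train_input_reader blocks), and

GREEN the evaluation configuration values (the eval_config and eval_input_reader blocks).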