網頁

2020年12月9日 星期三

TLT+DeepStream on ICDAR-2013

到 ICDAR-2013 的網站下載圖片和標註
ICDAR-2013 在 Robust Reading Competition 網站
要下載前要註冊
選 Challenges/Focused Scene Text
2013 的文字比較正,標註只有兩點
2015 的文字有各種角度,標註有四點

產生 kitti 資料時,直接使用訓練效果不好
轉換圖形時採用不同策略,一圖拆成多圖
截圖時不要破壞文字

TLT 有測試過 detectnet_v2 (resnet18, resnet50, darknet19), object_detection yolov3 (darknet19)
detectnet_v2 之 resnet18 precision: 14.6133 %
雖然 precision 不高,但是 tlt-infer 可以正確抓到文字
detectnet_v2 之 darknet19 precision: 18.7146 %
但 detectnet_v2 之 darknet19 失敗,tlt-infer 無法正確抓到文字
yolov3 之 darknet19 AP: 0.769
效果比 detectnet_v2 好

最看重 TLT 的功能是 tlt-prune, 神經網路被裁剪後
節省記憶體,加快速度
unprune Mem:2.5G   PERF:8.33
0.5        Mem:2.1G    PERF:28.83
0.7        Mem:2.1G    PERF:30.00

DeepStream 之 nvinfer 的 config-file-path 設定檔
tlt-encoded-model=yolo_darknet19_pruned_70.etlt
model-engine-file=yolo_darknet19_pruned_70.engine
第一次啟動會很慢,因為在產生 yolo_darknet19_pruned_70.etlt_b1_gpu0_fp16.engine 檔
$ mv yolo_darknet19_pruned_70.etlt_b1_gpu0_fp16.engine yolo_darknet19_pruned_70.engine
之後會直接使用 yolo_darknet19_pruned_70.engine

以下是測試歷程記錄
建立 tlt-icdar docker
$ docker run --name tlt-icdar --runtime=nvidia --gpus all -it -v /home/user/Data/DeepStream/tlt-ICDAR/:/workspace/tlt-ICDAR -v /media/user/ImageData/:/workspace/ImageData -p 8888:8888 nvcr.io/nvidia/tlt-streamanalytics:v2.0_py3 /bin/bash
root@e16c46e4a3d7:/workspace# exit

啟動 tlt-icdar
$ docker start tlt-icdar

登入 tlt-icdar
$ docker exec -it tlt-icdar /bin/bash
root@e16c46e4a3d7:/workspace#

啟動 jupyter
root@e16c46e4a3d7:/workspace/tlt-ICDAR# jupyter notebook --allow-root --ip 0.0.0.0 --port 8888


tlt-ICDAR/tlt_specs/detectnet_v2_tfrecords_kitti_trainval.txt
kitti_config {
  root_directory_path: "/workspace/ImageData/kitti/train"
  image_dir_name: "images"
  label_dir_name: "labels"
  image_extension: ".jpg"
  partition_mode: "random"
  num_partitions: 2
  val_split: 20
  num_shards: 10
}

tlt-ICDAR/tlt_specs/detectnet_v2_train_resnet18_kitti.txt
random_seed: 42
dataset_config {
  data_sources {
    tfrecords_path: "/workspace/ImageData/kitti/tfrecords/kitti_trainval/*"
    image_directory_path: "/workspace/ImageData/kitti/train"
  }
  image_extension: "jpg"
  target_class_mapping {
    key: "p1a"
    value: "p1a"
  }
  validation_fold: 0
  #validation_data_source: {
    #tfrecords_path: "/workspace/ImageData/kitti/tfrecords/kitti_val/*"
    #image_directory_path: "/workspace/ImageData/kitti/test"
  #}
}

augmentation_config {
  preprocessing {
    output_image_width: 512
    output_image_height: 512
    output_image_channel: 3
    min_bbox_width: 1.0
    min_bbox_height: 1.0
  }
  spatial_augmentation {
    hflip_probability: 0.0
    vflip_probability: 0.0
    zoom_min: 1.0
    zoom_max: 1.0
    translate_max_x: 8.0
    translate_max_y: 8.0
    rotate_rad_max: 0.0
  }
  color_augmentation {
    color_shift_stddev: 0.0
    hue_rotation_max: 25.0
    saturation_shift_max: 0.2
    contrast_scale_max: 0.1
    contrast_center: 0.5
  }
}

postprocessing_config {
  target_class_config {
    key: "p1a"
    value: {
      clustering_config {
        coverage_threshold: 0.005
        dbscan_eps: 0.15
        dbscan_min_samples: 0.05
        minimum_bounding_box_height: 20
      }
    }
  }
}

model_config {
  arch: "resnet"
  pretrained_model_file: "/workspace/ImageData/tlt-experiment/detectnet_v2/pretrained_resnet18/tlt_pretrained_detectnet_v2_vresnet18/resnet18.hdf5"
  #freeze_blocks: 0
  #freeze_blocks: 1
  all_projections: True
  num_layers: 18
  use_pooling: False
  use_batch_norm: True
  dropout_rate: 0.0
  training_precision: {
    backend_floatx: FLOAT32
  }
  objective_set: {
    cov {}
    bbox {
      scale: 35.0
      offset: 0.5
    }
  }
}

evaluation_config {
  average_precision_mode: INTEGRATE
  validation_period_during_training: 10
  first_validation_epoch: 10
  minimum_detection_ground_truth_overlap {
    key: "p1a"
    value: 0.5
  }
  evaluation_box_config {
    key: "p1a"
    value {
      minimum_height: 4
      maximum_height: 9999
      minimum_width: 4
      maximum_width: 9999
    }
  }
}

cost_function_config {
  target_classes {
    name: "p1a"
    class_weight: 1.0
    coverage_foreground_weight: 0.05
    objectives {
      name: "cov"
      initial_weight: 1.0
      weight_target: 1.0
    }
    objectives {
      name: "bbox"
      initial_weight: 10.0
      weight_target: 10.0
    }
  }
  enable_autoweighting: True
  max_objective_weight: 0.9999
  min_objective_weight: 0.0001
}

training_config {
  batch_size_per_gpu: 8
  num_epochs: 200
  learning_rate {
    soft_start_annealing_schedule {
      #min_learning_rate: 5e-6
      #max_learning_rate: 5e-4
      min_learning_rate: 1e-6
      max_learning_rate: 1e-3
      soft_start: 0.1
      annealing: 0.7
    }
  }
  regularizer {
    type: L1
    weight: 3e-9
  }
  optimizer {
    adam {
      epsilon: 1e-08
      beta1: 0.9
      beta2: 0.999
    }
  }
  cost_scaling {
    enabled: False
    initial_exponent: 20.0
    increment: 0.005
    decrement: 1.0
  }
  checkpoint_interval: 10
}
bbox_rasterizer_config {
  target_class_config {
    key: "p1a"
    value: {
      cov_center_x: 0.5
      cov_center_y: 0.5
      cov_radius_x: 0.45
      cov_radius_y: 0.45
      bbox_min_radius: 0.9
    }
  }
  #deadzone_radius: 0.67
  deadzone_radius: 0.2
}

tlt-ICDAR/tlt_specs/yolov3_train_kitti.txt
random_seed: 42
yolo_config {
  big_anchor_shape: "[(116,90), (156,198), (373,326)]"
  mid_anchor_shape: "[(30,61), (62,45), (59,119)]"
  small_anchor_shape: "[(10,13), (16,30), (33,23)]"
  matching_neutral_box_iou: 0.5
  arch: "darknet"
  nlayers: 19
  arch_conv_blocks: 2
  #loss_loc_weight: 5.0
  #loss_neg_obj_weights: 50.0
  loss_loc_weight: 0.75
  loss_neg_obj_weights: 200.0
  loss_class_weights: 1.0
  freeze_bn: False
  #freeze_blocks: 0
  #freeze_blocks: 1
}
training_config {
  batch_size_per_gpu: 8
  num_epochs: 200
  enable_qat: false
  learning_rate {
    soft_start_annealing_schedule {
      #min_learning_rate: 5e-5
      #max_learning_rate: 2e-2
      #soft_start: 0.15
      min_learning_rate: 1e-6
      max_learning_rate: 1e-4
      soft_start: 0.1
      annealing: 0.8
    }
  }
  regularizer {
    type: L1
    #weight: 3e-5
    weight: 5e-5
  }
}
eval_config {
  validation_period_during_training: 10
  average_precision_mode: SAMPLE
  batch_size: 8
  matching_iou_threshold: 0.5
}
nms_config {
  confidence_threshold: 0.01
  clustering_iou_threshold: 0.6
  top_k: 200
}
augmentation_config {
  preprocessing {
    output_image_width: 512
    output_image_height: 512
    output_image_channel: 3
    crop_right: 512
    crop_bottom: 512
    min_bbox_width: 1.0
    min_bbox_height: 1.0
  }
  spatial_augmentation {
    hflip_probability: 0.0
    vflip_probability: 0.0
    zoom_min: 0.7
    zoom_max: 1.8
    translate_max_x: 8.0
    translate_max_y: 8.0
  }
  color_augmentation {
    hue_rotation_max: 25.0
    saturation_shift_max: 0.20000000298
    contrast_scale_max: 0.10000000149
    contrast_center: 0.5
  }
}
dataset_config {
  data_sources: {
    tfrecords_path: "/workspace/ImageData/kitti/tfrecords/kitti_trainval/*"
    image_directory_path: "/workspace/ImageData/kitti/train"
  }
  image_extension: "jpg"
  target_class_mapping {
    key: "p1a"
    value: "p1a"
  }
  validation_fold: 0
  #validation_data_source: {
    #tfrecords_path: "/workspace/ImageData/kitti/tfrecords/kitti_val/*"
    #image_directory_path: "/workspace/ImageData/kitti/test"
  #}
}

tlt-ICDAR/tlt_specs/detectnet_v2_inference_kitti_tlt.txt
inferencer_config{
  # defining target class names for the experiment.
  # Note: This must be mentioned in order of the networks classes.
  target_classes: "p1a"
  # Inference dimensions.
  image_width: 512
  image_height: 512
  # Must match what the model was trained for.
  image_channels: 3
  batch_size: 16
  gpu_index: 0
  #model handler config
  tlt_config{
    #model: "/workspace/ImageData/tlt-experiment/detectnet_v2/experiment_dir_unpruned_18/weights/resnet18_detector.tlt"
    model: "/workspace/ImageData/tlt-experiment/detectnet_v2/experiment_dir_unpruned_19/weights/darknet19_detector.tlt"
  }
}
bbox_handler_config{
  kitti_dump: true
  disable_overlay: false
  overlay_linewidth: 4
  classwise_bbox_handler_config{
    key:"p1a"
    value: {
      confidence_model: "aggregate_cov"
      output_map: "p1a"
      confidence_threshold: 0.9
      bbox_color{
        R: 0
        G: 255
        B: 0
      }
      clustering_config{
        coverage_threshold: 0.00
        dbscan_eps: 0.3
        dbscan_min_samples: 0.05
        minimum_bounding_box_height: 4
      }
    }
  }
}

tlt-ICDAR/gen_kitti.ipynb
# kitti directory
#   train
#     images
#     labels
#   test
#     images
#     labels
#   tfrecords
#     kitti_trainval
%env KITTI_DIR=/workspace/ImageData/kitti

# kitti format
# type 0 0 0 left top right bottom 0 0 0 0 0 0 0

# train image and label
%env TRAIN_2013_IMAGE=/workspace/ImageData/ICDAR/2013_Challenge2_Training_Task12_Images
%env TRAIN_2013_LABEL=/workspace/ImageData/ICDAR/2013_Challenge2_Training_Task1_GT
%env TRAIN_2015_IMAGE=/workspace/ImageData/ICDAR/2015_ch4_training_images
%env TRAIN_2015_LABEL=/workspace/ImageData/ICDAR/2015_ch4_training_localization_transcription_gt

# test image and label
%env TEST_2013_IMAGE=/workspace/ImageData/ICDAR/2013_Challenge2_Test_Task12_Images
%env TEST_2013_LABEL=/workspace/ImageData/ICDAR/2013_Challenge2_Test_Task1_GT
%env TEST_2015_IMAGE=/workspace/ImageData/ICDAR/2015_ch4_test_images
%env TEST_2015_LABEL=/workspace/ImageData/ICDAR/2015_Challenge4_Test_Task1_GT

TLT_IMAGE_WIDTH=640
TLT_IMAGE_HEIGHT=480
MIN_OBJ = 20

import os
import numpy as np
import cv2
from matplotlib import pyplot as plt
%matplotlib inline

def show_image(title, image, rects):
    """Draw every box in `rects` on `image` (in place, red, 2px) and show it.

    Note: mutates `image`; channels are reversed (BGR -> RGB) for matplotlib.
    """
    for box in rects:
        x0, y0, x1, y1 = (int(v) for v in box)
        cv2.rectangle(image, (x0, y0), (x1, y1), (0, 0, 255), 2)
    plt.figure(figsize=(20, 10))
    plt.title(title)
    plt.imshow(image[:, :, ::-1])
    
def write_kitti(image, rects, kittiPath, path, name):
    """Save one sample in KITTI layout: <kittiPath>/<path>/images/<name>.jpg
    plus <kittiPath>/<path>/labels/<name>.txt.

    Each rect [x0, y0, x1, y1] becomes one label line of class 'p1a' with
    only the bbox fields filled (all other KITTI fields zero).
    """
    imageKName = os.path.join(kittiPath, path, 'images', name + '.jpg')
    labelKName = os.path.join(kittiPath, path, 'labels', name + '.txt')
    cv2.imwrite(imageKName, image)
    # 'with' guarantees the label file is closed even if a write raises.
    with open(labelKName, 'w') as outFile:
        for rect in rects:
            kittiLine = 'p1a 0 0 0 {:f} {:f} {:f} {:f} 0 0 0 0 0 0 0\n'.format(
                rect[0], rect[1], rect[2], rect[3])
            outFile.write(kittiLine)

# rect: [x0, y0, x1, y1]
# rects: [rect, ...]
# 取得 rect 不被 rects 佔領的最大外圍空間(不包含覆蓋的 rects )
# rect: [x0, y0, x1, y1]
# rects: [rect, ...]
def get_outer_box(sw, sh, rect, rects):
    """Return the largest box around `rect` that stays clear of every box in
    `rects` lying entirely to one side of it, clipped to the image (sw x sh).

    `rect` itself (compared by value) is skipped.
    """
    left, top = 0, 0
    right, bottom = sw - 1, sh - 1
    for other in rects:
        if other == rect:
            continue
        if other[2] < rect[0]:      # fully to the left
            left = max(left, other[2] + 1)
        if other[3] < rect[1]:      # fully above
            top = max(top, other[3] + 1)
        if other[0] > rect[2]:      # fully to the right
            right = min(right, other[0] - 1)
        if other[1] > rect[3]:      # fully below
            bottom = min(bottom, other[1] - 1)
    return [left, top, right, bottom]

#rect: [x0, y0, x1, y1]
#rects: [rect, ...]
#取得與 rect 有覆蓋的 rects, 最小包覆範圍
#rect: [x0, y0, x1, y1]
#rects: [rect, ...]
def get_merge_box(rect, rects):
    """Return the minimal bounding box covering every box in `rects` that
    overlaps `rect` (boxes merely touching an edge do not count).

    Since `rect` is normally a member of `rects`, the result covers at
    least `rect` itself.
    """
    points = []
    for candidate in rects:
        disjoint = (candidate[2] <= rect[0] or candidate[3] <= rect[1]
                    or candidate[0] >= rect[2] or candidate[1] >= rect[3])
        if not disjoint:
            points.extend(candidate)
    contour = np.array(points, np.float32).reshape((-1, 1, 2))
    x, y, w, h = cv2.boundingRect(contour)
    return [x, y, x + w, y + h]

def crop_image(image, rect, rects, kittiPath, path, name):
    """Crop window `rect` out of `image` and write it as a KITTI sample.

    Every box in `rects` that overlaps the window is translated into crop
    coordinates. Boxes partially outside the window are clamped to its
    edges, so no label ever lies outside the saved image (the previous
    version could emit negative or out-of-range coordinates for boxes
    that straddled the window border).
    """
    cropW = rect[2] - rect[0]
    cropH = rect[3] - rect[1]
    nRects = []
    for r in rects:
        # Skip boxes fully outside (or only touching) the crop window.
        if r[2] <= rect[0] or r[3] <= rect[1] or r[0] >= rect[2] or r[1] >= rect[3]:
            continue
        nr = [max(r[0] - rect[0], 0),
              max(r[1] - rect[1], 0),
              min(r[2] - rect[0], cropW),
              min(r[3] - rect[1], cropH)]
        nRects.append(nr)
    nImage = image[int(rect[1]):int(rect[3])+1, int(rect[0]):int(rect[2])+1, :].copy()
    #show_image(name, nImage, nRects)
    write_kitti(nImage, nRects, kittiPath, path, name)
    
def random_outer_box(inner, outer):
    """Pick a random crop box between `inner` and `outer`.

    Each edge is sampled uniformly from the slack between the two boxes;
    where there is no slack the corresponding boundary is returned
    unchanged. Returns an (x0, y0, x1, y1) tuple.
    """
    def sample(lo, hi):
        # np.random.randint(lo, hi) when there is room, else pin to `hi`
        # (which is inner for the low edges and outer for the high edges).
        return np.random.randint(lo, hi) if lo < hi else hi

    x0 = sample(outer[0], inner[0])
    y0 = sample(outer[1], inner[1])
    x1 = sample(inner[2], outer[2])
    y1 = sample(inner[3], outer[3])
    return x0, y0, x1, y1
    
def augment_image(image, rects, width, height, kittiPath, path, name):
    # Split one source image into several KITTI crops (the "one image
    # becomes many" strategy noted at the top of the post). For each
    # ground-truth box we compute a merged/outer crop window via
    # get_merge_box/get_outer_box; `boxHist` records windows already
    # emitted so duplicates are skipped. Output files are named
    # '<name>_<fileCnt>'.
    sh, sw, sc = image.shape
    fileCnt = 0
    boxHist = []
    for rect in rects:
        if rect[2]-rect[0] >= width or width >= sw or rect[3]-rect[1] >= height or height >= sh:
            # Object too large for the crop size, or the source image is
            # smaller than the crop size.
            continue
        mRect = get_merge_box(rect, rects)
        oBox = get_outer_box(sw, sh, mRect, rects)
        if oBox in boxHist:
            continue
        boxHist.append(oBox)
        crop_image(image, oBox, rects, kittiPath, path, '{}_{}'.format(name, fileCnt))
        fileCnt += 1
        # Second pass: widen the window toward every other box `n` so the
        # crop additionally covers that neighbour, then re-derive the
        # merged/outer window and emit it if it is new.
        for n in rects:
            if n == rect:
                continue
            x0 = n[0] if n[0] < oBox[0] else oBox[0]
            y0 = n[1] if n[1] < oBox[1] else oBox[1]
            x1 = n[2] if n[2] > oBox[2] else oBox[2]
            y1 = n[3] if n[3] > oBox[3] else oBox[3]
            mRect1 = get_merge_box([x0, y0, x1, y1], rects)
            oBox1 = get_outer_box(sw, sh, mRect1, rects)
            if oBox1 in boxHist:
                continue
            boxHist.append(oBox1)
            crop_image(image, oBox1, rects, kittiPath, path, '{}_{}'.format(name, fileCnt))
            fileCnt += 1

def gen_icdar_2013(imagePath, labelPath, kittiPath, path, separator=None):
    # Convert an ICDAR-2013 image/label set into KITTI format under
    # <kittiPath>/<path>, writing the full image plus augmented 512x512
    # crops for every sample. `separator` is the token separator of the
    # GT files (',' for the test set, whitespace/None for the train set).
    # Returns the number of label files processed.
    files = os.listdir(labelPath)
    cnt = 0
    # These two samples are apparently stored rotated; they get rotated
    # 270 degrees below — NOTE(review): confirm against the dataset.
    name_list = ['gt_img_39', 'gt_140']
    for file in files:
        labelName = os.path.join(labelPath, file)
        name, ext = os.path.splitext(file)
        # GT files are named 'gt_<image>'; strip the 'gt_' prefix to find
        # the matching .jpg.
        imageName = os.path.join(imagePath, name[3:] + '.jpg')
        #print(labelName)
        #shutil.copyfile(imageName, imageKName)
        image = cv2.imread(imageName)
        matching = [s for s in name_list if name == s]
        if matching:
            image = np.rot90(image, 3).copy()
        imageSize = image.shape
        if False:
            # Disabled: optional resize of every image to a fixed
            # TLT_IMAGE_WIDTH x TLT_IMAGE_HEIGHT with label rescaling.
            rateWidth = TLT_IMAGE_WIDTH / imageSize[1]
            rateHeight = TLT_IMAGE_HEIGHT / imageSize[0]
            image = cv2.resize(image, (TLT_IMAGE_WIDTH, TLT_IMAGE_HEIGHT), interpolation=cv2.INTER_AREA)
        else:
            rateWidth = 1.0
            rateHeight = 1.0
        inFile = open(labelName, 'r')
        rects = []
        lines = inFile.readlines()
        for line in lines:
            # 2013 GT lines start with the two bbox corners:
            # x0 y0 x1 y1 (train) or x0,y0,x1,y1 (test).
            tokens = line.split(separator)
            ax0 = int(tokens[0]) * rateWidth
            ay0 = int(tokens[1]) * rateHeight
            ax1 = int(tokens[2]) * rateWidth
            ay1 = int(tokens[3]) * rateHeight
            rects.append([ax0, ay0, ax1, ay1])
            #print(ax0, ay0, ax1, ay1)
        inFile.close()
        #show_image(labelName, image, rects)
        # Emit the whole image as one sample, then the augmented crops.
        write_kitti(image, rects, kittiPath, path, '2013_' + name[3:])
        augment_image(image, rects, 512, 512, kittiPath, path, '2013_' + name[3:])
        cnt += 1
        #if cnt >= 1:
        #    break
    plt.show()
    return cnt

# Convert both the 2013 test set (comma-separated GT) and the 2013 train
# set (whitespace-separated GT) into the KITTI 'train' split.
kittiPath = os.environ['KITTI_DIR']
for imgEnv, lblEnv, sep, msg in (
        ('TEST_2013_IMAGE', 'TEST_2013_LABEL', ',', '2013 Test Image file:{}'),
        ('TRAIN_2013_IMAGE', 'TRAIN_2013_LABEL', None, '2013 Train Image file:{}')):
    imagePath = os.environ[imgEnv]
    labelPath = os.environ[lblEnv]
    cnt = gen_icdar_2013(imagePath, labelPath, kittiPath, 'train', sep)
    print(msg.format(cnt))

!ls $TRAIN_2013_IMAGE|wc
!ls $TEST_2013_IMAGE|wc
!ls $KITTI_DIR/train/images|wc
!ls $KITTI_DIR/train/labels|wc

tlt-ICDAR/show_infer_test.ipynb
import os
import numpy as np
import cv2
from matplotlib import pyplot as plt
%matplotlib inline

def show_infer_test(imagePath):
    """Display up to the first 21 images in `imagePath`, one matplotlib
    figure each (BGR files shown as RGB)."""
    shown = 0
    for entry in os.listdir(imagePath):
        fullName = os.path.join(imagePath, entry)
        #print(fullName)
        frame = cv2.imread(fullName)
        plt.figure(figsize=(20, 10))
        plt.imshow(frame[:, :, ::-1])
        shown += 1
        if shown > 20:
            break
    plt.show()

#show_infer_test('/workspace/ImageData/tlt-experiment/tlt_infer_testing')
show_infer_test('/workspace/ImageData/tlt-experiment/tlt_infer_testing/images_annotated')

tlt-ICDAR/icdar_detection.ipynb
%env KEY=tlt_encode
%env USER_EXPERIMENT_DIR=/workspace/ImageData/tlt-experiment
%env DATA_DOWNLOAD_DIR=/workspace/ImageData/kitti
%env SPECS_DIR=/workspace/tlt-ICDAR/tlt_specs
%env NUM_GPUS=1

!rm -rf $DATA_DOWNLOAD_DIR/tfrecords/kitti_trainval
!tlt-dataset-convert -d $SPECS_DIR/detectnet_v2_tfrecords_kitti_trainval.txt \
                     -o $DATA_DOWNLOAD_DIR/tfrecords/kitti_trainval/kitti_trainval
                     
!ngc registry model list nvidia/tlt_pretrained_detectnet_v2:*
!ngc registry model list nvidia/tlt_pretrained_object_detection:*

!ngc registry model download-version nvidia/tlt_pretrained_detectnet_v2:resnet18 \
    --dest $USER_EXPERIMENT_DIR/detectnet_v2/pretrained_resnet18
!ngc registry model download-version nvidia/tlt_pretrained_detectnet_v2:darknet19 \
    --dest $USER_EXPERIMENT_DIR/detectnet_v2/pretrained_darknet19
!ngc registry model download-version nvidia/tlt_pretrained_object_detection:darknet19 \
    --dest $USER_EXPERIMENT_DIR/object_detection/pretrained_darknet19

!rm -rf $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_18
!tlt-train detectnet_v2 -e $SPECS_DIR/detectnet_v2_train_resnet18_kitti.txt \
                        -r $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_18 \
                        -k $KEY \
                        -n resnet18_detector \
                        --gpus $NUM_GPUS

!rm -rf $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_19
!tlt-train detectnet_v2 -e $SPECS_DIR/detectnet_v2_train_darknet19_kitti.txt \
                        -r $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_19 \
                        -k $KEY \
                        -n darknet19_detector \
                        --gpus $NUM_GPUS

!rm -rf $USER_EXPERIMENT_DIR/object_detection/yolov3_unpruned
!mkdir $USER_EXPERIMENT_DIR/object_detection/yolov3_unpruned
!tlt-train yolo -e $SPECS_DIR/yolov3_train_kitti.txt \
                        -r $USER_EXPERIMENT_DIR/object_detection/yolov3_unpruned \
                        -k $KEY \
                        -m $USER_EXPERIMENT_DIR/object_detection/pretrained_darknet19/tlt_pretrained_object_detection_vdarknet19/darknet_19.hdf5 \
                        --gpus $NUM_GPUS

!tlt-evaluate detectnet_v2 -e $SPECS_DIR/detectnet_v2_train_resnet18_kitti.txt \
                           -m $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_18/weights/resnet18_detector.tlt \
                           -k $KEY

!tlt-evaluate detectnet_v2 -e $SPECS_DIR/detectnet_v2_train_darknet19_kitti.txt \
                           -m $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_19/weights/darknet19_detector.tlt \
                           -k $KEY

!tlt-evaluate yolo -e $SPECS_DIR/yolov3_train_kitti.txt \
                           -m $USER_EXPERIMENT_DIR/object_detection/yolov3_unpruned/weights/yolo_darknet19_epoch_200.tlt \
                           -k $KEY

!rm -rf $USER_EXPERIMENT_DIR/tlt_infer_testing

!tlt-infer detectnet_v2 -e $SPECS_DIR/detectnet_v2_inference_kitti_tlt.txt \
                        -o $USER_EXPERIMENT_DIR/tlt_infer_testing \
                        -i $DATA_DOWNLOAD_DIR/train/images \
                        -k $KEY

!tlt-infer yolo -e $SPECS_DIR/yolov3_train_kitti.txt \
                        -o $USER_EXPERIMENT_DIR/tlt_infer_testing \
                        -i $DATA_DOWNLOAD_DIR/train/images \
                        -m /workspace/ImageData/tlt-experiment/object_detection/yolov3_unpruned/weights/yolo_darknet19_epoch_200.tlt \
                        -k $KEY

!rm -rf $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_pruned_18
!mkdir $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_pruned_18

!tlt-prune -m $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_18/weights/resnet18_detector.tlt \
           -o $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_pruned_18/resnet18_detector_pruned.tlt \
           -eq union \
           -pth 0.7 \
           -k $KEY

!ls -al $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_18/weights
!ls -al $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_pruned_18

!rm -rf $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_retrain
!mkdir $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_retrain

!tlt-train detectnet_v2 -e $SPECS_DIR/detectnet_v2_retrain_resnet18_kitti.txt \
                        -r $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_retrain \
                        -k $KEY \
                        -n resnet18_detector_pruned \
                        --gpus $NUM_GPUS

!rm -rf $USER_EXPERIMENT_DIR/object_detection/yolov3_pruned
!mkdir $USER_EXPERIMENT_DIR/object_detection/yolov3_pruned

!tlt-prune -m $USER_EXPERIMENT_DIR/object_detection/yolov3_unpruned/weights/yolo_darknet19_epoch_200.tlt \
           -o $USER_EXPERIMENT_DIR/object_detection/yolov3_pruned/yolo_darknet19_epoch_200_pruned.tlt \
           -eq union \
           -pth 0.7 \
           -k $KEY

!ls -al $USER_EXPERIMENT_DIR/object_detection/yolov3_unpruned/weights
!ls -al $USER_EXPERIMENT_DIR/object_detection/yolov3_pruned

!rm -rf $USER_EXPERIMENT_DIR/object_detection/yolov3_retrain
!mkdir $USER_EXPERIMENT_DIR/object_detection/yolov3_retrain

!tlt-train yolo -e $SPECS_DIR/yolov3_train_kitti.txt \
                        -r $USER_EXPERIMENT_DIR/object_detection/yolov3_retrain \
                        -k $KEY \
                        -m $USER_EXPERIMENT_DIR/object_detection/yolov3_pruned/yolo_darknet19_epoch_200_pruned.tlt \
                        --gpus $NUM_GPUS

!tlt-export detectnet_v2 \
            -m $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_retrain/weights/resnet18_detector_pruned.tlt \
            -o $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_final/detectnet_v2_resnet18.etlt \
            -k $KEY
!tlt-export yolo -e $SPECS_DIR/yolov3_train_kitti.txt \
            -m $USER_EXPERIMENT_DIR/object_detection/yolov3_unpruned/weights/yolo_darknet19_epoch_200.tlt \
            -o $USER_EXPERIMENT_DIR/object_detection/yolov3_final/yolo_darknet19_pruned_0.etlt \
            -k $KEY
!tlt-export yolo -e $SPECS_DIR/yolov3_train_kitti.txt \
            -m $USER_EXPERIMENT_DIR/object_detection/yolov3_retrain/weights/yolo_darknet19_epoch_200.tlt \
            -o $USER_EXPERIMENT_DIR/object_detection/yolov3_final/yolo_darknet19_pruned_70.etlt \
            -k $KEY

deepstream-icdar/yolov3_pgie_config.txt
[property]
gpu-id=0
net-scale-factor=1.0
offsets=103.939;116.779;123.68
model-color-format=1
labelfile-path=labels.txt
tlt-encoded-model=yolo_darknet19_pruned_70.etlt
tlt-model-key=tlt_encode
model-engine-file=yolo_darknet19_pruned_70.engine
infer-dims=3;512;512
uff-input-order=0
uff-input-blob-name=Input
batch-size=1
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
num-detected-classes=1
interval=0
gie-unique-id=1
is-classifier=0
#network-type=0
#no cluster
cluster-mode=3
output-blob-names=BatchedNMS
parse-bbox-func-name=NvDsInferParseCustomYOLOV3TLT
custom-lib-path=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_infercustomparser.so

[class-attrs-all]
pre-cluster-threshold=0.3
roi-top-offset=0
roi-bottom-offset=0
detected-min-w=0
detected-min-h=0
detected-max-w=0
detected-max-h=0

deepstream-icdar/detectnet_v2_pgie_config.txt
[property]
gpu-id=0
net-scale-factor=0.0039215697906911373
model-color-format=0
labelfile-path=labels.txt
tlt-encoded-model=detectnet_v2_resnet18.etlt
tlt-model-key=tlt_encode
model-engine-file=detectnet_v2_resnet18.engine
infer-dims=3;512;512
uff-input-order=0
uff-input-blob-name=input_1
output-blob-names=output_cov/Sigmoid;output_bbox/BiasAdd
batch-size=1
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
num-detected-classes=1
interval=0
gie-unique-id=1
is-classifier=0

[class-attrs-all]
pre-cluster-threshold=0.3
roi-top-offset=0
roi-bottom-offset=0
detected-min-w=0
detected-min-h=0
detected-max-w=0
detected-max-h=0

參考 deepstream-test1 製作 deepstream-icdar
/* Pipeline fragment (based on deepstream-test1): CSI camera source ->
 * capsfilter -> nvstreammux -> pgie -> nvvidconv -> nvosd -> transform -> sink. */
source = gst_element_factory_make ("nvarguscamerasrc", "camera-csi");
/* bufapi-version=TRUE — presumably needed so the source emits buffers the
 * DeepStream (NVMM) elements accept; NOTE(review): confirm for this JetPack. */
g_object_set (G_OBJECT (source), "bufapi-version", TRUE, NULL);
//g_object_set (G_OBJECT (source), "maxperf", TRUE, NULL);
cap_filter = gst_element_factory_make ("capsfilter", "src_cap_filter");
/* Fix the camera output format/size/framerate via caps. */
caps = gst_caps_new_simple ("video/x-raw", "format", G_TYPE_STRING, "NV12",
"width", G_TYPE_INT, CAMERA_WIDTH, "height", G_TYPE_INT,
CAMERA_HEIGHT, "framerate", GST_TYPE_FRACTION,
SOURCE_FPS_N, SOURCE_FPS_D, NULL);
GstCapsFeatures *feature = NULL;
/* Request NVMM (device) memory on the caps. */
feature = gst_caps_features_new ("memory:NVMM", NULL);
gst_caps_set_features (caps, 0, feature);
g_object_set (G_OBJECT (cap_filter), "caps", caps, NULL);
streammux = gst_element_factory_make ("nvstreammux", "stream-muxer");
g_object_set (G_OBJECT (streammux), "batch-size", 1, NULL);
g_object_set (G_OBJECT (streammux), "live-source", 1, NULL); // affects speed
g_object_set (G_OBJECT (streammux), "width", MUXER_OUTPUT_WIDTH, "height",
MUXER_OUTPUT_HEIGHT,
"batched-push-timeout", MUXER_BATCH_TIMEOUT_USEC, NULL);
gst_bin_add_many (GST_BIN (pipeline),
source, cap_filter, streammux, pgie,
nvvidconv, nvosd, transform, sink, NULL);
GstPad *sinkpad, *srcpad;
gchar pad_name_sink[16] = "sink_0";
gchar pad_name_src[16] = "src";
/* Link the capsfilter's src pad into the muxer's requested sink_0 pad. */
sinkpad = gst_element_get_request_pad (streammux, pad_name_sink);
srcpad = gst_element_get_static_pad (cap_filter, pad_name_src);
gst_pad_link (srcpad, sinkpad);
gst_object_unref (sinkpad);
gst_object_unref (srcpad);
gst_element_link_many (source, cap_filter, NULL);
/* NOTE(review): result is negated and discarded here — this looks clipped
 * from an `if (!gst_element_link_many (...)) { error }` check; confirm
 * against the full source. */
!gst_element_link_many (streammux, pgie,
nvvidconv, nvosd, transform, sink, NULL);








沒有留言:

張貼留言