ICDAR-2013 在 Robust Reading Competition 網站
要下載前要註冊
選 Challenges/Focused Scene Text
2013 的文字比較正,標註只有兩點
2015 的文字有各種角度,標註有四點
產生 kitti 資料時,直接使用訓練效果不好
轉換圖形時採用不同策略,一圖拆成多圖
截圖時不要破壞文字
TLT 有測試過 detectnet_v2 (resnet18, resnet50, darknet19), object_detection yolov3 (darknet19)
detectnet_v2 之 resnet18 precision: 14.6133 %
雖然 precision 不高,但是 tlt-infer 可以正確抓到文字
detectnet_v2 之 darknet19 precision: 18.7146 %
但 detectnet_v2 之 darknet19 失敗,tlt-infer 無法正確抓到文字
yolov3 之 darknet19 AP: 0.769
效果比 detectnet_v2 好
最看重 TLT 的功能是 tlt-prune, 神經網路被裁剪後
節省記憶體,加快速度
unprune Mem:2.5G PERF:8.33
0.5 Mem:2.1G PERF:28.83
0.7 Mem:2.1G PERF:30.00
DeepStream 之 nvinfer 的 config-file-path 設定檔
tlt-encoded-model=yolo_darknet19_pruned_70.etlt
model-engine-file=yolo_darknet19_pruned_70.engine
第一次啟動會很慢,因為在產生 yolo_darknet19_pruned_70.etlt_b1_gpu0_fp16.engine 檔
$ mv yolo_darknet19_pruned_70.etlt_b1_gpu0_fp16.engine yolo_darknet19_pruned_70.engine
之後會直接使用 yolo_darknet19_pruned_70.engine
以下是測試歷程記錄
建立 tlt-icdar docker
$ docker run --name tlt-icdar --runtime=nvidia --gpus all -it -v /home/user/Data/DeepStream/tlt-ICDAR/:/workspace/tlt-ICDAR -v /media/user/ImageData/:/workspace/ImageData -p 8888:8888 nvcr.io/nvidia/tlt-streamanalytics:v2.0_py3 /bin/bash
root@e16c46e4a3d7:/workspace# exit
啟動 tlt-icdar
$ docker start tlt-icdar
登入 tlt-icdar
$ docker exec -it tlt-icdar /bin/bash
root@e16c46e4a3d7:/workspace#
啟動 jupyter
root@e16c46e4a3d7:/workspace/tlt-ICDAR# jupyter notebook --allow-root --ip 0.0.0.0 --port 8888
tlt-ICDAR/tlt_specs/detectnet_v2_tfrecords_kitti_trainval.txt
# Spec for tlt-dataset-convert: turn the KITTI-layout ICDAR data into TFRecords.
kitti_config {
root_directory_path: "/workspace/ImageData/kitti/train"
image_dir_name: "images"
label_dir_name: "labels"
image_extension: ".jpg"
# random split into 2 partitions, 20 (percent) held out for validation
partition_mode: "random"
num_partitions: 2
val_split: 20
num_shards: 10
}
tlt-ICDAR/tlt_specs/detectnet_v2_train_resnet18_kitti.txt
random_seed: 42
dataset_config {
data_sources {
tfrecords_path: "/workspace/ImageData/kitti/tfrecords/kitti_trainval/*"
image_directory_path: "/workspace/ImageData/kitti/train"
}
image_extension: "jpg"
target_class_mapping {
key: "p1a"
value: "p1a"
}
validation_fold: 0
#validation_data_source: {
#tfrecords_path: "/workspace/ImageData/kitti/tfrecords/kitti_val/*"
#image_directory_path: "/workspace/ImageData/kitti/test"
#}
}
augmentation_config {
preprocessing {
output_image_width: 512
output_image_height: 512
output_image_channel: 3
min_bbox_width: 1.0
min_bbox_height: 1.0
}
spatial_augmentation {
hflip_probability: 0.0
vflip_probability: 0.0
zoom_min: 1.0
zoom_max: 1.0
translate_max_x: 8.0
translate_max_y: 8.0
rotate_rad_max: 0.0
}
color_augmentation {
color_shift_stddev: 0.0
hue_rotation_max: 25.0
saturation_shift_max: 0.2
contrast_scale_max: 0.1
contrast_center: 0.5
}
}
postprocessing_config {
target_class_config {
key: "p1a"
value: {
clustering_config {
coverage_threshold: 0.005
dbscan_eps: 0.15
dbscan_min_samples: 0.05
minimum_bounding_box_height: 20
}
}
}
}
model_config {
arch: "resnet"
pretrained_model_file: "/workspace/ImageData/tlt-experiment/detectnet_v2/pretrained_resnet18/tlt_pretrained_detectnet_v2_vresnet18/resnet18.hdf5"
#freeze_blocks: 0
#freeze_blocks: 1
all_projections: True
num_layers: 18
use_pooling: False
use_batch_norm: True
dropout_rate: 0.0
training_precision: {
backend_floatx: FLOAT32
}
objective_set: {
cov {}
bbox {
scale: 35.0
offset: 0.5
}
}
}
evaluation_config {
average_precision_mode: INTEGRATE
validation_period_during_training: 10
first_validation_epoch: 10
minimum_detection_ground_truth_overlap {
key: "p1a"
value: 0.5
}
evaluation_box_config {
key: "p1a"
value {
minimum_height: 4
maximum_height: 9999
minimum_width: 4
maximum_width: 9999
}
}
}
cost_function_config {
target_classes {
name: "p1a"
class_weight: 1.0
coverage_foreground_weight: 0.05
objectives {
name: "cov"
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: "bbox"
initial_weight: 10.0
weight_target: 10.0
}
}
enable_autoweighting: True
max_objective_weight: 0.9999
min_objective_weight: 0.0001
}
training_config {
batch_size_per_gpu: 8
num_epochs: 200
learning_rate {
soft_start_annealing_schedule {
#min_learning_rate: 5e-6
#max_learning_rate: 5e-4
min_learning_rate: 1e-6
max_learning_rate: 1e-3
soft_start: 0.1
annealing: 0.7
}
}
regularizer {
type: L1
weight: 3e-9
}
optimizer {
adam {
epsilon: 1e-08
beta1: 0.9
beta2: 0.999
}
}
cost_scaling {
enabled: False
initial_exponent: 20.0
increment: 0.005
decrement: 1.0
}
checkpoint_interval: 10
}
bbox_rasterizer_config {
target_class_config {
key: "p1a"
value: {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 0.45
cov_radius_y: 0.45
bbox_min_radius: 0.9
}
}
#deadzone_radius: 0.67
deadzone_radius: 0.2
}
tlt-ICDAR/tlt_specs/yolov3_train_kitti.txt
random_seed: 42
yolo_config {
big_anchor_shape: "[(116,90), (156,198), (373,326)]"
mid_anchor_shape: "[(30,61), (62,45), (59,119)]"
small_anchor_shape: "[(10,13), (16,30), (33,23)]"
matching_neutral_box_iou: 0.5
arch: "darknet"
nlayers: 19
arch_conv_blocks: 2
#loss_loc_weight: 5.0
#loss_neg_obj_weights: 50.0
loss_loc_weight: 0.75
loss_neg_obj_weights: 200.0
loss_class_weights: 1.0
freeze_bn: False
#freeze_blocks: 0
#freeze_blocks: 1
}
training_config {
batch_size_per_gpu: 8
num_epochs: 200
enable_qat: false
learning_rate {
soft_start_annealing_schedule {
#min_learning_rate: 5e-5
#max_learning_rate: 2e-2
#soft_start: 0.15
min_learning_rate: 1e-6
max_learning_rate: 1e-4
soft_start: 0.1
annealing: 0.8
}
}
regularizer {
type: L1
#weight: 3e-5
weight: 5e-5
}
}
eval_config {
validation_period_during_training: 10
average_precision_mode: SAMPLE
batch_size: 8
matching_iou_threshold: 0.5
}
nms_config {
confidence_threshold: 0.01
clustering_iou_threshold: 0.6
top_k: 200
}
augmentation_config {
preprocessing {
output_image_width: 512
output_image_height: 512
output_image_channel: 3
crop_right: 512
crop_bottom: 512
min_bbox_width: 1.0
min_bbox_height: 1.0
}
spatial_augmentation {
hflip_probability: 0.0
vflip_probability: 0.0
zoom_min: 0.7
zoom_max: 1.8
translate_max_x: 8.0
translate_max_y: 8.0
}
color_augmentation {
hue_rotation_max: 25.0
saturation_shift_max: 0.20000000298
contrast_scale_max: 0.10000000149
contrast_center: 0.5
}
}
dataset_config {
data_sources: {
tfrecords_path: "/workspace/ImageData/kitti/tfrecords/kitti_trainval/*"
image_directory_path: "/workspace/ImageData/kitti/train"
}
image_extension: "jpg"
target_class_mapping {
key: "p1a"
value: "p1a"
}
validation_fold: 0
#validation_data_source: {
#tfrecords_path: "/workspace/ImageData/kitti/tfrecords/kitti_val/*"
#image_directory_path: "/workspace/ImageData/kitti/test"
#}
}
tlt-ICDAR/tlt_specs/detectnet_v2_inference_kitti_tlt.txt
inferencer_config{
# defining target class names for the experiment.
# Note: This must be mentioned in order of the networks classes.
target_classes: "p1a"
# Inference dimensions.
image_width: 512
image_height: 512
# Must match what the model was trained for.
image_channels: 3
batch_size: 16
gpu_index: 0
#model handler config
tlt_config{
#model: "/workspace/ImageData/tlt-experiment/detectnet_v2/experiment_dir_unpruned_18/weights/resnet18_detector.tlt"
model: "/workspace/ImageData/tlt-experiment/detectnet_v2/experiment_dir_unpruned_19/weights/darknet19_detector.tlt"
}
}
bbox_handler_config{
kitti_dump: true
disable_overlay: false
overlay_linewidth: 4
classwise_bbox_handler_config{
key:"p1a"
value: {
confidence_model: "aggregate_cov"
output_map: "p1a"
confidence_threshold: 0.9
bbox_color{
R: 0
G: 255
B: 0
}
clustering_config{
coverage_threshold: 0.00
dbscan_eps: 0.3
dbscan_min_samples: 0.05
minimum_bounding_box_height: 4
}
}
}
}
tlt-ICDAR/gen_kitti.ipynb
# kitti directory
# train
# images
# labels
# test
# images
# labels
# tfrecords
# kitti_trainval
%env KITTI_DIR=/workspace/ImageData/kitti
# kitti format
# type 0 0 0 left top right bottom 0 0 0 0 0 0 0
# train image and label
%env TRAIN_2013_IMAGE=/workspace/ImageData/ICDAR/2013_Challenge2_Training_Task12_Images
%env TRAIN_2013_LABEL=/workspace/ImageData/ICDAR/2013_Challenge2_Training_Task1_GT
%env TRAIN_2015_IMAGE=/workspace/ImageData/ICDAR/2015_ch4_training_images
%env TRAIN_2015_LABEL=/workspace/ImageData/ICDAR/2015_ch4_training_localization_transcription_gt
# test image and label
%env TEST_2013_IMAGE=/workspace/ImageData/ICDAR/2013_Challenge2_Test_Task12_Images
%env TEST_2013_LABEL=/workspace/ImageData/ICDAR/2013_Challenge2_Test_Task1_GT
%env TEST_2015_IMAGE=/workspace/ImageData/ICDAR/2015_ch4_test_images
%env TEST_2015_LABEL=/workspace/ImageData/ICDAR/2015_Challenge4_Test_Task1_GT
TLT_IMAGE_WIDTH=640
TLT_IMAGE_HEIGHT=480
MIN_OBJ = 20
import os
import numpy as np
import cv2
from matplotlib import pyplot as plt
%matplotlib inline
def show_image(title, image, rects):
    """Draw every box in `rects` onto `image` (in place, 2-px lines) and
    display the result with pyplot at figsize 20x10."""
    for box in rects:
        topLeft = (int(box[0]), int(box[1]))
        bottomRight = (int(box[2]), int(box[3]))
        cv2.rectangle(image, topLeft, bottomRight, (0, 0, 255), 2)
    plt.figure(figsize=(20, 10))
    plt.title(title)
    # cv2 loads images channel-reversed relative to pyplot; flip the last axis
    plt.imshow(image[:, :, ::-1])
def write_kitti(image, rects, kittiPath, path, name):
    """Write one training sample in KITTI layout.

    Saves `image` as <kittiPath>/<path>/images/<name>.jpg and writes one
    KITTI label line per box ('p1a' class; only the four bbox fields are
    meaningful, the remaining columns are zero) to
    <kittiPath>/<path>/labels/<name>.txt.

    rects: iterable of [x0, y0, x1, y1] boxes in pixel coordinates.
    """
    imageKName = os.path.join(kittiPath, path, 'images', name + '.jpg')
    labelKName = os.path.join(kittiPath, path, 'labels', name + '.txt')
    cv2.imwrite(imageKName, image)
    # 'with' guarantees the label file is closed even if a write raises
    # (the original leaked the handle on exceptions).
    with open(labelKName, 'w') as outFile:
        for rect in rects:
            kittiLine = 'p1a 0 0 0 {:f} {:f} {:f} {:f} 0 0 0 0 0 0 0\n'.format(
                rect[0], rect[1], rect[2], rect[3])
            outFile.write(kittiLine)
# rect: [x0, y0, x1, y1]
# rects: [rect, ...]
# Largest window around `rect` inside a (sw x sh) image that stays clear of
# the other boxes: boxes strictly left/above push the low edges inward,
# boxes strictly right/below pull the high edges inward.
def get_outer_box(sw, sh, rect, rects):
    left, top = 0, 0
    right, bottom = sw - 1, sh - 1
    for other in rects:
        if other == rect:
            continue  # skip the query box itself
        if other[2] < rect[0] and left < other[2] + 1:
            left = other[2] + 1
        if other[3] < rect[1] and top < other[3] + 1:
            top = other[3] + 1
        if other[0] > rect[2] and right > other[0] - 1:
            right = other[0] - 1
        if other[1] > rect[3] and bottom > other[1] - 1:
            bottom = other[1] - 1
    return [left, top, right, bottom]
# rect: [x0, y0, x1, y1]
# rects: [rect, ...]
# Return the minimal axis-aligned box enclosing every box in `rects` that
# overlaps `rect`.
def get_merge_box(rect, rects):
    mRects = []
    for r in rects:
        # strictly-outside test: boxes that merely touch an edge do not count
        if r[2] <= rect[0] or r[3] <= rect[1] or r[0] >= rect[2] or r[1] >= rect[3]:
            continue
        mRects.extend(r)
    if not mRects:
        # No overlapping box at all: the original crashed inside
        # cv2.boundingRect on an empty point set; the query box is then its
        # own minimal hull.
        return [rect[0], rect[1], rect[2], rect[3]]
    mRect = np.array(mRects, np.float32).reshape((-1, 1, 2))
    # cv2.boundingRect returns an integer x, y, w, h covering all the corners
    x, y, w, h = cv2.boundingRect(mRect)
    return [x, y, x + w, y + h]
def crop_image(image, rect, rects, kittiPath, path, name):
    """Cut window `rect` out of `image` and save it, together with the boxes
    from `rects` that fall inside it (shifted to crop coordinates), as one
    KITTI sample called `name`."""
    x0, y0, x1, y1 = rect
    shifted = []
    for box in rects:
        # drop boxes that do not genuinely overlap the crop window
        outside = box[2] <= x0 or box[3] <= y0 or box[0] >= x1 or box[1] >= y1
        if outside:
            continue
        shifted.append([box[0] - x0, box[1] - y0, box[2] - x0, box[3] - y0])
    sub = image[int(y0):int(y1) + 1, int(x0):int(x1) + 1, :].copy()
    #show_image(name, sub, shifted)
    write_kitti(sub, shifted, kittiPath, path, name)
def random_outer_box(inner, outer):
    """Pick a random box that contains `inner` and lies within `outer`.

    Each edge is drawn independently with np.random.randint (high-exclusive)
    from the slack between the two boxes; when an edge has no slack the
    corresponding bound is used directly."""
    if outer[0] < inner[0]:
        left = np.random.randint(outer[0], inner[0])
    else:
        left = inner[0]
    if outer[1] < inner[1]:
        top = np.random.randint(outer[1], inner[1])
    else:
        top = inner[1]
    if inner[2] < outer[2]:
        right = np.random.randint(inner[2], outer[2])
    else:
        right = outer[2]
    if inner[3] < outer[3]:
        bottom = np.random.randint(inner[3], outer[3])
    else:
        bottom = outer[3]
    return left, top, right, bottom
# Generate extra training crops: for every ground-truth box, crop the largest
# window around it that does not cut through any other box, then grow that
# window toward each of the other boxes in turn so pairs of objects share a
# crop. width/height is the crop size a box must fit inside; boxHist
# suppresses duplicate windows. Output files are named '<name>_<n>'.
def augment_image(image, rects, width, height, kittiPath, path, name):
    sh, sw, sc = image.shape
    fileCnt = 0   # running suffix for the generated sample names
    boxHist = []  # crop windows already emitted (deduplication)
    for rect in rects:
        if rect[2]-rect[0] >= width or width >= sw or rect[3]-rect[1] >= height or height >= sh:
            # Object too large for the target crop, or source image too small.
            continue
        mRect = get_merge_box(rect, rects)
        oBox = get_outer_box(sw, sh, mRect, rects)
        if oBox in boxHist:
            continue
        boxHist.append(oBox)
        crop_image(image, oBox, rects, kittiPath, path, '{}_{}'.format(name, fileCnt))
        fileCnt += 1
        # Second pass: stretch the window just enough to also cover each other
        # box `n`, then re-derive a clean window around the union.
        for n in rects:
            if n == rect:
                continue
            x0 = n[0] if n[0] < oBox[0] else oBox[0]
            y0 = n[1] if n[1] < oBox[1] else oBox[1]
            x1 = n[2] if n[2] > oBox[2] else oBox[2]
            y1 = n[3] if n[3] > oBox[3] else oBox[3]
            mRect1 = get_merge_box([x0, y0, x1, y1], rects)
            oBox1 = get_outer_box(sw, sh, mRect1, rects)
            if oBox1 in boxHist:
                continue
            boxHist.append(oBox1)
            crop_image(image, oBox1, rects, kittiPath, path, '{}_{}'.format(name, fileCnt))
            fileCnt += 1
def gen_icdar_2013(imagePath, labelPath, kittiPath, path, separator=None):
    """Convert an ICDAR-2013 image/ground-truth set into KITTI layout under
    <kittiPath>/<path>, writing each full image plus augmented 512x512 crops.

    separator: token separator of the GT files — ',' for the test set,
    None (whitespace) for the training set.
    Returns the number of source images processed.
    """
    files = os.listdir(labelPath)
    cnt = 0
    # Images stored rotated in the dataset; fixed with a 270-degree rotation
    # below.  NOTE(review): confirm this list against the actual files.
    name_list = ['gt_img_39', 'gt_140']
    for file in files:
        labelName = os.path.join(labelPath, file)
        name, ext = os.path.splitext(file)
        # GT files are named 'gt_<image>.txt'; strip the 'gt_' prefix to get
        # the image file name.
        imageName = os.path.join(imagePath, name[3:] + '.jpg')
        #print(labelName)
        #shutil.copyfile(imageName, imageKName)
        image = cv2.imread(imageName)  # NOTE(review): None if the file is missing
        matching = [s for s in name_list if name == s]
        if matching:
            image = np.rot90(image, 3).copy()
        imageSize = image.shape
        if False:
            # Disabled branch: resize to the TLT input size and scale the
            # boxes by the same ratio.
            rateWidth = TLT_IMAGE_WIDTH / imageSize[1]
            rateHeight = TLT_IMAGE_HEIGHT / imageSize[0]
            image = cv2.resize(image, (TLT_IMAGE_WIDTH, TLT_IMAGE_HEIGHT), interpolation=cv2.INTER_AREA)
        else:
            rateWidth = 1.0
            rateHeight = 1.0
        inFile = open(labelName, 'r')
        rects = []
        lines = inFile.readlines()
        for line in lines:
            # Each 2013 GT line starts with x0 y0 x1 y1; trailing tokens hold
            # the transcription and are ignored here.
            tokens = line.split(separator)
            ax0 = int(tokens[0]) * rateWidth
            ay0 = int(tokens[1]) * rateHeight
            ax1 = int(tokens[2]) * rateWidth
            ay1 = int(tokens[3]) * rateHeight
            rects.append([ax0, ay0, ax1, ay1])
            #print(ax0, ay0, ax1, ay1)
        inFile.close()
        #show_image(labelName, image, rects)
        write_kitti(image, rects, kittiPath, path, '2013_' + name[3:])
        augment_image(image, rects, 512, 512, kittiPath, path, '2013_' + name[3:])
        cnt += 1
        # Debug early-exit, normally disabled:
        #if cnt >= 1:
        # break
    plt.show()
    return cnt
kittiPath = os.environ['KITTI_DIR']
imagePath = os.environ['TEST_2013_IMAGE']
labelPath = os.environ['TEST_2013_LABEL']
cnt = gen_icdar_2013(imagePath, labelPath, kittiPath, 'train', ',')
print('2013 Test Image file:{}'.format(cnt))
imagePath = os.environ['TRAIN_2013_IMAGE']
labelPath = os.environ['TRAIN_2013_LABEL']
cnt = gen_icdar_2013(imagePath, labelPath, kittiPath, 'train', None)
print('2013 Train Image file:{}'.format(cnt))
!ls $TRAIN_2013_IMAGE|wc
!ls $TEST_2013_IMAGE|wc
!ls $KITTI_DIR/train/images|wc
!ls $KITTI_DIR/train/labels|wc
tlt-ICDAR/show_infer_test.ipynb
import os
import numpy as np
import cv2
from matplotlib import pyplot as plt
%matplotlib inline
def show_infer_test(imagePath):
    """Display up to 21 images from `imagePath` with pyplot (one figure
    each, figsize 20x10), then show them all."""
    shown = 0
    for entry in os.listdir(imagePath):
        fullName = os.path.join(imagePath, entry)
        #print(fullName)
        picture = cv2.imread(fullName)
        plt.figure(figsize=(20, 10))
        # reverse the channel axis (cv2 order -> pyplot order)
        plt.imshow(picture[:, :, ::-1])
        shown += 1
        if shown > 20:
            break
    plt.show()
#show_infer_test('/workspace/ImageData/tlt-experiment/tlt_infer_testing')
show_infer_test('/workspace/ImageData/tlt-experiment/tlt_infer_testing/images_annotated')
tlt-ICDAR/icdar_detection.ipynb
%env KEY=tlt_encode
%env USER_EXPERIMENT_DIR=/workspace/ImageData/tlt-experiment
%env DATA_DOWNLOAD_DIR=/workspace/ImageData/kitti
%env SPECS_DIR=/workspace/tlt-ICDAR/tlt_specs
%env NUM_GPUS=1
!rm -rf $DATA_DOWNLOAD_DIR/tfrecords/kitti_trainval
!tlt-dataset-convert -d $SPECS_DIR/detectnet_v2_tfrecords_kitti_trainval.txt \
-o $DATA_DOWNLOAD_DIR/tfrecords/kitti_trainval/kitti_trainval
!ngc registry model list nvidia/tlt_pretrained_detectnet_v2:*
!ngc registry model list nvidia/tlt_pretrained_object_detection:*
!ngc registry model download-version nvidia/tlt_pretrained_detectnet_v2:resnet18 \
--dest $USER_EXPERIMENT_DIR/detectnet_v2/pretrained_resnet18
!ngc registry model download-version nvidia/tlt_pretrained_detectnet_v2:darknet19 \
--dest $USER_EXPERIMENT_DIR/detectnet_v2/pretrained_darknet19
!ngc registry model download-version nvidia/tlt_pretrained_object_detection:darknet19 \
--dest $USER_EXPERIMENT_DIR/object_detection/pretrained_darknet19
!rm -rf $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_18
!tlt-train detectnet_v2 -e $SPECS_DIR/detectnet_v2_train_resnet18_kitti.txt \
-r $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_18 \
-k $KEY \
-n resnet18_detector \
--gpus $NUM_GPUS
!rm -rf $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_19
!tlt-train detectnet_v2 -e $SPECS_DIR/detectnet_v2_train_darknet19_kitti.txt \
-r $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_19 \
-k $KEY \
-n darknet19_detector \
--gpus $NUM_GPUS
!rm -rf $USER_EXPERIMENT_DIR/object_detection/yolov3_unpruned
!mkdir $USER_EXPERIMENT_DIR/object_detection/yolov3_unpruned
!tlt-train yolo -e $SPECS_DIR/yolov3_train_kitti.txt \
-r $USER_EXPERIMENT_DIR/object_detection/yolov3_unpruned \
-k $KEY \
-m $USER_EXPERIMENT_DIR/object_detection/pretrained_darknet19/tlt_pretrained_object_detection_vdarknet19/darknet_19.hdf5 \
--gpus $NUM_GPUS
!tlt-evaluate detectnet_v2 -e $SPECS_DIR/detectnet_v2_train_resnet18_kitti.txt \
-m $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_18/weights/resnet18_detector.tlt \
-k $KEY
!tlt-evaluate detectnet_v2 -e $SPECS_DIR/detectnet_v2_train_darknet19_kitti.txt \
-m $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_19/weights/darknet19_detector.tlt \
-k $KEY
!tlt-evaluate yolo -e $SPECS_DIR/yolov3_train_kitti.txt \
-m $USER_EXPERIMENT_DIR/object_detection/yolov3_unpruned/weights/yolo_darknet19_epoch_200.tlt \
-k $KEY
!rm -rf $USER_EXPERIMENT_DIR/tlt_infer_testing
!tlt-infer detectnet_v2 -e $SPECS_DIR/detectnet_v2_inference_kitti_tlt.txt \
-o $USER_EXPERIMENT_DIR/tlt_infer_testing \
-i $DATA_DOWNLOAD_DIR/train/images \
-k $KEY
!tlt-infer yolo -e $SPECS_DIR/yolov3_train_kitti.txt \
-o $USER_EXPERIMENT_DIR/tlt_infer_testing \
-i $DATA_DOWNLOAD_DIR/train/images \
-m /workspace/ImageData/tlt-experiment/object_detection/yolov3_unpruned/weights/yolo_darknet19_epoch_200.tlt \
-k $KEY
!rm -rf $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_pruned_18
!mkdir $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_pruned_18
!tlt-prune -m $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_18/weights/resnet18_detector.tlt \
-o $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_pruned_18/resnet18_detector_pruned.tlt \
-eq union \
-pth 0.7 \
-k $KEY
!ls -al $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_unpruned_18/weights
!ls -al $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_pruned_18
!rm -rf $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_retrain
!mkdir $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_retrain
!tlt-train detectnet_v2 -e $SPECS_DIR/detectnet_v2_retrain_resnet18_kitti.txt \
-r $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_retrain \
-k $KEY \
-n resnet18_detector_pruned \
--gpus $NUM_GPUS
!rm -rf $USER_EXPERIMENT_DIR/object_detection/yolov3_pruned
!mkdir $USER_EXPERIMENT_DIR/object_detection/yolov3_pruned
!tlt-prune -m $USER_EXPERIMENT_DIR/object_detection/yolov3_unpruned/weights/yolo_darknet19_epoch_200.tlt \
-o $USER_EXPERIMENT_DIR/object_detection/yolov3_pruned/yolo_darknet19_epoch_200_pruned.tlt \
-eq union \
-pth 0.7 \
-k $KEY
!ls -al $USER_EXPERIMENT_DIR/object_detection/yolov3_unpruned/weights
!ls -al $USER_EXPERIMENT_DIR/object_detection/yolov3_pruned
!rm -rf $USER_EXPERIMENT_DIR/object_detection/yolov3_retrain
!mkdir $USER_EXPERIMENT_DIR/object_detection/yolov3_retrain
!tlt-train yolo -e $SPECS_DIR/yolov3_train_kitti.txt \
-r $USER_EXPERIMENT_DIR/object_detection/yolov3_retrain \
-k $KEY \
-m $USER_EXPERIMENT_DIR/object_detection/yolov3_pruned/yolo_darknet19_epoch_200_pruned.tlt \
--gpus $NUM_GPUS
!tlt-export detectnet_v2 \
-m $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_retrain/weights/resnet18_detector_pruned.tlt \
-o $USER_EXPERIMENT_DIR/detectnet_v2/experiment_dir_final/detectnet_v2_resnet18.etlt \
-k $KEY
!tlt-export yolo -e $SPECS_DIR/yolov3_train_kitti.txt \
-m $USER_EXPERIMENT_DIR/object_detection/yolov3_unpruned/weights/yolo_darknet19_epoch_200.tlt \
-o $USER_EXPERIMENT_DIR/object_detection/yolov3_final/yolo_darknet19_pruned_0.etlt \
-k $KEY
!tlt-export yolo -e $SPECS_DIR/yolov3_train_kitti.txt \
-m $USER_EXPERIMENT_DIR/object_detection/yolov3_retrain/weights/yolo_darknet19_epoch_200.tlt \
-o $USER_EXPERIMENT_DIR/object_detection/yolov3_final/yolo_darknet19_pruned_70.etlt \
-k $KEY
deepstream-icdar/yolov3_pgie_config.txt
[property]
gpu-id=0
# no scaling; per-channel offsets subtracted from the input
net-scale-factor=1.0
offsets=103.939;116.779;123.68
model-color-format=1
labelfile-path=labels.txt
# Encrypted TLT export; the TensorRT engine below is generated from it on
# first run (slow), then reused.
tlt-encoded-model=yolo_darknet19_pruned_70.etlt
tlt-model-key=tlt_encode
model-engine-file=yolo_darknet19_pruned_70.engine
infer-dims=3;512;512
uff-input-order=0
uff-input-blob-name=Input
batch-size=1
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
num-detected-classes=1
interval=0
gie-unique-id=1
is-classifier=0
#network-type=0
#no cluster
cluster-mode=3
# BatchedNMS output decoded by the TLT custom parser library below
output-blob-names=BatchedNMS
parse-bbox-func-name=NvDsInferParseCustomYOLOV3TLT
custom-lib-path=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_infercustomparser.so
[class-attrs-all]
# keep detections scoring at least 0.3 before clustering
pre-cluster-threshold=0.3
roi-top-offset=0
roi-bottom-offset=0
detected-min-w=0
detected-min-h=0
detected-max-w=0
detected-max-h=0
deepstream-icdar/detectnet_v2_pgie_config.txt
[property]
gpu-id=0
# 1/255 input scaling, no mean subtraction
net-scale-factor=0.0039215697906911373
model-color-format=0
labelfile-path=labels.txt
# Encrypted TLT export; the TensorRT engine below is generated from it on
# first run, then reused.
tlt-encoded-model=detectnet_v2_resnet18.etlt
tlt-model-key=tlt_encode
model-engine-file=detectnet_v2_resnet18.engine
infer-dims=3;512;512
uff-input-order=0
uff-input-blob-name=input_1
# DetectNet_v2 coverage and bbox output heads
output-blob-names=output_cov/Sigmoid;output_bbox/BiasAdd
batch-size=1
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
num-detected-classes=1
interval=0
gie-unique-id=1
is-classifier=0
[class-attrs-all]
# keep detections scoring at least 0.3 before clustering
pre-cluster-threshold=0.3
roi-top-offset=0
roi-bottom-offset=0
detected-min-w=0
detected-min-h=0
detected-max-w=0
detected-max-h=0
參考 deepstream-test1 製作 deepstream-icdar
/* Fragment adapted from deepstream-test1: wires a CSI-camera pipeline
 * nvarguscamerasrc -> capsfilter -> nvstreammux -> pgie -> nvvidconv ->
 * nvosd -> transform -> sink.
 * NOTE(review): element declarations, pgie/nvvidconv/nvosd/transform/sink
 * creation and error handling live outside this excerpt. */
source = gst_element_factory_make ("nvarguscamerasrc", "camera-csi");g_object_set (G_OBJECT (source), "bufapi-version", TRUE, NULL);
//g_object_set (G_OBJECT (source), "maxperf", TRUE, NULL);
cap_filter = gst_element_factory_make ("capsfilter", "src_cap_filter");
/* Raw NV12 caps at the configured camera resolution and framerate. */
caps = gst_caps_new_simple ("video/x-raw", "format", G_TYPE_STRING, "NV12",
"width", G_TYPE_INT, CAMERA_WIDTH, "height", G_TYPE_INT,
CAMERA_HEIGHT, "framerate", GST_TYPE_FRACTION,
SOURCE_FPS_N, SOURCE_FPS_D, NULL);
GstCapsFeatures *feature = NULL;
/* Tag the caps with the "memory:NVMM" feature (device-buffer path). */
feature = gst_caps_features_new ("memory:NVMM", NULL);
gst_caps_set_features (caps, 0, feature);
g_object_set (G_OBJECT (cap_filter), "caps", caps, NULL);
streammux = gst_element_factory_make ("nvstreammux", "stream-muxer");
g_object_set (G_OBJECT (streammux), "batch-size", 1, NULL);
g_object_set (G_OBJECT (streammux), "live-source", 1, NULL); // affects speed
g_object_set (G_OBJECT (streammux), "width", MUXER_OUTPUT_WIDTH, "height",
MUXER_OUTPUT_HEIGHT,
"batched-push-timeout", MUXER_BATCH_TIMEOUT_USEC, NULL);
gst_bin_add_many (GST_BIN (pipeline),
source, cap_filter, streammux, pgie,
nvvidconv, nvosd, transform, sink, NULL);
GstPad *sinkpad, *srcpad;
gchar pad_name_sink[16] = "sink_0";
gchar pad_name_src[16] = "src";
/* Link capsfilter's static src pad to the muxer's requested sink_0 pad. */
sinkpad = gst_element_get_request_pad (streammux, pad_name_sink);
srcpad = gst_element_get_static_pad (cap_filter, pad_name_src);
gst_pad_link (srcpad, sinkpad);
gst_object_unref (sinkpad);
gst_object_unref (srcpad);
gst_element_link_many (source, cap_filter, NULL);
/* NOTE(review): the '!' result is discarded here — this line was most
 * likely 'if (!gst_element_link_many (...)) { <error handling> }' before
 * the blog formatting dropped the surrounding check. */
!gst_element_link_many (streammux, pgie,
nvvidconv, nvosd, transform, sink, NULL);
沒有留言:
張貼留言