Skip to content
This repository was archived by the owner on Jan 29, 2021. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
9270aab
Create LISA_posGt_to_VOC_main.py
kevinwss Jun 25, 2017
f960c67
Add files via upload
kevinwss Jun 25, 2017
91084c9
Update pascal_voc_modified.py
kevinwss Jun 25, 2017
c1057a6
Update pascal_voc_modified.py
kevinwss Jun 26, 2017
fd25602
Update LISA_annotation_to_VOC.py
kevinwss Jul 1, 2017
cccb12c
Update LISA_posGt_to_VOC_main.py
kevinwss Jul 1, 2017
cb7df38
Add files via upload
kevinwss Jul 1, 2017
3c065da
Update pascal_voc_modified.py
kevinwss Jul 3, 2017
9e47a35
Create vgg16_hand_detection.py
kevinwss Jul 3, 2017
4683b31
Update vgg16_hand_detection.py
kevinwss Jul 3, 2017
d451158
Update vgg16_hand_detection.py
kevinwss Jul 3, 2017
e2d7559
Delete vgg16_modified1.py
kevinwss Jul 3, 2017
1746880
Update pascal_voc_modified.py
kevinwss Jul 3, 2017
b88bd33
Create network.py
kevinwss Jul 3, 2017
f970914
Create Modification Points
kevinwss Jul 3, 2017
a2c5cf5
Update README.md
kevinwss Jul 3, 2017
be6c687
Update Modification Points
kevinwss Jul 21, 2017
d0a0ebe
Update Modification Points
kevinwss Jul 21, 2017
d16fb28
Create voc_eval_modified.py
kevinwss Jul 21, 2017
44b3751
Update Modification Points
kevinwss Jul 21, 2017
11d07db
Update Modification Points
kevinwss Jul 21, 2017
908f061
Update Modification Points
kevinwss Jul 21, 2017
6ec2bb0
Update README.md
kevinwss Jul 22, 2017
da064c5
Update README.md
kevinwss Jul 22, 2017
868fe63
Update README.md
kevinwss Jul 25, 2017
ee701f8
Rename vgg16_hand_detection.py to vgg16_modified.py
kevinwss Jul 25, 2017
d3aca18
Update vgg16_modified.py
kevinwss Jul 25, 2017
49968ed
Rename pascal_voc_modified.py to pascal_voc.py
kevinwss Jul 25, 2017
7b0ff14
Rename vgg16_modified.py to vgg16.py
kevinwss Jul 25, 2017
435e7c3
Rename voc_eval_modified.py to voc_eval.py
kevinwss Jul 25, 2017
f245a7c
Rename vgg16.py to vgg16_modified1.py
kevinwss Jul 25, 2017
389229a
Rename vgg16_modified1.py to vgg16.py
kevinwss Jul 25, 2017
1a1a0ff
Update Modification Points
kevinwss Jul 25, 2017
5eb3e19
Update README.md
kevinwss Jul 25, 2017
97b1033
Update README.md
kevinwss Jul 25, 2017
313a89e
Update README.md
kevinwss Jul 25, 2017
2e0a602
Update README.md
kevinwss Jul 25, 2017
254a203
Update README.md
kevinwss Jul 25, 2017
8c233ea
Update README.md
kevinwss Nov 2, 2021
84267bc
Update README.md
kevinwss Nov 2, 2021
b2babda
Update README.md
kevinwss Nov 2, 2021
07c5b81
Add files via upload
kevinwss Nov 2, 2021
80cdc58
Update README.md
kevinwss Nov 2, 2021
ad035bf
Update README.md
kevinwss Nov 2, 2021
5f05eef
Update README.md
kevinwss Nov 2, 2021
bed2207
Update README.md
kevinwss Nov 2, 2021
15e4a55
Update README.md
kevinwss Nov 2, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions LISA_annotation_to_VOC.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,19 @@ def generate_xml(name,img_size):
title_text = doc.createTextNode(str(img_size[2]))
title.appendChild(title_text)
size.appendChild(title)

title = doc.createElement('segmented')
title_text = doc.createTextNode('0')
title.appendChild(title_text)
annotation.appendChild(title)

# A loop for several objects to be detected
#The bounding boxes are described using the top left point, a width, and a height [x y w h] in the 2D image plane.=>[xmin,ymin,xmax,ymax]
for i in range(total_object):
data=lines[i].strip().split(" ")
name=data[0]
x,y,w,h=int(data[1]),int(data[2]),int(data[3]),int(data[4])
xmin,ymin,xmax,ymax=x,y-h,x+w,y
xmin,ymin,xmax,ymax=x,y,x+w,y+h


object = doc.createElement('object')
Expand All @@ -96,7 +101,24 @@ def generate_xml(name,img_size):
title_text = doc.createTextNode(name)
title.appendChild(title_text)
object.appendChild(title)



title = doc.createElement('pose')
title_text = doc.createTextNode('Unspecified')
title.appendChild(title_text)
object.appendChild(title)

title = doc.createElement('truncated')
title_text = doc.createTextNode('0')
title.appendChild(title_text)
object.appendChild(title)

title = doc.createElement('difficult')
title_text = doc.createTextNode('0')
title.appendChild(title_text)
object.appendChild(title)


bndbox = doc.createElement('bndbox')
object.appendChild(bndbox)
title = doc.createElement('xmin')
Expand Down Expand Up @@ -132,6 +154,7 @@ def generate_xml(name,img_size):
generate_xml(name,img_size)






106 changes: 106 additions & 0 deletions LISA_posGt_to_VOC_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-
"""
Created on Sat Jun 24 19:26:54 2017


"""
# --------------------------------------------------------
#Transform posGt to VOC2007/imagesets/main
#Used for create train.txt,val.txt,trainval.txt under main folder in VOC2007
#Written by Shaoshen Wang
# --------------------------------------------------------
#Usage:
#Put this script under train folder
#Create a new folder named "Main" in train folder
#Run this script


import os

# Input: one posGt annotation .txt file per image (LISA hand dataset).
annotation_path = "./posGt/"
# Output: VOC2007-style ImageSets/Main listing files go here.
result_path = "./ImageSets/Main/"

ratio_trainval = 0.5  # fraction of all cases used for trainval (trainval/total)
ratio_train = 0.5  # fraction of trainval used for train (train/trainval)

def create_train_val_trainval():
    """Split the posGt annotation files into train/val/trainval/test sets
    and write the VOC-style listing files under ``result_path``.

    The split is positional over ``os.listdir``'s ordering: the first
    ``ratio_trainval * ratio_train`` fraction becomes train, the next slice
    val, and the remainder test.  Each output file holds one image id per
    line (the annotation file name with its 4-character ".txt" suffix
    removed).
    """
    cases = os.listdir(annotation_path)
    total_cases = len(cases)
    # t2 = trainval/test boundary; t1 = train/val boundary inside trainval.
    t2 = int(ratio_trainval * total_cases)
    t1 = int(ratio_train * t2)

    splits = {
        "train": cases[:t1],
        "val": cases[t1:t2],
        "test": cases[t2:],
    }
    # trainval is simply train followed by val, in the same order.
    splits["trainval"] = splits["train"] + splits["val"]

    for split_name in ("train", "val", "trainval", "test"):
        # case[:-4] strips the trailing ".txt" to leave the image id.
        content = "".join(case[:-4] + "\n" for case in splits[split_name])
        # `with` guarantees the handle is closed even if the write fails
        # (the original opened/closed each file by hand).
        with open(result_path + split_name + ".txt", "w") as out:
            out.write(content)

def create_train_for_classes():  # Not being used so far
    """Write one VOC-style ``<class>_train.txt`` listing per hand class.

    Each processed case contributes a line ``"<case> <flag>"`` to every
    class file, where flag is 1 if that class name appears in the case's
    posGt annotation and -1 otherwise.

    NOTE(review): ``total_train`` is hard-coded to 3, so only the first
    three annotation files are scanned -- presumably a leftover debug
    limit; confirm before relying on this helper.
    """
    names = ["leftHand_driver", "rightHand_driver",
             "leftHand_passenger", "rightHand_passenger"]
    # Map a class name to its slot in the indicator/record lists.
    class_index = {name: i for i, name in enumerate(names)}
    record = [[] for _ in names]

    total_train = 3  # debug limit: only the first 3 cases are processed
    train_cases = os.listdir(annotation_path)[:total_train]
    for case in train_cases:
        # `with` closes the handle (the original leaked it).
        with open(annotation_path + case) as annotation:
            lines = annotation.readlines()[1:]  # first line is a header

        indicator = [-1] * len(names)  # -1 = class absent, 1 = present
        for line in lines:
            class_name = line.strip().split(" ")[0]
            if class_name in class_index:
                indicator[class_index[class_name]] = 1
        for i in range(len(names)):
            record[i].append((case, indicator[i]))

    for i, class_name in enumerate(names):
        file_path = result_path + class_name + "_train" + ".txt"
        content = "".join(case + " " + str(flag) + "\n"
                          for case, flag in record[i])
        with open(file_path, "w") as out:
            out.write(content)
    # print(record)

if __name__ == '__main__':
    # Script entry point: build the train/val/trainval/test listing files.
    create_train_val_trainval()


49 changes: 49 additions & 0 deletions Modification Points
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@

Generate annotations
Generate 4 txt file train.txt val.txt trainval.txt test.txt under Main

Error: overlaps = entry['max_overlaps']:
Delete data/cache folder,因为里面保存了上一次数据集的roidb。因为错误显示加载了以前的文件。

Config.py:
暂时去掉使用flip扩增数据集的方法

Pascal_voc.py:
1)修改大小写obj.find('name').text.lower() delete lower()

2)Delete -1 in
x1 = float(bbox.find('xmin').text)-1…
y2 = float(bbox.find('ymax').text)-1

因为原坐标位置起始是(1,1),现在是(0,0)

3)修改分类class 为4+1类
4)修改jpg为png,因为新数据集图像格式改变了

Vgg16:
1)修改网络
2)修改load pretrain model时需要加载的参数

Error: Train loss 出现NAN:
重新制作数据集,问题消失,怀疑之前数据集有损坏。

Error: rpn_cls_score与 label不匹配,reshape无法完成:
Label长度代表了anchor数量
通过查找anchor产生过程发现产生anchor的数量是根据input(224*224) resize得到的,resize的ratio被写死了,需要修改。
修改network.py self._feat_stride, self._feat_compress
从16改为4.
Change this ratio to 4 = input width/conv5 width = 224/56 = 4 in modified case


Testing:
Vgg16.py:
修改concate 维度为-1,即连接channel的维度

lib/datasets/voc_eval.py:

注释掉部分evaluation的代码,把结果改成正确格式output到txt里面.

mAP低可能由于train不充分
需要调整thresh
testing得到很多bbox的坐标和confidence,取confidence高于thresh的bbox
得到所有testing的结果之后,把预测的box 通过 pascal_voc 的_write_voc_results_file写入了result 文件
52 changes: 51 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,51 @@
# HandDetection
# HandDetection
This is a modified Faster R-CNN hand detection project, developed during my time as a research assistant at the Centre for Artificial Intelligence (CAI) at UTS. </br>
This project achieves Top-10 performance in the VIVA hand detection competition.

![](pic/arch.png)




Setup via [https://github.com/endernewton/tf-faster-rcnn](https://github.com/endernewton/tf-faster-rcnn)

Modified the code via [Robust Hand Detection in Vehicles](http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=7899695) for hand detection.

This project is a collaboration with my colleague Yunqiu Xu (https://github.com/YunqiuXu).

# Preprocessing
~/tf-faster-rcnn-endernewton/data/LISA_HD_Static/detectiondata$ python LISA_posGt_to_VOC_Annotations.py </br>
~/tf-faster-rcnn-endernewton/data/LISA_HD_Static/detectiondata$ python LISA_posGt_to_VOC_Main.py </br>

# Train
~/tf-faster-rcnn-endernewton$ ./experiments/scripts/train_faster_rcnn.sh 0 pascal_voc vgg16 </br>

# Test
Modify the number of iterations in test_faster_rcnn.sh </br>
~/tf-faster-rcnn-endernewton$ ./experiments/scripts/test_faster_rcnn.sh 0 pascal_voc vgg16 </br>

# How to do prediction on your own dataset

cd tf-faster-rcnn-endernewton/data/LISA_HD_Static/detectiondata/ImageSets/Main </br>
mv test.txt test_for_train.txt </br>
mv test5500.txt test.txt </br>

cd tf-faster-rcnn-endernewton/data/LISA_HD_Static/detectiondata </br>
mv JPEGImages JPEGImages_train </br>
mv JPEGImages_test JPEGImages </br>

Open tf-faster-rcnn-endernewton/experiments/scripts/test_faster_rcnn.sh </br>
Set line 21 "ITERS = the iters of the model you trained" Say if you trained a model with 10000 iters, set this line "ITERS = 10000" </br>

cd tf-faster-rcnn-endernewton </br>
./experiments/scripts/test_faster_rcnn.sh 0 pascal_voc vgg16 </br>

# How to stop the training

tmux attach </br>
ctrl+c





36 changes: 36 additions & 0 deletions checkpoint_params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Jun 25 17:00:50 2017

@author: Shaoshen Wang
"""
#Used for show the variables in a checkpoint file
#Usage: Put this code under tf-faster-rcnn-master

import os
import tensorflow as tf
from tensorflow.python import pywrap_tensorflow
from tensorflow.python.tools.inspect_checkpoint import print_tensors_in_checkpoint_file

def get_variables_in_checkpoint_file(file_name):
    """Return a {variable_name: shape} map for every tensor stored in the
    checkpoint at *file_name*.

    Best-effort: any reader failure is printed and the function falls
    through to an implicit ``None`` return (behaviour kept from the
    original script).
    """
    try:
        ckpt_reader = pywrap_tensorflow.NewCheckpointReader(file_name)
        return ckpt_reader.get_variable_to_shape_map()
    except Exception as err:
        # Report the problem instead of raising; caller gets None.
        print(str(err))


# Build the checkpoint path portably.  The original used the Windows-style
# literal ".\data\imagenet_weights": "\d" and "\i" are invalid escape
# sequences in a non-raw string and the backslashes break on POSIX.
model_dir = os.path.join(".", "data", "imagenet_weights")
checkpoint_path = os.path.join(model_dir, "vgg16.ckpt")

var_to_shape_map = get_variables_in_checkpoint_file(checkpoint_path)

# The helper returns None when the checkpoint could not be read; guard so
# the script reports nothing instead of crashing with a TypeError.
if var_to_shape_map:
    for var, shape in var_to_shape_map.items():
        print(var, shape)


# List ALL tensors example output: v0/Adam (DT_FLOAT) [3,3,1,80]
#print_tensors_in_checkpoint_file(file_name=checkpoint_path, tensor_name='',all_tensors='')
Loading