
I am using demo.py from https://github.com/rbgirshick/py-faster-rcnn/tree/master/tools to display images with bounding boxes while running py-faster-rcnn with VGG_CNN_M_1024.

I modified the code to run VGG_CNN_M_1024 because I am using a 2 GB GPU, and following the comments in https://github.com/rbgirshick/fast-rcnn/issues/2 I chose VGG_CNN_M_1024.caffemodel instead of VGG16_faster_rcnn_final.caffemodel.

This is the demo.py code I am running:

#!/usr/bin/env python 

# -------------------------------------------------------- 
# Faster R-CNN 
# Copyright (c) 2015 Microsoft 
# Licensed under The MIT License [see LICENSE for details] 
# Written by Ross Girshick 
# -------------------------------------------------------- 

""" 
Demo script showing detections in sample images. 

See README.md for installation instructions before running. 
""" 

import _init_paths 
from fast_rcnn.config import cfg 
from fast_rcnn.test import im_detect 
from fast_rcnn.nms_wrapper import nms 
from utils.timer import Timer 
import matplotlib.pyplot as plt 
import numpy as np 
import scipy.io as sio 
import caffe, os, sys, cv2 
import argparse 

CLASSES = ('__background__',
           'aeroplane', 'bicycle', 'bird', 'boat',
           'bottle', 'bus', 'car', 'cat', 'chair',
           'cow', 'diningtable', 'dog', 'horse',
           'motorbike', 'person', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor')

NETS = {'vgg16': ('VGG16',
                  'VGG16_faster_rcnn_final.caffemodel'),
        'zf': ('ZF',
               'ZF_faster_rcnn_final.caffemodel'),
        'vgg16_m_1024': ('VGG_CNN_M_1024',
                         'VGG_CNN_M_1024.caffemodel')}


def vis_detections(im, class_name, dets, thresh=0.5):
    """Draw detected bounding boxes."""
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return

    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]

        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1], fill=False,
                          edgecolor='red', linewidth=3.5)
            )
        ax.text(bbox[0], bbox[1] - 2,
                '{:s} {:.3f}'.format(class_name, score),
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white')

    ax.set_title(('{} detections with '
                  'p({} | box) >= {:.1f}').format(class_name, class_name,
                                                  thresh),
                 fontsize=14)
    plt.axis('off')
    plt.tight_layout()
    plt.draw()

def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)

def parse_args():
    """Parse input arguments."""
    parser = argparse.ArgumentParser(description='Faster R-CNN demo')
    parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
                        default=0, type=int)
    parser.add_argument('--cpu', dest='cpu_mode',
                        help='Use CPU mode (overrides --gpu)',
                        action='store_true')
    parser.add_argument('--net', dest='demo_net',
                        help='Network to use [vgg16_m_1024]',
                        choices=NETS.keys(), default='vgg16_m_1024')

    args = parser.parse_args()

    return args

if __name__ == '__main__':
    cfg.TEST.HAS_RPN = True  # Use RPN for proposals

    args = parse_args()

    prototxt = os.path.join(cfg.MODELS_DIR, NETS[args.demo_net][0],
                            'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
    caffemodel = os.path.join(cfg.DATA_DIR, 'faster_rcnn_models',
                              NETS[args.demo_net][1])

    if not os.path.isfile(caffemodel):
        raise IOError(('{:s} not found.\nDid you run ./data/script/'
                       'fetch_faster_rcnn_models.sh?').format(caffemodel))

    if args.cpu_mode:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpu_id)
        cfg.GPU_ID = args.gpu_id
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)

    print '\n\nLoaded network {:s}'.format(caffemodel)

    # Warmup on a dummy image
    im = 128 * np.ones((300, 500, 3), dtype=np.uint8)
    for i in xrange(2):
        _, _ = im_detect(net, im)

    im_names = ['000456.jpg', '000542.jpg', '001150.jpg',
                '001763.jpg', '004545.jpg']
    for im_name in im_names:
        print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
        print 'Demo for data/demo/{}'.format(im_name)
        demo(net, im_name)

    plt.show()

And this is config.py:

# -------------------------------------------------------- 
# Fast R-CNN 
# Copyright (c) 2015 Microsoft 
# Licensed under The MIT License [see LICENSE for details] 
# Written by Ross Girshick 
# -------------------------------------------------------- 

"""Fast R-CNN config system. 

This file specifies default config options for Fast R-CNN. You should not 
change values in this file. Instead, you should write a config file (in yaml) 
and use cfg_from_file(yaml_file) to load it and override the default options. 

Most tools in $ROOT/tools take a --cfg option to specify an override file. 
    - See tools/{train,test}_net.py for example code that uses cfg_from_file() 
    - See experiments/cfgs/*.yml for example YAML config override files 
""" 

import os 
import os.path as osp 
import numpy as np 
# `pip install easydict` if you don't have it 
from easydict import EasyDict as edict 

__C = edict() 
# Consumers can get config by: 
# from fast_rcnn_config import cfg 
cfg = __C 

# 
# Training options 
# 

__C.TRAIN = edict() 

# Scales to use during training (can list multiple scales) 
# Each scale is the pixel size of an image's shortest side 
__C.TRAIN.SCALES = (600,) 

# Max pixel size of the longest side of a scaled input image 
__C.TRAIN.MAX_SIZE = 1000 

# Images to use per minibatch 
__C.TRAIN.IMS_PER_BATCH = 2 

# Minibatch size (number of regions of interest [ROIs]) 
__C.TRAIN.BATCH_SIZE = 128 

# Fraction of minibatch that is labeled foreground (i.e. class > 0) 
__C.TRAIN.FG_FRACTION = 0.25 

# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 
__C.TRAIN.FG_THRESH = 0.5 

# Overlap threshold for a ROI to be considered background (class = 0 if 
# overlap in [LO, HI)) 
__C.TRAIN.BG_THRESH_HI = 0.5 
__C.TRAIN.BG_THRESH_LO = 0.1 

# Use horizontally-flipped images during training? 
__C.TRAIN.USE_FLIPPED = True 

# Train bounding-box regressors 
__C.TRAIN.BBOX_REG = True 

# Overlap required between a ROI and ground-truth box in order for that ROI to 
# be used as a bounding-box regression training example 
__C.TRAIN.BBOX_THRESH = 0.5 

# Iterations between snapshots 
__C.TRAIN.SNAPSHOT_ITERS = 10000 

# solver.prototxt specifies the snapshot path prefix; this adds an optional 
# infix to yield the path: <prefix>[_<infix>]_iters_XYZ.caffemodel 
__C.TRAIN.SNAPSHOT_INFIX = '' 

# Use a prefetch thread in roi_data_layer.layer 
# So far I haven't found this useful; likely more engineering work is required 
__C.TRAIN.USE_PREFETCH = False 

# Normalize the targets (subtract empirical mean, divide by empirical stddev) 
__C.TRAIN.BBOX_NORMALIZE_TARGETS = True 
# Deprecated (inside weights) 
__C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0) 
# Normalize the targets using "precomputed" (or made up) means and stdevs 
# (BBOX_NORMALIZE_TARGETS must also be True) 
__C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = False 
__C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 
__C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 

# Train using these proposals 
__C.TRAIN.PROPOSAL_METHOD = 'selective_search' 

# Make minibatches from images that have similar aspect ratios (i.e. both 
# tall and thin or both short and wide) in order to avoid wasting computation 
# on zero-padding. 
__C.TRAIN.ASPECT_GROUPING = True 

# Use RPN to detect objects 
__C.TRAIN.HAS_RPN = False 
# IOU >= thresh: positive example 
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7 
# IOU < thresh: negative example 
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 
# If an anchor satisfies both the positive and negative conditions, set it to negative 
__C.TRAIN.RPN_CLOBBER_POSITIVES = False 
# Max number of foreground examples 
__C.TRAIN.RPN_FG_FRACTION = 0.5 
# Total number of examples 
__C.TRAIN.RPN_BATCHSIZE = 256 
# NMS threshold used on RPN proposals 
__C.TRAIN.RPN_NMS_THRESH = 0.7 
# Number of top scoring boxes to keep before applying NMS to RPN proposals 
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000 
# Number of top scoring boxes to keep after applying NMS to RPN proposals 
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000 
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) 
__C.TRAIN.RPN_MIN_SIZE = 16 
# Deprecated (outside weights) 
__C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0) 
# Give the positive RPN examples weight of p * 1/{num positives} 
# and give negatives a weight of (1 - p) 
# Set to -1.0 to use uniform example weighting 
__C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0 


# 
# Testing options 
# 

__C.TEST = edict() 

# Scales to use during testing (can list multiple scales) 
# Each scale is the pixel size of an image's shortest side 
__C.TEST.SCALES = (600,) 

# Max pixel size of the longest side of a scaled input image 
__C.TEST.MAX_SIZE = 1000 

# Overlap threshold used for non-maximum suppression (suppress boxes with 
# IoU >= this threshold) 
__C.TEST.NMS = 0.3 

# Experimental: treat the (K+1) units in the cls_score layer as linear 
# predictors (trained, e.g., with one-vs-rest SVMs). 
__C.TEST.SVM = False 

# Test using bounding-box regressors 
__C.TEST.BBOX_REG = True 

# Propose boxes 
__C.TEST.HAS_RPN = False 

# Test using these proposals 
__C.TEST.PROPOSAL_METHOD = 'selective_search' 

## NMS threshold used on RPN proposals 
__C.TEST.RPN_NMS_THRESH = 0.7 
## Number of top scoring boxes to keep before applying NMS to RPN proposals 
__C.TEST.RPN_PRE_NMS_TOP_N = 6000 
## Number of top scoring boxes to keep after applying NMS to RPN proposals 
__C.TEST.RPN_POST_NMS_TOP_N = 300 
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) 
__C.TEST.RPN_MIN_SIZE = 16 


# 
# MISC 
# 

# The mapping from image coordinates to feature map coordinates might cause 
# some boxes that are distinct in image space to become identical in feature 
# coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor 
# for identifying duplicate boxes. 
# 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16 
__C.DEDUP_BOXES = 1./16. 

# Pixel mean values (BGR order) as a (1, 1, 3) array 
# We use the same pixel mean for all networks even though it's not exactly what 
# they were trained with 
__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]]) 

# For reproducibility 
__C.RNG_SEED = 3 

# A small number that's used many times 
__C.EPS = 1e-14 

# Root directory of project 
__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..')) 

# Data directory 
__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data')) 

# Model directory 
__C.MODELS_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'models', 'pascal_voc')) 

# Name (or path to) the matlab executable 
__C.MATLAB = 'matlab' 

# Place outputs under an experiments directory 
__C.EXP_DIR = 'default' 

# Use GPU implementation of non-maximum suppression 
__C.USE_GPU_NMS = False 

# Default GPU device id 
__C.GPU_ID = 0 


def get_output_dir(imdb, net=None):
    """Return the directory where experimental artifacts are placed.
    If the directory does not exist, it is created.

    A canonical path is built using the name from an imdb and a network
    (if not None).
    """
    outdir = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name))
    if net is not None:
        outdir = osp.join(outdir, net.name)
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    return outdir

def _merge_a_into_b(a, b):
    """Merge config dictionary a into config dictionary b, clobbering the
    options in b whenever they are also specified in a.
    """
    if type(a) is not edict:
        return

    for k, v in a.iteritems():
        # a must specify keys that are in b
        if not b.has_key(k):
            raise KeyError('{} is not a valid config key'.format(k))

        # the types must match, too
        old_type = type(b[k])
        if old_type is not type(v):
            if isinstance(b[k], np.ndarray):
                v = np.array(v, dtype=b[k].dtype)
            else:
                raise ValueError(('Type mismatch ({} vs. {}) '
                                  'for config key: {}').format(type(b[k]),
                                                               type(v), k))

        # recursively merge dicts
        if type(v) is edict:
            try:
                _merge_a_into_b(a[k], b[k])
            except:
                print('Error under config key: {}'.format(k))
                raise
        else:
            b[k] = v

def cfg_from_file(filename):
    """Load a config file and merge it into the default options."""
    import yaml
    with open(filename, 'r') as f:
        yaml_cfg = edict(yaml.load(f))

    _merge_a_into_b(yaml_cfg, __C)

def cfg_from_list(cfg_list):
    """Set config keys via list (e.g., from command line)."""
    from ast import literal_eval
    assert len(cfg_list) % 2 == 0
    for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
        key_list = k.split('.')
        d = __C
        for subkey in key_list[:-1]:
            assert d.has_key(subkey)
            d = d[subkey]
        subkey = key_list[-1]
        assert d.has_key(subkey)
        try:
            value = literal_eval(v)
        except:
            # handle the case when v is a string literal
            value = v
        assert type(value) == type(d[subkey]), \
            'type {} does not match original type {}'.format(
            type(value), type(d[subkey]))
        d[subkey] = value
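
The docstring at the top of config.py describes the intended workflow: leave the defaults alone and merge overrides in via cfg_from_file() or cfg_from_list(). As a minimal sketch of the second path (the key/value pairs below are illustrative; they just restate two of the defaults above):

# Sketch: override config options the way tools/{train,test}_net.py do,
# instead of editing config.py directly. Keys/values here are illustrative.
from fast_rcnn.config import cfg, cfg_from_list

cfg_from_list(['TEST.NMS', '0.3', 'TEST.RPN_POST_NMS_TOP_N', '300'])
print cfg.TEST.RPN_POST_NMS_TOP_N  # -> 300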

Every time I run the code with the ZF net, I get output images with bounding boxes.

The terminal output for ZF is given here: http://txt.do/5bqsf

However, when I run the code with the VGG_CNN_M_1024 net, no output images are displayed, even though the code runs successfully.

The terminal output for VGG_CNN_M_1024 is given here: http://txt.do/5bqsf
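
One thing I checked (a debugging sketch, not part of the original demo.py; it assumes the imports and CLASSES defined above) is whether detections are produced at all but simply fall below CONF_THRESH = 0.8, in which case vis_detections() returns without drawing anything:

# Debugging sketch: print the best score per class for one image.
# Assumes the imports and CLASSES from demo.py above.
def print_max_scores(net, im):
    scores, boxes = im_detect(net, im)
    for cls_ind, cls in enumerate(CLASSES[1:], start=1):
        print '{:>12s}: max score {:.3f}'.format(cls, scores[:, cls_ind].max())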

What can I change in the code?

Answer 1

VGG16_faster_rcnn_final.caffemodel is the model learned via Faster R-CNN; it would have been initialized from VGG16.caffemodel.

What you are looking for is a Faster R-CNN model that was initialized from VGG_CNN_M_1024.caffemodel. Such a model would be called VGG_CNN_M_1024_faster_rcnn_final.caffemodel if it followed the naming convention above.

If that model is not available online, you will have to train it yourself on the PASCAL dataset, e.g. with the repo's ./experiments/scripts/faster_rcnn_alt_opt.sh script.
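
One way to see the mismatch directly (a diagnostic sketch, not part of the repo; the two paths are examples, adjust them to your checkout) is to load the net exactly as demo.py does and inspect the per-layer weight statistics. Detection-specific layers that are missing from an ImageNet classification .caffemodel, such as the RPN and bbox_pred layers, keep whatever initialization the prototxt gives them, which is why the demo runs without errors but never produces confident detections:

import caffe

# Diagnostic sketch: paths below are examples, adjust to your setup.
prototxt = 'models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/faster_rcnn_test.pt'
caffemodel = 'data/faster_rcnn_models/VGG_CNN_M_1024.caffemodel'

net = caffe.Net(prototxt, caffemodel, caffe.TEST)
for name, params in net.params.items():
    # Layers absent from the .caffemodel keep their initial weights,
    # which shows up in the mean/std printed here.
    print '{:>20s}: weight mean {:+.5f}, std {:.5f}'.format(
        name, params[0].data.mean(), params[0].data.std())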

Answer 2

As you mentioned, you were not able to use VGG16_faster_rcnn_final.caffemodel and changed it to VGG_CNN_M_1024.caffemodel. An alternative is to place your trained model in the faster_rcnn_models folder instead of adding a third net to NETS. I follow the same convention and it always works for me.
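
For reference, demo.py resolves the weights file as os.path.join(cfg.DATA_DIR, 'faster_rcnn_models', NETS[args.demo_net][1]), so this suggestion amounts to placing the trained model at that path under a file name already present in NETS. A quick sanity check (a sketch; the repo-root placeholder is illustrative):

import os

# Sketch: verify the model sits where demo.py will look for it.
repo_root = '.'  # path to your py-faster-rcnn checkout
caffemodel = os.path.join(repo_root, 'data', 'faster_rcnn_models',
                          'VGG16_faster_rcnn_final.caffemodel')
print os.path.isfile(caffemodel)  # should print True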