From 85c8d2ff12b8733723696dec54b97bd024e373f6 Mon Sep 17 00:00:00 2001
From: Maksym Ostapenko
Date: Wed, 4 Dec 2019 01:38:42 +0200
Subject: [PATCH] Initial commit.

---
 .gitignore                     |  11 +
 README.md                      |  41 ++++
 deep_sort/__init__.py          |   0
 deep_sort/detection.py         |  48 +++++
 deep_sort/iou_matching.py      |  83 ++++++++
 deep_sort/kalman_filter.py     | 228 ++++++++++++++++++++
 deep_sort/linear_assignment.py | 191 +++++++++++++++++
 deep_sort/nn_matching.py       | 176 ++++++++++++++++
 deep_sort/preprocessing.py     |  72 +++++++
 deep_sort/track.py             | 163 ++++++++++++++
 deep_sort/tracker.py           | 139 ++++++++++++
 demo.py                        | 168 +++++++++++++++
 tools/__init__.py              |   0
 tools/generate_detections.py   | 181 ++++++++++++++++
 yolo3/__init__.py              |   0
 yolo3/model.py                 | 373 +++++++++++++++++++++++++++++++++
 yolo3/utils.py                 |  30 +++
 yolo3/yolo.py                  | 119 +++++++++++
 18 files changed, 2023 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 deep_sort/__init__.py
 create mode 100644 deep_sort/detection.py
 create mode 100644 deep_sort/iou_matching.py
 create mode 100644 deep_sort/kalman_filter.py
 create mode 100644 deep_sort/linear_assignment.py
 create mode 100644 deep_sort/nn_matching.py
 create mode 100644 deep_sort/preprocessing.py
 create mode 100644 deep_sort/track.py
 create mode 100644 deep_sort/tracker.py
 create mode 100644 demo.py
 create mode 100644 tools/__init__.py
 create mode 100644 tools/generate_detections.py
 create mode 100644 yolo3/__init__.py
 create mode 100644 yolo3/model.py
 create mode 100644 yolo3/utils.py
 create mode 100644 yolo3/yolo.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1499b15
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+input/*
+output/*
+.idea/*
+__pycache__
+.ipynb_checkpoints
+model_data/*
+
+*.pyc
+*.swp
+*.swo
+*.swn
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..bcf2c80
--- /dev/null
+++ b/README.md
@@ -0,0 +1,41 @@
+# A3: People counter
+
+The main goal of this project is to count people on the street, so all parameters are tuned for that task.
+
+### Quick Start
+
+1. Clone the repository.
+2. Download the converted weights: [yolo.h5 model file with tf-1.4.0](https://drive.google.com/file/d/1uvXFacPnrSMw6ldWTyLLjGLETlEsUvcE/view?usp=sharing). Put them into the **model_data** folder.
+3. Install the requirements.
+4. Specify the path to the input file and run the model:
+   ```
+   python demo.py --videofile="path/to/your/videofile" --out_root_dir="path/to/output/dir/"
+   ```
+
+### Dependencies
+
+The code is compatible with Python 3. The following dependencies are needed to run the tracker:
+
+    NumPy
+    scikit-learn
+    OpenCV
+    Pillow
+    Keras
+
+Additionally, feature generation requires TensorFlow-1.4.0.
+
+### Run for other classes
+
+Be aware that the code ignores every class except `person`. Change the class check if you want to run the tracker on other object types:
+
+    [A3/yolo3/yolo.py]:
+
+    if predicted_class != 'person':
+        continue
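+
+For example, to count several classes at once, the check could be relaxed along
+these lines (an illustrative sketch; the `allowed_classes` name is introduced
+here for illustration and is not part of the repository):
+
+    allowed_classes = {'person', 'bicycle', 'car'}
+    if predicted_class not in allowed_classes:
+        continue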
+
+### Notes for future work
+
+The Keras version of YOLO used here to get the bounding boxes is too slow; you
+can replace it with any detector you like.
+
+The model file model_data/mars-small128.pb needed by deep_sort has been
+converted for tensorflow-1.4.0.
+
+**This work is mainly based on https://github.com/Qidian213/deep_sort_yolov3.
+Thanks a lot to its author.**
\ No newline at end of file
diff --git a/deep_sort/__init__.py b/deep_sort/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/deep_sort/detection.py b/deep_sort/detection.py
new file mode 100644
index 0000000..5d08b86
--- /dev/null
+++ b/deep_sort/detection.py
@@ -0,0 +1,48 @@
+import numpy as np
+
+
+class Detection(object):
+    """
+    This class represents a bounding box detection in a single image.
+
+    Parameters
+    ----------
+    tlwh : array_like
+        Bounding box in format `(x, y, w, h)`.
+    confidence : float
+        Detector confidence score.
+    feature : array_like
+        A feature vector that describes the object contained in this image.
+
+    Attributes
+    ----------
+    tlwh : ndarray
+        Bounding box in format `(top left x, top left y, width, height)`.
+    confidence : float
+        Detector confidence score.
+    feature : ndarray | NoneType
+        A feature vector that describes the object contained in this image.
+
+    """
+
+    def __init__(self, tlwh, confidence, feature):
+        self.tlwh = np.asarray(tlwh, dtype=np.float)
+        self.confidence = float(confidence)
+        self.feature = np.asarray(feature, dtype=np.float32)
+
+    def to_tlbr(self):
+        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
+        `(top left, bottom right)`.
+        """
+        ret = self.tlwh.copy()
+        ret[2:] += ret[:2]
+        return ret
+
+    def to_xyah(self):
+        """Convert bounding box to format `(center x, center y, aspect ratio,
+        height)`, where the aspect ratio is `width / height`.
+        """
+        ret = self.tlwh.copy()
+        ret[:2] += ret[2:] / 2
+        ret[2] /= ret[3]
+        return ret
diff --git a/deep_sort/iou_matching.py b/deep_sort/iou_matching.py
new file mode 100644
index 0000000..65134e0
--- /dev/null
+++ b/deep_sort/iou_matching.py
@@ -0,0 +1,83 @@
+from __future__ import absolute_import
+
+import numpy as np
+
+from . import linear_assignment
+
+
+def iou(bbox, candidates):
+    """Compute intersection over union.
+
+    Parameters
+    ----------
+    bbox : ndarray
+        A bounding box in format `(top left x, top left y, width, height)`.
+    candidates : ndarray
+        A matrix of candidate bounding boxes (one per row) in the same format
+        as `bbox`.
+
+    Returns
+    -------
+    ndarray
+        The intersection over union in [0, 1] between the `bbox` and each
+        candidate. A higher score means a larger fraction of the `bbox` is
+        occluded by the candidate.
+
+    """
+    bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
+    candidates_tl = candidates[:, :2]
+    candidates_br = candidates[:, :2] + candidates[:, 2:]
+
+    tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
+               np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
+    br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
+               np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
+    wh = np.maximum(0., br - tl)
+
+    area_intersection = wh.prod(axis=1)
+    area_bbox = bbox[2:].prod()
+    area_candidates = candidates[:, 2:].prod(axis=1)
+    return area_intersection / (area_bbox + area_candidates - area_intersection)
+
+
+def iou_cost(tracks, detections, track_indices=None,
+             detection_indices=None):
+    """An intersection over union distance metric.
+
+    Parameters
+    ----------
+    tracks : List[deep_sort.track.Track]
+        A list of tracks.
+    detections : List[deep_sort.detection.Detection]
+        A list of detections.
+    track_indices : Optional[List[int]]
+        A list of indices to tracks that should be matched. Defaults to
+        all `tracks`.
+    detection_indices : Optional[List[int]]
+        A list of indices to detections that should be matched. Defaults
+        to all `detections`.
+ + Returns + ------- + ndarray + Returns a cost matrix of shape + len(track_indices), len(detection_indices) where entry (i, j) is + `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. + + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + cost_matrix = np.zeros((len(track_indices), len(detection_indices))) + for row, track_idx in enumerate(track_indices): + if tracks[track_idx].time_since_update > 1: + cost_matrix[row, :] = linear_assignment.INFTY_COST + continue + + bbox = tracks[track_idx].to_tlwh() + candidates = np.asarray([detections[i].tlwh for i in detection_indices]) + cost_matrix[row, :] = 1. - iou(bbox, candidates) + + return cost_matrix diff --git a/deep_sort/kalman_filter.py b/deep_sort/kalman_filter.py new file mode 100644 index 0000000..076cd5c --- /dev/null +++ b/deep_sort/kalman_filter.py @@ -0,0 +1,228 @@ +import numpy as np +import scipy.linalg + + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + + The 8-dimensional state space + + x, y, a, h, vx, vy, va, vh + + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. 
+ covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + mean = np.dot(self._motion_mat, mean) + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False): + """Compute gating distance between state distribution and measurements. + + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. 
+ + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. + + """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + cholesky_factor = np.linalg.cholesky(covariance) + d = measurements - mean + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha diff --git a/deep_sort/linear_assignment.py b/deep_sort/linear_assignment.py new file mode 100644 index 0000000..51a08b6 --- /dev/null +++ b/deep_sort/linear_assignment.py @@ -0,0 +1,191 @@ +from __future__ import absolute_import + +import numpy as np +from sklearn.utils.linear_assignment_ import linear_assignment + +from . import kalman_filter + + +INFTY_COST = 1e+5 + + +def min_cost_matching( + distance_metric, max_distance, tracks, detections, track_indices=None, + detection_indices=None): + """Solve linear assignment problem. + + Parameters + ---------- + distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as well as + a list of N track indices and M detection indices. The metric should + return the NxM dimensional cost matrix, where element (i, j) is the + association cost between the i-th track in the given track indices and + the j-th detection in the given detection_indices. + max_distance : float + Gating threshold. Associations with cost larger than this value are + disregarded. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + + Returns + ------- + (List[(int, int)], List[int], List[int]) + Returns a tuple with the following three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. + + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + if len(detection_indices) == 0 or len(track_indices) == 0: + return [], track_indices, detection_indices # Nothing to match. 
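+    # Costs above the gating threshold are clamped to just past `max_distance`
+    # so the Hungarian solver still receives a complete matrix; those pairs
+    # are filtered back out as unmatched after the assignment.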
+
+    cost_matrix = distance_metric(
+        tracks, detections, track_indices, detection_indices)
+    cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
+    indices = linear_assignment(cost_matrix)
+
+    matches, unmatched_tracks, unmatched_detections = [], [], []
+    for col, detection_idx in enumerate(detection_indices):
+        if col not in indices[:, 1]:
+            unmatched_detections.append(detection_idx)
+    for row, track_idx in enumerate(track_indices):
+        if row not in indices[:, 0]:
+            unmatched_tracks.append(track_idx)
+    for row, col in indices:
+        track_idx = track_indices[row]
+        detection_idx = detection_indices[col]
+        if cost_matrix[row, col] > max_distance:
+            unmatched_tracks.append(track_idx)
+            unmatched_detections.append(detection_idx)
+        else:
+            matches.append((track_idx, detection_idx))
+    return matches, unmatched_tracks, unmatched_detections
+
+
+def matching_cascade(
+        distance_metric, max_distance, cascade_depth, tracks, detections,
+        track_indices=None, detection_indices=None):
+    """Run matching cascade.
+
+    Parameters
+    ----------
+    distance_metric : Callable[List[Track], List[Detection], List[int], List[int]] -> ndarray
+        The distance metric is given a list of tracks and detections as well as
+        a list of N track indices and M detection indices. The metric should
+        return the NxM dimensional cost matrix, where element (i, j) is the
+        association cost between the i-th track in the given track indices and
+        the j-th detection in the given detection indices.
+    max_distance : float
+        Gating threshold. Associations with cost larger than this value are
+        disregarded.
+    cascade_depth: int
+        The cascade depth, should be set to the maximum track age.
+    tracks : List[track.Track]
+        A list of predicted tracks at the current time step.
+    detections : List[detection.Detection]
+        A list of detections at the current time step.
+    track_indices : Optional[List[int]]
+        List of track indices that maps rows in `cost_matrix` to tracks in
+        `tracks` (see description above). Defaults to all tracks.
+    detection_indices : Optional[List[int]]
+        List of detection indices that maps columns in `cost_matrix` to
+        detections in `detections` (see description above). Defaults to all
+        detections.
+
+    Returns
+    -------
+    (List[(int, int)], List[int], List[int])
+        Returns a tuple with the following three entries:
+        * A list of matched track and detection indices.
+        * A list of unmatched track indices.
+        * A list of unmatched detection indices.
+
+    """
+    if track_indices is None:
+        track_indices = list(range(len(tracks)))
+    if detection_indices is None:
+        detection_indices = list(range(len(detections)))
+
+    unmatched_detections = detection_indices
+    matches = []
+    for level in range(cascade_depth):
+        if len(unmatched_detections) == 0:  # No detections left
+            break
+
+        track_indices_l = [
+            k for k in track_indices
+            if tracks[k].time_since_update == 1 + level
+        ]
+        if len(track_indices_l) == 0:  # Nothing to match at this level
+            continue
+
+        matches_l, _, unmatched_detections = \
+            min_cost_matching(
+                distance_metric, max_distance, tracks, detections,
+                track_indices_l, unmatched_detections)
+        matches += matches_l
+    unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
+    return matches, unmatched_tracks, unmatched_detections
+
+
+def gate_cost_matrix(
+        kf, cost_matrix, tracks, detections, track_indices, detection_indices,
+        gated_cost=INFTY_COST, only_position=False):
+    """Invalidate infeasible entries in cost matrix based on the state
+    distributions obtained by Kalman filtering.
+
+    Parameters
+    ----------
+    kf : The Kalman filter.
+    cost_matrix : ndarray
+        The NxM dimensional cost matrix, where N is the number of track indices
+        and M is the number of detection indices, such that entry (i, j) is the
+        association cost between `tracks[track_indices[i]]` and
+        `detections[detection_indices[j]]`.
+    tracks : List[track.Track]
+        A list of predicted tracks at the current time step.
+    detections : List[detection.Detection]
+        A list of detections at the current time step.
+    track_indices : List[int]
+        List of track indices that maps rows in `cost_matrix` to tracks in
+        `tracks` (see description above).
+    detection_indices : List[int]
+        List of detection indices that maps columns in `cost_matrix` to
+        detections in `detections` (see description above).
+    gated_cost : Optional[float]
+        Entries in the cost matrix corresponding to infeasible associations are
+        set to this value. Defaults to a very large value.
+    only_position : Optional[bool]
+        If True, only the x, y position of the state distribution is considered
+        during gating. Defaults to False.
+
+    Returns
+    -------
+    ndarray
+        Returns the modified cost matrix.
+
+    """
+    gating_dim = 2 if only_position else 4
+    gating_threshold = kalman_filter.chi2inv95[gating_dim]
+    measurements = np.asarray(
+        [detections[i].to_xyah() for i in detection_indices])
+    for row, track_idx in enumerate(track_indices):
+        track = tracks[track_idx]
+        gating_distance = kf.gating_distance(
+            track.mean, track.covariance, measurements, only_position)
+        cost_matrix[row, gating_distance > gating_threshold] = gated_cost
+    return cost_matrix
diff --git a/deep_sort/nn_matching.py b/deep_sort/nn_matching.py
new file mode 100644
index 0000000..0f5f88f
--- /dev/null
+++ b/deep_sort/nn_matching.py
@@ -0,0 +1,176 @@
+import numpy as np
+
+
+def _pdist(a, b):
+    """Compute pair-wise squared distance between points in `a` and `b`.
+
+    Parameters
+    ----------
+    a : array_like
+        An NxM matrix of N samples of dimensionality M.
+    b : array_like
+        An LxM matrix of L samples of dimensionality M.
+
+    Returns
+    -------
+    ndarray
+        Returns a matrix of size len(a), len(b) such that element (i, j)
+        contains the squared distance between `a[i]` and `b[j]`.
+
+    """
+    a, b = np.asarray(a), np.asarray(b)
+    if len(a) == 0 or len(b) == 0:
+        return np.zeros((len(a), len(b)))
+    a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
+    r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
+    r2 = np.clip(r2, 0., float(np.inf))
+    return r2
+
+
+def _cosine_distance(a, b, data_is_normalized=False):
+    """Compute pair-wise cosine distance between points in `a` and `b`.
+
+    Parameters
+    ----------
+    a : array_like
+        An NxM matrix of N samples of dimensionality M.
+    b : array_like
+        An LxM matrix of L samples of dimensionality M.
+    data_is_normalized : Optional[bool]
+        If True, assumes rows in a and b are unit length vectors.
+        Otherwise, a and b are explicitly normalized to length 1.
+
+    Returns
+    -------
+    ndarray
+        Returns a matrix of size len(a), len(b) such that element (i, j)
+        contains the cosine distance between `a[i]` and `b[j]`.
+
+    """
+    if not data_is_normalized:
+        a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
+        b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
+    return 1. - np.dot(a, b.T)
+
+
+def _nn_euclidean_distance(x, y):
+    """ Helper function for nearest neighbor distance metric (Euclidean).
+
+    Parameters
+    ----------
+    x : ndarray
+        A matrix of N row-vectors (sample points).
+    y : ndarray
+        A matrix of M row-vectors (query points).
+ + Returns + ------- + ndarray + A vector of length M that contains for each entry in `y` the + smallest Euclidean distance to a sample in `x`. + + """ + distances = _pdist(x, y) + return np.maximum(0.0, distances.min(axis=0)) + + +def _nn_cosine_distance(x, y): + """ Helper function for nearest neighbor distance metric (cosine). + + Parameters + ---------- + x : ndarray + A matrix of N row-vectors (sample points). + y : ndarray + A matrix of M row-vectors (query points). + + Returns + ------- + ndarray + A vector of length M that contains for each entry in `y` the + smallest cosine distance to a sample in `x`. + + """ + distances = _cosine_distance(x, y) + return distances.min(axis=0) + + +class NearestNeighborDistanceMetric(object): + """ + A nearest neighbor distance metric that, for each target, returns + the closest distance to any sample that has been observed so far. + + Parameters + ---------- + metric : str + Either "euclidean" or "cosine". + matching_threshold: float + The matching threshold. Samples with larger distance are considered an + invalid match. + budget : Optional[int] + If not None, fix samples per class to at most this number. Removes + the oldest samples when the budget is reached. + + Attributes + ---------- + samples : Dict[int -> List[ndarray]] + A dictionary that maps from target identities to the list of samples + that have been observed so far. + + """ + + def __init__(self, metric, matching_threshold, budget=None): + + + if metric == "euclidean": + self._metric = _nn_euclidean_distance + elif metric == "cosine": + self._metric = _nn_cosine_distance + else: + raise ValueError( + "Invalid metric; must be either 'euclidean' or 'cosine'") + self.matching_threshold = matching_threshold + self.budget = budget + self.samples = {} + + def partial_fit(self, features, targets, active_targets): + """Update the distance metric with new data. + + Parameters + ---------- + features : ndarray + An NxM matrix of N features of dimensionality M. + targets : ndarray + An integer array of associated target identities. + active_targets : List[int] + A list of targets that are currently present in the scene. + + """ + for feature, target in zip(features, targets): + self.samples.setdefault(target, []).append(feature) + if self.budget is not None: + self.samples[target] = self.samples[target][-self.budget:] + self.samples = {k: self.samples[k] for k in active_targets} + + def distance(self, features, targets): + """Compute distance between features and targets. + + Parameters + ---------- + features : ndarray + An NxM matrix of N features of dimensionality M. + targets : List[int] + A list of targets to match the given `features` against. + + Returns + ------- + ndarray + Returns a cost matrix of shape len(targets), len(features), where + element (i, j) contains the closest squared distance between + `targets[i]` and `features[j]`. + + """ + cost_matrix = np.zeros((len(targets), len(features))) + for i, target in enumerate(targets): + cost_matrix[i, :] = self._metric(self.samples[target], features) + return cost_matrix diff --git a/deep_sort/preprocessing.py b/deep_sort/preprocessing.py new file mode 100644 index 0000000..98cf558 --- /dev/null +++ b/deep_sort/preprocessing.py @@ -0,0 +1,72 @@ +import cv2 +import numpy as np + + +def non_max_suppression(boxes, max_bbox_overlap, scores=None): + """Suppress overlapping detections. + + Original code from [1]_ has been adapted to include confidence score. + + .. 
[1] http://www.pyimagesearch.com/2015/02/16/
+        faster-non-maximum-suppression-python/
+
+    Examples
+    --------
+
+        >>> boxes = [d.roi for d in detections]
+        >>> scores = [d.confidence for d in detections]
+        >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
+        >>> detections = [detections[i] for i in indices]
+
+    Parameters
+    ----------
+    boxes : ndarray
+        Array of ROIs (x, y, width, height).
+    max_bbox_overlap : float
+        ROIs that overlap more than this value are suppressed.
+    scores : Optional[array_like]
+        Detector confidence score.
+
+    Returns
+    -------
+    List[int]
+        Returns indices of detections that have survived non-maxima suppression.
+
+    """
+    if len(boxes) == 0:
+        return []
+
+    boxes = boxes.astype(np.float)
+    pick = []
+
+    x1 = boxes[:, 0]
+    y1 = boxes[:, 1]
+    x2 = boxes[:, 2] + boxes[:, 0]
+    y2 = boxes[:, 3] + boxes[:, 1]
+
+    area = (x2 - x1 + 1) * (y2 - y1 + 1)
+    if scores is not None:
+        idxs = np.argsort(scores)
+    else:
+        idxs = np.argsort(y2)
+
+    while len(idxs) > 0:
+        last = len(idxs) - 1
+        i = idxs[last]
+        pick.append(i)
+
+        xx1 = np.maximum(x1[i], x1[idxs[:last]])
+        yy1 = np.maximum(y1[i], y1[idxs[:last]])
+        xx2 = np.minimum(x2[i], x2[idxs[:last]])
+        yy2 = np.minimum(y2[i], y2[idxs[:last]])
+
+        w = np.maximum(0, xx2 - xx1 + 1)
+        h = np.maximum(0, yy2 - yy1 + 1)
+
+        overlap = (w * h) / area[idxs[:last]]
+
+        idxs = np.delete(
+            idxs, np.concatenate(
+                ([last], np.where(overlap > max_bbox_overlap)[0])))
+
+    return pick
diff --git a/deep_sort/track.py b/deep_sort/track.py
new file mode 100644
index 0000000..3a713fc
--- /dev/null
+++ b/deep_sort/track.py
@@ -0,0 +1,163 @@
+class TrackState:
+    """
+    Enumeration type for the single target track state. Newly created tracks are
+    classified as `tentative` until enough evidence has been collected. Then,
+    the track state is changed to `confirmed`. Tracks that are no longer alive
+    are classified as `deleted` to mark them for removal from the set of active
+    tracks.
+
+    """
+
+    Tentative = 1
+    Confirmed = 30
+    Deleted = 60
+
+
+class Track:
+    """
+    A single target track with state space `(x, y, a, h)` and associated
+    velocities, where `(x, y)` is the center of the bounding box, `a` is the
+    aspect ratio and `h` is the height.
+
+    Parameters
+    ----------
+    mean : ndarray
+        Mean vector of the initial state distribution.
+    covariance : ndarray
+        Covariance matrix of the initial state distribution.
+    track_id : int
+        A unique track identifier.
+    n_init : int
+        Number of consecutive detections before the track is confirmed. The
+        track state is set to `Deleted` if a miss occurs within the first
+        `n_init` frames.
+    max_age : int
+        The maximum number of consecutive misses before the track state is
+        set to `Deleted`.
+    feature : Optional[ndarray]
+        Feature vector of the detection this track originates from. If not None,
+        this feature is added to the `features` cache.
+
+    Attributes
+    ----------
+    mean : ndarray
+        Mean vector of the initial state distribution.
+    covariance : ndarray
+        Covariance matrix of the initial state distribution.
+    track_id : int
+        A unique track identifier.
+    hits : int
+        Total number of measurement updates.
+    age : int
+        Total number of frames since first occurrence.
+    time_since_update : int
+        Total number of frames since last measurement update.
+    state : TrackState
+        The current track state.
+    features : List[ndarray]
+        A cache of features. On each measurement update, the associated feature
+        vector is added to this list.
+
+    """
+
+    def __init__(self, mean, covariance, track_id, n_init, max_age,
+                 feature=None):
+        self.mean = mean
+        self.covariance = covariance
+        self.track_id = track_id
+        self.hits = 1
+        self.age = 1
+        self.time_since_update = 0
+
+        self.state = TrackState.Tentative
+        self.features = []
+        if feature is not None:
+            self.features.append(feature)
+
+        self._n_init = n_init
+        self._max_age = max_age
+
+    def to_tlwh(self):
+        """Get current position in bounding box format `(top left x, top left y,
+        width, height)`.
+
+        Returns
+        -------
+        ndarray
+            The bounding box.
+
+        """
+        ret = self.mean[:4].copy()
+        ret[2] *= ret[3]
+        ret[:2] -= ret[2:] / 2
+        return ret
+
+    def to_tlbr(self):
+        """Get current position in bounding box format `(min x, min y, max x,
+        max y)`.
+
+        Returns
+        -------
+        ndarray
+            The bounding box.
+
+        """
+        ret = self.to_tlwh()
+        ret[2:] = ret[:2] + ret[2:]
+        return ret
+
+    def predict(self, kf):
+        """Propagate the state distribution to the current time step using a
+        Kalman filter prediction step.
+
+        Parameters
+        ----------
+        kf : kalman_filter.KalmanFilter
+            The Kalman filter.
+
+        """
+        self.mean, self.covariance = kf.predict(self.mean, self.covariance)
+        self.age += 1
+        self.time_since_update += 1
+
+    def update(self, kf, detection):
+        """Perform Kalman filter measurement update step and update the feature
+        cache.
+
+        Parameters
+        ----------
+        kf : kalman_filter.KalmanFilter
+            The Kalman filter.
+        detection : Detection
+            The associated detection.
+
+        """
+        self.mean, self.covariance = kf.update(
+            self.mean, self.covariance, detection.to_xyah())
+        self.features.append(detection.feature)
+
+        self.hits += 1
+        self.time_since_update = 0
+        if self.state == TrackState.Tentative and self.hits >= self._n_init:
+            self.state = TrackState.Confirmed
+
+    def mark_missed(self):
+        """Mark this track as missed (no association at the current time step).
+        """
+        if self.state == TrackState.Tentative:
+            self.state = TrackState.Deleted
+        elif self.time_since_update > self._max_age:
+            self.state = TrackState.Deleted
+
+    def is_tentative(self):
+        """Returns True if this track is tentative (unconfirmed).
+        """
+        return self.state == TrackState.Tentative
+
+    def is_confirmed(self):
+        """Returns True if this track is confirmed."""
+        return self.state == TrackState.Confirmed
+
+    def is_deleted(self):
+        """Returns True if this track is dead and should be deleted."""
+        return self.state == TrackState.Deleted
diff --git a/deep_sort/tracker.py b/deep_sort/tracker.py
new file mode 100644
index 0000000..561a236
--- /dev/null
+++ b/deep_sort/tracker.py
@@ -0,0 +1,139 @@
+from __future__ import absolute_import
+
+import numpy as np
+
+from .track import Track
+from . import iou_matching
+from . import kalman_filter
+from . import linear_assignment
+
+
+class Tracker:
+    """
+    This is the multi-target tracker.
+
+    Parameters
+    ----------
+    metric : nn_matching.NearestNeighborDistanceMetric
+        A distance metric for measurement-to-track association.
+    max_age : int
+        Maximum number of consecutive misses before a track is deleted.
+    n_init : int
+        Number of consecutive detections before the track is confirmed. The
+        track state is set to `Deleted` if a miss occurs within the first
+        `n_init` frames.
+
+    Attributes
+    ----------
+    metric : nn_matching.NearestNeighborDistanceMetric
+        The distance metric used for measurement to track association.
+    max_age : int
+        Maximum number of consecutive misses before a track is deleted.
+    n_init : int
+        Number of frames that a track remains in initialization phase.
+ kf : kalman_filter.KalmanFilter + A Kalman filter to filter target trajectories in image space. + tracks : List[Track] + The list of active tracks at the current time step. + + """ + + def __init__(self, metric, max_iou_distance=0.7, max_age=90, n_init=15): + self.metric = metric + self.max_iou_distance = max_iou_distance + self.max_age = max_age + self.n_init = n_init + + self.kf = kalman_filter.KalmanFilter() + self.tracks = [] + self._next_id = 0 + + def predict(self): + """Propagate track state distributions one time step forward. + + This function should be called once every time step, before `update`. + """ + for track in self.tracks: + track.predict(self.kf) + + def update(self, detections): + """Perform measurement update and track management. + + Parameters + ---------- + detections : List[deep_sort.detection.Detection] + A list of detections at the current time step. + + """ + # Run matching cascade. + matches, unmatched_tracks, unmatched_detections = \ + self._match(detections) + + # Update track set. + for track_idx, detection_idx in matches: + self.tracks[track_idx].update( + self.kf, detections[detection_idx]) + for track_idx in unmatched_tracks: + self.tracks[track_idx].mark_missed() + for detection_idx in unmatched_detections: + self._initiate_track(detections[detection_idx]) + self.tracks = [t for t in self.tracks if not t.is_deleted()] + + # Update distance metric. + active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] + features, targets = [], [] + for track in self.tracks: + if not track.is_confirmed(): + continue + features += track.features + targets += [track.track_id for _ in track.features] + track.features = [] + self.metric.partial_fit( + np.asarray(features), np.asarray(targets), active_targets) + + def _match(self, detections): + + def gated_metric(tracks, dets, track_indices, detection_indices): + features = np.array([dets[i].feature for i in detection_indices]) + targets = np.array([tracks[i].track_id for i in track_indices]) + cost_matrix = self.metric.distance(features, targets) + cost_matrix = linear_assignment.gate_cost_matrix( + self.kf, cost_matrix, tracks, dets, track_indices, + detection_indices) + + return cost_matrix + + # Split track set into confirmed and unconfirmed tracks. + confirmed_tracks = [ + i for i, t in enumerate(self.tracks) if t.is_confirmed()] + unconfirmed_tracks = [ + i for i, t in enumerate(self.tracks) if not t.is_confirmed()] + + # Associate confirmed tracks using appearance features. + matches_a, unmatched_tracks_a, unmatched_detections = \ + linear_assignment.matching_cascade( + gated_metric, self.metric.matching_threshold, self.max_age, + self.tracks, detections, confirmed_tracks) + + # Associate remaining tracks together with unconfirmed tracks using IOU. 
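+        # Appearance descriptors are unreliable for brand-new (unconfirmed)
+        # tracks and for tracks that were missed exactly one frame ago, so
+        # these candidates are associated by bounding-box overlap instead.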
+        iou_track_candidates = unconfirmed_tracks + [
+            k for k in unmatched_tracks_a if
+            self.tracks[k].time_since_update == 1]
+        unmatched_tracks_a = [
+            k for k in unmatched_tracks_a if
+            self.tracks[k].time_since_update != 1]
+        matches_b, unmatched_tracks_b, unmatched_detections = \
+            linear_assignment.min_cost_matching(
+                iou_matching.iou_cost, self.max_iou_distance, self.tracks,
+                detections, iou_track_candidates, unmatched_detections)
+
+        matches = matches_a + matches_b
+        unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
+        return matches, unmatched_tracks, unmatched_detections
+
+    def _initiate_track(self, detection):
+        mean, covariance = self.kf.initiate(detection.to_xyah())
+        self.tracks.append(Track(
+            mean, covariance, self._next_id, self.n_init, self.max_age,
+            detection.feature))
+        self._next_id += 1
diff --git a/demo.py b/demo.py
new file mode 100644
index 0000000..33e6333
--- /dev/null
+++ b/demo.py
@@ -0,0 +1,168 @@
+from __future__ import division, print_function, absolute_import
+
+import os
+import argparse
+import warnings
+
+import cv2
+import numpy as np
+from PIL import Image
+from yolo3.yolo import YOLO
+
+from deep_sort import nn_matching
+from deep_sort import preprocessing
+from deep_sort.tracker import Tracker
+from deep_sort.detection import Detection
+from tools import generate_detections as gdet
+warnings.filterwarnings('ignore')
+
+
+def file_system_work(videofile, out_root_dir):
+    """Create output directories and files.
+    """
+    videofile_name = videofile.split('/')[-1].split('.')[0]
+    out_dir = os.path.join(out_root_dir, videofile_name)
+
+    # create directory for output
+    if not os.path.exists(out_root_dir):
+        os.makedirs(out_root_dir)
+
+    if not os.path.exists(out_dir):
+        os.makedirs(out_dir)
+
+    out_video_file_name = os.path.join(out_dir, 'RESULT_' + videofile_name)
+    out_list_file_name = os.path.join(out_dir, 'DETECTION_LIST_RESULT_' + videofile_name)
+
+    return out_video_file_name, out_list_file_name
+
+
+def main(detector, videofile='input/real.MOV', out_root_dir='output',
+         process_stream=False, writeVideo_flag=True, show_detections=False):
+    # Definition of the parameters
+    max_cosine_distance = 0.3
+    nn_budget = None
+    nms_max_overlap = 1.0
+
+    # deep_sort
+    model_filename = 'model_data/mars-small128.pb'
+    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
+
+    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
+    tracker = Tracker(metric)
+    tracks_ids = []
+
+    if process_stream:
+        print("SOURCE: Stream is processing.")
+        video_capture = cv2.VideoCapture(0)
+    else:
+        print("SOURCE: File {} is processing.".format(videofile))
+        video_capture = cv2.VideoCapture(videofile)
+
+    if writeVideo_flag:
+        # Define the codec and create VideoWriter object
+        w = int(video_capture.get(3))
+        h = int(video_capture.get(4))
+        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
+        out_video_file_name, out_list_file_name = file_system_work(videofile, out_root_dir)
+
+        out = cv2.VideoWriter(out_video_file_name, fourcc, 15, (w, h))
+        list_file = open(out_list_file_name, 'w')
+        frame_index = -1
+
+    print('EXECUTION: Processing...')
+    print('EXECUTION: Press Q to stop execution.')
+    while video_capture.isOpened():
+        ret, frame = video_capture.read()  # frame shape 640*480*3
+
+        if not ret:
+            break
+
+        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
+
+        boxs = detector.detect_image(image)
+        features = encoder(frame, boxs)
+
+        # The detector does not provide per-box confidence scores here, so
+        # every detection score is set to 1.0.
+        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
+
+        # Run non-maximum suppression.
+        boxes = np.array([d.tlwh for d in detections])
+        scores = np.array([d.confidence for d in detections])
+        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
+        detections = [detections[i] for i in indices]
+
+        # Call the tracker
+        tracker.predict()
+        tracker.update(detections)
+
+        for track in tracker.tracks:
+            if not track.is_confirmed() or track.time_since_update > 1:
+                continue
+
+            bbox = track.to_tlbr()
+            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
+            if track.track_id not in tracks_ids:
+                tracks_ids.append(track.track_id)
+
+            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)
+
+        if show_detections:
+            cv2.imshow('', frame)
+
+        if writeVideo_flag:
+            # save a frame
+            out.write(frame)
+            frame_index = frame_index + 1
+            list_file.write(str(frame_index) + ' ')
+            if len(boxs) != 0:
+                for i in range(0, len(boxs)):
+                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' + str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
+
+            list_file.write('\n')
+
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+
+    print('############ RESULT ###################')
+    print('RESULT: Number of tracks = ', len(tracks_ids))
+    print('############ RESULT ###################')
+    # end processing and write
+    video_capture.release()
+
+    if writeVideo_flag:
+        out.release()
+        list_file.close()
+
+    cv2.destroyAllWindows()
+
+
+def str2bool(value):
+    """Parse a command line string into a boolean.
+
+    argparse would otherwise treat any non-empty string, including "False",
+    as truthy.
+    """
+    return str(value).lower() in ('true', '1', 'yes')
+
+
+def parse_args():
+    """Parse command line arguments.
+    """
+    parser = argparse.ArgumentParser(description="People counter")
+
+    # All flags have sensible defaults, so none of them is marked required.
+    parser.add_argument("--videofile", default="input/real.MOV",
+                        help="Path to file which you want to process.")
+    parser.add_argument("--out_root_dir", default="output",
+                        help="Directory for output.")
+    parser.add_argument("--process_stream", type=str2bool, default=False,
+                        help="If True then read video from camera else process file")
+    parser.add_argument("--writeVideo_flag", type=str2bool, default=True,
+                        help="If True then write detections on output video else don't")
+    parser.add_argument("--show_detections", type=str2bool, default=False,
+                        help="If True display detections on each frame of video, else don't. "
+                             "NOTE: if you run on a server it has to be False")
+
+    return parser.parse_args()
+
+
+if __name__ == '__main__':
+    args = parse_args()
+
+    detector = YOLO()
+    main(detector,
+         args.videofile,
+         args.out_root_dir,
+         args.process_stream,
+         args.writeVideo_flag,
+         args.show_detections)
diff --git a/tools/__init__.py b/tools/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tools/generate_detections.py b/tools/generate_detections.py
new file mode 100644
index 0000000..4c47d2e
--- /dev/null
+++ b/tools/generate_detections.py
@@ -0,0 +1,181 @@
+# vim: expandtab:ts=4:sw=4
+import os
+import errno
+import argparse
+import numpy as np
+import cv2
+import tensorflow as tf
+
+
+def _run_in_batches(f, data_dict, out, batch_size):
+    data_len = len(out)
+    num_batches = int(data_len / batch_size)
+
+    s, e = 0, 0
+    for i in range(num_batches):
+        s, e = i * batch_size, (i + 1) * batch_size
+        batch_data_dict = {k: v[s:e] for k, v in data_dict.items()}
+        out[s:e] = f(batch_data_dict)
+    if e < len(out):
+        batch_data_dict = {k: v[e:] for k, v in data_dict.items()}
+        out[e:] = f(batch_data_dict)
+
+
+def extract_image_patch(image, bbox, patch_shape):
+    """Extract image patch from bounding box.
+ + Parameters + ---------- + image : ndarray + The full image. + bbox : array_like + The bounding box in format (x, y, width, height). + patch_shape : Optional[array_like] + This parameter can be used to enforce a desired patch shape + (height, width). First, the `bbox` is adapted to the aspect ratio + of the patch shape, then it is clipped at the image boundaries. + If None, the shape is computed from :arg:`bbox`. + + Returns + ------- + ndarray | NoneType + An image patch showing the :arg:`bbox`, optionally reshaped to + :arg:`patch_shape`. + Returns None if the bounding box is empty or fully outside of the image + boundaries. + + """ + bbox = np.array(bbox) + if patch_shape is not None: + # correct aspect ratio to patch shape + target_aspect = float(patch_shape[1]) / patch_shape[0] + new_width = target_aspect * bbox[3] + bbox[0] -= (new_width - bbox[2]) / 2 + bbox[2] = new_width + + # convert to top left, bottom right + bbox[2:] += bbox[:2] + bbox = bbox.astype(np.int) + + # clip at image boundaries + bbox[:2] = np.maximum(0, bbox[:2]) + bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:]) + if np.any(bbox[:2] >= bbox[2:]): + return None + sx, sy, ex, ey = bbox + image = image[sy:ey, sx:ex] + image = cv2.resize(image, tuple(patch_shape[::-1])) + return image + + +class ImageEncoder(object): + + def __init__(self, checkpoint_filename, input_name="images", + output_name="features"): + self.session = tf.Session() + with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle: + graph_def = tf.GraphDef() + graph_def.ParseFromString(file_handle.read()) + tf.import_graph_def(graph_def, name="net") + self.input_var = tf.get_default_graph().get_tensor_by_name( + "net/%s:0" % input_name) + self.output_var = tf.get_default_graph().get_tensor_by_name( + "net/%s:0" % output_name) + + assert len(self.output_var.get_shape()) == 2 + assert len(self.input_var.get_shape()) == 4 + self.feature_dim = self.output_var.get_shape().as_list()[-1] + self.image_shape = self.input_var.get_shape().as_list()[1:] + + def __call__(self, data_x, batch_size=32): + out = np.zeros((len(data_x), self.feature_dim), np.float32) + _run_in_batches( + lambda x: self.session.run(self.output_var, feed_dict=x), + {self.input_var: data_x}, out, batch_size) + return out + + +def create_box_encoder(model_filename, input_name="images", + output_name="features", batch_size=32): + image_encoder = ImageEncoder(model_filename, input_name, output_name) + image_shape = image_encoder.image_shape + + def encoder(image, boxes): + image_patches = [] + for box in boxes: + patch = extract_image_patch(image, box, image_shape[:2]) + if patch is None: + print("WARNING: Failed to extract image patch: %s." % str(box)) + patch = np.random.uniform( + 0., 255., image_shape).astype(np.uint8) + image_patches.append(patch) + image_patches = np.asarray(image_patches) + return image_encoder(image_patches, batch_size) + + return encoder + + +def generate_detections(encoder, mot_dir, output_dir, detection_dir=None): + """Generate detections with features. + + Parameters + ---------- + encoder : Callable[image, ndarray] -> ndarray + The encoder function takes as input a BGR color image and a matrix of + bounding boxes in format `(x, y, w, h)` and returns a matrix of + corresponding feature vectors. + mot_dir : str + Path to the MOTChallenge directory (can be either train or test). + output_dir + Path to the output directory. Will be created if it does not exist. + detection_dir + Path to custom detections. 
The directory structure should be the default
+        MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the
+        standard MOTChallenge detections.
+
+    """
+    if detection_dir is None:
+        detection_dir = mot_dir
+    try:
+        os.makedirs(output_dir)
+    except OSError as exception:
+        if exception.errno == errno.EEXIST and os.path.isdir(output_dir):
+            pass
+        else:
+            raise ValueError(
+                "Failed to create output directory '%s'" % output_dir)
+
+    for sequence in os.listdir(mot_dir):
+        print("Processing %s" % sequence)
+        sequence_dir = os.path.join(mot_dir, sequence)
+
+        image_dir = os.path.join(sequence_dir, "img1")
+        image_filenames = {
+            int(os.path.splitext(f)[0]): os.path.join(image_dir, f)
+            for f in os.listdir(image_dir)}
+
+        detection_file = os.path.join(
+            detection_dir, sequence, "det/det.txt")
+        detections_in = np.loadtxt(detection_file, delimiter=',')
+        detections_out = []
+
+        frame_indices = detections_in[:, 0].astype(np.int)
+        min_frame_idx = frame_indices.astype(np.int).min()
+        max_frame_idx = frame_indices.astype(np.int).max()
+        for frame_idx in range(min_frame_idx, max_frame_idx + 1):
+            print("Frame %05d/%05d" % (frame_idx, max_frame_idx))
+            mask = frame_indices == frame_idx
+            rows = detections_in[mask]
+
+            if frame_idx not in image_filenames:
+                print("WARNING could not find image for frame %d" % frame_idx)
+                continue
+            bgr_image = cv2.imread(
+                image_filenames[frame_idx], cv2.IMREAD_COLOR)
+            features = encoder(bgr_image, rows[:, 2:6].copy())
+            detections_out += [np.r_[(row, feature)] for row, feature
+                               in zip(rows, features)]
+
+        output_filename = os.path.join(output_dir, "%s.npy" % sequence)
+        np.save(
+            output_filename, np.asarray(detections_out), allow_pickle=False)
diff --git a/yolo3/__init__.py b/yolo3/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/yolo3/model.py b/yolo3/model.py
new file mode 100644
index 0000000..e890e50
--- /dev/null
+++ b/yolo3/model.py
@@ -0,0 +1,373 @@
+"""YOLO_v3 Model Defined in Keras."""
+
+from functools import wraps
+
+import numpy as np
+import tensorflow as tf
+from keras import backend as K
+
+from keras.models import Model
+from keras.regularizers import l2
+from keras.layers.advanced_activations import LeakyReLU
+from keras.layers.normalization import BatchNormalization
+from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate
+
+from yolo3.utils import compose
+
+
+@wraps(Conv2D)
+def DarknetConv2D(*args, **kwargs):
+    """Wrapper to set Darknet parameters for Convolution2D."""
+    darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
+    darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
+    darknet_conv_kwargs.update(kwargs)
+    return Conv2D(*args, **darknet_conv_kwargs)
+
+
+def DarknetConv2D_BN_Leaky(*args, **kwargs):
+    """Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
+    no_bias_kwargs = {'use_bias': False}
+    no_bias_kwargs.update(kwargs)
+    return compose(
+        DarknetConv2D(*args, **no_bias_kwargs),
+        BatchNormalization(),
+        LeakyReLU(alpha=0.1))
+
+
+def resblock_body(x, num_filters, num_blocks):
+    '''A series of resblocks starting with a downsampling Convolution2D'''
+    # Darknet uses left and top padding instead of 'same' mode
+    x = ZeroPadding2D(((1,0),(1,0)))(x)
+    x = DarknetConv2D_BN_Leaky(num_filters, (3,3), strides=(2,2))(x)
+    for i in range(num_blocks):
+        y = compose(
+            DarknetConv2D_BN_Leaky(num_filters//2, (1,1)),
+            DarknetConv2D_BN_Leaky(num_filters, (3,3)))(x)
+        x = Add()([x,y])
+    return x
+
+
+def darknet_body(x):
+    '''Darknet body
having 52 Convolution2D layers'''
+    x = DarknetConv2D_BN_Leaky(32, (3,3))(x)
+    x = resblock_body(x, 64, 1)
+    x = resblock_body(x, 128, 2)
+    x = resblock_body(x, 256, 8)
+    x = resblock_body(x, 512, 8)
+    x = resblock_body(x, 1024, 4)
+    return x
+
+
+def make_last_layers(x, num_filters, out_filters):
+    '''6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer'''
+    x = compose(
+        DarknetConv2D_BN_Leaky(num_filters, (1,1)),
+        DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
+        DarknetConv2D_BN_Leaky(num_filters, (1,1)),
+        DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
+        DarknetConv2D_BN_Leaky(num_filters, (1,1)))(x)
+    y = compose(
+        DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
+        DarknetConv2D(out_filters, (1,1)))(x)
+    return x, y
+
+
+def yolo_body(inputs, num_anchors, num_classes):
+    """Create YOLO_V3 model CNN body in Keras."""
+    darknet = Model(inputs, darknet_body(inputs))
+    x, y1 = make_last_layers(darknet.output, 512, num_anchors*(num_classes+5))
+
+    x = compose(
+        DarknetConv2D_BN_Leaky(256, (1,1)),
+        UpSampling2D(2))(x)
+    x = Concatenate()([x,darknet.layers[152].output])
+    x, y2 = make_last_layers(x, 256, num_anchors*(num_classes+5))
+
+    x = compose(
+        DarknetConv2D_BN_Leaky(128, (1,1)),
+        UpSampling2D(2))(x)
+    x = Concatenate()([x,darknet.layers[92].output])
+    x, y3 = make_last_layers(x, 128, num_anchors*(num_classes+5))
+
+    return Model(inputs, [y1,y2,y3])
+
+
+def yolo_head(feats, anchors, num_classes, input_shape):
+    """Convert final layer features to bounding box parameters."""
+    num_anchors = len(anchors)
+    # Reshape to batch, height, width, num_anchors, box_params.
+    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
+
+    grid_shape = K.shape(feats)[1:3]  # height, width
+    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
+                    [1, grid_shape[1], 1, 1])
+    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
+                    [grid_shape[0], 1, 1, 1])
+    grid = K.concatenate([grid_x, grid_y])
+    grid = K.cast(grid, K.dtype(feats))
+
+    feats = K.reshape(
+        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
+
+    box_xy = K.sigmoid(feats[..., :2])
+    box_wh = K.exp(feats[..., 2:4])
+    box_confidence = K.sigmoid(feats[..., 4:5])
+    box_class_probs = K.sigmoid(feats[..., 5:])
+
+    # Adjust predictions to each spatial grid point and anchor size.
+    box_xy = (box_xy + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
+    box_wh = box_wh * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
+
+    return box_xy, box_wh, box_confidence, box_class_probs
+
+
+def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
+    '''Get corrected boxes'''
+    box_yx = box_xy[..., ::-1]
+    box_hw = box_wh[..., ::-1]
+    input_shape = K.cast(input_shape, K.dtype(box_yx))
+    image_shape = K.cast(image_shape, K.dtype(box_yx))
+    new_shape = K.round(image_shape * K.min(input_shape/image_shape))
+    offset = (input_shape-new_shape)/2./input_shape
+    scale = input_shape/new_shape
+    box_yx = (box_yx - offset) * scale
+    box_hw *= scale
+
+    box_mins = box_yx - (box_hw / 2.)
+    box_maxes = box_yx + (box_hw / 2.)
+    boxes = K.concatenate([
+        box_mins[..., 0:1],   # y_min
+        box_mins[..., 1:2],   # x_min
+        box_maxes[..., 0:1],  # y_max
+        box_maxes[..., 1:2]   # x_max
+    ])
+
+    # Scale boxes back to original image shape.
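+    # The offset and scale computed above undo the letterbox padding applied
+    # at input time; multiplying by the image shape then yields boxes in the
+    # original image's pixel coordinates.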
+    boxes *= K.concatenate([image_shape, image_shape])
+    return boxes
+
+
+def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
+    '''Process Conv layer output'''
+    box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats,
+        anchors, num_classes, input_shape)
+    boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
+    boxes = K.reshape(boxes, [-1, 4])
+    box_scores = box_confidence * box_class_probs
+    box_scores = K.reshape(box_scores, [-1, num_classes])
+    return boxes, box_scores
+
+
+def yolo_eval(yolo_outputs,
+              anchors,
+              num_classes,
+              image_shape,
+              max_boxes=20,
+              score_threshold=.6,
+              iou_threshold=.5):
+    """Evaluate YOLO model on given input and return filtered boxes."""
+    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
+    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
+    boxes = []
+    box_scores = []
+    for l in range(3):
+        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
+            anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
+        boxes.append(_boxes)
+        box_scores.append(_box_scores)
+    boxes = K.concatenate(boxes, axis=0)
+    box_scores = K.concatenate(box_scores, axis=0)
+
+    mask = box_scores >= score_threshold
+    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
+    boxes_ = []
+    scores_ = []
+    classes_ = []
+    for c in range(num_classes):
+        class_boxes = tf.boolean_mask(boxes, mask[:, c])
+        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
+        nms_index = tf.image.non_max_suppression(
+            class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
+        class_boxes = K.gather(class_boxes, nms_index)
+        class_box_scores = K.gather(class_box_scores, nms_index)
+        classes = K.ones_like(class_box_scores, 'int32') * c
+        boxes_.append(class_boxes)
+        scores_.append(class_box_scores)
+        classes_.append(classes)
+    boxes_ = K.concatenate(boxes_, axis=0)
+    scores_ = K.concatenate(scores_, axis=0)
+    classes_ = K.concatenate(classes_, axis=0)
+
+    return boxes_, scores_, classes_
+
+
+def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
+    '''Preprocess true boxes to training input format
+
+    Parameters
+    ----------
+    true_boxes: array, shape=(m, T, 5)
+        Absolute x_min, y_min, x_max, y_max, class_code relative to input_shape.
+    input_shape: array-like, hw, multiples of 32
+    anchors: array, shape=(N, 2), wh
+    num_classes: integer
+
+    Returns
+    -------
+    y_true: list of array, shape like yolo_outputs, xywh are relative values
+
+    '''
+    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
+
+    true_boxes = np.array(true_boxes, dtype='float32')
+    input_shape = np.array(input_shape, dtype='int32')
+    boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
+    boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
+    true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]
+    true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]
+
+    m = true_boxes.shape[0]
+    grid_shapes = [input_shape//{0:32, 1:16, 2:8}[l] for l in range(3)]
+    y_true = [np.zeros((m,grid_shapes[l][0],grid_shapes[l][1],len(anchor_mask[l]),5+num_classes),
+        dtype='float32') for l in range(3)]
+
+    # Expand dim to apply broadcasting.
+    anchors = np.expand_dims(anchors, 0)
+    anchor_maxes = anchors / 2.
+    anchor_mins = -anchor_maxes
+    valid_mask = boxes_wh[..., 0]>0
+
+    for b in range(m):
+        # Discard zero rows.
+        wh = boxes_wh[b, valid_mask[b]]
+        # Expand dim to apply broadcasting.
+        wh = np.expand_dims(wh, -2)
+        box_maxes = wh / 2.
+ box_mins = -box_maxes + + intersect_mins = np.maximum(box_mins, anchor_mins) + intersect_maxes = np.minimum(box_maxes, anchor_maxes) + intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.) + intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] + box_area = wh[..., 0] * wh[..., 1] + anchor_area = anchors[..., 0] * anchors[..., 1] + iou = intersect_area / (box_area + anchor_area - intersect_area) + + # Find best anchor for each true box + best_anchor = np.argmax(iou, axis=-1) + + for t, n in enumerate(best_anchor): + for l in range(3): + if n in anchor_mask[l]: + i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32') + j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32') + n = anchor_mask[l].index(n) + c = true_boxes[b,t, 4].astype('int32') + y_true[l][b, j, i, n, 0:4] = true_boxes[b,t, 0:4] + y_true[l][b, j, i, n, 4] = 1 + y_true[l][b, j, i, n, 5+c] = 1 + break + + return y_true + + +def box_iou(b1, b2): + '''Return iou tensor + + Parameters + ---------- + b1: tensor, shape=(i1,...,iN, 4), xywh + b2: tensor, shape=(j, 4), xywh + + Returns + ------- + iou: tensor, shape=(i1,...,iN, j) + + ''' + + # Expand dim to apply broadcasting. + b1 = K.expand_dims(b1, -2) + b1_xy = b1[..., :2] + b1_wh = b1[..., 2:4] + b1_wh_half = b1_wh/2. + b1_mins = b1_xy - b1_wh_half + b1_maxes = b1_xy + b1_wh_half + + # Expand dim to apply broadcasting. + b2 = K.expand_dims(b2, 0) + b2_xy = b2[..., :2] + b2_wh = b2[..., 2:4] + b2_wh_half = b2_wh/2. + b2_mins = b2_xy - b2_wh_half + b2_maxes = b2_xy + b2_wh_half + + intersect_mins = K.maximum(b1_mins, b2_mins) + intersect_maxes = K.minimum(b1_maxes, b2_maxes) + intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.) + intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] + b1_area = b1_wh[..., 0] * b1_wh[..., 1] + b2_area = b2_wh[..., 0] * b2_wh[..., 1] + iou = intersect_area / (b1_area + b2_area - intersect_area) + + return iou + + +def yolo_loss(args, anchors, num_classes, ignore_thresh=.5): + '''Return yolo_loss tensor + + Parameters + ---------- + yolo_outputs: list of tensor, the output of yolo_body + y_true: list of array, the output of preprocess_true_boxes + anchors: array, shape=(T, 2), wh + num_classes: integer + ignore_thresh: float, the iou threshold whether to ignore object confidence loss + + Returns + ------- + loss: tensor, shape=(1,) + + ''' + yolo_outputs = args[:3] + y_true = args[3:] + anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] + input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0])) + grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(3)] + loss = 0 + m = K.shape(yolo_outputs[0])[0] + + for l in range(3): + object_mask = y_true[l][..., 4:5] + true_class_probs = y_true[l][..., 5:] + + pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(yolo_outputs[l], + anchors[anchor_mask[l]], num_classes, input_shape) + pred_box = K.concatenate([pred_xy, pred_wh]) + + # Darknet box loss. + xy_delta = (y_true[l][..., :2]-pred_xy)*grid_shapes[l][::-1] + wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh) + # Avoid log(0)=-inf. + wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta)) + box_delta = K.concatenate([xy_delta, wh_delta], axis=-1) + box_delta_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4] + + # Find ignore mask, iterate over each of batch. 
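+        # Predicted boxes whose best IoU with any ground-truth box exceeds
+        # ignore_thresh are excluded from the no-object confidence loss, since
+        # they likely cover a real object assigned to another anchor or scale.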
+ ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True) + object_mask_bool = K.cast(object_mask, 'bool') + def loop_body(b, ignore_mask): + true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0]) + iou = box_iou(pred_box[b], true_box) + best_iou = K.max(iou, axis=-1) + ignore_mask = ignore_mask.write(b, K.cast(best_iou