Incorrect predictions #14

Open
mirdasmusa opened this issue Nov 25, 2022 · 0 comments
import cv2
import numpy as np
import os
import mediapipe as mp
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

mp_holistic = mp.solutions.holistic

mp_drawing = mp.solutions.drawing_utils

DATA_PATH = os.path.join("C:/Users/96654/PycharmProjects/mrdas/newmydata")

# Actions that we try to detect
actions = np.array(['hello', 'thanks', 'cool'])

# Forty videos worth of data per action
no_sequences = 40

# Videos are going to be 40 frames in length
sequence_length = 40

# Folder start
start_folder = 0

# Create one folder per (action, sequence) for the collected keypoints
for action in actions:
    for sequence in range(no_sequences):
        try:
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except FileExistsError:
            pass
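
# Not shown in this snippet: the loading loop further down expects every
# DATA_PATH/<action>/<sequence> folder to already contain 40 per-frame .npy
# files. A minimal sketch of the collection pass that would produce them,
# assuming a webcam at index 0 and the mediapipe_detection/extract_keypoints
# helpers defined below (left commented so the script stays runnable as-is):
#
#     cap = cv2.VideoCapture(0)
#     with mp_holistic.Holistic(min_detection_confidence=0.5,
#                               min_tracking_confidence=0.5) as holistic:
#         for action in actions:
#             for sequence in range(no_sequences):
#                 for frame_num in range(sequence_length):
#                     ret, frame = cap.read()
#                     image, results = mediapipe_detection(frame, holistic)
#                     np.save(os.path.join(DATA_PATH, action, str(sequence),
#                                          str(frame_num)),
#                             extract_keypoints(results))
#     cap.release()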

def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR -> RGB for MediaPipe
    image.flags.writeable = False                   # mark read-only while processing
    results = model.process(image)                  # run the Holistic model
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # back to BGR for OpenCV (original converted BGR2RGB twice)
    return image, results

def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION)
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

def draw_styled_landmarks(image, results):
    # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                              mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                              mp_drawing.DrawingSpec(color=(80, 256, 121), thickness=1, circle_radius=1))
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2, circle_radius=2))
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    # Draw right hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33 * 4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468 * 3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([pose, face, lh, rh])
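
# Per frame the concatenated vector has 33*4 + 468*3 + 21*3 + 21*3 = 1662
# values, which is where input_shape=(40, 1662) in the model below comes from.
assert 33 * 4 + 468 * 3 + 21 * 3 + 21 * 3 == 1662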

#sys.tracebacklimit = 0
label_map = {label: num for num, label in enumerate(actions)}

# Load the saved per-frame keypoints into (sequence, frame, keypoint) arrays
sequences, labels = [], []
for action in actions:
    for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

X = np.array(sequences)
y = to_categorical(labels).astype(int)
print(X.shape)  # expected: (120, 40, 1662) for 3 actions x 40 sequences

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)
print(y_test.shape)  # 5% of 120 sequences leaves only 6 test samples
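
# Possible tweak (not in the original): with only ~120 sequences, a 5% test
# split can miss an action entirely; stratifying keeps all three represented.
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.05, stratify=np.argmax(y, axis=1))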

colors = [(245, 117, 16), (117, 245, 16), (16, 117, 245)]

log_dir = os.path.join('Logs2')
tb_callback = TensorBoard(log_dir=log_dir)

model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(40, 1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

res = [0.7, 0.2, 0.1]
print(actions[np.argmax(res)])  # sanity check: prints 'hello'

# In the posted code the compile/fit lines were commented out, so predict()
# ran on randomly initialised weights, a likely cause of the incorrect
# predictions. The model must be trained (or trained weights loaded) first.
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
model.fit(X_train, y_train, epochs=2000, callbacks=[tb_callback])
model.summary()

res = model.predict(X_test)
print(actions[np.argmax(res[4])])
model.save('action1.h5')

yhat = model.predict(X_test)
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()
print(multilabel_confusion_matrix(ytrue, yhat))
print(accuracy_score(ytrue, yhat))
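
# Alternative to retraining on every run (assumes 'action1.h5' already holds
# trained weights saved by an earlier run):
# from tensorflow.keras.models import load_model
# model = load_model('action1.h5')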

def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0, 60 + num * 40), (int(prob * 100), 90 + num * 40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85 + num * 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    return output_frame
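
# prob_viz draws one horizontal bar per action, its width scaled by the
# predicted probability; it assumes len(colors) >= len(actions) (three each here).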

#sequence.reverse()
#len(sequence)

#sequence.append('def')
#sequence.reverse()
#sequence[-40:]

sequence = []
sentence = []
predictions = []
threshold = 0.5

cap = cv2.VideoCapture(0)

# Set mediapipe model

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)

        #print(results)

        # Draw landmarks
        draw_styled_landmarks(image, results)
        image = cv2.flip(image, 1)

        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-40:]

        if len(sequence) == 40:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))

            # 3. Viz logic
            if np.unique(predictions[-10:])[0] == np.argmax(res):
                if res[np.argmax(res)] > threshold:

                    if len(sentence) > 0:
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5:
                sentence = sentence[-5:]

            # Viz probabilities
            image = prob_viz(res, actions, image, colors)

        cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()
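
# Note on the smoothing condition above (another possible contributor to the
# incorrect predictions): np.unique() returns a sorted array, so
# np.unique(predictions[-10:])[0] is just the smallest class index seen in the
# last 10 frames, not proof that the prediction is stable. A stricter check
# would require the last 10 predictions to agree:
#
#     recent = np.unique(predictions[-10:])
#     if recent.size == 1 and recent[0] == np.argmax(res):
#         ...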

